]> CyberLeo.Net >> Repos - FreeBSD/releng/10.2.git/blob - sys/kern/kern_rctl.c
- Copy stable/10@285827 to releng/10.2 in preparation for 10.2-RC1
[FreeBSD/releng/10.2.git] / sys / kern / kern_rctl.c
1 /*-
2  * Copyright (c) 2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/malloc.h>
38 #include <sys/queue.h>
39 #include <sys/refcount.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/limits.h>
43 #include <sys/loginclass.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/racct.h>
47 #include <sys/rctl.h>
48 #include <sys/resourcevar.h>
49 #include <sys/sx.h>
50 #include <sys/sysent.h>
51 #include <sys/sysproto.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/eventhandler.h>
55 #include <sys/lock.h>
56 #include <sys/mutex.h>
57 #include <sys/rwlock.h>
58 #include <sys/sbuf.h>
59 #include <sys/taskqueue.h>
60 #include <sys/tree.h>
61 #include <vm/uma.h>
62
63 #ifdef RCTL
64 #ifndef RACCT
65 #error "The RCTL option requires the RACCT option"
66 #endif
67
68 FEATURE(rctl, "Resource Limits");
69
/*
 * HRF_* flag values.  Their consumers are outside this part of the file.
 */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/* Default buffer size for rctl_get_rules(2). */
#define	RCTL_DEFAULT_BUFSIZE	4096
/* NOTE(review): presumably the cap on user-supplied input strings; confirm. */
#define	RCTL_MAX_INBUFLEN	4096
/* Size of the fixed buffer rctl_enforce() formats its messages into. */
#define	RCTL_LOG_BUFSIZE	128

/* Margin rctl_pcpu_available() subtracts when the %cpu limit is large. */
#define	RCTL_PCPU_SHIFT		(10 * 1000000)
80
/*
 * A 'rctl_rule_link' ties one rule to one racct the rule applies to; each
 * racct keeps a list of such links.  For example, the rule
 * 'user:X:openfiles:deny=N/process' is linked with the uidinfo for user X
 * and with every process of that user.
 */
struct rctl_rule_link {
	/* Linkage on the owning racct's r_rule_links list. */
	LIST_ENTRY(rctl_rule_link)	rrl_next;
	/* The rule this link refers to. */
	struct rctl_rule		*rrl_rule;
	/* Non-zero once the rule's action has been carried out. */
	int				rrl_exceeded;
};
91
/*
 * One entry of a name <-> value lookup table.  Tables are arrays of these
 * terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* textual name */
	int		d_value;	/* constant the name stands for */
};
96
97 static struct dict subjectnames[] = {
98         { "process", RCTL_SUBJECT_TYPE_PROCESS },
99         { "user", RCTL_SUBJECT_TYPE_USER },
100         { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
101         { "jail", RCTL_SUBJECT_TYPE_JAIL },
102         { NULL, -1 }};
103
104 static struct dict resourcenames[] = {
105         { "cputime", RACCT_CPU },
106         { "datasize", RACCT_DATA },
107         { "stacksize", RACCT_STACK },
108         { "coredumpsize", RACCT_CORE },
109         { "memoryuse", RACCT_RSS },
110         { "memorylocked", RACCT_MEMLOCK },
111         { "maxproc", RACCT_NPROC },
112         { "openfiles", RACCT_NOFILE },
113         { "vmemoryuse", RACCT_VMEM },
114         { "pseudoterminals", RACCT_NPTS },
115         { "swapuse", RACCT_SWAP },
116         { "nthr", RACCT_NTHR },
117         { "msgqqueued", RACCT_MSGQQUEUED },
118         { "msgqsize", RACCT_MSGQSIZE },
119         { "nmsgq", RACCT_NMSGQ },
120         { "nsem", RACCT_NSEM },
121         { "nsemop", RACCT_NSEMOP },
122         { "nshm", RACCT_NSHM },
123         { "shmsize", RACCT_SHMSIZE },
124         { "wallclock", RACCT_WALLCLOCK },
125         { "pcpu", RACCT_PCTCPU },
126         { NULL, -1 }};
127
128 static struct dict actionnames[] = {
129         { "sighup", RCTL_ACTION_SIGHUP },
130         { "sigint", RCTL_ACTION_SIGINT },
131         { "sigquit", RCTL_ACTION_SIGQUIT },
132         { "sigill", RCTL_ACTION_SIGILL },
133         { "sigtrap", RCTL_ACTION_SIGTRAP },
134         { "sigabrt", RCTL_ACTION_SIGABRT },
135         { "sigemt", RCTL_ACTION_SIGEMT },
136         { "sigfpe", RCTL_ACTION_SIGFPE },
137         { "sigkill", RCTL_ACTION_SIGKILL },
138         { "sigbus", RCTL_ACTION_SIGBUS },
139         { "sigsegv", RCTL_ACTION_SIGSEGV },
140         { "sigsys", RCTL_ACTION_SIGSYS },
141         { "sigpipe", RCTL_ACTION_SIGPIPE },
142         { "sigalrm", RCTL_ACTION_SIGALRM },
143         { "sigterm", RCTL_ACTION_SIGTERM },
144         { "sigurg", RCTL_ACTION_SIGURG },
145         { "sigstop", RCTL_ACTION_SIGSTOP },
146         { "sigtstp", RCTL_ACTION_SIGTSTP },
147         { "sigchld", RCTL_ACTION_SIGCHLD },
148         { "sigttin", RCTL_ACTION_SIGTTIN },
149         { "sigttou", RCTL_ACTION_SIGTTOU },
150         { "sigio", RCTL_ACTION_SIGIO },
151         { "sigxcpu", RCTL_ACTION_SIGXCPU },
152         { "sigxfsz", RCTL_ACTION_SIGXFSZ },
153         { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
154         { "sigprof", RCTL_ACTION_SIGPROF },
155         { "sigwinch", RCTL_ACTION_SIGWINCH },
156         { "siginfo", RCTL_ACTION_SIGINFO },
157         { "sigusr1", RCTL_ACTION_SIGUSR1 },
158         { "sigusr2", RCTL_ACTION_SIGUSR2 },
159         { "sigthr", RCTL_ACTION_SIGTHR },
160         { "deny", RCTL_ACTION_DENY },
161         { "log", RCTL_ACTION_LOG },
162         { "devctl", RCTL_ACTION_DEVCTL },
163         { NULL, -1 }};
164
165 static void rctl_init(void);
166 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
167
168 static uma_zone_t rctl_rule_link_zone;
169 static uma_zone_t rctl_rule_zone;
170 static struct rwlock rctl_lock;
171 RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");
172
173 static int rctl_rule_fully_specified(const struct rctl_rule *rule);
174 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
175
176 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
177
178 static const char *
179 rctl_subject_type_name(int subject)
180 {
181         int i;
182
183         for (i = 0; subjectnames[i].d_name != NULL; i++) {
184                 if (subjectnames[i].d_value == subject)
185                         return (subjectnames[i].d_name);
186         }
187
188         panic("rctl_subject_type_name: unknown subject type %d", subject);
189 }
190
191 static const char *
192 rctl_action_name(int action)
193 {
194         int i;
195
196         for (i = 0; actionnames[i].d_name != NULL; i++) {
197                 if (actionnames[i].d_value == action)
198                         return (actionnames[i].d_name);
199         }
200
201         panic("rctl_action_name: unknown action %d", action);
202 }
203
204 const char *
205 rctl_resource_name(int resource)
206 {
207         int i;
208
209         for (i = 0; resourcenames[i].d_name != NULL; i++) {
210                 if (resourcenames[i].d_value == resource)
211                         return (resourcenames[i].d_name);
212         }
213
214         panic("rctl_resource_name: unknown resource %d", resource);
215 }
216
217 /*
218  * Return the amount of resource that can be allocated by 'p' before
219  * hitting 'rule'.
220  */
221 static int64_t
222 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
223 {
224         int resource;
225         int64_t available = INT64_MAX;
226         struct ucred *cred = p->p_ucred;
227
228         ASSERT_RACCT_ENABLED();
229         rw_assert(&rctl_lock, RA_LOCKED);
230
231         resource = rule->rr_resource;
232         switch (rule->rr_per) {
233         case RCTL_SUBJECT_TYPE_PROCESS:
234                 available = rule->rr_amount -
235                     p->p_racct->r_resources[resource];
236                 break;
237         case RCTL_SUBJECT_TYPE_USER:
238                 available = rule->rr_amount -
239                     cred->cr_ruidinfo->ui_racct->r_resources[resource];
240                 break;
241         case RCTL_SUBJECT_TYPE_LOGINCLASS:
242                 available = rule->rr_amount -
243                     cred->cr_loginclass->lc_racct->r_resources[resource];
244                 break;
245         case RCTL_SUBJECT_TYPE_JAIL:
246                 available = rule->rr_amount -
247                     cred->cr_prison->pr_prison_racct->prr_racct->
248                         r_resources[resource];
249                 break;
250         default:
251                 panic("rctl_compute_available: unknown per %d",
252                     rule->rr_per);
253         }
254
255         return (available);
256 }
257
258 /*
259  * Return non-zero if allocating 'amount' by proc 'p' would exceed
260  * resource limit specified by 'rule'.
261  */
262 static int
263 rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
264     int64_t amount)
265 {
266         int64_t available;
267
268         ASSERT_RACCT_ENABLED();
269
270         rw_assert(&rctl_lock, RA_LOCKED);
271
272         available = rctl_available_resource(p, rule);
273         if (available >= amount)
274                 return (0);
275
276         return (1);
277 }
278
279 /*
280  * Special version of rctl_available() function for the %cpu resource.
281  * We slightly cheat here and return less than we normally would.
282  */
283 int64_t
284 rctl_pcpu_available(const struct proc *p) {
285         struct rctl_rule *rule;
286         struct rctl_rule_link *link;
287         int64_t available, minavailable, limit;
288
289         ASSERT_RACCT_ENABLED();
290
291         minavailable = INT64_MAX;
292         limit = 0;
293
294         rw_rlock(&rctl_lock);
295
296         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
297                 rule = link->rrl_rule;
298                 if (rule->rr_resource != RACCT_PCTCPU)
299                         continue;
300                 if (rule->rr_action != RCTL_ACTION_DENY)
301                         continue;
302                 available = rctl_available_resource(p, rule);
303                 if (available < minavailable) {
304                         minavailable = available;
305                         limit = rule->rr_amount;
306                 }
307         }
308
309         rw_runlock(&rctl_lock);
310
311         /*
312          * Return slightly less than actual value of the available
313          * %cpu resource.  This makes %cpu throttling more agressive
314          * and lets us act sooner than the limits are already exceeded.
315          */
316         if (limit != 0) {
317                 if (limit > 2 * RCTL_PCPU_SHIFT)
318                         minavailable -= RCTL_PCPU_SHIFT;
319                 else
320                         minavailable -= (limit / 2);
321         }
322
323         return (minavailable);
324 }
325
326 /*
327  * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
328  * to what it keeps allocated now.  Returns non-zero if the allocation should
329  * be denied, 0 otherwise.
330  */
331 int
332 rctl_enforce(struct proc *p, int resource, uint64_t amount)
333 {
334         struct rctl_rule *rule;
335         struct rctl_rule_link *link;
336         struct sbuf sb;
337         int should_deny = 0;
338         char *buf;
339         static int curtime = 0;
340         static struct timeval lasttime;
341
342         ASSERT_RACCT_ENABLED();
343
344         rw_rlock(&rctl_lock);
345
346         /*
347          * There may be more than one matching rule; go through all of them.
348          * Denial should be done last, after logging and sending signals.
349          */
350         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
351                 rule = link->rrl_rule;
352                 if (rule->rr_resource != resource)
353                         continue;
354                 if (!rctl_would_exceed(p, rule, amount)) {
355                         link->rrl_exceeded = 0;
356                         continue;
357                 }
358
359                 switch (rule->rr_action) {
360                 case RCTL_ACTION_DENY:
361                         should_deny = 1;
362                         continue;
363                 case RCTL_ACTION_LOG:
364                         /*
365                          * If rrl_exceeded != 0, it means we've already
366                          * logged a warning for this process.
367                          */
368                         if (link->rrl_exceeded != 0)
369                                 continue;
370
371                         /*
372                          * If the process state is not fully initialized yet,
373                          * we can't access most of the required fields, e.g.
374                          * p->p_comm.  This happens when called from fork1().
375                          * Ignore this rule for now; it will be processed just
376                          * after fork, when called from racct_proc_fork_done().
377                          */
378                         if (p->p_state != PRS_NORMAL)
379                                 continue;
380
381                         if (!ppsratecheck(&lasttime, &curtime, 10))
382                                 continue;
383
384                         buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
385                         if (buf == NULL) {
386                                 printf("rctl_enforce: out of memory\n");
387                                 continue;
388                         }
389                         sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
390                         rctl_rule_to_sbuf(&sb, rule);
391                         sbuf_finish(&sb);
392                         printf("rctl: rule \"%s\" matched by pid %d "
393                             "(%s), uid %d, jail %s\n", sbuf_data(&sb),
394                             p->p_pid, p->p_comm, p->p_ucred->cr_uid,
395                             p->p_ucred->cr_prison->pr_prison_racct->prr_name);
396                         sbuf_delete(&sb);
397                         free(buf, M_RCTL);
398                         link->rrl_exceeded = 1;
399                         continue;
400                 case RCTL_ACTION_DEVCTL:
401                         if (link->rrl_exceeded != 0)
402                                 continue;
403
404                         if (p->p_state != PRS_NORMAL)
405                                 continue;
406         
407                         buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
408                         if (buf == NULL) {
409                                 printf("rctl_enforce: out of memory\n");
410                                 continue;
411                         }
412                         sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
413                         sbuf_printf(&sb, "rule=");
414                         rctl_rule_to_sbuf(&sb, rule);
415                         sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
416                             p->p_pid, p->p_ucred->cr_ruid,
417                             p->p_ucred->cr_prison->pr_prison_racct->prr_name);
418                         sbuf_finish(&sb);
419                         devctl_notify_f("RCTL", "rule", "matched",
420                             sbuf_data(&sb), M_NOWAIT);
421                         sbuf_delete(&sb);
422                         free(buf, M_RCTL);
423                         link->rrl_exceeded = 1;
424                         continue;
425                 default:
426                         if (link->rrl_exceeded != 0)
427                                 continue;
428
429                         if (p->p_state != PRS_NORMAL)
430                                 continue;
431
432                         KASSERT(rule->rr_action > 0 &&
433                             rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
434                             ("rctl_enforce: unknown action %d",
435                              rule->rr_action));
436
437                         /*
438                          * We're using the fact that RCTL_ACTION_SIG* values
439                          * are equal to their counterparts from sys/signal.h.
440                          */
441                         kern_psignal(p, rule->rr_action);
442                         link->rrl_exceeded = 1;
443                         continue;
444                 }
445         }
446
447         rw_runlock(&rctl_lock);
448
449         if (should_deny) {
450                 /*
451                  * Return fake error code; the caller should change it
452                  * into one proper for the situation - EFSIZ, ENOMEM etc.
453                  */
454                 return (EDOOFUS);
455         }
456
457         return (0);
458 }
459
460 uint64_t
461 rctl_get_limit(struct proc *p, int resource)
462 {
463         struct rctl_rule *rule;
464         struct rctl_rule_link *link;
465         uint64_t amount = UINT64_MAX;
466
467         ASSERT_RACCT_ENABLED();
468
469         rw_rlock(&rctl_lock);
470
471         /*
472          * There may be more than one matching rule; go through all of them.
473          * Denial should be done last, after logging and sending signals.
474          */
475         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
476                 rule = link->rrl_rule;
477                 if (rule->rr_resource != resource)
478                         continue;
479                 if (rule->rr_action != RCTL_ACTION_DENY)
480                         continue;
481                 if (rule->rr_amount < amount)
482                         amount = rule->rr_amount;
483         }
484
485         rw_runlock(&rctl_lock);
486
487         return (amount);
488 }
489
490 uint64_t
491 rctl_get_available(struct proc *p, int resource)
492 {
493         struct rctl_rule *rule;
494         struct rctl_rule_link *link;
495         int64_t available, minavailable, allocated;
496
497         minavailable = INT64_MAX;
498
499         ASSERT_RACCT_ENABLED();
500
501         rw_rlock(&rctl_lock);
502
503         /*
504          * There may be more than one matching rule; go through all of them.
505          * Denial should be done last, after logging and sending signals.
506          */
507         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
508                 rule = link->rrl_rule;
509                 if (rule->rr_resource != resource)
510                         continue;
511                 if (rule->rr_action != RCTL_ACTION_DENY)
512                         continue;
513                 available = rctl_available_resource(p, rule);
514                 if (available < minavailable)
515                         minavailable = available;
516         }
517
518         rw_runlock(&rctl_lock);
519
520         /*
521          * XXX: Think about this _hard_.
522          */
523         allocated = p->p_racct->r_resources[resource];
524         if (minavailable < INT64_MAX - allocated)
525                 minavailable += allocated;
526         if (minavailable < 0)
527                 minavailable = 0;
528         return (minavailable);
529 }
530
531 static int
532 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
533 {
534
535         ASSERT_RACCT_ENABLED();
536
537         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
538                 if (rule->rr_subject_type != filter->rr_subject_type)
539                         return (0);
540
541                 switch (filter->rr_subject_type) {
542                 case RCTL_SUBJECT_TYPE_PROCESS:
543                         if (filter->rr_subject.rs_proc != NULL &&
544                             rule->rr_subject.rs_proc !=
545                             filter->rr_subject.rs_proc)
546                                 return (0);
547                         break;
548                 case RCTL_SUBJECT_TYPE_USER:
549                         if (filter->rr_subject.rs_uip != NULL &&
550                             rule->rr_subject.rs_uip !=
551                             filter->rr_subject.rs_uip)
552                                 return (0);
553                         break;
554                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
555                         if (filter->rr_subject.rs_loginclass != NULL &&
556                             rule->rr_subject.rs_loginclass !=
557                             filter->rr_subject.rs_loginclass)
558                                 return (0);
559                         break;
560                 case RCTL_SUBJECT_TYPE_JAIL:
561                         if (filter->rr_subject.rs_prison_racct != NULL &&
562                             rule->rr_subject.rs_prison_racct !=
563                             filter->rr_subject.rs_prison_racct)
564                                 return (0);
565                         break;
566                 default:
567                         panic("rctl_rule_matches: unknown subject type %d",
568                             filter->rr_subject_type);
569                 }
570         }
571
572         if (filter->rr_resource != RACCT_UNDEFINED) {
573                 if (rule->rr_resource != filter->rr_resource)
574                         return (0);
575         }
576
577         if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
578                 if (rule->rr_action != filter->rr_action)
579                         return (0);
580         }
581
582         if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
583                 if (rule->rr_amount != filter->rr_amount)
584                         return (0);
585         }
586
587         if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
588                 if (rule->rr_per != filter->rr_per)
589                         return (0);
590         }
591
592         return (1);
593 }
594
595 static int
596 str2value(const char *str, int *value, struct dict *table)
597 {
598         int i;
599
600         if (value == NULL)
601                 return (EINVAL);
602
603         for (i = 0; table[i].d_name != NULL; i++) {
604                 if (strcasecmp(table[i].d_name, str) == 0) {
605                         *value =  table[i].d_value;
606                         return (0);
607                 }
608         }
609
610         return (EINVAL);
611 }
612
/*
 * Parse 'str' as a base-10 number into '*value'.  Returns 0 on success,
 * EINVAL if 'str' is NULL or is followed by anything strtoul() did not
 * consume.  Note that '*value' is written even when EINVAL is returned
 * for trailing characters.
 */
static int
str2id(const char *str, id_t *value)
{
	char *rest;

	if (str == NULL)
		return (EINVAL);

	*value = strtoul(str, &rest, 10);

	/* The whole string must have been consumed. */
	return (*rest == '\0' ? 0 : EINVAL);
}
627
628 static int
629 str2int64(const char *str, int64_t *value)
630 {
631         char *end;
632
633         if (str == NULL)
634                 return (EINVAL);
635
636         *value = strtoul(str, &end, 10);
637         if ((size_t)(end - str) != strlen(str))
638                 return (EINVAL);
639
640         return (0);
641 }
642
643 /*
644  * Connect the rule to the racct, increasing refcount for the rule.
645  */
646 static void
647 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
648 {
649         struct rctl_rule_link *link;
650
651         ASSERT_RACCT_ENABLED();
652         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
653
654         rctl_rule_acquire(rule);
655         link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
656         link->rrl_rule = rule;
657         link->rrl_exceeded = 0;
658
659         rw_wlock(&rctl_lock);
660         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
661         rw_wunlock(&rctl_lock);
662 }
663
664 static int
665 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
666 {
667         struct rctl_rule_link *link;
668
669         ASSERT_RACCT_ENABLED();
670         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
671         rw_assert(&rctl_lock, RA_WLOCKED);
672
673         link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
674         if (link == NULL)
675                 return (ENOMEM);
676         rctl_rule_acquire(rule);
677         link->rrl_rule = rule;
678         link->rrl_exceeded = 0;
679
680         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
681         return (0);
682 }
683
684 /*
685  * Remove limits for a rules matching the filter and release
686  * the refcounts for the rules, possibly freeing them.  Returns
687  * the number of limit structures removed.
688  */
689 static int
690 rctl_racct_remove_rules(struct racct *racct,
691     const struct rctl_rule *filter)
692 {
693         int removed = 0;
694         struct rctl_rule_link *link, *linktmp;
695
696         ASSERT_RACCT_ENABLED();
697         rw_assert(&rctl_lock, RA_WLOCKED);
698
699         LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
700                 if (!rctl_rule_matches(link->rrl_rule, filter))
701                         continue;
702
703                 LIST_REMOVE(link, rrl_next);
704                 rctl_rule_release(link->rrl_rule);
705                 uma_zfree(rctl_rule_link_zone, link);
706                 removed++;
707         }
708         return (removed);
709 }
710
711 static void
712 rctl_rule_acquire_subject(struct rctl_rule *rule)
713 {
714
715         ASSERT_RACCT_ENABLED();
716
717         switch (rule->rr_subject_type) {
718         case RCTL_SUBJECT_TYPE_UNDEFINED:
719         case RCTL_SUBJECT_TYPE_PROCESS:
720                 break;
721         case RCTL_SUBJECT_TYPE_JAIL:
722                 if (rule->rr_subject.rs_prison_racct != NULL)
723                         prison_racct_hold(rule->rr_subject.rs_prison_racct);
724                 break;
725         case RCTL_SUBJECT_TYPE_USER:
726                 if (rule->rr_subject.rs_uip != NULL)
727                         uihold(rule->rr_subject.rs_uip);
728                 break;
729         case RCTL_SUBJECT_TYPE_LOGINCLASS:
730                 if (rule->rr_subject.rs_loginclass != NULL)
731                         loginclass_hold(rule->rr_subject.rs_loginclass);
732                 break;
733         default:
734                 panic("rctl_rule_acquire_subject: unknown subject type %d",
735                     rule->rr_subject_type);
736         }
737 }
738
739 static void
740 rctl_rule_release_subject(struct rctl_rule *rule)
741 {
742
743         ASSERT_RACCT_ENABLED();
744
745         switch (rule->rr_subject_type) {
746         case RCTL_SUBJECT_TYPE_UNDEFINED:
747         case RCTL_SUBJECT_TYPE_PROCESS:
748                 break;
749         case RCTL_SUBJECT_TYPE_JAIL:
750                 if (rule->rr_subject.rs_prison_racct != NULL)
751                         prison_racct_free(rule->rr_subject.rs_prison_racct);
752                 break;
753         case RCTL_SUBJECT_TYPE_USER:
754                 if (rule->rr_subject.rs_uip != NULL)
755                         uifree(rule->rr_subject.rs_uip);
756                 break;
757         case RCTL_SUBJECT_TYPE_LOGINCLASS:
758                 if (rule->rr_subject.rs_loginclass != NULL)
759                         loginclass_free(rule->rr_subject.rs_loginclass);
760                 break;
761         default:
762                 panic("rctl_rule_release_subject: unknown subject type %d",
763                     rule->rr_subject_type);
764         }
765 }
766
767 struct rctl_rule *
768 rctl_rule_alloc(int flags)
769 {
770         struct rctl_rule *rule;
771
772         ASSERT_RACCT_ENABLED();
773
774         rule = uma_zalloc(rctl_rule_zone, flags);
775         if (rule == NULL)
776                 return (NULL);
777         rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
778         rule->rr_subject.rs_proc = NULL;
779         rule->rr_subject.rs_uip = NULL;
780         rule->rr_subject.rs_loginclass = NULL;
781         rule->rr_subject.rs_prison_racct = NULL;
782         rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
783         rule->rr_resource = RACCT_UNDEFINED;
784         rule->rr_action = RCTL_ACTION_UNDEFINED;
785         rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
786         refcount_init(&rule->rr_refcount, 1);
787
788         return (rule);
789 }
790
791 struct rctl_rule *
792 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
793 {
794         struct rctl_rule *copy;
795
796         ASSERT_RACCT_ENABLED();
797
798         copy = uma_zalloc(rctl_rule_zone, flags);
799         if (copy == NULL)
800                 return (NULL);
801         copy->rr_subject_type = rule->rr_subject_type;
802         copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
803         copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
804         copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
805         copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
806         copy->rr_per = rule->rr_per;
807         copy->rr_resource = rule->rr_resource;
808         copy->rr_action = rule->rr_action;
809         copy->rr_amount = rule->rr_amount;
810         refcount_init(&copy->rr_refcount, 1);
811         rctl_rule_acquire_subject(copy);
812
813         return (copy);
814 }
815
816 void
817 rctl_rule_acquire(struct rctl_rule *rule)
818 {
819
820         ASSERT_RACCT_ENABLED();
821         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
822
823         refcount_acquire(&rule->rr_refcount);
824 }
825
826 static void
827 rctl_rule_free(void *context, int pending)
828 {
829         struct rctl_rule *rule;
830         
831         rule = (struct rctl_rule *)context;
832
833         ASSERT_RACCT_ENABLED();
834         KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
835         
836         /*
837          * We don't need locking here; rule is guaranteed to be inaccessible.
838          */
839         
840         rctl_rule_release_subject(rule);
841         uma_zfree(rctl_rule_zone, rule);
842 }
843
844 void
845 rctl_rule_release(struct rctl_rule *rule)
846 {
847
848         ASSERT_RACCT_ENABLED();
849         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
850
851         if (refcount_release(&rule->rr_refcount)) {
852                 /*
853                  * rctl_rule_release() is often called when iterating
854                  * over all the uidinfo structures in the system,
855                  * holding uihashtbl_lock.  Since rctl_rule_free()
856                  * might end up calling uifree(), this would lead
857                  * to lock recursion.  Use taskqueue to avoid this.
858                  */
859                 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
860                 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
861         }
862 }
863
864 static int
865 rctl_rule_fully_specified(const struct rctl_rule *rule)
866 {
867
868         ASSERT_RACCT_ENABLED();
869
870         switch (rule->rr_subject_type) {
871         case RCTL_SUBJECT_TYPE_UNDEFINED:
872                 return (0);
873         case RCTL_SUBJECT_TYPE_PROCESS:
874                 if (rule->rr_subject.rs_proc == NULL)
875                         return (0);
876                 break;
877         case RCTL_SUBJECT_TYPE_USER:
878                 if (rule->rr_subject.rs_uip == NULL)
879                         return (0);
880                 break;
881         case RCTL_SUBJECT_TYPE_LOGINCLASS:
882                 if (rule->rr_subject.rs_loginclass == NULL)
883                         return (0);
884                 break;
885         case RCTL_SUBJECT_TYPE_JAIL:
886                 if (rule->rr_subject.rs_prison_racct == NULL)
887                         return (0);
888                 break;
889         default:
890                 panic("rctl_rule_fully_specified: unknown subject type %d",
891                     rule->rr_subject_type);
892         }
893         if (rule->rr_resource == RACCT_UNDEFINED)
894                 return (0);
895         if (rule->rr_action == RCTL_ACTION_UNDEFINED)
896                 return (0);
897         if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
898                 return (0);
899         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
900                 return (0);
901
902         return (1);
903 }
904
905 static int
906 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
907 {
908         int error = 0;
909         char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
910              *amountstr, *perstr;
911         struct rctl_rule *rule;
912         id_t id;
913
914         ASSERT_RACCT_ENABLED();
915
916         rule = rctl_rule_alloc(M_WAITOK);
917
918         subjectstr = strsep(&rulestr, ":");
919         subject_idstr = strsep(&rulestr, ":");
920         resourcestr = strsep(&rulestr, ":");
921         actionstr = strsep(&rulestr, "=/");
922         amountstr = strsep(&rulestr, "/");
923         perstr = rulestr;
924
925         if (subjectstr == NULL || subjectstr[0] == '\0')
926                 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
927         else {
928                 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
929                 if (error != 0)
930                         goto out;
931         }
932
933         if (subject_idstr == NULL || subject_idstr[0] == '\0') {
934                 rule->rr_subject.rs_proc = NULL;
935                 rule->rr_subject.rs_uip = NULL;
936                 rule->rr_subject.rs_loginclass = NULL;
937                 rule->rr_subject.rs_prison_racct = NULL;
938         } else {
939                 switch (rule->rr_subject_type) {
940                 case RCTL_SUBJECT_TYPE_UNDEFINED:
941                         error = EINVAL;
942                         goto out;
943                 case RCTL_SUBJECT_TYPE_PROCESS:
944                         error = str2id(subject_idstr, &id);
945                         if (error != 0)
946                                 goto out;
947                         sx_assert(&allproc_lock, SA_LOCKED);
948                         rule->rr_subject.rs_proc = pfind(id);
949                         if (rule->rr_subject.rs_proc == NULL) {
950                                 error = ESRCH;
951                                 goto out;
952                         }
953                         PROC_UNLOCK(rule->rr_subject.rs_proc);
954                         break;
955                 case RCTL_SUBJECT_TYPE_USER:
956                         error = str2id(subject_idstr, &id);
957                         if (error != 0)
958                                 goto out;
959                         rule->rr_subject.rs_uip = uifind(id);
960                         break;
961                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
962                         rule->rr_subject.rs_loginclass =
963                             loginclass_find(subject_idstr);
964                         if (rule->rr_subject.rs_loginclass == NULL) {
965                                 error = ENAMETOOLONG;
966                                 goto out;
967                         }
968                         break;
969                 case RCTL_SUBJECT_TYPE_JAIL:
970                         rule->rr_subject.rs_prison_racct =
971                             prison_racct_find(subject_idstr);
972                         if (rule->rr_subject.rs_prison_racct == NULL) {
973                                 error = ENAMETOOLONG;
974                                 goto out;
975                         }
976                         break;
977                default:
978                        panic("rctl_string_to_rule: unknown subject type %d",
979                            rule->rr_subject_type);
980                }
981         }
982
983         if (resourcestr == NULL || resourcestr[0] == '\0')
984                 rule->rr_resource = RACCT_UNDEFINED;
985         else {
986                 error = str2value(resourcestr, &rule->rr_resource,
987                     resourcenames);
988                 if (error != 0)
989                         goto out;
990         }
991
992         if (actionstr == NULL || actionstr[0] == '\0')
993                 rule->rr_action = RCTL_ACTION_UNDEFINED;
994         else {
995                 error = str2value(actionstr, &rule->rr_action, actionnames);
996                 if (error != 0)
997                         goto out;
998         }
999
1000         if (amountstr == NULL || amountstr[0] == '\0')
1001                 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1002         else {
1003                 error = str2int64(amountstr, &rule->rr_amount);
1004                 if (error != 0)
1005                         goto out;
1006                 if (RACCT_IS_IN_MILLIONS(rule->rr_resource))
1007                         rule->rr_amount *= 1000000;
1008         }
1009
1010         if (perstr == NULL || perstr[0] == '\0')
1011                 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1012         else {
1013                 error = str2value(perstr, &rule->rr_per, subjectnames);
1014                 if (error != 0)
1015                         goto out;
1016         }
1017
1018 out:
1019         if (error == 0)
1020                 *rulep = rule;
1021         else
1022                 rctl_rule_release(rule);
1023
1024         return (error);
1025 }
1026
1027 /*
1028  * Link a rule with all the subjects it applies to.
1029  */
1030 int
1031 rctl_rule_add(struct rctl_rule *rule)
1032 {
1033         struct proc *p;
1034         struct ucred *cred;
1035         struct uidinfo *uip;
1036         struct prison *pr;
1037         struct prison_racct *prr;
1038         struct loginclass *lc;
1039         struct rctl_rule *rule2;
1040         int match;
1041
1042         ASSERT_RACCT_ENABLED();
1043         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
1044
1045         /*
1046          * Some rules just don't make sense.  Note that the one below
1047          * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU,
1048          * for example, is not deniable in the racct sense, but the
1049          * limit is enforced in a different way, so "deny" rules for %CPU
1050          * do make sense.
1051          */
1052         if (rule->rr_action == RCTL_ACTION_DENY &&
1053             (rule->rr_resource == RACCT_CPU ||
1054             rule->rr_resource == RACCT_WALLCLOCK))
1055                 return (EOPNOTSUPP);
1056
1057         if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
1058             RACCT_IS_SLOPPY(rule->rr_resource))
1059                 return (EOPNOTSUPP);
1060
1061         /*
1062          * Make sure there are no duplicated rules.  Also, for the "deny"
1063          * rules, remove ones differing only by "amount".
1064          */
1065         if (rule->rr_action == RCTL_ACTION_DENY) {
1066                 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
1067                 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
1068                 rctl_rule_remove(rule2);
1069                 rctl_rule_release(rule2);
1070         } else
1071                 rctl_rule_remove(rule);
1072
1073         switch (rule->rr_subject_type) {
1074         case RCTL_SUBJECT_TYPE_PROCESS:
1075                 p = rule->rr_subject.rs_proc;
1076                 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
1077
1078                 rctl_racct_add_rule(p->p_racct, rule);
1079                 /*
1080                  * In case of per-process rule, we don't have anything more
1081                  * to do.
1082                  */
1083                 return (0);
1084
1085         case RCTL_SUBJECT_TYPE_USER:
1086                 uip = rule->rr_subject.rs_uip;
1087                 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1088                 rctl_racct_add_rule(uip->ui_racct, rule);
1089                 break;
1090
1091         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1092                 lc = rule->rr_subject.rs_loginclass;
1093                 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1094                 rctl_racct_add_rule(lc->lc_racct, rule);
1095                 break;
1096
1097         case RCTL_SUBJECT_TYPE_JAIL:
1098                 prr = rule->rr_subject.rs_prison_racct;
1099                 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1100                 rctl_racct_add_rule(prr->prr_racct, rule);
1101                 break;
1102
1103         default:
1104                 panic("rctl_rule_add: unknown subject type %d",
1105                     rule->rr_subject_type);
1106         }
1107
1108         /*
1109          * Now go through all the processes and add the new rule to the ones
1110          * it applies to.
1111          */
1112         sx_assert(&allproc_lock, SA_LOCKED);
1113         FOREACH_PROC_IN_SYSTEM(p) {
1114                 cred = p->p_ucred;
1115                 switch (rule->rr_subject_type) {
1116                 case RCTL_SUBJECT_TYPE_USER:
1117                         if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1118                             cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1119                                 break;
1120                         continue;
1121                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1122                         if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1123                                 break;
1124                         continue;
1125                 case RCTL_SUBJECT_TYPE_JAIL:
1126                         match = 0;
1127                         for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1128                                 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1129                                         match = 1;
1130                                         break;
1131                                 }
1132                         }
1133                         if (match)
1134                                 break;
1135                         continue;
1136                 default:
1137                         panic("rctl_rule_add: unknown subject type %d",
1138                             rule->rr_subject_type);
1139                 }
1140
1141                 rctl_racct_add_rule(p->p_racct, rule);
1142         }
1143
1144         return (0);
1145 }
1146
1147 static void
1148 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1149 {
1150         struct rctl_rule *filter = (struct rctl_rule *)arg2;
1151         int found = 0;
1152
1153         ASSERT_RACCT_ENABLED();
1154
1155         rw_wlock(&rctl_lock);
1156         found += rctl_racct_remove_rules(racct, filter);
1157         rw_wunlock(&rctl_lock);
1158
1159         *((int *)arg3) += found;
1160 }
1161
1162 /*
1163  * Remove all rules that match the filter.
1164  */
1165 int
1166 rctl_rule_remove(struct rctl_rule *filter)
1167 {
1168         int found = 0;
1169         struct proc *p;
1170
1171         ASSERT_RACCT_ENABLED();
1172
1173         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1174             filter->rr_subject.rs_proc != NULL) {
1175                 p = filter->rr_subject.rs_proc;
1176                 rw_wlock(&rctl_lock);
1177                 found = rctl_racct_remove_rules(p->p_racct, filter);
1178                 rw_wunlock(&rctl_lock);
1179                 if (found)
1180                         return (0);
1181                 return (ESRCH);
1182         }
1183
1184         loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1185             (void *)&found);
1186         ui_racct_foreach(rctl_rule_remove_callback, filter,
1187             (void *)&found);
1188         prison_racct_foreach(rctl_rule_remove_callback, filter,
1189             (void *)&found);
1190
1191         sx_assert(&allproc_lock, SA_LOCKED);
1192         rw_wlock(&rctl_lock);
1193         FOREACH_PROC_IN_SYSTEM(p) {
1194                 found += rctl_racct_remove_rules(p->p_racct, filter);
1195         }
1196         rw_wunlock(&rctl_lock);
1197
1198         if (found)
1199                 return (0);
1200         return (ESRCH);
1201 }
1202
1203 /*
1204  * Appends a rule to the sbuf.
1205  */
1206 static void
1207 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1208 {
1209         int64_t amount;
1210
1211         ASSERT_RACCT_ENABLED();
1212
1213         sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1214
1215         switch (rule->rr_subject_type) {
1216         case RCTL_SUBJECT_TYPE_PROCESS:
1217                 if (rule->rr_subject.rs_proc == NULL)
1218                         sbuf_printf(sb, ":");
1219                 else
1220                         sbuf_printf(sb, "%d:",
1221                             rule->rr_subject.rs_proc->p_pid);
1222                 break;
1223         case RCTL_SUBJECT_TYPE_USER:
1224                 if (rule->rr_subject.rs_uip == NULL)
1225                         sbuf_printf(sb, ":");
1226                 else
1227                         sbuf_printf(sb, "%d:",
1228                             rule->rr_subject.rs_uip->ui_uid);
1229                 break;
1230         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1231                 if (rule->rr_subject.rs_loginclass == NULL)
1232                         sbuf_printf(sb, ":");
1233                 else
1234                         sbuf_printf(sb, "%s:",
1235                             rule->rr_subject.rs_loginclass->lc_name);
1236                 break;
1237         case RCTL_SUBJECT_TYPE_JAIL:
1238                 if (rule->rr_subject.rs_prison_racct == NULL)
1239                         sbuf_printf(sb, ":");
1240                 else
1241                         sbuf_printf(sb, "%s:",
1242                             rule->rr_subject.rs_prison_racct->prr_name);
1243                 break;
1244         default:
1245                 panic("rctl_rule_to_sbuf: unknown subject type %d",
1246                     rule->rr_subject_type);
1247         }
1248
1249         amount = rule->rr_amount;
1250         if (amount != RCTL_AMOUNT_UNDEFINED &&
1251             RACCT_IS_IN_MILLIONS(rule->rr_resource))
1252                 amount /= 1000000;
1253
1254         sbuf_printf(sb, "%s:%s=%jd",
1255             rctl_resource_name(rule->rr_resource),
1256             rctl_action_name(rule->rr_action),
1257             amount);
1258
1259         if (rule->rr_per != rule->rr_subject_type)
1260                 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1261 }
1262
1263 /*
1264  * Routine used by RCTL syscalls to read in input string.
1265  */
1266 static int
1267 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1268 {
1269         int error;
1270         char *str;
1271
1272         ASSERT_RACCT_ENABLED();
1273
1274         if (inbuflen <= 0)
1275                 return (EINVAL);
1276         if (inbuflen > RCTL_MAX_INBUFLEN)
1277                 return (E2BIG);
1278
1279         str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1280         error = copyinstr(inbufp, str, inbuflen, NULL);
1281         if (error != 0) {
1282                 free(str, M_RCTL);
1283                 return (error);
1284         }
1285
1286         *inputstr = str;
1287
1288         return (0);
1289 }
1290
1291 /*
1292  * Routine used by RCTL syscalls to write out output string.
1293  */
1294 static int
1295 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1296 {
1297         int error;
1298
1299         ASSERT_RACCT_ENABLED();
1300
1301         if (outputsbuf == NULL)
1302                 return (0);
1303
1304         sbuf_finish(outputsbuf);
1305         if (outbuflen < sbuf_len(outputsbuf) + 1) {
1306                 sbuf_delete(outputsbuf);
1307                 return (ERANGE);
1308         }
1309         error = copyout(sbuf_data(outputsbuf), outbufp,
1310             sbuf_len(outputsbuf) + 1);
1311         sbuf_delete(outputsbuf);
1312         return (error);
1313 }
1314
1315 static struct sbuf *
1316 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1317 {
1318         int i;
1319         int64_t amount;
1320         struct sbuf *sb;
1321
1322         ASSERT_RACCT_ENABLED();
1323
1324         sb = sbuf_new_auto();
1325         for (i = 0; i <= RACCT_MAX; i++) {
1326                 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1327                         continue;
1328                 amount = racct->r_resources[i];
1329                 if (RACCT_IS_IN_MILLIONS(i))
1330                         amount /= 1000000;
1331                 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1332         }
1333         sbuf_setpos(sb, sbuf_len(sb) - 1);
1334         return (sb);
1335 }
1336
1337 int
1338 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1339 {
1340         int error;
1341         char *inputstr;
1342         struct rctl_rule *filter;
1343         struct sbuf *outputsbuf = NULL;
1344         struct proc *p;
1345         struct uidinfo *uip;
1346         struct loginclass *lc;
1347         struct prison_racct *prr;
1348
1349         if (!racct_enable)
1350                 return (ENOSYS);
1351
1352         error = priv_check(td, PRIV_RCTL_GET_RACCT);
1353         if (error != 0)
1354                 return (error);
1355
1356         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1357         if (error != 0)
1358                 return (error);
1359
1360         sx_slock(&allproc_lock);
1361         error = rctl_string_to_rule(inputstr, &filter);
1362         free(inputstr, M_RCTL);
1363         if (error != 0) {
1364                 sx_sunlock(&allproc_lock);
1365                 return (error);
1366         }
1367
1368         switch (filter->rr_subject_type) {
1369         case RCTL_SUBJECT_TYPE_PROCESS:
1370                 p = filter->rr_subject.rs_proc;
1371                 if (p == NULL) {
1372                         error = EINVAL;
1373                         goto out;
1374                 }
1375                 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1376                 break;
1377         case RCTL_SUBJECT_TYPE_USER:
1378                 uip = filter->rr_subject.rs_uip;
1379                 if (uip == NULL) {
1380                         error = EINVAL;
1381                         goto out;
1382                 }
1383                 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1384                 break;
1385         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1386                 lc = filter->rr_subject.rs_loginclass;
1387                 if (lc == NULL) {
1388                         error = EINVAL;
1389                         goto out;
1390                 }
1391                 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1392                 break;
1393         case RCTL_SUBJECT_TYPE_JAIL:
1394                 prr = filter->rr_subject.rs_prison_racct;
1395                 if (prr == NULL) {
1396                         error = EINVAL;
1397                         goto out;
1398                 }
1399                 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1400                 break;
1401         default:
1402                 error = EINVAL;
1403         }
1404 out:
1405         rctl_rule_release(filter);
1406         sx_sunlock(&allproc_lock);
1407         if (error != 0)
1408                 return (error);
1409
1410         error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1411
1412         return (error);
1413 }
1414
1415 static void
1416 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1417 {
1418         struct rctl_rule *filter = (struct rctl_rule *)arg2;
1419         struct rctl_rule_link *link;
1420         struct sbuf *sb = (struct sbuf *)arg3;
1421
1422         ASSERT_RACCT_ENABLED();
1423
1424         rw_rlock(&rctl_lock);
1425         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1426                 if (!rctl_rule_matches(link->rrl_rule, filter))
1427                         continue;
1428                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1429                 sbuf_printf(sb, ",");
1430         }
1431         rw_runlock(&rctl_lock);
1432 }
1433
1434 int
1435 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1436 {
1437         int error;
1438         size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1439         char *inputstr, *buf;
1440         struct sbuf *sb;
1441         struct rctl_rule *filter;
1442         struct rctl_rule_link *link;
1443         struct proc *p;
1444
1445         if (!racct_enable)
1446                 return (ENOSYS);
1447
1448         error = priv_check(td, PRIV_RCTL_GET_RULES);
1449         if (error != 0)
1450                 return (error);
1451
1452         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1453         if (error != 0)
1454                 return (error);
1455
1456         sx_slock(&allproc_lock);
1457         error = rctl_string_to_rule(inputstr, &filter);
1458         free(inputstr, M_RCTL);
1459         if (error != 0) {
1460                 sx_sunlock(&allproc_lock);
1461                 return (error);
1462         }
1463
1464 again:
1465         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1466         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1467         KASSERT(sb != NULL, ("sbuf_new failed"));
1468
1469         sx_assert(&allproc_lock, SA_LOCKED);
1470         FOREACH_PROC_IN_SYSTEM(p) {
1471                 rw_rlock(&rctl_lock);
1472                 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1473                         /*
1474                          * Non-process rules will be added to the buffer later.
1475                          * Adding them here would result in duplicated output.
1476                          */
1477                         if (link->rrl_rule->rr_subject_type !=
1478                             RCTL_SUBJECT_TYPE_PROCESS)
1479                                 continue;
1480                         if (!rctl_rule_matches(link->rrl_rule, filter))
1481                                 continue;
1482                         rctl_rule_to_sbuf(sb, link->rrl_rule);
1483                         sbuf_printf(sb, ",");
1484                 }
1485                 rw_runlock(&rctl_lock);
1486         }
1487
1488         loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1489         ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1490         prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1491         if (sbuf_error(sb) == ENOMEM) {
1492                 sbuf_delete(sb);
1493                 free(buf, M_RCTL);
1494                 bufsize *= 4;
1495                 goto again;
1496         }
1497
1498         /*
1499          * Remove trailing ",".
1500          */
1501         if (sbuf_len(sb) > 0)
1502                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1503
1504         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1505
1506         rctl_rule_release(filter);
1507         sx_sunlock(&allproc_lock);
1508         free(buf, M_RCTL);
1509         return (error);
1510 }
1511
1512 int
1513 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1514 {
1515         int error;
1516         size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1517         char *inputstr, *buf;
1518         struct sbuf *sb;
1519         struct rctl_rule *filter;
1520         struct rctl_rule_link *link;
1521
1522         if (!racct_enable)
1523                 return (ENOSYS);
1524
1525         error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1526         if (error != 0)
1527                 return (error);
1528
1529         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1530         if (error != 0)
1531                 return (error);
1532
1533         sx_slock(&allproc_lock);
1534         error = rctl_string_to_rule(inputstr, &filter);
1535         free(inputstr, M_RCTL);
1536         if (error != 0) {
1537                 sx_sunlock(&allproc_lock);
1538                 return (error);
1539         }
1540
1541         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1542                 rctl_rule_release(filter);
1543                 sx_sunlock(&allproc_lock);
1544                 return (EINVAL);
1545         }
1546         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1547                 rctl_rule_release(filter);
1548                 sx_sunlock(&allproc_lock);
1549                 return (EOPNOTSUPP);
1550         }
1551         if (filter->rr_subject.rs_proc == NULL) {
1552                 rctl_rule_release(filter);
1553                 sx_sunlock(&allproc_lock);
1554                 return (EINVAL);
1555         }
1556
1557 again:
1558         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1559         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1560         KASSERT(sb != NULL, ("sbuf_new failed"));
1561
1562         rw_rlock(&rctl_lock);
1563         LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1564             rrl_next) {
1565                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1566                 sbuf_printf(sb, ",");
1567         }
1568         rw_runlock(&rctl_lock);
1569         if (sbuf_error(sb) == ENOMEM) {
1570                 sbuf_delete(sb);
1571                 free(buf, M_RCTL);
1572                 bufsize *= 4;
1573                 goto again;
1574         }
1575
1576         /*
1577          * Remove trailing ",".
1578          */
1579         if (sbuf_len(sb) > 0)
1580                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1581
1582         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1583         rctl_rule_release(filter);
1584         sx_sunlock(&allproc_lock);
1585         free(buf, M_RCTL);
1586         return (error);
1587 }
1588
1589 int
1590 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1591 {
1592         int error;
1593         struct rctl_rule *rule;
1594         char *inputstr;
1595
1596         if (!racct_enable)
1597                 return (ENOSYS);
1598
1599         error = priv_check(td, PRIV_RCTL_ADD_RULE);
1600         if (error != 0)
1601                 return (error);
1602
1603         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1604         if (error != 0)
1605                 return (error);
1606
1607         sx_slock(&allproc_lock);
1608         error = rctl_string_to_rule(inputstr, &rule);
1609         free(inputstr, M_RCTL);
1610         if (error != 0) {
1611                 sx_sunlock(&allproc_lock);
1612                 return (error);
1613         }
1614         /*
1615          * The 'per' part of a rule is optional.
1616          */
1617         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1618             rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1619                 rule->rr_per = rule->rr_subject_type;
1620
1621         if (!rctl_rule_fully_specified(rule)) {
1622                 error = EINVAL;
1623                 goto out;
1624         }
1625
1626         error = rctl_rule_add(rule);
1627
1628 out:
1629         rctl_rule_release(rule);
1630         sx_sunlock(&allproc_lock);
1631         return (error);
1632 }
1633
1634 int
1635 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1636 {
1637         int error;
1638         struct rctl_rule *filter;
1639         char *inputstr;
1640
1641         if (!racct_enable)
1642                 return (ENOSYS);
1643
1644         error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1645         if (error != 0)
1646                 return (error);
1647
1648         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1649         if (error != 0)
1650                 return (error);
1651
1652         sx_slock(&allproc_lock);
1653         error = rctl_string_to_rule(inputstr, &filter);
1654         free(inputstr, M_RCTL);
1655         if (error != 0) {
1656                 sx_sunlock(&allproc_lock);
1657                 return (error);
1658         }
1659
1660         error = rctl_rule_remove(filter);
1661         rctl_rule_release(filter);
1662         sx_sunlock(&allproc_lock);
1663
1664         return (error);
1665 }
1666
1667 /*
1668  * Update RCTL rule list after credential change.
1669  */
1670 void
1671 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1672 {
1673         int rulecnt, i;
1674         struct rctl_rule_link *link, *newlink;
1675         struct uidinfo *newuip;
1676         struct loginclass *newlc;
1677         struct prison_racct *newprr;
1678         LIST_HEAD(, rctl_rule_link) newrules;
1679
1680         ASSERT_RACCT_ENABLED();
1681
1682         newuip = newcred->cr_ruidinfo;
1683         newlc = newcred->cr_loginclass;
1684         newprr = newcred->cr_prison->pr_prison_racct;
1685         
1686         LIST_INIT(&newrules);
1687
1688 again:
1689         /*
1690          * First, count the rules that apply to the process with new
1691          * credentials.
1692          */
1693         rulecnt = 0;
1694         rw_rlock(&rctl_lock);
1695         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1696                 if (link->rrl_rule->rr_subject_type ==
1697                     RCTL_SUBJECT_TYPE_PROCESS)
1698                         rulecnt++;
1699         }
1700         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1701                 rulecnt++;
1702         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1703                 rulecnt++;
1704         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1705                 rulecnt++;
1706         rw_runlock(&rctl_lock);
1707
1708         /*
1709          * Create temporary list.  We've dropped the rctl_lock in order
1710          * to use M_WAITOK.
1711          */
1712         for (i = 0; i < rulecnt; i++) {
1713                 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1714                 newlink->rrl_rule = NULL;
1715                 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1716         }
1717
1718         newlink = LIST_FIRST(&newrules);
1719
1720         /*
1721          * Assign rules to the newly allocated list entries.
1722          */
1723         rw_wlock(&rctl_lock);
1724         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1725                 if (link->rrl_rule->rr_subject_type ==
1726                     RCTL_SUBJECT_TYPE_PROCESS) {
1727                         if (newlink == NULL)
1728                                 goto goaround;
1729                         rctl_rule_acquire(link->rrl_rule);
1730                         newlink->rrl_rule = link->rrl_rule;
1731                         newlink = LIST_NEXT(newlink, rrl_next);
1732                         rulecnt--;
1733                 }
1734         }
1735         
1736         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1737                 if (newlink == NULL)
1738                         goto goaround;
1739                 rctl_rule_acquire(link->rrl_rule);
1740                 newlink->rrl_rule = link->rrl_rule;
1741                 newlink = LIST_NEXT(newlink, rrl_next);
1742                 rulecnt--;
1743         }
1744
1745         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1746                 if (newlink == NULL)
1747                         goto goaround;
1748                 rctl_rule_acquire(link->rrl_rule);
1749                 newlink->rrl_rule = link->rrl_rule;
1750                 newlink = LIST_NEXT(newlink, rrl_next);
1751                 rulecnt--;
1752         }
1753
1754         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1755                 if (newlink == NULL)
1756                         goto goaround;
1757                 rctl_rule_acquire(link->rrl_rule);
1758                 newlink->rrl_rule = link->rrl_rule;
1759                 newlink = LIST_NEXT(newlink, rrl_next);
1760                 rulecnt--;
1761         }
1762
1763         if (rulecnt == 0) {
1764                 /*
1765                  * Free the old rule list.
1766                  */
1767                 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1768                         link = LIST_FIRST(&p->p_racct->r_rule_links);
1769                         LIST_REMOVE(link, rrl_next);
1770                         rctl_rule_release(link->rrl_rule);
1771                         uma_zfree(rctl_rule_link_zone, link);
1772                 }
1773
1774                 /*
1775                  * Replace lists and we're done.
1776                  *
1777                  * XXX: Is there any way to switch list heads instead
1778                  *      of iterating here?
1779                  */
1780                 while (!LIST_EMPTY(&newrules)) {
1781                         newlink = LIST_FIRST(&newrules);
1782                         LIST_REMOVE(newlink, rrl_next);
1783                         LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1784                             newlink, rrl_next);
1785                 }
1786
1787                 rw_wunlock(&rctl_lock);
1788
1789                 return;
1790         }
1791
1792 goaround:
1793         rw_wunlock(&rctl_lock);
1794
1795         /*
1796          * Rule list changed while we were not holding the rctl_lock.
1797          * Free the new list and try again.
1798          */
1799         while (!LIST_EMPTY(&newrules)) {
1800                 newlink = LIST_FIRST(&newrules);
1801                 LIST_REMOVE(newlink, rrl_next);
1802                 if (newlink->rrl_rule != NULL)
1803                         rctl_rule_release(newlink->rrl_rule);
1804                 uma_zfree(rctl_rule_link_zone, newlink);
1805         }
1806
1807         goto again;
1808 }
1809
1810 /*
1811  * Assign RCTL rules to the newly created process.
1812  */
1813 int
1814 rctl_proc_fork(struct proc *parent, struct proc *child)
1815 {
1816         int error;
1817         struct rctl_rule_link *link;
1818         struct rctl_rule *rule;
1819
1820         LIST_INIT(&child->p_racct->r_rule_links);
1821
1822         ASSERT_RACCT_ENABLED();
1823         KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
1824
1825         rw_wlock(&rctl_lock);
1826
1827         /*
1828          * Go through limits applicable to the parent and assign them
1829          * to the child.  Rules with 'process' subject have to be duplicated
1830          * in order to make their rr_subject point to the new process.
1831          */
1832         LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1833                 if (link->rrl_rule->rr_subject_type ==
1834                     RCTL_SUBJECT_TYPE_PROCESS) {
1835                         rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1836                         if (rule == NULL)
1837                                 goto fail;
1838                         KASSERT(rule->rr_subject.rs_proc == parent,
1839                             ("rule->rr_subject.rs_proc != parent"));
1840                         rule->rr_subject.rs_proc = child;
1841                         error = rctl_racct_add_rule_locked(child->p_racct,
1842                             rule);
1843                         rctl_rule_release(rule);
1844                         if (error != 0)
1845                                 goto fail;
1846                 } else {
1847                         error = rctl_racct_add_rule_locked(child->p_racct,
1848                             link->rrl_rule);
1849                         if (error != 0)
1850                                 goto fail;
1851                 }
1852         }
1853
1854         rw_wunlock(&rctl_lock);
1855         return (0);
1856
1857 fail:
1858         while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1859                 link = LIST_FIRST(&child->p_racct->r_rule_links);
1860                 LIST_REMOVE(link, rrl_next);
1861                 rctl_rule_release(link->rrl_rule);
1862                 uma_zfree(rctl_rule_link_zone, link);
1863         }
1864         rw_wunlock(&rctl_lock);
1865         return (EAGAIN);
1866 }
1867
1868 /*
1869  * Release rules attached to the racct.
1870  */
1871 void
1872 rctl_racct_release(struct racct *racct)
1873 {
1874         struct rctl_rule_link *link;
1875
1876         ASSERT_RACCT_ENABLED();
1877
1878         rw_wlock(&rctl_lock);
1879         while (!LIST_EMPTY(&racct->r_rule_links)) {
1880                 link = LIST_FIRST(&racct->r_rule_links);
1881                 LIST_REMOVE(link, rrl_next);
1882                 rctl_rule_release(link->rrl_rule);
1883                 uma_zfree(rctl_rule_link_zone, link);
1884         }
1885         rw_wunlock(&rctl_lock);
1886 }
1887
1888 static void
1889 rctl_init(void)
1890 {
1891
1892         if (!racct_enable)
1893                 return;
1894
1895         rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1896             sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1897             UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1898         rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1899             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1900 }
1901
1902 #else /* !RCTL */
1903
1904 int
1905 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1906 {
1907         
1908         return (ENOSYS);
1909 }
1910
1911 int
1912 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1913 {
1914         
1915         return (ENOSYS);
1916 }
1917
1918 int
1919 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1920 {
1921         
1922         return (ENOSYS);
1923 }
1924
1925 int
1926 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1927 {
1928         
1929         return (ENOSYS);
1930 }
1931
1932 int
1933 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1934 {
1935         
1936         return (ENOSYS);
1937 }
1938
1939 #endif /* !RCTL */