]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - sys/kern/kern_rctl.c
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / sys / kern / kern_rctl.c
1 /*-
2  * Copyright (c) 2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/malloc.h>
38 #include <sys/queue.h>
39 #include <sys/refcount.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/limits.h>
43 #include <sys/loginclass.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/racct.h>
47 #include <sys/rctl.h>
48 #include <sys/resourcevar.h>
49 #include <sys/sx.h>
50 #include <sys/sysent.h>
51 #include <sys/sysproto.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/eventhandler.h>
55 #include <sys/lock.h>
56 #include <sys/mutex.h>
57 #include <sys/rwlock.h>
58 #include <sys/sbuf.h>
59 #include <sys/taskqueue.h>
60 #include <sys/tree.h>
61 #include <vm/uma.h>
62
63 #ifdef RCTL
64 #ifndef RACCT
65 #error "The RCTL option requires the RACCT option"
66 #endif
67
68 FEATURE(rctl, "Resource Limits");
69
/*
 * HRF_* flag values.
 * NOTE(review): these are not referenced in the visible part of this file;
 * confirm their users before changing or removing them.
 */
#define HRF_DEFAULT             0
#define HRF_DONT_INHERIT        1
#define HRF_DONT_ACCUMULATE     2

/* Default buffer size for rctl_get_rules(2). */
#define RCTL_DEFAULT_BUFSIZE    4096
/* NOTE(review): presumably the maximum accepted input string length - confirm. */
#define RCTL_MAX_INBUFLEN       4096
/* Size of the buffer rctl_enforce() allocates to format log/devctl messages. */
#define RCTL_LOG_BUFSIZE        128

/*
 * Amount rctl_pcpu_available() subtracts from the available %cpu when the
 * governing limit is more than twice this value; otherwise half of the
 * limit is subtracted instead.
 */
#define RCTL_PCPU_SHIFT         (10 * 1000000)
80
/*
 * 'rctl_rule_link' connects a rule with every racct it's related to.
 * For example, rule 'user:X:openfiles:deny=N/process' is linked
 * with uidinfo for user X, and to each process of that user.
 */
struct rctl_rule_link {
        /* Linkage on the owning racct's r_rule_links list. */
        LIST_ENTRY(rctl_rule_link)      rrl_next;
        /* The linked rule; a reference is taken when the link is created. */
        struct rctl_rule                *rrl_rule;
        /*
         * Set to 1 by rctl_enforce() once a log, devctl or signal action
         * has fired for this link; reset to 0 when the rule is no longer
         * exceeded, so the action fires only once per excursion.
         */
        int                             rrl_exceeded;
};
91
/*
 * One entry of a name <-> integer value lookup table.  Tables built from
 * this type are terminated by an entry whose d_name is NULL.
 */
struct dict {
        const char      *d_name;        /* textual name */
        int             d_value;        /* corresponding numeric constant */
};
96
/*
 * Subject type names and their RCTL_SUBJECT_TYPE_* values.  Used both to
 * parse the subject field of a rule string (via str2value(), which matches
 * case-insensitively) and to print it (rctl_subject_type_name()).
 */
static struct dict subjectnames[] = {
        { "process", RCTL_SUBJECT_TYPE_PROCESS },
        { "user", RCTL_SUBJECT_TYPE_USER },
        { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
        { "jail", RCTL_SUBJECT_TYPE_JAIL },
        { NULL, -1 }};
103
/*
 * Resource names and their RACCT_* identifiers.  rctl_resource_name()
 * searches this table to translate an identifier back into its name.
 */
static struct dict resourcenames[] = {
        { "cputime", RACCT_CPU },
        { "datasize", RACCT_DATA },
        { "stacksize", RACCT_STACK },
        { "coredumpsize", RACCT_CORE },
        { "memoryuse", RACCT_RSS },
        { "memorylocked", RACCT_MEMLOCK },
        { "maxproc", RACCT_NPROC },
        { "openfiles", RACCT_NOFILE },
        { "vmemoryuse", RACCT_VMEM },
        { "pseudoterminals", RACCT_NPTS },
        { "swapuse", RACCT_SWAP },
        { "nthr", RACCT_NTHR },
        { "msgqqueued", RACCT_MSGQQUEUED },
        { "msgqsize", RACCT_MSGQSIZE },
        { "nmsgq", RACCT_NMSGQ },
        { "nsem", RACCT_NSEM },
        { "nsemop", RACCT_NSEMOP },
        { "nshm", RACCT_NSHM },
        { "shmsize", RACCT_SHMSIZE },
        { "wallclock", RACCT_WALLCLOCK },
        { "pcpu", RACCT_PCTCPU },
        { NULL, -1 }};
127
/*
 * Action names and their RCTL_ACTION_* values.  rctl_action_name() searches
 * this table to translate a value back into its name.  The "sig*" entries
 * are signal-delivery actions; rctl_enforce() passes their values straight
 * to kern_psignal().
 */
static struct dict actionnames[] = {
        { "sighup", RCTL_ACTION_SIGHUP },
        { "sigint", RCTL_ACTION_SIGINT },
        { "sigquit", RCTL_ACTION_SIGQUIT },
        { "sigill", RCTL_ACTION_SIGILL },
        { "sigtrap", RCTL_ACTION_SIGTRAP },
        { "sigabrt", RCTL_ACTION_SIGABRT },
        { "sigemt", RCTL_ACTION_SIGEMT },
        { "sigfpe", RCTL_ACTION_SIGFPE },
        { "sigkill", RCTL_ACTION_SIGKILL },
        { "sigbus", RCTL_ACTION_SIGBUS },
        { "sigsegv", RCTL_ACTION_SIGSEGV },
        { "sigsys", RCTL_ACTION_SIGSYS },
        { "sigpipe", RCTL_ACTION_SIGPIPE },
        { "sigalrm", RCTL_ACTION_SIGALRM },
        { "sigterm", RCTL_ACTION_SIGTERM },
        { "sigurg", RCTL_ACTION_SIGURG },
        { "sigstop", RCTL_ACTION_SIGSTOP },
        { "sigtstp", RCTL_ACTION_SIGTSTP },
        { "sigchld", RCTL_ACTION_SIGCHLD },
        { "sigttin", RCTL_ACTION_SIGTTIN },
        { "sigttou", RCTL_ACTION_SIGTTOU },
        { "sigio", RCTL_ACTION_SIGIO },
        { "sigxcpu", RCTL_ACTION_SIGXCPU },
        { "sigxfsz", RCTL_ACTION_SIGXFSZ },
        { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
        { "sigprof", RCTL_ACTION_SIGPROF },
        { "sigwinch", RCTL_ACTION_SIGWINCH },
        { "siginfo", RCTL_ACTION_SIGINFO },
        { "sigusr1", RCTL_ACTION_SIGUSR1 },
        { "sigusr2", RCTL_ACTION_SIGUSR2 },
        { "sigthr", RCTL_ACTION_SIGTHR },
        { "deny", RCTL_ACTION_DENY },
        { "log", RCTL_ACTION_LOG },
        { "devctl", RCTL_ACTION_DEVCTL },
        { NULL, -1 }};
164
/* Subsystem initialization, run at SI_SUB_RACCT / SI_ORDER_FIRST. */
static void rctl_init(void);
SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);

/* UMA zones from which rule links and rules are allocated. */
static uma_zone_t rctl_rule_link_zone;
static uma_zone_t rctl_rule_zone;
/*
 * Held for writing while the per-racct r_rule_links lists are modified and
 * for reading while they are traversed.
 */
static struct rwlock rctl_lock;
RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");

/* Forward declarations for helpers defined later in this file. */
static int rctl_rule_fully_specified(const struct rctl_rule *rule);
static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);

/* malloc(9) type used for the temporary message buffers in rctl_enforce(). */
static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
177
178 static const char *
179 rctl_subject_type_name(int subject)
180 {
181         int i;
182
183         for (i = 0; subjectnames[i].d_name != NULL; i++) {
184                 if (subjectnames[i].d_value == subject)
185                         return (subjectnames[i].d_name);
186         }
187
188         panic("rctl_subject_type_name: unknown subject type %d", subject);
189 }
190
191 static const char *
192 rctl_action_name(int action)
193 {
194         int i;
195
196         for (i = 0; actionnames[i].d_name != NULL; i++) {
197                 if (actionnames[i].d_value == action)
198                         return (actionnames[i].d_name);
199         }
200
201         panic("rctl_action_name: unknown action %d", action);
202 }
203
204 const char *
205 rctl_resource_name(int resource)
206 {
207         int i;
208
209         for (i = 0; resourcenames[i].d_name != NULL; i++) {
210                 if (resourcenames[i].d_value == resource)
211                         return (resourcenames[i].d_name);
212         }
213
214         panic("rctl_resource_name: unknown resource %d", resource);
215 }
216
217 /*
218  * Return the amount of resource that can be allocated by 'p' before
219  * hitting 'rule'.
220  */
221 static int64_t
222 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
223 {
224         int resource;
225         int64_t available = INT64_MAX;
226         struct ucred *cred = p->p_ucred;
227
228         rw_assert(&rctl_lock, RA_LOCKED);
229
230         resource = rule->rr_resource;
231         switch (rule->rr_per) {
232         case RCTL_SUBJECT_TYPE_PROCESS:
233                 available = rule->rr_amount -
234                     p->p_racct->r_resources[resource];
235                 break;
236         case RCTL_SUBJECT_TYPE_USER:
237                 available = rule->rr_amount -
238                     cred->cr_ruidinfo->ui_racct->r_resources[resource];
239                 break;
240         case RCTL_SUBJECT_TYPE_LOGINCLASS:
241                 available = rule->rr_amount -
242                     cred->cr_loginclass->lc_racct->r_resources[resource];
243                 break;
244         case RCTL_SUBJECT_TYPE_JAIL:
245                 available = rule->rr_amount -
246                     cred->cr_prison->pr_prison_racct->prr_racct->
247                         r_resources[resource];
248                 break;
249         default:
250                 panic("rctl_compute_available: unknown per %d",
251                     rule->rr_per);
252         }
253
254         return (available);
255 }
256
257 /*
258  * Return non-zero if allocating 'amount' by proc 'p' would exceed
259  * resource limit specified by 'rule'.
260  */
261 static int
262 rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
263     int64_t amount)
264 {
265         int64_t available;
266
267         rw_assert(&rctl_lock, RA_LOCKED);
268
269         available = rctl_available_resource(p, rule);
270         if (available >= amount)
271                 return (0);
272
273         return (1);
274 }
275
276 /*
277  * Special version of rctl_available() function for the %cpu resource.
278  * We slightly cheat here and return less than we normally would.
279  */
280 int64_t
281 rctl_pcpu_available(const struct proc *p) {
282         struct rctl_rule *rule;
283         struct rctl_rule_link *link;
284         int64_t available, minavailable, limit;
285
286         minavailable = INT64_MAX;
287         limit = 0;
288
289         rw_rlock(&rctl_lock);
290
291         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
292                 rule = link->rrl_rule;
293                 if (rule->rr_resource != RACCT_PCTCPU)
294                         continue;
295                 if (rule->rr_action != RCTL_ACTION_DENY)
296                         continue;
297                 available = rctl_available_resource(p, rule);
298                 if (available < minavailable) {
299                         minavailable = available;
300                         limit = rule->rr_amount;
301                 }
302         }
303
304         rw_runlock(&rctl_lock);
305
306         /*
307          * Return slightly less than actual value of the available
308          * %cpu resource.  This makes %cpu throttling more agressive
309          * and lets us act sooner than the limits are already exceeded.
310          */
311         if (limit != 0) {
312                 if (limit > 2 * RCTL_PCPU_SHIFT)
313                         minavailable -= RCTL_PCPU_SHIFT;
314                 else
315                         minavailable -= (limit / 2);
316         }
317
318         return (minavailable);
319 }
320
/*
 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
 * to what it keeps allocated now.  Returns non-zero if the allocation should
 * be denied, 0 otherwise.
 *
 * Every rule linked to the process's racct that covers 'resource' and would
 * be exceeded has its action carried out: "deny" makes this function return
 * EDOOFUS, "log" prints a console message, "devctl" sends a devctl
 * notification, and any other action is delivered as a signal.  The log,
 * devctl and signal actions fire once per link and are re-armed when the
 * rule is no longer exceeded (see rrl_exceeded).
 *
 * The rule list is walked with rctl_lock held for reading.
 */
int
rctl_enforce(struct proc *p, int resource, uint64_t amount)
{
        struct rctl_rule *rule;
        struct rctl_rule_link *link;
        struct sbuf sb;
        int should_deny = 0;
        char *buf;
        /*
         * State for ppsratecheck(); being static, it is shared by every
         * call of this function, whatever the process or rule.
         */
        static int curtime = 0;
        static struct timeval lasttime;

        rw_rlock(&rctl_lock);

        /*
         * There may be more than one matching rule; go through all of them.
         * Denial should be done last, after logging and sending signals.
         */
        LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
                rule = link->rrl_rule;
                if (rule->rr_resource != resource)
                        continue;
                if (!rctl_would_exceed(p, rule, amount)) {
                        /* Back under the limit: re-arm one-shot actions. */
                        link->rrl_exceeded = 0;
                        continue;
                }

                switch (rule->rr_action) {
                case RCTL_ACTION_DENY:
                        /*
                         * Only note the denial; keep scanning so the
                         * remaining rules still get their actions.
                         */
                        should_deny = 1;
                        continue;
                case RCTL_ACTION_LOG:
                        /*
                         * If rrl_exceeded != 0, it means we've already
                         * logged a warning for this process.
                         */
                        if (link->rrl_exceeded != 0)
                                continue;

                        /*
                         * If the process state is not fully initialized yet,
                         * we can't access most of the required fields, e.g.
                         * p->p_comm.  This happens when called from fork1().
                         * Ignore this rule for now; it will be processed just
                         * after fork, when called from racct_proc_fork_done().
                         */
                        if (p->p_state != PRS_NORMAL)
                                continue;

                        /*
                         * Rate-limit the log output.  rrl_exceeded is left
                         * untouched when we skip, so a later call retries.
                         */
                        if (!ppsratecheck(&lasttime, &curtime, 10))
                                continue;

                        /*
                         * M_NOWAIT: presumably because rctl_lock is held
                         * here and we must not sleep.
                         */
                        buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
                        if (buf == NULL) {
                                printf("rctl_enforce: out of memory\n");
                                continue;
                        }
                        /* Render the rule into a fixed-length sbuf on buf. */
                        sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
                        rctl_rule_to_sbuf(&sb, rule);
                        sbuf_finish(&sb);
                        printf("rctl: rule \"%s\" matched by pid %d "
                            "(%s), uid %d, jail %s\n", sbuf_data(&sb),
                            p->p_pid, p->p_comm, p->p_ucred->cr_uid,
                            p->p_ucred->cr_prison->pr_prison_racct->prr_name);
                        sbuf_delete(&sb);
                        free(buf, M_RCTL);
                        link->rrl_exceeded = 1;
                        continue;
                case RCTL_ACTION_DEVCTL:
                        /* Already notified for this excursion. */
                        if (link->rrl_exceeded != 0)
                                continue;

                        /* Same not-yet-initialized check as for "log". */
                        if (p->p_state != PRS_NORMAL)
                                continue;
        
                        buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
                        if (buf == NULL) {
                                printf("rctl_enforce: out of memory\n");
                                continue;
                        }
                        /*
                         * Build "rule=<rule> pid=<pid> ruid=<ruid>
                         * jail=<name>" as the notification payload.
                         */
                        sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
                        sbuf_printf(&sb, "rule=");
                        rctl_rule_to_sbuf(&sb, rule);
                        sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
                            p->p_pid, p->p_ucred->cr_ruid,
                            p->p_ucred->cr_prison->pr_prison_racct->prr_name);
                        sbuf_finish(&sb);
                        devctl_notify_f("RCTL", "rule", "matched",
                            sbuf_data(&sb), M_NOWAIT);
                        sbuf_delete(&sb);
                        free(buf, M_RCTL);
                        link->rrl_exceeded = 1;
                        continue;
                default:
                        /* Everything else is a signal-delivery action. */
                        if (link->rrl_exceeded != 0)
                                continue;

                        if (p->p_state != PRS_NORMAL)
                                continue;

                        KASSERT(rule->rr_action > 0 &&
                            rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
                            ("rctl_enforce: unknown action %d",
                             rule->rr_action));

                        /*
                         * We're using the fact that RCTL_ACTION_SIG* values
                         * are equal to their counterparts from sys/signal.h.
                         */
                        kern_psignal(p, rule->rr_action);
                        link->rrl_exceeded = 1;
                        continue;
                }
        }

        rw_runlock(&rctl_lock);

        if (should_deny) {
                /*
                 * Return fake error code; the caller should change it
                 * into one proper for the situation - EFBIG, ENOMEM etc.
                 */
                return (EDOOFUS);
        }

        return (0);
}
452
453 uint64_t
454 rctl_get_limit(struct proc *p, int resource)
455 {
456         struct rctl_rule *rule;
457         struct rctl_rule_link *link;
458         uint64_t amount = UINT64_MAX;
459
460         rw_rlock(&rctl_lock);
461
462         /*
463          * There may be more than one matching rule; go through all of them.
464          * Denial should be done last, after logging and sending signals.
465          */
466         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
467                 rule = link->rrl_rule;
468                 if (rule->rr_resource != resource)
469                         continue;
470                 if (rule->rr_action != RCTL_ACTION_DENY)
471                         continue;
472                 if (rule->rr_amount < amount)
473                         amount = rule->rr_amount;
474         }
475
476         rw_runlock(&rctl_lock);
477
478         return (amount);
479 }
480
481 uint64_t
482 rctl_get_available(struct proc *p, int resource)
483 {
484         struct rctl_rule *rule;
485         struct rctl_rule_link *link;
486         int64_t available, minavailable, allocated;
487
488         minavailable = INT64_MAX;
489
490         rw_rlock(&rctl_lock);
491
492         /*
493          * There may be more than one matching rule; go through all of them.
494          * Denial should be done last, after logging and sending signals.
495          */
496         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
497                 rule = link->rrl_rule;
498                 if (rule->rr_resource != resource)
499                         continue;
500                 if (rule->rr_action != RCTL_ACTION_DENY)
501                         continue;
502                 available = rctl_available_resource(p, rule);
503                 if (available < minavailable)
504                         minavailable = available;
505         }
506
507         rw_runlock(&rctl_lock);
508
509         /*
510          * XXX: Think about this _hard_.
511          */
512         allocated = p->p_racct->r_resources[resource];
513         if (minavailable < INT64_MAX - allocated)
514                 minavailable += allocated;
515         if (minavailable < 0)
516                 minavailable = 0;
517         return (minavailable);
518 }
519
520 static int
521 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
522 {
523
524         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
525                 if (rule->rr_subject_type != filter->rr_subject_type)
526                         return (0);
527
528                 switch (filter->rr_subject_type) {
529                 case RCTL_SUBJECT_TYPE_PROCESS:
530                         if (filter->rr_subject.rs_proc != NULL &&
531                             rule->rr_subject.rs_proc !=
532                             filter->rr_subject.rs_proc)
533                                 return (0);
534                         break;
535                 case RCTL_SUBJECT_TYPE_USER:
536                         if (filter->rr_subject.rs_uip != NULL &&
537                             rule->rr_subject.rs_uip !=
538                             filter->rr_subject.rs_uip)
539                                 return (0);
540                         break;
541                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
542                         if (filter->rr_subject.rs_loginclass != NULL &&
543                             rule->rr_subject.rs_loginclass !=
544                             filter->rr_subject.rs_loginclass)
545                                 return (0);
546                         break;
547                 case RCTL_SUBJECT_TYPE_JAIL:
548                         if (filter->rr_subject.rs_prison_racct != NULL &&
549                             rule->rr_subject.rs_prison_racct !=
550                             filter->rr_subject.rs_prison_racct)
551                                 return (0);
552                         break;
553                 default:
554                         panic("rctl_rule_matches: unknown subject type %d",
555                             filter->rr_subject_type);
556                 }
557         }
558
559         if (filter->rr_resource != RACCT_UNDEFINED) {
560                 if (rule->rr_resource != filter->rr_resource)
561                         return (0);
562         }
563
564         if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
565                 if (rule->rr_action != filter->rr_action)
566                         return (0);
567         }
568
569         if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
570                 if (rule->rr_amount != filter->rr_amount)
571                         return (0);
572         }
573
574         if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
575                 if (rule->rr_per != filter->rr_per)
576                         return (0);
577         }
578
579         return (1);
580 }
581
582 static int
583 str2value(const char *str, int *value, struct dict *table)
584 {
585         int i;
586
587         if (value == NULL)
588                 return (EINVAL);
589
590         for (i = 0; table[i].d_name != NULL; i++) {
591                 if (strcasecmp(table[i].d_name, str) == 0) {
592                         *value =  table[i].d_value;
593                         return (0);
594                 }
595         }
596
597         return (EINVAL);
598 }
599
600 static int
601 str2id(const char *str, id_t *value)
602 {
603         char *end;
604
605         if (str == NULL)
606                 return (EINVAL);
607
608         *value = strtoul(str, &end, 10);
609         if ((size_t)(end - str) != strlen(str))
610                 return (EINVAL);
611
612         return (0);
613 }
614
615 static int
616 str2int64(const char *str, int64_t *value)
617 {
618         char *end;
619
620         if (str == NULL)
621                 return (EINVAL);
622
623         *value = strtoul(str, &end, 10);
624         if ((size_t)(end - str) != strlen(str))
625                 return (EINVAL);
626
627         return (0);
628 }
629
630 /*
631  * Connect the rule to the racct, increasing refcount for the rule.
632  */
633 static void
634 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
635 {
636         struct rctl_rule_link *link;
637
638         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
639
640         rctl_rule_acquire(rule);
641         link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
642         link->rrl_rule = rule;
643         link->rrl_exceeded = 0;
644
645         rw_wlock(&rctl_lock);
646         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
647         rw_wunlock(&rctl_lock);
648 }
649
650 static int
651 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
652 {
653         struct rctl_rule_link *link;
654
655         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
656         rw_assert(&rctl_lock, RA_WLOCKED);
657
658         link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
659         if (link == NULL)
660                 return (ENOMEM);
661         rctl_rule_acquire(rule);
662         link->rrl_rule = rule;
663         link->rrl_exceeded = 0;
664
665         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
666         return (0);
667 }
668
669 /*
670  * Remove limits for a rules matching the filter and release
671  * the refcounts for the rules, possibly freeing them.  Returns
672  * the number of limit structures removed.
673  */
674 static int
675 rctl_racct_remove_rules(struct racct *racct,
676     const struct rctl_rule *filter)
677 {
678         int removed = 0;
679         struct rctl_rule_link *link, *linktmp;
680
681         rw_assert(&rctl_lock, RA_WLOCKED);
682
683         LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
684                 if (!rctl_rule_matches(link->rrl_rule, filter))
685                         continue;
686
687                 LIST_REMOVE(link, rrl_next);
688                 rctl_rule_release(link->rrl_rule);
689                 uma_zfree(rctl_rule_link_zone, link);
690                 removed++;
691         }
692         return (removed);
693 }
694
695 static void
696 rctl_rule_acquire_subject(struct rctl_rule *rule)
697 {
698
699         switch (rule->rr_subject_type) {
700         case RCTL_SUBJECT_TYPE_UNDEFINED:
701         case RCTL_SUBJECT_TYPE_PROCESS:
702                 break;
703         case RCTL_SUBJECT_TYPE_JAIL:
704                 if (rule->rr_subject.rs_prison_racct != NULL)
705                         prison_racct_hold(rule->rr_subject.rs_prison_racct);
706                 break;
707         case RCTL_SUBJECT_TYPE_USER:
708                 if (rule->rr_subject.rs_uip != NULL)
709                         uihold(rule->rr_subject.rs_uip);
710                 break;
711         case RCTL_SUBJECT_TYPE_LOGINCLASS:
712                 if (rule->rr_subject.rs_loginclass != NULL)
713                         loginclass_hold(rule->rr_subject.rs_loginclass);
714                 break;
715         default:
716                 panic("rctl_rule_acquire_subject: unknown subject type %d",
717                     rule->rr_subject_type);
718         }
719 }
720
721 static void
722 rctl_rule_release_subject(struct rctl_rule *rule)
723 {
724
725         switch (rule->rr_subject_type) {
726         case RCTL_SUBJECT_TYPE_UNDEFINED:
727         case RCTL_SUBJECT_TYPE_PROCESS:
728                 break;
729         case RCTL_SUBJECT_TYPE_JAIL:
730                 if (rule->rr_subject.rs_prison_racct != NULL)
731                         prison_racct_free(rule->rr_subject.rs_prison_racct);
732                 break;
733         case RCTL_SUBJECT_TYPE_USER:
734                 if (rule->rr_subject.rs_uip != NULL)
735                         uifree(rule->rr_subject.rs_uip);
736                 break;
737         case RCTL_SUBJECT_TYPE_LOGINCLASS:
738                 if (rule->rr_subject.rs_loginclass != NULL)
739                         loginclass_free(rule->rr_subject.rs_loginclass);
740                 break;
741         default:
742                 panic("rctl_rule_release_subject: unknown subject type %d",
743                     rule->rr_subject_type);
744         }
745 }
746
747 struct rctl_rule *
748 rctl_rule_alloc(int flags)
749 {
750         struct rctl_rule *rule;
751
752         rule = uma_zalloc(rctl_rule_zone, flags);
753         if (rule == NULL)
754                 return (NULL);
755         rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
756         rule->rr_subject.rs_proc = NULL;
757         rule->rr_subject.rs_uip = NULL;
758         rule->rr_subject.rs_loginclass = NULL;
759         rule->rr_subject.rs_prison_racct = NULL;
760         rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
761         rule->rr_resource = RACCT_UNDEFINED;
762         rule->rr_action = RCTL_ACTION_UNDEFINED;
763         rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
764         refcount_init(&rule->rr_refcount, 1);
765
766         return (rule);
767 }
768
769 struct rctl_rule *
770 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
771 {
772         struct rctl_rule *copy;
773
774         copy = uma_zalloc(rctl_rule_zone, flags);
775         if (copy == NULL)
776                 return (NULL);
777         copy->rr_subject_type = rule->rr_subject_type;
778         copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
779         copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
780         copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
781         copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
782         copy->rr_per = rule->rr_per;
783         copy->rr_resource = rule->rr_resource;
784         copy->rr_action = rule->rr_action;
785         copy->rr_amount = rule->rr_amount;
786         refcount_init(&copy->rr_refcount, 1);
787         rctl_rule_acquire_subject(copy);
788
789         return (copy);
790 }
791
792 void
793 rctl_rule_acquire(struct rctl_rule *rule)
794 {
795
796         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
797
798         refcount_acquire(&rule->rr_refcount);
799 }
800
801 static void
802 rctl_rule_free(void *context, int pending)
803 {
804         struct rctl_rule *rule;
805         
806         rule = (struct rctl_rule *)context;
807
808         KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
809         
810         /*
811          * We don't need locking here; rule is guaranteed to be inaccessible.
812          */
813         
814         rctl_rule_release_subject(rule);
815         uma_zfree(rctl_rule_zone, rule);
816 }
817
818 void
819 rctl_rule_release(struct rctl_rule *rule)
820 {
821
822         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
823
824         if (refcount_release(&rule->rr_refcount)) {
825                 /*
826                  * rctl_rule_release() is often called when iterating
827                  * over all the uidinfo structures in the system,
828                  * holding uihashtbl_lock.  Since rctl_rule_free()
829                  * might end up calling uifree(), this would lead
830                  * to lock recursion.  Use taskqueue to avoid this.
831                  */
832                 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
833                 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
834         }
835 }
836
837 static int
838 rctl_rule_fully_specified(const struct rctl_rule *rule)
839 {
840
841         switch (rule->rr_subject_type) {
842         case RCTL_SUBJECT_TYPE_UNDEFINED:
843                 return (0);
844         case RCTL_SUBJECT_TYPE_PROCESS:
845                 if (rule->rr_subject.rs_proc == NULL)
846                         return (0);
847                 break;
848         case RCTL_SUBJECT_TYPE_USER:
849                 if (rule->rr_subject.rs_uip == NULL)
850                         return (0);
851                 break;
852         case RCTL_SUBJECT_TYPE_LOGINCLASS:
853                 if (rule->rr_subject.rs_loginclass == NULL)
854                         return (0);
855                 break;
856         case RCTL_SUBJECT_TYPE_JAIL:
857                 if (rule->rr_subject.rs_prison_racct == NULL)
858                         return (0);
859                 break;
860         default:
861                 panic("rctl_rule_fully_specified: unknown subject type %d",
862                     rule->rr_subject_type);
863         }
864         if (rule->rr_resource == RACCT_UNDEFINED)
865                 return (0);
866         if (rule->rr_action == RCTL_ACTION_UNDEFINED)
867                 return (0);
868         if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
869                 return (0);
870         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
871                 return (0);
872
873         return (1);
874 }
875
876 static int
877 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
878 {
879         int error = 0;
880         char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
881              *amountstr, *perstr;
882         struct rctl_rule *rule;
883         id_t id;
884
885         rule = rctl_rule_alloc(M_WAITOK);
886
887         subjectstr = strsep(&rulestr, ":");
888         subject_idstr = strsep(&rulestr, ":");
889         resourcestr = strsep(&rulestr, ":");
890         actionstr = strsep(&rulestr, "=/");
891         amountstr = strsep(&rulestr, "/");
892         perstr = rulestr;
893
894         if (subjectstr == NULL || subjectstr[0] == '\0')
895                 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
896         else {
897                 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
898                 if (error != 0)
899                         goto out;
900         }
901
902         if (subject_idstr == NULL || subject_idstr[0] == '\0') {
903                 rule->rr_subject.rs_proc = NULL;
904                 rule->rr_subject.rs_uip = NULL;
905                 rule->rr_subject.rs_loginclass = NULL;
906                 rule->rr_subject.rs_prison_racct = NULL;
907         } else {
908                 switch (rule->rr_subject_type) {
909                 case RCTL_SUBJECT_TYPE_UNDEFINED:
910                         error = EINVAL;
911                         goto out;
912                 case RCTL_SUBJECT_TYPE_PROCESS:
913                         error = str2id(subject_idstr, &id);
914                         if (error != 0)
915                                 goto out;
916                         sx_assert(&allproc_lock, SA_LOCKED);
917                         rule->rr_subject.rs_proc = pfind(id);
918                         if (rule->rr_subject.rs_proc == NULL) {
919                                 error = ESRCH;
920                                 goto out;
921                         }
922                         PROC_UNLOCK(rule->rr_subject.rs_proc);
923                         break;
924                 case RCTL_SUBJECT_TYPE_USER:
925                         error = str2id(subject_idstr, &id);
926                         if (error != 0)
927                                 goto out;
928                         rule->rr_subject.rs_uip = uifind(id);
929                         break;
930                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
931                         rule->rr_subject.rs_loginclass =
932                             loginclass_find(subject_idstr);
933                         if (rule->rr_subject.rs_loginclass == NULL) {
934                                 error = ENAMETOOLONG;
935                                 goto out;
936                         }
937                         break;
938                 case RCTL_SUBJECT_TYPE_JAIL:
939                         rule->rr_subject.rs_prison_racct =
940                             prison_racct_find(subject_idstr);
941                         if (rule->rr_subject.rs_prison_racct == NULL) {
942                                 error = ENAMETOOLONG;
943                                 goto out;
944                         }
945                         break;
946                default:
947                        panic("rctl_string_to_rule: unknown subject type %d",
948                            rule->rr_subject_type);
949                }
950         }
951
952         if (resourcestr == NULL || resourcestr[0] == '\0')
953                 rule->rr_resource = RACCT_UNDEFINED;
954         else {
955                 error = str2value(resourcestr, &rule->rr_resource,
956                     resourcenames);
957                 if (error != 0)
958                         goto out;
959         }
960
961         if (actionstr == NULL || actionstr[0] == '\0')
962                 rule->rr_action = RCTL_ACTION_UNDEFINED;
963         else {
964                 error = str2value(actionstr, &rule->rr_action, actionnames);
965                 if (error != 0)
966                         goto out;
967         }
968
969         if (amountstr == NULL || amountstr[0] == '\0')
970                 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
971         else {
972                 error = str2int64(amountstr, &rule->rr_amount);
973                 if (error != 0)
974                         goto out;
975                 if (RACCT_IS_IN_MILLIONS(rule->rr_resource))
976                         rule->rr_amount *= 1000000;
977         }
978
979         if (perstr == NULL || perstr[0] == '\0')
980                 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
981         else {
982                 error = str2value(perstr, &rule->rr_per, subjectnames);
983                 if (error != 0)
984                         goto out;
985         }
986
987 out:
988         if (error == 0)
989                 *rulep = rule;
990         else
991                 rctl_rule_release(rule);
992
993         return (error);
994 }
995
996 /*
997  * Link a rule with all the subjects it applies to.
998  */
999 int
1000 rctl_rule_add(struct rctl_rule *rule)
1001 {
1002         struct proc *p;
1003         struct ucred *cred;
1004         struct uidinfo *uip;
1005         struct prison *pr;
1006         struct prison_racct *prr;
1007         struct loginclass *lc;
1008         struct rctl_rule *rule2;
1009         int match;
1010
1011         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
1012
1013         /*
1014          * Some rules just don't make sense.  Note that the one below
1015          * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU,
1016          * for example, is not deniable in the racct sense, but the
1017          * limit is enforced in a different way, so "deny" rules for %CPU
1018          * do make sense.
1019          */
1020         if (rule->rr_action == RCTL_ACTION_DENY &&
1021             (rule->rr_resource == RACCT_CPU ||
1022             rule->rr_resource == RACCT_WALLCLOCK))
1023                 return (EOPNOTSUPP);
1024
1025         if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
1026             RACCT_IS_SLOPPY(rule->rr_resource))
1027                 return (EOPNOTSUPP);
1028
1029         /*
1030          * Make sure there are no duplicated rules.  Also, for the "deny"
1031          * rules, remove ones differing only by "amount".
1032          */
1033         if (rule->rr_action == RCTL_ACTION_DENY) {
1034                 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
1035                 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
1036                 rctl_rule_remove(rule2);
1037                 rctl_rule_release(rule2);
1038         } else
1039                 rctl_rule_remove(rule);
1040
1041         switch (rule->rr_subject_type) {
1042         case RCTL_SUBJECT_TYPE_PROCESS:
1043                 p = rule->rr_subject.rs_proc;
1044                 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
1045
1046                 rctl_racct_add_rule(p->p_racct, rule);
1047                 /*
1048                  * In case of per-process rule, we don't have anything more
1049                  * to do.
1050                  */
1051                 return (0);
1052
1053         case RCTL_SUBJECT_TYPE_USER:
1054                 uip = rule->rr_subject.rs_uip;
1055                 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1056                 rctl_racct_add_rule(uip->ui_racct, rule);
1057                 break;
1058
1059         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1060                 lc = rule->rr_subject.rs_loginclass;
1061                 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1062                 rctl_racct_add_rule(lc->lc_racct, rule);
1063                 break;
1064
1065         case RCTL_SUBJECT_TYPE_JAIL:
1066                 prr = rule->rr_subject.rs_prison_racct;
1067                 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1068                 rctl_racct_add_rule(prr->prr_racct, rule);
1069                 break;
1070
1071         default:
1072                 panic("rctl_rule_add: unknown subject type %d",
1073                     rule->rr_subject_type);
1074         }
1075
1076         /*
1077          * Now go through all the processes and add the new rule to the ones
1078          * it applies to.
1079          */
1080         sx_assert(&allproc_lock, SA_LOCKED);
1081         FOREACH_PROC_IN_SYSTEM(p) {
1082                 cred = p->p_ucred;
1083                 switch (rule->rr_subject_type) {
1084                 case RCTL_SUBJECT_TYPE_USER:
1085                         if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1086                             cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1087                                 break;
1088                         continue;
1089                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1090                         if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1091                                 break;
1092                         continue;
1093                 case RCTL_SUBJECT_TYPE_JAIL:
1094                         match = 0;
1095                         for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1096                                 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1097                                         match = 1;
1098                                         break;
1099                                 }
1100                         }
1101                         if (match)
1102                                 break;
1103                         continue;
1104                 default:
1105                         panic("rctl_rule_add: unknown subject type %d",
1106                             rule->rr_subject_type);
1107                 }
1108
1109                 rctl_racct_add_rule(p->p_racct, rule);
1110         }
1111
1112         return (0);
1113 }
1114
1115 static void
1116 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1117 {
1118         struct rctl_rule *filter = (struct rctl_rule *)arg2;
1119         int found = 0;
1120
1121         rw_wlock(&rctl_lock);
1122         found += rctl_racct_remove_rules(racct, filter);
1123         rw_wunlock(&rctl_lock);
1124
1125         *((int *)arg3) += found;
1126 }
1127
1128 /*
1129  * Remove all rules that match the filter.
1130  */
1131 int
1132 rctl_rule_remove(struct rctl_rule *filter)
1133 {
1134         int found = 0;
1135         struct proc *p;
1136
1137         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1138             filter->rr_subject.rs_proc != NULL) {
1139                 p = filter->rr_subject.rs_proc;
1140                 rw_wlock(&rctl_lock);
1141                 found = rctl_racct_remove_rules(p->p_racct, filter);
1142                 rw_wunlock(&rctl_lock);
1143                 if (found)
1144                         return (0);
1145                 return (ESRCH);
1146         }
1147
1148         loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1149             (void *)&found);
1150         ui_racct_foreach(rctl_rule_remove_callback, filter,
1151             (void *)&found);
1152         prison_racct_foreach(rctl_rule_remove_callback, filter,
1153             (void *)&found);
1154
1155         sx_assert(&allproc_lock, SA_LOCKED);
1156         rw_wlock(&rctl_lock);
1157         FOREACH_PROC_IN_SYSTEM(p) {
1158                 found += rctl_racct_remove_rules(p->p_racct, filter);
1159         }
1160         rw_wunlock(&rctl_lock);
1161
1162         if (found)
1163                 return (0);
1164         return (ESRCH);
1165 }
1166
1167 /*
1168  * Appends a rule to the sbuf.
1169  */
1170 static void
1171 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1172 {
1173         int64_t amount;
1174
1175         sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1176
1177         switch (rule->rr_subject_type) {
1178         case RCTL_SUBJECT_TYPE_PROCESS:
1179                 if (rule->rr_subject.rs_proc == NULL)
1180                         sbuf_printf(sb, ":");
1181                 else
1182                         sbuf_printf(sb, "%d:",
1183                             rule->rr_subject.rs_proc->p_pid);
1184                 break;
1185         case RCTL_SUBJECT_TYPE_USER:
1186                 if (rule->rr_subject.rs_uip == NULL)
1187                         sbuf_printf(sb, ":");
1188                 else
1189                         sbuf_printf(sb, "%d:",
1190                             rule->rr_subject.rs_uip->ui_uid);
1191                 break;
1192         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1193                 if (rule->rr_subject.rs_loginclass == NULL)
1194                         sbuf_printf(sb, ":");
1195                 else
1196                         sbuf_printf(sb, "%s:",
1197                             rule->rr_subject.rs_loginclass->lc_name);
1198                 break;
1199         case RCTL_SUBJECT_TYPE_JAIL:
1200                 if (rule->rr_subject.rs_prison_racct == NULL)
1201                         sbuf_printf(sb, ":");
1202                 else
1203                         sbuf_printf(sb, "%s:",
1204                             rule->rr_subject.rs_prison_racct->prr_name);
1205                 break;
1206         default:
1207                 panic("rctl_rule_to_sbuf: unknown subject type %d",
1208                     rule->rr_subject_type);
1209         }
1210
1211         amount = rule->rr_amount;
1212         if (amount != RCTL_AMOUNT_UNDEFINED &&
1213             RACCT_IS_IN_MILLIONS(rule->rr_resource))
1214                 amount /= 1000000;
1215
1216         sbuf_printf(sb, "%s:%s=%jd",
1217             rctl_resource_name(rule->rr_resource),
1218             rctl_action_name(rule->rr_action),
1219             amount);
1220
1221         if (rule->rr_per != rule->rr_subject_type)
1222                 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1223 }
1224
1225 /*
1226  * Routine used by RCTL syscalls to read in input string.
1227  */
1228 static int
1229 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1230 {
1231         int error;
1232         char *str;
1233
1234         if (inbuflen <= 0)
1235                 return (EINVAL);
1236         if (inbuflen > RCTL_MAX_INBUFLEN)
1237                 return (E2BIG);
1238
1239         str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1240         error = copyinstr(inbufp, str, inbuflen, NULL);
1241         if (error != 0) {
1242                 free(str, M_RCTL);
1243                 return (error);
1244         }
1245
1246         *inputstr = str;
1247
1248         return (0);
1249 }
1250
/*
 * Helper for the RCTL syscalls: hand the output string to the caller.
 *
 * Finishes the sbuf and copies its contents, including the terminating
 * NUL, to outbufp with copyout().  The sbuf is always deleted before
 * returning.  Returns 0 when outputsbuf is NULL (nothing to write),
 * ERANGE when outbuflen is too small, or the copyout() result.
 */
static int
rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
{
        int error;

        if (outputsbuf == NULL)
                return (0);

        sbuf_finish(outputsbuf);
        if (outbuflen >= sbuf_len(outputsbuf) + 1)
                error = copyout(sbuf_data(outputsbuf), outbufp,
                    sbuf_len(outputsbuf) + 1);
        else
                error = ERANGE;

        sbuf_delete(outputsbuf);
        return (error);
}
1272
1273 static struct sbuf *
1274 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1275 {
1276         int i;
1277         int64_t amount;
1278         struct sbuf *sb;
1279
1280         sb = sbuf_new_auto();
1281         for (i = 0; i <= RACCT_MAX; i++) {
1282                 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1283                         continue;
1284                 amount = racct->r_resources[i];
1285                 if (RACCT_IS_IN_MILLIONS(i))
1286                         amount /= 1000000;
1287                 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1288         }
1289         sbuf_setpos(sb, sbuf_len(sb) - 1);
1290         return (sb);
1291 }
1292
1293 int
1294 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1295 {
1296         int error;
1297         char *inputstr;
1298         struct rctl_rule *filter;
1299         struct sbuf *outputsbuf = NULL;
1300         struct proc *p;
1301         struct uidinfo *uip;
1302         struct loginclass *lc;
1303         struct prison_racct *prr;
1304
1305         error = priv_check(td, PRIV_RCTL_GET_RACCT);
1306         if (error != 0)
1307                 return (error);
1308
1309         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1310         if (error != 0)
1311                 return (error);
1312
1313         sx_slock(&allproc_lock);
1314         error = rctl_string_to_rule(inputstr, &filter);
1315         free(inputstr, M_RCTL);
1316         if (error != 0) {
1317                 sx_sunlock(&allproc_lock);
1318                 return (error);
1319         }
1320
1321         switch (filter->rr_subject_type) {
1322         case RCTL_SUBJECT_TYPE_PROCESS:
1323                 p = filter->rr_subject.rs_proc;
1324                 if (p == NULL) {
1325                         error = EINVAL;
1326                         goto out;
1327                 }
1328                 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1329                 break;
1330         case RCTL_SUBJECT_TYPE_USER:
1331                 uip = filter->rr_subject.rs_uip;
1332                 if (uip == NULL) {
1333                         error = EINVAL;
1334                         goto out;
1335                 }
1336                 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1337                 break;
1338         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1339                 lc = filter->rr_subject.rs_loginclass;
1340                 if (lc == NULL) {
1341                         error = EINVAL;
1342                         goto out;
1343                 }
1344                 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1345                 break;
1346         case RCTL_SUBJECT_TYPE_JAIL:
1347                 prr = filter->rr_subject.rs_prison_racct;
1348                 if (prr == NULL) {
1349                         error = EINVAL;
1350                         goto out;
1351                 }
1352                 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1353                 break;
1354         default:
1355                 error = EINVAL;
1356         }
1357 out:
1358         rctl_rule_release(filter);
1359         sx_sunlock(&allproc_lock);
1360         if (error != 0)
1361                 return (error);
1362
1363         error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1364
1365         return (error);
1366 }
1367
1368 static void
1369 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1370 {
1371         struct rctl_rule *filter = (struct rctl_rule *)arg2;
1372         struct rctl_rule_link *link;
1373         struct sbuf *sb = (struct sbuf *)arg3;
1374
1375         rw_rlock(&rctl_lock);
1376         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1377                 if (!rctl_rule_matches(link->rrl_rule, filter))
1378                         continue;
1379                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1380                 sbuf_printf(sb, ",");
1381         }
1382         rw_runlock(&rctl_lock);
1383 }
1384
1385 int
1386 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1387 {
1388         int error;
1389         size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1390         char *inputstr, *buf;
1391         struct sbuf *sb;
1392         struct rctl_rule *filter;
1393         struct rctl_rule_link *link;
1394         struct proc *p;
1395
1396         error = priv_check(td, PRIV_RCTL_GET_RULES);
1397         if (error != 0)
1398                 return (error);
1399
1400         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1401         if (error != 0)
1402                 return (error);
1403
1404         sx_slock(&allproc_lock);
1405         error = rctl_string_to_rule(inputstr, &filter);
1406         free(inputstr, M_RCTL);
1407         if (error != 0) {
1408                 sx_sunlock(&allproc_lock);
1409                 return (error);
1410         }
1411
1412 again:
1413         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1414         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1415         KASSERT(sb != NULL, ("sbuf_new failed"));
1416
1417         sx_assert(&allproc_lock, SA_LOCKED);
1418         FOREACH_PROC_IN_SYSTEM(p) {
1419                 rw_rlock(&rctl_lock);
1420                 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1421                         /*
1422                          * Non-process rules will be added to the buffer later.
1423                          * Adding them here would result in duplicated output.
1424                          */
1425                         if (link->rrl_rule->rr_subject_type !=
1426                             RCTL_SUBJECT_TYPE_PROCESS)
1427                                 continue;
1428                         if (!rctl_rule_matches(link->rrl_rule, filter))
1429                                 continue;
1430                         rctl_rule_to_sbuf(sb, link->rrl_rule);
1431                         sbuf_printf(sb, ",");
1432                 }
1433                 rw_runlock(&rctl_lock);
1434         }
1435
1436         loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1437         ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1438         prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1439         if (sbuf_error(sb) == ENOMEM) {
1440                 sbuf_delete(sb);
1441                 free(buf, M_RCTL);
1442                 bufsize *= 4;
1443                 goto again;
1444         }
1445
1446         /*
1447          * Remove trailing ",".
1448          */
1449         if (sbuf_len(sb) > 0)
1450                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1451
1452         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1453
1454         rctl_rule_release(filter);
1455         sx_sunlock(&allproc_lock);
1456         free(buf, M_RCTL);
1457         return (error);
1458 }
1459
1460 int
1461 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1462 {
1463         int error;
1464         size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1465         char *inputstr, *buf;
1466         struct sbuf *sb;
1467         struct rctl_rule *filter;
1468         struct rctl_rule_link *link;
1469
1470         error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1471         if (error != 0)
1472                 return (error);
1473
1474         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1475         if (error != 0)
1476                 return (error);
1477
1478         sx_slock(&allproc_lock);
1479         error = rctl_string_to_rule(inputstr, &filter);
1480         free(inputstr, M_RCTL);
1481         if (error != 0) {
1482                 sx_sunlock(&allproc_lock);
1483                 return (error);
1484         }
1485
1486         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1487                 rctl_rule_release(filter);
1488                 sx_sunlock(&allproc_lock);
1489                 return (EINVAL);
1490         }
1491         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1492                 rctl_rule_release(filter);
1493                 sx_sunlock(&allproc_lock);
1494                 return (EOPNOTSUPP);
1495         }
1496         if (filter->rr_subject.rs_proc == NULL) {
1497                 rctl_rule_release(filter);
1498                 sx_sunlock(&allproc_lock);
1499                 return (EINVAL);
1500         }
1501
1502 again:
1503         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1504         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1505         KASSERT(sb != NULL, ("sbuf_new failed"));
1506
1507         rw_rlock(&rctl_lock);
1508         LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1509             rrl_next) {
1510                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1511                 sbuf_printf(sb, ",");
1512         }
1513         rw_runlock(&rctl_lock);
1514         if (sbuf_error(sb) == ENOMEM) {
1515                 sbuf_delete(sb);
1516                 free(buf, M_RCTL);
1517                 bufsize *= 4;
1518                 goto again;
1519         }
1520
1521         /*
1522          * Remove trailing ",".
1523          */
1524         if (sbuf_len(sb) > 0)
1525                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1526
1527         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1528         rctl_rule_release(filter);
1529         sx_sunlock(&allproc_lock);
1530         free(buf, M_RCTL);
1531         return (error);
1532 }
1533
1534 int
1535 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1536 {
1537         int error;
1538         struct rctl_rule *rule;
1539         char *inputstr;
1540
1541         error = priv_check(td, PRIV_RCTL_ADD_RULE);
1542         if (error != 0)
1543                 return (error);
1544
1545         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1546         if (error != 0)
1547                 return (error);
1548
1549         sx_slock(&allproc_lock);
1550         error = rctl_string_to_rule(inputstr, &rule);
1551         free(inputstr, M_RCTL);
1552         if (error != 0) {
1553                 sx_sunlock(&allproc_lock);
1554                 return (error);
1555         }
1556         /*
1557          * The 'per' part of a rule is optional.
1558          */
1559         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1560             rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1561                 rule->rr_per = rule->rr_subject_type;
1562
1563         if (!rctl_rule_fully_specified(rule)) {
1564                 error = EINVAL;
1565                 goto out;
1566         }
1567
1568         error = rctl_rule_add(rule);
1569
1570 out:
1571         rctl_rule_release(rule);
1572         sx_sunlock(&allproc_lock);
1573         return (error);
1574 }
1575
1576 int
1577 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1578 {
1579         int error;
1580         struct rctl_rule *filter;
1581         char *inputstr;
1582
1583         error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1584         if (error != 0)
1585                 return (error);
1586
1587         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1588         if (error != 0)
1589                 return (error);
1590
1591         sx_slock(&allproc_lock);
1592         error = rctl_string_to_rule(inputstr, &filter);
1593         free(inputstr, M_RCTL);
1594         if (error != 0) {
1595                 sx_sunlock(&allproc_lock);
1596                 return (error);
1597         }
1598
1599         error = rctl_rule_remove(filter);
1600         rctl_rule_release(filter);
1601         sx_sunlock(&allproc_lock);
1602
1603         return (error);
1604 }
1605
1606 /*
1607  * Update RCTL rule list after credential change.
      *
      * Rebuilds the list of rule links attached to p's racct so that it
      * matches newcred.  The new list is made of the process-subject rules
      * already linked to p, plus every rule linked to the racct of
      * newcred's cr_ruidinfo, of its login class and of its prison's
      * prison_racct.
      *
      * Link entries are allocated with M_WAITOK, which is done with
      * rctl_lock dropped.  The rules are therefore counted under the read
      * lock first, the entries are allocated unlocked, and then filled in
      * under the write lock.  If the number of rules turns out to differ
      * from the number of entries allocated, the temporary list is freed
      * and the whole procedure starts over.
      *
      * NOTE(review): rctl_rule_add() also matches cr_uidinfo and walks the
      * prison's parents; confirm that looking only at cr_ruidinfo and at
      * the prison itself is intended here.
1608  */
1609 void
1610 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1611 {
1612         int rulecnt, i;
1613         struct rctl_rule_link *link, *newlink;
1614         struct uidinfo *newuip;
1615         struct loginclass *newlc;
1616         struct prison_racct *newprr;
1617         LIST_HEAD(, rctl_rule_link) newrules;
1618
              /* Subjects the process belongs to under the new credentials. */
1619         newuip = newcred->cr_ruidinfo;
1620         newlc = newcred->cr_loginclass;
1621         newprr = newcred->cr_prison->pr_prison_racct;
1622         
1623         LIST_INIT(&newrules);
1624
1625 again:
1626         /*
1627          * First, count the rules that apply to the process with new
1628          * credentials.
1629          */
1630         rulecnt = 0;
1631         rw_rlock(&rctl_lock);
1632         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1633                 if (link->rrl_rule->rr_subject_type ==
1634                     RCTL_SUBJECT_TYPE_PROCESS)
1635                         rulecnt++;
1636         }
1637         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1638                 rulecnt++;
1639         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1640                 rulecnt++;
1641         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1642                 rulecnt++;
1643         rw_runlock(&rctl_lock);
1644
1645         /*
1646          * Create temporary list.  We've dropped the rctl_lock in order
1647          * to use M_WAITOK.
1648          */
1649         for (i = 0; i < rulecnt; i++) {
1650                 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
                      /* NULL marks an entry that holds no rule reference yet. */
1651                 newlink->rrl_rule = NULL;
1652                 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1653         }
1654
              /* Cursor over the temporary list; NULL once entries run out. */
1655         newlink = LIST_FIRST(&newrules);
1656
1657         /*
1658          * Assign rules to the newly allocated list entries.
              *
              * Each assignment takes a reference on the rule and decrements
              * rulecnt.  Running out of entries (newlink == NULL) means more
              * rules exist now than were counted above.
1659          */
1660         rw_wlock(&rctl_lock);
1661         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1662                 if (link->rrl_rule->rr_subject_type ==
1663                     RCTL_SUBJECT_TYPE_PROCESS) {
1664                         if (newlink == NULL)
1665                                 goto goaround;
1666                         rctl_rule_acquire(link->rrl_rule);
1667                         newlink->rrl_rule = link->rrl_rule;
1668                         newlink = LIST_NEXT(newlink, rrl_next);
1669                         rulecnt--;
1670                 }
1671         }
1672         
1673         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1674                 if (newlink == NULL)
1675                         goto goaround;
1676                 rctl_rule_acquire(link->rrl_rule);
1677                 newlink->rrl_rule = link->rrl_rule;
1678                 newlink = LIST_NEXT(newlink, rrl_next);
1679                 rulecnt--;
1680         }
1681
1682         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1683                 if (newlink == NULL)
1684                         goto goaround;
1685                 rctl_rule_acquire(link->rrl_rule);
1686                 newlink->rrl_rule = link->rrl_rule;
1687                 newlink = LIST_NEXT(newlink, rrl_next);
1688                 rulecnt--;
1689         }
1690
1691         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1692                 if (newlink == NULL)
1693                         goto goaround;
1694                 rctl_rule_acquire(link->rrl_rule);
1695                 newlink->rrl_rule = link->rrl_rule;
1696                 newlink = LIST_NEXT(newlink, rrl_next);
1697                 rulecnt--;
1698         }
1699
              /*
               * rulecnt == 0 means every allocated entry received a rule.  A
               * positive value means fewer rules exist now than were counted,
               * which falls through to the retry path below.
               */
1700         if (rulecnt == 0) {
1701                 /*
1702                  * Free the old rule list.
1703                  */
1704                 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1705                         link = LIST_FIRST(&p->p_racct->r_rule_links);
1706                         LIST_REMOVE(link, rrl_next);
1707                         rctl_rule_release(link->rrl_rule);
1708                         uma_zfree(rctl_rule_link_zone, link);
1709                 }
1710
1711                 /*
1712                  * Replace lists and we're done.
1713                  *
1714                  * XXX: Is there any way to switch list heads instead
1715                  *      of iterating here?
1716                  */
1717                 while (!LIST_EMPTY(&newrules)) {
1718                         newlink = LIST_FIRST(&newrules);
1719                         LIST_REMOVE(newlink, rrl_next);
1720                         LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1721                             newlink, rrl_next);
1722                 }
1723
1724                 rw_wunlock(&rctl_lock);
1725
1726                 return;
1727         }
1728
1729 goaround:
1730         rw_wunlock(&rctl_lock);
1731
1732         /*
1733          * Rule list changed while we were not holding the rctl_lock.
1734          * Free the new list and try again.
              *
              * Entries that were already filled in hold a rule reference,
              * which is dropped here; untouched entries still have a NULL
              * rrl_rule.
1735          */
1736         while (!LIST_EMPTY(&newrules)) {
1737                 newlink = LIST_FIRST(&newrules);
1738                 LIST_REMOVE(newlink, rrl_next);
1739                 if (newlink->rrl_rule != NULL)
1740                         rctl_rule_release(newlink->rrl_rule);
1741                 uma_zfree(rctl_rule_link_zone, newlink);
1742         }
1743
1744         goto again;
1745 }
1746
1747 /*
1748  * Assign RCTL rules to the newly created process.
1749  */
1750 int
1751 rctl_proc_fork(struct proc *parent, struct proc *child)
1752 {
1753         int error;
1754         struct rctl_rule_link *link;
1755         struct rctl_rule *rule;
1756
1757         LIST_INIT(&child->p_racct->r_rule_links);
1758
1759         KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
1760
1761         rw_wlock(&rctl_lock);
1762
1763         /*
1764          * Go through limits applicable to the parent and assign them
1765          * to the child.  Rules with 'process' subject have to be duplicated
1766          * in order to make their rr_subject point to the new process.
1767          */
1768         LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1769                 if (link->rrl_rule->rr_subject_type ==
1770                     RCTL_SUBJECT_TYPE_PROCESS) {
1771                         rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1772                         if (rule == NULL)
1773                                 goto fail;
1774                         KASSERT(rule->rr_subject.rs_proc == parent,
1775                             ("rule->rr_subject.rs_proc != parent"));
1776                         rule->rr_subject.rs_proc = child;
1777                         error = rctl_racct_add_rule_locked(child->p_racct,
1778                             rule);
1779                         rctl_rule_release(rule);
1780                         if (error != 0)
1781                                 goto fail;
1782                 } else {
1783                         error = rctl_racct_add_rule_locked(child->p_racct,
1784                             link->rrl_rule);
1785                         if (error != 0)
1786                                 goto fail;
1787                 }
1788         }
1789
1790         rw_wunlock(&rctl_lock);
1791         return (0);
1792
1793 fail:
1794         while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1795                 link = LIST_FIRST(&child->p_racct->r_rule_links);
1796                 LIST_REMOVE(link, rrl_next);
1797                 rctl_rule_release(link->rrl_rule);
1798                 uma_zfree(rctl_rule_link_zone, link);
1799         }
1800         rw_wunlock(&rctl_lock);
1801         return (EAGAIN);
1802 }
1803
1804 /*
1805  * Release rules attached to the racct.
1806  */
1807 void
1808 rctl_racct_release(struct racct *racct)
1809 {
1810         struct rctl_rule_link *link;
1811
1812         rw_wlock(&rctl_lock);
1813         while (!LIST_EMPTY(&racct->r_rule_links)) {
1814                 link = LIST_FIRST(&racct->r_rule_links);
1815                 LIST_REMOVE(link, rrl_next);
1816                 rctl_rule_release(link->rrl_rule);
1817                 uma_zfree(rctl_rule_link_zone, link);
1818         }
1819         rw_wunlock(&rctl_lock);
1820 }
1821
1822 static void
1823 rctl_init(void)
1824 {
1825
1826         rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1827             sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1828             UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1829         rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1830             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1831 }
1832
1833 #else /* !RCTL */
1834
/*
 * Stub used when the kernel is built without RCTL: both arguments are
 * ignored and the call always fails with ENOSYS.
 */
int
sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{

        return (ENOSYS);
}
1841
/*
 * Stub used when the kernel is built without RCTL: both arguments are
 * ignored and the call always fails with ENOSYS.
 */
int
sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{

        return (ENOSYS);
}
1848
/*
 * Stub used when the kernel is built without RCTL: both arguments are
 * ignored and the call always fails with ENOSYS.
 */
int
sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{

        return (ENOSYS);
}
1855
/*
 * Stub used when the kernel is built without RCTL: both arguments are
 * ignored and the call always fails with ENOSYS.
 */
int
sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{

        return (ENOSYS);
}
1862
/*
 * Stub used when the kernel is built without RCTL: both arguments are
 * ignored and the call always fails with ENOSYS.
 */
int
sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{

        return (ENOSYS);
}
1869
1870 #endif /* !RCTL */