]> CyberLeo.Net >> Repos - FreeBSD/stable/9.git/blob - sys/kern/kern_rctl.c
MFC r234380:
[FreeBSD/stable/9.git] / sys / kern / kern_rctl.c
1 /*-
2  * Copyright (c) 2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/malloc.h>
38 #include <sys/queue.h>
39 #include <sys/refcount.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/limits.h>
43 #include <sys/loginclass.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/racct.h>
47 #include <sys/rctl.h>
48 #include <sys/resourcevar.h>
49 #include <sys/sx.h>
50 #include <sys/sysent.h>
51 #include <sys/sysproto.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/eventhandler.h>
55 #include <sys/lock.h>
56 #include <sys/mutex.h>
57 #include <sys/rwlock.h>
58 #include <sys/sbuf.h>
59 #include <sys/taskqueue.h>
60 #include <sys/tree.h>
61 #include <vm/uma.h>
62
63 #ifdef RCTL
64 #ifndef RACCT
65 #error "The RCTL option requires the RACCT option"
66 #endif
67
68 FEATURE(rctl, "Resource Limits");
69
/*
 * HRF_* values.  Nothing in the visible part of this file references them.
 * NOTE(review): meaning not derivable from here -- confirm against users.
 */
#define HRF_DEFAULT             0
#define HRF_DONT_INHERIT        1
#define HRF_DONT_ACCUMULATE     2

/* Default buffer size for rctl_get_rules(2). */
#define RCTL_DEFAULT_BUFSIZE    4096
/* NOTE(review): presumably the cap on rule strings copied in -- confirm. */
#define RCTL_MAX_INBUFLEN       4096
/*
 * Size of the fixed-length buffer rctl_enforce() formats a rule into for
 * the "log" and "devctl" actions.
 */
#define RCTL_LOG_BUFSIZE        128
78
/*
 * 'rctl_rule_link' connects a rule with every racct it's related to.
 * For example, rule 'user:X:openfiles:deny=N/process' is linked
 * with uidinfo for user X, and to each process of that user.
 */
struct rctl_rule_link {
        /* Linkage on the owning racct's r_rule_links list. */
        LIST_ENTRY(rctl_rule_link)      rrl_next;
        /* The rule this link refers to; the link holds a rule reference. */
        struct rctl_rule                *rrl_rule;
        /*
         * Set to 1 by rctl_enforce() once a log/devctl/signal action has
         * fired for this link, and cleared when the rule is no longer
         * exceeded, so the action is not repeated on every check.
         */
        int                             rrl_exceeded;
};
89
/*
 * One entry of a name <-> integer value lookup table.  The tables built
 * from it in this file are terminated by an entry with a NULL d_name.
 */
struct dict {
        const char      *d_name;        /* textual name */
        int             d_value;        /* corresponding constant */
};
94
/*
 * Names of the RCTL_SUBJECT_TYPE_* values; searched by
 * rctl_subject_type_name().  Terminated by a NULL name.
 */
static struct dict subjectnames[] = {
        { "process", RCTL_SUBJECT_TYPE_PROCESS },
        { "user", RCTL_SUBJECT_TYPE_USER },
        { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
        { "jail", RCTL_SUBJECT_TYPE_JAIL },
        { NULL, -1 }};
101
/*
 * Names of the RACCT_* resources; searched by rctl_resource_name().
 * Terminated by a NULL name.
 */
static struct dict resourcenames[] = {
        { "cputime", RACCT_CPU },
        { "datasize", RACCT_DATA },
        { "stacksize", RACCT_STACK },
        { "coredumpsize", RACCT_CORE },
        { "memoryuse", RACCT_RSS },
        { "memorylocked", RACCT_MEMLOCK },
        { "maxproc", RACCT_NPROC },
        { "openfiles", RACCT_NOFILE },
        { "vmemoryuse", RACCT_VMEM },
        { "pseudoterminals", RACCT_NPTS },
        { "swapuse", RACCT_SWAP },
        { "nthr", RACCT_NTHR },
        { "msgqqueued", RACCT_MSGQQUEUED },
        { "msgqsize", RACCT_MSGQSIZE },
        { "nmsgq", RACCT_NMSGQ },
        { "nsem", RACCT_NSEM },
        { "nsemop", RACCT_NSEMOP },
        { "nshm", RACCT_NSHM },
        { "shmsize", RACCT_SHMSIZE },
        { "wallclock", RACCT_WALLCLOCK },
        { NULL, -1 }};
124
/*
 * Names of the RCTL_ACTION_* values; searched by rctl_action_name().
 * The "sig*" entries are the signal-delivering actions; rctl_enforce()
 * passes their values straight to kern_psignal().  Terminated by a NULL
 * name.
 */
static struct dict actionnames[] = {
        { "sighup", RCTL_ACTION_SIGHUP },
        { "sigint", RCTL_ACTION_SIGINT },
        { "sigquit", RCTL_ACTION_SIGQUIT },
        { "sigill", RCTL_ACTION_SIGILL },
        { "sigtrap", RCTL_ACTION_SIGTRAP },
        { "sigabrt", RCTL_ACTION_SIGABRT },
        { "sigemt", RCTL_ACTION_SIGEMT },
        { "sigfpe", RCTL_ACTION_SIGFPE },
        { "sigkill", RCTL_ACTION_SIGKILL },
        { "sigbus", RCTL_ACTION_SIGBUS },
        { "sigsegv", RCTL_ACTION_SIGSEGV },
        { "sigsys", RCTL_ACTION_SIGSYS },
        { "sigpipe", RCTL_ACTION_SIGPIPE },
        { "sigalrm", RCTL_ACTION_SIGALRM },
        { "sigterm", RCTL_ACTION_SIGTERM },
        { "sigurg", RCTL_ACTION_SIGURG },
        { "sigstop", RCTL_ACTION_SIGSTOP },
        { "sigtstp", RCTL_ACTION_SIGTSTP },
        { "sigchld", RCTL_ACTION_SIGCHLD },
        { "sigttin", RCTL_ACTION_SIGTTIN },
        { "sigttou", RCTL_ACTION_SIGTTOU },
        { "sigio", RCTL_ACTION_SIGIO },
        { "sigxcpu", RCTL_ACTION_SIGXCPU },
        { "sigxfsz", RCTL_ACTION_SIGXFSZ },
        { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
        { "sigprof", RCTL_ACTION_SIGPROF },
        { "sigwinch", RCTL_ACTION_SIGWINCH },
        { "siginfo", RCTL_ACTION_SIGINFO },
        { "sigusr1", RCTL_ACTION_SIGUSR1 },
        { "sigusr2", RCTL_ACTION_SIGUSR2 },
        { "sigthr", RCTL_ACTION_SIGTHR },
        { "deny", RCTL_ACTION_DENY },
        { "log", RCTL_ACTION_LOG },
        { "devctl", RCTL_ACTION_DEVCTL },
        { NULL, -1 }};
161
/* Subsystem initializer, registered to run at SI_SUB_RACCT, SI_ORDER_FIRST. */
static void rctl_init(void);
SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);

/* UMA zone that 'struct rctl_rule_link' objects are allocated from. */
static uma_zone_t rctl_rule_link_zone;
/* UMA zone that 'struct rctl_rule' objects are allocated from. */
static uma_zone_t rctl_rule_zone;
/*
 * Read/write lock taken around every access to a racct's r_rule_links
 * list in this file: read-locked by the enforcement/query paths,
 * write-locked when links are added or removed.
 */
static struct rwlock rctl_lock;
RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");

/* Forward declarations for helpers used before their definitions. */
static int rctl_rule_fully_specified(const struct rctl_rule *rule);
static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);

/* malloc(9) type used for temporary buffers, e.g. in rctl_enforce(). */
MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
174
175 static const char *
176 rctl_subject_type_name(int subject)
177 {
178         int i;
179
180         for (i = 0; subjectnames[i].d_name != NULL; i++) {
181                 if (subjectnames[i].d_value == subject)
182                         return (subjectnames[i].d_name);
183         }
184
185         panic("rctl_subject_type_name: unknown subject type %d", subject);
186 }
187
188 static const char *
189 rctl_action_name(int action)
190 {
191         int i;
192
193         for (i = 0; actionnames[i].d_name != NULL; i++) {
194                 if (actionnames[i].d_value == action)
195                         return (actionnames[i].d_name);
196         }
197
198         panic("rctl_action_name: unknown action %d", action);
199 }
200
201 const char *
202 rctl_resource_name(int resource)
203 {
204         int i;
205
206         for (i = 0; resourcenames[i].d_name != NULL; i++) {
207                 if (resourcenames[i].d_value == resource)
208                         return (resourcenames[i].d_name);
209         }
210
211         panic("rctl_resource_name: unknown resource %d", resource);
212 }
213
214 /*
215  * Return the amount of resource that can be allocated by 'p' before
216  * hitting 'rule'.
217  */
218 static int64_t
219 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
220 {
221         int resource;
222         int64_t available = INT64_MAX;
223         struct ucred *cred = p->p_ucred;
224
225         rw_assert(&rctl_lock, RA_LOCKED);
226
227         resource = rule->rr_resource;
228         switch (rule->rr_per) {
229         case RCTL_SUBJECT_TYPE_PROCESS:
230                 available = rule->rr_amount -
231                     p->p_racct->r_resources[resource];
232                 break;
233         case RCTL_SUBJECT_TYPE_USER:
234                 available = rule->rr_amount -
235                     cred->cr_ruidinfo->ui_racct->r_resources[resource];
236                 break;
237         case RCTL_SUBJECT_TYPE_LOGINCLASS:
238                 available = rule->rr_amount -
239                     cred->cr_loginclass->lc_racct->r_resources[resource];
240                 break;
241         case RCTL_SUBJECT_TYPE_JAIL:
242                 available = rule->rr_amount -
243                     cred->cr_prison->pr_prison_racct->prr_racct->
244                         r_resources[resource];
245                 break;
246         default:
247                 panic("rctl_compute_available: unknown per %d",
248                     rule->rr_per);
249         }
250
251         return (available);
252 }
253
254 /*
255  * Return non-zero if allocating 'amount' by proc 'p' would exceed
256  * resource limit specified by 'rule'.
257  */
258 static int
259 rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
260     int64_t amount)
261 {
262         int64_t available;
263
264         rw_assert(&rctl_lock, RA_LOCKED);
265
266         available = rctl_available_resource(p, rule);
267         if (available >= amount)
268                 return (0);
269
270         return (1);
271 }
272
/*
 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
 * to what it keeps allocated now.  Returns non-zero if the allocation should
 * be denied, 0 otherwise.
 *
 * Every rule linked to the process' racct that names 'resource' and would
 * be exceeded has its action carried out: "deny" makes the function return
 * EDOOFUS, "log" prints a message, "devctl" sends a devctl notification,
 * and any other action is delivered to the process as a signal.  The
 * non-deny actions fire once per link and are re-armed (rrl_exceeded reset)
 * the next time the rule is found not to be exceeded.
 */
int
rctl_enforce(struct proc *p, int resource, uint64_t amount)
{
        struct rctl_rule *rule;
        struct rctl_rule_link *link;
        struct sbuf sb;
        int should_deny = 0;
        char *buf;
        /*
         * State handed to ppsratecheck() for the "log" action.  Being
         * static, it is shared by all callers, so the rate limit applies
         * to log messages as a whole, not per rule or per process.
         */
        static int curtime = 0;
        static struct timeval lasttime;

        rw_rlock(&rctl_lock);

        /*
         * There may be more than one matching rule; go through all of them.
         * Denial should be done last, after logging and sending signals.
         */
        LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
                rule = link->rrl_rule;
                if (rule->rr_resource != resource)
                        continue;
                /* 'amount' is converted to the int64_t parameter here. */
                if (!rctl_would_exceed(p, rule, amount)) {
                        /* Back under the limit: re-arm one-shot actions. */
                        link->rrl_exceeded = 0;
                        continue;
                }

                switch (rule->rr_action) {
                case RCTL_ACTION_DENY:
                        /* Only remember it; the error is returned below. */
                        should_deny = 1;
                        continue;
                case RCTL_ACTION_LOG:
                        /*
                         * If rrl_exceeded != 0, it means we've already
                         * logged a warning for this process.
                         */
                        if (link->rrl_exceeded != 0)
                                continue;

                        /*
                         * If the process state is not fully initialized yet,
                         * we can't access most of the required fields, e.g.
                         * p->p_comm.  This happens when called from fork1().
                         * Ignore this rule for now; it will be processed just
                         * after fork, when called from racct_proc_fork_done().
                         */
                        if (p->p_state != PRS_NORMAL)
                                continue;

                        /* Rate-limit messages; 10 is the ppsratecheck() cap. */
                        if (!ppsratecheck(&lasttime, &curtime, 10))
                                continue;

                        /* M_NOWAIT: rctl_lock is held across this call. */
                        buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
                        if (buf == NULL) {
                                printf("rctl_enforce: out of memory\n");
                                continue;
                        }
                        sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
                        rctl_rule_to_sbuf(&sb, rule);
                        sbuf_finish(&sb);
                        /*
                         * NOTE(review): this prints cr_uid while the devctl
                         * case below reports cr_ruid -- confirm intended.
                         */
                        printf("rctl: rule \"%s\" matched by pid %d "
                            "(%s), uid %d, jail %s\n", sbuf_data(&sb),
                            p->p_pid, p->p_comm, p->p_ucred->cr_uid,
                            p->p_ucred->cr_prison->pr_prison_racct->prr_name);
                        sbuf_delete(&sb);
                        free(buf, M_RCTL);
                        link->rrl_exceeded = 1;
                        continue;
                case RCTL_ACTION_DEVCTL:
                        /* Same one-shot and process-state checks as "log". */
                        if (link->rrl_exceeded != 0)
                                continue;

                        if (p->p_state != PRS_NORMAL)
                                continue;
        
                        buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
                        if (buf == NULL) {
                                printf("rctl_enforce: out of memory\n");
                                continue;
                        }
                        /* Build "rule=<rule> pid=<pid> ruid=<ruid> jail=<name>". */
                        sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
                        sbuf_printf(&sb, "rule=");
                        rctl_rule_to_sbuf(&sb, rule);
                        sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
                            p->p_pid, p->p_ucred->cr_ruid,
                            p->p_ucred->cr_prison->pr_prison_racct->prr_name);
                        sbuf_finish(&sb);
                        devctl_notify_f("RCTL", "rule", "matched",
                            sbuf_data(&sb), M_NOWAIT);
                        sbuf_delete(&sb);
                        free(buf, M_RCTL);
                        link->rrl_exceeded = 1;
                        continue;
                default:
                        /* Everything else is a signal-delivering action. */
                        if (link->rrl_exceeded != 0)
                                continue;

                        if (p->p_state != PRS_NORMAL)
                                continue;

                        KASSERT(rule->rr_action > 0 &&
                            rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
                            ("rctl_enforce: unknown action %d",
                             rule->rr_action));

                        /*
                         * We're using the fact that RCTL_ACTION_SIG* values
                         * are equal to their counterparts from sys/signal.h.
                         */
                        kern_psignal(p, rule->rr_action);
                        link->rrl_exceeded = 1;
                        continue;
                }
        }

        rw_runlock(&rctl_lock);

        if (should_deny) {
                /*
                 * Return fake error code; the caller should change it
                 * into one proper for the situation - EFSIZ, ENOMEM etc.
                 */
                return (EDOOFUS);
        }

        return (0);
}
404
405 uint64_t
406 rctl_get_limit(struct proc *p, int resource)
407 {
408         struct rctl_rule *rule;
409         struct rctl_rule_link *link;
410         uint64_t amount = UINT64_MAX;
411
412         rw_rlock(&rctl_lock);
413
414         /*
415          * There may be more than one matching rule; go through all of them.
416          * Denial should be done last, after logging and sending signals.
417          */
418         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
419                 rule = link->rrl_rule;
420                 if (rule->rr_resource != resource)
421                         continue;
422                 if (rule->rr_action != RCTL_ACTION_DENY)
423                         continue;
424                 if (rule->rr_amount < amount)
425                         amount = rule->rr_amount;
426         }
427
428         rw_runlock(&rctl_lock);
429
430         return (amount);
431 }
432
433 uint64_t
434 rctl_get_available(struct proc *p, int resource)
435 {
436         struct rctl_rule *rule;
437         struct rctl_rule_link *link;
438         int64_t available, minavailable, allocated;
439
440         minavailable = INT64_MAX;
441
442         rw_rlock(&rctl_lock);
443
444         /*
445          * There may be more than one matching rule; go through all of them.
446          * Denial should be done last, after logging and sending signals.
447          */
448         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
449                 rule = link->rrl_rule;
450                 if (rule->rr_resource != resource)
451                         continue;
452                 if (rule->rr_action != RCTL_ACTION_DENY)
453                         continue;
454                 available = rctl_available_resource(p, rule);
455                 if (available < minavailable)
456                         minavailable = available;
457         }
458
459         rw_runlock(&rctl_lock);
460
461         /*
462          * XXX: Think about this _hard_.
463          */
464         allocated = p->p_racct->r_resources[resource];
465         if (minavailable < INT64_MAX - allocated)
466                 minavailable += allocated;
467         if (minavailable < 0)
468                 minavailable = 0;
469         return (minavailable);
470 }
471
472 static int
473 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
474 {
475
476         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
477                 if (rule->rr_subject_type != filter->rr_subject_type)
478                         return (0);
479
480                 switch (filter->rr_subject_type) {
481                 case RCTL_SUBJECT_TYPE_PROCESS:
482                         if (filter->rr_subject.rs_proc != NULL &&
483                             rule->rr_subject.rs_proc !=
484                             filter->rr_subject.rs_proc)
485                                 return (0);
486                         break;
487                 case RCTL_SUBJECT_TYPE_USER:
488                         if (filter->rr_subject.rs_uip != NULL &&
489                             rule->rr_subject.rs_uip !=
490                             filter->rr_subject.rs_uip)
491                                 return (0);
492                         break;
493                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
494                         if (filter->rr_subject.rs_loginclass != NULL &&
495                             rule->rr_subject.rs_loginclass !=
496                             filter->rr_subject.rs_loginclass)
497                                 return (0);
498                         break;
499                 case RCTL_SUBJECT_TYPE_JAIL:
500                         if (filter->rr_subject.rs_prison_racct != NULL &&
501                             rule->rr_subject.rs_prison_racct !=
502                             filter->rr_subject.rs_prison_racct)
503                                 return (0);
504                         break;
505                 default:
506                         panic("rctl_rule_matches: unknown subject type %d",
507                             filter->rr_subject_type);
508                 }
509         }
510
511         if (filter->rr_resource != RACCT_UNDEFINED) {
512                 if (rule->rr_resource != filter->rr_resource)
513                         return (0);
514         }
515
516         if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
517                 if (rule->rr_action != filter->rr_action)
518                         return (0);
519         }
520
521         if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
522                 if (rule->rr_amount != filter->rr_amount)
523                         return (0);
524         }
525
526         if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
527                 if (rule->rr_per != filter->rr_per)
528                         return (0);
529         }
530
531         return (1);
532 }
533
534 static int
535 str2value(const char *str, int *value, struct dict *table)
536 {
537         int i;
538
539         if (value == NULL)
540                 return (EINVAL);
541
542         for (i = 0; table[i].d_name != NULL; i++) {
543                 if (strcasecmp(table[i].d_name, str) == 0) {
544                         *value =  table[i].d_value;
545                         return (0);
546                 }
547         }
548
549         return (EINVAL);
550 }
551
552 static int
553 str2id(const char *str, id_t *value)
554 {
555         char *end;
556
557         if (str == NULL)
558                 return (EINVAL);
559
560         *value = strtoul(str, &end, 10);
561         if ((size_t)(end - str) != strlen(str))
562                 return (EINVAL);
563
564         return (0);
565 }
566
/*
 * Parse 'str' as a base-10 number and store it in '*value'.
 *
 * Returns 0 on success; EINVAL when 'str' is NULL or is not consumed in
 * full by the conversion; ERANGE when the converted value comes out
 * negative.  The latter happens for input with a leading '-' and for
 * values too large for int64_t, because strtoul() produces an unsigned
 * result that wraps when stored.  Rejecting it keeps such input from
 * being stored as a negative amount, which the rule code compares against
 * RCTL_AMOUNT_UNDEFINED.
 *
 * NOTE(review): the conversion goes through unsigned long, so where that
 * type is narrower than 64 bits larger amounts cannot be parsed here.
 */
static int
str2int64(const char *str, int64_t *value)
{
        char *end;

        if (str == NULL)
                return (EINVAL);

        *value = strtoul(str, &end, 10);
        /* The whole string must have been consumed. */
        if (*end != '\0')
                return (EINVAL);

        if (*value < 0)
                return (ERANGE);

        return (0);
}
581
582 /*
583  * Connect the rule to the racct, increasing refcount for the rule.
584  */
585 static void
586 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
587 {
588         struct rctl_rule_link *link;
589
590         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
591
592         rctl_rule_acquire(rule);
593         link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
594         link->rrl_rule = rule;
595         link->rrl_exceeded = 0;
596
597         rw_wlock(&rctl_lock);
598         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
599         rw_wunlock(&rctl_lock);
600 }
601
602 static int
603 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
604 {
605         struct rctl_rule_link *link;
606
607         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
608         rw_assert(&rctl_lock, RA_WLOCKED);
609
610         link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
611         if (link == NULL)
612                 return (ENOMEM);
613         rctl_rule_acquire(rule);
614         link->rrl_rule = rule;
615         link->rrl_exceeded = 0;
616
617         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
618         return (0);
619 }
620
621 /*
622  * Remove limits for a rules matching the filter and release
623  * the refcounts for the rules, possibly freeing them.  Returns
624  * the number of limit structures removed.
625  */
626 static int
627 rctl_racct_remove_rules(struct racct *racct,
628     const struct rctl_rule *filter)
629 {
630         int removed = 0;
631         struct rctl_rule_link *link, *linktmp;
632
633         rw_assert(&rctl_lock, RA_WLOCKED);
634
635         LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
636                 if (!rctl_rule_matches(link->rrl_rule, filter))
637                         continue;
638
639                 LIST_REMOVE(link, rrl_next);
640                 rctl_rule_release(link->rrl_rule);
641                 uma_zfree(rctl_rule_link_zone, link);
642                 removed++;
643         }
644         return (removed);
645 }
646
647 static void
648 rctl_rule_acquire_subject(struct rctl_rule *rule)
649 {
650
651         switch (rule->rr_subject_type) {
652         case RCTL_SUBJECT_TYPE_UNDEFINED:
653         case RCTL_SUBJECT_TYPE_PROCESS:
654                 break;
655         case RCTL_SUBJECT_TYPE_JAIL:
656                 if (rule->rr_subject.rs_prison_racct != NULL)
657                         prison_racct_hold(rule->rr_subject.rs_prison_racct);
658                 break;
659         case RCTL_SUBJECT_TYPE_USER:
660                 if (rule->rr_subject.rs_uip != NULL)
661                         uihold(rule->rr_subject.rs_uip);
662                 break;
663         case RCTL_SUBJECT_TYPE_LOGINCLASS:
664                 if (rule->rr_subject.rs_loginclass != NULL)
665                         loginclass_hold(rule->rr_subject.rs_loginclass);
666                 break;
667         default:
668                 panic("rctl_rule_acquire_subject: unknown subject type %d",
669                     rule->rr_subject_type);
670         }
671 }
672
673 static void
674 rctl_rule_release_subject(struct rctl_rule *rule)
675 {
676
677         switch (rule->rr_subject_type) {
678         case RCTL_SUBJECT_TYPE_UNDEFINED:
679         case RCTL_SUBJECT_TYPE_PROCESS:
680                 break;
681         case RCTL_SUBJECT_TYPE_JAIL:
682                 if (rule->rr_subject.rs_prison_racct != NULL)
683                         prison_racct_free(rule->rr_subject.rs_prison_racct);
684                 break;
685         case RCTL_SUBJECT_TYPE_USER:
686                 if (rule->rr_subject.rs_uip != NULL)
687                         uifree(rule->rr_subject.rs_uip);
688                 break;
689         case RCTL_SUBJECT_TYPE_LOGINCLASS:
690                 if (rule->rr_subject.rs_loginclass != NULL)
691                         loginclass_free(rule->rr_subject.rs_loginclass);
692                 break;
693         default:
694                 panic("rctl_rule_release_subject: unknown subject type %d",
695                     rule->rr_subject_type);
696         }
697 }
698
699 struct rctl_rule *
700 rctl_rule_alloc(int flags)
701 {
702         struct rctl_rule *rule;
703
704         rule = uma_zalloc(rctl_rule_zone, flags);
705         if (rule == NULL)
706                 return (NULL);
707         rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
708         rule->rr_subject.rs_proc = NULL;
709         rule->rr_subject.rs_uip = NULL;
710         rule->rr_subject.rs_loginclass = NULL;
711         rule->rr_subject.rs_prison_racct = NULL;
712         rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
713         rule->rr_resource = RACCT_UNDEFINED;
714         rule->rr_action = RCTL_ACTION_UNDEFINED;
715         rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
716         refcount_init(&rule->rr_refcount, 1);
717
718         return (rule);
719 }
720
721 struct rctl_rule *
722 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
723 {
724         struct rctl_rule *copy;
725
726         copy = uma_zalloc(rctl_rule_zone, flags);
727         if (copy == NULL)
728                 return (NULL);
729         copy->rr_subject_type = rule->rr_subject_type;
730         copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
731         copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
732         copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
733         copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
734         copy->rr_per = rule->rr_per;
735         copy->rr_resource = rule->rr_resource;
736         copy->rr_action = rule->rr_action;
737         copy->rr_amount = rule->rr_amount;
738         refcount_init(&copy->rr_refcount, 1);
739         rctl_rule_acquire_subject(copy);
740
741         return (copy);
742 }
743
744 void
745 rctl_rule_acquire(struct rctl_rule *rule)
746 {
747
748         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
749
750         refcount_acquire(&rule->rr_refcount);
751 }
752
753 static void
754 rctl_rule_free(void *context, int pending)
755 {
756         struct rctl_rule *rule;
757         
758         rule = (struct rctl_rule *)context;
759
760         KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
761         
762         /*
763          * We don't need locking here; rule is guaranteed to be inaccessible.
764          */
765         
766         rctl_rule_release_subject(rule);
767         uma_zfree(rctl_rule_zone, rule);
768 }
769
770 void
771 rctl_rule_release(struct rctl_rule *rule)
772 {
773
774         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
775
776         if (refcount_release(&rule->rr_refcount)) {
777                 /*
778                  * rctl_rule_release() is often called when iterating
779                  * over all the uidinfo structures in the system,
780                  * holding uihashtbl_lock.  Since rctl_rule_free()
781                  * might end up calling uifree(), this would lead
782                  * to lock recursion.  Use taskqueue to avoid this.
783                  */
784                 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
785                 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
786         }
787 }
788
789 static int
790 rctl_rule_fully_specified(const struct rctl_rule *rule)
791 {
792
793         switch (rule->rr_subject_type) {
794         case RCTL_SUBJECT_TYPE_UNDEFINED:
795                 return (0);
796         case RCTL_SUBJECT_TYPE_PROCESS:
797                 if (rule->rr_subject.rs_proc == NULL)
798                         return (0);
799                 break;
800         case RCTL_SUBJECT_TYPE_USER:
801                 if (rule->rr_subject.rs_uip == NULL)
802                         return (0);
803                 break;
804         case RCTL_SUBJECT_TYPE_LOGINCLASS:
805                 if (rule->rr_subject.rs_loginclass == NULL)
806                         return (0);
807                 break;
808         case RCTL_SUBJECT_TYPE_JAIL:
809                 if (rule->rr_subject.rs_prison_racct == NULL)
810                         return (0);
811                 break;
812         default:
813                 panic("rctl_rule_fully_specified: unknown subject type %d",
814                     rule->rr_subject_type);
815         }
816         if (rule->rr_resource == RACCT_UNDEFINED)
817                 return (0);
818         if (rule->rr_action == RCTL_ACTION_UNDEFINED)
819                 return (0);
820         if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
821                 return (0);
822         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
823                 return (0);
824
825         return (1);
826 }
827
828 static int
829 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
830 {
831         int error = 0;
832         char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
833              *amountstr, *perstr;
834         struct rctl_rule *rule;
835         id_t id;
836
837         rule = rctl_rule_alloc(M_WAITOK);
838
839         subjectstr = strsep(&rulestr, ":");
840         subject_idstr = strsep(&rulestr, ":");
841         resourcestr = strsep(&rulestr, ":");
842         actionstr = strsep(&rulestr, "=/");
843         amountstr = strsep(&rulestr, "/");
844         perstr = rulestr;
845
846         if (subjectstr == NULL || subjectstr[0] == '\0')
847                 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
848         else {
849                 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
850                 if (error != 0)
851                         goto out;
852         }
853
854         if (subject_idstr == NULL || subject_idstr[0] == '\0') {
855                 rule->rr_subject.rs_proc = NULL;
856                 rule->rr_subject.rs_uip = NULL;
857                 rule->rr_subject.rs_loginclass = NULL;
858                 rule->rr_subject.rs_prison_racct = NULL;
859         } else {
860                 switch (rule->rr_subject_type) {
861                 case RCTL_SUBJECT_TYPE_UNDEFINED:
862                         error = EINVAL;
863                         goto out;
864                 case RCTL_SUBJECT_TYPE_PROCESS:
865                         error = str2id(subject_idstr, &id);
866                         if (error != 0)
867                                 goto out;
868                         sx_assert(&allproc_lock, SA_LOCKED);
869                         rule->rr_subject.rs_proc = pfind(id);
870                         if (rule->rr_subject.rs_proc == NULL) {
871                                 error = ESRCH;
872                                 goto out;
873                         }
874                         PROC_UNLOCK(rule->rr_subject.rs_proc);
875                         break;
876                 case RCTL_SUBJECT_TYPE_USER:
877                         error = str2id(subject_idstr, &id);
878                         if (error != 0)
879                                 goto out;
880                         rule->rr_subject.rs_uip = uifind(id);
881                         break;
882                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
883                         rule->rr_subject.rs_loginclass =
884                             loginclass_find(subject_idstr);
885                         if (rule->rr_subject.rs_loginclass == NULL) {
886                                 error = ENAMETOOLONG;
887                                 goto out;
888                         }
889                         break;
890                 case RCTL_SUBJECT_TYPE_JAIL:
891                         rule->rr_subject.rs_prison_racct =
892                             prison_racct_find(subject_idstr);
893                         if (rule->rr_subject.rs_prison_racct == NULL) {
894                                 error = ENAMETOOLONG;
895                                 goto out;
896                         }
897                         break;
898                default:
899                        panic("rctl_string_to_rule: unknown subject type %d",
900                            rule->rr_subject_type);
901                }
902         }
903
904         if (resourcestr == NULL || resourcestr[0] == '\0')
905                 rule->rr_resource = RACCT_UNDEFINED;
906         else {
907                 error = str2value(resourcestr, &rule->rr_resource,
908                     resourcenames);
909                 if (error != 0)
910                         goto out;
911         }
912
913         if (actionstr == NULL || actionstr[0] == '\0')
914                 rule->rr_action = RCTL_ACTION_UNDEFINED;
915         else {
916                 error = str2value(actionstr, &rule->rr_action, actionnames);
917                 if (error != 0)
918                         goto out;
919         }
920
921         if (amountstr == NULL || amountstr[0] == '\0')
922                 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
923         else {
924                 error = str2int64(amountstr, &rule->rr_amount);
925                 if (error != 0)
926                         goto out;
927                 if (RACCT_IS_IN_MILLIONS(rule->rr_resource))
928                         rule->rr_amount *= 1000000;
929         }
930
931         if (perstr == NULL || perstr[0] == '\0')
932                 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
933         else {
934                 error = str2value(perstr, &rule->rr_per, subjectnames);
935                 if (error != 0)
936                         goto out;
937         }
938
939 out:
940         if (error == 0)
941                 *rulep = rule;
942         else
943                 rctl_rule_release(rule);
944
945         return (error);
946 }
947
948 /*
949  * Link a rule with all the subjects it applies to.
950  */
951 int
952 rctl_rule_add(struct rctl_rule *rule)
953 {
954         struct proc *p;
955         struct ucred *cred;
956         struct uidinfo *uip;
957         struct prison *pr;
958         struct prison_racct *prr;
959         struct loginclass *lc;
960         struct rctl_rule *rule2;
961         int match;
962
963         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
964
965         /*
966          * Some rules just don't make sense.  Note that the one below
967          * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU,
968          * for example, is not deniable in the racct sense, but the
969          * limit is enforced in a different way, so "deny" rules for %CPU
970          * do make sense.
971          */
972         if (rule->rr_action == RCTL_ACTION_DENY &&
973             (rule->rr_resource == RACCT_CPU ||
974             rule->rr_resource == RACCT_WALLCLOCK))
975                 return (EOPNOTSUPP);
976
977         if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
978             RACCT_IS_SLOPPY(rule->rr_resource))
979                 return (EOPNOTSUPP);
980
981         /*
982          * Make sure there are no duplicated rules.  Also, for the "deny"
983          * rules, remove ones differing only by "amount".
984          */
985         if (rule->rr_action == RCTL_ACTION_DENY) {
986                 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
987                 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
988                 rctl_rule_remove(rule2);
989                 rctl_rule_release(rule2);
990         } else
991                 rctl_rule_remove(rule);
992
993         switch (rule->rr_subject_type) {
994         case RCTL_SUBJECT_TYPE_PROCESS:
995                 p = rule->rr_subject.rs_proc;
996                 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
997                 /*
998                  * No resource limits for system processes.
999                  */
1000                 if (p->p_flag & P_SYSTEM)
1001                         return (EPERM);
1002
1003                 rctl_racct_add_rule(p->p_racct, rule);
1004                 /*
1005                  * In case of per-process rule, we don't have anything more
1006                  * to do.
1007                  */
1008                 return (0);
1009
1010         case RCTL_SUBJECT_TYPE_USER:
1011                 uip = rule->rr_subject.rs_uip;
1012                 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1013                 rctl_racct_add_rule(uip->ui_racct, rule);
1014                 break;
1015
1016         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1017                 lc = rule->rr_subject.rs_loginclass;
1018                 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1019                 rctl_racct_add_rule(lc->lc_racct, rule);
1020                 break;
1021
1022         case RCTL_SUBJECT_TYPE_JAIL:
1023                 prr = rule->rr_subject.rs_prison_racct;
1024                 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1025                 rctl_racct_add_rule(prr->prr_racct, rule);
1026                 break;
1027
1028         default:
1029                 panic("rctl_rule_add: unknown subject type %d",
1030                     rule->rr_subject_type);
1031         }
1032
1033         /*
1034          * Now go through all the processes and add the new rule to the ones
1035          * it applies to.
1036          */
1037         sx_assert(&allproc_lock, SA_LOCKED);
1038         FOREACH_PROC_IN_SYSTEM(p) {
1039                 if (p->p_flag & P_SYSTEM)
1040                         continue;
1041                 cred = p->p_ucred;
1042                 switch (rule->rr_subject_type) {
1043                 case RCTL_SUBJECT_TYPE_USER:
1044                         if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1045                             cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1046                                 break;
1047                         continue;
1048                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1049                         if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1050                                 break;
1051                         continue;
1052                 case RCTL_SUBJECT_TYPE_JAIL:
1053                         match = 0;
1054                         for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1055                                 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1056                                         match = 1;
1057                                         break;
1058                                 }
1059                         }
1060                         if (match)
1061                                 break;
1062                         continue;
1063                 default:
1064                         panic("rctl_rule_add: unknown subject type %d",
1065                             rule->rr_subject_type);
1066                 }
1067
1068                 rctl_racct_add_rule(p->p_racct, rule);
1069         }
1070
1071         return (0);
1072 }
1073
1074 static void
1075 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1076 {
1077         struct rctl_rule *filter = (struct rctl_rule *)arg2;
1078         int found = 0;
1079
1080         rw_wlock(&rctl_lock);
1081         found += rctl_racct_remove_rules(racct, filter);
1082         rw_wunlock(&rctl_lock);
1083
1084         *((int *)arg3) += found;
1085 }
1086
1087 /*
1088  * Remove all rules that match the filter.
1089  */
1090 int
1091 rctl_rule_remove(struct rctl_rule *filter)
1092 {
1093         int found = 0;
1094         struct proc *p;
1095
1096         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1097             filter->rr_subject.rs_proc != NULL) {
1098                 p = filter->rr_subject.rs_proc;
1099                 rw_wlock(&rctl_lock);
1100                 found = rctl_racct_remove_rules(p->p_racct, filter);
1101                 rw_wunlock(&rctl_lock);
1102                 if (found)
1103                         return (0);
1104                 return (ESRCH);
1105         }
1106
1107         loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1108             (void *)&found);
1109         ui_racct_foreach(rctl_rule_remove_callback, filter,
1110             (void *)&found);
1111         prison_racct_foreach(rctl_rule_remove_callback, filter,
1112             (void *)&found);
1113
1114         sx_assert(&allproc_lock, SA_LOCKED);
1115         rw_wlock(&rctl_lock);
1116         FOREACH_PROC_IN_SYSTEM(p) {
1117                 found += rctl_racct_remove_rules(p->p_racct, filter);
1118         }
1119         rw_wunlock(&rctl_lock);
1120
1121         if (found)
1122                 return (0);
1123         return (ESRCH);
1124 }
1125
1126 /*
1127  * Appends a rule to the sbuf.
1128  */
1129 static void
1130 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1131 {
1132         int64_t amount;
1133
1134         sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1135
1136         switch (rule->rr_subject_type) {
1137         case RCTL_SUBJECT_TYPE_PROCESS:
1138                 if (rule->rr_subject.rs_proc == NULL)
1139                         sbuf_printf(sb, ":");
1140                 else
1141                         sbuf_printf(sb, "%d:",
1142                             rule->rr_subject.rs_proc->p_pid);
1143                 break;
1144         case RCTL_SUBJECT_TYPE_USER:
1145                 if (rule->rr_subject.rs_uip == NULL)
1146                         sbuf_printf(sb, ":");
1147                 else
1148                         sbuf_printf(sb, "%d:",
1149                             rule->rr_subject.rs_uip->ui_uid);
1150                 break;
1151         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1152                 if (rule->rr_subject.rs_loginclass == NULL)
1153                         sbuf_printf(sb, ":");
1154                 else
1155                         sbuf_printf(sb, "%s:",
1156                             rule->rr_subject.rs_loginclass->lc_name);
1157                 break;
1158         case RCTL_SUBJECT_TYPE_JAIL:
1159                 if (rule->rr_subject.rs_prison_racct == NULL)
1160                         sbuf_printf(sb, ":");
1161                 else
1162                         sbuf_printf(sb, "%s:",
1163                             rule->rr_subject.rs_prison_racct->prr_name);
1164                 break;
1165         default:
1166                 panic("rctl_rule_to_sbuf: unknown subject type %d",
1167                     rule->rr_subject_type);
1168         }
1169
1170         amount = rule->rr_amount;
1171         if (amount != RCTL_AMOUNT_UNDEFINED &&
1172             RACCT_IS_IN_MILLIONS(rule->rr_resource))
1173                 amount /= 1000000;
1174
1175         sbuf_printf(sb, "%s:%s=%jd",
1176             rctl_resource_name(rule->rr_resource),
1177             rctl_action_name(rule->rr_action),
1178             amount);
1179
1180         if (rule->rr_per != rule->rr_subject_type)
1181                 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1182 }
1183
1184 /*
1185  * Routine used by RCTL syscalls to read in input string.
1186  */
1187 static int
1188 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1189 {
1190         int error;
1191         char *str;
1192
1193         if (inbuflen <= 0)
1194                 return (EINVAL);
1195         if (inbuflen > RCTL_MAX_INBUFLEN)
1196                 return (E2BIG);
1197
1198         str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1199         error = copyinstr(inbufp, str, inbuflen, NULL);
1200         if (error != 0) {
1201                 free(str, M_RCTL);
1202                 return (error);
1203         }
1204
1205         *inputstr = str;
1206
1207         return (0);
1208 }
1209
1210 /*
1211  * Routine used by RCTL syscalls to write out output string.
1212  */
1213 static int
1214 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1215 {
1216         int error;
1217
1218         if (outputsbuf == NULL)
1219                 return (0);
1220
1221         sbuf_finish(outputsbuf);
1222         if (outbuflen < sbuf_len(outputsbuf) + 1) {
1223                 sbuf_delete(outputsbuf);
1224                 return (ERANGE);
1225         }
1226         error = copyout(sbuf_data(outputsbuf), outbufp,
1227             sbuf_len(outputsbuf) + 1);
1228         sbuf_delete(outputsbuf);
1229         return (error);
1230 }
1231
1232 static struct sbuf *
1233 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1234 {
1235         int i;
1236         int64_t amount;
1237         struct sbuf *sb;
1238
1239         sb = sbuf_new_auto();
1240         for (i = 0; i <= RACCT_MAX; i++) {
1241                 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1242                         continue;
1243                 amount = racct->r_resources[i];
1244                 if (RACCT_IS_IN_MILLIONS(i))
1245                         amount /= 1000000;
1246                 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1247         }
1248         sbuf_setpos(sb, sbuf_len(sb) - 1);
1249         return (sb);
1250 }
1251
1252 int
1253 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1254 {
1255         int error;
1256         char *inputstr;
1257         struct rctl_rule *filter;
1258         struct sbuf *outputsbuf = NULL;
1259         struct proc *p;
1260         struct uidinfo *uip;
1261         struct loginclass *lc;
1262         struct prison_racct *prr;
1263
1264         error = priv_check(td, PRIV_RCTL_GET_RACCT);
1265         if (error != 0)
1266                 return (error);
1267
1268         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1269         if (error != 0)
1270                 return (error);
1271
1272         sx_slock(&allproc_lock);
1273         error = rctl_string_to_rule(inputstr, &filter);
1274         free(inputstr, M_RCTL);
1275         if (error != 0) {
1276                 sx_sunlock(&allproc_lock);
1277                 return (error);
1278         }
1279
1280         switch (filter->rr_subject_type) {
1281         case RCTL_SUBJECT_TYPE_PROCESS:
1282                 p = filter->rr_subject.rs_proc;
1283                 if (p == NULL) {
1284                         error = EINVAL;
1285                         goto out;
1286                 }
1287                 if (p->p_flag & P_SYSTEM) {
1288                         error = EINVAL;
1289                         goto out;
1290                 }
1291                 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1292                 break;
1293         case RCTL_SUBJECT_TYPE_USER:
1294                 uip = filter->rr_subject.rs_uip;
1295                 if (uip == NULL) {
1296                         error = EINVAL;
1297                         goto out;
1298                 }
1299                 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1300                 break;
1301         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1302                 lc = filter->rr_subject.rs_loginclass;
1303                 if (lc == NULL) {
1304                         error = EINVAL;
1305                         goto out;
1306                 }
1307                 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1308                 break;
1309         case RCTL_SUBJECT_TYPE_JAIL:
1310                 prr = filter->rr_subject.rs_prison_racct;
1311                 if (prr == NULL) {
1312                         error = EINVAL;
1313                         goto out;
1314                 }
1315                 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1316                 break;
1317         default:
1318                 error = EINVAL;
1319         }
1320 out:
1321         rctl_rule_release(filter);
1322         sx_sunlock(&allproc_lock);
1323         if (error != 0)
1324                 return (error);
1325
1326         error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1327
1328         return (error);
1329 }
1330
1331 static void
1332 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1333 {
1334         struct rctl_rule *filter = (struct rctl_rule *)arg2;
1335         struct rctl_rule_link *link;
1336         struct sbuf *sb = (struct sbuf *)arg3;
1337
1338         rw_rlock(&rctl_lock);
1339         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1340                 if (!rctl_rule_matches(link->rrl_rule, filter))
1341                         continue;
1342                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1343                 sbuf_printf(sb, ",");
1344         }
1345         rw_runlock(&rctl_lock);
1346 }
1347
1348 int
1349 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1350 {
1351         int error;
1352         size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1353         char *inputstr, *buf;
1354         struct sbuf *sb;
1355         struct rctl_rule *filter;
1356         struct rctl_rule_link *link;
1357         struct proc *p;
1358
1359         error = priv_check(td, PRIV_RCTL_GET_RULES);
1360         if (error != 0)
1361                 return (error);
1362
1363         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1364         if (error != 0)
1365                 return (error);
1366
1367         sx_slock(&allproc_lock);
1368         error = rctl_string_to_rule(inputstr, &filter);
1369         free(inputstr, M_RCTL);
1370         if (error != 0) {
1371                 sx_sunlock(&allproc_lock);
1372                 return (error);
1373         }
1374
1375 again:
1376         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1377         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1378         KASSERT(sb != NULL, ("sbuf_new failed"));
1379
1380         sx_assert(&allproc_lock, SA_LOCKED);
1381         FOREACH_PROC_IN_SYSTEM(p) {
1382                 rw_rlock(&rctl_lock);
1383                 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1384                         /*
1385                          * Non-process rules will be added to the buffer later.
1386                          * Adding them here would result in duplicated output.
1387                          */
1388                         if (link->rrl_rule->rr_subject_type !=
1389                             RCTL_SUBJECT_TYPE_PROCESS)
1390                                 continue;
1391                         if (!rctl_rule_matches(link->rrl_rule, filter))
1392                                 continue;
1393                         rctl_rule_to_sbuf(sb, link->rrl_rule);
1394                         sbuf_printf(sb, ",");
1395                 }
1396                 rw_runlock(&rctl_lock);
1397         }
1398
1399         loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1400         ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1401         prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1402         if (sbuf_error(sb) == ENOMEM) {
1403                 sbuf_delete(sb);
1404                 free(buf, M_RCTL);
1405                 bufsize *= 4;
1406                 goto again;
1407         }
1408
1409         /*
1410          * Remove trailing ",".
1411          */
1412         if (sbuf_len(sb) > 0)
1413                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1414
1415         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1416
1417         rctl_rule_release(filter);
1418         sx_sunlock(&allproc_lock);
1419         free(buf, M_RCTL);
1420         return (error);
1421 }
1422
1423 int
1424 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1425 {
1426         int error;
1427         size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1428         char *inputstr, *buf;
1429         struct sbuf *sb;
1430         struct rctl_rule *filter;
1431         struct rctl_rule_link *link;
1432
1433         error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1434         if (error != 0)
1435                 return (error);
1436
1437         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1438         if (error != 0)
1439                 return (error);
1440
1441         sx_slock(&allproc_lock);
1442         error = rctl_string_to_rule(inputstr, &filter);
1443         free(inputstr, M_RCTL);
1444         if (error != 0) {
1445                 sx_sunlock(&allproc_lock);
1446                 return (error);
1447         }
1448
1449         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1450                 rctl_rule_release(filter);
1451                 sx_sunlock(&allproc_lock);
1452                 return (EINVAL);
1453         }
1454         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1455                 rctl_rule_release(filter);
1456                 sx_sunlock(&allproc_lock);
1457                 return (EOPNOTSUPP);
1458         }
1459         if (filter->rr_subject.rs_proc == NULL) {
1460                 rctl_rule_release(filter);
1461                 sx_sunlock(&allproc_lock);
1462                 return (EINVAL);
1463         }
1464
1465 again:
1466         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1467         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1468         KASSERT(sb != NULL, ("sbuf_new failed"));
1469
1470         rw_rlock(&rctl_lock);
1471         LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1472             rrl_next) {
1473                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1474                 sbuf_printf(sb, ",");
1475         }
1476         rw_runlock(&rctl_lock);
1477         if (sbuf_error(sb) == ENOMEM) {
1478                 sbuf_delete(sb);
1479                 free(buf, M_RCTL);
1480                 bufsize *= 4;
1481                 goto again;
1482         }
1483
1484         /*
1485          * Remove trailing ",".
1486          */
1487         if (sbuf_len(sb) > 0)
1488                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1489
1490         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1491         rctl_rule_release(filter);
1492         sx_sunlock(&allproc_lock);
1493         free(buf, M_RCTL);
1494         return (error);
1495 }
1496
1497 int
1498 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1499 {
1500         int error;
1501         struct rctl_rule *rule;
1502         char *inputstr;
1503
1504         error = priv_check(td, PRIV_RCTL_ADD_RULE);
1505         if (error != 0)
1506                 return (error);
1507
1508         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1509         if (error != 0)
1510                 return (error);
1511
1512         sx_slock(&allproc_lock);
1513         error = rctl_string_to_rule(inputstr, &rule);
1514         free(inputstr, M_RCTL);
1515         if (error != 0) {
1516                 sx_sunlock(&allproc_lock);
1517                 return (error);
1518         }
1519         /*
1520          * The 'per' part of a rule is optional.
1521          */
1522         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1523             rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1524                 rule->rr_per = rule->rr_subject_type;
1525
1526         if (!rctl_rule_fully_specified(rule)) {
1527                 error = EINVAL;
1528                 goto out;
1529         }
1530
1531         error = rctl_rule_add(rule);
1532
1533 out:
1534         rctl_rule_release(rule);
1535         sx_sunlock(&allproc_lock);
1536         return (error);
1537 }
1538
1539 int
1540 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1541 {
1542         int error;
1543         struct rctl_rule *filter;
1544         char *inputstr;
1545
1546         error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1547         if (error != 0)
1548                 return (error);
1549
1550         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1551         if (error != 0)
1552                 return (error);
1553
1554         sx_slock(&allproc_lock);
1555         error = rctl_string_to_rule(inputstr, &filter);
1556         free(inputstr, M_RCTL);
1557         if (error != 0) {
1558                 sx_sunlock(&allproc_lock);
1559                 return (error);
1560         }
1561
1562         error = rctl_rule_remove(filter);
1563         rctl_rule_release(filter);
1564         sx_sunlock(&allproc_lock);
1565
1566         return (error);
1567 }
1568
1569 /*
1570  * Update RCTL rule list after credential change.
1571  */
1572 void
1573 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1574 {
1575         int rulecnt, i;
1576         struct rctl_rule_link *link, *newlink;
1577         struct uidinfo *newuip;
1578         struct loginclass *newlc;
1579         struct prison_racct *newprr;
1580         LIST_HEAD(, rctl_rule_link) newrules;
1581
1582         newuip = newcred->cr_ruidinfo;
1583         newlc = newcred->cr_loginclass;
1584         newprr = newcred->cr_prison->pr_prison_racct;
1585         
1586         LIST_INIT(&newrules);
1587
1588 again:
1589         /*
1590          * First, count the rules that apply to the process with new
1591          * credentials.
1592          */
1593         rulecnt = 0;
1594         rw_rlock(&rctl_lock);
1595         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1596                 if (link->rrl_rule->rr_subject_type ==
1597                     RCTL_SUBJECT_TYPE_PROCESS)
1598                         rulecnt++;
1599         }
1600         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1601                 rulecnt++;
1602         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1603                 rulecnt++;
1604         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1605                 rulecnt++;
1606         rw_runlock(&rctl_lock);
1607
1608         /*
1609          * Create temporary list.  We've dropped the rctl_lock in order
1610          * to use M_WAITOK.
1611          */
1612         for (i = 0; i < rulecnt; i++) {
1613                 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1614                 newlink->rrl_rule = NULL;
1615                 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1616         }
1617
1618         newlink = LIST_FIRST(&newrules);
1619
1620         /*
1621          * Assign rules to the newly allocated list entries.
1622          */
1623         rw_wlock(&rctl_lock);
1624         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1625                 if (link->rrl_rule->rr_subject_type ==
1626                     RCTL_SUBJECT_TYPE_PROCESS) {
1627                         if (newlink == NULL)
1628                                 goto goaround;
1629                         rctl_rule_acquire(link->rrl_rule);
1630                         newlink->rrl_rule = link->rrl_rule;
1631                         newlink = LIST_NEXT(newlink, rrl_next);
1632                         rulecnt--;
1633                 }
1634         }
1635         
1636         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1637                 if (newlink == NULL)
1638                         goto goaround;
1639                 rctl_rule_acquire(link->rrl_rule);
1640                 newlink->rrl_rule = link->rrl_rule;
1641                 newlink = LIST_NEXT(newlink, rrl_next);
1642                 rulecnt--;
1643         }
1644
1645         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1646                 if (newlink == NULL)
1647                         goto goaround;
1648                 rctl_rule_acquire(link->rrl_rule);
1649                 newlink->rrl_rule = link->rrl_rule;
1650                 newlink = LIST_NEXT(newlink, rrl_next);
1651                 rulecnt--;
1652         }
1653
1654         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1655                 if (newlink == NULL)
1656                         goto goaround;
1657                 rctl_rule_acquire(link->rrl_rule);
1658                 newlink->rrl_rule = link->rrl_rule;
1659                 newlink = LIST_NEXT(newlink, rrl_next);
1660                 rulecnt--;
1661         }
1662
1663         if (rulecnt == 0) {
1664                 /*
1665                  * Free the old rule list.
1666                  */
1667                 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1668                         link = LIST_FIRST(&p->p_racct->r_rule_links);
1669                         LIST_REMOVE(link, rrl_next);
1670                         rctl_rule_release(link->rrl_rule);
1671                         uma_zfree(rctl_rule_link_zone, link);
1672                 }
1673
1674                 /*
1675                  * Replace lists and we're done.
1676                  *
1677                  * XXX: Is there any way to switch list heads instead
1678                  *      of iterating here?
1679                  */
1680                 while (!LIST_EMPTY(&newrules)) {
1681                         newlink = LIST_FIRST(&newrules);
1682                         LIST_REMOVE(newlink, rrl_next);
1683                         LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1684                             newlink, rrl_next);
1685                 }
1686
1687                 rw_wunlock(&rctl_lock);
1688
1689                 return;
1690         }
1691
1692 goaround:
1693         rw_wunlock(&rctl_lock);
1694
1695         /*
1696          * Rule list changed while we were not holding the rctl_lock.
1697          * Free the new list and try again.
1698          */
1699         while (!LIST_EMPTY(&newrules)) {
1700                 newlink = LIST_FIRST(&newrules);
1701                 LIST_REMOVE(newlink, rrl_next);
1702                 if (newlink->rrl_rule != NULL)
1703                         rctl_rule_release(newlink->rrl_rule);
1704                 uma_zfree(rctl_rule_link_zone, newlink);
1705         }
1706
1707         goto again;
1708 }
1709
1710 /*
1711  * Assign RCTL rules to the newly created process.
1712  */
1713 int
1714 rctl_proc_fork(struct proc *parent, struct proc *child)
1715 {
1716         int error;
1717         struct rctl_rule_link *link;
1718         struct rctl_rule *rule;
1719
1720         LIST_INIT(&child->p_racct->r_rule_links);
1721
1722         /*
1723          * No limits for kernel processes.
1724          */
1725         if (child->p_flag & P_SYSTEM)
1726                 return (0);
1727
1728         /*
1729          * Nothing to inherit from P_SYSTEM parents.
1730          */
1731         if (parent->p_racct == NULL) {
1732                 KASSERT(parent->p_flag & P_SYSTEM,
1733                     ("non-system process without racct; p = %p", parent));
1734                 return (0);
1735         }
1736
1737         rw_wlock(&rctl_lock);
1738
1739         /*
1740          * Go through limits applicable to the parent and assign them
1741          * to the child.  Rules with 'process' subject have to be duplicated
1742          * in order to make their rr_subject point to the new process.
1743          */
1744         LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1745                 if (link->rrl_rule->rr_subject_type ==
1746                     RCTL_SUBJECT_TYPE_PROCESS) {
1747                         rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1748                         if (rule == NULL)
1749                                 goto fail;
1750                         KASSERT(rule->rr_subject.rs_proc == parent,
1751                             ("rule->rr_subject.rs_proc != parent"));
1752                         rule->rr_subject.rs_proc = child;
1753                         error = rctl_racct_add_rule_locked(child->p_racct,
1754                             rule);
1755                         rctl_rule_release(rule);
1756                         if (error != 0)
1757                                 goto fail;
1758                 } else {
1759                         error = rctl_racct_add_rule_locked(child->p_racct,
1760                             link->rrl_rule);
1761                         if (error != 0)
1762                                 goto fail;
1763                 }
1764         }
1765
1766         rw_wunlock(&rctl_lock);
1767         return (0);
1768
1769 fail:
1770         while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1771                 link = LIST_FIRST(&child->p_racct->r_rule_links);
1772                 LIST_REMOVE(link, rrl_next);
1773                 rctl_rule_release(link->rrl_rule);
1774                 uma_zfree(rctl_rule_link_zone, link);
1775         }
1776         rw_wunlock(&rctl_lock);
1777         return (EAGAIN);
1778 }
1779
1780 /*
1781  * Release rules attached to the racct.
1782  */
1783 void
1784 rctl_racct_release(struct racct *racct)
1785 {
1786         struct rctl_rule_link *link;
1787
1788         rw_wlock(&rctl_lock);
1789         while (!LIST_EMPTY(&racct->r_rule_links)) {
1790                 link = LIST_FIRST(&racct->r_rule_links);
1791                 LIST_REMOVE(link, rrl_next);
1792                 rctl_rule_release(link->rrl_rule);
1793                 uma_zfree(rctl_rule_link_zone, link);
1794         }
1795         rw_wunlock(&rctl_lock);
1796 }
1797
1798 static void
1799 rctl_init(void)
1800 {
1801
1802         rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1803             sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1804             UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1805         rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1806             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1807 }
1808
1809 #else /* !RCTL */
1810
/*
 * Stub compiled when RCTL is not defined: ignores its arguments and
 * always returns ENOSYS.
 */
int
sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{

	return (ENOSYS);
}
1817
/*
 * Stub compiled when RCTL is not defined: ignores its arguments and
 * always returns ENOSYS.
 */
int
sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{

	return (ENOSYS);
}
1824
/*
 * Stub compiled when RCTL is not defined: ignores its arguments and
 * always returns ENOSYS.
 */
int
sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{

	return (ENOSYS);
}
1831
/*
 * Stub compiled when RCTL is not defined: ignores its arguments and
 * always returns ENOSYS.
 */
int
sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{

	return (ENOSYS);
}
1838
/*
 * Stub compiled when RCTL is not defined: ignores its arguments and
 * always returns ENOSYS.
 */
int
sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{

	return (ENOSYS);
}
1845
1846 #endif /* !RCTL */