]> CyberLeo.Net >> Repos - FreeBSD/releng/9.1.git/blob - sys/kern/kern_rctl.c
Copy stable/9 to releng/9.1 as part of the 9.1-RELEASE release process.
[FreeBSD/releng/9.1.git] / sys / kern / kern_rctl.c
1 /*-
2  * Copyright (c) 2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/malloc.h>
38 #include <sys/queue.h>
39 #include <sys/refcount.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/limits.h>
43 #include <sys/loginclass.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/racct.h>
47 #include <sys/rctl.h>
48 #include <sys/resourcevar.h>
49 #include <sys/sx.h>
50 #include <sys/sysent.h>
51 #include <sys/sysproto.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/eventhandler.h>
55 #include <sys/lock.h>
56 #include <sys/mutex.h>
57 #include <sys/rwlock.h>
58 #include <sys/sbuf.h>
59 #include <sys/taskqueue.h>
60 #include <sys/tree.h>
61 #include <vm/uma.h>
62
63 #ifdef RCTL
64 #ifndef RACCT
65 #error "The RCTL option requires the RACCT option"
66 #endif
67
68 FEATURE(rctl, "Resource Limits");
69
/*
 * HRF_* flag values.
 * NOTE(review): none of these are referenced in the part of the file
 * reviewed here; confirm they are still needed.
 */
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/* Default buffer size for rctl_get_rules(2). */
#define	RCTL_DEFAULT_BUFSIZE	4096
/* Presumably the upper bound on user-supplied input strings - TODO confirm. */
#define	RCTL_MAX_INBUFLEN	4096
/* Size of the fixed buffer rctl_enforce() formats log/devctl messages into. */
#define	RCTL_LOG_BUFSIZE	128
78
/*
 * An 'rctl_rule_link' ties one rule to one racct the rule applies to;
 * every racct keeps a list of these.  A rule such as
 * 'user:X:openfiles:deny=N/process' therefore ends up linked both with
 * the uidinfo of user X and with each process owned by that user.
 */
struct rctl_rule_link {
	/* Linkage on the owning racct's r_rule_links list. */
	LIST_ENTRY(rctl_rule_link)	rrl_next;
	/* The rule this link refers to; the link holds a reference on it. */
	struct rctl_rule		*rrl_rule;
	/*
	 * Set by rctl_enforce() once a log/devctl/signal action has fired,
	 * cleared again when the rule is no longer exceeded.
	 */
	int				rrl_exceeded;
};
89
/*
 * One row of a name <-> integer lookup table.  Tables built from this
 * type are terminated by a row whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* textual name */
	int		d_value;	/* numeric constant it stands for */
};
94
95 static struct dict subjectnames[] = {
96         { "process", RCTL_SUBJECT_TYPE_PROCESS },
97         { "user", RCTL_SUBJECT_TYPE_USER },
98         { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
99         { "jail", RCTL_SUBJECT_TYPE_JAIL },
100         { NULL, -1 }};
101
102 static struct dict resourcenames[] = {
103         { "cputime", RACCT_CPU },
104         { "datasize", RACCT_DATA },
105         { "stacksize", RACCT_STACK },
106         { "coredumpsize", RACCT_CORE },
107         { "memoryuse", RACCT_RSS },
108         { "memorylocked", RACCT_MEMLOCK },
109         { "maxproc", RACCT_NPROC },
110         { "openfiles", RACCT_NOFILE },
111         { "vmemoryuse", RACCT_VMEM },
112         { "pseudoterminals", RACCT_NPTS },
113         { "swapuse", RACCT_SWAP },
114         { "nthr", RACCT_NTHR },
115         { "msgqqueued", RACCT_MSGQQUEUED },
116         { "msgqsize", RACCT_MSGQSIZE },
117         { "nmsgq", RACCT_NMSGQ },
118         { "nsem", RACCT_NSEM },
119         { "nsemop", RACCT_NSEMOP },
120         { "nshm", RACCT_NSHM },
121         { "shmsize", RACCT_SHMSIZE },
122         { "wallclock", RACCT_WALLCLOCK },
123         { NULL, -1 }};
124
125 static struct dict actionnames[] = {
126         { "sighup", RCTL_ACTION_SIGHUP },
127         { "sigint", RCTL_ACTION_SIGINT },
128         { "sigquit", RCTL_ACTION_SIGQUIT },
129         { "sigill", RCTL_ACTION_SIGILL },
130         { "sigtrap", RCTL_ACTION_SIGTRAP },
131         { "sigabrt", RCTL_ACTION_SIGABRT },
132         { "sigemt", RCTL_ACTION_SIGEMT },
133         { "sigfpe", RCTL_ACTION_SIGFPE },
134         { "sigkill", RCTL_ACTION_SIGKILL },
135         { "sigbus", RCTL_ACTION_SIGBUS },
136         { "sigsegv", RCTL_ACTION_SIGSEGV },
137         { "sigsys", RCTL_ACTION_SIGSYS },
138         { "sigpipe", RCTL_ACTION_SIGPIPE },
139         { "sigalrm", RCTL_ACTION_SIGALRM },
140         { "sigterm", RCTL_ACTION_SIGTERM },
141         { "sigurg", RCTL_ACTION_SIGURG },
142         { "sigstop", RCTL_ACTION_SIGSTOP },
143         { "sigtstp", RCTL_ACTION_SIGTSTP },
144         { "sigchld", RCTL_ACTION_SIGCHLD },
145         { "sigttin", RCTL_ACTION_SIGTTIN },
146         { "sigttou", RCTL_ACTION_SIGTTOU },
147         { "sigio", RCTL_ACTION_SIGIO },
148         { "sigxcpu", RCTL_ACTION_SIGXCPU },
149         { "sigxfsz", RCTL_ACTION_SIGXFSZ },
150         { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
151         { "sigprof", RCTL_ACTION_SIGPROF },
152         { "sigwinch", RCTL_ACTION_SIGWINCH },
153         { "siginfo", RCTL_ACTION_SIGINFO },
154         { "sigusr1", RCTL_ACTION_SIGUSR1 },
155         { "sigusr2", RCTL_ACTION_SIGUSR2 },
156         { "sigthr", RCTL_ACTION_SIGTHR },
157         { "deny", RCTL_ACTION_DENY },
158         { "log", RCTL_ACTION_LOG },
159         { "devctl", RCTL_ACTION_DEVCTL },
160         { NULL, -1 }};
161
162 static void rctl_init(void);
163 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
164
165 static uma_zone_t rctl_rule_link_zone;
166 static uma_zone_t rctl_rule_zone;
167 static struct rwlock rctl_lock;
168 RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");
169
170 static int rctl_rule_fully_specified(const struct rctl_rule *rule);
171 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
172
173 MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
174
175 static const char *
176 rctl_subject_type_name(int subject)
177 {
178         int i;
179
180         for (i = 0; subjectnames[i].d_name != NULL; i++) {
181                 if (subjectnames[i].d_value == subject)
182                         return (subjectnames[i].d_name);
183         }
184
185         panic("rctl_subject_type_name: unknown subject type %d", subject);
186 }
187
188 static const char *
189 rctl_action_name(int action)
190 {
191         int i;
192
193         for (i = 0; actionnames[i].d_name != NULL; i++) {
194                 if (actionnames[i].d_value == action)
195                         return (actionnames[i].d_name);
196         }
197
198         panic("rctl_action_name: unknown action %d", action);
199 }
200
201 const char *
202 rctl_resource_name(int resource)
203 {
204         int i;
205
206         for (i = 0; resourcenames[i].d_name != NULL; i++) {
207                 if (resourcenames[i].d_value == resource)
208                         return (resourcenames[i].d_name);
209         }
210
211         panic("rctl_resource_name: unknown resource %d", resource);
212 }
213
214 /*
215  * Return the amount of resource that can be allocated by 'p' before
216  * hitting 'rule'.
217  */
218 static int64_t
219 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
220 {
221         int resource;
222         int64_t available = INT64_MAX;
223         struct ucred *cred = p->p_ucred;
224
225         rw_assert(&rctl_lock, RA_LOCKED);
226
227         resource = rule->rr_resource;
228         switch (rule->rr_per) {
229         case RCTL_SUBJECT_TYPE_PROCESS:
230                 available = rule->rr_amount -
231                     p->p_racct->r_resources[resource];
232                 break;
233         case RCTL_SUBJECT_TYPE_USER:
234                 available = rule->rr_amount -
235                     cred->cr_ruidinfo->ui_racct->r_resources[resource];
236                 break;
237         case RCTL_SUBJECT_TYPE_LOGINCLASS:
238                 available = rule->rr_amount -
239                     cred->cr_loginclass->lc_racct->r_resources[resource];
240                 break;
241         case RCTL_SUBJECT_TYPE_JAIL:
242                 available = rule->rr_amount -
243                     cred->cr_prison->pr_prison_racct->prr_racct->
244                         r_resources[resource];
245                 break;
246         default:
247                 panic("rctl_compute_available: unknown per %d",
248                     rule->rr_per);
249         }
250
251         return (available);
252 }
253
254 /*
255  * Return non-zero if allocating 'amount' by proc 'p' would exceed
256  * resource limit specified by 'rule'.
257  */
258 static int
259 rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
260     int64_t amount)
261 {
262         int64_t available;
263
264         rw_assert(&rctl_lock, RA_LOCKED);
265
266         available = rctl_available_resource(p, rule);
267         if (available >= amount)
268                 return (0);
269
270         return (1);
271 }
272
273 /*
274  * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
275  * to what it keeps allocated now.  Returns non-zero if the allocation should
276  * be denied, 0 otherwise.
277  */
278 int
279 rctl_enforce(struct proc *p, int resource, uint64_t amount)
280 {
281         struct rctl_rule *rule;
282         struct rctl_rule_link *link;
283         struct sbuf sb;
284         int should_deny = 0;
285         char *buf;
286         static int curtime = 0;
287         static struct timeval lasttime;
288
289         rw_rlock(&rctl_lock);
290
291         /*
292          * There may be more than one matching rule; go through all of them.
293          * Denial should be done last, after logging and sending signals.
294          */
295         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
296                 rule = link->rrl_rule;
297                 if (rule->rr_resource != resource)
298                         continue;
299                 if (!rctl_would_exceed(p, rule, amount)) {
300                         link->rrl_exceeded = 0;
301                         continue;
302                 }
303
304                 switch (rule->rr_action) {
305                 case RCTL_ACTION_DENY:
306                         should_deny = 1;
307                         continue;
308                 case RCTL_ACTION_LOG:
309                         /*
310                          * If rrl_exceeded != 0, it means we've already
311                          * logged a warning for this process.
312                          */
313                         if (link->rrl_exceeded != 0)
314                                 continue;
315
316                         /*
317                          * If the process state is not fully initialized yet,
318                          * we can't access most of the required fields, e.g.
319                          * p->p_comm.  This happens when called from fork1().
320                          * Ignore this rule for now; it will be processed just
321                          * after fork, when called from racct_proc_fork_done().
322                          */
323                         if (p->p_state != PRS_NORMAL)
324                                 continue;
325
326                         if (!ppsratecheck(&lasttime, &curtime, 10))
327                                 continue;
328
329                         buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
330                         if (buf == NULL) {
331                                 printf("rctl_enforce: out of memory\n");
332                                 continue;
333                         }
334                         sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
335                         rctl_rule_to_sbuf(&sb, rule);
336                         sbuf_finish(&sb);
337                         printf("rctl: rule \"%s\" matched by pid %d "
338                             "(%s), uid %d, jail %s\n", sbuf_data(&sb),
339                             p->p_pid, p->p_comm, p->p_ucred->cr_uid,
340                             p->p_ucred->cr_prison->pr_prison_racct->prr_name);
341                         sbuf_delete(&sb);
342                         free(buf, M_RCTL);
343                         link->rrl_exceeded = 1;
344                         continue;
345                 case RCTL_ACTION_DEVCTL:
346                         if (link->rrl_exceeded != 0)
347                                 continue;
348
349                         if (p->p_state != PRS_NORMAL)
350                                 continue;
351         
352                         buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
353                         if (buf == NULL) {
354                                 printf("rctl_enforce: out of memory\n");
355                                 continue;
356                         }
357                         sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
358                         sbuf_printf(&sb, "rule=");
359                         rctl_rule_to_sbuf(&sb, rule);
360                         sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
361                             p->p_pid, p->p_ucred->cr_ruid,
362                             p->p_ucred->cr_prison->pr_prison_racct->prr_name);
363                         sbuf_finish(&sb);
364                         devctl_notify_f("RCTL", "rule", "matched",
365                             sbuf_data(&sb), M_NOWAIT);
366                         sbuf_delete(&sb);
367                         free(buf, M_RCTL);
368                         link->rrl_exceeded = 1;
369                         continue;
370                 default:
371                         if (link->rrl_exceeded != 0)
372                                 continue;
373
374                         if (p->p_state != PRS_NORMAL)
375                                 continue;
376
377                         KASSERT(rule->rr_action > 0 &&
378                             rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
379                             ("rctl_enforce: unknown action %d",
380                              rule->rr_action));
381
382                         /*
383                          * We're using the fact that RCTL_ACTION_SIG* values
384                          * are equal to their counterparts from sys/signal.h.
385                          */
386                         kern_psignal(p, rule->rr_action);
387                         link->rrl_exceeded = 1;
388                         continue;
389                 }
390         }
391
392         rw_runlock(&rctl_lock);
393
394         if (should_deny) {
395                 /*
396                  * Return fake error code; the caller should change it
397                  * into one proper for the situation - EFSIZ, ENOMEM etc.
398                  */
399                 return (EDOOFUS);
400         }
401
402         return (0);
403 }
404
405 uint64_t
406 rctl_get_limit(struct proc *p, int resource)
407 {
408         struct rctl_rule *rule;
409         struct rctl_rule_link *link;
410         uint64_t amount = UINT64_MAX;
411
412         rw_rlock(&rctl_lock);
413
414         /*
415          * There may be more than one matching rule; go through all of them.
416          * Denial should be done last, after logging and sending signals.
417          */
418         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
419                 rule = link->rrl_rule;
420                 if (rule->rr_resource != resource)
421                         continue;
422                 if (rule->rr_action != RCTL_ACTION_DENY)
423                         continue;
424                 if (rule->rr_amount < amount)
425                         amount = rule->rr_amount;
426         }
427
428         rw_runlock(&rctl_lock);
429
430         return (amount);
431 }
432
433 uint64_t
434 rctl_get_available(struct proc *p, int resource)
435 {
436         struct rctl_rule *rule;
437         struct rctl_rule_link *link;
438         int64_t available, minavailable, allocated;
439
440         minavailable = INT64_MAX;
441
442         rw_rlock(&rctl_lock);
443
444         /*
445          * There may be more than one matching rule; go through all of them.
446          * Denial should be done last, after logging and sending signals.
447          */
448         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
449                 rule = link->rrl_rule;
450                 if (rule->rr_resource != resource)
451                         continue;
452                 if (rule->rr_action != RCTL_ACTION_DENY)
453                         continue;
454                 available = rctl_available_resource(p, rule);
455                 if (available < minavailable)
456                         minavailable = available;
457         }
458
459         rw_runlock(&rctl_lock);
460
461         /*
462          * XXX: Think about this _hard_.
463          */
464         allocated = p->p_racct->r_resources[resource];
465         if (minavailable < INT64_MAX - allocated)
466                 minavailable += allocated;
467         if (minavailable < 0)
468                 minavailable = 0;
469         return (minavailable);
470 }
471
472 static int
473 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
474 {
475
476         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
477                 if (rule->rr_subject_type != filter->rr_subject_type)
478                         return (0);
479
480                 switch (filter->rr_subject_type) {
481                 case RCTL_SUBJECT_TYPE_PROCESS:
482                         if (filter->rr_subject.rs_proc != NULL &&
483                             rule->rr_subject.rs_proc !=
484                             filter->rr_subject.rs_proc)
485                                 return (0);
486                         break;
487                 case RCTL_SUBJECT_TYPE_USER:
488                         if (filter->rr_subject.rs_uip != NULL &&
489                             rule->rr_subject.rs_uip !=
490                             filter->rr_subject.rs_uip)
491                                 return (0);
492                         break;
493                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
494                         if (filter->rr_subject.rs_loginclass != NULL &&
495                             rule->rr_subject.rs_loginclass !=
496                             filter->rr_subject.rs_loginclass)
497                                 return (0);
498                         break;
499                 case RCTL_SUBJECT_TYPE_JAIL:
500                         if (filter->rr_subject.rs_prison_racct != NULL &&
501                             rule->rr_subject.rs_prison_racct !=
502                             filter->rr_subject.rs_prison_racct)
503                                 return (0);
504                         break;
505                 default:
506                         panic("rctl_rule_matches: unknown subject type %d",
507                             filter->rr_subject_type);
508                 }
509         }
510
511         if (filter->rr_resource != RACCT_UNDEFINED) {
512                 if (rule->rr_resource != filter->rr_resource)
513                         return (0);
514         }
515
516         if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
517                 if (rule->rr_action != filter->rr_action)
518                         return (0);
519         }
520
521         if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
522                 if (rule->rr_amount != filter->rr_amount)
523                         return (0);
524         }
525
526         if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
527                 if (rule->rr_per != filter->rr_per)
528                         return (0);
529         }
530
531         return (1);
532 }
533
534 static int
535 str2value(const char *str, int *value, struct dict *table)
536 {
537         int i;
538
539         if (value == NULL)
540                 return (EINVAL);
541
542         for (i = 0; table[i].d_name != NULL; i++) {
543                 if (strcasecmp(table[i].d_name, str) == 0) {
544                         *value =  table[i].d_value;
545                         return (0);
546                 }
547         }
548
549         return (EINVAL);
550 }
551
552 static int
553 str2id(const char *str, id_t *value)
554 {
555         char *end;
556
557         if (str == NULL)
558                 return (EINVAL);
559
560         *value = strtoul(str, &end, 10);
561         if ((size_t)(end - str) != strlen(str))
562                 return (EINVAL);
563
564         return (0);
565 }
566
567 static int
568 str2int64(const char *str, int64_t *value)
569 {
570         char *end;
571
572         if (str == NULL)
573                 return (EINVAL);
574
575         *value = strtoul(str, &end, 10);
576         if ((size_t)(end - str) != strlen(str))
577                 return (EINVAL);
578
579         return (0);
580 }
581
582 /*
583  * Connect the rule to the racct, increasing refcount for the rule.
584  */
585 static void
586 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
587 {
588         struct rctl_rule_link *link;
589
590         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
591
592         rctl_rule_acquire(rule);
593         link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
594         link->rrl_rule = rule;
595         link->rrl_exceeded = 0;
596
597         rw_wlock(&rctl_lock);
598         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
599         rw_wunlock(&rctl_lock);
600 }
601
602 static int
603 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
604 {
605         struct rctl_rule_link *link;
606
607         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
608         rw_assert(&rctl_lock, RA_WLOCKED);
609
610         link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
611         if (link == NULL)
612                 return (ENOMEM);
613         rctl_rule_acquire(rule);
614         link->rrl_rule = rule;
615         link->rrl_exceeded = 0;
616
617         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
618         return (0);
619 }
620
621 /*
622  * Remove limits for a rules matching the filter and release
623  * the refcounts for the rules, possibly freeing them.  Returns
624  * the number of limit structures removed.
625  */
626 static int
627 rctl_racct_remove_rules(struct racct *racct,
628     const struct rctl_rule *filter)
629 {
630         int removed = 0;
631         struct rctl_rule_link *link, *linktmp;
632
633         rw_assert(&rctl_lock, RA_WLOCKED);
634
635         LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
636                 if (!rctl_rule_matches(link->rrl_rule, filter))
637                         continue;
638
639                 LIST_REMOVE(link, rrl_next);
640                 rctl_rule_release(link->rrl_rule);
641                 uma_zfree(rctl_rule_link_zone, link);
642                 removed++;
643         }
644         return (removed);
645 }
646
647 static void
648 rctl_rule_acquire_subject(struct rctl_rule *rule)
649 {
650
651         switch (rule->rr_subject_type) {
652         case RCTL_SUBJECT_TYPE_UNDEFINED:
653         case RCTL_SUBJECT_TYPE_PROCESS:
654                 break;
655         case RCTL_SUBJECT_TYPE_JAIL:
656                 if (rule->rr_subject.rs_prison_racct != NULL)
657                         prison_racct_hold(rule->rr_subject.rs_prison_racct);
658                 break;
659         case RCTL_SUBJECT_TYPE_USER:
660                 if (rule->rr_subject.rs_uip != NULL)
661                         uihold(rule->rr_subject.rs_uip);
662                 break;
663         case RCTL_SUBJECT_TYPE_LOGINCLASS:
664                 if (rule->rr_subject.rs_loginclass != NULL)
665                         loginclass_hold(rule->rr_subject.rs_loginclass);
666                 break;
667         default:
668                 panic("rctl_rule_acquire_subject: unknown subject type %d",
669                     rule->rr_subject_type);
670         }
671 }
672
673 static void
674 rctl_rule_release_subject(struct rctl_rule *rule)
675 {
676
677         switch (rule->rr_subject_type) {
678         case RCTL_SUBJECT_TYPE_UNDEFINED:
679         case RCTL_SUBJECT_TYPE_PROCESS:
680                 break;
681         case RCTL_SUBJECT_TYPE_JAIL:
682                 if (rule->rr_subject.rs_prison_racct != NULL)
683                         prison_racct_free(rule->rr_subject.rs_prison_racct);
684                 break;
685         case RCTL_SUBJECT_TYPE_USER:
686                 if (rule->rr_subject.rs_uip != NULL)
687                         uifree(rule->rr_subject.rs_uip);
688                 break;
689         case RCTL_SUBJECT_TYPE_LOGINCLASS:
690                 if (rule->rr_subject.rs_loginclass != NULL)
691                         loginclass_free(rule->rr_subject.rs_loginclass);
692                 break;
693         default:
694                 panic("rctl_rule_release_subject: unknown subject type %d",
695                     rule->rr_subject_type);
696         }
697 }
698
699 struct rctl_rule *
700 rctl_rule_alloc(int flags)
701 {
702         struct rctl_rule *rule;
703
704         rule = uma_zalloc(rctl_rule_zone, flags);
705         if (rule == NULL)
706                 return (NULL);
707         rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
708         rule->rr_subject.rs_proc = NULL;
709         rule->rr_subject.rs_uip = NULL;
710         rule->rr_subject.rs_loginclass = NULL;
711         rule->rr_subject.rs_prison_racct = NULL;
712         rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
713         rule->rr_resource = RACCT_UNDEFINED;
714         rule->rr_action = RCTL_ACTION_UNDEFINED;
715         rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
716         refcount_init(&rule->rr_refcount, 1);
717
718         return (rule);
719 }
720
721 struct rctl_rule *
722 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
723 {
724         struct rctl_rule *copy;
725
726         copy = uma_zalloc(rctl_rule_zone, flags);
727         if (copy == NULL)
728                 return (NULL);
729         copy->rr_subject_type = rule->rr_subject_type;
730         copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
731         copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
732         copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
733         copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
734         copy->rr_per = rule->rr_per;
735         copy->rr_resource = rule->rr_resource;
736         copy->rr_action = rule->rr_action;
737         copy->rr_amount = rule->rr_amount;
738         refcount_init(&copy->rr_refcount, 1);
739         rctl_rule_acquire_subject(copy);
740
741         return (copy);
742 }
743
744 void
745 rctl_rule_acquire(struct rctl_rule *rule)
746 {
747
748         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
749
750         refcount_acquire(&rule->rr_refcount);
751 }
752
753 static void
754 rctl_rule_free(void *context, int pending)
755 {
756         struct rctl_rule *rule;
757         
758         rule = (struct rctl_rule *)context;
759
760         KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
761         
762         /*
763          * We don't need locking here; rule is guaranteed to be inaccessible.
764          */
765         
766         rctl_rule_release_subject(rule);
767         uma_zfree(rctl_rule_zone, rule);
768 }
769
770 void
771 rctl_rule_release(struct rctl_rule *rule)
772 {
773
774         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
775
776         if (refcount_release(&rule->rr_refcount)) {
777                 /*
778                  * rctl_rule_release() is often called when iterating
779                  * over all the uidinfo structures in the system,
780                  * holding uihashtbl_lock.  Since rctl_rule_free()
781                  * might end up calling uifree(), this would lead
782                  * to lock recursion.  Use taskqueue to avoid this.
783                  */
784                 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
785                 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
786         }
787 }
788
789 static int
790 rctl_rule_fully_specified(const struct rctl_rule *rule)
791 {
792
793         switch (rule->rr_subject_type) {
794         case RCTL_SUBJECT_TYPE_UNDEFINED:
795                 return (0);
796         case RCTL_SUBJECT_TYPE_PROCESS:
797                 if (rule->rr_subject.rs_proc == NULL)
798                         return (0);
799                 break;
800         case RCTL_SUBJECT_TYPE_USER:
801                 if (rule->rr_subject.rs_uip == NULL)
802                         return (0);
803                 break;
804         case RCTL_SUBJECT_TYPE_LOGINCLASS:
805                 if (rule->rr_subject.rs_loginclass == NULL)
806                         return (0);
807                 break;
808         case RCTL_SUBJECT_TYPE_JAIL:
809                 if (rule->rr_subject.rs_prison_racct == NULL)
810                         return (0);
811                 break;
812         default:
813                 panic("rctl_rule_fully_specified: unknown subject type %d",
814                     rule->rr_subject_type);
815         }
816         if (rule->rr_resource == RACCT_UNDEFINED)
817                 return (0);
818         if (rule->rr_action == RCTL_ACTION_UNDEFINED)
819                 return (0);
820         if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
821                 return (0);
822         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
823                 return (0);
824
825         return (1);
826 }
827
828 static int
829 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
830 {
831         int error = 0;
832         char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
833              *amountstr, *perstr;
834         struct rctl_rule *rule;
835         id_t id;
836
837         rule = rctl_rule_alloc(M_WAITOK);
838
839         subjectstr = strsep(&rulestr, ":");
840         subject_idstr = strsep(&rulestr, ":");
841         resourcestr = strsep(&rulestr, ":");
842         actionstr = strsep(&rulestr, "=/");
843         amountstr = strsep(&rulestr, "/");
844         perstr = rulestr;
845
846         if (subjectstr == NULL || subjectstr[0] == '\0')
847                 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
848         else {
849                 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
850                 if (error != 0)
851                         goto out;
852         }
853
854         if (subject_idstr == NULL || subject_idstr[0] == '\0') {
855                 rule->rr_subject.rs_proc = NULL;
856                 rule->rr_subject.rs_uip = NULL;
857                 rule->rr_subject.rs_loginclass = NULL;
858                 rule->rr_subject.rs_prison_racct = NULL;
859         } else {
860                 switch (rule->rr_subject_type) {
861                 case RCTL_SUBJECT_TYPE_UNDEFINED:
862                         error = EINVAL;
863                         goto out;
864                 case RCTL_SUBJECT_TYPE_PROCESS:
865                         error = str2id(subject_idstr, &id);
866                         if (error != 0)
867                                 goto out;
868                         sx_assert(&allproc_lock, SA_LOCKED);
869                         rule->rr_subject.rs_proc = pfind(id);
870                         if (rule->rr_subject.rs_proc == NULL) {
871                                 error = ESRCH;
872                                 goto out;
873                         }
874                         PROC_UNLOCK(rule->rr_subject.rs_proc);
875                         break;
876                 case RCTL_SUBJECT_TYPE_USER:
877                         error = str2id(subject_idstr, &id);
878                         if (error != 0)
879                                 goto out;
880                         rule->rr_subject.rs_uip = uifind(id);
881                         break;
882                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
883                         rule->rr_subject.rs_loginclass =
884                             loginclass_find(subject_idstr);
885                         if (rule->rr_subject.rs_loginclass == NULL) {
886                                 error = ENAMETOOLONG;
887                                 goto out;
888                         }
889                         break;
890                 case RCTL_SUBJECT_TYPE_JAIL:
891                         rule->rr_subject.rs_prison_racct =
892                             prison_racct_find(subject_idstr);
893                         if (rule->rr_subject.rs_prison_racct == NULL) {
894                                 error = ENAMETOOLONG;
895                                 goto out;
896                         }
897                         break;
898                default:
899                        panic("rctl_string_to_rule: unknown subject type %d",
900                            rule->rr_subject_type);
901                }
902         }
903
904         if (resourcestr == NULL || resourcestr[0] == '\0')
905                 rule->rr_resource = RACCT_UNDEFINED;
906         else {
907                 error = str2value(resourcestr, &rule->rr_resource,
908                     resourcenames);
909                 if (error != 0)
910                         goto out;
911         }
912
913         if (actionstr == NULL || actionstr[0] == '\0')
914                 rule->rr_action = RCTL_ACTION_UNDEFINED;
915         else {
916                 error = str2value(actionstr, &rule->rr_action, actionnames);
917                 if (error != 0)
918                         goto out;
919         }
920
921         if (amountstr == NULL || amountstr[0] == '\0')
922                 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
923         else {
924                 error = str2int64(amountstr, &rule->rr_amount);
925                 if (error != 0)
926                         goto out;
927                 if (RACCT_IS_IN_MILLIONS(rule->rr_resource))
928                         rule->rr_amount *= 1000000;
929         }
930
931         if (perstr == NULL || perstr[0] == '\0')
932                 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
933         else {
934                 error = str2value(perstr, &rule->rr_per, subjectnames);
935                 if (error != 0)
936                         goto out;
937         }
938
939 out:
940         if (error == 0)
941                 *rulep = rule;
942         else
943                 rctl_rule_release(rule);
944
945         return (error);
946 }
947
948 /*
949  * Link a rule with all the subjects it applies to.
950  */
951 int
952 rctl_rule_add(struct rctl_rule *rule)
953 {
954         struct proc *p;
955         struct ucred *cred;
956         struct uidinfo *uip;
957         struct prison *pr;
958         struct prison_racct *prr;
959         struct loginclass *lc;
960         struct rctl_rule *rule2;
961         int match;
962
963         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
964
965         /*
966          * Some rules just don't make sense.  Note that the one below
967          * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU,
968          * for example, is not deniable in the racct sense, but the
969          * limit is enforced in a different way, so "deny" rules for %CPU
970          * do make sense.
971          */
972         if (rule->rr_action == RCTL_ACTION_DENY &&
973             (rule->rr_resource == RACCT_CPU ||
974             rule->rr_resource == RACCT_WALLCLOCK))
975                 return (EOPNOTSUPP);
976
977         if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
978             RACCT_IS_SLOPPY(rule->rr_resource))
979                 return (EOPNOTSUPP);
980
981         /*
982          * Make sure there are no duplicated rules.  Also, for the "deny"
983          * rules, remove ones differing only by "amount".
984          */
985         if (rule->rr_action == RCTL_ACTION_DENY) {
986                 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
987                 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
988                 rctl_rule_remove(rule2);
989                 rctl_rule_release(rule2);
990         } else
991                 rctl_rule_remove(rule);
992
993         switch (rule->rr_subject_type) {
994         case RCTL_SUBJECT_TYPE_PROCESS:
995                 p = rule->rr_subject.rs_proc;
996                 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
997
998                 rctl_racct_add_rule(p->p_racct, rule);
999                 /*
1000                  * In case of per-process rule, we don't have anything more
1001                  * to do.
1002                  */
1003                 return (0);
1004
1005         case RCTL_SUBJECT_TYPE_USER:
1006                 uip = rule->rr_subject.rs_uip;
1007                 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1008                 rctl_racct_add_rule(uip->ui_racct, rule);
1009                 break;
1010
1011         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1012                 lc = rule->rr_subject.rs_loginclass;
1013                 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1014                 rctl_racct_add_rule(lc->lc_racct, rule);
1015                 break;
1016
1017         case RCTL_SUBJECT_TYPE_JAIL:
1018                 prr = rule->rr_subject.rs_prison_racct;
1019                 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1020                 rctl_racct_add_rule(prr->prr_racct, rule);
1021                 break;
1022
1023         default:
1024                 panic("rctl_rule_add: unknown subject type %d",
1025                     rule->rr_subject_type);
1026         }
1027
1028         /*
1029          * Now go through all the processes and add the new rule to the ones
1030          * it applies to.
1031          */
1032         sx_assert(&allproc_lock, SA_LOCKED);
1033         FOREACH_PROC_IN_SYSTEM(p) {
1034                 cred = p->p_ucred;
1035                 switch (rule->rr_subject_type) {
1036                 case RCTL_SUBJECT_TYPE_USER:
1037                         if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1038                             cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1039                                 break;
1040                         continue;
1041                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1042                         if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1043                                 break;
1044                         continue;
1045                 case RCTL_SUBJECT_TYPE_JAIL:
1046                         match = 0;
1047                         for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1048                                 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1049                                         match = 1;
1050                                         break;
1051                                 }
1052                         }
1053                         if (match)
1054                                 break;
1055                         continue;
1056                 default:
1057                         panic("rctl_rule_add: unknown subject type %d",
1058                             rule->rr_subject_type);
1059                 }
1060
1061                 rctl_racct_add_rule(p->p_racct, rule);
1062         }
1063
1064         return (0);
1065 }
1066
1067 static void
1068 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1069 {
1070         struct rctl_rule *filter = (struct rctl_rule *)arg2;
1071         int found = 0;
1072
1073         rw_wlock(&rctl_lock);
1074         found += rctl_racct_remove_rules(racct, filter);
1075         rw_wunlock(&rctl_lock);
1076
1077         *((int *)arg3) += found;
1078 }
1079
1080 /*
1081  * Remove all rules that match the filter.
1082  */
1083 int
1084 rctl_rule_remove(struct rctl_rule *filter)
1085 {
1086         int found = 0;
1087         struct proc *p;
1088
1089         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1090             filter->rr_subject.rs_proc != NULL) {
1091                 p = filter->rr_subject.rs_proc;
1092                 rw_wlock(&rctl_lock);
1093                 found = rctl_racct_remove_rules(p->p_racct, filter);
1094                 rw_wunlock(&rctl_lock);
1095                 if (found)
1096                         return (0);
1097                 return (ESRCH);
1098         }
1099
1100         loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1101             (void *)&found);
1102         ui_racct_foreach(rctl_rule_remove_callback, filter,
1103             (void *)&found);
1104         prison_racct_foreach(rctl_rule_remove_callback, filter,
1105             (void *)&found);
1106
1107         sx_assert(&allproc_lock, SA_LOCKED);
1108         rw_wlock(&rctl_lock);
1109         FOREACH_PROC_IN_SYSTEM(p) {
1110                 found += rctl_racct_remove_rules(p->p_racct, filter);
1111         }
1112         rw_wunlock(&rctl_lock);
1113
1114         if (found)
1115                 return (0);
1116         return (ESRCH);
1117 }
1118
1119 /*
1120  * Appends a rule to the sbuf.
1121  */
1122 static void
1123 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1124 {
1125         int64_t amount;
1126
1127         sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1128
1129         switch (rule->rr_subject_type) {
1130         case RCTL_SUBJECT_TYPE_PROCESS:
1131                 if (rule->rr_subject.rs_proc == NULL)
1132                         sbuf_printf(sb, ":");
1133                 else
1134                         sbuf_printf(sb, "%d:",
1135                             rule->rr_subject.rs_proc->p_pid);
1136                 break;
1137         case RCTL_SUBJECT_TYPE_USER:
1138                 if (rule->rr_subject.rs_uip == NULL)
1139                         sbuf_printf(sb, ":");
1140                 else
1141                         sbuf_printf(sb, "%d:",
1142                             rule->rr_subject.rs_uip->ui_uid);
1143                 break;
1144         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1145                 if (rule->rr_subject.rs_loginclass == NULL)
1146                         sbuf_printf(sb, ":");
1147                 else
1148                         sbuf_printf(sb, "%s:",
1149                             rule->rr_subject.rs_loginclass->lc_name);
1150                 break;
1151         case RCTL_SUBJECT_TYPE_JAIL:
1152                 if (rule->rr_subject.rs_prison_racct == NULL)
1153                         sbuf_printf(sb, ":");
1154                 else
1155                         sbuf_printf(sb, "%s:",
1156                             rule->rr_subject.rs_prison_racct->prr_name);
1157                 break;
1158         default:
1159                 panic("rctl_rule_to_sbuf: unknown subject type %d",
1160                     rule->rr_subject_type);
1161         }
1162
1163         amount = rule->rr_amount;
1164         if (amount != RCTL_AMOUNT_UNDEFINED &&
1165             RACCT_IS_IN_MILLIONS(rule->rr_resource))
1166                 amount /= 1000000;
1167
1168         sbuf_printf(sb, "%s:%s=%jd",
1169             rctl_resource_name(rule->rr_resource),
1170             rctl_action_name(rule->rr_action),
1171             amount);
1172
1173         if (rule->rr_per != rule->rr_subject_type)
1174                 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1175 }
1176
1177 /*
1178  * Routine used by RCTL syscalls to read in input string.
1179  */
1180 static int
1181 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1182 {
1183         int error;
1184         char *str;
1185
1186         if (inbuflen <= 0)
1187                 return (EINVAL);
1188         if (inbuflen > RCTL_MAX_INBUFLEN)
1189                 return (E2BIG);
1190
1191         str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1192         error = copyinstr(inbufp, str, inbuflen, NULL);
1193         if (error != 0) {
1194                 free(str, M_RCTL);
1195                 return (error);
1196         }
1197
1198         *inputstr = str;
1199
1200         return (0);
1201 }
1202
1203 /*
1204  * Routine used by RCTL syscalls to write out output string.
1205  */
1206 static int
1207 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1208 {
1209         int error;
1210
1211         if (outputsbuf == NULL)
1212                 return (0);
1213
1214         sbuf_finish(outputsbuf);
1215         if (outbuflen < sbuf_len(outputsbuf) + 1) {
1216                 sbuf_delete(outputsbuf);
1217                 return (ERANGE);
1218         }
1219         error = copyout(sbuf_data(outputsbuf), outbufp,
1220             sbuf_len(outputsbuf) + 1);
1221         sbuf_delete(outputsbuf);
1222         return (error);
1223 }
1224
1225 static struct sbuf *
1226 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1227 {
1228         int i;
1229         int64_t amount;
1230         struct sbuf *sb;
1231
1232         sb = sbuf_new_auto();
1233         for (i = 0; i <= RACCT_MAX; i++) {
1234                 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1235                         continue;
1236                 amount = racct->r_resources[i];
1237                 if (RACCT_IS_IN_MILLIONS(i))
1238                         amount /= 1000000;
1239                 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1240         }
1241         sbuf_setpos(sb, sbuf_len(sb) - 1);
1242         return (sb);
1243 }
1244
1245 int
1246 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1247 {
1248         int error;
1249         char *inputstr;
1250         struct rctl_rule *filter;
1251         struct sbuf *outputsbuf = NULL;
1252         struct proc *p;
1253         struct uidinfo *uip;
1254         struct loginclass *lc;
1255         struct prison_racct *prr;
1256
1257         error = priv_check(td, PRIV_RCTL_GET_RACCT);
1258         if (error != 0)
1259                 return (error);
1260
1261         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1262         if (error != 0)
1263                 return (error);
1264
1265         sx_slock(&allproc_lock);
1266         error = rctl_string_to_rule(inputstr, &filter);
1267         free(inputstr, M_RCTL);
1268         if (error != 0) {
1269                 sx_sunlock(&allproc_lock);
1270                 return (error);
1271         }
1272
1273         switch (filter->rr_subject_type) {
1274         case RCTL_SUBJECT_TYPE_PROCESS:
1275                 p = filter->rr_subject.rs_proc;
1276                 if (p == NULL) {
1277                         error = EINVAL;
1278                         goto out;
1279                 }
1280                 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1281                 break;
1282         case RCTL_SUBJECT_TYPE_USER:
1283                 uip = filter->rr_subject.rs_uip;
1284                 if (uip == NULL) {
1285                         error = EINVAL;
1286                         goto out;
1287                 }
1288                 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1289                 break;
1290         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1291                 lc = filter->rr_subject.rs_loginclass;
1292                 if (lc == NULL) {
1293                         error = EINVAL;
1294                         goto out;
1295                 }
1296                 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1297                 break;
1298         case RCTL_SUBJECT_TYPE_JAIL:
1299                 prr = filter->rr_subject.rs_prison_racct;
1300                 if (prr == NULL) {
1301                         error = EINVAL;
1302                         goto out;
1303                 }
1304                 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1305                 break;
1306         default:
1307                 error = EINVAL;
1308         }
1309 out:
1310         rctl_rule_release(filter);
1311         sx_sunlock(&allproc_lock);
1312         if (error != 0)
1313                 return (error);
1314
1315         error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1316
1317         return (error);
1318 }
1319
1320 static void
1321 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1322 {
1323         struct rctl_rule *filter = (struct rctl_rule *)arg2;
1324         struct rctl_rule_link *link;
1325         struct sbuf *sb = (struct sbuf *)arg3;
1326
1327         rw_rlock(&rctl_lock);
1328         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1329                 if (!rctl_rule_matches(link->rrl_rule, filter))
1330                         continue;
1331                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1332                 sbuf_printf(sb, ",");
1333         }
1334         rw_runlock(&rctl_lock);
1335 }
1336
1337 int
1338 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1339 {
1340         int error;
1341         size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1342         char *inputstr, *buf;
1343         struct sbuf *sb;
1344         struct rctl_rule *filter;
1345         struct rctl_rule_link *link;
1346         struct proc *p;
1347
1348         error = priv_check(td, PRIV_RCTL_GET_RULES);
1349         if (error != 0)
1350                 return (error);
1351
1352         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1353         if (error != 0)
1354                 return (error);
1355
1356         sx_slock(&allproc_lock);
1357         error = rctl_string_to_rule(inputstr, &filter);
1358         free(inputstr, M_RCTL);
1359         if (error != 0) {
1360                 sx_sunlock(&allproc_lock);
1361                 return (error);
1362         }
1363
1364 again:
1365         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1366         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1367         KASSERT(sb != NULL, ("sbuf_new failed"));
1368
1369         sx_assert(&allproc_lock, SA_LOCKED);
1370         FOREACH_PROC_IN_SYSTEM(p) {
1371                 rw_rlock(&rctl_lock);
1372                 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1373                         /*
1374                          * Non-process rules will be added to the buffer later.
1375                          * Adding them here would result in duplicated output.
1376                          */
1377                         if (link->rrl_rule->rr_subject_type !=
1378                             RCTL_SUBJECT_TYPE_PROCESS)
1379                                 continue;
1380                         if (!rctl_rule_matches(link->rrl_rule, filter))
1381                                 continue;
1382                         rctl_rule_to_sbuf(sb, link->rrl_rule);
1383                         sbuf_printf(sb, ",");
1384                 }
1385                 rw_runlock(&rctl_lock);
1386         }
1387
1388         loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1389         ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1390         prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1391         if (sbuf_error(sb) == ENOMEM) {
1392                 sbuf_delete(sb);
1393                 free(buf, M_RCTL);
1394                 bufsize *= 4;
1395                 goto again;
1396         }
1397
1398         /*
1399          * Remove trailing ",".
1400          */
1401         if (sbuf_len(sb) > 0)
1402                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1403
1404         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1405
1406         rctl_rule_release(filter);
1407         sx_sunlock(&allproc_lock);
1408         free(buf, M_RCTL);
1409         return (error);
1410 }
1411
1412 int
1413 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1414 {
1415         int error;
1416         size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1417         char *inputstr, *buf;
1418         struct sbuf *sb;
1419         struct rctl_rule *filter;
1420         struct rctl_rule_link *link;
1421
1422         error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1423         if (error != 0)
1424                 return (error);
1425
1426         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1427         if (error != 0)
1428                 return (error);
1429
1430         sx_slock(&allproc_lock);
1431         error = rctl_string_to_rule(inputstr, &filter);
1432         free(inputstr, M_RCTL);
1433         if (error != 0) {
1434                 sx_sunlock(&allproc_lock);
1435                 return (error);
1436         }
1437
1438         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1439                 rctl_rule_release(filter);
1440                 sx_sunlock(&allproc_lock);
1441                 return (EINVAL);
1442         }
1443         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1444                 rctl_rule_release(filter);
1445                 sx_sunlock(&allproc_lock);
1446                 return (EOPNOTSUPP);
1447         }
1448         if (filter->rr_subject.rs_proc == NULL) {
1449                 rctl_rule_release(filter);
1450                 sx_sunlock(&allproc_lock);
1451                 return (EINVAL);
1452         }
1453
1454 again:
1455         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1456         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1457         KASSERT(sb != NULL, ("sbuf_new failed"));
1458
1459         rw_rlock(&rctl_lock);
1460         LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1461             rrl_next) {
1462                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1463                 sbuf_printf(sb, ",");
1464         }
1465         rw_runlock(&rctl_lock);
1466         if (sbuf_error(sb) == ENOMEM) {
1467                 sbuf_delete(sb);
1468                 free(buf, M_RCTL);
1469                 bufsize *= 4;
1470                 goto again;
1471         }
1472
1473         /*
1474          * Remove trailing ",".
1475          */
1476         if (sbuf_len(sb) > 0)
1477                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1478
1479         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1480         rctl_rule_release(filter);
1481         sx_sunlock(&allproc_lock);
1482         free(buf, M_RCTL);
1483         return (error);
1484 }
1485
1486 int
1487 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1488 {
1489         int error;
1490         struct rctl_rule *rule;
1491         char *inputstr;
1492
1493         error = priv_check(td, PRIV_RCTL_ADD_RULE);
1494         if (error != 0)
1495                 return (error);
1496
1497         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1498         if (error != 0)
1499                 return (error);
1500
1501         sx_slock(&allproc_lock);
1502         error = rctl_string_to_rule(inputstr, &rule);
1503         free(inputstr, M_RCTL);
1504         if (error != 0) {
1505                 sx_sunlock(&allproc_lock);
1506                 return (error);
1507         }
1508         /*
1509          * The 'per' part of a rule is optional.
1510          */
1511         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1512             rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1513                 rule->rr_per = rule->rr_subject_type;
1514
1515         if (!rctl_rule_fully_specified(rule)) {
1516                 error = EINVAL;
1517                 goto out;
1518         }
1519
1520         error = rctl_rule_add(rule);
1521
1522 out:
1523         rctl_rule_release(rule);
1524         sx_sunlock(&allproc_lock);
1525         return (error);
1526 }
1527
1528 int
1529 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1530 {
1531         int error;
1532         struct rctl_rule *filter;
1533         char *inputstr;
1534
1535         error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1536         if (error != 0)
1537                 return (error);
1538
1539         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1540         if (error != 0)
1541                 return (error);
1542
1543         sx_slock(&allproc_lock);
1544         error = rctl_string_to_rule(inputstr, &filter);
1545         free(inputstr, M_RCTL);
1546         if (error != 0) {
1547                 sx_sunlock(&allproc_lock);
1548                 return (error);
1549         }
1550
1551         error = rctl_rule_remove(filter);
1552         rctl_rule_release(filter);
1553         sx_sunlock(&allproc_lock);
1554
1555         return (error);
1556 }
1557
1558 /*
1559  * Update RCTL rule list after credential change.
1560  */
1561 void
1562 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1563 {
1564         int rulecnt, i;
1565         struct rctl_rule_link *link, *newlink;
1566         struct uidinfo *newuip;
1567         struct loginclass *newlc;
1568         struct prison_racct *newprr;
1569         LIST_HEAD(, rctl_rule_link) newrules;
1570
1571         newuip = newcred->cr_ruidinfo;
1572         newlc = newcred->cr_loginclass;
1573         newprr = newcred->cr_prison->pr_prison_racct;
1574         
1575         LIST_INIT(&newrules);
1576
1577 again:
1578         /*
1579          * First, count the rules that apply to the process with new
1580          * credentials.
1581          */
1582         rulecnt = 0;
1583         rw_rlock(&rctl_lock);
1584         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1585                 if (link->rrl_rule->rr_subject_type ==
1586                     RCTL_SUBJECT_TYPE_PROCESS)
1587                         rulecnt++;
1588         }
1589         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1590                 rulecnt++;
1591         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1592                 rulecnt++;
1593         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1594                 rulecnt++;
1595         rw_runlock(&rctl_lock);
1596
1597         /*
1598          * Create temporary list.  We've dropped the rctl_lock in order
1599          * to use M_WAITOK.
1600          */
1601         for (i = 0; i < rulecnt; i++) {
1602                 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1603                 newlink->rrl_rule = NULL;
1604                 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1605         }
1606
1607         newlink = LIST_FIRST(&newrules);
1608
1609         /*
1610          * Assign rules to the newly allocated list entries.
1611          */
1612         rw_wlock(&rctl_lock);
1613         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1614                 if (link->rrl_rule->rr_subject_type ==
1615                     RCTL_SUBJECT_TYPE_PROCESS) {
1616                         if (newlink == NULL)
1617                                 goto goaround;
1618                         rctl_rule_acquire(link->rrl_rule);
1619                         newlink->rrl_rule = link->rrl_rule;
1620                         newlink = LIST_NEXT(newlink, rrl_next);
1621                         rulecnt--;
1622                 }
1623         }
1624         
1625         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1626                 if (newlink == NULL)
1627                         goto goaround;
1628                 rctl_rule_acquire(link->rrl_rule);
1629                 newlink->rrl_rule = link->rrl_rule;
1630                 newlink = LIST_NEXT(newlink, rrl_next);
1631                 rulecnt--;
1632         }
1633
1634         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1635                 if (newlink == NULL)
1636                         goto goaround;
1637                 rctl_rule_acquire(link->rrl_rule);
1638                 newlink->rrl_rule = link->rrl_rule;
1639                 newlink = LIST_NEXT(newlink, rrl_next);
1640                 rulecnt--;
1641         }
1642
1643         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1644                 if (newlink == NULL)
1645                         goto goaround;
1646                 rctl_rule_acquire(link->rrl_rule);
1647                 newlink->rrl_rule = link->rrl_rule;
1648                 newlink = LIST_NEXT(newlink, rrl_next);
1649                 rulecnt--;
1650         }
1651
1652         if (rulecnt == 0) {
1653                 /*
1654                  * Free the old rule list.
1655                  */
1656                 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1657                         link = LIST_FIRST(&p->p_racct->r_rule_links);
1658                         LIST_REMOVE(link, rrl_next);
1659                         rctl_rule_release(link->rrl_rule);
1660                         uma_zfree(rctl_rule_link_zone, link);
1661                 }
1662
1663                 /*
1664                  * Replace lists and we're done.
1665                  *
1666                  * XXX: Is there any way to switch list heads instead
1667                  *      of iterating here?
1668                  */
1669                 while (!LIST_EMPTY(&newrules)) {
1670                         newlink = LIST_FIRST(&newrules);
1671                         LIST_REMOVE(newlink, rrl_next);
1672                         LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1673                             newlink, rrl_next);
1674                 }
1675
1676                 rw_wunlock(&rctl_lock);
1677
1678                 return;
1679         }
1680
1681 goaround:
1682         rw_wunlock(&rctl_lock);
1683
1684         /*
1685          * Rule list changed while we were not holding the rctl_lock.
1686          * Free the new list and try again.
1687          */
1688         while (!LIST_EMPTY(&newrules)) {
1689                 newlink = LIST_FIRST(&newrules);
1690                 LIST_REMOVE(newlink, rrl_next);
1691                 if (newlink->rrl_rule != NULL)
1692                         rctl_rule_release(newlink->rrl_rule);
1693                 uma_zfree(rctl_rule_link_zone, newlink);
1694         }
1695
1696         goto again;
1697 }
1698
1699 /*
1700  * Assign RCTL rules to the newly created process.
1701  */
1702 int
1703 rctl_proc_fork(struct proc *parent, struct proc *child)
1704 {
1705         int error;
1706         struct rctl_rule_link *link;
1707         struct rctl_rule *rule;
1708
1709         LIST_INIT(&child->p_racct->r_rule_links);
1710
1711         KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
1712
1713         rw_wlock(&rctl_lock);
1714
1715         /*
1716          * Go through limits applicable to the parent and assign them
1717          * to the child.  Rules with 'process' subject have to be duplicated
1718          * in order to make their rr_subject point to the new process.
1719          */
1720         LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1721                 if (link->rrl_rule->rr_subject_type ==
1722                     RCTL_SUBJECT_TYPE_PROCESS) {
1723                         rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1724                         if (rule == NULL)
1725                                 goto fail;
1726                         KASSERT(rule->rr_subject.rs_proc == parent,
1727                             ("rule->rr_subject.rs_proc != parent"));
1728                         rule->rr_subject.rs_proc = child;
1729                         error = rctl_racct_add_rule_locked(child->p_racct,
1730                             rule);
1731                         rctl_rule_release(rule);
1732                         if (error != 0)
1733                                 goto fail;
1734                 } else {
1735                         error = rctl_racct_add_rule_locked(child->p_racct,
1736                             link->rrl_rule);
1737                         if (error != 0)
1738                                 goto fail;
1739                 }
1740         }
1741
1742         rw_wunlock(&rctl_lock);
1743         return (0);
1744
1745 fail:
1746         while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1747                 link = LIST_FIRST(&child->p_racct->r_rule_links);
1748                 LIST_REMOVE(link, rrl_next);
1749                 rctl_rule_release(link->rrl_rule);
1750                 uma_zfree(rctl_rule_link_zone, link);
1751         }
1752         rw_wunlock(&rctl_lock);
1753         return (EAGAIN);
1754 }
1755
1756 /*
1757  * Release rules attached to the racct.
1758  */
1759 void
1760 rctl_racct_release(struct racct *racct)
1761 {
1762         struct rctl_rule_link *link;
1763
1764         rw_wlock(&rctl_lock);
1765         while (!LIST_EMPTY(&racct->r_rule_links)) {
1766                 link = LIST_FIRST(&racct->r_rule_links);
1767                 LIST_REMOVE(link, rrl_next);
1768                 rctl_rule_release(link->rrl_rule);
1769                 uma_zfree(rctl_rule_link_zone, link);
1770         }
1771         rw_wunlock(&rctl_lock);
1772 }
1773
1774 static void
1775 rctl_init(void)
1776 {
1777
1778         rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1779             sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1780             UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1781         rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1782             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1783 }
1784
1785 #else /* !RCTL */
1786
/*
 * Stub for the !RCTL build: both arguments are ignored and the call
 * always fails with ENOSYS.
 */
int
sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{

        return (ENOSYS);
}
1793
/*
 * Stub for the !RCTL build: both arguments are ignored and the call
 * always fails with ENOSYS.
 */
int
sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{

        return (ENOSYS);
}
1800
/*
 * Stub for the !RCTL build: both arguments are ignored and the call
 * always fails with ENOSYS.
 */
int
sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{

        return (ENOSYS);
}
1807
/*
 * Stub for the !RCTL build: both arguments are ignored and the call
 * always fails with ENOSYS.
 */
int
sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{

        return (ENOSYS);
}
1814
/*
 * Stub for the !RCTL build: both arguments are ignored and the call
 * always fails with ENOSYS.
 */
int
sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{

        return (ENOSYS);
}
1821
1822 #endif /* !RCTL */