CyberLeo.Net >> Repos - FreeBSD/stable/9.git/blob - sys/kern/kern_rctl.c
Copy head to stable/9 as part of 9.0-RELEASE release cycle.
[FreeBSD/stable/9.git] / sys / kern / kern_rctl.c
1 /*-
2  * Copyright (c) 2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/malloc.h>
38 #include <sys/queue.h>
39 #include <sys/refcount.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/limits.h>
43 #include <sys/loginclass.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/racct.h>
47 #include <sys/rctl.h>
48 #include <sys/resourcevar.h>
49 #include <sys/sx.h>
50 #include <sys/sysent.h>
51 #include <sys/sysproto.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/eventhandler.h>
55 #include <sys/lock.h>
56 #include <sys/mutex.h>
57 #include <sys/rwlock.h>
58 #include <sys/sbuf.h>
59 #include <sys/taskqueue.h>
60 #include <sys/tree.h>
61 #include <vm/uma.h>
62
63 #ifdef RCTL
64 #ifndef RACCT
65 #error "The RCTL option requires the RACCT option"
66 #endif
67
68 FEATURE(rctl, "Resource Limits");
69
70 #define HRF_DEFAULT             0
71 #define HRF_DONT_INHERIT        1
72 #define HRF_DONT_ACCUMULATE     2
73
74 /* Default buffer size for rctl_get_rules(2). */
75 #define RCTL_DEFAULT_BUFSIZE    4096
76 #define RCTL_LOG_BUFSIZE        128
77
78 /*
79  * 'rctl_rule_link' connects a rule with every racct it's related to.
80  * For example, rule 'user:X:openfiles:deny=N/process' is linked
81  * with uidinfo for user X, and to each process of that user.
82  */
83 struct rctl_rule_link {
84         LIST_ENTRY(rctl_rule_link)      rrl_next;
85         struct rctl_rule                *rrl_rule;
86         int                             rrl_exceeded;
87 };
88
/*
 * One entry of a name <-> value lookup table.  Tables are arrays of these
 * terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;	/* textual name */
	int		d_value;	/* numeric constant it stands for */
};
93
94 static struct dict subjectnames[] = {
95         { "process", RCTL_SUBJECT_TYPE_PROCESS },
96         { "user", RCTL_SUBJECT_TYPE_USER },
97         { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
98         { "jail", RCTL_SUBJECT_TYPE_JAIL },
99         { NULL, -1 }};
100
101 static struct dict resourcenames[] = {
102         { "cputime", RACCT_CPU },
103         { "datasize", RACCT_DATA },
104         { "stacksize", RACCT_STACK },
105         { "coredumpsize", RACCT_CORE },
106         { "memoryuse", RACCT_RSS },
107         { "memorylocked", RACCT_MEMLOCK },
108         { "maxproc", RACCT_NPROC },
109         { "openfiles", RACCT_NOFILE },
110         { "vmemoryuse", RACCT_VMEM },
111         { "pseudoterminals", RACCT_NPTS },
112         { "swapuse", RACCT_SWAP },
113         { "nthr", RACCT_NTHR },
114         { "msgqqueued", RACCT_MSGQQUEUED },
115         { "msgqsize", RACCT_MSGQSIZE },
116         { "nmsgq", RACCT_NMSGQ },
117         { "nsem", RACCT_NSEM },
118         { "nsemop", RACCT_NSEMOP },
119         { "nshm", RACCT_NSHM },
120         { "shmsize", RACCT_SHMSIZE },
121         { "wallclock", RACCT_WALLCLOCK },
122         { NULL, -1 }};
123
124 static struct dict actionnames[] = {
125         { "sighup", RCTL_ACTION_SIGHUP },
126         { "sigint", RCTL_ACTION_SIGINT },
127         { "sigquit", RCTL_ACTION_SIGQUIT },
128         { "sigill", RCTL_ACTION_SIGILL },
129         { "sigtrap", RCTL_ACTION_SIGTRAP },
130         { "sigabrt", RCTL_ACTION_SIGABRT },
131         { "sigemt", RCTL_ACTION_SIGEMT },
132         { "sigfpe", RCTL_ACTION_SIGFPE },
133         { "sigkill", RCTL_ACTION_SIGKILL },
134         { "sigbus", RCTL_ACTION_SIGBUS },
135         { "sigsegv", RCTL_ACTION_SIGSEGV },
136         { "sigsys", RCTL_ACTION_SIGSYS },
137         { "sigpipe", RCTL_ACTION_SIGPIPE },
138         { "sigalrm", RCTL_ACTION_SIGALRM },
139         { "sigterm", RCTL_ACTION_SIGTERM },
140         { "sigurg", RCTL_ACTION_SIGURG },
141         { "sigstop", RCTL_ACTION_SIGSTOP },
142         { "sigtstp", RCTL_ACTION_SIGTSTP },
143         { "sigchld", RCTL_ACTION_SIGCHLD },
144         { "sigttin", RCTL_ACTION_SIGTTIN },
145         { "sigttou", RCTL_ACTION_SIGTTOU },
146         { "sigio", RCTL_ACTION_SIGIO },
147         { "sigxcpu", RCTL_ACTION_SIGXCPU },
148         { "sigxfsz", RCTL_ACTION_SIGXFSZ },
149         { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
150         { "sigprof", RCTL_ACTION_SIGPROF },
151         { "sigwinch", RCTL_ACTION_SIGWINCH },
152         { "siginfo", RCTL_ACTION_SIGINFO },
153         { "sigusr1", RCTL_ACTION_SIGUSR1 },
154         { "sigusr2", RCTL_ACTION_SIGUSR2 },
155         { "sigthr", RCTL_ACTION_SIGTHR },
156         { "deny", RCTL_ACTION_DENY },
157         { "log", RCTL_ACTION_LOG },
158         { "devctl", RCTL_ACTION_DEVCTL },
159         { NULL, -1 }};
160
161 static void rctl_init(void);
162 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
163
164 static uma_zone_t rctl_rule_link_zone;
165 static uma_zone_t rctl_rule_zone;
166 static struct rwlock rctl_lock;
167 RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");
168
169 static int rctl_rule_fully_specified(const struct rctl_rule *rule);
170 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
171
172 MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
173
174 static const char *
175 rctl_subject_type_name(int subject)
176 {
177         int i;
178
179         for (i = 0; subjectnames[i].d_name != NULL; i++) {
180                 if (subjectnames[i].d_value == subject)
181                         return (subjectnames[i].d_name);
182         }
183
184         panic("rctl_subject_type_name: unknown subject type %d", subject);
185 }
186
187 static const char *
188 rctl_action_name(int action)
189 {
190         int i;
191
192         for (i = 0; actionnames[i].d_name != NULL; i++) {
193                 if (actionnames[i].d_value == action)
194                         return (actionnames[i].d_name);
195         }
196
197         panic("rctl_action_name: unknown action %d", action);
198 }
199
200 const char *
201 rctl_resource_name(int resource)
202 {
203         int i;
204
205         for (i = 0; resourcenames[i].d_name != NULL; i++) {
206                 if (resourcenames[i].d_value == resource)
207                         return (resourcenames[i].d_name);
208         }
209
210         panic("rctl_resource_name: unknown resource %d", resource);
211 }
212
213 /*
214  * Return the amount of resource that can be allocated by 'p' before
215  * hitting 'rule'.
216  */
217 static int64_t
218 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
219 {
220         int resource;
221         int64_t available = INT64_MAX;
222         struct ucred *cred = p->p_ucred;
223
224         rw_assert(&rctl_lock, RA_LOCKED);
225
226         resource = rule->rr_resource;
227         switch (rule->rr_per) {
228         case RCTL_SUBJECT_TYPE_PROCESS:
229                 available = rule->rr_amount -
230                     p->p_racct->r_resources[resource];
231                 break;
232         case RCTL_SUBJECT_TYPE_USER:
233                 available = rule->rr_amount -
234                     cred->cr_ruidinfo->ui_racct->r_resources[resource];
235                 break;
236         case RCTL_SUBJECT_TYPE_LOGINCLASS:
237                 available = rule->rr_amount -
238                     cred->cr_loginclass->lc_racct->r_resources[resource];
239                 break;
240         case RCTL_SUBJECT_TYPE_JAIL:
241                 available = rule->rr_amount -
242                     cred->cr_prison->pr_prison_racct->prr_racct->
243                         r_resources[resource];
244                 break;
245         default:
246                 panic("rctl_compute_available: unknown per %d",
247                     rule->rr_per);
248         }
249
250         return (available);
251 }
252
253 /*
254  * Return non-zero if allocating 'amount' by proc 'p' would exceed
255  * resource limit specified by 'rule'.
256  */
257 static int
258 rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
259     int64_t amount)
260 {
261         int64_t available;
262
263         rw_assert(&rctl_lock, RA_LOCKED);
264
265         available = rctl_available_resource(p, rule);
266         if (available >= amount)
267                 return (0);
268
269         return (1);
270 }
271
272 /*
273  * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
274  * to what it keeps allocated now.  Returns non-zero if the allocation should
275  * be denied, 0 otherwise.
276  */
277 int
278 rctl_enforce(struct proc *p, int resource, uint64_t amount)
279 {
280         struct rctl_rule *rule;
281         struct rctl_rule_link *link;
282         struct sbuf sb;
283         int should_deny = 0;
284         char *buf;
285         static int curtime = 0;
286         static struct timeval lasttime;
287
288         rw_rlock(&rctl_lock);
289
290         /*
291          * There may be more than one matching rule; go through all of them.
292          * Denial should be done last, after logging and sending signals.
293          */
294         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
295                 rule = link->rrl_rule;
296                 if (rule->rr_resource != resource)
297                         continue;
298                 if (!rctl_would_exceed(p, rule, amount)) {
299                         link->rrl_exceeded = 0;
300                         continue;
301                 }
302
303                 switch (rule->rr_action) {
304                 case RCTL_ACTION_DENY:
305                         should_deny = 1;
306                         continue;
307                 case RCTL_ACTION_LOG:
308                         /*
309                          * If rrl_exceeded != 0, it means we've already
310                          * logged a warning for this process.
311                          */
312                         if (link->rrl_exceeded != 0)
313                                 continue;
314
315                         if (!ppsratecheck(&lasttime, &curtime, 10))
316                                 continue;
317
318                         buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
319                         if (buf == NULL) {
320                                 printf("rctl_enforce: out of memory\n");
321                                 continue;
322                         }
323                         sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
324                         rctl_rule_to_sbuf(&sb, rule);
325                         sbuf_finish(&sb);
326                         printf("rctl: rule \"%s\" matched by pid %d "
327                             "(%s), uid %d, jail %s\n", sbuf_data(&sb),
328                             p->p_pid, p->p_comm, p->p_ucred->cr_uid,
329                             p->p_ucred->cr_prison->pr_prison_racct->prr_name);
330                         sbuf_delete(&sb);
331                         free(buf, M_RCTL);
332                         link->rrl_exceeded = 1;
333                         continue;
334                 case RCTL_ACTION_DEVCTL:
335                         if (link->rrl_exceeded != 0)
336                                 continue;
337
338                         buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
339                         if (buf == NULL) {
340                                 printf("rctl_enforce: out of memory\n");
341                                 continue;
342                         }
343                         sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
344                         sbuf_printf(&sb, "rule=");
345                         rctl_rule_to_sbuf(&sb, rule);
346                         sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
347                             p->p_pid, p->p_ucred->cr_ruid,
348                             p->p_ucred->cr_prison->pr_prison_racct->prr_name);
349                         sbuf_finish(&sb);
350                         devctl_notify_f("RCTL", "rule", "matched",
351                             sbuf_data(&sb), M_NOWAIT);
352                         sbuf_delete(&sb);
353                         free(buf, M_RCTL);
354                         link->rrl_exceeded = 1;
355                         continue;
356                 default:
357                         if (link->rrl_exceeded != 0)
358                                 continue;
359
360                         KASSERT(rule->rr_action > 0 &&
361                             rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
362                             ("rctl_enforce: unknown action %d",
363                              rule->rr_action));
364
365                         /*
366                          * We're supposed to send a signal, but the process
367                          * is not fully initialized yet, probably because we
368                          * got called from fork1().  For now just deny the
369                          * allocation instead.
370                          */
371                         if (p->p_state != PRS_NORMAL) {
372                                 should_deny = 1;
373                                 continue;
374                         }
375
376                         /*
377                          * We're using the fact that RCTL_ACTION_SIG* values
378                          * are equal to their counterparts from sys/signal.h.
379                          */
380                         kern_psignal(p, rule->rr_action);
381                         link->rrl_exceeded = 1;
382                         continue;
383                 }
384         }
385
386         rw_runlock(&rctl_lock);
387
388         if (should_deny) {
389                 /*
390                  * Return fake error code; the caller should change it
391                  * into one proper for the situation - EFSIZ, ENOMEM etc.
392                  */
393                 return (EDOOFUS);
394         }
395
396         return (0);
397 }
398
399 uint64_t
400 rctl_get_limit(struct proc *p, int resource)
401 {
402         struct rctl_rule *rule;
403         struct rctl_rule_link *link;
404         uint64_t amount = UINT64_MAX;
405
406         rw_rlock(&rctl_lock);
407
408         /*
409          * There may be more than one matching rule; go through all of them.
410          * Denial should be done last, after logging and sending signals.
411          */
412         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
413                 rule = link->rrl_rule;
414                 if (rule->rr_resource != resource)
415                         continue;
416                 if (rule->rr_action != RCTL_ACTION_DENY)
417                         continue;
418                 if (rule->rr_amount < amount)
419                         amount = rule->rr_amount;
420         }
421
422         rw_runlock(&rctl_lock);
423
424         return (amount);
425 }
426
427 uint64_t
428 rctl_get_available(struct proc *p, int resource)
429 {
430         struct rctl_rule *rule;
431         struct rctl_rule_link *link;
432         int64_t available, minavailable, allocated;
433
434         minavailable = INT64_MAX;
435
436         rw_rlock(&rctl_lock);
437
438         /*
439          * There may be more than one matching rule; go through all of them.
440          * Denial should be done last, after logging and sending signals.
441          */
442         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
443                 rule = link->rrl_rule;
444                 if (rule->rr_resource != resource)
445                         continue;
446                 if (rule->rr_action != RCTL_ACTION_DENY)
447                         continue;
448                 available = rctl_available_resource(p, rule);
449                 if (available < minavailable)
450                         minavailable = available;
451         }
452
453         rw_runlock(&rctl_lock);
454
455         /*
456          * XXX: Think about this _hard_.
457          */
458         allocated = p->p_racct->r_resources[resource];
459         if (minavailable < INT64_MAX - allocated)
460                 minavailable += allocated;
461         if (minavailable < 0)
462                 minavailable = 0;
463         return (minavailable);
464 }
465
466 static int
467 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
468 {
469
470         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
471                 if (rule->rr_subject_type != filter->rr_subject_type)
472                         return (0);
473
474                 switch (filter->rr_subject_type) {
475                 case RCTL_SUBJECT_TYPE_PROCESS:
476                         if (filter->rr_subject.rs_proc != NULL &&
477                             rule->rr_subject.rs_proc !=
478                             filter->rr_subject.rs_proc)
479                                 return (0);
480                         break;
481                 case RCTL_SUBJECT_TYPE_USER:
482                         if (filter->rr_subject.rs_uip != NULL &&
483                             rule->rr_subject.rs_uip !=
484                             filter->rr_subject.rs_uip)
485                                 return (0);
486                         break;
487                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
488                         if (filter->rr_subject.rs_loginclass != NULL &&
489                             rule->rr_subject.rs_loginclass !=
490                             filter->rr_subject.rs_loginclass)
491                                 return (0);
492                         break;
493                 case RCTL_SUBJECT_TYPE_JAIL:
494                         if (filter->rr_subject.rs_prison_racct != NULL &&
495                             rule->rr_subject.rs_prison_racct !=
496                             filter->rr_subject.rs_prison_racct)
497                                 return (0);
498                         break;
499                 default:
500                         panic("rctl_rule_matches: unknown subject type %d",
501                             filter->rr_subject_type);
502                 }
503         }
504
505         if (filter->rr_resource != RACCT_UNDEFINED) {
506                 if (rule->rr_resource != filter->rr_resource)
507                         return (0);
508         }
509
510         if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
511                 if (rule->rr_action != filter->rr_action)
512                         return (0);
513         }
514
515         if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
516                 if (rule->rr_amount != filter->rr_amount)
517                         return (0);
518         }
519
520         if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
521                 if (rule->rr_per != filter->rr_per)
522                         return (0);
523         }
524
525         return (1);
526 }
527
528 static int
529 str2value(const char *str, int *value, struct dict *table)
530 {
531         int i;
532
533         if (value == NULL)
534                 return (EINVAL);
535
536         for (i = 0; table[i].d_name != NULL; i++) {
537                 if (strcasecmp(table[i].d_name, str) == 0) {
538                         *value =  table[i].d_value;
539                         return (0);
540                 }
541         }
542
543         return (EINVAL);
544 }
545
546 static int
547 str2id(const char *str, id_t *value)
548 {
549         char *end;
550
551         if (str == NULL)
552                 return (EINVAL);
553
554         *value = strtoul(str, &end, 10);
555         if ((size_t)(end - str) != strlen(str))
556                 return (EINVAL);
557
558         return (0);
559 }
560
561 static int
562 str2int64(const char *str, int64_t *value)
563 {
564         char *end;
565
566         if (str == NULL)
567                 return (EINVAL);
568
569         *value = strtoul(str, &end, 10);
570         if ((size_t)(end - str) != strlen(str))
571                 return (EINVAL);
572
573         return (0);
574 }
575
576 /*
577  * Connect the rule to the racct, increasing refcount for the rule.
578  */
579 static void
580 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
581 {
582         struct rctl_rule_link *link;
583
584         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
585
586         rctl_rule_acquire(rule);
587         link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
588         link->rrl_rule = rule;
589         link->rrl_exceeded = 0;
590
591         rw_wlock(&rctl_lock);
592         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
593         rw_wunlock(&rctl_lock);
594 }
595
596 static int
597 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
598 {
599         struct rctl_rule_link *link;
600
601         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
602         rw_assert(&rctl_lock, RA_WLOCKED);
603
604         link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
605         if (link == NULL)
606                 return (ENOMEM);
607         rctl_rule_acquire(rule);
608         link->rrl_rule = rule;
609         link->rrl_exceeded = 0;
610
611         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
612         return (0);
613 }
614
615 /*
616  * Remove limits for a rules matching the filter and release
617  * the refcounts for the rules, possibly freeing them.  Returns
618  * the number of limit structures removed.
619  */
620 static int
621 rctl_racct_remove_rules(struct racct *racct,
622     const struct rctl_rule *filter)
623 {
624         int removed = 0;
625         struct rctl_rule_link *link, *linktmp;
626
627         rw_assert(&rctl_lock, RA_WLOCKED);
628
629         LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
630                 if (!rctl_rule_matches(link->rrl_rule, filter))
631                         continue;
632
633                 LIST_REMOVE(link, rrl_next);
634                 rctl_rule_release(link->rrl_rule);
635                 uma_zfree(rctl_rule_link_zone, link);
636                 removed++;
637         }
638         return (removed);
639 }
640
641 static void
642 rctl_rule_acquire_subject(struct rctl_rule *rule)
643 {
644
645         switch (rule->rr_subject_type) {
646         case RCTL_SUBJECT_TYPE_UNDEFINED:
647         case RCTL_SUBJECT_TYPE_PROCESS:
648                 break;
649         case RCTL_SUBJECT_TYPE_JAIL:
650                 if (rule->rr_subject.rs_prison_racct != NULL)
651                         prison_racct_hold(rule->rr_subject.rs_prison_racct);
652                 break;
653         case RCTL_SUBJECT_TYPE_USER:
654                 if (rule->rr_subject.rs_uip != NULL)
655                         uihold(rule->rr_subject.rs_uip);
656                 break;
657         case RCTL_SUBJECT_TYPE_LOGINCLASS:
658                 if (rule->rr_subject.rs_loginclass != NULL)
659                         loginclass_hold(rule->rr_subject.rs_loginclass);
660                 break;
661         default:
662                 panic("rctl_rule_acquire_subject: unknown subject type %d",
663                     rule->rr_subject_type);
664         }
665 }
666
667 static void
668 rctl_rule_release_subject(struct rctl_rule *rule)
669 {
670
671         switch (rule->rr_subject_type) {
672         case RCTL_SUBJECT_TYPE_UNDEFINED:
673         case RCTL_SUBJECT_TYPE_PROCESS:
674                 break;
675         case RCTL_SUBJECT_TYPE_JAIL:
676                 if (rule->rr_subject.rs_prison_racct != NULL)
677                         prison_racct_free(rule->rr_subject.rs_prison_racct);
678                 break;
679         case RCTL_SUBJECT_TYPE_USER:
680                 if (rule->rr_subject.rs_uip != NULL)
681                         uifree(rule->rr_subject.rs_uip);
682                 break;
683         case RCTL_SUBJECT_TYPE_LOGINCLASS:
684                 if (rule->rr_subject.rs_loginclass != NULL)
685                         loginclass_free(rule->rr_subject.rs_loginclass);
686                 break;
687         default:
688                 panic("rctl_rule_release_subject: unknown subject type %d",
689                     rule->rr_subject_type);
690         }
691 }
692
693 struct rctl_rule *
694 rctl_rule_alloc(int flags)
695 {
696         struct rctl_rule *rule;
697
698         rule = uma_zalloc(rctl_rule_zone, flags);
699         if (rule == NULL)
700                 return (NULL);
701         rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
702         rule->rr_subject.rs_proc = NULL;
703         rule->rr_subject.rs_uip = NULL;
704         rule->rr_subject.rs_loginclass = NULL;
705         rule->rr_subject.rs_prison_racct = NULL;
706         rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
707         rule->rr_resource = RACCT_UNDEFINED;
708         rule->rr_action = RCTL_ACTION_UNDEFINED;
709         rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
710         refcount_init(&rule->rr_refcount, 1);
711
712         return (rule);
713 }
714
715 struct rctl_rule *
716 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
717 {
718         struct rctl_rule *copy;
719
720         copy = uma_zalloc(rctl_rule_zone, flags);
721         if (copy == NULL)
722                 return (NULL);
723         copy->rr_subject_type = rule->rr_subject_type;
724         copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
725         copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
726         copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
727         copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
728         copy->rr_per = rule->rr_per;
729         copy->rr_resource = rule->rr_resource;
730         copy->rr_action = rule->rr_action;
731         copy->rr_amount = rule->rr_amount;
732         refcount_init(&copy->rr_refcount, 1);
733         rctl_rule_acquire_subject(copy);
734
735         return (copy);
736 }
737
738 void
739 rctl_rule_acquire(struct rctl_rule *rule)
740 {
741
742         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
743
744         refcount_acquire(&rule->rr_refcount);
745 }
746
747 static void
748 rctl_rule_free(void *context, int pending)
749 {
750         struct rctl_rule *rule;
751         
752         rule = (struct rctl_rule *)context;
753
754         KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
755         
756         /*
757          * We don't need locking here; rule is guaranteed to be inaccessible.
758          */
759         
760         rctl_rule_release_subject(rule);
761         uma_zfree(rctl_rule_zone, rule);
762 }
763
764 void
765 rctl_rule_release(struct rctl_rule *rule)
766 {
767
768         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
769
770         if (refcount_release(&rule->rr_refcount)) {
771                 /*
772                  * rctl_rule_release() is often called when iterating
773                  * over all the uidinfo structures in the system,
774                  * holding uihashtbl_lock.  Since rctl_rule_free()
775                  * might end up calling uifree(), this would lead
776                  * to lock recursion.  Use taskqueue to avoid this.
777                  */
778                 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
779                 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
780         }
781 }
782
783 static int
784 rctl_rule_fully_specified(const struct rctl_rule *rule)
785 {
786
787         switch (rule->rr_subject_type) {
788         case RCTL_SUBJECT_TYPE_UNDEFINED:
789                 return (0);
790         case RCTL_SUBJECT_TYPE_PROCESS:
791                 if (rule->rr_subject.rs_proc == NULL)
792                         return (0);
793                 break;
794         case RCTL_SUBJECT_TYPE_USER:
795                 if (rule->rr_subject.rs_uip == NULL)
796                         return (0);
797                 break;
798         case RCTL_SUBJECT_TYPE_LOGINCLASS:
799                 if (rule->rr_subject.rs_loginclass == NULL)
800                         return (0);
801                 break;
802         case RCTL_SUBJECT_TYPE_JAIL:
803                 if (rule->rr_subject.rs_prison_racct == NULL)
804                         return (0);
805                 break;
806         default:
807                 panic("rctl_rule_fully_specified: unknown subject type %d",
808                     rule->rr_subject_type);
809         }
810         if (rule->rr_resource == RACCT_UNDEFINED)
811                 return (0);
812         if (rule->rr_action == RCTL_ACTION_UNDEFINED)
813                 return (0);
814         if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
815                 return (0);
816         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
817                 return (0);
818
819         return (1);
820 }
821
822 static int
823 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
824 {
825         int error = 0;
826         char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
827              *amountstr, *perstr;
828         struct rctl_rule *rule;
829         id_t id;
830
831         rule = rctl_rule_alloc(M_WAITOK);
832
833         subjectstr = strsep(&rulestr, ":");
834         subject_idstr = strsep(&rulestr, ":");
835         resourcestr = strsep(&rulestr, ":");
836         actionstr = strsep(&rulestr, "=/");
837         amountstr = strsep(&rulestr, "/");
838         perstr = rulestr;
839
840         if (subjectstr == NULL || subjectstr[0] == '\0')
841                 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
842         else {
843                 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
844                 if (error != 0)
845                         goto out;
846         }
847
848         if (subject_idstr == NULL || subject_idstr[0] == '\0') {
849                 rule->rr_subject.rs_proc = NULL;
850                 rule->rr_subject.rs_uip = NULL;
851                 rule->rr_subject.rs_loginclass = NULL;
852                 rule->rr_subject.rs_prison_racct = NULL;
853         } else {
854                 switch (rule->rr_subject_type) {
855                 case RCTL_SUBJECT_TYPE_UNDEFINED:
856                         error = EINVAL;
857                         goto out;
858                 case RCTL_SUBJECT_TYPE_PROCESS:
859                         error = str2id(subject_idstr, &id);
860                         if (error != 0)
861                                 goto out;
862                         sx_assert(&allproc_lock, SA_LOCKED);
863                         rule->rr_subject.rs_proc = pfind(id);
864                         if (rule->rr_subject.rs_proc == NULL) {
865                                 error = ESRCH;
866                                 goto out;
867                         }
868                         PROC_UNLOCK(rule->rr_subject.rs_proc);
869                         break;
870                 case RCTL_SUBJECT_TYPE_USER:
871                         error = str2id(subject_idstr, &id);
872                         if (error != 0)
873                                 goto out;
874                         rule->rr_subject.rs_uip = uifind(id);
875                         break;
876                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
877                         rule->rr_subject.rs_loginclass =
878                             loginclass_find(subject_idstr);
879                         if (rule->rr_subject.rs_loginclass == NULL) {
880                                 error = ENAMETOOLONG;
881                                 goto out;
882                         }
883                         break;
884                 case RCTL_SUBJECT_TYPE_JAIL:
885                         rule->rr_subject.rs_prison_racct =
886                             prison_racct_find(subject_idstr);
887                         if (rule->rr_subject.rs_prison_racct == NULL) {
888                                 error = ENAMETOOLONG;
889                                 goto out;
890                         }
891                         break;
892                default:
893                        panic("rctl_string_to_rule: unknown subject type %d",
894                            rule->rr_subject_type);
895                }
896         }
897
898         if (resourcestr == NULL || resourcestr[0] == '\0')
899                 rule->rr_resource = RACCT_UNDEFINED;
900         else {
901                 error = str2value(resourcestr, &rule->rr_resource,
902                     resourcenames);
903                 if (error != 0)
904                         goto out;
905         }
906
907         if (actionstr == NULL || actionstr[0] == '\0')
908                 rule->rr_action = RCTL_ACTION_UNDEFINED;
909         else {
910                 error = str2value(actionstr, &rule->rr_action, actionnames);
911                 if (error != 0)
912                         goto out;
913         }
914
915         if (amountstr == NULL || amountstr[0] == '\0')
916                 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
917         else {
918                 error = str2int64(amountstr, &rule->rr_amount);
919                 if (error != 0)
920                         goto out;
921                 if (RACCT_IS_IN_MILLIONS(rule->rr_resource))
922                         rule->rr_amount *= 1000000;
923         }
924
925         if (perstr == NULL || perstr[0] == '\0')
926                 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
927         else {
928                 error = str2value(perstr, &rule->rr_per, subjectnames);
929                 if (error != 0)
930                         goto out;
931         }
932
933 out:
934         if (error == 0)
935                 *rulep = rule;
936         else
937                 rctl_rule_release(rule);
938
939         return (error);
940 }
941
942 /*
943  * Link a rule with all the subjects it applies to.
944  */
945 int
946 rctl_rule_add(struct rctl_rule *rule)
947 {
948         struct proc *p;
949         struct ucred *cred;
950         struct uidinfo *uip;
951         struct prison *pr;
952         struct prison_racct *prr;
953         struct loginclass *lc;
954         struct rctl_rule *rule2;
955         int match;
956
957         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
958
959         /*
960          * Some rules just don't make sense.  Note that the one below
961          * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU,
962          * for example, is not deniable in the racct sense, but the
963          * limit is enforced in a different way, so "deny" rules for %CPU
964          * do make sense.
965          */
966         if (rule->rr_action == RCTL_ACTION_DENY &&
967             (rule->rr_resource == RACCT_CPU ||
968             rule->rr_resource == RACCT_WALLCLOCK))
969                 return (EOPNOTSUPP);
970
971         if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
972             RACCT_IS_SLOPPY(rule->rr_resource))
973                 return (EOPNOTSUPP);
974
975         /*
976          * Make sure there are no duplicated rules.  Also, for the "deny"
977          * rules, remove ones differing only by "amount".
978          */
979         if (rule->rr_action == RCTL_ACTION_DENY) {
980                 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
981                 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
982                 rctl_rule_remove(rule2);
983                 rctl_rule_release(rule2);
984         } else
985                 rctl_rule_remove(rule);
986
987         switch (rule->rr_subject_type) {
988         case RCTL_SUBJECT_TYPE_PROCESS:
989                 p = rule->rr_subject.rs_proc;
990                 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
991                 /*
992                  * No resource limits for system processes.
993                  */
994                 if (p->p_flag & P_SYSTEM)
995                         return (EPERM);
996
997                 rctl_racct_add_rule(p->p_racct, rule);
998                 /*
999                  * In case of per-process rule, we don't have anything more
1000                  * to do.
1001                  */
1002                 return (0);
1003
1004         case RCTL_SUBJECT_TYPE_USER:
1005                 uip = rule->rr_subject.rs_uip;
1006                 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1007                 rctl_racct_add_rule(uip->ui_racct, rule);
1008                 break;
1009
1010         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1011                 lc = rule->rr_subject.rs_loginclass;
1012                 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1013                 rctl_racct_add_rule(lc->lc_racct, rule);
1014                 break;
1015
1016         case RCTL_SUBJECT_TYPE_JAIL:
1017                 prr = rule->rr_subject.rs_prison_racct;
1018                 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1019                 rctl_racct_add_rule(prr->prr_racct, rule);
1020                 break;
1021
1022         default:
1023                 panic("rctl_rule_add: unknown subject type %d",
1024                     rule->rr_subject_type);
1025         }
1026
1027         /*
1028          * Now go through all the processes and add the new rule to the ones
1029          * it applies to.
1030          */
1031         sx_assert(&allproc_lock, SA_LOCKED);
1032         FOREACH_PROC_IN_SYSTEM(p) {
1033                 if (p->p_flag & P_SYSTEM)
1034                         continue;
1035                 cred = p->p_ucred;
1036                 switch (rule->rr_subject_type) {
1037                 case RCTL_SUBJECT_TYPE_USER:
1038                         if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1039                             cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1040                                 break;
1041                         continue;
1042                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1043                         if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1044                                 break;
1045                         continue;
1046                 case RCTL_SUBJECT_TYPE_JAIL:
1047                         match = 0;
1048                         for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1049                                 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1050                                         match = 1;
1051                                         break;
1052                                 }
1053                         }
1054                         if (match)
1055                                 break;
1056                         continue;
1057                 default:
1058                         panic("rctl_rule_add: unknown subject type %d",
1059                             rule->rr_subject_type);
1060                 }
1061
1062                 rctl_racct_add_rule(p->p_racct, rule);
1063         }
1064
1065         return (0);
1066 }
1067
1068 static void
1069 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1070 {
1071         struct rctl_rule *filter = (struct rctl_rule *)arg2;
1072         int found = 0;
1073
1074         rw_wlock(&rctl_lock);
1075         found += rctl_racct_remove_rules(racct, filter);
1076         rw_wunlock(&rctl_lock);
1077
1078         *((int *)arg3) += found;
1079 }
1080
1081 /*
1082  * Remove all rules that match the filter.
1083  */
1084 int
1085 rctl_rule_remove(struct rctl_rule *filter)
1086 {
1087         int found = 0;
1088         struct proc *p;
1089
1090         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1091             filter->rr_subject.rs_proc != NULL) {
1092                 p = filter->rr_subject.rs_proc;
1093                 rw_wlock(&rctl_lock);
1094                 found = rctl_racct_remove_rules(p->p_racct, filter);
1095                 rw_wunlock(&rctl_lock);
1096                 if (found)
1097                         return (0);
1098                 return (ESRCH);
1099         }
1100
1101         loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1102             (void *)&found);
1103         ui_racct_foreach(rctl_rule_remove_callback, filter,
1104             (void *)&found);
1105         prison_racct_foreach(rctl_rule_remove_callback, filter,
1106             (void *)&found);
1107
1108         sx_assert(&allproc_lock, SA_LOCKED);
1109         rw_wlock(&rctl_lock);
1110         FOREACH_PROC_IN_SYSTEM(p) {
1111                 found += rctl_racct_remove_rules(p->p_racct, filter);
1112         }
1113         rw_wunlock(&rctl_lock);
1114
1115         if (found)
1116                 return (0);
1117         return (ESRCH);
1118 }
1119
1120 /*
1121  * Appends a rule to the sbuf.
1122  */
1123 static void
1124 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1125 {
1126         int64_t amount;
1127
1128         sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1129
1130         switch (rule->rr_subject_type) {
1131         case RCTL_SUBJECT_TYPE_PROCESS:
1132                 if (rule->rr_subject.rs_proc == NULL)
1133                         sbuf_printf(sb, ":");
1134                 else
1135                         sbuf_printf(sb, "%d:",
1136                             rule->rr_subject.rs_proc->p_pid);
1137                 break;
1138         case RCTL_SUBJECT_TYPE_USER:
1139                 if (rule->rr_subject.rs_uip == NULL)
1140                         sbuf_printf(sb, ":");
1141                 else
1142                         sbuf_printf(sb, "%d:",
1143                             rule->rr_subject.rs_uip->ui_uid);
1144                 break;
1145         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1146                 if (rule->rr_subject.rs_loginclass == NULL)
1147                         sbuf_printf(sb, ":");
1148                 else
1149                         sbuf_printf(sb, "%s:",
1150                             rule->rr_subject.rs_loginclass->lc_name);
1151                 break;
1152         case RCTL_SUBJECT_TYPE_JAIL:
1153                 if (rule->rr_subject.rs_prison_racct == NULL)
1154                         sbuf_printf(sb, ":");
1155                 else
1156                         sbuf_printf(sb, "%s:",
1157                             rule->rr_subject.rs_prison_racct->prr_name);
1158                 break;
1159         default:
1160                 panic("rctl_rule_to_sbuf: unknown subject type %d",
1161                     rule->rr_subject_type);
1162         }
1163
1164         amount = rule->rr_amount;
1165         if (amount != RCTL_AMOUNT_UNDEFINED &&
1166             RACCT_IS_IN_MILLIONS(rule->rr_resource))
1167                 amount /= 1000000;
1168
1169         sbuf_printf(sb, "%s:%s=%jd",
1170             rctl_resource_name(rule->rr_resource),
1171             rctl_action_name(rule->rr_action),
1172             amount);
1173
1174         if (rule->rr_per != rule->rr_subject_type)
1175                 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1176 }
1177
1178 /*
1179  * Routine used by RCTL syscalls to read in input string.
1180  */
1181 static int
1182 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1183 {
1184         int error;
1185         char *str;
1186
1187         if (inbuflen <= 0)
1188                 return (EINVAL);
1189
1190         str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1191         error = copyinstr(inbufp, str, inbuflen, NULL);
1192         if (error != 0) {
1193                 free(str, M_RCTL);
1194                 return (error);
1195         }
1196
1197         *inputstr = str;
1198
1199         return (0);
1200 }
1201
1202 /*
1203  * Routine used by RCTL syscalls to write out output string.
1204  */
1205 static int
1206 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1207 {
1208         int error;
1209
1210         if (outputsbuf == NULL)
1211                 return (0);
1212
1213         sbuf_finish(outputsbuf);
1214         if (outbuflen < sbuf_len(outputsbuf) + 1) {
1215                 sbuf_delete(outputsbuf);
1216                 return (ERANGE);
1217         }
1218         error = copyout(sbuf_data(outputsbuf), outbufp,
1219             sbuf_len(outputsbuf) + 1);
1220         sbuf_delete(outputsbuf);
1221         return (error);
1222 }
1223
1224 static struct sbuf *
1225 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1226 {
1227         int i;
1228         int64_t amount;
1229         struct sbuf *sb;
1230
1231         sb = sbuf_new_auto();
1232         for (i = 0; i <= RACCT_MAX; i++) {
1233                 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1234                         continue;
1235                 amount = racct->r_resources[i];
1236                 if (RACCT_IS_IN_MILLIONS(i))
1237                         amount /= 1000000;
1238                 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1239         }
1240         sbuf_setpos(sb, sbuf_len(sb) - 1);
1241         return (sb);
1242 }
1243
1244 int
1245 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1246 {
1247         int error;
1248         char *inputstr;
1249         struct rctl_rule *filter;
1250         struct sbuf *outputsbuf = NULL;
1251         struct proc *p;
1252         struct uidinfo *uip;
1253         struct loginclass *lc;
1254         struct prison_racct *prr;
1255
1256         error = priv_check(td, PRIV_RCTL_GET_RACCT);
1257         if (error != 0)
1258                 return (error);
1259
1260         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1261         if (error != 0)
1262                 return (error);
1263
1264         sx_slock(&allproc_lock);
1265         error = rctl_string_to_rule(inputstr, &filter);
1266         free(inputstr, M_RCTL);
1267         if (error != 0) {
1268                 sx_sunlock(&allproc_lock);
1269                 return (error);
1270         }
1271
1272         switch (filter->rr_subject_type) {
1273         case RCTL_SUBJECT_TYPE_PROCESS:
1274                 p = filter->rr_subject.rs_proc;
1275                 if (p == NULL) {
1276                         error = EINVAL;
1277                         goto out;
1278                 }
1279                 if (p->p_flag & P_SYSTEM) {
1280                         error = EINVAL;
1281                         goto out;
1282                 }
1283                 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1284                 break;
1285         case RCTL_SUBJECT_TYPE_USER:
1286                 uip = filter->rr_subject.rs_uip;
1287                 if (uip == NULL) {
1288                         error = EINVAL;
1289                         goto out;
1290                 }
1291                 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1292                 break;
1293         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1294                 lc = filter->rr_subject.rs_loginclass;
1295                 if (lc == NULL) {
1296                         error = EINVAL;
1297                         goto out;
1298                 }
1299                 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1300                 break;
1301         case RCTL_SUBJECT_TYPE_JAIL:
1302                 prr = filter->rr_subject.rs_prison_racct;
1303                 if (prr == NULL) {
1304                         error = EINVAL;
1305                         goto out;
1306                 }
1307                 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1308                 break;
1309         default:
1310                 error = EINVAL;
1311         }
1312 out:
1313         rctl_rule_release(filter);
1314         sx_sunlock(&allproc_lock);
1315         if (error != 0)
1316                 return (error);
1317
1318         error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1319
1320         return (error);
1321 }
1322
1323 static void
1324 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1325 {
1326         struct rctl_rule *filter = (struct rctl_rule *)arg2;
1327         struct rctl_rule_link *link;
1328         struct sbuf *sb = (struct sbuf *)arg3;
1329
1330         rw_rlock(&rctl_lock);
1331         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1332                 if (!rctl_rule_matches(link->rrl_rule, filter))
1333                         continue;
1334                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1335                 sbuf_printf(sb, ",");
1336         }
1337         rw_runlock(&rctl_lock);
1338 }
1339
1340 int
1341 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1342 {
1343         int error;
1344         size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1345         char *inputstr, *buf;
1346         struct sbuf *sb;
1347         struct rctl_rule *filter;
1348         struct rctl_rule_link *link;
1349         struct proc *p;
1350
1351         error = priv_check(td, PRIV_RCTL_GET_RULES);
1352         if (error != 0)
1353                 return (error);
1354
1355         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1356         if (error != 0)
1357                 return (error);
1358
1359         sx_slock(&allproc_lock);
1360         error = rctl_string_to_rule(inputstr, &filter);
1361         free(inputstr, M_RCTL);
1362         if (error != 0) {
1363                 sx_sunlock(&allproc_lock);
1364                 return (error);
1365         }
1366
1367 again:
1368         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1369         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1370         KASSERT(sb != NULL, ("sbuf_new failed"));
1371
1372         sx_assert(&allproc_lock, SA_LOCKED);
1373         FOREACH_PROC_IN_SYSTEM(p) {
1374                 rw_rlock(&rctl_lock);
1375                 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1376                         /*
1377                          * Non-process rules will be added to the buffer later.
1378                          * Adding them here would result in duplicated output.
1379                          */
1380                         if (link->rrl_rule->rr_subject_type !=
1381                             RCTL_SUBJECT_TYPE_PROCESS)
1382                                 continue;
1383                         if (!rctl_rule_matches(link->rrl_rule, filter))
1384                                 continue;
1385                         rctl_rule_to_sbuf(sb, link->rrl_rule);
1386                         sbuf_printf(sb, ",");
1387                 }
1388                 rw_runlock(&rctl_lock);
1389         }
1390
1391         loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1392         ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1393         prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1394         if (sbuf_error(sb) == ENOMEM) {
1395                 sbuf_delete(sb);
1396                 free(buf, M_RCTL);
1397                 bufsize *= 4;
1398                 goto again;
1399         }
1400
1401         /*
1402          * Remove trailing ",".
1403          */
1404         if (sbuf_len(sb) > 0)
1405                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1406
1407         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1408
1409         rctl_rule_release(filter);
1410         sx_sunlock(&allproc_lock);
1411         free(buf, M_RCTL);
1412         return (error);
1413 }
1414
1415 int
1416 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1417 {
1418         int error;
1419         size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1420         char *inputstr, *buf;
1421         struct sbuf *sb;
1422         struct rctl_rule *filter;
1423         struct rctl_rule_link *link;
1424
1425         error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1426         if (error != 0)
1427                 return (error);
1428
1429         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1430         if (error != 0)
1431                 return (error);
1432
1433         sx_slock(&allproc_lock);
1434         error = rctl_string_to_rule(inputstr, &filter);
1435         free(inputstr, M_RCTL);
1436         if (error != 0) {
1437                 sx_sunlock(&allproc_lock);
1438                 return (error);
1439         }
1440
1441         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1442                 rctl_rule_release(filter);
1443                 sx_sunlock(&allproc_lock);
1444                 return (EINVAL);
1445         }
1446         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1447                 rctl_rule_release(filter);
1448                 sx_sunlock(&allproc_lock);
1449                 return (EOPNOTSUPP);
1450         }
1451         if (filter->rr_subject.rs_proc == NULL) {
1452                 rctl_rule_release(filter);
1453                 sx_sunlock(&allproc_lock);
1454                 return (EINVAL);
1455         }
1456
1457 again:
1458         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1459         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1460         KASSERT(sb != NULL, ("sbuf_new failed"));
1461
1462         rw_rlock(&rctl_lock);
1463         LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1464             rrl_next) {
1465                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1466                 sbuf_printf(sb, ",");
1467         }
1468         rw_runlock(&rctl_lock);
1469         if (sbuf_error(sb) == ENOMEM) {
1470                 sbuf_delete(sb);
1471                 free(buf, M_RCTL);
1472                 bufsize *= 4;
1473                 goto again;
1474         }
1475
1476         /*
1477          * Remove trailing ",".
1478          */
1479         if (sbuf_len(sb) > 0)
1480                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1481
1482         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1483         rctl_rule_release(filter);
1484         sx_sunlock(&allproc_lock);
1485         free(buf, M_RCTL);
1486         return (error);
1487 }
1488
1489 int
1490 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1491 {
1492         int error;
1493         struct rctl_rule *rule;
1494         char *inputstr;
1495
1496         error = priv_check(td, PRIV_RCTL_ADD_RULE);
1497         if (error != 0)
1498                 return (error);
1499
1500         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1501         if (error != 0)
1502                 return (error);
1503
1504         sx_slock(&allproc_lock);
1505         error = rctl_string_to_rule(inputstr, &rule);
1506         free(inputstr, M_RCTL);
1507         if (error != 0) {
1508                 sx_sunlock(&allproc_lock);
1509                 return (error);
1510         }
1511         /*
1512          * The 'per' part of a rule is optional.
1513          */
1514         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1515             rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1516                 rule->rr_per = rule->rr_subject_type;
1517
1518         if (!rctl_rule_fully_specified(rule)) {
1519                 error = EINVAL;
1520                 goto out;
1521         }
1522
1523         error = rctl_rule_add(rule);
1524
1525 out:
1526         rctl_rule_release(rule);
1527         sx_sunlock(&allproc_lock);
1528         return (error);
1529 }
1530
1531 int
1532 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1533 {
1534         int error;
1535         struct rctl_rule *filter;
1536         char *inputstr;
1537
1538         error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1539         if (error != 0)
1540                 return (error);
1541
1542         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1543         if (error != 0)
1544                 return (error);
1545
1546         sx_slock(&allproc_lock);
1547         error = rctl_string_to_rule(inputstr, &filter);
1548         free(inputstr, M_RCTL);
1549         if (error != 0) {
1550                 sx_sunlock(&allproc_lock);
1551                 return (error);
1552         }
1553
1554         error = rctl_rule_remove(filter);
1555         rctl_rule_release(filter);
1556         sx_sunlock(&allproc_lock);
1557
1558         return (error);
1559 }
1560
1561 /*
1562  * Update RCTL rule list after credential change.
1563  */
1564 void
1565 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1566 {
1567         int rulecnt, i;
1568         struct rctl_rule_link *link, *newlink;
1569         struct uidinfo *newuip;
1570         struct loginclass *newlc;
1571         struct prison_racct *newprr;
1572         LIST_HEAD(, rctl_rule_link) newrules;
1573
1574         newuip = newcred->cr_ruidinfo;
1575         newlc = newcred->cr_loginclass;
1576         newprr = newcred->cr_prison->pr_prison_racct;
1577         
1578         LIST_INIT(&newrules);
1579
1580 again:
1581         /*
1582          * First, count the rules that apply to the process with new
1583          * credentials.
1584          */
1585         rulecnt = 0;
1586         rw_rlock(&rctl_lock);
1587         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1588                 if (link->rrl_rule->rr_subject_type ==
1589                     RCTL_SUBJECT_TYPE_PROCESS)
1590                         rulecnt++;
1591         }
1592         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1593                 rulecnt++;
1594         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1595                 rulecnt++;
1596         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1597                 rulecnt++;
1598         rw_runlock(&rctl_lock);
1599
1600         /*
1601          * Create temporary list.  We've dropped the rctl_lock in order
1602          * to use M_WAITOK.
1603          */
1604         for (i = 0; i < rulecnt; i++) {
1605                 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1606                 newlink->rrl_rule = NULL;
1607                 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1608         }
1609
1610         newlink = LIST_FIRST(&newrules);
1611
1612         /*
1613          * Assign rules to the newly allocated list entries.
1614          */
1615         rw_wlock(&rctl_lock);
1616         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1617                 if (link->rrl_rule->rr_subject_type ==
1618                     RCTL_SUBJECT_TYPE_PROCESS) {
1619                         if (newlink == NULL)
1620                                 goto goaround;
1621                         rctl_rule_acquire(link->rrl_rule);
1622                         newlink->rrl_rule = link->rrl_rule;
1623                         newlink = LIST_NEXT(newlink, rrl_next);
1624                         rulecnt--;
1625                 }
1626         }
1627         
1628         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1629                 if (newlink == NULL)
1630                         goto goaround;
1631                 rctl_rule_acquire(link->rrl_rule);
1632                 newlink->rrl_rule = link->rrl_rule;
1633                 newlink = LIST_NEXT(newlink, rrl_next);
1634                 rulecnt--;
1635         }
1636
1637         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1638                 if (newlink == NULL)
1639                         goto goaround;
1640                 rctl_rule_acquire(link->rrl_rule);
1641                 newlink->rrl_rule = link->rrl_rule;
1642                 newlink = LIST_NEXT(newlink, rrl_next);
1643                 rulecnt--;
1644         }
1645
1646         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1647                 if (newlink == NULL)
1648                         goto goaround;
1649                 rctl_rule_acquire(link->rrl_rule);
1650                 newlink->rrl_rule = link->rrl_rule;
1651                 newlink = LIST_NEXT(newlink, rrl_next);
1652                 rulecnt--;
1653         }
1654
1655         if (rulecnt == 0) {
1656                 /*
1657                  * Free the old rule list.
1658                  */
1659                 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1660                         link = LIST_FIRST(&p->p_racct->r_rule_links);
1661                         LIST_REMOVE(link, rrl_next);
1662                         rctl_rule_release(link->rrl_rule);
1663                         uma_zfree(rctl_rule_link_zone, link);
1664                 }
1665
1666                 /*
1667                  * Replace lists and we're done.
1668                  *
1669                  * XXX: Is there any way to switch list heads instead
1670                  *      of iterating here?
1671                  */
1672                 while (!LIST_EMPTY(&newrules)) {
1673                         newlink = LIST_FIRST(&newrules);
1674                         LIST_REMOVE(newlink, rrl_next);
1675                         LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1676                             newlink, rrl_next);
1677                 }
1678
1679                 rw_wunlock(&rctl_lock);
1680
1681                 return;
1682         }
1683
1684 goaround:
1685         rw_wunlock(&rctl_lock);
1686
1687         /*
1688          * Rule list changed while we were not holding the rctl_lock.
1689          * Free the new list and try again.
1690          */
1691         while (!LIST_EMPTY(&newrules)) {
1692                 newlink = LIST_FIRST(&newrules);
1693                 LIST_REMOVE(newlink, rrl_next);
1694                 if (newlink->rrl_rule != NULL)
1695                         rctl_rule_release(newlink->rrl_rule);
1696                 uma_zfree(rctl_rule_link_zone, newlink);
1697         }
1698
1699         goto again;
1700 }
1701
1702 /*
1703  * Assign RCTL rules to the newly created process.
1704  */
1705 int
1706 rctl_proc_fork(struct proc *parent, struct proc *child)
1707 {
1708         int error;
1709         struct rctl_rule_link *link;
1710         struct rctl_rule *rule;
1711
1712         LIST_INIT(&child->p_racct->r_rule_links);
1713
1714         /*
1715          * No limits for kernel processes.
1716          */
1717         if (child->p_flag & P_SYSTEM)
1718                 return (0);
1719
1720         /*
1721          * Nothing to inherit from P_SYSTEM parents.
1722          */
1723         if (parent->p_racct == NULL) {
1724                 KASSERT(parent->p_flag & P_SYSTEM,
1725                     ("non-system process without racct; p = %p", parent));
1726                 return (0);
1727         }
1728
1729         rw_wlock(&rctl_lock);
1730
1731         /*
1732          * Go through limits applicable to the parent and assign them
1733          * to the child.  Rules with 'process' subject have to be duplicated
1734          * in order to make their rr_subject point to the new process.
1735          */
1736         LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1737                 if (link->rrl_rule->rr_subject_type ==
1738                     RCTL_SUBJECT_TYPE_PROCESS) {
1739                         rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1740                         if (rule == NULL)
1741                                 goto fail;
1742                         KASSERT(rule->rr_subject.rs_proc == parent,
1743                             ("rule->rr_subject.rs_proc != parent"));
1744                         rule->rr_subject.rs_proc = child;
1745                         error = rctl_racct_add_rule_locked(child->p_racct,
1746                             rule);
1747                         rctl_rule_release(rule);
1748                         if (error != 0)
1749                                 goto fail;
1750                 } else {
1751                         error = rctl_racct_add_rule_locked(child->p_racct,
1752                             link->rrl_rule);
1753                         if (error != 0)
1754                                 goto fail;
1755                 }
1756         }
1757
1758         rw_wunlock(&rctl_lock);
1759         return (0);
1760
1761 fail:
1762         while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1763                 link = LIST_FIRST(&child->p_racct->r_rule_links);
1764                 LIST_REMOVE(link, rrl_next);
1765                 rctl_rule_release(link->rrl_rule);
1766                 uma_zfree(rctl_rule_link_zone, link);
1767         }
1768         rw_wunlock(&rctl_lock);
1769         return (EAGAIN);
1770 }
1771
1772 /*
1773  * Release rules attached to the racct.
1774  */
1775 void
1776 rctl_racct_release(struct racct *racct)
1777 {
1778         struct rctl_rule_link *link;
1779
1780         rw_wlock(&rctl_lock);
1781         while (!LIST_EMPTY(&racct->r_rule_links)) {
1782                 link = LIST_FIRST(&racct->r_rule_links);
1783                 LIST_REMOVE(link, rrl_next);
1784                 rctl_rule_release(link->rrl_rule);
1785                 uma_zfree(rctl_rule_link_zone, link);
1786         }
1787         rw_wunlock(&rctl_lock);
1788 }
1789
1790 static void
1791 rctl_init(void)
1792 {
1793
1794         rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1795             sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1796             UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1797         rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1798             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1799 }
1800
1801 #else /* !RCTL */
1802
/*
 * Stub for kernels built without RCTL: the system call is not
 * implemented and always fails with ENOSYS.
 */
int
sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{

	return (ENOSYS);
}
1809
/*
 * Stub for kernels built without RCTL: the system call is not
 * implemented and always fails with ENOSYS.
 */
int
sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{

	return (ENOSYS);
}
1816
/*
 * Stub for kernels built without RCTL: the system call is not
 * implemented and always fails with ENOSYS.
 */
int
sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{

	return (ENOSYS);
}
1823
/*
 * Stub for kernels built without RCTL: the system call is not
 * implemented and always fails with ENOSYS.
 */
int
sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{

	return (ENOSYS);
}
1830
/*
 * Stub for kernels built without RCTL: the system call is not
 * implemented and always fails with ENOSYS.
 */
int
sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{

	return (ENOSYS);
}
1837
1838 #endif /* !RCTL */