]> CyberLeo.Net >> Repos - FreeBSD/releng/9.0.git/blob - sys/kern/kern_rctl.c
Copy stable/9 to releng/9.0 as part of the FreeBSD 9.0-RELEASE release
[FreeBSD/releng/9.0.git] / sys / kern / kern_rctl.c
1 /*-
2  * Copyright (c) 2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/malloc.h>
38 #include <sys/queue.h>
39 #include <sys/refcount.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/limits.h>
43 #include <sys/loginclass.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/racct.h>
47 #include <sys/rctl.h>
48 #include <sys/resourcevar.h>
49 #include <sys/sx.h>
50 #include <sys/sysent.h>
51 #include <sys/sysproto.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/eventhandler.h>
55 #include <sys/lock.h>
56 #include <sys/mutex.h>
57 #include <sys/rwlock.h>
58 #include <sys/sbuf.h>
59 #include <sys/taskqueue.h>
60 #include <sys/tree.h>
61 #include <vm/uma.h>
62
63 #ifdef RCTL
64 #ifndef RACCT
65 #error "The RCTL option requires the RACCT option"
66 #endif
67
68 FEATURE(rctl, "Resource Limits");
69
/*
 * HRF_* flag values.
 * NOTE(review): nothing in the visible part of this file uses them; going by
 * the names they look like per-resource inherit/accumulate flags -- confirm
 * before relying on that.
 */
#define HRF_DEFAULT             0
#define HRF_DONT_INHERIT        1
#define HRF_DONT_ACCUMULATE     2

/* Default buffer size for rctl_get_rules(2). */
#define RCTL_DEFAULT_BUFSIZE    4096
/* Size of the fixed-length buffer rctl_enforce() formats log/devctl text in. */
#define RCTL_LOG_BUFSIZE        128
77
/*
 * 'rctl_rule_link' connects a rule with every racct it's related to.
 * For example, rule 'user:X:openfiles:deny=N/process' is linked
 * with uidinfo for user X, and to each process of that user.
 */
struct rctl_rule_link {
        /* Linkage on the owning racct's r_rule_links list. */
        LIST_ENTRY(rctl_rule_link)      rrl_next;
        /* The rule this link refers to; the link holds a reference on it. */
        struct rctl_rule                *rrl_rule;
        /*
         * Set to 1 by rctl_enforce() once a log/devctl/signal action has
         * fired for this link, and cleared back to 0 when the rule is found
         * not to be exceeded any more.
         */
        int                             rrl_exceeded;
};
88
/*
 * One name <-> integer value mapping.  The tables built from it in this
 * file are terminated by an entry whose d_name is NULL.
 */
struct dict {
        const char      *d_name;        /* textual name */
        int             d_value;        /* matching numeric constant */
};
93
94 static struct dict subjectnames[] = {
95         { "process", RCTL_SUBJECT_TYPE_PROCESS },
96         { "user", RCTL_SUBJECT_TYPE_USER },
97         { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
98         { "jail", RCTL_SUBJECT_TYPE_JAIL },
99         { NULL, -1 }};
100
101 static struct dict resourcenames[] = {
102         { "cputime", RACCT_CPU },
103         { "datasize", RACCT_DATA },
104         { "stacksize", RACCT_STACK },
105         { "coredumpsize", RACCT_CORE },
106         { "memoryuse", RACCT_RSS },
107         { "memorylocked", RACCT_MEMLOCK },
108         { "maxproc", RACCT_NPROC },
109         { "openfiles", RACCT_NOFILE },
110         { "vmemoryuse", RACCT_VMEM },
111         { "pseudoterminals", RACCT_NPTS },
112         { "swapuse", RACCT_SWAP },
113         { "nthr", RACCT_NTHR },
114         { "msgqqueued", RACCT_MSGQQUEUED },
115         { "msgqsize", RACCT_MSGQSIZE },
116         { "nmsgq", RACCT_NMSGQ },
117         { "nsem", RACCT_NSEM },
118         { "nsemop", RACCT_NSEMOP },
119         { "nshm", RACCT_NSHM },
120         { "shmsize", RACCT_SHMSIZE },
121         { "wallclock", RACCT_WALLCLOCK },
122         { NULL, -1 }};
123
124 static struct dict actionnames[] = {
125         { "sighup", RCTL_ACTION_SIGHUP },
126         { "sigint", RCTL_ACTION_SIGINT },
127         { "sigquit", RCTL_ACTION_SIGQUIT },
128         { "sigill", RCTL_ACTION_SIGILL },
129         { "sigtrap", RCTL_ACTION_SIGTRAP },
130         { "sigabrt", RCTL_ACTION_SIGABRT },
131         { "sigemt", RCTL_ACTION_SIGEMT },
132         { "sigfpe", RCTL_ACTION_SIGFPE },
133         { "sigkill", RCTL_ACTION_SIGKILL },
134         { "sigbus", RCTL_ACTION_SIGBUS },
135         { "sigsegv", RCTL_ACTION_SIGSEGV },
136         { "sigsys", RCTL_ACTION_SIGSYS },
137         { "sigpipe", RCTL_ACTION_SIGPIPE },
138         { "sigalrm", RCTL_ACTION_SIGALRM },
139         { "sigterm", RCTL_ACTION_SIGTERM },
140         { "sigurg", RCTL_ACTION_SIGURG },
141         { "sigstop", RCTL_ACTION_SIGSTOP },
142         { "sigtstp", RCTL_ACTION_SIGTSTP },
143         { "sigchld", RCTL_ACTION_SIGCHLD },
144         { "sigttin", RCTL_ACTION_SIGTTIN },
145         { "sigttou", RCTL_ACTION_SIGTTOU },
146         { "sigio", RCTL_ACTION_SIGIO },
147         { "sigxcpu", RCTL_ACTION_SIGXCPU },
148         { "sigxfsz", RCTL_ACTION_SIGXFSZ },
149         { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
150         { "sigprof", RCTL_ACTION_SIGPROF },
151         { "sigwinch", RCTL_ACTION_SIGWINCH },
152         { "siginfo", RCTL_ACTION_SIGINFO },
153         { "sigusr1", RCTL_ACTION_SIGUSR1 },
154         { "sigusr2", RCTL_ACTION_SIGUSR2 },
155         { "sigthr", RCTL_ACTION_SIGTHR },
156         { "deny", RCTL_ACTION_DENY },
157         { "log", RCTL_ACTION_LOG },
158         { "devctl", RCTL_ACTION_DEVCTL },
159         { NULL, -1 }};
160
static void rctl_init(void);
/* Register rctl_init() to run first within the SI_SUB_RACCT startup stage. */
SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);

/* UMA zones that struct rctl_rule_link and struct rctl_rule come from. */
static uma_zone_t rctl_rule_link_zone;
static uma_zone_t rctl_rule_zone;
/*
 * Reader/writer lock held by the functions in this file while they walk or
 * modify a racct's r_rule_links list.
 */
static struct rwlock rctl_lock;
RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");

static int rctl_rule_fully_specified(const struct rctl_rule *rule);
static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);

/* malloc(9) type for temporary buffers, e.g. the ones in rctl_enforce(). */
MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
173
174 static const char *
175 rctl_subject_type_name(int subject)
176 {
177         int i;
178
179         for (i = 0; subjectnames[i].d_name != NULL; i++) {
180                 if (subjectnames[i].d_value == subject)
181                         return (subjectnames[i].d_name);
182         }
183
184         panic("rctl_subject_type_name: unknown subject type %d", subject);
185 }
186
187 static const char *
188 rctl_action_name(int action)
189 {
190         int i;
191
192         for (i = 0; actionnames[i].d_name != NULL; i++) {
193                 if (actionnames[i].d_value == action)
194                         return (actionnames[i].d_name);
195         }
196
197         panic("rctl_action_name: unknown action %d", action);
198 }
199
200 const char *
201 rctl_resource_name(int resource)
202 {
203         int i;
204
205         for (i = 0; resourcenames[i].d_name != NULL; i++) {
206                 if (resourcenames[i].d_value == resource)
207                         return (resourcenames[i].d_name);
208         }
209
210         panic("rctl_resource_name: unknown resource %d", resource);
211 }
212
213 /*
214  * Return the amount of resource that can be allocated by 'p' before
215  * hitting 'rule'.
216  */
217 static int64_t
218 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
219 {
220         int resource;
221         int64_t available = INT64_MAX;
222         struct ucred *cred = p->p_ucred;
223
224         rw_assert(&rctl_lock, RA_LOCKED);
225
226         resource = rule->rr_resource;
227         switch (rule->rr_per) {
228         case RCTL_SUBJECT_TYPE_PROCESS:
229                 available = rule->rr_amount -
230                     p->p_racct->r_resources[resource];
231                 break;
232         case RCTL_SUBJECT_TYPE_USER:
233                 available = rule->rr_amount -
234                     cred->cr_ruidinfo->ui_racct->r_resources[resource];
235                 break;
236         case RCTL_SUBJECT_TYPE_LOGINCLASS:
237                 available = rule->rr_amount -
238                     cred->cr_loginclass->lc_racct->r_resources[resource];
239                 break;
240         case RCTL_SUBJECT_TYPE_JAIL:
241                 available = rule->rr_amount -
242                     cred->cr_prison->pr_prison_racct->prr_racct->
243                         r_resources[resource];
244                 break;
245         default:
246                 panic("rctl_compute_available: unknown per %d",
247                     rule->rr_per);
248         }
249
250         return (available);
251 }
252
253 /*
254  * Return non-zero if allocating 'amount' by proc 'p' would exceed
255  * resource limit specified by 'rule'.
256  */
257 static int
258 rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
259     int64_t amount)
260 {
261         int64_t available;
262
263         rw_assert(&rctl_lock, RA_LOCKED);
264
265         available = rctl_available_resource(p, rule);
266         if (available >= amount)
267                 return (0);
268
269         return (1);
270 }
271
272 /*
273  * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
274  * to what it keeps allocated now.  Returns non-zero if the allocation should
275  * be denied, 0 otherwise.
276  */
277 int
278 rctl_enforce(struct proc *p, int resource, uint64_t amount)
279 {
280         struct rctl_rule *rule;
281         struct rctl_rule_link *link;
282         struct sbuf sb;
283         int should_deny = 0;
284         char *buf;
285         static int curtime = 0;
286         static struct timeval lasttime;
287
288         rw_rlock(&rctl_lock);
289
290         /*
291          * There may be more than one matching rule; go through all of them.
292          * Denial should be done last, after logging and sending signals.
293          */
294         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
295                 rule = link->rrl_rule;
296                 if (rule->rr_resource != resource)
297                         continue;
298                 if (!rctl_would_exceed(p, rule, amount)) {
299                         link->rrl_exceeded = 0;
300                         continue;
301                 }
302
303                 switch (rule->rr_action) {
304                 case RCTL_ACTION_DENY:
305                         should_deny = 1;
306                         continue;
307                 case RCTL_ACTION_LOG:
308                         /*
309                          * If rrl_exceeded != 0, it means we've already
310                          * logged a warning for this process.
311                          */
312                         if (link->rrl_exceeded != 0)
313                                 continue;
314
315                         /*
316                          * If the process state is not fully initialized yet,
317                          * we can't access most of the required fields, e.g.
318                          * p->p_comm.  This happens when called from fork1().
319                          * Ignore this rule for now; it will be processed just
320                          * after fork, when called from racct_proc_fork_done().
321                          */
322                         if (p->p_state != PRS_NORMAL)
323                                 continue;
324
325                         if (!ppsratecheck(&lasttime, &curtime, 10))
326                                 continue;
327
328                         buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
329                         if (buf == NULL) {
330                                 printf("rctl_enforce: out of memory\n");
331                                 continue;
332                         }
333                         sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
334                         rctl_rule_to_sbuf(&sb, rule);
335                         sbuf_finish(&sb);
336                         printf("rctl: rule \"%s\" matched by pid %d "
337                             "(%s), uid %d, jail %s\n", sbuf_data(&sb),
338                             p->p_pid, p->p_comm, p->p_ucred->cr_uid,
339                             p->p_ucred->cr_prison->pr_prison_racct->prr_name);
340                         sbuf_delete(&sb);
341                         free(buf, M_RCTL);
342                         link->rrl_exceeded = 1;
343                         continue;
344                 case RCTL_ACTION_DEVCTL:
345                         if (link->rrl_exceeded != 0)
346                                 continue;
347
348                         if (p->p_state != PRS_NORMAL)
349                                 continue;
350         
351                         buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
352                         if (buf == NULL) {
353                                 printf("rctl_enforce: out of memory\n");
354                                 continue;
355                         }
356                         sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
357                         sbuf_printf(&sb, "rule=");
358                         rctl_rule_to_sbuf(&sb, rule);
359                         sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
360                             p->p_pid, p->p_ucred->cr_ruid,
361                             p->p_ucred->cr_prison->pr_prison_racct->prr_name);
362                         sbuf_finish(&sb);
363                         devctl_notify_f("RCTL", "rule", "matched",
364                             sbuf_data(&sb), M_NOWAIT);
365                         sbuf_delete(&sb);
366                         free(buf, M_RCTL);
367                         link->rrl_exceeded = 1;
368                         continue;
369                 default:
370                         if (link->rrl_exceeded != 0)
371                                 continue;
372
373                         if (p->p_state != PRS_NORMAL)
374                                 continue;
375
376                         KASSERT(rule->rr_action > 0 &&
377                             rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
378                             ("rctl_enforce: unknown action %d",
379                              rule->rr_action));
380
381                         /*
382                          * We're using the fact that RCTL_ACTION_SIG* values
383                          * are equal to their counterparts from sys/signal.h.
384                          */
385                         kern_psignal(p, rule->rr_action);
386                         link->rrl_exceeded = 1;
387                         continue;
388                 }
389         }
390
391         rw_runlock(&rctl_lock);
392
393         if (should_deny) {
394                 /*
395                  * Return fake error code; the caller should change it
396                  * into one proper for the situation - EFSIZ, ENOMEM etc.
397                  */
398                 return (EDOOFUS);
399         }
400
401         return (0);
402 }
403
404 uint64_t
405 rctl_get_limit(struct proc *p, int resource)
406 {
407         struct rctl_rule *rule;
408         struct rctl_rule_link *link;
409         uint64_t amount = UINT64_MAX;
410
411         rw_rlock(&rctl_lock);
412
413         /*
414          * There may be more than one matching rule; go through all of them.
415          * Denial should be done last, after logging and sending signals.
416          */
417         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
418                 rule = link->rrl_rule;
419                 if (rule->rr_resource != resource)
420                         continue;
421                 if (rule->rr_action != RCTL_ACTION_DENY)
422                         continue;
423                 if (rule->rr_amount < amount)
424                         amount = rule->rr_amount;
425         }
426
427         rw_runlock(&rctl_lock);
428
429         return (amount);
430 }
431
432 uint64_t
433 rctl_get_available(struct proc *p, int resource)
434 {
435         struct rctl_rule *rule;
436         struct rctl_rule_link *link;
437         int64_t available, minavailable, allocated;
438
439         minavailable = INT64_MAX;
440
441         rw_rlock(&rctl_lock);
442
443         /*
444          * There may be more than one matching rule; go through all of them.
445          * Denial should be done last, after logging and sending signals.
446          */
447         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
448                 rule = link->rrl_rule;
449                 if (rule->rr_resource != resource)
450                         continue;
451                 if (rule->rr_action != RCTL_ACTION_DENY)
452                         continue;
453                 available = rctl_available_resource(p, rule);
454                 if (available < minavailable)
455                         minavailable = available;
456         }
457
458         rw_runlock(&rctl_lock);
459
460         /*
461          * XXX: Think about this _hard_.
462          */
463         allocated = p->p_racct->r_resources[resource];
464         if (minavailable < INT64_MAX - allocated)
465                 minavailable += allocated;
466         if (minavailable < 0)
467                 minavailable = 0;
468         return (minavailable);
469 }
470
471 static int
472 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
473 {
474
475         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
476                 if (rule->rr_subject_type != filter->rr_subject_type)
477                         return (0);
478
479                 switch (filter->rr_subject_type) {
480                 case RCTL_SUBJECT_TYPE_PROCESS:
481                         if (filter->rr_subject.rs_proc != NULL &&
482                             rule->rr_subject.rs_proc !=
483                             filter->rr_subject.rs_proc)
484                                 return (0);
485                         break;
486                 case RCTL_SUBJECT_TYPE_USER:
487                         if (filter->rr_subject.rs_uip != NULL &&
488                             rule->rr_subject.rs_uip !=
489                             filter->rr_subject.rs_uip)
490                                 return (0);
491                         break;
492                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
493                         if (filter->rr_subject.rs_loginclass != NULL &&
494                             rule->rr_subject.rs_loginclass !=
495                             filter->rr_subject.rs_loginclass)
496                                 return (0);
497                         break;
498                 case RCTL_SUBJECT_TYPE_JAIL:
499                         if (filter->rr_subject.rs_prison_racct != NULL &&
500                             rule->rr_subject.rs_prison_racct !=
501                             filter->rr_subject.rs_prison_racct)
502                                 return (0);
503                         break;
504                 default:
505                         panic("rctl_rule_matches: unknown subject type %d",
506                             filter->rr_subject_type);
507                 }
508         }
509
510         if (filter->rr_resource != RACCT_UNDEFINED) {
511                 if (rule->rr_resource != filter->rr_resource)
512                         return (0);
513         }
514
515         if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
516                 if (rule->rr_action != filter->rr_action)
517                         return (0);
518         }
519
520         if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
521                 if (rule->rr_amount != filter->rr_amount)
522                         return (0);
523         }
524
525         if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
526                 if (rule->rr_per != filter->rr_per)
527                         return (0);
528         }
529
530         return (1);
531 }
532
533 static int
534 str2value(const char *str, int *value, struct dict *table)
535 {
536         int i;
537
538         if (value == NULL)
539                 return (EINVAL);
540
541         for (i = 0; table[i].d_name != NULL; i++) {
542                 if (strcasecmp(table[i].d_name, str) == 0) {
543                         *value =  table[i].d_value;
544                         return (0);
545                 }
546         }
547
548         return (EINVAL);
549 }
550
551 static int
552 str2id(const char *str, id_t *value)
553 {
554         char *end;
555
556         if (str == NULL)
557                 return (EINVAL);
558
559         *value = strtoul(str, &end, 10);
560         if ((size_t)(end - str) != strlen(str))
561                 return (EINVAL);
562
563         return (0);
564 }
565
/*
 * Parse 'str' as a base-10 number into '*value'.
 *
 * Returns 0 on success, EINVAL when 'str' is NULL or is not consumed
 * entirely by the conversion, and ERANGE when the converted number does not
 * fit a non-negative int64_t.  The latter covers both inputs above INT64_MAX
 * and inputs with a leading '-', which strtoul() accepts and negates; without
 * the check either one would be stored as a negative value.
 *
 * Note that '*value' is written even when an error is returned.
 */
static int
str2int64(const char *str, int64_t *value)
{
	char *end;

	if (str == NULL)
		return (EINVAL);

	*value = strtoul(str, &end, 10);
	if ((size_t)(end - str) != strlen(str))
		return (EINVAL);

	/* The unsigned result wrapped into the sign bit: out of range. */
	if (*value < 0)
		return (ERANGE);

	return (0);
}
580
581 /*
582  * Connect the rule to the racct, increasing refcount for the rule.
583  */
584 static void
585 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
586 {
587         struct rctl_rule_link *link;
588
589         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
590
591         rctl_rule_acquire(rule);
592         link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
593         link->rrl_rule = rule;
594         link->rrl_exceeded = 0;
595
596         rw_wlock(&rctl_lock);
597         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
598         rw_wunlock(&rctl_lock);
599 }
600
601 static int
602 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
603 {
604         struct rctl_rule_link *link;
605
606         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
607         rw_assert(&rctl_lock, RA_WLOCKED);
608
609         link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
610         if (link == NULL)
611                 return (ENOMEM);
612         rctl_rule_acquire(rule);
613         link->rrl_rule = rule;
614         link->rrl_exceeded = 0;
615
616         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
617         return (0);
618 }
619
620 /*
621  * Remove limits for a rules matching the filter and release
622  * the refcounts for the rules, possibly freeing them.  Returns
623  * the number of limit structures removed.
624  */
625 static int
626 rctl_racct_remove_rules(struct racct *racct,
627     const struct rctl_rule *filter)
628 {
629         int removed = 0;
630         struct rctl_rule_link *link, *linktmp;
631
632         rw_assert(&rctl_lock, RA_WLOCKED);
633
634         LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
635                 if (!rctl_rule_matches(link->rrl_rule, filter))
636                         continue;
637
638                 LIST_REMOVE(link, rrl_next);
639                 rctl_rule_release(link->rrl_rule);
640                 uma_zfree(rctl_rule_link_zone, link);
641                 removed++;
642         }
643         return (removed);
644 }
645
646 static void
647 rctl_rule_acquire_subject(struct rctl_rule *rule)
648 {
649
650         switch (rule->rr_subject_type) {
651         case RCTL_SUBJECT_TYPE_UNDEFINED:
652         case RCTL_SUBJECT_TYPE_PROCESS:
653                 break;
654         case RCTL_SUBJECT_TYPE_JAIL:
655                 if (rule->rr_subject.rs_prison_racct != NULL)
656                         prison_racct_hold(rule->rr_subject.rs_prison_racct);
657                 break;
658         case RCTL_SUBJECT_TYPE_USER:
659                 if (rule->rr_subject.rs_uip != NULL)
660                         uihold(rule->rr_subject.rs_uip);
661                 break;
662         case RCTL_SUBJECT_TYPE_LOGINCLASS:
663                 if (rule->rr_subject.rs_loginclass != NULL)
664                         loginclass_hold(rule->rr_subject.rs_loginclass);
665                 break;
666         default:
667                 panic("rctl_rule_acquire_subject: unknown subject type %d",
668                     rule->rr_subject_type);
669         }
670 }
671
672 static void
673 rctl_rule_release_subject(struct rctl_rule *rule)
674 {
675
676         switch (rule->rr_subject_type) {
677         case RCTL_SUBJECT_TYPE_UNDEFINED:
678         case RCTL_SUBJECT_TYPE_PROCESS:
679                 break;
680         case RCTL_SUBJECT_TYPE_JAIL:
681                 if (rule->rr_subject.rs_prison_racct != NULL)
682                         prison_racct_free(rule->rr_subject.rs_prison_racct);
683                 break;
684         case RCTL_SUBJECT_TYPE_USER:
685                 if (rule->rr_subject.rs_uip != NULL)
686                         uifree(rule->rr_subject.rs_uip);
687                 break;
688         case RCTL_SUBJECT_TYPE_LOGINCLASS:
689                 if (rule->rr_subject.rs_loginclass != NULL)
690                         loginclass_free(rule->rr_subject.rs_loginclass);
691                 break;
692         default:
693                 panic("rctl_rule_release_subject: unknown subject type %d",
694                     rule->rr_subject_type);
695         }
696 }
697
698 struct rctl_rule *
699 rctl_rule_alloc(int flags)
700 {
701         struct rctl_rule *rule;
702
703         rule = uma_zalloc(rctl_rule_zone, flags);
704         if (rule == NULL)
705                 return (NULL);
706         rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
707         rule->rr_subject.rs_proc = NULL;
708         rule->rr_subject.rs_uip = NULL;
709         rule->rr_subject.rs_loginclass = NULL;
710         rule->rr_subject.rs_prison_racct = NULL;
711         rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
712         rule->rr_resource = RACCT_UNDEFINED;
713         rule->rr_action = RCTL_ACTION_UNDEFINED;
714         rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
715         refcount_init(&rule->rr_refcount, 1);
716
717         return (rule);
718 }
719
720 struct rctl_rule *
721 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
722 {
723         struct rctl_rule *copy;
724
725         copy = uma_zalloc(rctl_rule_zone, flags);
726         if (copy == NULL)
727                 return (NULL);
728         copy->rr_subject_type = rule->rr_subject_type;
729         copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
730         copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
731         copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
732         copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
733         copy->rr_per = rule->rr_per;
734         copy->rr_resource = rule->rr_resource;
735         copy->rr_action = rule->rr_action;
736         copy->rr_amount = rule->rr_amount;
737         refcount_init(&copy->rr_refcount, 1);
738         rctl_rule_acquire_subject(copy);
739
740         return (copy);
741 }
742
743 void
744 rctl_rule_acquire(struct rctl_rule *rule)
745 {
746
747         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
748
749         refcount_acquire(&rule->rr_refcount);
750 }
751
752 static void
753 rctl_rule_free(void *context, int pending)
754 {
755         struct rctl_rule *rule;
756         
757         rule = (struct rctl_rule *)context;
758
759         KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
760         
761         /*
762          * We don't need locking here; rule is guaranteed to be inaccessible.
763          */
764         
765         rctl_rule_release_subject(rule);
766         uma_zfree(rctl_rule_zone, rule);
767 }
768
769 void
770 rctl_rule_release(struct rctl_rule *rule)
771 {
772
773         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
774
775         if (refcount_release(&rule->rr_refcount)) {
776                 /*
777                  * rctl_rule_release() is often called when iterating
778                  * over all the uidinfo structures in the system,
779                  * holding uihashtbl_lock.  Since rctl_rule_free()
780                  * might end up calling uifree(), this would lead
781                  * to lock recursion.  Use taskqueue to avoid this.
782                  */
783                 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
784                 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
785         }
786 }
787
788 static int
789 rctl_rule_fully_specified(const struct rctl_rule *rule)
790 {
791
792         switch (rule->rr_subject_type) {
793         case RCTL_SUBJECT_TYPE_UNDEFINED:
794                 return (0);
795         case RCTL_SUBJECT_TYPE_PROCESS:
796                 if (rule->rr_subject.rs_proc == NULL)
797                         return (0);
798                 break;
799         case RCTL_SUBJECT_TYPE_USER:
800                 if (rule->rr_subject.rs_uip == NULL)
801                         return (0);
802                 break;
803         case RCTL_SUBJECT_TYPE_LOGINCLASS:
804                 if (rule->rr_subject.rs_loginclass == NULL)
805                         return (0);
806                 break;
807         case RCTL_SUBJECT_TYPE_JAIL:
808                 if (rule->rr_subject.rs_prison_racct == NULL)
809                         return (0);
810                 break;
811         default:
812                 panic("rctl_rule_fully_specified: unknown subject type %d",
813                     rule->rr_subject_type);
814         }
815         if (rule->rr_resource == RACCT_UNDEFINED)
816                 return (0);
817         if (rule->rr_action == RCTL_ACTION_UNDEFINED)
818                 return (0);
819         if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
820                 return (0);
821         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
822                 return (0);
823
824         return (1);
825 }
826
827 static int
828 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
829 {
830         int error = 0;
831         char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
832              *amountstr, *perstr;
833         struct rctl_rule *rule;
834         id_t id;
835
836         rule = rctl_rule_alloc(M_WAITOK);
837
838         subjectstr = strsep(&rulestr, ":");
839         subject_idstr = strsep(&rulestr, ":");
840         resourcestr = strsep(&rulestr, ":");
841         actionstr = strsep(&rulestr, "=/");
842         amountstr = strsep(&rulestr, "/");
843         perstr = rulestr;
844
845         if (subjectstr == NULL || subjectstr[0] == '\0')
846                 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
847         else {
848                 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
849                 if (error != 0)
850                         goto out;
851         }
852
853         if (subject_idstr == NULL || subject_idstr[0] == '\0') {
854                 rule->rr_subject.rs_proc = NULL;
855                 rule->rr_subject.rs_uip = NULL;
856                 rule->rr_subject.rs_loginclass = NULL;
857                 rule->rr_subject.rs_prison_racct = NULL;
858         } else {
859                 switch (rule->rr_subject_type) {
860                 case RCTL_SUBJECT_TYPE_UNDEFINED:
861                         error = EINVAL;
862                         goto out;
863                 case RCTL_SUBJECT_TYPE_PROCESS:
864                         error = str2id(subject_idstr, &id);
865                         if (error != 0)
866                                 goto out;
867                         sx_assert(&allproc_lock, SA_LOCKED);
868                         rule->rr_subject.rs_proc = pfind(id);
869                         if (rule->rr_subject.rs_proc == NULL) {
870                                 error = ESRCH;
871                                 goto out;
872                         }
873                         PROC_UNLOCK(rule->rr_subject.rs_proc);
874                         break;
875                 case RCTL_SUBJECT_TYPE_USER:
876                         error = str2id(subject_idstr, &id);
877                         if (error != 0)
878                                 goto out;
879                         rule->rr_subject.rs_uip = uifind(id);
880                         break;
881                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
882                         rule->rr_subject.rs_loginclass =
883                             loginclass_find(subject_idstr);
884                         if (rule->rr_subject.rs_loginclass == NULL) {
885                                 error = ENAMETOOLONG;
886                                 goto out;
887                         }
888                         break;
889                 case RCTL_SUBJECT_TYPE_JAIL:
890                         rule->rr_subject.rs_prison_racct =
891                             prison_racct_find(subject_idstr);
892                         if (rule->rr_subject.rs_prison_racct == NULL) {
893                                 error = ENAMETOOLONG;
894                                 goto out;
895                         }
896                         break;
897                default:
898                        panic("rctl_string_to_rule: unknown subject type %d",
899                            rule->rr_subject_type);
900                }
901         }
902
903         if (resourcestr == NULL || resourcestr[0] == '\0')
904                 rule->rr_resource = RACCT_UNDEFINED;
905         else {
906                 error = str2value(resourcestr, &rule->rr_resource,
907                     resourcenames);
908                 if (error != 0)
909                         goto out;
910         }
911
912         if (actionstr == NULL || actionstr[0] == '\0')
913                 rule->rr_action = RCTL_ACTION_UNDEFINED;
914         else {
915                 error = str2value(actionstr, &rule->rr_action, actionnames);
916                 if (error != 0)
917                         goto out;
918         }
919
920         if (amountstr == NULL || amountstr[0] == '\0')
921                 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
922         else {
923                 error = str2int64(amountstr, &rule->rr_amount);
924                 if (error != 0)
925                         goto out;
926                 if (RACCT_IS_IN_MILLIONS(rule->rr_resource))
927                         rule->rr_amount *= 1000000;
928         }
929
930         if (perstr == NULL || perstr[0] == '\0')
931                 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
932         else {
933                 error = str2value(perstr, &rule->rr_per, subjectnames);
934                 if (error != 0)
935                         goto out;
936         }
937
938 out:
939         if (error == 0)
940                 *rulep = rule;
941         else
942                 rctl_rule_release(rule);
943
944         return (error);
945 }
946
947 /*
948  * Link a rule with all the subjects it applies to.
949  */
950 int
951 rctl_rule_add(struct rctl_rule *rule)
952 {
953         struct proc *p;
954         struct ucred *cred;
955         struct uidinfo *uip;
956         struct prison *pr;
957         struct prison_racct *prr;
958         struct loginclass *lc;
959         struct rctl_rule *rule2;
960         int match;
961
962         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
963
964         /*
965          * Some rules just don't make sense.  Note that the one below
966          * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU,
967          * for example, is not deniable in the racct sense, but the
968          * limit is enforced in a different way, so "deny" rules for %CPU
969          * do make sense.
970          */
971         if (rule->rr_action == RCTL_ACTION_DENY &&
972             (rule->rr_resource == RACCT_CPU ||
973             rule->rr_resource == RACCT_WALLCLOCK))
974                 return (EOPNOTSUPP);
975
976         if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
977             RACCT_IS_SLOPPY(rule->rr_resource))
978                 return (EOPNOTSUPP);
979
980         /*
981          * Make sure there are no duplicated rules.  Also, for the "deny"
982          * rules, remove ones differing only by "amount".
983          */
984         if (rule->rr_action == RCTL_ACTION_DENY) {
985                 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
986                 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
987                 rctl_rule_remove(rule2);
988                 rctl_rule_release(rule2);
989         } else
990                 rctl_rule_remove(rule);
991
992         switch (rule->rr_subject_type) {
993         case RCTL_SUBJECT_TYPE_PROCESS:
994                 p = rule->rr_subject.rs_proc;
995                 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
996                 /*
997                  * No resource limits for system processes.
998                  */
999                 if (p->p_flag & P_SYSTEM)
1000                         return (EPERM);
1001
1002                 rctl_racct_add_rule(p->p_racct, rule);
1003                 /*
1004                  * In case of per-process rule, we don't have anything more
1005                  * to do.
1006                  */
1007                 return (0);
1008
1009         case RCTL_SUBJECT_TYPE_USER:
1010                 uip = rule->rr_subject.rs_uip;
1011                 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1012                 rctl_racct_add_rule(uip->ui_racct, rule);
1013                 break;
1014
1015         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1016                 lc = rule->rr_subject.rs_loginclass;
1017                 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1018                 rctl_racct_add_rule(lc->lc_racct, rule);
1019                 break;
1020
1021         case RCTL_SUBJECT_TYPE_JAIL:
1022                 prr = rule->rr_subject.rs_prison_racct;
1023                 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1024                 rctl_racct_add_rule(prr->prr_racct, rule);
1025                 break;
1026
1027         default:
1028                 panic("rctl_rule_add: unknown subject type %d",
1029                     rule->rr_subject_type);
1030         }
1031
1032         /*
1033          * Now go through all the processes and add the new rule to the ones
1034          * it applies to.
1035          */
1036         sx_assert(&allproc_lock, SA_LOCKED);
1037         FOREACH_PROC_IN_SYSTEM(p) {
1038                 if (p->p_flag & P_SYSTEM)
1039                         continue;
1040                 cred = p->p_ucred;
1041                 switch (rule->rr_subject_type) {
1042                 case RCTL_SUBJECT_TYPE_USER:
1043                         if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1044                             cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1045                                 break;
1046                         continue;
1047                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1048                         if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1049                                 break;
1050                         continue;
1051                 case RCTL_SUBJECT_TYPE_JAIL:
1052                         match = 0;
1053                         for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1054                                 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1055                                         match = 1;
1056                                         break;
1057                                 }
1058                         }
1059                         if (match)
1060                                 break;
1061                         continue;
1062                 default:
1063                         panic("rctl_rule_add: unknown subject type %d",
1064                             rule->rr_subject_type);
1065                 }
1066
1067                 rctl_racct_add_rule(p->p_racct, rule);
1068         }
1069
1070         return (0);
1071 }
1072
1073 static void
1074 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1075 {
1076         struct rctl_rule *filter = (struct rctl_rule *)arg2;
1077         int found = 0;
1078
1079         rw_wlock(&rctl_lock);
1080         found += rctl_racct_remove_rules(racct, filter);
1081         rw_wunlock(&rctl_lock);
1082
1083         *((int *)arg3) += found;
1084 }
1085
1086 /*
1087  * Remove all rules that match the filter.
1088  */
1089 int
1090 rctl_rule_remove(struct rctl_rule *filter)
1091 {
1092         int found = 0;
1093         struct proc *p;
1094
1095         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1096             filter->rr_subject.rs_proc != NULL) {
1097                 p = filter->rr_subject.rs_proc;
1098                 rw_wlock(&rctl_lock);
1099                 found = rctl_racct_remove_rules(p->p_racct, filter);
1100                 rw_wunlock(&rctl_lock);
1101                 if (found)
1102                         return (0);
1103                 return (ESRCH);
1104         }
1105
1106         loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1107             (void *)&found);
1108         ui_racct_foreach(rctl_rule_remove_callback, filter,
1109             (void *)&found);
1110         prison_racct_foreach(rctl_rule_remove_callback, filter,
1111             (void *)&found);
1112
1113         sx_assert(&allproc_lock, SA_LOCKED);
1114         rw_wlock(&rctl_lock);
1115         FOREACH_PROC_IN_SYSTEM(p) {
1116                 found += rctl_racct_remove_rules(p->p_racct, filter);
1117         }
1118         rw_wunlock(&rctl_lock);
1119
1120         if (found)
1121                 return (0);
1122         return (ESRCH);
1123 }
1124
1125 /*
1126  * Appends a rule to the sbuf.
1127  */
1128 static void
1129 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1130 {
1131         int64_t amount;
1132
1133         sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1134
1135         switch (rule->rr_subject_type) {
1136         case RCTL_SUBJECT_TYPE_PROCESS:
1137                 if (rule->rr_subject.rs_proc == NULL)
1138                         sbuf_printf(sb, ":");
1139                 else
1140                         sbuf_printf(sb, "%d:",
1141                             rule->rr_subject.rs_proc->p_pid);
1142                 break;
1143         case RCTL_SUBJECT_TYPE_USER:
1144                 if (rule->rr_subject.rs_uip == NULL)
1145                         sbuf_printf(sb, ":");
1146                 else
1147                         sbuf_printf(sb, "%d:",
1148                             rule->rr_subject.rs_uip->ui_uid);
1149                 break;
1150         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1151                 if (rule->rr_subject.rs_loginclass == NULL)
1152                         sbuf_printf(sb, ":");
1153                 else
1154                         sbuf_printf(sb, "%s:",
1155                             rule->rr_subject.rs_loginclass->lc_name);
1156                 break;
1157         case RCTL_SUBJECT_TYPE_JAIL:
1158                 if (rule->rr_subject.rs_prison_racct == NULL)
1159                         sbuf_printf(sb, ":");
1160                 else
1161                         sbuf_printf(sb, "%s:",
1162                             rule->rr_subject.rs_prison_racct->prr_name);
1163                 break;
1164         default:
1165                 panic("rctl_rule_to_sbuf: unknown subject type %d",
1166                     rule->rr_subject_type);
1167         }
1168
1169         amount = rule->rr_amount;
1170         if (amount != RCTL_AMOUNT_UNDEFINED &&
1171             RACCT_IS_IN_MILLIONS(rule->rr_resource))
1172                 amount /= 1000000;
1173
1174         sbuf_printf(sb, "%s:%s=%jd",
1175             rctl_resource_name(rule->rr_resource),
1176             rctl_action_name(rule->rr_action),
1177             amount);
1178
1179         if (rule->rr_per != rule->rr_subject_type)
1180                 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1181 }
1182
1183 /*
1184  * Routine used by RCTL syscalls to read in input string.
1185  */
1186 static int
1187 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1188 {
1189         int error;
1190         char *str;
1191
1192         if (inbuflen <= 0)
1193                 return (EINVAL);
1194
1195         str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1196         error = copyinstr(inbufp, str, inbuflen, NULL);
1197         if (error != 0) {
1198                 free(str, M_RCTL);
1199                 return (error);
1200         }
1201
1202         *inputstr = str;
1203
1204         return (0);
1205 }
1206
/*
 * Routine used by RCTL syscalls to write out output string.
 *
 * Finishes the sbuf and copies its contents, including the terminating
 * NUL, to the user buffer.  The sbuf is always deleted before returning.
 * A NULL sbuf is a no-op that returns 0; ERANGE is returned when the
 * user buffer is too small to hold the string.
 */
static int
rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
{
	int error;

	if (outputsbuf == NULL)
		return (0);

	sbuf_finish(outputsbuf);
	if (outbuflen >= sbuf_len(outputsbuf) + 1)
		error = copyout(sbuf_data(outputsbuf), outbufp,
		    sbuf_len(outputsbuf) + 1);
	else
		error = ERANGE;
	sbuf_delete(outputsbuf);

	return (error);
}
1228
1229 static struct sbuf *
1230 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1231 {
1232         int i;
1233         int64_t amount;
1234         struct sbuf *sb;
1235
1236         sb = sbuf_new_auto();
1237         for (i = 0; i <= RACCT_MAX; i++) {
1238                 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1239                         continue;
1240                 amount = racct->r_resources[i];
1241                 if (RACCT_IS_IN_MILLIONS(i))
1242                         amount /= 1000000;
1243                 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1244         }
1245         sbuf_setpos(sb, sbuf_len(sb) - 1);
1246         return (sb);
1247 }
1248
1249 int
1250 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1251 {
1252         int error;
1253         char *inputstr;
1254         struct rctl_rule *filter;
1255         struct sbuf *outputsbuf = NULL;
1256         struct proc *p;
1257         struct uidinfo *uip;
1258         struct loginclass *lc;
1259         struct prison_racct *prr;
1260
1261         error = priv_check(td, PRIV_RCTL_GET_RACCT);
1262         if (error != 0)
1263                 return (error);
1264
1265         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1266         if (error != 0)
1267                 return (error);
1268
1269         sx_slock(&allproc_lock);
1270         error = rctl_string_to_rule(inputstr, &filter);
1271         free(inputstr, M_RCTL);
1272         if (error != 0) {
1273                 sx_sunlock(&allproc_lock);
1274                 return (error);
1275         }
1276
1277         switch (filter->rr_subject_type) {
1278         case RCTL_SUBJECT_TYPE_PROCESS:
1279                 p = filter->rr_subject.rs_proc;
1280                 if (p == NULL) {
1281                         error = EINVAL;
1282                         goto out;
1283                 }
1284                 if (p->p_flag & P_SYSTEM) {
1285                         error = EINVAL;
1286                         goto out;
1287                 }
1288                 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1289                 break;
1290         case RCTL_SUBJECT_TYPE_USER:
1291                 uip = filter->rr_subject.rs_uip;
1292                 if (uip == NULL) {
1293                         error = EINVAL;
1294                         goto out;
1295                 }
1296                 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1297                 break;
1298         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1299                 lc = filter->rr_subject.rs_loginclass;
1300                 if (lc == NULL) {
1301                         error = EINVAL;
1302                         goto out;
1303                 }
1304                 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1305                 break;
1306         case RCTL_SUBJECT_TYPE_JAIL:
1307                 prr = filter->rr_subject.rs_prison_racct;
1308                 if (prr == NULL) {
1309                         error = EINVAL;
1310                         goto out;
1311                 }
1312                 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1313                 break;
1314         default:
1315                 error = EINVAL;
1316         }
1317 out:
1318         rctl_rule_release(filter);
1319         sx_sunlock(&allproc_lock);
1320         if (error != 0)
1321                 return (error);
1322
1323         error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1324
1325         return (error);
1326 }
1327
1328 static void
1329 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1330 {
1331         struct rctl_rule *filter = (struct rctl_rule *)arg2;
1332         struct rctl_rule_link *link;
1333         struct sbuf *sb = (struct sbuf *)arg3;
1334
1335         rw_rlock(&rctl_lock);
1336         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1337                 if (!rctl_rule_matches(link->rrl_rule, filter))
1338                         continue;
1339                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1340                 sbuf_printf(sb, ",");
1341         }
1342         rw_runlock(&rctl_lock);
1343 }
1344
1345 int
1346 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1347 {
1348         int error;
1349         size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1350         char *inputstr, *buf;
1351         struct sbuf *sb;
1352         struct rctl_rule *filter;
1353         struct rctl_rule_link *link;
1354         struct proc *p;
1355
1356         error = priv_check(td, PRIV_RCTL_GET_RULES);
1357         if (error != 0)
1358                 return (error);
1359
1360         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1361         if (error != 0)
1362                 return (error);
1363
1364         sx_slock(&allproc_lock);
1365         error = rctl_string_to_rule(inputstr, &filter);
1366         free(inputstr, M_RCTL);
1367         if (error != 0) {
1368                 sx_sunlock(&allproc_lock);
1369                 return (error);
1370         }
1371
1372 again:
1373         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1374         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1375         KASSERT(sb != NULL, ("sbuf_new failed"));
1376
1377         sx_assert(&allproc_lock, SA_LOCKED);
1378         FOREACH_PROC_IN_SYSTEM(p) {
1379                 rw_rlock(&rctl_lock);
1380                 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1381                         /*
1382                          * Non-process rules will be added to the buffer later.
1383                          * Adding them here would result in duplicated output.
1384                          */
1385                         if (link->rrl_rule->rr_subject_type !=
1386                             RCTL_SUBJECT_TYPE_PROCESS)
1387                                 continue;
1388                         if (!rctl_rule_matches(link->rrl_rule, filter))
1389                                 continue;
1390                         rctl_rule_to_sbuf(sb, link->rrl_rule);
1391                         sbuf_printf(sb, ",");
1392                 }
1393                 rw_runlock(&rctl_lock);
1394         }
1395
1396         loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1397         ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1398         prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1399         if (sbuf_error(sb) == ENOMEM) {
1400                 sbuf_delete(sb);
1401                 free(buf, M_RCTL);
1402                 bufsize *= 4;
1403                 goto again;
1404         }
1405
1406         /*
1407          * Remove trailing ",".
1408          */
1409         if (sbuf_len(sb) > 0)
1410                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1411
1412         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1413
1414         rctl_rule_release(filter);
1415         sx_sunlock(&allproc_lock);
1416         free(buf, M_RCTL);
1417         return (error);
1418 }
1419
1420 int
1421 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1422 {
1423         int error;
1424         size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1425         char *inputstr, *buf;
1426         struct sbuf *sb;
1427         struct rctl_rule *filter;
1428         struct rctl_rule_link *link;
1429
1430         error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1431         if (error != 0)
1432                 return (error);
1433
1434         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1435         if (error != 0)
1436                 return (error);
1437
1438         sx_slock(&allproc_lock);
1439         error = rctl_string_to_rule(inputstr, &filter);
1440         free(inputstr, M_RCTL);
1441         if (error != 0) {
1442                 sx_sunlock(&allproc_lock);
1443                 return (error);
1444         }
1445
1446         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1447                 rctl_rule_release(filter);
1448                 sx_sunlock(&allproc_lock);
1449                 return (EINVAL);
1450         }
1451         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1452                 rctl_rule_release(filter);
1453                 sx_sunlock(&allproc_lock);
1454                 return (EOPNOTSUPP);
1455         }
1456         if (filter->rr_subject.rs_proc == NULL) {
1457                 rctl_rule_release(filter);
1458                 sx_sunlock(&allproc_lock);
1459                 return (EINVAL);
1460         }
1461
1462 again:
1463         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1464         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1465         KASSERT(sb != NULL, ("sbuf_new failed"));
1466
1467         rw_rlock(&rctl_lock);
1468         LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1469             rrl_next) {
1470                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1471                 sbuf_printf(sb, ",");
1472         }
1473         rw_runlock(&rctl_lock);
1474         if (sbuf_error(sb) == ENOMEM) {
1475                 sbuf_delete(sb);
1476                 free(buf, M_RCTL);
1477                 bufsize *= 4;
1478                 goto again;
1479         }
1480
1481         /*
1482          * Remove trailing ",".
1483          */
1484         if (sbuf_len(sb) > 0)
1485                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1486
1487         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1488         rctl_rule_release(filter);
1489         sx_sunlock(&allproc_lock);
1490         free(buf, M_RCTL);
1491         return (error);
1492 }
1493
1494 int
1495 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1496 {
1497         int error;
1498         struct rctl_rule *rule;
1499         char *inputstr;
1500
1501         error = priv_check(td, PRIV_RCTL_ADD_RULE);
1502         if (error != 0)
1503                 return (error);
1504
1505         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1506         if (error != 0)
1507                 return (error);
1508
1509         sx_slock(&allproc_lock);
1510         error = rctl_string_to_rule(inputstr, &rule);
1511         free(inputstr, M_RCTL);
1512         if (error != 0) {
1513                 sx_sunlock(&allproc_lock);
1514                 return (error);
1515         }
1516         /*
1517          * The 'per' part of a rule is optional.
1518          */
1519         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1520             rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1521                 rule->rr_per = rule->rr_subject_type;
1522
1523         if (!rctl_rule_fully_specified(rule)) {
1524                 error = EINVAL;
1525                 goto out;
1526         }
1527
1528         error = rctl_rule_add(rule);
1529
1530 out:
1531         rctl_rule_release(rule);
1532         sx_sunlock(&allproc_lock);
1533         return (error);
1534 }
1535
1536 int
1537 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1538 {
1539         int error;
1540         struct rctl_rule *filter;
1541         char *inputstr;
1542
1543         error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1544         if (error != 0)
1545                 return (error);
1546
1547         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1548         if (error != 0)
1549                 return (error);
1550
1551         sx_slock(&allproc_lock);
1552         error = rctl_string_to_rule(inputstr, &filter);
1553         free(inputstr, M_RCTL);
1554         if (error != 0) {
1555                 sx_sunlock(&allproc_lock);
1556                 return (error);
1557         }
1558
1559         error = rctl_rule_remove(filter);
1560         rctl_rule_release(filter);
1561         sx_sunlock(&allproc_lock);
1562
1563         return (error);
1564 }
1565
1566 /*
1567  * Update RCTL rule list after credential change.
1568  */
1569 void
1570 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1571 {
1572         int rulecnt, i;
1573         struct rctl_rule_link *link, *newlink;
1574         struct uidinfo *newuip;
1575         struct loginclass *newlc;
1576         struct prison_racct *newprr;
1577         LIST_HEAD(, rctl_rule_link) newrules;
1578
1579         newuip = newcred->cr_ruidinfo;
1580         newlc = newcred->cr_loginclass;
1581         newprr = newcred->cr_prison->pr_prison_racct;
1582         
1583         LIST_INIT(&newrules);
1584
1585 again:
1586         /*
1587          * First, count the rules that apply to the process with new
1588          * credentials.
1589          */
1590         rulecnt = 0;
1591         rw_rlock(&rctl_lock);
1592         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1593                 if (link->rrl_rule->rr_subject_type ==
1594                     RCTL_SUBJECT_TYPE_PROCESS)
1595                         rulecnt++;
1596         }
1597         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1598                 rulecnt++;
1599         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1600                 rulecnt++;
1601         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1602                 rulecnt++;
1603         rw_runlock(&rctl_lock);
1604
1605         /*
1606          * Create temporary list.  We've dropped the rctl_lock in order
1607          * to use M_WAITOK.
1608          */
1609         for (i = 0; i < rulecnt; i++) {
1610                 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1611                 newlink->rrl_rule = NULL;
1612                 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1613         }
1614
1615         newlink = LIST_FIRST(&newrules);
1616
1617         /*
1618          * Assign rules to the newly allocated list entries.
1619          */
1620         rw_wlock(&rctl_lock);
1621         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1622                 if (link->rrl_rule->rr_subject_type ==
1623                     RCTL_SUBJECT_TYPE_PROCESS) {
1624                         if (newlink == NULL)
1625                                 goto goaround;
1626                         rctl_rule_acquire(link->rrl_rule);
1627                         newlink->rrl_rule = link->rrl_rule;
1628                         newlink = LIST_NEXT(newlink, rrl_next);
1629                         rulecnt--;
1630                 }
1631         }
1632         
1633         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1634                 if (newlink == NULL)
1635                         goto goaround;
1636                 rctl_rule_acquire(link->rrl_rule);
1637                 newlink->rrl_rule = link->rrl_rule;
1638                 newlink = LIST_NEXT(newlink, rrl_next);
1639                 rulecnt--;
1640         }
1641
1642         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1643                 if (newlink == NULL)
1644                         goto goaround;
1645                 rctl_rule_acquire(link->rrl_rule);
1646                 newlink->rrl_rule = link->rrl_rule;
1647                 newlink = LIST_NEXT(newlink, rrl_next);
1648                 rulecnt--;
1649         }
1650
1651         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1652                 if (newlink == NULL)
1653                         goto goaround;
1654                 rctl_rule_acquire(link->rrl_rule);
1655                 newlink->rrl_rule = link->rrl_rule;
1656                 newlink = LIST_NEXT(newlink, rrl_next);
1657                 rulecnt--;
1658         }
1659
1660         if (rulecnt == 0) {
1661                 /*
1662                  * Free the old rule list.
1663                  */
1664                 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1665                         link = LIST_FIRST(&p->p_racct->r_rule_links);
1666                         LIST_REMOVE(link, rrl_next);
1667                         rctl_rule_release(link->rrl_rule);
1668                         uma_zfree(rctl_rule_link_zone, link);
1669                 }
1670
1671                 /*
1672                  * Replace lists and we're done.
1673                  *
1674                  * XXX: Is there any way to switch list heads instead
1675                  *      of iterating here?
1676                  */
1677                 while (!LIST_EMPTY(&newrules)) {
1678                         newlink = LIST_FIRST(&newrules);
1679                         LIST_REMOVE(newlink, rrl_next);
1680                         LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1681                             newlink, rrl_next);
1682                 }
1683
1684                 rw_wunlock(&rctl_lock);
1685
1686                 return;
1687         }
1688
1689 goaround:
1690         rw_wunlock(&rctl_lock);
1691
1692         /*
1693          * Rule list changed while we were not holding the rctl_lock.
1694          * Free the new list and try again.
1695          */
1696         while (!LIST_EMPTY(&newrules)) {
1697                 newlink = LIST_FIRST(&newrules);
1698                 LIST_REMOVE(newlink, rrl_next);
1699                 if (newlink->rrl_rule != NULL)
1700                         rctl_rule_release(newlink->rrl_rule);
1701                 uma_zfree(rctl_rule_link_zone, newlink);
1702         }
1703
1704         goto again;
1705 }
1706
1707 /*
1708  * Assign RCTL rules to the newly created process.
1709  */
1710 int
1711 rctl_proc_fork(struct proc *parent, struct proc *child)
1712 {
1713         int error;
1714         struct rctl_rule_link *link;
1715         struct rctl_rule *rule;
1716
1717         LIST_INIT(&child->p_racct->r_rule_links);
1718
1719         /*
1720          * No limits for kernel processes.
1721          */
1722         if (child->p_flag & P_SYSTEM)
1723                 return (0);
1724
1725         /*
1726          * Nothing to inherit from P_SYSTEM parents.
1727          */
1728         if (parent->p_racct == NULL) {
1729                 KASSERT(parent->p_flag & P_SYSTEM,
1730                     ("non-system process without racct; p = %p", parent));
1731                 return (0);
1732         }
1733
1734         rw_wlock(&rctl_lock);
1735
1736         /*
1737          * Go through limits applicable to the parent and assign them
1738          * to the child.  Rules with 'process' subject have to be duplicated
1739          * in order to make their rr_subject point to the new process.
1740          */
1741         LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1742                 if (link->rrl_rule->rr_subject_type ==
1743                     RCTL_SUBJECT_TYPE_PROCESS) {
1744                         rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1745                         if (rule == NULL)
1746                                 goto fail;
1747                         KASSERT(rule->rr_subject.rs_proc == parent,
1748                             ("rule->rr_subject.rs_proc != parent"));
1749                         rule->rr_subject.rs_proc = child;
1750                         error = rctl_racct_add_rule_locked(child->p_racct,
1751                             rule);
1752                         rctl_rule_release(rule);
1753                         if (error != 0)
1754                                 goto fail;
1755                 } else {
1756                         error = rctl_racct_add_rule_locked(child->p_racct,
1757                             link->rrl_rule);
1758                         if (error != 0)
1759                                 goto fail;
1760                 }
1761         }
1762
1763         rw_wunlock(&rctl_lock);
1764         return (0);
1765
1766 fail:
1767         while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1768                 link = LIST_FIRST(&child->p_racct->r_rule_links);
1769                 LIST_REMOVE(link, rrl_next);
1770                 rctl_rule_release(link->rrl_rule);
1771                 uma_zfree(rctl_rule_link_zone, link);
1772         }
1773         rw_wunlock(&rctl_lock);
1774         return (EAGAIN);
1775 }
1776
1777 /*
1778  * Release rules attached to the racct.
1779  */
1780 void
1781 rctl_racct_release(struct racct *racct)
1782 {
1783         struct rctl_rule_link *link;
1784
1785         rw_wlock(&rctl_lock);
1786         while (!LIST_EMPTY(&racct->r_rule_links)) {
1787                 link = LIST_FIRST(&racct->r_rule_links);
1788                 LIST_REMOVE(link, rrl_next);
1789                 rctl_rule_release(link->rrl_rule);
1790                 uma_zfree(rctl_rule_link_zone, link);
1791         }
1792         rw_wunlock(&rctl_lock);
1793 }
1794
1795 static void
1796 rctl_init(void)
1797 {
1798
1799         rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1800             sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1801             UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1802         rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1803             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1804 }
1805
1806 #else /* !RCTL */
1807
/*
 * Stub used when the kernel is built without RCTL: the arguments are
 * ignored and the call always fails with ENOSYS.
 */
int
sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{

        return (ENOSYS);
}
1814
/*
 * Stub used when the kernel is built without RCTL: the arguments are
 * ignored and the call always fails with ENOSYS.
 */
int
sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{

        return (ENOSYS);
}
1821
/*
 * Stub used when the kernel is built without RCTL: the arguments are
 * ignored and the call always fails with ENOSYS.
 */
int
sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{

        return (ENOSYS);
}
1828
/*
 * Stub used when the kernel is built without RCTL: the arguments are
 * ignored and the call always fails with ENOSYS.
 */
int
sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{

        return (ENOSYS);
}
1835
/*
 * Stub used when the kernel is built without RCTL: the arguments are
 * ignored and the call always fails with ENOSYS.
 */
int
sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{

        return (ENOSYS);
}
1842
1843 #endif /* !RCTL */