]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/kern/kern_rctl.c
MFhead @ r292177
[FreeBSD/FreeBSD.git] / sys / kern / kern_rctl.c
1 /*-
2  * Copyright (c) 2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/malloc.h>
38 #include <sys/queue.h>
39 #include <sys/refcount.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/limits.h>
43 #include <sys/loginclass.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/racct.h>
47 #include <sys/rctl.h>
48 #include <sys/resourcevar.h>
49 #include <sys/sx.h>
50 #include <sys/sysent.h>
51 #include <sys/sysproto.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/eventhandler.h>
55 #include <sys/lock.h>
56 #include <sys/mutex.h>
57 #include <sys/rwlock.h>
58 #include <sys/sbuf.h>
59 #include <sys/taskqueue.h>
60 #include <sys/tree.h>
61 #include <vm/uma.h>
62
63 #ifdef RCTL
64 #ifndef RACCT
65 #error "The RCTL option requires the RACCT option"
66 #endif
67
68 FEATURE(rctl, "Resource Limits");
69
/*
 * HRF_* flag values.  Nothing in the visible part of this file
 * references them.
 */
#define HRF_DEFAULT             0
#define HRF_DONT_INHERIT        1
#define HRF_DONT_ACCUMULATE     2

/*
 * Buffer size limits.  The expansions are parenthesized so that each
 * macro stays a single operand when it appears inside a larger
 * expression (e.g. 'x / RCTL_MAX_INBUFSIZE' or 'x % RCTL_MAX_OUTBUFSIZE').
 */
#define RCTL_MAX_INBUFSIZE      (4 * 1024)
#define RCTL_MAX_OUTBUFSIZE     (16 * 1024 * 1024)
#define RCTL_LOG_BUFSIZE        128

/*
 * Margin used by rctl_pcpu_available() when it under-reports the
 * available %CPU amount.
 */
#define RCTL_PCPU_SHIFT         (10 * 1000000)
79
/*
 * Maximum output buffer size; initialized to RCTL_MAX_OUTBUFSIZE and
 * exported read-write (CTLFLAG_RWTUN) as the 'maxbufsize' sysctl under
 * the 'rctl' node declared below.
 */
unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE;

SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW, 0, "Resource Limits");
SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN,
    &rctl_maxbufsize, 0, "Maximum output buffer size");
85
/*
 * 'rctl_rule_link' connects a rule with every racct it's related to.
 * For example, rule 'user:X:openfiles:deny=N/process' is linked
 * with uidinfo for user X, and to each process of that user.
 */
struct rctl_rule_link {
        /* Linkage on the owning racct's r_rule_links list. */
        LIST_ENTRY(rctl_rule_link)      rrl_next;
        /* The rule this link refers to; the link holds a reference. */
        struct rctl_rule                *rrl_rule;
        /*
         * Set to 1 by rctl_enforce() once the rule's log/devctl/signal
         * action has been performed, cleared to 0 when the rule is no
         * longer exceeded; prevents repeating the action.
         */
        int                             rrl_exceeded;
};
96
/*
 * One entry of a name <-> integer value lookup table.  Tables built
 * from this type are terminated by an entry whose d_name is NULL.
 */
struct dict {
        const char      *d_name;        /* textual name */
        int             d_value;        /* numeric constant it maps to */
};
101
/* Names of the RCTL_SUBJECT_TYPE_* values; NULL-terminated. */
static struct dict subjectnames[] = {
        { "process", RCTL_SUBJECT_TYPE_PROCESS },
        { "user", RCTL_SUBJECT_TYPE_USER },
        { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
        { "jail", RCTL_SUBJECT_TYPE_JAIL },
        { NULL, -1 }};
108
/* Names of the RACCT_* resource identifiers; NULL-terminated. */
static struct dict resourcenames[] = {
        { "cputime", RACCT_CPU },
        { "datasize", RACCT_DATA },
        { "stacksize", RACCT_STACK },
        { "coredumpsize", RACCT_CORE },
        { "memoryuse", RACCT_RSS },
        { "memorylocked", RACCT_MEMLOCK },
        { "maxproc", RACCT_NPROC },
        { "openfiles", RACCT_NOFILE },
        { "vmemoryuse", RACCT_VMEM },
        { "pseudoterminals", RACCT_NPTS },
        { "swapuse", RACCT_SWAP },
        { "nthr", RACCT_NTHR },
        { "msgqqueued", RACCT_MSGQQUEUED },
        { "msgqsize", RACCT_MSGQSIZE },
        { "nmsgq", RACCT_NMSGQ },
        { "nsem", RACCT_NSEM },
        { "nsemop", RACCT_NSEMOP },
        { "nshm", RACCT_NSHM },
        { "shmsize", RACCT_SHMSIZE },
        { "wallclock", RACCT_WALLCLOCK },
        { "pcpu", RACCT_PCTCPU },
        { NULL, -1 }};
132
/*
 * Names of the RCTL_ACTION_* values; NULL-terminated.  The "sig*"
 * entries are the signal-delivering actions; "deny", "log" and "devctl"
 * are handled explicitly by rctl_enforce().
 */
static struct dict actionnames[] = {
        { "sighup", RCTL_ACTION_SIGHUP },
        { "sigint", RCTL_ACTION_SIGINT },
        { "sigquit", RCTL_ACTION_SIGQUIT },
        { "sigill", RCTL_ACTION_SIGILL },
        { "sigtrap", RCTL_ACTION_SIGTRAP },
        { "sigabrt", RCTL_ACTION_SIGABRT },
        { "sigemt", RCTL_ACTION_SIGEMT },
        { "sigfpe", RCTL_ACTION_SIGFPE },
        { "sigkill", RCTL_ACTION_SIGKILL },
        { "sigbus", RCTL_ACTION_SIGBUS },
        { "sigsegv", RCTL_ACTION_SIGSEGV },
        { "sigsys", RCTL_ACTION_SIGSYS },
        { "sigpipe", RCTL_ACTION_SIGPIPE },
        { "sigalrm", RCTL_ACTION_SIGALRM },
        { "sigterm", RCTL_ACTION_SIGTERM },
        { "sigurg", RCTL_ACTION_SIGURG },
        { "sigstop", RCTL_ACTION_SIGSTOP },
        { "sigtstp", RCTL_ACTION_SIGTSTP },
        { "sigchld", RCTL_ACTION_SIGCHLD },
        { "sigttin", RCTL_ACTION_SIGTTIN },
        { "sigttou", RCTL_ACTION_SIGTTOU },
        { "sigio", RCTL_ACTION_SIGIO },
        { "sigxcpu", RCTL_ACTION_SIGXCPU },
        { "sigxfsz", RCTL_ACTION_SIGXFSZ },
        { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
        { "sigprof", RCTL_ACTION_SIGPROF },
        { "sigwinch", RCTL_ACTION_SIGWINCH },
        { "siginfo", RCTL_ACTION_SIGINFO },
        { "sigusr1", RCTL_ACTION_SIGUSR1 },
        { "sigusr2", RCTL_ACTION_SIGUSR2 },
        { "sigthr", RCTL_ACTION_SIGTHR },
        { "deny", RCTL_ACTION_DENY },
        { "log", RCTL_ACTION_LOG },
        { "devctl", RCTL_ACTION_DEVCTL },
        { NULL, -1 }};
169
/* Subsystem initialization routine, registered to run via SYSINIT. */
static void rctl_init(void);
SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);

/* UMA zones from which rule links and rules are allocated. */
static uma_zone_t rctl_rule_link_zone;
static uma_zone_t rctl_rule_zone;
/*
 * Read/write lock taken shared while walking a racct's r_rule_links
 * list and exclusive while inserting into or removing from it.
 */
static struct rwlock rctl_lock;
RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");

/* Forward declarations for helpers used before their definitions. */
static int rctl_rule_fully_specified(const struct rctl_rule *rule);
static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);

/* malloc(9) type for this file's temporary buffers. */
static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
182
183 static const char *
184 rctl_subject_type_name(int subject)
185 {
186         int i;
187
188         for (i = 0; subjectnames[i].d_name != NULL; i++) {
189                 if (subjectnames[i].d_value == subject)
190                         return (subjectnames[i].d_name);
191         }
192
193         panic("rctl_subject_type_name: unknown subject type %d", subject);
194 }
195
196 static const char *
197 rctl_action_name(int action)
198 {
199         int i;
200
201         for (i = 0; actionnames[i].d_name != NULL; i++) {
202                 if (actionnames[i].d_value == action)
203                         return (actionnames[i].d_name);
204         }
205
206         panic("rctl_action_name: unknown action %d", action);
207 }
208
209 const char *
210 rctl_resource_name(int resource)
211 {
212         int i;
213
214         for (i = 0; resourcenames[i].d_name != NULL; i++) {
215                 if (resourcenames[i].d_value == resource)
216                         return (resourcenames[i].d_name);
217         }
218
219         panic("rctl_resource_name: unknown resource %d", resource);
220 }
221
222 /*
223  * Return the amount of resource that can be allocated by 'p' before
224  * hitting 'rule'.
225  */
226 static int64_t
227 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
228 {
229         int resource;
230         int64_t available = INT64_MAX;
231         struct ucred *cred = p->p_ucred;
232
233         ASSERT_RACCT_ENABLED();
234         rw_assert(&rctl_lock, RA_LOCKED);
235
236         resource = rule->rr_resource;
237         switch (rule->rr_per) {
238         case RCTL_SUBJECT_TYPE_PROCESS:
239                 available = rule->rr_amount -
240                     p->p_racct->r_resources[resource];
241                 break;
242         case RCTL_SUBJECT_TYPE_USER:
243                 available = rule->rr_amount -
244                     cred->cr_ruidinfo->ui_racct->r_resources[resource];
245                 break;
246         case RCTL_SUBJECT_TYPE_LOGINCLASS:
247                 available = rule->rr_amount -
248                     cred->cr_loginclass->lc_racct->r_resources[resource];
249                 break;
250         case RCTL_SUBJECT_TYPE_JAIL:
251                 available = rule->rr_amount -
252                     cred->cr_prison->pr_prison_racct->prr_racct->
253                         r_resources[resource];
254                 break;
255         default:
256                 panic("rctl_compute_available: unknown per %d",
257                     rule->rr_per);
258         }
259
260         return (available);
261 }
262
263 /*
264  * Return non-zero if allocating 'amount' by proc 'p' would exceed
265  * resource limit specified by 'rule'.
266  */
267 static int
268 rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
269     int64_t amount)
270 {
271         int64_t available;
272
273         ASSERT_RACCT_ENABLED();
274
275         rw_assert(&rctl_lock, RA_LOCKED);
276
277         available = rctl_available_resource(p, rule);
278         if (available >= amount)
279                 return (0);
280
281         return (1);
282 }
283
284 /*
285  * Special version of rctl_get_available() for the %CPU resource.
286  * We slightly cheat here and return less than we normally would.
287  */
288 int64_t
289 rctl_pcpu_available(const struct proc *p) {
290         struct rctl_rule *rule;
291         struct rctl_rule_link *link;
292         int64_t available, minavailable, limit;
293
294         ASSERT_RACCT_ENABLED();
295
296         minavailable = INT64_MAX;
297         limit = 0;
298
299         rw_rlock(&rctl_lock);
300
301         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
302                 rule = link->rrl_rule;
303                 if (rule->rr_resource != RACCT_PCTCPU)
304                         continue;
305                 if (rule->rr_action != RCTL_ACTION_DENY)
306                         continue;
307                 available = rctl_available_resource(p, rule);
308                 if (available < minavailable) {
309                         minavailable = available;
310                         limit = rule->rr_amount;
311                 }
312         }
313
314         rw_runlock(&rctl_lock);
315
316         /*
317          * Return slightly less than actual value of the available
318          * %cpu resource.  This makes %cpu throttling more agressive
319          * and lets us act sooner than the limits are already exceeded.
320          */
321         if (limit != 0) {
322                 if (limit > 2 * RCTL_PCPU_SHIFT)
323                         minavailable -= RCTL_PCPU_SHIFT;
324                 else
325                         minavailable -= (limit / 2);
326         }
327
328         return (minavailable);
329 }
330
/*
 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
 * to what it keeps allocated now.  Returns non-zero if the allocation should
 * be denied, 0 otherwise.
 *
 * Every rule linked to the racct of 'p' that covers 'resource' and would be
 * exceeded has its action carried out: "deny" makes the function return
 * EDOOFUS, "log" prints a message, "devctl" sends a devctl notification and
 * the remaining actions deliver a signal to 'p'.  The non-deny actions are
 * performed once per excess, tracked via the link's rrl_exceeded flag.
 */
int
rctl_enforce(struct proc *p, int resource, uint64_t amount)
{
        struct rctl_rule *rule;
        struct rctl_rule_link *link;
        struct sbuf sb;
        int should_deny = 0;
        char *buf;
        /* State for ppsratecheck(); static, so shared by all callers. */
        static int curtime = 0;
        static struct timeval lasttime;

        ASSERT_RACCT_ENABLED();

        rw_rlock(&rctl_lock);

        /*
         * There may be more than one matching rule; go through all of them.
         * Denial should be done last, after logging and sending signals.
         */
        LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
                rule = link->rrl_rule;
                if (rule->rr_resource != resource)
                        continue;
                if (!rctl_would_exceed(p, rule, amount)) {
                        /* Back under the limit: re-arm the action. */
                        link->rrl_exceeded = 0;
                        continue;
                }

                switch (rule->rr_action) {
                case RCTL_ACTION_DENY:
                        /* Only remember it; keep processing other rules. */
                        should_deny = 1;
                        continue;
                case RCTL_ACTION_LOG:
                        /*
                         * If rrl_exceeded != 0, it means we've already
                         * logged a warning for this process.
                         */
                        if (link->rrl_exceeded != 0)
                                continue;

                        /*
                         * If the process state is not fully initialized yet,
                         * we can't access most of the required fields, e.g.
                         * p->p_comm.  This happens when called from fork1().
                         * Ignore this rule for now; it will be processed just
                         * after fork, when called from racct_proc_fork_done().
                         */
                        if (p->p_state != PRS_NORMAL)
                                continue;

                        /* Rate-limit the log output (limit argument: 10). */
                        if (!ppsratecheck(&lasttime, &curtime, 10))
                                continue;

                        /* M_NOWAIT: allocation may fail; the rule is skipped. */
                        buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
                        if (buf == NULL) {
                                printf("rctl_enforce: out of memory\n");
                                continue;
                        }
                        /* Render the rule as text into the fixed-size buffer. */
                        sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
                        rctl_rule_to_sbuf(&sb, rule);
                        sbuf_finish(&sb);
                        printf("rctl: rule \"%s\" matched by pid %d "
                            "(%s), uid %d, jail %s\n", sbuf_data(&sb),
                            p->p_pid, p->p_comm, p->p_ucred->cr_uid,
                            p->p_ucred->cr_prison->pr_prison_racct->prr_name);
                        sbuf_delete(&sb);
                        free(buf, M_RCTL);
                        link->rrl_exceeded = 1;
                        continue;
                case RCTL_ACTION_DEVCTL:
                        /* Already notified for this excess. */
                        if (link->rrl_exceeded != 0)
                                continue;

                        /* See the PRS_NORMAL note in the "log" case above. */
                        if (p->p_state != PRS_NORMAL)
                                continue;
        
                        buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
                        if (buf == NULL) {
                                printf("rctl_enforce: out of memory\n");
                                continue;
                        }
                        /* Build "rule=<rule> pid=<pid> ruid=<ruid> jail=<name>". */
                        sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
                        sbuf_printf(&sb, "rule=");
                        rctl_rule_to_sbuf(&sb, rule);
                        sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
                            p->p_pid, p->p_ucred->cr_ruid,
                            p->p_ucred->cr_prison->pr_prison_racct->prr_name);
                        sbuf_finish(&sb);
                        devctl_notify_f("RCTL", "rule", "matched",
                            sbuf_data(&sb), M_NOWAIT);
                        sbuf_delete(&sb);
                        free(buf, M_RCTL);
                        link->rrl_exceeded = 1;
                        continue;
                default:
                        /* Everything else is a signal-delivering action. */
                        if (link->rrl_exceeded != 0)
                                continue;

                        if (p->p_state != PRS_NORMAL)
                                continue;

                        KASSERT(rule->rr_action > 0 &&
                            rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
                            ("rctl_enforce: unknown action %d",
                             rule->rr_action));

                        /*
                         * We're using the fact that RCTL_ACTION_SIG* values
                         * are equal to their counterparts from sys/signal.h.
                         */
                        kern_psignal(p, rule->rr_action);
                        link->rrl_exceeded = 1;
                        continue;
                }
        }

        rw_runlock(&rctl_lock);

        if (should_deny) {
                /*
                 * Return fake error code; the caller should change it
                 * into one proper for the situation - EFBIG, ENOMEM etc.
                 */
                return (EDOOFUS);
        }

        return (0);
}
464
465 uint64_t
466 rctl_get_limit(struct proc *p, int resource)
467 {
468         struct rctl_rule *rule;
469         struct rctl_rule_link *link;
470         uint64_t amount = UINT64_MAX;
471
472         ASSERT_RACCT_ENABLED();
473
474         rw_rlock(&rctl_lock);
475
476         /*
477          * There may be more than one matching rule; go through all of them.
478          * Denial should be done last, after logging and sending signals.
479          */
480         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
481                 rule = link->rrl_rule;
482                 if (rule->rr_resource != resource)
483                         continue;
484                 if (rule->rr_action != RCTL_ACTION_DENY)
485                         continue;
486                 if (rule->rr_amount < amount)
487                         amount = rule->rr_amount;
488         }
489
490         rw_runlock(&rctl_lock);
491
492         return (amount);
493 }
494
495 uint64_t
496 rctl_get_available(struct proc *p, int resource)
497 {
498         struct rctl_rule *rule;
499         struct rctl_rule_link *link;
500         int64_t available, minavailable, allocated;
501
502         minavailable = INT64_MAX;
503
504         ASSERT_RACCT_ENABLED();
505
506         rw_rlock(&rctl_lock);
507
508         /*
509          * There may be more than one matching rule; go through all of them.
510          * Denial should be done last, after logging and sending signals.
511          */
512         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
513                 rule = link->rrl_rule;
514                 if (rule->rr_resource != resource)
515                         continue;
516                 if (rule->rr_action != RCTL_ACTION_DENY)
517                         continue;
518                 available = rctl_available_resource(p, rule);
519                 if (available < minavailable)
520                         minavailable = available;
521         }
522
523         rw_runlock(&rctl_lock);
524
525         /*
526          * XXX: Think about this _hard_.
527          */
528         allocated = p->p_racct->r_resources[resource];
529         if (minavailable < INT64_MAX - allocated)
530                 minavailable += allocated;
531         if (minavailable < 0)
532                 minavailable = 0;
533         return (minavailable);
534 }
535
536 static int
537 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
538 {
539
540         ASSERT_RACCT_ENABLED();
541
542         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
543                 if (rule->rr_subject_type != filter->rr_subject_type)
544                         return (0);
545
546                 switch (filter->rr_subject_type) {
547                 case RCTL_SUBJECT_TYPE_PROCESS:
548                         if (filter->rr_subject.rs_proc != NULL &&
549                             rule->rr_subject.rs_proc !=
550                             filter->rr_subject.rs_proc)
551                                 return (0);
552                         break;
553                 case RCTL_SUBJECT_TYPE_USER:
554                         if (filter->rr_subject.rs_uip != NULL &&
555                             rule->rr_subject.rs_uip !=
556                             filter->rr_subject.rs_uip)
557                                 return (0);
558                         break;
559                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
560                         if (filter->rr_subject.rs_loginclass != NULL &&
561                             rule->rr_subject.rs_loginclass !=
562                             filter->rr_subject.rs_loginclass)
563                                 return (0);
564                         break;
565                 case RCTL_SUBJECT_TYPE_JAIL:
566                         if (filter->rr_subject.rs_prison_racct != NULL &&
567                             rule->rr_subject.rs_prison_racct !=
568                             filter->rr_subject.rs_prison_racct)
569                                 return (0);
570                         break;
571                 default:
572                         panic("rctl_rule_matches: unknown subject type %d",
573                             filter->rr_subject_type);
574                 }
575         }
576
577         if (filter->rr_resource != RACCT_UNDEFINED) {
578                 if (rule->rr_resource != filter->rr_resource)
579                         return (0);
580         }
581
582         if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
583                 if (rule->rr_action != filter->rr_action)
584                         return (0);
585         }
586
587         if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
588                 if (rule->rr_amount != filter->rr_amount)
589                         return (0);
590         }
591
592         if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
593                 if (rule->rr_per != filter->rr_per)
594                         return (0);
595         }
596
597         return (1);
598 }
599
600 static int
601 str2value(const char *str, int *value, struct dict *table)
602 {
603         int i;
604
605         if (value == NULL)
606                 return (EINVAL);
607
608         for (i = 0; table[i].d_name != NULL; i++) {
609                 if (strcasecmp(table[i].d_name, str) == 0) {
610                         *value =  table[i].d_value;
611                         return (0);
612                 }
613         }
614
615         return (EINVAL);
616 }
617
618 static int
619 str2id(const char *str, id_t *value)
620 {
621         char *end;
622
623         if (str == NULL)
624                 return (EINVAL);
625
626         *value = strtoul(str, &end, 10);
627         if ((size_t)(end - str) != strlen(str))
628                 return (EINVAL);
629
630         return (0);
631 }
632
/*
 * Convert the decimal string 'str' to a non-negative int64_t and store
 * it in '*value'.
 *
 * Returns 0 on success, EINVAL if 'str' is NULL or is not consumed
 * completely by the conversion, and ERANGE if the converted number is
 * larger than INT64_MAX.  '*value' is written only on success.
 */
static int
str2int64(const char *str, int64_t *value)
{
        unsigned long parsed;
        char *end;

        if (str == NULL)
                return (EINVAL);

        parsed = strtoul(str, &end, 10);
        if (*end != '\0')
                return (EINVAL);

        /*
         * strtoul() produces an unsigned value and negates it (modulo
         * ULONG_MAX + 1) for input with a leading '-'.  Anything above
         * INT64_MAX would show up as a negative amount once stored in
         * an int64_t, so refuse it.  Where unsigned long is narrower
         * than 64 bits this check cannot trigger.
         */
        if ((uint64_t)parsed > (uint64_t)INT64_MAX)
                return (ERANGE);

        *value = (int64_t)parsed;
        return (0);
}
647
648 /*
649  * Connect the rule to the racct, increasing refcount for the rule.
650  */
651 static void
652 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
653 {
654         struct rctl_rule_link *link;
655
656         ASSERT_RACCT_ENABLED();
657         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
658
659         rctl_rule_acquire(rule);
660         link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
661         link->rrl_rule = rule;
662         link->rrl_exceeded = 0;
663
664         rw_wlock(&rctl_lock);
665         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
666         rw_wunlock(&rctl_lock);
667 }
668
669 static int
670 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
671 {
672         struct rctl_rule_link *link;
673
674         ASSERT_RACCT_ENABLED();
675         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
676         rw_assert(&rctl_lock, RA_WLOCKED);
677
678         link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
679         if (link == NULL)
680                 return (ENOMEM);
681         rctl_rule_acquire(rule);
682         link->rrl_rule = rule;
683         link->rrl_exceeded = 0;
684
685         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
686         return (0);
687 }
688
689 /*
690  * Remove limits for a rules matching the filter and release
691  * the refcounts for the rules, possibly freeing them.  Returns
692  * the number of limit structures removed.
693  */
694 static int
695 rctl_racct_remove_rules(struct racct *racct,
696     const struct rctl_rule *filter)
697 {
698         int removed = 0;
699         struct rctl_rule_link *link, *linktmp;
700
701         ASSERT_RACCT_ENABLED();
702         rw_assert(&rctl_lock, RA_WLOCKED);
703
704         LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
705                 if (!rctl_rule_matches(link->rrl_rule, filter))
706                         continue;
707
708                 LIST_REMOVE(link, rrl_next);
709                 rctl_rule_release(link->rrl_rule);
710                 uma_zfree(rctl_rule_link_zone, link);
711                 removed++;
712         }
713         return (removed);
714 }
715
716 static void
717 rctl_rule_acquire_subject(struct rctl_rule *rule)
718 {
719
720         ASSERT_RACCT_ENABLED();
721
722         switch (rule->rr_subject_type) {
723         case RCTL_SUBJECT_TYPE_UNDEFINED:
724         case RCTL_SUBJECT_TYPE_PROCESS:
725                 break;
726         case RCTL_SUBJECT_TYPE_JAIL:
727                 if (rule->rr_subject.rs_prison_racct != NULL)
728                         prison_racct_hold(rule->rr_subject.rs_prison_racct);
729                 break;
730         case RCTL_SUBJECT_TYPE_USER:
731                 if (rule->rr_subject.rs_uip != NULL)
732                         uihold(rule->rr_subject.rs_uip);
733                 break;
734         case RCTL_SUBJECT_TYPE_LOGINCLASS:
735                 if (rule->rr_subject.rs_loginclass != NULL)
736                         loginclass_hold(rule->rr_subject.rs_loginclass);
737                 break;
738         default:
739                 panic("rctl_rule_acquire_subject: unknown subject type %d",
740                     rule->rr_subject_type);
741         }
742 }
743
744 static void
745 rctl_rule_release_subject(struct rctl_rule *rule)
746 {
747
748         ASSERT_RACCT_ENABLED();
749
750         switch (rule->rr_subject_type) {
751         case RCTL_SUBJECT_TYPE_UNDEFINED:
752         case RCTL_SUBJECT_TYPE_PROCESS:
753                 break;
754         case RCTL_SUBJECT_TYPE_JAIL:
755                 if (rule->rr_subject.rs_prison_racct != NULL)
756                         prison_racct_free(rule->rr_subject.rs_prison_racct);
757                 break;
758         case RCTL_SUBJECT_TYPE_USER:
759                 if (rule->rr_subject.rs_uip != NULL)
760                         uifree(rule->rr_subject.rs_uip);
761                 break;
762         case RCTL_SUBJECT_TYPE_LOGINCLASS:
763                 if (rule->rr_subject.rs_loginclass != NULL)
764                         loginclass_free(rule->rr_subject.rs_loginclass);
765                 break;
766         default:
767                 panic("rctl_rule_release_subject: unknown subject type %d",
768                     rule->rr_subject_type);
769         }
770 }
771
772 struct rctl_rule *
773 rctl_rule_alloc(int flags)
774 {
775         struct rctl_rule *rule;
776
777         ASSERT_RACCT_ENABLED();
778
779         rule = uma_zalloc(rctl_rule_zone, flags);
780         if (rule == NULL)
781                 return (NULL);
782         rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
783         rule->rr_subject.rs_proc = NULL;
784         rule->rr_subject.rs_uip = NULL;
785         rule->rr_subject.rs_loginclass = NULL;
786         rule->rr_subject.rs_prison_racct = NULL;
787         rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
788         rule->rr_resource = RACCT_UNDEFINED;
789         rule->rr_action = RCTL_ACTION_UNDEFINED;
790         rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
791         refcount_init(&rule->rr_refcount, 1);
792
793         return (rule);
794 }
795
796 struct rctl_rule *
797 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
798 {
799         struct rctl_rule *copy;
800
801         ASSERT_RACCT_ENABLED();
802
803         copy = uma_zalloc(rctl_rule_zone, flags);
804         if (copy == NULL)
805                 return (NULL);
806         copy->rr_subject_type = rule->rr_subject_type;
807         copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
808         copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
809         copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
810         copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
811         copy->rr_per = rule->rr_per;
812         copy->rr_resource = rule->rr_resource;
813         copy->rr_action = rule->rr_action;
814         copy->rr_amount = rule->rr_amount;
815         refcount_init(&copy->rr_refcount, 1);
816         rctl_rule_acquire_subject(copy);
817
818         return (copy);
819 }
820
/*
 * Take an additional reference on 'rule'.  The rule must already have
 * a positive reference count (asserted).
 */
void
rctl_rule_acquire(struct rctl_rule *rule)
{

        ASSERT_RACCT_ENABLED();
        KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));

        refcount_acquire(&rule->rr_refcount);
}
830
831 static void
832 rctl_rule_free(void *context, int pending)
833 {
834         struct rctl_rule *rule;
835         
836         rule = (struct rctl_rule *)context;
837
838         ASSERT_RACCT_ENABLED();
839         KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
840         
841         /*
842          * We don't need locking here; rule is guaranteed to be inaccessible.
843          */
844         
845         rctl_rule_release_subject(rule);
846         uma_zfree(rctl_rule_zone, rule);
847 }
848
849 void
850 rctl_rule_release(struct rctl_rule *rule)
851 {
852
853         ASSERT_RACCT_ENABLED();
854         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
855
856         if (refcount_release(&rule->rr_refcount)) {
857                 /*
858                  * rctl_rule_release() is often called when iterating
859                  * over all the uidinfo structures in the system,
860                  * holding uihashtbl_lock.  Since rctl_rule_free()
861                  * might end up calling uifree(), this would lead
862                  * to lock recursion.  Use taskqueue to avoid this.
863                  */
864                 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
865                 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
866         }
867 }
868
869 static int
870 rctl_rule_fully_specified(const struct rctl_rule *rule)
871 {
872
873         ASSERT_RACCT_ENABLED();
874
875         switch (rule->rr_subject_type) {
876         case RCTL_SUBJECT_TYPE_UNDEFINED:
877                 return (0);
878         case RCTL_SUBJECT_TYPE_PROCESS:
879                 if (rule->rr_subject.rs_proc == NULL)
880                         return (0);
881                 break;
882         case RCTL_SUBJECT_TYPE_USER:
883                 if (rule->rr_subject.rs_uip == NULL)
884                         return (0);
885                 break;
886         case RCTL_SUBJECT_TYPE_LOGINCLASS:
887                 if (rule->rr_subject.rs_loginclass == NULL)
888                         return (0);
889                 break;
890         case RCTL_SUBJECT_TYPE_JAIL:
891                 if (rule->rr_subject.rs_prison_racct == NULL)
892                         return (0);
893                 break;
894         default:
895                 panic("rctl_rule_fully_specified: unknown subject type %d",
896                     rule->rr_subject_type);
897         }
898         if (rule->rr_resource == RACCT_UNDEFINED)
899                 return (0);
900         if (rule->rr_action == RCTL_ACTION_UNDEFINED)
901                 return (0);
902         if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
903                 return (0);
904         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
905                 return (0);
906
907         return (1);
908 }
909
910 static int
911 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
912 {
913         int error = 0;
914         char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
915              *amountstr, *perstr;
916         struct rctl_rule *rule;
917         id_t id;
918
919         ASSERT_RACCT_ENABLED();
920
921         rule = rctl_rule_alloc(M_WAITOK);
922
923         subjectstr = strsep(&rulestr, ":");
924         subject_idstr = strsep(&rulestr, ":");
925         resourcestr = strsep(&rulestr, ":");
926         actionstr = strsep(&rulestr, "=/");
927         amountstr = strsep(&rulestr, "/");
928         perstr = rulestr;
929
930         if (subjectstr == NULL || subjectstr[0] == '\0')
931                 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
932         else {
933                 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
934                 if (error != 0)
935                         goto out;
936         }
937
938         if (subject_idstr == NULL || subject_idstr[0] == '\0') {
939                 rule->rr_subject.rs_proc = NULL;
940                 rule->rr_subject.rs_uip = NULL;
941                 rule->rr_subject.rs_loginclass = NULL;
942                 rule->rr_subject.rs_prison_racct = NULL;
943         } else {
944                 switch (rule->rr_subject_type) {
945                 case RCTL_SUBJECT_TYPE_UNDEFINED:
946                         error = EINVAL;
947                         goto out;
948                 case RCTL_SUBJECT_TYPE_PROCESS:
949                         error = str2id(subject_idstr, &id);
950                         if (error != 0)
951                                 goto out;
952                         sx_assert(&allproc_lock, SA_LOCKED);
953                         rule->rr_subject.rs_proc = pfind(id);
954                         if (rule->rr_subject.rs_proc == NULL) {
955                                 error = ESRCH;
956                                 goto out;
957                         }
958                         PROC_UNLOCK(rule->rr_subject.rs_proc);
959                         break;
960                 case RCTL_SUBJECT_TYPE_USER:
961                         error = str2id(subject_idstr, &id);
962                         if (error != 0)
963                                 goto out;
964                         rule->rr_subject.rs_uip = uifind(id);
965                         break;
966                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
967                         rule->rr_subject.rs_loginclass =
968                             loginclass_find(subject_idstr);
969                         if (rule->rr_subject.rs_loginclass == NULL) {
970                                 error = ENAMETOOLONG;
971                                 goto out;
972                         }
973                         break;
974                 case RCTL_SUBJECT_TYPE_JAIL:
975                         rule->rr_subject.rs_prison_racct =
976                             prison_racct_find(subject_idstr);
977                         if (rule->rr_subject.rs_prison_racct == NULL) {
978                                 error = ENAMETOOLONG;
979                                 goto out;
980                         }
981                         break;
982                default:
983                        panic("rctl_string_to_rule: unknown subject type %d",
984                            rule->rr_subject_type);
985                }
986         }
987
988         if (resourcestr == NULL || resourcestr[0] == '\0')
989                 rule->rr_resource = RACCT_UNDEFINED;
990         else {
991                 error = str2value(resourcestr, &rule->rr_resource,
992                     resourcenames);
993                 if (error != 0)
994                         goto out;
995         }
996
997         if (actionstr == NULL || actionstr[0] == '\0')
998                 rule->rr_action = RCTL_ACTION_UNDEFINED;
999         else {
1000                 error = str2value(actionstr, &rule->rr_action, actionnames);
1001                 if (error != 0)
1002                         goto out;
1003         }
1004
1005         if (amountstr == NULL || amountstr[0] == '\0')
1006                 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1007         else {
1008                 error = str2int64(amountstr, &rule->rr_amount);
1009                 if (error != 0)
1010                         goto out;
1011                 if (RACCT_IS_IN_MILLIONS(rule->rr_resource))
1012                         rule->rr_amount *= 1000000;
1013         }
1014
1015         if (perstr == NULL || perstr[0] == '\0')
1016                 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1017         else {
1018                 error = str2value(perstr, &rule->rr_per, subjectnames);
1019                 if (error != 0)
1020                         goto out;
1021         }
1022
1023 out:
1024         if (error == 0)
1025                 *rulep = rule;
1026         else
1027                 rctl_rule_release(rule);
1028
1029         return (error);
1030 }
1031
1032 /*
1033  * Link a rule with all the subjects it applies to.
1034  */
1035 int
1036 rctl_rule_add(struct rctl_rule *rule)
1037 {
1038         struct proc *p;
1039         struct ucred *cred;
1040         struct uidinfo *uip;
1041         struct prison *pr;
1042         struct prison_racct *prr;
1043         struct loginclass *lc;
1044         struct rctl_rule *rule2;
1045         int match;
1046
1047         ASSERT_RACCT_ENABLED();
1048         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
1049
1050         /*
1051          * Some rules just don't make sense.  Note that the one below
1052          * cannot be rewritten using RACCT_IS_DENIABLE(); the RACCT_PCTCPU,
1053          * for example, is not deniable in the racct sense, but the
1054          * limit is enforced in a different way, so "deny" rules for %CPU
1055          * do make sense.
1056          */
1057         if (rule->rr_action == RCTL_ACTION_DENY &&
1058             (rule->rr_resource == RACCT_CPU ||
1059             rule->rr_resource == RACCT_WALLCLOCK))
1060                 return (EOPNOTSUPP);
1061
1062         if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
1063             RACCT_IS_SLOPPY(rule->rr_resource))
1064                 return (EOPNOTSUPP);
1065
1066         /*
1067          * Make sure there are no duplicated rules.  Also, for the "deny"
1068          * rules, remove ones differing only by "amount".
1069          */
1070         if (rule->rr_action == RCTL_ACTION_DENY) {
1071                 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
1072                 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
1073                 rctl_rule_remove(rule2);
1074                 rctl_rule_release(rule2);
1075         } else
1076                 rctl_rule_remove(rule);
1077
1078         switch (rule->rr_subject_type) {
1079         case RCTL_SUBJECT_TYPE_PROCESS:
1080                 p = rule->rr_subject.rs_proc;
1081                 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
1082
1083                 rctl_racct_add_rule(p->p_racct, rule);
1084                 /*
1085                  * In case of per-process rule, we don't have anything more
1086                  * to do.
1087                  */
1088                 return (0);
1089
1090         case RCTL_SUBJECT_TYPE_USER:
1091                 uip = rule->rr_subject.rs_uip;
1092                 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1093                 rctl_racct_add_rule(uip->ui_racct, rule);
1094                 break;
1095
1096         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1097                 lc = rule->rr_subject.rs_loginclass;
1098                 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1099                 rctl_racct_add_rule(lc->lc_racct, rule);
1100                 break;
1101
1102         case RCTL_SUBJECT_TYPE_JAIL:
1103                 prr = rule->rr_subject.rs_prison_racct;
1104                 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1105                 rctl_racct_add_rule(prr->prr_racct, rule);
1106                 break;
1107
1108         default:
1109                 panic("rctl_rule_add: unknown subject type %d",
1110                     rule->rr_subject_type);
1111         }
1112
1113         /*
1114          * Now go through all the processes and add the new rule to the ones
1115          * it applies to.
1116          */
1117         sx_assert(&allproc_lock, SA_LOCKED);
1118         FOREACH_PROC_IN_SYSTEM(p) {
1119                 cred = p->p_ucred;
1120                 switch (rule->rr_subject_type) {
1121                 case RCTL_SUBJECT_TYPE_USER:
1122                         if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1123                             cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1124                                 break;
1125                         continue;
1126                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1127                         if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1128                                 break;
1129                         continue;
1130                 case RCTL_SUBJECT_TYPE_JAIL:
1131                         match = 0;
1132                         for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1133                                 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1134                                         match = 1;
1135                                         break;
1136                                 }
1137                         }
1138                         if (match)
1139                                 break;
1140                         continue;
1141                 default:
1142                         panic("rctl_rule_add: unknown subject type %d",
1143                             rule->rr_subject_type);
1144                 }
1145
1146                 rctl_racct_add_rule(p->p_racct, rule);
1147         }
1148
1149         return (0);
1150 }
1151
1152 static void
1153 rctl_rule_pre_callback(void)
1154 {
1155
1156         rw_wlock(&rctl_lock);
1157 }
1158
1159 static void
1160 rctl_rule_post_callback(void)
1161 {
1162
1163         rw_wunlock(&rctl_lock);
1164 }
1165
1166 static void
1167 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1168 {
1169         struct rctl_rule *filter = (struct rctl_rule *)arg2;
1170         int found = 0;
1171
1172         ASSERT_RACCT_ENABLED();
1173         rw_assert(&rctl_lock, RA_WLOCKED);
1174
1175         found += rctl_racct_remove_rules(racct, filter);
1176
1177         *((int *)arg3) += found;
1178 }
1179
1180 /*
1181  * Remove all rules that match the filter.
1182  */
1183 int
1184 rctl_rule_remove(struct rctl_rule *filter)
1185 {
1186         int found = 0;
1187         struct proc *p;
1188
1189         ASSERT_RACCT_ENABLED();
1190
1191         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1192             filter->rr_subject.rs_proc != NULL) {
1193                 p = filter->rr_subject.rs_proc;
1194                 rw_wlock(&rctl_lock);
1195                 found = rctl_racct_remove_rules(p->p_racct, filter);
1196                 rw_wunlock(&rctl_lock);
1197                 if (found)
1198                         return (0);
1199                 return (ESRCH);
1200         }
1201
1202         loginclass_racct_foreach(rctl_rule_remove_callback,
1203             rctl_rule_pre_callback, rctl_rule_post_callback,
1204             filter, (void *)&found);
1205         ui_racct_foreach(rctl_rule_remove_callback,
1206             rctl_rule_pre_callback, rctl_rule_post_callback,
1207             filter, (void *)&found);
1208         prison_racct_foreach(rctl_rule_remove_callback,
1209             rctl_rule_pre_callback, rctl_rule_post_callback,
1210             filter, (void *)&found);
1211
1212         sx_assert(&allproc_lock, SA_LOCKED);
1213         rw_wlock(&rctl_lock);
1214         FOREACH_PROC_IN_SYSTEM(p) {
1215                 found += rctl_racct_remove_rules(p->p_racct, filter);
1216         }
1217         rw_wunlock(&rctl_lock);
1218
1219         if (found)
1220                 return (0);
1221         return (ESRCH);
1222 }
1223
1224 /*
1225  * Appends a rule to the sbuf.
1226  */
1227 static void
1228 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1229 {
1230         int64_t amount;
1231
1232         ASSERT_RACCT_ENABLED();
1233
1234         sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1235
1236         switch (rule->rr_subject_type) {
1237         case RCTL_SUBJECT_TYPE_PROCESS:
1238                 if (rule->rr_subject.rs_proc == NULL)
1239                         sbuf_printf(sb, ":");
1240                 else
1241                         sbuf_printf(sb, "%d:",
1242                             rule->rr_subject.rs_proc->p_pid);
1243                 break;
1244         case RCTL_SUBJECT_TYPE_USER:
1245                 if (rule->rr_subject.rs_uip == NULL)
1246                         sbuf_printf(sb, ":");
1247                 else
1248                         sbuf_printf(sb, "%d:",
1249                             rule->rr_subject.rs_uip->ui_uid);
1250                 break;
1251         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1252                 if (rule->rr_subject.rs_loginclass == NULL)
1253                         sbuf_printf(sb, ":");
1254                 else
1255                         sbuf_printf(sb, "%s:",
1256                             rule->rr_subject.rs_loginclass->lc_name);
1257                 break;
1258         case RCTL_SUBJECT_TYPE_JAIL:
1259                 if (rule->rr_subject.rs_prison_racct == NULL)
1260                         sbuf_printf(sb, ":");
1261                 else
1262                         sbuf_printf(sb, "%s:",
1263                             rule->rr_subject.rs_prison_racct->prr_name);
1264                 break;
1265         default:
1266                 panic("rctl_rule_to_sbuf: unknown subject type %d",
1267                     rule->rr_subject_type);
1268         }
1269
1270         amount = rule->rr_amount;
1271         if (amount != RCTL_AMOUNT_UNDEFINED &&
1272             RACCT_IS_IN_MILLIONS(rule->rr_resource))
1273                 amount /= 1000000;
1274
1275         sbuf_printf(sb, "%s:%s=%jd",
1276             rctl_resource_name(rule->rr_resource),
1277             rctl_action_name(rule->rr_action),
1278             amount);
1279
1280         if (rule->rr_per != rule->rr_subject_type)
1281                 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1282 }
1283
1284 /*
1285  * Routine used by RCTL syscalls to read in input string.
1286  */
1287 static int
1288 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1289 {
1290         int error;
1291         char *str;
1292
1293         ASSERT_RACCT_ENABLED();
1294
1295         if (inbuflen <= 0)
1296                 return (EINVAL);
1297         if (inbuflen > RCTL_MAX_INBUFSIZE)
1298                 return (E2BIG);
1299
1300         str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1301         error = copyinstr(inbufp, str, inbuflen, NULL);
1302         if (error != 0) {
1303                 free(str, M_RCTL);
1304                 return (error);
1305         }
1306
1307         *inputstr = str;
1308
1309         return (0);
1310 }
1311
1312 /*
1313  * Routine used by RCTL syscalls to write out output string.
1314  */
1315 static int
1316 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1317 {
1318         int error;
1319
1320         ASSERT_RACCT_ENABLED();
1321
1322         if (outputsbuf == NULL)
1323                 return (0);
1324
1325         sbuf_finish(outputsbuf);
1326         if (outbuflen < sbuf_len(outputsbuf) + 1) {
1327                 sbuf_delete(outputsbuf);
1328                 return (ERANGE);
1329         }
1330         error = copyout(sbuf_data(outputsbuf), outbufp,
1331             sbuf_len(outputsbuf) + 1);
1332         sbuf_delete(outputsbuf);
1333         return (error);
1334 }
1335
1336 static struct sbuf *
1337 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1338 {
1339         int i;
1340         int64_t amount;
1341         struct sbuf *sb;
1342
1343         ASSERT_RACCT_ENABLED();
1344
1345         sb = sbuf_new_auto();
1346         for (i = 0; i <= RACCT_MAX; i++) {
1347                 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1348                         continue;
1349                 amount = racct->r_resources[i];
1350                 if (RACCT_IS_IN_MILLIONS(i))
1351                         amount /= 1000000;
1352                 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1353         }
1354         sbuf_setpos(sb, sbuf_len(sb) - 1);
1355         return (sb);
1356 }
1357
1358 int
1359 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1360 {
1361         int error;
1362         char *inputstr;
1363         struct rctl_rule *filter;
1364         struct sbuf *outputsbuf = NULL;
1365         struct proc *p;
1366         struct uidinfo *uip;
1367         struct loginclass *lc;
1368         struct prison_racct *prr;
1369
1370         if (!racct_enable)
1371                 return (ENOSYS);
1372
1373         error = priv_check(td, PRIV_RCTL_GET_RACCT);
1374         if (error != 0)
1375                 return (error);
1376
1377         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1378         if (error != 0)
1379                 return (error);
1380
1381         sx_slock(&allproc_lock);
1382         error = rctl_string_to_rule(inputstr, &filter);
1383         free(inputstr, M_RCTL);
1384         if (error != 0) {
1385                 sx_sunlock(&allproc_lock);
1386                 return (error);
1387         }
1388
1389         switch (filter->rr_subject_type) {
1390         case RCTL_SUBJECT_TYPE_PROCESS:
1391                 p = filter->rr_subject.rs_proc;
1392                 if (p == NULL) {
1393                         error = EINVAL;
1394                         goto out;
1395                 }
1396                 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1397                 break;
1398         case RCTL_SUBJECT_TYPE_USER:
1399                 uip = filter->rr_subject.rs_uip;
1400                 if (uip == NULL) {
1401                         error = EINVAL;
1402                         goto out;
1403                 }
1404                 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1405                 break;
1406         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1407                 lc = filter->rr_subject.rs_loginclass;
1408                 if (lc == NULL) {
1409                         error = EINVAL;
1410                         goto out;
1411                 }
1412                 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1413                 break;
1414         case RCTL_SUBJECT_TYPE_JAIL:
1415                 prr = filter->rr_subject.rs_prison_racct;
1416                 if (prr == NULL) {
1417                         error = EINVAL;
1418                         goto out;
1419                 }
1420                 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1421                 break;
1422         default:
1423                 error = EINVAL;
1424         }
1425 out:
1426         rctl_rule_release(filter);
1427         sx_sunlock(&allproc_lock);
1428         if (error != 0)
1429                 return (error);
1430
1431         error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1432
1433         return (error);
1434 }
1435
1436 static void
1437 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1438 {
1439         struct rctl_rule *filter = (struct rctl_rule *)arg2;
1440         struct rctl_rule_link *link;
1441         struct sbuf *sb = (struct sbuf *)arg3;
1442
1443         ASSERT_RACCT_ENABLED();
1444         rw_assert(&rctl_lock, RA_LOCKED);
1445
1446         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1447                 if (!rctl_rule_matches(link->rrl_rule, filter))
1448                         continue;
1449                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1450                 sbuf_printf(sb, ",");
1451         }
1452 }
1453
1454 int
1455 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1456 {
1457         int error;
1458         size_t bufsize;
1459         char *inputstr, *buf;
1460         struct sbuf *sb;
1461         struct rctl_rule *filter;
1462         struct rctl_rule_link *link;
1463         struct proc *p;
1464
1465         if (!racct_enable)
1466                 return (ENOSYS);
1467
1468         error = priv_check(td, PRIV_RCTL_GET_RULES);
1469         if (error != 0)
1470                 return (error);
1471
1472         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1473         if (error != 0)
1474                 return (error);
1475
1476         sx_slock(&allproc_lock);
1477         error = rctl_string_to_rule(inputstr, &filter);
1478         free(inputstr, M_RCTL);
1479         if (error != 0) {
1480                 sx_sunlock(&allproc_lock);
1481                 return (error);
1482         }
1483
1484         bufsize = uap->outbuflen;
1485         if (bufsize > rctl_maxbufsize) {
1486                 sx_sunlock(&allproc_lock);
1487                 return (E2BIG);
1488         }
1489
1490         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1491         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1492         KASSERT(sb != NULL, ("sbuf_new failed"));
1493
1494         FOREACH_PROC_IN_SYSTEM(p) {
1495                 rw_rlock(&rctl_lock);
1496                 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1497                         /*
1498                          * Non-process rules will be added to the buffer later.
1499                          * Adding them here would result in duplicated output.
1500                          */
1501                         if (link->rrl_rule->rr_subject_type !=
1502                             RCTL_SUBJECT_TYPE_PROCESS)
1503                                 continue;
1504                         if (!rctl_rule_matches(link->rrl_rule, filter))
1505                                 continue;
1506                         rctl_rule_to_sbuf(sb, link->rrl_rule);
1507                         sbuf_printf(sb, ",");
1508                 }
1509                 rw_runlock(&rctl_lock);
1510         }
1511
1512         loginclass_racct_foreach(rctl_get_rules_callback,
1513             rctl_rule_pre_callback, rctl_rule_post_callback,
1514             filter, sb);
1515         ui_racct_foreach(rctl_get_rules_callback,
1516             rctl_rule_pre_callback, rctl_rule_post_callback,
1517             filter, sb);
1518         prison_racct_foreach(rctl_get_rules_callback,
1519             rctl_rule_pre_callback, rctl_rule_post_callback,
1520             filter, sb);
1521         if (sbuf_error(sb) == ENOMEM) {
1522                 error = ERANGE;
1523                 goto out;
1524         }
1525
1526         /*
1527          * Remove trailing ",".
1528          */
1529         if (sbuf_len(sb) > 0)
1530                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1531
1532         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1533 out:
1534         rctl_rule_release(filter);
1535         sx_sunlock(&allproc_lock);
1536         free(buf, M_RCTL);
1537         return (error);
1538 }
1539
1540 int
1541 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1542 {
1543         int error;
1544         size_t bufsize;
1545         char *inputstr, *buf;
1546         struct sbuf *sb;
1547         struct rctl_rule *filter;
1548         struct rctl_rule_link *link;
1549
1550         if (!racct_enable)
1551                 return (ENOSYS);
1552
1553         error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1554         if (error != 0)
1555                 return (error);
1556
1557         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1558         if (error != 0)
1559                 return (error);
1560
1561         sx_slock(&allproc_lock);
1562         error = rctl_string_to_rule(inputstr, &filter);
1563         free(inputstr, M_RCTL);
1564         if (error != 0) {
1565                 sx_sunlock(&allproc_lock);
1566                 return (error);
1567         }
1568
1569         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1570                 rctl_rule_release(filter);
1571                 sx_sunlock(&allproc_lock);
1572                 return (EINVAL);
1573         }
1574         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1575                 rctl_rule_release(filter);
1576                 sx_sunlock(&allproc_lock);
1577                 return (EOPNOTSUPP);
1578         }
1579         if (filter->rr_subject.rs_proc == NULL) {
1580                 rctl_rule_release(filter);
1581                 sx_sunlock(&allproc_lock);
1582                 return (EINVAL);
1583         }
1584
1585         bufsize = uap->outbuflen;
1586         if (bufsize > rctl_maxbufsize) {
1587                 rctl_rule_release(filter);
1588                 sx_sunlock(&allproc_lock);
1589                 return (E2BIG);
1590         }
1591
1592         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1593         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1594         KASSERT(sb != NULL, ("sbuf_new failed"));
1595
1596         rw_rlock(&rctl_lock);
1597         LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1598             rrl_next) {
1599                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1600                 sbuf_printf(sb, ",");
1601         }
1602         rw_runlock(&rctl_lock);
1603         if (sbuf_error(sb) == ENOMEM) {
1604                 error = ERANGE;
1605                 goto out;
1606         }
1607
1608         /*
1609          * Remove trailing ",".
1610          */
1611         if (sbuf_len(sb) > 0)
1612                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1613
1614         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1615 out:
1616         rctl_rule_release(filter);
1617         sx_sunlock(&allproc_lock);
1618         free(buf, M_RCTL);
1619         return (error);
1620 }
1621
1622 int
1623 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1624 {
1625         int error;
1626         struct rctl_rule *rule;
1627         char *inputstr;
1628
1629         if (!racct_enable)
1630                 return (ENOSYS);
1631
1632         error = priv_check(td, PRIV_RCTL_ADD_RULE);
1633         if (error != 0)
1634                 return (error);
1635
1636         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1637         if (error != 0)
1638                 return (error);
1639
1640         sx_slock(&allproc_lock);
1641         error = rctl_string_to_rule(inputstr, &rule);
1642         free(inputstr, M_RCTL);
1643         if (error != 0) {
1644                 sx_sunlock(&allproc_lock);
1645                 return (error);
1646         }
1647         /*
1648          * The 'per' part of a rule is optional.
1649          */
1650         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1651             rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1652                 rule->rr_per = rule->rr_subject_type;
1653
1654         if (!rctl_rule_fully_specified(rule)) {
1655                 error = EINVAL;
1656                 goto out;
1657         }
1658
1659         error = rctl_rule_add(rule);
1660
1661 out:
1662         rctl_rule_release(rule);
1663         sx_sunlock(&allproc_lock);
1664         return (error);
1665 }
1666
1667 int
1668 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1669 {
1670         int error;
1671         struct rctl_rule *filter;
1672         char *inputstr;
1673
1674         if (!racct_enable)
1675                 return (ENOSYS);
1676
1677         error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1678         if (error != 0)
1679                 return (error);
1680
1681         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1682         if (error != 0)
1683                 return (error);
1684
1685         sx_slock(&allproc_lock);
1686         error = rctl_string_to_rule(inputstr, &filter);
1687         free(inputstr, M_RCTL);
1688         if (error != 0) {
1689                 sx_sunlock(&allproc_lock);
1690                 return (error);
1691         }
1692
1693         error = rctl_rule_remove(filter);
1694         rctl_rule_release(filter);
1695         sx_sunlock(&allproc_lock);
1696
1697         return (error);
1698 }
1699
1700 /*
1701  * Update RCTL rule list after credential change.
1702  */
1703 void
1704 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1705 {
1706         int rulecnt, i;
1707         struct rctl_rule_link *link, *newlink;
1708         struct uidinfo *newuip;
1709         struct loginclass *newlc;
1710         struct prison_racct *newprr;
1711         LIST_HEAD(, rctl_rule_link) newrules;
1712
1713         ASSERT_RACCT_ENABLED();
1714
1715         newuip = newcred->cr_ruidinfo;
1716         newlc = newcred->cr_loginclass;
1717         newprr = newcred->cr_prison->pr_prison_racct;
1718         
1719         LIST_INIT(&newrules);
1720
1721 again:
1722         /*
1723          * First, count the rules that apply to the process with new
1724          * credentials.
1725          */
1726         rulecnt = 0;
1727         rw_rlock(&rctl_lock);
1728         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1729                 if (link->rrl_rule->rr_subject_type ==
1730                     RCTL_SUBJECT_TYPE_PROCESS)
1731                         rulecnt++;
1732         }
1733         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1734                 rulecnt++;
1735         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1736                 rulecnt++;
1737         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1738                 rulecnt++;
1739         rw_runlock(&rctl_lock);
1740
1741         /*
1742          * Create temporary list.  We've dropped the rctl_lock in order
1743          * to use M_WAITOK.
1744          */
1745         for (i = 0; i < rulecnt; i++) {
1746                 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1747                 newlink->rrl_rule = NULL;
1748                 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1749         }
1750
1751         newlink = LIST_FIRST(&newrules);
1752
1753         /*
1754          * Assign rules to the newly allocated list entries.
1755          */
1756         rw_wlock(&rctl_lock);
1757         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1758                 if (link->rrl_rule->rr_subject_type ==
1759                     RCTL_SUBJECT_TYPE_PROCESS) {
1760                         if (newlink == NULL)
1761                                 goto goaround;
1762                         rctl_rule_acquire(link->rrl_rule);
1763                         newlink->rrl_rule = link->rrl_rule;
1764                         newlink = LIST_NEXT(newlink, rrl_next);
1765                         rulecnt--;
1766                 }
1767         }
1768         
1769         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1770                 if (newlink == NULL)
1771                         goto goaround;
1772                 rctl_rule_acquire(link->rrl_rule);
1773                 newlink->rrl_rule = link->rrl_rule;
1774                 newlink = LIST_NEXT(newlink, rrl_next);
1775                 rulecnt--;
1776         }
1777
1778         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1779                 if (newlink == NULL)
1780                         goto goaround;
1781                 rctl_rule_acquire(link->rrl_rule);
1782                 newlink->rrl_rule = link->rrl_rule;
1783                 newlink = LIST_NEXT(newlink, rrl_next);
1784                 rulecnt--;
1785         }
1786
1787         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1788                 if (newlink == NULL)
1789                         goto goaround;
1790                 rctl_rule_acquire(link->rrl_rule);
1791                 newlink->rrl_rule = link->rrl_rule;
1792                 newlink = LIST_NEXT(newlink, rrl_next);
1793                 rulecnt--;
1794         }
1795
1796         if (rulecnt == 0) {
1797                 /*
1798                  * Free the old rule list.
1799                  */
1800                 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1801                         link = LIST_FIRST(&p->p_racct->r_rule_links);
1802                         LIST_REMOVE(link, rrl_next);
1803                         rctl_rule_release(link->rrl_rule);
1804                         uma_zfree(rctl_rule_link_zone, link);
1805                 }
1806
1807                 /*
1808                  * Replace lists and we're done.
1809                  *
1810                  * XXX: Is there any way to switch list heads instead
1811                  *      of iterating here?
1812                  */
1813                 while (!LIST_EMPTY(&newrules)) {
1814                         newlink = LIST_FIRST(&newrules);
1815                         LIST_REMOVE(newlink, rrl_next);
1816                         LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1817                             newlink, rrl_next);
1818                 }
1819
1820                 rw_wunlock(&rctl_lock);
1821
1822                 return;
1823         }
1824
1825 goaround:
1826         rw_wunlock(&rctl_lock);
1827
1828         /*
1829          * Rule list changed while we were not holding the rctl_lock.
1830          * Free the new list and try again.
1831          */
1832         while (!LIST_EMPTY(&newrules)) {
1833                 newlink = LIST_FIRST(&newrules);
1834                 LIST_REMOVE(newlink, rrl_next);
1835                 if (newlink->rrl_rule != NULL)
1836                         rctl_rule_release(newlink->rrl_rule);
1837                 uma_zfree(rctl_rule_link_zone, newlink);
1838         }
1839
1840         goto again;
1841 }
1842
1843 /*
1844  * Assign RCTL rules to the newly created process.
1845  */
1846 int
1847 rctl_proc_fork(struct proc *parent, struct proc *child)
1848 {
1849         int error;
1850         struct rctl_rule_link *link;
1851         struct rctl_rule *rule;
1852
1853         LIST_INIT(&child->p_racct->r_rule_links);
1854
1855         ASSERT_RACCT_ENABLED();
1856         KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
1857
1858         rw_wlock(&rctl_lock);
1859
1860         /*
1861          * Go through limits applicable to the parent and assign them
1862          * to the child.  Rules with 'process' subject have to be duplicated
1863          * in order to make their rr_subject point to the new process.
1864          */
1865         LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1866                 if (link->rrl_rule->rr_subject_type ==
1867                     RCTL_SUBJECT_TYPE_PROCESS) {
1868                         rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1869                         if (rule == NULL)
1870                                 goto fail;
1871                         KASSERT(rule->rr_subject.rs_proc == parent,
1872                             ("rule->rr_subject.rs_proc != parent"));
1873                         rule->rr_subject.rs_proc = child;
1874                         error = rctl_racct_add_rule_locked(child->p_racct,
1875                             rule);
1876                         rctl_rule_release(rule);
1877                         if (error != 0)
1878                                 goto fail;
1879                 } else {
1880                         error = rctl_racct_add_rule_locked(child->p_racct,
1881                             link->rrl_rule);
1882                         if (error != 0)
1883                                 goto fail;
1884                 }
1885         }
1886
1887         rw_wunlock(&rctl_lock);
1888         return (0);
1889
1890 fail:
1891         while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1892                 link = LIST_FIRST(&child->p_racct->r_rule_links);
1893                 LIST_REMOVE(link, rrl_next);
1894                 rctl_rule_release(link->rrl_rule);
1895                 uma_zfree(rctl_rule_link_zone, link);
1896         }
1897         rw_wunlock(&rctl_lock);
1898         return (EAGAIN);
1899 }
1900
1901 /*
1902  * Release rules attached to the racct.
1903  */
1904 void
1905 rctl_racct_release(struct racct *racct)
1906 {
1907         struct rctl_rule_link *link;
1908
1909         ASSERT_RACCT_ENABLED();
1910
1911         rw_wlock(&rctl_lock);
1912         while (!LIST_EMPTY(&racct->r_rule_links)) {
1913                 link = LIST_FIRST(&racct->r_rule_links);
1914                 LIST_REMOVE(link, rrl_next);
1915                 rctl_rule_release(link->rrl_rule);
1916                 uma_zfree(rctl_rule_link_zone, link);
1917         }
1918         rw_wunlock(&rctl_lock);
1919 }
1920
1921 static void
1922 rctl_init(void)
1923 {
1924
1925         if (!racct_enable)
1926                 return;
1927
1928         rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1929             sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1930             UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1931         rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1932             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1933 }
1934
1935 #else /* !RCTL */
1936
/*
 * Placeholder compiled in the !RCTL case: the arguments are ignored and
 * the call always fails with ENOSYS.
 */
int
sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{

	return (ENOSYS);
}
1943
/*
 * Placeholder compiled in the !RCTL case: the arguments are ignored and
 * the call always fails with ENOSYS.
 */
int
sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{

	return (ENOSYS);
}
1950
/*
 * Placeholder compiled in the !RCTL case: the arguments are ignored and
 * the call always fails with ENOSYS.
 */
int
sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{

	return (ENOSYS);
}
1957
/*
 * Placeholder compiled in the !RCTL case: the arguments are ignored and
 * the call always fails with ENOSYS.
 */
int
sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{

	return (ENOSYS);
}
1964
/*
 * Placeholder compiled in the !RCTL case: the arguments are ignored and
 * the call always fails with ENOSYS.
 */
int
sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{

	return (ENOSYS);
}
1971
1972 #endif /* !RCTL */