]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/kern/kern_rctl.c
Merge tnftp-20100108 from the vendor branch into head.
[FreeBSD/FreeBSD.git] / sys / kern / kern_rctl.c
1 /*-
2  * Copyright (c) 2010 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Edward Tomasz Napierala under sponsorship
6  * from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/malloc.h>
38 #include <sys/queue.h>
39 #include <sys/refcount.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/limits.h>
43 #include <sys/loginclass.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/racct.h>
47 #include <sys/rctl.h>
48 #include <sys/resourcevar.h>
49 #include <sys/sx.h>
50 #include <sys/sysent.h>
51 #include <sys/sysproto.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/eventhandler.h>
55 #include <sys/lock.h>
56 #include <sys/mutex.h>
57 #include <sys/rwlock.h>
58 #include <sys/sbuf.h>
59 #include <sys/taskqueue.h>
60 #include <sys/tree.h>
61 #include <vm/uma.h>
62
63 #ifdef RCTL
64 #ifndef RACCT
65 #error "The RCTL option requires the RACCT option"
66 #endif
67
68 FEATURE(rctl, "Resource Limits");
69
/*
 * HRF_* flag values.
 * NOTE(review): none of these are referenced in the part of the file
 * reviewed here; confirm where they are consumed before changing them.
 */
#define HRF_DEFAULT             0
#define HRF_DONT_INHERIT        1
#define HRF_DONT_ACCUMULATE     2

/* Default buffer size for rctl_get_rules(2). */
#define RCTL_DEFAULT_BUFSIZE    4096
/*
 * Size of the fixed-length buffer rctl_enforce() uses to format a rule
 * for the log message and for the devctl notification.
 */
#define RCTL_LOG_BUFSIZE        128
77
/*
 * 'rctl_rule_link' connects a rule with every racct it's related to.
 * For example, rule 'user:X:openfiles:deny=N/process' is linked
 * with uidinfo for user X, and to each process of that user.
 */
struct rctl_rule_link {
        /* Linkage on the owning racct's r_rule_links list. */
        LIST_ENTRY(rctl_rule_link)      rrl_next;
        /* The rule this link refers to; the link holds a reference on it. */
        struct rctl_rule                *rrl_rule;
        /*
         * Set to 1 by rctl_enforce() once a log/devctl/signal action has
         * fired for this link, and reset to 0 when the rule is no longer
         * exceeded, so that the action is not repeated on every check.
         */
        int                             rrl_exceeded;
};
88
/*
 * One entry of a name <-> integer value lookup table.  Tables built from
 * this type are terminated by an entry whose d_name is NULL.
 */
struct dict {
        const char      *d_name;
        int             d_value;
};
93
/*
 * Names of the subject types accepted in rules, mapped to their
 * RCTL_SUBJECT_TYPE_* values.  Terminated by a NULL name.
 */
static struct dict subjectnames[] = {
        { "process", RCTL_SUBJECT_TYPE_PROCESS },
        { "user", RCTL_SUBJECT_TYPE_USER },
        { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
        { "jail", RCTL_SUBJECT_TYPE_JAIL },
        { NULL, -1 }};
100
/*
 * Names of the resources accepted in rules, mapped to their RACCT_*
 * values.  Terminated by a NULL name.
 */
static struct dict resourcenames[] = {
        { "cpu", RACCT_CPU },
        { "data", RACCT_DATA },
        { "stack", RACCT_STACK },
        { "core", RACCT_CORE },
        { "rss", RACCT_RSS },
        { "memlock", RACCT_MEMLOCK },
        { "nproc", RACCT_NPROC },
        { "nofile", RACCT_NOFILE },
        { "vmem", RACCT_VMEM },
        { "npts", RACCT_NPTS },
        { "swap", RACCT_SWAP },
        { "nthr", RACCT_NTHR },
        { "msgqqueued", RACCT_MSGQQUEUED },
        { "msgqsize", RACCT_MSGQSIZE },
        { "nmsgq", RACCT_NMSGQ },
        { "nsem", RACCT_NSEM },
        { "nsemop", RACCT_NSEMOP },
        { "nshm", RACCT_NSHM },
        { "shmsize", RACCT_SHMSIZE },
        { "wallclock", RACCT_WALLCLOCK },
        { NULL, -1 }};
123
/*
 * Names of the actions accepted in rules, mapped to their RCTL_ACTION_*
 * values: one entry per signal that can be sent, plus "deny", "log" and
 * "devctl".  Terminated by a NULL name.
 */
static struct dict actionnames[] = {
        { "sighup", RCTL_ACTION_SIGHUP },
        { "sigint", RCTL_ACTION_SIGINT },
        { "sigquit", RCTL_ACTION_SIGQUIT },
        { "sigill", RCTL_ACTION_SIGILL },
        { "sigtrap", RCTL_ACTION_SIGTRAP },
        { "sigabrt", RCTL_ACTION_SIGABRT },
        { "sigemt", RCTL_ACTION_SIGEMT },
        { "sigfpe", RCTL_ACTION_SIGFPE },
        { "sigkill", RCTL_ACTION_SIGKILL },
        { "sigbus", RCTL_ACTION_SIGBUS },
        { "sigsegv", RCTL_ACTION_SIGSEGV },
        { "sigsys", RCTL_ACTION_SIGSYS },
        { "sigpipe", RCTL_ACTION_SIGPIPE },
        { "sigalrm", RCTL_ACTION_SIGALRM },
        { "sigterm", RCTL_ACTION_SIGTERM },
        { "sigurg", RCTL_ACTION_SIGURG },
        { "sigstop", RCTL_ACTION_SIGSTOP },
        { "sigtstp", RCTL_ACTION_SIGTSTP },
        { "sigchld", RCTL_ACTION_SIGCHLD },
        { "sigttin", RCTL_ACTION_SIGTTIN },
        { "sigttou", RCTL_ACTION_SIGTTOU },
        { "sigio", RCTL_ACTION_SIGIO },
        { "sigxcpu", RCTL_ACTION_SIGXCPU },
        { "sigxfsz", RCTL_ACTION_SIGXFSZ },
        { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
        { "sigprof", RCTL_ACTION_SIGPROF },
        { "sigwinch", RCTL_ACTION_SIGWINCH },
        { "siginfo", RCTL_ACTION_SIGINFO },
        { "sigusr1", RCTL_ACTION_SIGUSR1 },
        { "sigusr2", RCTL_ACTION_SIGUSR2 },
        { "sigthr", RCTL_ACTION_SIGTHR },
        { "deny", RCTL_ACTION_DENY },
        { "log", RCTL_ACTION_LOG },
        { "devctl", RCTL_ACTION_DEVCTL },
        { NULL, -1 }};
160
/* Subsystem initialization routine, registered to run via SYSINIT. */
static void rctl_init(void);
SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);

/* UMA zones backing 'struct rctl_rule_link' and 'struct rctl_rule'. */
static uma_zone_t rctl_rule_link_zone;
static uma_zone_t rctl_rule_zone;
/*
 * Reader/writer lock held while walking (read) or modifying (write) the
 * per-racct r_rule_links lists.
 */
static struct rwlock rctl_lock;
RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");

/* Forward declarations for helpers used before their definition. */
static int rctl_rule_fully_specified(const struct rctl_rule *rule);
static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);

/* malloc(9) type used for temporary buffers allocated by this file. */
MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
173
174 static const char *
175 rctl_subject_type_name(int subject)
176 {
177         int i;
178
179         for (i = 0; subjectnames[i].d_name != NULL; i++) {
180                 if (subjectnames[i].d_value == subject)
181                         return (subjectnames[i].d_name);
182         }
183
184         panic("rctl_subject_type_name: unknown subject type %d", subject);
185 }
186
187 static const char *
188 rctl_action_name(int action)
189 {
190         int i;
191
192         for (i = 0; actionnames[i].d_name != NULL; i++) {
193                 if (actionnames[i].d_value == action)
194                         return (actionnames[i].d_name);
195         }
196
197         panic("rctl_action_name: unknown action %d", action);
198 }
199
200 const char *
201 rctl_resource_name(int resource)
202 {
203         int i;
204
205         for (i = 0; resourcenames[i].d_name != NULL; i++) {
206                 if (resourcenames[i].d_value == resource)
207                         return (resourcenames[i].d_name);
208         }
209
210         panic("rctl_resource_name: unknown resource %d", resource);
211 }
212
213 /*
214  * Return the amount of resource that can be allocated by 'p' before
215  * hitting 'rule'.
216  */
217 static int64_t
218 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
219 {
220         int resource;
221         int64_t available = INT64_MAX;
222         struct ucred *cred = p->p_ucred;
223
224         rw_assert(&rctl_lock, RA_LOCKED);
225
226         resource = rule->rr_resource;
227         switch (rule->rr_per) {
228         case RCTL_SUBJECT_TYPE_PROCESS:
229                 available = rule->rr_amount -
230                     p->p_racct->r_resources[resource];
231                 break;
232         case RCTL_SUBJECT_TYPE_USER:
233                 available = rule->rr_amount -
234                     cred->cr_ruidinfo->ui_racct->r_resources[resource];
235                 break;
236         case RCTL_SUBJECT_TYPE_LOGINCLASS:
237                 available = rule->rr_amount -
238                     cred->cr_loginclass->lc_racct->r_resources[resource];
239                 break;
240         case RCTL_SUBJECT_TYPE_JAIL:
241                 available = rule->rr_amount -
242                     cred->cr_prison->pr_prison_racct->prr_racct->
243                         r_resources[resource];
244                 break;
245         default:
246                 panic("rctl_compute_available: unknown per %d",
247                     rule->rr_per);
248         }
249
250         return (available);
251 }
252
253 /*
254  * Return non-zero if allocating 'amount' by proc 'p' would exceed
255  * resource limit specified by 'rule'.
256  */
257 static int
258 rctl_would_exceed(const struct proc *p, const struct rctl_rule *rule,
259     int64_t amount)
260 {
261         int64_t available;
262
263         rw_assert(&rctl_lock, RA_LOCKED);
264
265         available = rctl_available_resource(p, rule);
266         if (available >= amount)
267                 return (0);
268
269         return (1);
270 }
271
/*
 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
 * to what it keeps allocated now.  Returns non-zero if the allocation should
 * be denied, 0 otherwise.
 *
 * Every rule linked to the process' racct that applies to 'resource' and
 * would be exceeded has its action carried out: "deny" only records that
 * the request must be refused, "log" prints a message, "devctl" sends a
 * devctl notification, and any other action is delivered as a signal.
 * The non-deny actions fire once per link and are re-armed when the rule
 * stops being exceeded (see rrl_exceeded).
 */
int
rctl_enforce(struct proc *p, int resource, uint64_t amount)
{
        struct rctl_rule *rule;
        struct rctl_rule_link *link;
        struct sbuf sb;
        int should_deny = 0;
        char *buf;
        /*
         * State for ppsratecheck(); being static, it is shared by all
         * callers, so log messages are rate-limited globally.
         */
        static int curtime = 0;
        static struct timeval lasttime;

        rw_rlock(&rctl_lock);

        /*
         * There may be more than one matching rule; go through all of them.
         * Denial should be done last, after logging and sending signals.
         */
        LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
                rule = link->rrl_rule;
                if (rule->rr_resource != resource)
                        continue;
                if (!rctl_would_exceed(p, rule, amount)) {
                        /*
                         * Back under the limit; re-arm one-shot actions.
                         * NOTE(review): rrl_exceeded is written here and
                         * below while rctl_lock is held only for reading.
                         */
                        link->rrl_exceeded = 0;
                        continue;
                }

                switch (rule->rr_action) {
                case RCTL_ACTION_DENY:
                        /* Keep going so other rules' actions still run. */
                        should_deny = 1;
                        continue;
                case RCTL_ACTION_LOG:
                        /*
                         * If rrl_exceeded != 0, it means we've already
                         * logged a warning for this process.
                         */
                        if (link->rrl_exceeded != 0)
                                continue;

                        /* Throttle the messages (limit argument is 10). */
                        if (!ppsratecheck(&lasttime, &curtime, 10))
                                continue;

                        /* M_NOWAIT: rctl_lock is held at this point. */
                        buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
                        if (buf == NULL) {
                                printf("rctl_enforce: out of memory\n");
                                continue;
                        }
                        sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
                        rctl_rule_to_sbuf(&sb, rule);
                        sbuf_finish(&sb);
                        printf("rctl: rule \"%s\" matched by pid %d "
                            "(%s), uid %d, jail %s\n", sbuf_data(&sb),
                            p->p_pid, p->p_comm, p->p_ucred->cr_uid,
                            p->p_ucred->cr_prison->pr_prison_racct->prr_name);
                        sbuf_delete(&sb);
                        free(buf, M_RCTL);
                        link->rrl_exceeded = 1;
                        continue;
                case RCTL_ACTION_DEVCTL:
                        /* Already notified for this link. */
                        if (link->rrl_exceeded != 0)
                                continue;

                        /* M_NOWAIT: rctl_lock is held at this point. */
                        buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
                        if (buf == NULL) {
                                printf("rctl_enforce: out of memory\n");
                                continue;
                        }
                        sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
                        sbuf_printf(&sb, "rule=");
                        rctl_rule_to_sbuf(&sb, rule);
                        sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
                            p->p_pid, p->p_ucred->cr_ruid,
                            p->p_ucred->cr_prison->pr_prison_racct->prr_name);
                        sbuf_finish(&sb);
                        devctl_notify_f("RCTL", "rule", "matched",
                            sbuf_data(&sb), M_NOWAIT);
                        sbuf_delete(&sb);
                        free(buf, M_RCTL);
                        link->rrl_exceeded = 1;
                        continue;
                default:
                        /* Signal already sent for this link. */
                        if (link->rrl_exceeded != 0)
                                continue;

                        KASSERT(rule->rr_action > 0 &&
                            rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
                            ("rctl_enforce: unknown action %d",
                             rule->rr_action));

                        /*
                         * We're using the fact that RCTL_ACTION_SIG* values
                         * are equal to their counterparts from sys/signal.h.
                         */
                        psignal(p, rule->rr_action);
                        link->rrl_exceeded = 1;
                        continue;
                }
        }

        rw_runlock(&rctl_lock);

        if (should_deny) {
                /*
                 * Return fake error code; the caller should change it
                 * into one proper for the situation - EFBIG, ENOMEM etc.
                 */
                return (EDOOFUS);
        }

        return (0);
}
387
388 uint64_t
389 rctl_get_limit(struct proc *p, int resource)
390 {
391         struct rctl_rule *rule;
392         struct rctl_rule_link *link;
393         uint64_t amount = UINT64_MAX;
394
395         rw_rlock(&rctl_lock);
396
397         /*
398          * There may be more than one matching rule; go through all of them.
399          * Denial should be done last, after logging and sending signals.
400          */
401         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
402                 rule = link->rrl_rule;
403                 if (rule->rr_resource != resource)
404                         continue;
405                 if (rule->rr_action != RCTL_ACTION_DENY)
406                         continue;
407                 if (rule->rr_amount < amount)
408                         amount = rule->rr_amount;
409         }
410
411         rw_runlock(&rctl_lock);
412
413         return (amount);
414 }
415
416 uint64_t
417 rctl_get_available(struct proc *p, int resource)
418 {
419         struct rctl_rule *rule;
420         struct rctl_rule_link *link;
421         int64_t available, minavailable, allocated;
422
423         minavailable = INT64_MAX;
424
425         rw_rlock(&rctl_lock);
426
427         /*
428          * There may be more than one matching rule; go through all of them.
429          * Denial should be done last, after logging and sending signals.
430          */
431         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
432                 rule = link->rrl_rule;
433                 if (rule->rr_resource != resource)
434                         continue;
435                 if (rule->rr_action != RCTL_ACTION_DENY)
436                         continue;
437                 available = rctl_available_resource(p, rule);
438                 if (available < minavailable)
439                         minavailable = available;
440         }
441
442         rw_runlock(&rctl_lock);
443
444         /*
445          * XXX: Think about this _hard_.
446          */
447         allocated = p->p_racct->r_resources[resource];
448         if (minavailable < INT64_MAX - allocated)
449                 minavailable += allocated;
450         if (minavailable < 0)
451                 minavailable = 0;
452         return (minavailable);
453 }
454
455 static int
456 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
457 {
458
459         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
460                 if (rule->rr_subject_type != filter->rr_subject_type)
461                         return (0);
462
463                 switch (filter->rr_subject_type) {
464                 case RCTL_SUBJECT_TYPE_PROCESS:
465                         if (filter->rr_subject.rs_proc != NULL &&
466                             rule->rr_subject.rs_proc !=
467                             filter->rr_subject.rs_proc)
468                                 return (0);
469                         break;
470                 case RCTL_SUBJECT_TYPE_USER:
471                         if (filter->rr_subject.rs_uip != NULL &&
472                             rule->rr_subject.rs_uip !=
473                             filter->rr_subject.rs_uip)
474                                 return (0);
475                         break;
476                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
477                         if (filter->rr_subject.rs_loginclass != NULL &&
478                             rule->rr_subject.rs_loginclass !=
479                             filter->rr_subject.rs_loginclass)
480                                 return (0);
481                         break;
482                 case RCTL_SUBJECT_TYPE_JAIL:
483                         if (filter->rr_subject.rs_prison_racct != NULL &&
484                             rule->rr_subject.rs_prison_racct !=
485                             filter->rr_subject.rs_prison_racct)
486                                 return (0);
487                         break;
488                 default:
489                         panic("rctl_rule_matches: unknown subject type %d",
490                             filter->rr_subject_type);
491                 }
492         }
493
494         if (filter->rr_resource != RACCT_UNDEFINED) {
495                 if (rule->rr_resource != filter->rr_resource)
496                         return (0);
497         }
498
499         if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
500                 if (rule->rr_action != filter->rr_action)
501                         return (0);
502         }
503
504         if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
505                 if (rule->rr_amount != filter->rr_amount)
506                         return (0);
507         }
508
509         if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
510                 if (rule->rr_per != filter->rr_per)
511                         return (0);
512         }
513
514         return (1);
515 }
516
517 static int
518 str2value(const char *str, int *value, struct dict *table)
519 {
520         int i;
521
522         if (value == NULL)
523                 return (EINVAL);
524
525         for (i = 0; table[i].d_name != NULL; i++) {
526                 if (strcasecmp(table[i].d_name, str) == 0) {
527                         *value =  table[i].d_value;
528                         return (0);
529                 }
530         }
531
532         return (EINVAL);
533 }
534
535 static int
536 str2id(const char *str, id_t *value)
537 {
538         char *end;
539
540         if (str == NULL)
541                 return (EINVAL);
542
543         *value = strtoul(str, &end, 10);
544         if ((size_t)(end - str) != strlen(str))
545                 return (EINVAL);
546
547         return (0);
548 }
549
550 static int
551 str2int64(const char *str, int64_t *value)
552 {
553         char *end;
554
555         if (str == NULL)
556                 return (EINVAL);
557
558         *value = strtoul(str, &end, 10);
559         if ((size_t)(end - str) != strlen(str))
560                 return (EINVAL);
561
562         return (0);
563 }
564
565 /*
566  * Connect the rule to the racct, increasing refcount for the rule.
567  */
568 static void
569 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
570 {
571         struct rctl_rule_link *link;
572
573         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
574
575         rctl_rule_acquire(rule);
576         link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
577         link->rrl_rule = rule;
578         link->rrl_exceeded = 0;
579
580         rw_wlock(&rctl_lock);
581         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
582         rw_wunlock(&rctl_lock);
583 }
584
585 static int
586 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
587 {
588         struct rctl_rule_link *link;
589
590         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
591         rw_assert(&rctl_lock, RA_WLOCKED);
592
593         link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
594         if (link == NULL)
595                 return (ENOMEM);
596         rctl_rule_acquire(rule);
597         link->rrl_rule = rule;
598         link->rrl_exceeded = 0;
599
600         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
601         return (0);
602 }
603
604 /*
605  * Remove limits for a rules matching the filter and release
606  * the refcounts for the rules, possibly freeing them.  Returns
607  * the number of limit structures removed.
608  */
609 static int
610 rctl_racct_remove_rules(struct racct *racct,
611     const struct rctl_rule *filter)
612 {
613         int removed = 0;
614         struct rctl_rule_link *link, *linktmp;
615
616         rw_assert(&rctl_lock, RA_WLOCKED);
617
618         LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
619                 if (!rctl_rule_matches(link->rrl_rule, filter))
620                         continue;
621
622                 LIST_REMOVE(link, rrl_next);
623                 rctl_rule_release(link->rrl_rule);
624                 uma_zfree(rctl_rule_link_zone, link);
625                 removed++;
626         }
627         return (removed);
628 }
629
630 static void
631 rctl_rule_acquire_subject(struct rctl_rule *rule)
632 {
633
634         switch (rule->rr_subject_type) {
635         case RCTL_SUBJECT_TYPE_UNDEFINED:
636         case RCTL_SUBJECT_TYPE_PROCESS:
637                 break;
638         case RCTL_SUBJECT_TYPE_JAIL:
639                 if (rule->rr_subject.rs_prison_racct != NULL)
640                         prison_racct_hold(rule->rr_subject.rs_prison_racct);
641                 break;
642         case RCTL_SUBJECT_TYPE_USER:
643                 if (rule->rr_subject.rs_uip != NULL)
644                         uihold(rule->rr_subject.rs_uip);
645                 break;
646         case RCTL_SUBJECT_TYPE_LOGINCLASS:
647                 if (rule->rr_subject.rs_loginclass != NULL)
648                         loginclass_hold(rule->rr_subject.rs_loginclass);
649                 break;
650         default:
651                 panic("rctl_rule_acquire_subject: unknown subject type %d",
652                     rule->rr_subject_type);
653         }
654 }
655
656 static void
657 rctl_rule_release_subject(struct rctl_rule *rule)
658 {
659
660         switch (rule->rr_subject_type) {
661         case RCTL_SUBJECT_TYPE_UNDEFINED:
662         case RCTL_SUBJECT_TYPE_PROCESS:
663                 break;
664         case RCTL_SUBJECT_TYPE_JAIL:
665                 if (rule->rr_subject.rs_prison_racct != NULL)
666                         prison_racct_free(rule->rr_subject.rs_prison_racct);
667                 break;
668         case RCTL_SUBJECT_TYPE_USER:
669                 if (rule->rr_subject.rs_uip != NULL)
670                         uifree(rule->rr_subject.rs_uip);
671                 break;
672         case RCTL_SUBJECT_TYPE_LOGINCLASS:
673                 if (rule->rr_subject.rs_loginclass != NULL)
674                         loginclass_free(rule->rr_subject.rs_loginclass);
675                 break;
676         default:
677                 panic("rctl_rule_release_subject: unknown subject type %d",
678                     rule->rr_subject_type);
679         }
680 }
681
682 struct rctl_rule *
683 rctl_rule_alloc(int flags)
684 {
685         struct rctl_rule *rule;
686
687         rule = uma_zalloc(rctl_rule_zone, flags);
688         if (rule == NULL)
689                 return (NULL);
690         rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
691         rule->rr_subject.rs_proc = NULL;
692         rule->rr_subject.rs_uip = NULL;
693         rule->rr_subject.rs_loginclass = NULL;
694         rule->rr_subject.rs_prison_racct = NULL;
695         rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
696         rule->rr_resource = RACCT_UNDEFINED;
697         rule->rr_action = RCTL_ACTION_UNDEFINED;
698         rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
699         refcount_init(&rule->rr_refcount, 1);
700
701         return (rule);
702 }
703
704 struct rctl_rule *
705 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
706 {
707         struct rctl_rule *copy;
708
709         copy = uma_zalloc(rctl_rule_zone, flags);
710         if (copy == NULL)
711                 return (NULL);
712         copy->rr_subject_type = rule->rr_subject_type;
713         copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
714         copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
715         copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
716         copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
717         copy->rr_per = rule->rr_per;
718         copy->rr_resource = rule->rr_resource;
719         copy->rr_action = rule->rr_action;
720         copy->rr_amount = rule->rr_amount;
721         refcount_init(&copy->rr_refcount, 1);
722         rctl_rule_acquire_subject(copy);
723
724         return (copy);
725 }
726
727 void
728 rctl_rule_acquire(struct rctl_rule *rule)
729 {
730
731         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
732
733         refcount_acquire(&rule->rr_refcount);
734 }
735
736 static void
737 rctl_rule_free(void *context, int pending)
738 {
739         struct rctl_rule *rule;
740         
741         rule = (struct rctl_rule *)context;
742
743         KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
744         
745         /*
746          * We don't need locking here; rule is guaranteed to be inaccessible.
747          */
748         
749         rctl_rule_release_subject(rule);
750         uma_zfree(rctl_rule_zone, rule);
751 }
752
753 void
754 rctl_rule_release(struct rctl_rule *rule)
755 {
756
757         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
758
759         if (refcount_release(&rule->rr_refcount)) {
760                 /*
761                  * rctl_rule_release() is often called when iterating
762                  * over all the uidinfo structures in the system,
763                  * holding uihashtbl_lock.  Since rctl_rule_free()
764                  * might end up calling uifree(), this would lead
765                  * to lock recursion.  Use taskqueue to avoid this.
766                  */
767                 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
768                 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
769         }
770 }
771
772 static int
773 rctl_rule_fully_specified(const struct rctl_rule *rule)
774 {
775
776         switch (rule->rr_subject_type) {
777         case RCTL_SUBJECT_TYPE_UNDEFINED:
778                 return (0);
779         case RCTL_SUBJECT_TYPE_PROCESS:
780                 if (rule->rr_subject.rs_proc == NULL)
781                         return (0);
782                 break;
783         case RCTL_SUBJECT_TYPE_USER:
784                 if (rule->rr_subject.rs_uip == NULL)
785                         return (0);
786                 break;
787         case RCTL_SUBJECT_TYPE_LOGINCLASS:
788                 if (rule->rr_subject.rs_loginclass == NULL)
789                         return (0);
790                 break;
791         case RCTL_SUBJECT_TYPE_JAIL:
792                 if (rule->rr_subject.rs_prison_racct == NULL)
793                         return (0);
794                 break;
795         default:
796                 panic("rctl_rule_fully_specified: unknown subject type %d",
797                     rule->rr_subject_type);
798         }
799         if (rule->rr_resource == RACCT_UNDEFINED)
800                 return (0);
801         if (rule->rr_action == RCTL_ACTION_UNDEFINED)
802                 return (0);
803         if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
804                 return (0);
805         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
806                 return (0);
807
808         return (1);
809 }
810
811 static int
812 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
813 {
814         int error = 0;
815         char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
816              *amountstr, *perstr;
817         struct rctl_rule *rule;
818         id_t id;
819
820         rule = rctl_rule_alloc(M_WAITOK);
821
822         subjectstr = strsep(&rulestr, ":");
823         subject_idstr = strsep(&rulestr, ":");
824         resourcestr = strsep(&rulestr, ":");
825         actionstr = strsep(&rulestr, "=/");
826         amountstr = strsep(&rulestr, "/");
827         perstr = rulestr;
828
829         if (subjectstr == NULL || subjectstr[0] == '\0')
830                 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
831         else {
832                 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
833                 if (error != 0)
834                         goto out;
835         }
836
837         if (subject_idstr == NULL || subject_idstr[0] == '\0') {
838                 rule->rr_subject.rs_proc = NULL;
839                 rule->rr_subject.rs_uip = NULL;
840                 rule->rr_subject.rs_loginclass = NULL;
841                 rule->rr_subject.rs_prison_racct = NULL;
842         } else {
843                 switch (rule->rr_subject_type) {
844                 case RCTL_SUBJECT_TYPE_UNDEFINED:
845                         error = EINVAL;
846                         goto out;
847                 case RCTL_SUBJECT_TYPE_PROCESS:
848                         error = str2id(subject_idstr, &id);
849                         if (error != 0)
850                                 goto out;
851                         sx_assert(&allproc_lock, SA_LOCKED);
852                         rule->rr_subject.rs_proc = pfind(id);
853                         if (rule->rr_subject.rs_proc == NULL) {
854                                 error = ESRCH;
855                                 goto out;
856                         }
857                         PROC_UNLOCK(rule->rr_subject.rs_proc);
858                         break;
859                 case RCTL_SUBJECT_TYPE_USER:
860                         error = str2id(subject_idstr, &id);
861                         if (error != 0)
862                                 goto out;
863                         rule->rr_subject.rs_uip = uifind(id);
864                         break;
865                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
866                         rule->rr_subject.rs_loginclass =
867                             loginclass_find(subject_idstr);
868                         if (rule->rr_subject.rs_loginclass == NULL) {
869                                 error = ENAMETOOLONG;
870                                 goto out;
871                         }
872                         break;
873                 case RCTL_SUBJECT_TYPE_JAIL:
874                         rule->rr_subject.rs_prison_racct =
875                             prison_racct_find(subject_idstr);
876                         if (rule->rr_subject.rs_prison_racct == NULL) {
877                                 error = ENAMETOOLONG;
878                                 goto out;
879                         }
880                         break;
881                default:
882                        panic("rctl_string_to_rule: unknown subject type %d",
883                            rule->rr_subject_type);
884                }
885         }
886
887         if (resourcestr == NULL || resourcestr[0] == '\0')
888                 rule->rr_resource = RACCT_UNDEFINED;
889         else {
890                 error = str2value(resourcestr, &rule->rr_resource,
891                     resourcenames);
892                 if (error != 0)
893                         goto out;
894         }
895
896         if (actionstr == NULL || actionstr[0] == '\0')
897                 rule->rr_action = RCTL_ACTION_UNDEFINED;
898         else {
899                 error = str2value(actionstr, &rule->rr_action, actionnames);
900                 if (error != 0)
901                         goto out;
902         }
903
904         if (amountstr == NULL || amountstr[0] == '\0')
905                 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
906         else {
907                 error = str2int64(amountstr, &rule->rr_amount);
908                 if (error != 0)
909                         goto out;
910                 if (racct_is_in_thousands(rule->rr_resource))
911                         rule->rr_amount *= 1000;
912         }
913
914         if (perstr == NULL || perstr[0] == '\0')
915                 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
916         else {
917                 error = str2value(perstr, &rule->rr_per, subjectnames);
918                 if (error != 0)
919                         goto out;
920         }
921
922 out:
923         if (error == 0)
924                 *rulep = rule;
925         else
926                 rctl_rule_release(rule);
927
928         return (error);
929 }
930
931 /*
932  * Link a rule with all the subjects it applies to.
933  */
934 int
935 rctl_rule_add(struct rctl_rule *rule)
936 {
937         struct proc *p;
938         struct ucred *cred;
939         struct uidinfo *uip;
940         struct prison *pr;
941         struct prison_racct *prr;
942         struct loginclass *lc;
943         struct rctl_rule *rule2;
944         int match;
945
946         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
947
948         /*
949          * Some rules just don't make sense.  Note that the one below
950          * cannot be rewritten using racct_is_deniable(); the RACCT_PCTCPU,
951          * for example, is not deniable in the racct sense, but the
952          * limit is enforced in a different way, so "deny" rules for %CPU
953          * do make sense.
954          */
955         if (rule->rr_action == RCTL_ACTION_DENY &&
956             (rule->rr_resource == RACCT_CPU ||
957             rule->rr_resource == RACCT_WALLCLOCK))
958                 return (EOPNOTSUPP);
959
960         if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
961             racct_is_sloppy(rule->rr_resource))
962                 return (EOPNOTSUPP);
963
964         /*
965          * Make sure there are no duplicated rules.  Also, for the "deny"
966          * rules, remove ones differing only by "amount".
967          */
968         if (rule->rr_action == RCTL_ACTION_DENY) {
969                 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
970                 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
971                 rctl_rule_remove(rule2);
972                 rctl_rule_release(rule2);
973         } else
974                 rctl_rule_remove(rule);
975
976         switch (rule->rr_subject_type) {
977         case RCTL_SUBJECT_TYPE_PROCESS:
978                 p = rule->rr_subject.rs_proc;
979                 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
980                 /*
981                  * No resource limits for system processes.
982                  */
983                 if (p->p_flag & P_SYSTEM)
984                         return (EPERM);
985
986                 rctl_racct_add_rule(p->p_racct, rule);
987                 /*
988                  * In case of per-process rule, we don't have anything more
989                  * to do.
990                  */
991                 return (0);
992
993         case RCTL_SUBJECT_TYPE_USER:
994                 uip = rule->rr_subject.rs_uip;
995                 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
996                 rctl_racct_add_rule(uip->ui_racct, rule);
997                 break;
998
999         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1000                 lc = rule->rr_subject.rs_loginclass;
1001                 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1002                 rctl_racct_add_rule(lc->lc_racct, rule);
1003                 break;
1004
1005         case RCTL_SUBJECT_TYPE_JAIL:
1006                 prr = rule->rr_subject.rs_prison_racct;
1007                 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1008                 rctl_racct_add_rule(prr->prr_racct, rule);
1009                 break;
1010
1011         default:
1012                 panic("rctl_rule_add: unknown subject type %d",
1013                     rule->rr_subject_type);
1014         }
1015
1016         /*
1017          * Now go through all the processes and add the new rule to the ones
1018          * it applies to.
1019          */
1020         sx_assert(&allproc_lock, SA_LOCKED);
1021         FOREACH_PROC_IN_SYSTEM(p) {
1022                 if (p->p_flag & P_SYSTEM)
1023                         continue;
1024                 cred = p->p_ucred;
1025                 switch (rule->rr_subject_type) {
1026                 case RCTL_SUBJECT_TYPE_USER:
1027                         if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1028                             cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1029                                 break;
1030                         continue;
1031                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1032                         if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1033                                 break;
1034                         continue;
1035                 case RCTL_SUBJECT_TYPE_JAIL:
1036                         match = 0;
1037                         for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1038                                 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1039                                         match = 1;
1040                                         break;
1041                                 }
1042                         }
1043                         if (match)
1044                                 break;
1045                         continue;
1046                 default:
1047                         panic("rctl_rule_add: unknown subject type %d",
1048                             rule->rr_subject_type);
1049                 }
1050
1051                 rctl_racct_add_rule(p->p_racct, rule);
1052         }
1053
1054         return (0);
1055 }
1056
1057 static void
1058 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1059 {
1060         struct rctl_rule *filter = (struct rctl_rule *)arg2;
1061         int found = 0;
1062
1063         rw_wlock(&rctl_lock);
1064         found += rctl_racct_remove_rules(racct, filter);
1065         rw_wunlock(&rctl_lock);
1066
1067         *((int *)arg3) += found;
1068 }
1069
1070 /*
1071  * Remove all rules that match the filter.
1072  */
1073 int
1074 rctl_rule_remove(struct rctl_rule *filter)
1075 {
1076         int found = 0;
1077         struct proc *p;
1078
1079         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1080             filter->rr_subject.rs_proc != NULL) {
1081                 p = filter->rr_subject.rs_proc;
1082                 rw_wlock(&rctl_lock);
1083                 found = rctl_racct_remove_rules(p->p_racct, filter);
1084                 rw_wunlock(&rctl_lock);
1085                 if (found)
1086                         return (0);
1087                 return (ESRCH);
1088         }
1089
1090         loginclass_racct_foreach(rctl_rule_remove_callback, filter,
1091             (void *)&found);
1092         ui_racct_foreach(rctl_rule_remove_callback, filter,
1093             (void *)&found);
1094         prison_racct_foreach(rctl_rule_remove_callback, filter,
1095             (void *)&found);
1096
1097         sx_assert(&allproc_lock, SA_LOCKED);
1098         rw_wlock(&rctl_lock);
1099         FOREACH_PROC_IN_SYSTEM(p) {
1100                 found += rctl_racct_remove_rules(p->p_racct, filter);
1101         }
1102         rw_wunlock(&rctl_lock);
1103
1104         if (found)
1105                 return (0);
1106         return (ESRCH);
1107 }
1108
1109 /*
1110  * Appends a rule to the sbuf.
1111  */
1112 static void
1113 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1114 {
1115         int64_t amount;
1116
1117         sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1118
1119         switch (rule->rr_subject_type) {
1120         case RCTL_SUBJECT_TYPE_PROCESS:
1121                 if (rule->rr_subject.rs_proc == NULL)
1122                         sbuf_printf(sb, ":");
1123                 else
1124                         sbuf_printf(sb, "%d:",
1125                             rule->rr_subject.rs_proc->p_pid);
1126                 break;
1127         case RCTL_SUBJECT_TYPE_USER:
1128                 if (rule->rr_subject.rs_uip == NULL)
1129                         sbuf_printf(sb, ":");
1130                 else
1131                         sbuf_printf(sb, "%d:",
1132                             rule->rr_subject.rs_uip->ui_uid);
1133                 break;
1134         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1135                 if (rule->rr_subject.rs_loginclass == NULL)
1136                         sbuf_printf(sb, ":");
1137                 else
1138                         sbuf_printf(sb, "%s:",
1139                             rule->rr_subject.rs_loginclass->lc_name);
1140                 break;
1141         case RCTL_SUBJECT_TYPE_JAIL:
1142                 if (rule->rr_subject.rs_prison_racct == NULL)
1143                         sbuf_printf(sb, ":");
1144                 else
1145                         sbuf_printf(sb, "%s:",
1146                             rule->rr_subject.rs_prison_racct->prr_name);
1147                 break;
1148         default:
1149                 panic("rctl_rule_to_sbuf: unknown subject type %d",
1150                     rule->rr_subject_type);
1151         }
1152
1153         amount = rule->rr_amount;
1154         if (amount != RCTL_AMOUNT_UNDEFINED &&
1155             racct_is_in_thousands(rule->rr_resource))
1156                 amount /= 1000;
1157
1158         sbuf_printf(sb, "%s:%s=%jd",
1159             rctl_resource_name(rule->rr_resource),
1160             rctl_action_name(rule->rr_action),
1161             amount);
1162
1163         if (rule->rr_per != rule->rr_subject_type)
1164                 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1165 }
1166
1167 /*
1168  * Routine used by RCTL syscalls to read in input string.
1169  */
1170 static int
1171 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1172 {
1173         int error;
1174         char *str;
1175
1176         if (inbuflen <= 0)
1177                 return (EINVAL);
1178
1179         str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1180         error = copyinstr(inbufp, str, inbuflen, NULL);
1181         if (error != 0) {
1182                 free(str, M_RCTL);
1183                 return (error);
1184         }
1185
1186         *inputstr = str;
1187
1188         return (0);
1189 }
1190
1191 /*
1192  * Routine used by RCTL syscalls to write out output string.
1193  */
1194 static int
1195 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1196 {
1197         int error;
1198
1199         if (outputsbuf == NULL)
1200                 return (0);
1201
1202         sbuf_finish(outputsbuf);
1203         if (outbuflen < sbuf_len(outputsbuf) + 1) {
1204                 sbuf_delete(outputsbuf);
1205                 return (ERANGE);
1206         }
1207         error = copyout(sbuf_data(outputsbuf), outbufp,
1208             sbuf_len(outputsbuf) + 1);
1209         sbuf_delete(outputsbuf);
1210         return (error);
1211 }
1212
1213 static struct sbuf *
1214 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1215 {
1216         int i;
1217         int64_t amount;
1218         struct sbuf *sb;
1219
1220         sb = sbuf_new_auto();
1221         for (i = 0; i <= RACCT_MAX; i++) {
1222                 if (sloppy == 0 && racct_is_sloppy(i))
1223                         continue;
1224                 amount = racct->r_resources[i];
1225                 if (racct_is_in_thousands(i))
1226                         amount /= 1000;
1227                 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1228         }
1229         sbuf_setpos(sb, sbuf_len(sb) - 1);
1230         return (sb);
1231 }
1232
1233 int
1234 rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1235 {
1236         int error;
1237         char *inputstr;
1238         struct rctl_rule *filter;
1239         struct sbuf *outputsbuf = NULL;
1240         struct proc *p;
1241         struct uidinfo *uip;
1242         struct loginclass *lc;
1243         struct prison_racct *prr;
1244
1245         error = priv_check(td, PRIV_RCTL_GET_RACCT);
1246         if (error != 0)
1247                 return (error);
1248
1249         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1250         if (error != 0)
1251                 return (error);
1252
1253         sx_slock(&allproc_lock);
1254         error = rctl_string_to_rule(inputstr, &filter);
1255         free(inputstr, M_RCTL);
1256         if (error != 0) {
1257                 sx_sunlock(&allproc_lock);
1258                 return (error);
1259         }
1260
1261         switch (filter->rr_subject_type) {
1262         case RCTL_SUBJECT_TYPE_PROCESS:
1263                 p = filter->rr_subject.rs_proc;
1264                 if (p == NULL) {
1265                         error = EINVAL;
1266                         goto out;
1267                 }
1268                 if (p->p_flag & P_SYSTEM) {
1269                         error = EINVAL;
1270                         goto out;
1271                 }
1272                 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1273                 break;
1274         case RCTL_SUBJECT_TYPE_USER:
1275                 uip = filter->rr_subject.rs_uip;
1276                 if (uip == NULL) {
1277                         error = EINVAL;
1278                         goto out;
1279                 }
1280                 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1281                 break;
1282         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1283                 lc = filter->rr_subject.rs_loginclass;
1284                 if (lc == NULL) {
1285                         error = EINVAL;
1286                         goto out;
1287                 }
1288                 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1289                 break;
1290         case RCTL_SUBJECT_TYPE_JAIL:
1291                 prr = filter->rr_subject.rs_prison_racct;
1292                 if (prr == NULL) {
1293                         error = EINVAL;
1294                         goto out;
1295                 }
1296                 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1297                 break;
1298         default:
1299                 error = EINVAL;
1300         }
1301 out:
1302         rctl_rule_release(filter);
1303         sx_sunlock(&allproc_lock);
1304         if (error != 0)
1305                 return (error);
1306
1307         error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1308
1309         return (error);
1310 }
1311
1312 static void
1313 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1314 {
1315         struct rctl_rule *filter = (struct rctl_rule *)arg2;
1316         struct rctl_rule_link *link;
1317         struct sbuf *sb = (struct sbuf *)arg3;
1318
1319         rw_rlock(&rctl_lock);
1320         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1321                 if (!rctl_rule_matches(link->rrl_rule, filter))
1322                         continue;
1323                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1324                 sbuf_printf(sb, ",");
1325         }
1326         rw_runlock(&rctl_lock);
1327 }
1328
1329 int
1330 rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1331 {
1332         int error;
1333         size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1334         char *inputstr, *buf;
1335         struct sbuf *sb;
1336         struct rctl_rule *filter;
1337         struct rctl_rule_link *link;
1338         struct proc *p;
1339
1340         error = priv_check(td, PRIV_RCTL_GET_RULES);
1341         if (error != 0)
1342                 return (error);
1343
1344         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1345         if (error != 0)
1346                 return (error);
1347
1348         sx_slock(&allproc_lock);
1349         error = rctl_string_to_rule(inputstr, &filter);
1350         free(inputstr, M_RCTL);
1351         if (error != 0) {
1352                 sx_sunlock(&allproc_lock);
1353                 return (error);
1354         }
1355
1356 again:
1357         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1358         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1359         KASSERT(sb != NULL, ("sbuf_new failed"));
1360
1361         sx_assert(&allproc_lock, SA_LOCKED);
1362         FOREACH_PROC_IN_SYSTEM(p) {
1363                 rw_rlock(&rctl_lock);
1364                 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1365                         /*
1366                          * Non-process rules will be added to the buffer later.
1367                          * Adding them here would result in duplicated output.
1368                          */
1369                         if (link->rrl_rule->rr_subject_type !=
1370                             RCTL_SUBJECT_TYPE_PROCESS)
1371                                 continue;
1372                         if (!rctl_rule_matches(link->rrl_rule, filter))
1373                                 continue;
1374                         rctl_rule_to_sbuf(sb, link->rrl_rule);
1375                         sbuf_printf(sb, ",");
1376                 }
1377                 rw_runlock(&rctl_lock);
1378         }
1379
1380         loginclass_racct_foreach(rctl_get_rules_callback, filter, sb);
1381         ui_racct_foreach(rctl_get_rules_callback, filter, sb);
1382         prison_racct_foreach(rctl_get_rules_callback, filter, sb);
1383         if (sbuf_error(sb) == ENOMEM) {
1384                 sbuf_delete(sb);
1385                 free(buf, M_RCTL);
1386                 bufsize *= 4;
1387                 goto again;
1388         }
1389
1390         /*
1391          * Remove trailing ",".
1392          */
1393         if (sbuf_len(sb) > 0)
1394                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1395
1396         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1397
1398         rctl_rule_release(filter);
1399         sx_sunlock(&allproc_lock);
1400         free(buf, M_RCTL);
1401         return (error);
1402 }
1403
1404 int
1405 rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1406 {
1407         int error;
1408         size_t bufsize = RCTL_DEFAULT_BUFSIZE;
1409         char *inputstr, *buf;
1410         struct sbuf *sb;
1411         struct rctl_rule *filter;
1412         struct rctl_rule_link *link;
1413
1414         error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1415         if (error != 0)
1416                 return (error);
1417
1418         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1419         if (error != 0)
1420                 return (error);
1421
1422         sx_slock(&allproc_lock);
1423         error = rctl_string_to_rule(inputstr, &filter);
1424         free(inputstr, M_RCTL);
1425         if (error != 0) {
1426                 sx_sunlock(&allproc_lock);
1427                 return (error);
1428         }
1429
1430         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1431                 rctl_rule_release(filter);
1432                 sx_sunlock(&allproc_lock);
1433                 return (EINVAL);
1434         }
1435         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1436                 rctl_rule_release(filter);
1437                 sx_sunlock(&allproc_lock);
1438                 return (EOPNOTSUPP);
1439         }
1440         if (filter->rr_subject.rs_proc == NULL) {
1441                 rctl_rule_release(filter);
1442                 sx_sunlock(&allproc_lock);
1443                 return (EINVAL);
1444         }
1445
1446 again:
1447         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1448         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1449         KASSERT(sb != NULL, ("sbuf_new failed"));
1450
1451         rw_rlock(&rctl_lock);
1452         LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1453             rrl_next) {
1454                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1455                 sbuf_printf(sb, ",");
1456         }
1457         rw_runlock(&rctl_lock);
1458         if (sbuf_error(sb) == ENOMEM) {
1459                 sbuf_delete(sb);
1460                 free(buf, M_RCTL);
1461                 bufsize *= 4;
1462                 goto again;
1463         }
1464
1465         /*
1466          * Remove trailing ",".
1467          */
1468         if (sbuf_len(sb) > 0)
1469                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1470
1471         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1472         rctl_rule_release(filter);
1473         sx_sunlock(&allproc_lock);
1474         free(buf, M_RCTL);
1475         return (error);
1476 }
1477
1478 int
1479 rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1480 {
1481         int error;
1482         struct rctl_rule *rule;
1483         char *inputstr;
1484
1485         error = priv_check(td, PRIV_RCTL_ADD_RULE);
1486         if (error != 0)
1487                 return (error);
1488
1489         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1490         if (error != 0)
1491                 return (error);
1492
1493         sx_slock(&allproc_lock);
1494         error = rctl_string_to_rule(inputstr, &rule);
1495         free(inputstr, M_RCTL);
1496         if (error != 0) {
1497                 sx_sunlock(&allproc_lock);
1498                 return (error);
1499         }
1500         /*
1501          * The 'per' part of a rule is optional.
1502          */
1503         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1504             rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1505                 rule->rr_per = rule->rr_subject_type;
1506
1507         if (!rctl_rule_fully_specified(rule)) {
1508                 error = EINVAL;
1509                 goto out;
1510         }
1511
1512         error = rctl_rule_add(rule);
1513
1514 out:
1515         rctl_rule_release(rule);
1516         sx_sunlock(&allproc_lock);
1517         return (error);
1518 }
1519
1520 int
1521 rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1522 {
1523         int error;
1524         struct rctl_rule *filter;
1525         char *inputstr;
1526
1527         error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1528         if (error != 0)
1529                 return (error);
1530
1531         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1532         if (error != 0)
1533                 return (error);
1534
1535         sx_slock(&allproc_lock);
1536         error = rctl_string_to_rule(inputstr, &filter);
1537         free(inputstr, M_RCTL);
1538         if (error != 0) {
1539                 sx_sunlock(&allproc_lock);
1540                 return (error);
1541         }
1542
1543         error = rctl_rule_remove(filter);
1544         rctl_rule_release(filter);
1545         sx_sunlock(&allproc_lock);
1546
1547         return (error);
1548 }
1549
1550 /*
1551  * Update RCTL rule list after credential change.
1552  */
1553 void
1554 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1555 {
1556         int rulecnt, i;
1557         struct rctl_rule_link *link, *newlink;
1558         struct uidinfo *newuip;
1559         struct loginclass *newlc;
1560         struct prison_racct *newprr;
1561         LIST_HEAD(, rctl_rule_link) newrules;
1562
1563         newuip = newcred->cr_ruidinfo;
1564         newlc = newcred->cr_loginclass;
1565         newprr = newcred->cr_prison->pr_prison_racct;
1566         
1567         LIST_INIT(&newrules);
1568
1569 again:
1570         /*
1571          * First, count the rules that apply to the process with new
1572          * credentials.
1573          */
1574         rulecnt = 0;
1575         rw_rlock(&rctl_lock);
1576         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1577                 if (link->rrl_rule->rr_subject_type ==
1578                     RCTL_SUBJECT_TYPE_PROCESS)
1579                         rulecnt++;
1580         }
1581         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1582                 rulecnt++;
1583         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1584                 rulecnt++;
1585         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1586                 rulecnt++;
1587         rw_runlock(&rctl_lock);
1588
1589         /*
1590          * Create temporary list.  We've dropped the rctl_lock in order
1591          * to use M_WAITOK.
1592          */
1593         for (i = 0; i < rulecnt; i++) {
1594                 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
1595                 newlink->rrl_rule = NULL;
1596                 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
1597         }
1598
1599         newlink = LIST_FIRST(&newrules);
1600
1601         /*
1602          * Assign rules to the newly allocated list entries.
1603          */
1604         rw_wlock(&rctl_lock);
1605         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1606                 if (link->rrl_rule->rr_subject_type ==
1607                     RCTL_SUBJECT_TYPE_PROCESS) {
1608                         if (newlink == NULL)
1609                                 goto goaround;
1610                         rctl_rule_acquire(link->rrl_rule);
1611                         newlink->rrl_rule = link->rrl_rule;
1612                         newlink = LIST_NEXT(newlink, rrl_next);
1613                         rulecnt--;
1614                 }
1615         }
1616         
1617         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
1618                 if (newlink == NULL)
1619                         goto goaround;
1620                 rctl_rule_acquire(link->rrl_rule);
1621                 newlink->rrl_rule = link->rrl_rule;
1622                 newlink = LIST_NEXT(newlink, rrl_next);
1623                 rulecnt--;
1624         }
1625
1626         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
1627                 if (newlink == NULL)
1628                         goto goaround;
1629                 rctl_rule_acquire(link->rrl_rule);
1630                 newlink->rrl_rule = link->rrl_rule;
1631                 newlink = LIST_NEXT(newlink, rrl_next);
1632                 rulecnt--;
1633         }
1634
1635         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
1636                 if (newlink == NULL)
1637                         goto goaround;
1638                 rctl_rule_acquire(link->rrl_rule);
1639                 newlink->rrl_rule = link->rrl_rule;
1640                 newlink = LIST_NEXT(newlink, rrl_next);
1641                 rulecnt--;
1642         }
1643
1644         if (rulecnt == 0) {
1645                 /*
1646                  * Free the old rule list.
1647                  */
1648                 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
1649                         link = LIST_FIRST(&p->p_racct->r_rule_links);
1650                         LIST_REMOVE(link, rrl_next);
1651                         rctl_rule_release(link->rrl_rule);
1652                         uma_zfree(rctl_rule_link_zone, link);
1653                 }
1654
1655                 /*
1656                  * Replace lists and we're done.
1657                  *
1658                  * XXX: Is there any way to switch list heads instead
1659                  *      of iterating here?
1660                  */
1661                 while (!LIST_EMPTY(&newrules)) {
1662                         newlink = LIST_FIRST(&newrules);
1663                         LIST_REMOVE(newlink, rrl_next);
1664                         LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
1665                             newlink, rrl_next);
1666                 }
1667
1668                 rw_wunlock(&rctl_lock);
1669
1670                 return;
1671         }
1672
1673 goaround:
1674         rw_wunlock(&rctl_lock);
1675
1676         /*
1677          * Rule list changed while we were not holding the rctl_lock.
1678          * Free the new list and try again.
1679          */
1680         while (!LIST_EMPTY(&newrules)) {
1681                 newlink = LIST_FIRST(&newrules);
1682                 LIST_REMOVE(newlink, rrl_next);
1683                 if (newlink->rrl_rule != NULL)
1684                         rctl_rule_release(newlink->rrl_rule);
1685                 uma_zfree(rctl_rule_link_zone, newlink);
1686         }
1687
1688         goto again;
1689 }
1690
1691 /*
1692  * Assign RCTL rules to the newly created process.
1693  */
1694 int
1695 rctl_proc_fork(struct proc *parent, struct proc *child)
1696 {
1697         int error;
1698         struct rctl_rule_link *link;
1699         struct rctl_rule *rule;
1700
1701         LIST_INIT(&child->p_racct->r_rule_links);
1702
1703         /*
1704          * No limits for kernel processes.
1705          */
1706         if (child->p_flag & P_SYSTEM)
1707                 return (0);
1708
1709         /*
1710          * Nothing to inherit from P_SYSTEM parents.
1711          */
1712         if (parent->p_racct == NULL) {
1713                 KASSERT(parent->p_flag & P_SYSTEM,
1714                     ("non-system process without racct; p = %p", parent));
1715                 return (0);
1716         }
1717
1718         rw_wlock(&rctl_lock);
1719
1720         /*
1721          * Go through limits applicable to the parent and assign them
1722          * to the child.  Rules with 'process' subject have to be duplicated
1723          * in order to make their rr_subject point to the new process.
1724          */
1725         LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
1726                 if (link->rrl_rule->rr_subject_type ==
1727                     RCTL_SUBJECT_TYPE_PROCESS) {
1728                         rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
1729                         if (rule == NULL)
1730                                 goto fail;
1731                         KASSERT(rule->rr_subject.rs_proc == parent,
1732                             ("rule->rr_subject.rs_proc != parent"));
1733                         rule->rr_subject.rs_proc = child;
1734                         error = rctl_racct_add_rule_locked(child->p_racct,
1735                             rule);
1736                         rctl_rule_release(rule);
1737                         if (error != 0)
1738                                 goto fail;
1739                 } else {
1740                         error = rctl_racct_add_rule_locked(child->p_racct,
1741                             link->rrl_rule);
1742                         if (error != 0)
1743                                 goto fail;
1744                 }
1745         }
1746
1747         rw_wunlock(&rctl_lock);
1748         return (0);
1749
1750 fail:
1751         while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
1752                 link = LIST_FIRST(&child->p_racct->r_rule_links);
1753                 LIST_REMOVE(link, rrl_next);
1754                 rctl_rule_release(link->rrl_rule);
1755                 uma_zfree(rctl_rule_link_zone, link);
1756         }
1757         rw_wunlock(&rctl_lock);
1758         return (EAGAIN);
1759 }
1760
1761 /*
1762  * Release rules attached to the racct.
1763  */
1764 void
1765 rctl_racct_release(struct racct *racct)
1766 {
1767         struct rctl_rule_link *link;
1768
1769         rw_wlock(&rctl_lock);
1770         while (!LIST_EMPTY(&racct->r_rule_links)) {
1771                 link = LIST_FIRST(&racct->r_rule_links);
1772                 LIST_REMOVE(link, rrl_next);
1773                 rctl_rule_release(link->rrl_rule);
1774                 uma_zfree(rctl_rule_link_zone, link);
1775         }
1776         rw_wunlock(&rctl_lock);
1777 }
1778
1779 static void
1780 rctl_init(void)
1781 {
1782
1783         rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
1784             sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
1785             UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1786         rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
1787             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1788 }
1789
1790 #else /* !RCTL */
1791
/*
 * Stub for builds without RCTL; unconditionally reports ENOSYS.
 */
int
rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{

        return (ENOSYS);
}
1798
/*
 * Stub for builds without RCTL; unconditionally reports ENOSYS.
 */
int
rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{

        return (ENOSYS);
}
1805
/*
 * Stub for builds without RCTL; unconditionally reports ENOSYS.
 */
int
rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{

        return (ENOSYS);
}
1812
/*
 * Stub for builds without RCTL; unconditionally reports ENOSYS.
 */
int
rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{

        return (ENOSYS);
}
1819
/*
 * Stub for builds without RCTL; unconditionally reports ENOSYS.
 */
int
rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{

        return (ENOSYS);
}
1826
1827 #endif /* !RCTL */