2 * Copyright (c) 2010 The FreeBSD Foundation
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include <sys/param.h>
37 #include <sys/malloc.h>
38 #include <sys/queue.h>
39 #include <sys/refcount.h>
41 #include <sys/kernel.h>
42 #include <sys/limits.h>
43 #include <sys/loginclass.h>
46 #include <sys/racct.h>
48 #include <sys/resourcevar.h>
50 #include <sys/sysent.h>
51 #include <sys/sysproto.h>
52 #include <sys/systm.h>
53 #include <sys/types.h>
54 #include <sys/eventhandler.h>
56 #include <sys/mutex.h>
57 #include <sys/rwlock.h>
59 #include <sys/taskqueue.h>
65 #error "The RCTL option requires the RACCT option"
68 FEATURE(rctl, "Resource Limits");
71 #define HRF_DONT_INHERIT 1
72 #define HRF_DONT_ACCUMULATE 2
/*
 * Buffer size caps, in bytes.  The replacement lists are parenthesized so
 * that each macro behaves as a single operand wherever it is expanded
 * (e.g. `len / RCTL_MAX_INBUFSIZE` or `x % RCTL_MAX_OUTBUFSIZE`).
 */
#define RCTL_MAX_INBUFSIZE (4 * 1024)
#define RCTL_MAX_OUTBUFSIZE (16 * 1024 * 1024)
76 #define RCTL_LOG_BUFSIZE 128
78 #define RCTL_PCPU_SHIFT (10 * 1000000)
/*
 * Tunables and sysctl knobs exported under the kern.racct.rctl node.
 * maxbufsize defaults to RCTL_MAX_OUTBUFSIZE; both rate limits default to 10.
 */
80 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE;
81 static int rctl_log_rate_limit = 10;
82 static int rctl_devctl_rate_limit = 10;
85 * Values below are initialized in rctl_init().
/* Presumably -1 means "not set yet"; confirm against rctl_init(). */
87 static int rctl_throttle_min = -1;
88 static int rctl_throttle_max = -1;
89 static int rctl_throttle_pct = -1;
90 static int rctl_throttle_pct2 = -1;
/* Handlers for the four throttle_* sysctls declared with SYSCTL_PROC below. */
92 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS);
93 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS);
94 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS);
95 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS);
97 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW, 0, "Resource Limits");
98 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN,
99 &rctl_maxbufsize, 0, "Maximum output buffer size");
/*
 * NOTE(review): log_rate_limit and devctl_rate_limit are declared as plain
 * int above but exported with SYSCTL_UINT; confirm the intended signedness.
 */
100 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW,
101 &rctl_log_rate_limit, 0, "Maximum number of log messages per second");
102 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN,
103 &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second");
/*
 * The throttle_* knobs go through handler functions rather than being bound
 * directly to their variables; each also has a TUNABLE_INT of the same name.
 */
104 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min,
105 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_min_sysctl, "IU",
106 "Shortest throttling duration, in hz");
107 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min);
108 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max,
109 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_max_sysctl, "IU",
110 "Longest throttling duration, in hz");
111 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max);
112 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct,
113 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct_sysctl, "IU",
114 "Throttling penalty for process consumption, in percent");
115 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct);
116 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2,
117 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct2_sysctl, "IU",
118 "Throttling penalty for container consumption, in percent");
119 TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2);
/*
 * NOTE(review): the embedded numbering jumps from 128 to 137, so the rest of
 * this struct (for instance the rrl_exceeded member that other code in this
 * file reads and writes) and its closing brace are not visible here.
 */
122 * 'rctl_rule_link' connects a rule with every racct it's related to.
123 * For example, rule 'user:X:openfiles:deny=N/process' is linked
124 * with uidinfo for user X, and to each process of that user.
126 struct rctl_rule_link {
/* Linkage for the per-racct r_rule_links list. */
127 LIST_ENTRY(rctl_rule_link) rrl_next;
/* The rule this link attaches to the racct. */
128 struct rctl_rule *rrl_rule;
/*
 * Maps subject-type keywords to RCTL_SUBJECT_TYPE_* values.  The lookup
 * loops in this file stop at an entry whose d_name is NULL; that terminator
 * entry is not visible in this listing.
 */
137 static struct dict subjectnames[] = {
138 { "process", RCTL_SUBJECT_TYPE_PROCESS },
139 { "user", RCTL_SUBJECT_TYPE_USER },
140 { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
141 { "jail", RCTL_SUBJECT_TYPE_JAIL },
/*
 * Maps resource keywords, as they appear in rule strings, to RACCT_*
 * resource identifiers.  Scanned linearly until a NULL d_name entry (the
 * terminator is not visible in this listing).
 */
144 static struct dict resourcenames[] = {
145 { "cputime", RACCT_CPU },
146 { "datasize", RACCT_DATA },
147 { "stacksize", RACCT_STACK },
148 { "coredumpsize", RACCT_CORE },
149 { "memoryuse", RACCT_RSS },
150 { "memorylocked", RACCT_MEMLOCK },
151 { "maxproc", RACCT_NPROC },
152 { "openfiles", RACCT_NOFILE },
153 { "vmemoryuse", RACCT_VMEM },
154 { "pseudoterminals", RACCT_NPTS },
155 { "swapuse", RACCT_SWAP },
156 { "nthr", RACCT_NTHR },
157 { "msgqqueued", RACCT_MSGQQUEUED },
158 { "msgqsize", RACCT_MSGQSIZE },
159 { "nmsgq", RACCT_NMSGQ },
160 { "nsem", RACCT_NSEM },
161 { "nsemop", RACCT_NSEMOP },
162 { "nshm", RACCT_NSHM },
163 { "shmsize", RACCT_SHMSIZE },
164 { "wallclock", RACCT_WALLCLOCK },
165 { "pcpu", RACCT_PCTCPU },
166 { "readbps", RACCT_READBPS },
167 { "writebps", RACCT_WRITEBPS },
168 { "readiops", RACCT_READIOPS },
169 { "writeiops", RACCT_WRITEIOPS },
/*
 * Maps action keywords to RCTL_ACTION_* values: one entry per signal action
 * plus deny, log, devctl and throttle.  Scanned linearly until a NULL d_name
 * entry (the terminator is not visible in this listing).
 */
172 static struct dict actionnames[] = {
173 { "sighup", RCTL_ACTION_SIGHUP },
174 { "sigint", RCTL_ACTION_SIGINT },
175 { "sigquit", RCTL_ACTION_SIGQUIT },
176 { "sigill", RCTL_ACTION_SIGILL },
177 { "sigtrap", RCTL_ACTION_SIGTRAP },
178 { "sigabrt", RCTL_ACTION_SIGABRT },
179 { "sigemt", RCTL_ACTION_SIGEMT },
180 { "sigfpe", RCTL_ACTION_SIGFPE },
181 { "sigkill", RCTL_ACTION_SIGKILL },
182 { "sigbus", RCTL_ACTION_SIGBUS },
183 { "sigsegv", RCTL_ACTION_SIGSEGV },
184 { "sigsys", RCTL_ACTION_SIGSYS },
185 { "sigpipe", RCTL_ACTION_SIGPIPE },
186 { "sigalrm", RCTL_ACTION_SIGALRM },
187 { "sigterm", RCTL_ACTION_SIGTERM },
188 { "sigurg", RCTL_ACTION_SIGURG },
189 { "sigstop", RCTL_ACTION_SIGSTOP },
190 { "sigtstp", RCTL_ACTION_SIGTSTP },
191 { "sigchld", RCTL_ACTION_SIGCHLD },
192 { "sigttin", RCTL_ACTION_SIGTTIN },
193 { "sigttou", RCTL_ACTION_SIGTTOU },
194 { "sigio", RCTL_ACTION_SIGIO },
195 { "sigxcpu", RCTL_ACTION_SIGXCPU },
196 { "sigxfsz", RCTL_ACTION_SIGXFSZ },
197 { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
198 { "sigprof", RCTL_ACTION_SIGPROF },
199 { "sigwinch", RCTL_ACTION_SIGWINCH },
200 { "siginfo", RCTL_ACTION_SIGINFO },
201 { "sigusr1", RCTL_ACTION_SIGUSR1 },
202 { "sigusr2", RCTL_ACTION_SIGUSR2 },
203 { "sigthr", RCTL_ACTION_SIGTHR },
204 { "deny", RCTL_ACTION_DENY },
205 { "log", RCTL_ACTION_LOG },
206 { "devctl", RCTL_ACTION_DEVCTL },
207 { "throttle", RCTL_ACTION_THROTTLE },
/* rctl_init() is registered to run at SI_SUB_RACCT, SI_ORDER_FIRST. */
210 static void rctl_init(void);
211 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
/* UMA zones backing struct rctl_rule and struct rctl_rule_link allocations. */
213 static uma_zone_t rctl_rule_zone;
214 static uma_zone_t rctl_rule_link_zone;
/* Single reader/writer lock wrapped by the RCTL_* macros below. */
215 static struct rwlock rctl_lock;
216 RW_SYSINIT(rctl_lock, &rctl_lock, "RCTL lock");
218 #define RCTL_RLOCK() rw_rlock(&rctl_lock)
219 #define RCTL_RUNLOCK() rw_runlock(&rctl_lock)
220 #define RCTL_WLOCK() rw_wlock(&rctl_lock)
221 #define RCTL_WUNLOCK() rw_wunlock(&rctl_lock)
/* Assert the lock is held in any mode / held exclusively, respectively. */
222 #define RCTL_LOCK_ASSERT() rw_assert(&rctl_lock, RA_LOCKED)
223 #define RCTL_WLOCK_ASSERT() rw_assert(&rctl_lock, RA_WLOCKED)
/* Forward declarations for helpers used before their definitions. */
225 static int rctl_rule_fully_specified(const struct rctl_rule *rule);
226 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
/* malloc(9) type used for temporary buffers (e.g. in rctl_enforce()). */
228 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
/*
 * Sysctl handler for kern.racct.rctl.throttle_min.
 *
 * Copies the current value into a local, lets sysctl_handle_int() service
 * the request, and stores the new value after a range check against 1 and
 * rctl_throttle_max.
 * NOTE(review): the statements taken when the error/range checks fire, and
 * the final return, are missing from this listing.
 */
230 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS)
232 int error, val = rctl_throttle_min;
234 error = sysctl_handle_int(oidp, &val, 0, req);
/* Error, or a read-only request with no new value supplied. */
235 if (error || !req->newptr)
/* New minimum must be at least 1 and must not exceed the current maximum. */
237 if (val < 1 || val > rctl_throttle_max)
241 rctl_throttle_min = val;
/*
 * Sysctl handler for kern.racct.rctl.throttle_max.
 *
 * Same shape as the throttle_min handler; the only visible validation is
 * that the new maximum is not below rctl_throttle_min.
 * NOTE(review): the branch bodies and the return are missing from this
 * listing.
 */
247 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS)
249 int error, val = rctl_throttle_max;
251 error = sysctl_handle_int(oidp, &val, 0, req);
/* Error, or a read-only request with no new value supplied. */
252 if (error || !req->newptr)
254 if (val < rctl_throttle_min)
258 rctl_throttle_max = val;
/*
 * Sysctl handler for kern.racct.rctl.throttle_pct.
 *
 * Services the request through sysctl_handle_int() on a local copy and then
 * stores the result in rctl_throttle_pct.
 * NOTE(review): any range validation between the handle call and the store
 * (embedded lines 270-274) is not visible in this listing.
 */
264 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS)
266 int error, val = rctl_throttle_pct;
268 error = sysctl_handle_int(oidp, &val, 0, req);
/* Error, or a read-only request with no new value supplied. */
269 if (error || !req->newptr)
275 rctl_throttle_pct = val;
/*
 * Sysctl handler for kern.racct.rctl.throttle_pct2.
 *
 * Services the request through sysctl_handle_int() on a local copy and then
 * stores the result in rctl_throttle_pct2.
 * NOTE(review): any range validation between the handle call and the store
 * (embedded lines 287-291) is not visible in this listing.
 */
281 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS)
283 int error, val = rctl_throttle_pct2;
285 error = sysctl_handle_int(oidp, &val, 0, req);
/* Error, or a read-only request with no new value supplied. */
286 if (error || !req->newptr)
292 rctl_throttle_pct2 = val;
/*
 * Return the keyword from subjectnames[] whose value equals 'subject'.
 * Panics if no entry matches, so callers must pass a known subject type.
 */
299 rctl_subject_type_name(int subject)
303 for (i = 0; subjectnames[i].d_name != NULL; i++) {
304 if (subjectnames[i].d_value == subject)
305 return (subjectnames[i].d_name);
308 panic("rctl_subject_type_name: unknown subject type %d", subject);
/*
 * Return the keyword from actionnames[] whose value equals 'action'.
 * Panics if no entry matches.
 */
312 rctl_action_name(int action)
316 for (i = 0; actionnames[i].d_name != NULL; i++) {
317 if (actionnames[i].d_value == action)
318 return (actionnames[i].d_name);
321 panic("rctl_action_name: unknown action %d", action);
/*
 * Return the keyword from resourcenames[] whose value equals 'resource'.
 * Panics if no entry matches.
 */
325 rctl_resource_name(int resource)
329 for (i = 0; resourcenames[i].d_name != NULL; i++) {
330 if (resourcenames[i].d_value == resource)
331 return (resourcenames[i].d_name);
334 panic("rctl_resource_name: unknown resource %d", resource);
/*
 * Pick the racct that 'rule' is accounted against for process 'p', based on
 * the rule's "per" field (rr_per): the real-uid uidinfo, the login class, or
 * the jail's prison_racct, all taken from p's credentials.
 * Panics on an unknown rr_per value.
 * NOTE(review): the return for the per-process case (embedded line 347) is
 * not visible in this listing.
 */
337 static struct racct *
338 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule)
340 struct ucred *cred = p->p_ucred;
342 ASSERT_RACCT_ENABLED();
345 switch (rule->rr_per) {
346 case RCTL_SUBJECT_TYPE_PROCESS:
348 case RCTL_SUBJECT_TYPE_USER:
349 return (cred->cr_ruidinfo->ui_racct);
350 case RCTL_SUBJECT_TYPE_LOGINCLASS:
351 return (cred->cr_loginclass->lc_racct);
352 case RCTL_SUBJECT_TYPE_JAIL:
353 return (cred->cr_prison->pr_prison_racct->prr_racct);
355 panic("%s: unknown per %d", __func__, rule->rr_per);
/*
 * Computes rule->rr_amount minus the current usage of rule->rr_resource in
 * the racct selected by rctl_proc_rule_to_racct().  The difference is signed
 * where callers use it: they compare it against 0 and negate it, so a
 * negative value means the limit is already exceeded.
 * NOTE(review): the return type, the declaration of 'available' and the
 * return statement are missing from this listing.
 */
360 * Return the amount of resource that can be allocated by 'p' before
364 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
366 const struct racct *racct;
369 ASSERT_RACCT_ENABLED();
372 racct = rctl_proc_rule_to_racct(p, rule);
373 available = rule->rr_amount - racct->r_resources[rule->rr_resource];
/*
 * Decay the usage counter of 'resource' in 'racct'.
 *
 * Finds the smallest rr_amount among the throttle rules linked to 'racct'
 * for this resource.  Usage below that amount is reset to zero; otherwise
 * usage is first capped at ten times the amount and then reduced by it.
 * NOTE(review): locking, the 'continue' statements after the two filter
 * tests, and the handling of "no throttle rule found" are not visible here.
 */
379 * Called every second for proc, uidinfo, loginclass, and jail containers.
380 * If the limit isn't exceeded, it decreases the usage amount to zero.
381 * Otherwise, it decreases it by the value of the limit. This way
382 * resource consumption exceeding the limit "carries over" to the next
386 rctl_throttle_decay(struct racct *racct, int resource)
388 struct rctl_rule *rule;
389 struct rctl_rule_link *link;
390 int64_t minavailable;
392 ASSERT_RACCT_ENABLED();
/* Start from the largest value so any rule amount replaces it. */
394 minavailable = INT64_MAX;
398 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
399 rule = link->rrl_rule;
/* Only throttle rules for this resource take part in the decay. */
401 if (rule->rr_resource != resource)
403 if (rule->rr_action != RCTL_ACTION_THROTTLE)
406 if (rule->rr_amount < minavailable)
407 minavailable = rule->rr_amount;
412 if (racct->r_resources[resource] < minavailable) {
413 racct->r_resources[resource] = 0;
416 * Cap utilization counter at ten times the limit. Otherwise,
417 * if we changed the rule lowering the allowed amount, it could
418 * take unreasonably long time for the accumulated resource
421 if (racct->r_resources[resource] > minavailable * 10)
422 racct->r_resources[resource] = minavailable * 10;
424 racct->r_resources[resource] -= minavailable;
/*
 * Returns the smallest headroom left by any RACCT_PCTCPU "deny" rule linked
 * to p's racct, reduced by a safety margin: RCTL_PCPU_SHIFT when the limit
 * of the tightest rule is more than twice that shift, and half the limit on
 * the other visible subtraction (the 'else' keyword itself is not visible).
 * NOTE(review): 'limit' is only assigned inside the loop in the visible
 * lines; confirm it is initialized on a line missing from this listing
 * before the comparison after the loop.
 */
429 * Special version of rctl_get_available() for the %CPU resource.
430 * We slightly cheat here and return less than we normally would.
433 rctl_pcpu_available(const struct proc *p) {
434 struct rctl_rule *rule;
435 struct rctl_rule_link *link;
436 int64_t available, minavailable, limit;
438 ASSERT_RACCT_ENABLED();
440 minavailable = INT64_MAX;
445 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
446 rule = link->rrl_rule;
/* Only "deny" rules on %CPU are considered. */
447 if (rule->rr_resource != RACCT_PCTCPU)
449 if (rule->rr_action != RCTL_ACTION_DENY)
451 available = rctl_available_resource(p, rule);
/* Remember the tightest rule and its configured limit. */
452 if (available < minavailable) {
453 minavailable = available;
454 limit = rule->rr_amount;
461 * Return slightly less than actual value of the available
462 * %cpu resource. This makes %cpu throttling more agressive
463 * and lets us act sooner than the limits are already exceeded.
466 if (limit > 2 * RCTL_PCPU_SHIFT)
467 minavailable -= RCTL_PCPU_SHIFT;
469 minavailable -= (limit / 2);
472 return (minavailable);
/*
 * NOTE(review): only the signature survives in this listing.  Judging by its
 * use next to xmul() in rctl_enforce(), this is presumably an addition that
 * guards against uint64_t overflow; confirm against the full source.
 */
476 xadd(uint64_t a, uint64_t b)
/*
 * Multiplication helper for uint64_t values.  The visible test detects the
 * case where a * b would exceed UINT64_MAX (b is checked for zero first so
 * the division is safe).
 * NOTE(review): what is returned on overflow and in the normal case is not
 * visible in this listing.
 */
492 xmul(uint64_t a, uint64_t b)
495 if (b != 0 && a > UINT64_MAX / b)
/*
 * Walks every rule linked to p's racct that applies to 'resource' and, for
 * each rule whose remaining headroom is smaller than 'amount', performs the
 * rule's action: deny, log, devctl notification, throttle, or (in the code
 * after the throttle case) delivery of the signal whose number equals
 * rr_action.
 *
 * link->rrl_exceeded is cleared whenever a rule is satisfied again and set
 * after a log/devctl/signal action, so those actions are checked against it
 * before firing again.  Log and devctl output are additionally rate limited
 * with ppsratecheck().
 *
 * NOTE(review): this listing omits many lines (break/continue statements,
 * case/default labels, buffer cleanup, locking and the final return), so the
 * exact control flow between the visible statements must be confirmed
 * against the full source.
 */
502 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
503 * to what it keeps allocated now. Returns non-zero if the allocation should
504 * be denied, 0 otherwise.
507 rctl_enforce(struct proc *p, int resource, uint64_t amount)
/* Rate-limit state for ppsratecheck(); static, so shared by all callers. */
509 static struct timeval log_lasttime, devctl_lasttime;
510 static int log_curtime = 0, devctl_curtime = 0;
511 struct rctl_rule *rule;
512 struct rctl_rule_link *link;
516 uint64_t sleep_ms, sleep_ratio;
520 ASSERT_RACCT_ENABLED();
525 * There may be more than one matching rule; go through all of them.
526 * Denial should be done last, after logging and sending signals.
528 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
529 rule = link->rrl_rule;
530 if (rule->rr_resource != resource)
533 available = rctl_available_resource(p, rule);
/* Enough headroom: the rule is not (or no longer) exceeded. */
534 if (available >= (int64_t)amount) {
535 link->rrl_exceeded = 0;
539 switch (rule->rr_action) {
540 case RCTL_ACTION_DENY:
543 case RCTL_ACTION_LOG:
545 * If rrl_exceeded != 0, it means we've already
546 * logged a warning for this process.
548 if (link->rrl_exceeded != 0)
552 * If the process state is not fully initialized yet,
553 * we can't access most of the required fields, e.g.
554 * p->p_comm. This happens when called from fork1().
555 * Ignore this rule for now; it will be processed just
556 * after fork, when called from racct_proc_fork_done().
558 if (p->p_state != PRS_NORMAL)
561 if (!ppsratecheck(&log_lasttime, &log_curtime,
562 rctl_log_rate_limit))
/* M_NOWAIT: the allocation may fail instead of sleeping. */
565 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
567 printf("rctl_enforce: out of memory\n");
570 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
571 rctl_rule_to_sbuf(&sb, rule);
573 printf("rctl: rule \"%s\" matched by pid %d "
574 "(%s), uid %d, jail %s\n", sbuf_data(&sb),
575 p->p_pid, p->p_comm, p->p_ucred->cr_uid,
576 p->p_ucred->cr_prison->pr_prison_racct->prr_name);
579 link->rrl_exceeded = 1;
581 case RCTL_ACTION_DEVCTL:
582 if (link->rrl_exceeded != 0)
585 if (p->p_state != PRS_NORMAL)
588 if (!ppsratecheck(&devctl_lasttime, &devctl_curtime,
589 rctl_devctl_rate_limit))
592 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
594 printf("rctl_enforce: out of memory\n");
597 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
598 sbuf_printf(&sb, "rule=");
599 rctl_rule_to_sbuf(&sb, rule);
600 sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
601 p->p_pid, p->p_ucred->cr_ruid,
602 p->p_ucred->cr_prison->pr_prison_racct->prr_name);
604 devctl_notify_f("RCTL", "rule", "matched",
605 sbuf_data(&sb), M_NOWAIT);
608 link->rrl_exceeded = 1;
610 case RCTL_ACTION_THROTTLE:
611 if (p->p_state != PRS_NORMAL)
615 * Make the process sleep for a fraction of second
616 * proportional to the ratio of process' resource
617 * utilization compared to the limit. The point is
618 * to penalize resource hogs: processes that consume
619 * more of the available resources sleep for longer.
621 * We're trying to defer division until the very end,
622 * to minimize the rounding effects. The following
623 * calculation could have been written in a clearer
626 * sleep_ms = hz * p->p_racct->r_resources[resource] /
628 * sleep_ms *= rctl_throttle_pct / 100;
629 * if (sleep_ms < rctl_throttle_min)
630 * sleep_ms = rctl_throttle_min;
/*
 * From here until the division further down, sleep_ms is kept multiplied by
 * rule->rr_amount; that is why the minimum is scaled by rr_amount as well.
 */
633 sleep_ms = xmul(hz, p->p_racct->r_resources[resource]);
634 sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100;
635 if (sleep_ms < rctl_throttle_min * rule->rr_amount)
636 sleep_ms = rctl_throttle_min * rule->rr_amount;
639 * Multiply that by the ratio of the resource
640 * consumption for the container compared to the limit,
641 * squared. In other words, a process in a container
642 * that is two times over the limit will be throttled
643 * four times as much for hitting the same rule. The
644 * point is to penalize processes more if the container
645 * itself (eg certain UID or jail) is above the limit.
/*
 * NOTE(review): -available is only meaningful when available is negative;
 * the guard around this statement is not visible in this listing.
 */
648 sleep_ratio = -available / rule->rr_amount;
651 sleep_ratio = xmul(sleep_ratio, sleep_ratio);
652 sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100;
653 sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio));
656 * Finally the division.
658 sleep_ms /= rule->rr_amount;
660 if (sleep_ms > rctl_throttle_max)
661 sleep_ms = rctl_throttle_max;
/*
 * NOTE(review): the %ld conversions below are paired with uint64_t/int64_t
 * arguments; confirm whether this printf is actually compiled (it may sit
 * under a preprocessor conditional that is missing from this listing).
 */
663 printf("%s: pid %d (%s), %jd of %jd, will sleep for %ld ms (ratio %ld, available %ld)\n",
664 __func__, p->p_pid, p->p_comm,
665 p->p_racct->r_resources[resource],
666 rule->rr_amount, sleep_ms, sleep_ratio, available);
669 KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n",
670 __func__, (uintmax_t)sleep_ms, rctl_throttle_min));
671 racct_proc_throttle(p, sleep_ms);
/* The remaining code sends a signal (its case/default label is not visible). */
674 if (link->rrl_exceeded != 0)
677 if (p->p_state != PRS_NORMAL)
680 KASSERT(rule->rr_action > 0 &&
681 rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
682 ("rctl_enforce: unknown action %d",
686 * We're using the fact that RCTL_ACTION_SIG* values
687 * are equal to their counterparts from sys/signal.h.
689 kern_psignal(p, rule->rr_action);
690 link->rrl_exceeded = 1;
699 * Return fake error code; the caller should change it
700 * into one proper for the situation - EFSIZ, ENOMEM etc.
/*
 * Computes the smallest rr_amount among the "deny" rules linked to p's racct
 * for 'resource'.  'amount' starts at UINT64_MAX, so that value is what
 * remains when no such rule exists.
 * NOTE(review): the return statement and locking are not visible in this
 * listing.
 */
709 rctl_get_limit(struct proc *p, int resource)
711 struct rctl_rule *rule;
712 struct rctl_rule_link *link;
713 uint64_t amount = UINT64_MAX;
715 ASSERT_RACCT_ENABLED();
720 * There may be more than one matching rule; go through all of them.
721 * Denial should be done last, after logging and sending signals.
723 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
724 rule = link->rrl_rule;
/* Skip rules for other resources and rules that are not "deny". */
725 if (rule->rr_resource != resource)
727 if (rule->rr_action != RCTL_ACTION_DENY)
729 if (rule->rr_amount < amount)
730 amount = rule->rr_amount;
/*
 * Finds the smallest headroom (per rctl_available_resource()) over all
 * "deny" rules linked to p's racct for 'resource', then adds p's own current
 * usage of that resource back in, provided the sum stays below INT64_MAX.
 * NOTE(review): the statement executed when the result is negative (embedded
 * line 775) and any locking are not visible in this listing.
 */
739 rctl_get_available(struct proc *p, int resource)
741 struct rctl_rule *rule;
742 struct rctl_rule_link *link;
743 int64_t available, minavailable, allocated;
745 minavailable = INT64_MAX;
747 ASSERT_RACCT_ENABLED();
752 * There may be more than one matching rule; go through all of them.
753 * Denial should be done last, after logging and sending signals.
755 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
756 rule = link->rrl_rule;
757 if (rule->rr_resource != resource)
759 if (rule->rr_action != RCTL_ACTION_DENY)
761 available = rctl_available_resource(p, rule);
762 if (available < minavailable)
763 minavailable = available;
769 * XXX: Think about this _hard_.
771 allocated = p->p_racct->r_resources[resource];
/* Guarded add: skipped when minavailable + allocated would reach INT64_MAX. */
772 if (minavailable < INT64_MAX - allocated)
773 minavailable += allocated;
774 if (minavailable < 0)
776 return (minavailable);
/*
 * Compare 'rule' against 'filter' field by field.  A filter field left at
 * its *_UNDEFINED value (or a NULL subject pointer) is skipped; every other
 * filter field is compared for equality with the rule's.
 * Panics on an unknown filter subject type.
 * NOTE(review): the return statements are missing from this listing, so the
 * concrete return values must be confirmed against the full source.
 */
780 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
783 ASSERT_RACCT_ENABLED();
785 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
786 if (rule->rr_subject_type != filter->rr_subject_type)
/* Same subject type: compare the subject pointer unless the filter's is NULL. */
789 switch (filter->rr_subject_type) {
790 case RCTL_SUBJECT_TYPE_PROCESS:
791 if (filter->rr_subject.rs_proc != NULL &&
792 rule->rr_subject.rs_proc !=
793 filter->rr_subject.rs_proc)
796 case RCTL_SUBJECT_TYPE_USER:
797 if (filter->rr_subject.rs_uip != NULL &&
798 rule->rr_subject.rs_uip !=
799 filter->rr_subject.rs_uip)
802 case RCTL_SUBJECT_TYPE_LOGINCLASS:
803 if (filter->rr_subject.rs_loginclass != NULL &&
804 rule->rr_subject.rs_loginclass !=
805 filter->rr_subject.rs_loginclass)
808 case RCTL_SUBJECT_TYPE_JAIL:
809 if (filter->rr_subject.rs_prison_racct != NULL &&
810 rule->rr_subject.rs_prison_racct !=
811 filter->rr_subject.rs_prison_racct)
815 panic("rctl_rule_matches: unknown subject type %d",
816 filter->rr_subject_type);
820 if (filter->rr_resource != RACCT_UNDEFINED) {
821 if (rule->rr_resource != filter->rr_resource)
825 if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
826 if (rule->rr_action != filter->rr_action)
830 if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
831 if (rule->rr_amount != filter->rr_amount)
835 if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
836 if (rule->rr_per != filter->rr_per)
/*
 * Look 'str' up in 'table' (case-insensitively, via strcasecmp) and store
 * the matching entry's d_value in *value.  The table is scanned until an
 * entry with a NULL d_name.
 * NOTE(review): the return values for "found" and "not found" are missing
 * from this listing.
 */
844 str2value(const char *str, int *value, struct dict *table)
851 for (i = 0; table[i].d_name != NULL; i++) {
852 if (strcasecmp(table[i].d_name, str) == 0) {
853 *value = table[i].d_value;
/*
 * Parse 'str' as a base-10 number into *value using strtoul().  The visible
 * check detects trailing characters that strtoul() did not consume.
 * NOTE(review): the return statements are missing from this listing.
 */
862 str2id(const char *str, id_t *value)
869 *value = strtoul(str, &end, 10);
/* True when parsing stopped before the end of the string. */
870 if ((size_t)(end - str) != strlen(str))
/*
 * Parse 'str' as a base-10 number into *value.  The visible check detects
 * trailing characters that were not consumed by the conversion.
 * NOTE(review): the conversion uses strtoul(), whose result is unsigned long
 * and may be narrower than int64_t on some platforms; confirm that this is
 * acceptable.  Return statements are missing from this listing.
 */
877 str2int64(const char *str, int64_t *value)
884 *value = strtoul(str, &end, 10);
/* True when parsing stopped before the end of the string. */
885 if ((size_t)(end - str) != strlen(str))
/*
 * Takes a reference on 'rule', allocates a link with M_WAITOK, initializes
 * it as "not exceeded" and inserts it at the head of racct->r_rule_links.
 * The rule must be fully specified (asserted).
 * NOTE(review): the locking around the list insertion (embedded lines
 * 909-913) is not visible in this listing.
 */
895 * Connect the rule to the racct, increasing refcount for the rule.
898 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
900 struct rctl_rule_link *link;
902 ASSERT_RACCT_ENABLED();
903 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
905 rctl_rule_acquire(rule);
906 link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
907 link->rrl_rule = rule;
908 link->rrl_exceeded = 0;
911 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
/*
 * Variant of rctl_racct_add_rule() that allocates the link with M_NOWAIT and
 * takes the rule reference only after the allocation.
 * NOTE(review): the handling of a failed allocation (embedded lines 925-926),
 * any lock assertion, and the return value are not visible in this listing.
 */
916 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
918 struct rctl_rule_link *link;
920 ASSERT_RACCT_ENABLED();
921 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
/* M_NOWAIT: the allocation may fail instead of sleeping. */
924 link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
927 rctl_rule_acquire(rule);
928 link->rrl_rule = rule;
929 link->rrl_exceeded = 0;
931 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
936 * Remove limits for rules matching the filter and release
937 * the refcounts for the rules, possibly freeing them. Returns
938 * the number of limit structures removed.
/*
 * Unlink from 'racct' every rule link whose rule matches 'filter'
 * (per rctl_rule_matches()), dropping the rule reference and freeing the
 * link.  Uses LIST_FOREACH_SAFE because entries are removed while iterating.
 * NOTE(review): the counter of removed links and the return statement are
 * missing from this listing.
 */
941 rctl_racct_remove_rules(struct racct *racct,
942 const struct rctl_rule *filter)
944 struct rctl_rule_link *link, *linktmp;
947 ASSERT_RACCT_ENABLED();
950 LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
951 if (!rctl_rule_matches(link->rrl_rule, filter))
954 LIST_REMOVE(link, rrl_next);
955 rctl_rule_release(link->rrl_rule);
956 uma_zfree(rctl_rule_link_zone, link);
/*
 * Take a reference on the object the rule's subject points at: the
 * prison_racct, uidinfo or loginclass, when the pointer is non-NULL.
 * The undefined and per-process cases take no hold call in the visible
 * lines.  Panics on an unknown subject type.
 */
963 rctl_rule_acquire_subject(struct rctl_rule *rule)
966 ASSERT_RACCT_ENABLED();
968 switch (rule->rr_subject_type) {
969 case RCTL_SUBJECT_TYPE_UNDEFINED:
970 case RCTL_SUBJECT_TYPE_PROCESS:
972 case RCTL_SUBJECT_TYPE_JAIL:
973 if (rule->rr_subject.rs_prison_racct != NULL)
974 prison_racct_hold(rule->rr_subject.rs_prison_racct);
976 case RCTL_SUBJECT_TYPE_USER:
977 if (rule->rr_subject.rs_uip != NULL)
978 uihold(rule->rr_subject.rs_uip);
980 case RCTL_SUBJECT_TYPE_LOGINCLASS:
981 if (rule->rr_subject.rs_loginclass != NULL)
982 loginclass_hold(rule->rr_subject.rs_loginclass);
985 panic("rctl_rule_acquire_subject: unknown subject type %d",
986 rule->rr_subject_type);
/*
 * Counterpart of rctl_rule_acquire_subject(): drop the reference on the
 * prison_racct, uidinfo or loginclass the rule's subject points at, when the
 * pointer is non-NULL.  Panics on an unknown subject type.
 */
991 rctl_rule_release_subject(struct rctl_rule *rule)
994 ASSERT_RACCT_ENABLED();
996 switch (rule->rr_subject_type) {
997 case RCTL_SUBJECT_TYPE_UNDEFINED:
998 case RCTL_SUBJECT_TYPE_PROCESS:
1000 case RCTL_SUBJECT_TYPE_JAIL:
1001 if (rule->rr_subject.rs_prison_racct != NULL)
1002 prison_racct_free(rule->rr_subject.rs_prison_racct);
1004 case RCTL_SUBJECT_TYPE_USER:
1005 if (rule->rr_subject.rs_uip != NULL)
1006 uifree(rule->rr_subject.rs_uip);
1008 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1009 if (rule->rr_subject.rs_loginclass != NULL)
1010 loginclass_free(rule->rr_subject.rs_loginclass);
1013 panic("rctl_rule_release_subject: unknown subject type %d",
1014 rule->rr_subject_type);
/*
 * Allocate a rule from rctl_rule_zone using 'flags' and initialize it as a
 * blank rule: every field is set to its *_UNDEFINED value, all subject
 * pointers to NULL, and the reference count to 1.
 * NOTE(review): the allocation-failure check and the return statement are
 * missing from this listing.
 */
1019 rctl_rule_alloc(int flags)
1021 struct rctl_rule *rule;
1023 ASSERT_RACCT_ENABLED();
1025 rule = uma_zalloc(rctl_rule_zone, flags);
1028 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1029 rule->rr_subject.rs_proc = NULL;
1030 rule->rr_subject.rs_uip = NULL;
1031 rule->rr_subject.rs_loginclass = NULL;
1032 rule->rr_subject.rs_prison_racct = NULL;
1033 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1034 rule->rr_resource = RACCT_UNDEFINED;
1035 rule->rr_action = RCTL_ACTION_UNDEFINED;
1036 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1037 refcount_init(&rule->rr_refcount, 1);
/*
 * Allocate a new rule from rctl_rule_zone using 'flags' and copy every field
 * of 'rule' into it.  The copy starts with its own reference count of 1 and
 * takes its own reference on the subject via rctl_rule_acquire_subject().
 * NOTE(review): the allocation-failure check and the return statement are
 * missing from this listing.
 */
1043 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
1045 struct rctl_rule *copy;
1047 ASSERT_RACCT_ENABLED();
1049 copy = uma_zalloc(rctl_rule_zone, flags);
1052 copy->rr_subject_type = rule->rr_subject_type;
1053 copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
1054 copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
1055 copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
1056 copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
1057 copy->rr_per = rule->rr_per;
1058 copy->rr_resource = rule->rr_resource;
1059 copy->rr_action = rule->rr_action;
1060 copy->rr_amount = rule->rr_amount;
/* The copy gets a fresh reference count; it does not share the original's. */
1061 refcount_init(&copy->rr_refcount, 1);
1062 rctl_rule_acquire_subject(copy);
/*
 * Take an additional reference on 'rule'.  Asserts that the rule is still
 * referenced (refcount > 0) before incrementing.
 */
1068 rctl_rule_acquire(struct rctl_rule *rule)
1071 ASSERT_RACCT_ENABLED();
1072 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1074 refcount_acquire(&rule->rr_refcount);
/*
 * Taskqueue callback that destroys a rule: 'context' is the rule, whose
 * reference count must already be zero (asserted).  Drops the subject
 * reference and returns the rule to rctl_rule_zone.  'pending' is unused in
 * the visible lines.
 */
1078 rctl_rule_free(void *context, int pending)
1080 struct rctl_rule *rule;
1082 rule = (struct rctl_rule *)context;
1084 ASSERT_RACCT_ENABLED();
1085 KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
1088 * We don't need locking here; rule is guaranteed to be inaccessible.
1091 rctl_rule_release_subject(rule);
1092 uma_zfree(rctl_rule_zone, rule);
/*
 * Drop one reference on 'rule'.  When refcount_release() reports that this
 * was the last reference, the actual destruction is deferred to
 * rctl_rule_free() on taskqueue_thread rather than done inline.
 */
1096 rctl_rule_release(struct rctl_rule *rule)
1099 ASSERT_RACCT_ENABLED();
1100 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1102 if (refcount_release(&rule->rr_refcount)) {
1104 * rctl_rule_release() is often called when iterating
1105 * over all the uidinfo structures in the system,
1106 * holding uihashtbl_lock. Since rctl_rule_free()
1107 * might end up calling uifree(), this would lead
1108 * to lock recursion. Use taskqueue to avoid this.
1110 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
1111 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
/*
 * Check that 'rule' has no unset parts: the subject type and its pointer,
 * the resource, the action, the amount and the "per" field are each tested
 * against their undefined/NULL value.  Panics on an unknown subject type.
 * NOTE(review): the return statements are missing from this listing; callers
 * use the result as a boolean inside KASSERT().
 */
1116 rctl_rule_fully_specified(const struct rctl_rule *rule)
1119 ASSERT_RACCT_ENABLED();
1121 switch (rule->rr_subject_type) {
1122 case RCTL_SUBJECT_TYPE_UNDEFINED:
1124 case RCTL_SUBJECT_TYPE_PROCESS:
1125 if (rule->rr_subject.rs_proc == NULL)
1128 case RCTL_SUBJECT_TYPE_USER:
1129 if (rule->rr_subject.rs_uip == NULL)
1132 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1133 if (rule->rr_subject.rs_loginclass == NULL)
1136 case RCTL_SUBJECT_TYPE_JAIL:
1137 if (rule->rr_subject.rs_prison_racct == NULL)
1141 panic("rctl_rule_fully_specified: unknown subject type %d",
1142 rule->rr_subject_type);
1144 if (rule->rr_resource == RACCT_UNDEFINED)
1146 if (rule->rr_action == RCTL_ACTION_UNDEFINED)
1148 if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
1150 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
/*
 * Parse a textual rule into a newly allocated struct rctl_rule.
 *
 * 'rulestr' is split in place with strsep() into
 * subject : subject-id : resource : action [= or /] amount / per.
 * Empty or missing components leave the corresponding rule field undefined,
 * so the result can also serve as a filter.  Subject ids are resolved to a
 * proc (pfind), uidinfo (uifind), loginclass or prison_racct.  Amounts for
 * resources flagged RACCT_IS_IN_MILLIONS are multiplied by 1000000 after an
 * overflow check.  The visible cleanup path releases the rule.
 *
 * Requires allproc_lock to be held (asserted before pfind()).
 * NOTE(review): the assignment of 'perstr', all error checks after the
 * str2* calls, the store through 'rulep' and the return statements are
 * missing from this listing.
 */
1157 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
1159 struct rctl_rule *rule;
1160 char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
1161 *amountstr, *perstr;
1165 ASSERT_RACCT_ENABLED();
1167 rule = rctl_rule_alloc(M_WAITOK);
1169 subjectstr = strsep(&rulestr, ":");
1170 subject_idstr = strsep(&rulestr, ":");
1171 resourcestr = strsep(&rulestr, ":");
/* The action is terminated by either '=' or '/'. */
1172 actionstr = strsep(&rulestr, "=/");
1173 amountstr = strsep(&rulestr, "/");
1176 if (subjectstr == NULL || subjectstr[0] == '\0')
1177 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1179 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
1184 if (subject_idstr == NULL || subject_idstr[0] == '\0') {
1185 rule->rr_subject.rs_proc = NULL;
1186 rule->rr_subject.rs_uip = NULL;
1187 rule->rr_subject.rs_loginclass = NULL;
1188 rule->rr_subject.rs_prison_racct = NULL;
1190 switch (rule->rr_subject_type) {
1191 case RCTL_SUBJECT_TYPE_UNDEFINED:
1194 case RCTL_SUBJECT_TYPE_PROCESS:
1195 error = str2id(subject_idstr, &id);
1198 sx_assert(&allproc_lock, SA_LOCKED);
1199 rule->rr_subject.rs_proc = pfind(id);
1200 if (rule->rr_subject.rs_proc == NULL) {
/* The proc found by pfind() is unlocked again right away. */
1204 PROC_UNLOCK(rule->rr_subject.rs_proc);
1206 case RCTL_SUBJECT_TYPE_USER:
1207 error = str2id(subject_idstr, &id);
1210 rule->rr_subject.rs_uip = uifind(id);
1212 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1213 rule->rr_subject.rs_loginclass =
1214 loginclass_find(subject_idstr);
1215 if (rule->rr_subject.rs_loginclass == NULL) {
1216 error = ENAMETOOLONG;
1220 case RCTL_SUBJECT_TYPE_JAIL:
1221 rule->rr_subject.rs_prison_racct =
1222 prison_racct_find(subject_idstr);
1223 if (rule->rr_subject.rs_prison_racct == NULL) {
1224 error = ENAMETOOLONG;
1229 panic("rctl_string_to_rule: unknown subject type %d",
1230 rule->rr_subject_type);
1234 if (resourcestr == NULL || resourcestr[0] == '\0')
1235 rule->rr_resource = RACCT_UNDEFINED;
1237 error = str2value(resourcestr, &rule->rr_resource,
1243 if (actionstr == NULL || actionstr[0] == '\0')
1244 rule->rr_action = RCTL_ACTION_UNDEFINED;
1246 error = str2value(actionstr, &rule->rr_action, actionnames);
1251 if (amountstr == NULL || amountstr[0] == '\0')
1252 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1254 error = str2int64(amountstr, &rule->rr_amount);
/* Scale to millionths, refusing amounts that would overflow int64_t. */
1257 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) {
1258 if (rule->rr_amount > INT64_MAX / 1000000) {
1262 rule->rr_amount *= 1000000;
1266 if (perstr == NULL || perstr[0] == '\0')
1267 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1269 error = str2value(perstr, &rule->rr_per, subjectnames);
1278 rctl_rule_release(rule);
/*
 * Install a fully specified rule.
 *
 * Rejects with EOPNOTSUPP: "deny" on a non-deniable resource other than RSS
 * and %CPU, "throttle" on a non-decaying resource or on %CPU, and
 * per-process rules on "sloppy" resources.  Existing rules equal to this one
 * are removed first; for "deny" rules, rules that differ only in amount are
 * removed as well.  The rule is then linked to its subject's racct and,
 * for non-process subjects, the process list is walked and the rule is
 * linked to p->p_racct where the visible credential tests apply (matching
 * uidinfo/real uidinfo, matching login class, or a prison chain that
 * contains the rule's prison_racct).
 *
 * Requires allproc_lock to be held (asserted before the process walk).
 * NOTE(review): several declarations, break/continue/goto statements,
 * per-process locking and the return statements are missing from this
 * listing.
 */
1284 * Link a rule with all the subjects it applies to.
1287 rctl_rule_add(struct rctl_rule *rule)
1291 struct uidinfo *uip;
1293 struct prison_racct *prr;
1294 struct loginclass *lc;
1295 struct rctl_rule *rule2;
1298 ASSERT_RACCT_ENABLED();
1299 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
1302 * Some rules just don't make sense, like "deny" rule for an undeniable
1303 * resource. The exception are the RSS and %CPU resources - they are
1304 * not deniable in the racct sense, but the limit is enforced in
1307 if (rule->rr_action == RCTL_ACTION_DENY &&
1308 !RACCT_IS_DENIABLE(rule->rr_resource) &&
1309 rule->rr_resource != RACCT_RSS &&
1310 rule->rr_resource != RACCT_PCTCPU) {
1311 return (EOPNOTSUPP);
1314 if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1315 !RACCT_IS_DECAYING(rule->rr_resource)) {
1316 return (EOPNOTSUPP);
1319 if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1320 rule->rr_resource == RACCT_PCTCPU) {
1321 return (EOPNOTSUPP);
1324 if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
1325 RACCT_IS_SLOPPY(rule->rr_resource)) {
1326 return (EOPNOTSUPP);
1330 * Make sure there are no duplicated rules. Also, for the "deny"
1331 * rules, remove ones differing only by "amount".
/* A copy with the amount cleared acts as a filter matching any amount. */
1333 if (rule->rr_action == RCTL_ACTION_DENY) {
1334 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
1335 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
1336 rctl_rule_remove(rule2);
1337 rctl_rule_release(rule2);
1339 rctl_rule_remove(rule);
1341 switch (rule->rr_subject_type) {
1342 case RCTL_SUBJECT_TYPE_PROCESS:
1343 p = rule->rr_subject.rs_proc;
1344 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
1346 rctl_racct_add_rule(p->p_racct, rule);
1348 * In case of per-process rule, we don't have anything more
1353 case RCTL_SUBJECT_TYPE_USER:
1354 uip = rule->rr_subject.rs_uip;
1355 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1356 rctl_racct_add_rule(uip->ui_racct, rule);
1359 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1360 lc = rule->rr_subject.rs_loginclass;
1361 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1362 rctl_racct_add_rule(lc->lc_racct, rule);
1365 case RCTL_SUBJECT_TYPE_JAIL:
1366 prr = rule->rr_subject.rs_prison_racct;
1367 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1368 rctl_racct_add_rule(prr->prr_racct, rule);
1372 panic("rctl_rule_add: unknown subject type %d",
1373 rule->rr_subject_type);
1377 * Now go through all the processes and add the new rule to the ones
1380 sx_assert(&allproc_lock, SA_LOCKED);
1381 FOREACH_PROC_IN_SYSTEM(p) {
1383 switch (rule->rr_subject_type) {
1384 case RCTL_SUBJECT_TYPE_USER:
1385 if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1386 cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1389 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1390 if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1393 case RCTL_SUBJECT_TYPE_JAIL:
/* Walk from the process's prison up through its parents. */
1395 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1396 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1405 panic("rctl_rule_add: unknown subject type %d",
1406 rule->rr_subject_type);
1409 rctl_racct_add_rule(p->p_racct, rule);
/*
 * Hook taking no arguments.  It is handed to loginclass_racct_foreach(),
 * ui_racct_foreach() and prison_racct_foreach() as their second argument,
 * right before rctl_rule_post_callback.
 * NOTE(review): the body is not visible in this extract; presumably it
 * acquires the RCTL write lock that rctl_rule_remove_callback() asserts --
 * confirm.
 */
1416 rctl_rule_pre_callback(void)
/*
 * Counterpart of rctl_rule_pre_callback; it is handed to the
 * loginclass/ui/prison racct_foreach() iterators as their third argument.
 * NOTE(review): the body is not visible in this extract; presumably it
 * drops whatever lock the pre callback took -- confirm.
 */
1423 rctl_rule_post_callback(void)
/*
 * Per-racct callback used by rctl_rule_remove() with the
 * loginclass/ui/prison racct_foreach() iterators.
 *
 * racct - accounting container whose rule list is pruned.
 * arg2  - the filter, a struct rctl_rule pointer passed as void *.
 * arg3  - points to an int accumulator; the value returned by
 *         rctl_racct_remove_rules() (presumably the number of rules
 *         removed -- confirm) is added to it.
 *
 * The RCTL write lock must be held; this is asserted below.
 */
1430 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1432 struct rctl_rule *filter = (struct rctl_rule *)arg2;
1435 ASSERT_RACCT_ENABLED();
1436 RCTL_WLOCK_ASSERT();
1438 found += rctl_racct_remove_rules(racct, filter);
/* Fold the result for this racct into the running total of the caller. */
1440 *((int *)arg3) += found;
1444 * Remove all rules that match the filter.
/*
 * filter - rule pattern selecting what to remove.
 *
 * When the filter names one specific process, only the rule list of that
 * process is searched.  Otherwise the loginclass, uidinfo and prison
 * raccts are walked through their foreach iterators, followed by every
 * process in the system; the values returned by rctl_racct_remove_rules()
 * are summed in found.
 *
 * The general path asserts that allproc_lock is held.  The result is used
 * as an error code by sys_rctl_remove_rule(); the return statements
 * themselves are not visible in this extract.
 */
1447 rctl_rule_remove(struct rctl_rule *filter)
1452 ASSERT_RACCT_ENABLED();
/* Narrow case: the filter is bound to a single, known process. */
1454 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1455 filter->rr_subject.rs_proc != NULL) {
1456 p = filter->rr_subject.rs_proc;
1458 found = rctl_racct_remove_rules(p->p_racct, filter);
/* General case: sweep the shared raccts, accumulating into found. */
1465 loginclass_racct_foreach(rctl_rule_remove_callback,
1466 rctl_rule_pre_callback, rctl_rule_post_callback,
1467 filter, (void *)&found);
1468 ui_racct_foreach(rctl_rule_remove_callback,
1469 rctl_rule_pre_callback, rctl_rule_post_callback,
1470 filter, (void *)&found);
1471 prison_racct_foreach(rctl_rule_remove_callback,
1472 rctl_rule_pre_callback, rctl_rule_post_callback,
1473 filter, (void *)&found);
/* Then sweep the per-process rule lists; the process list must be locked. */
1475 sx_assert(&allproc_lock, SA_LOCKED);
1477 FOREACH_PROC_IN_SYSTEM(p) {
1478 found += rctl_racct_remove_rules(p->p_racct, filter);
1488 * Appends a rule to the sbuf.
/*
 * sb   - destination sbuf; text is appended with sbuf_printf().
 * rule - rule to render; it is only read (const).
 *
 * Output order: subject type, subject identifier, resource, action and
 * amount, separated by colons and an equals sign.  A NULL subject pointer
 * yields an empty identifier field.  A trailing /per part is written only
 * when rr_per differs from rr_subject_type.  An unknown subject type
 * panics.
 */
1491 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1495 ASSERT_RACCT_ENABLED();
1497 sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
/* Subject identifier: pid, uid, login class name or prison racct name. */
1499 switch (rule->rr_subject_type) {
1500 case RCTL_SUBJECT_TYPE_PROCESS:
1501 if (rule->rr_subject.rs_proc == NULL)
1502 sbuf_printf(sb, ":");
1504 sbuf_printf(sb, "%d:",
1505 rule->rr_subject.rs_proc->p_pid);
1507 case RCTL_SUBJECT_TYPE_USER:
1508 if (rule->rr_subject.rs_uip == NULL)
1509 sbuf_printf(sb, ":");
1511 sbuf_printf(sb, "%d:",
1512 rule->rr_subject.rs_uip->ui_uid);
1514 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1515 if (rule->rr_subject.rs_loginclass == NULL)
1516 sbuf_printf(sb, ":");
1518 sbuf_printf(sb, "%s:",
1519 rule->rr_subject.rs_loginclass->lc_name);
1521 case RCTL_SUBJECT_TYPE_JAIL:
1522 if (rule->rr_subject.rs_prison_racct == NULL)
1523 sbuf_printf(sb, ":");
1525 sbuf_printf(sb, "%s:",
1526 rule->rr_subject.rs_prison_racct->prr_name);
1529 panic("rctl_rule_to_sbuf: unknown subject type %d",
1530 rule->rr_subject_type);
/*
 * NOTE(review): the statement guarded by the test below is not visible in
 * this extract; presumably it rescales amounts that are kept in millionths
 * before they are printed -- confirm.  Undefined amounts are left alone.
 */
1533 amount = rule->rr_amount;
1534 if (amount != RCTL_AMOUNT_UNDEFINED &&
1535 RACCT_IS_IN_MILLIONS(rule->rr_resource))
1538 sbuf_printf(sb, "%s:%s=%jd",
1539 rctl_resource_name(rule->rr_resource),
1540 rctl_action_name(rule->rr_action),
/* The per part is omitted when it equals the subject type. */
1543 if (rule->rr_per != rule->rr_subject_type)
1544 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1548 * Routine used by RCTL syscalls to read in input string.
/*
 * inputstr - receives the kernel copy of the string; callers in this file
 *            pass the address of a char pointer and later release it with
 *            free(..., M_RCTL).
 * inbufp   - user-space address of the input string.
 * inbuflen - length supplied with the system call.
 *
 * Lengths above RCTL_MAX_INBUFSIZE are tested for before anything is
 * allocated (presumably rejected; the guarded statement is not visible in
 * this extract).  A buffer of inbuflen + 1 bytes is then allocated with
 * M_WAITOK and filled by copyinstr(), which is told to copy at most
 * inbuflen bytes.  The error handling and the store through inputstr are
 * not visible here.
 */
1551 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1556 ASSERT_RACCT_ENABLED();
1560 if (inbuflen > RCTL_MAX_INBUFSIZE)
1563 str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1564 error = copyinstr(inbufp, str, inbuflen, NULL);
1576 * Routine used by RCTL syscalls to write out output string.
/*
 * outputsbuf - sbuf holding the reply; a NULL pointer is tested for first
 *              (its handling is not visible in this extract).
 * outbufp    - user-space destination.
 * outbuflen  - capacity of the destination in bytes.
 *
 * The sbuf is finished, then sbuf_len() + 1 bytes of its data are copied
 * out, provided that many bytes fit in outbuflen.  On both visible paths
 * the sbuf is destroyed with sbuf_delete(), so the caller must not use it
 * afterwards.
 */
1579 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1583 ASSERT_RACCT_ENABLED();
1585 if (outputsbuf == NULL)
1588 sbuf_finish(outputsbuf);
/* Destination too small for the data plus one extra byte. */
1589 if (outbuflen < sbuf_len(outputsbuf) + 1) {
1590 sbuf_delete(outputsbuf);
1593 error = copyout(sbuf_data(outputsbuf), outbufp,
1594 sbuf_len(outputsbuf) + 1);
1595 sbuf_delete(outputsbuf);
/*
 * Renders the resource counters of a racct as name=amount pairs, each
 * followed by a comma, into an sbuf obtained from sbuf_new_auto().
 *
 * racct  - container whose r_resources[] array is read for every index
 *          from 0 through RACCT_MAX inclusive.
 * sloppy - when zero, resources flagged by RACCT_IS_SLOPPY() are singled
 *          out by the test below (presumably skipped; the guarded
 *          statement is not visible in this extract).
 *
 * NOTE(review): amounts flagged by RACCT_IS_IN_MILLIONS() get additional
 * treatment that is not visible here -- confirm.
 */
1599 static struct sbuf *
1600 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1606 ASSERT_RACCT_ENABLED();
1608 sb = sbuf_new_auto();
1609 for (i = 0; i <= RACCT_MAX; i++) {
1610 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1612 amount = racct->r_resources[i];
1613 if (RACCT_IS_IN_MILLIONS(i))
1615 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
/* Step back one character to drop the comma after the last pair. */
1617 sbuf_setpos(sb, sbuf_len(sb) - 1);
/*
 * System call handler: reports the resource usage of the subject named by
 * a filter string supplied from user space.
 *
 * td  - calling thread; used for the PRIV_RCTL_GET_RACCT privilege check.
 * uap - system call arguments: inbufp/inbuflen describe the filter string,
 *       outbufp/outbuflen the buffer that receives the reply.
 *
 * The filter string is parsed while allproc_lock is held shared.  The
 * racct to report is chosen by subject type.  Before the reply is copied
 * out, the filter is released and the lock dropped.
 */
1622 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1624 struct rctl_rule *filter;
1625 struct sbuf *outputsbuf = NULL;
1627 struct uidinfo *uip;
1628 struct loginclass *lc;
1629 struct prison_racct *prr;
1636 error = priv_check(td, PRIV_RCTL_GET_RACCT);
1640 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1644 sx_slock(&allproc_lock);
1645 error = rctl_string_to_rule(inputstr, &filter);
/* The kernel copy of the input is no longer needed once parsed. */
1646 free(inputstr, M_RCTL);
1648 sx_sunlock(&allproc_lock);
/*
 * The second argument of rctl_racct_to_sbuf() is its sloppy flag: 0 for a
 * process, 1 for user, login class and jail.
 */
1652 switch (filter->rr_subject_type) {
1653 case RCTL_SUBJECT_TYPE_PROCESS:
1654 p = filter->rr_subject.rs_proc;
1659 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1661 case RCTL_SUBJECT_TYPE_USER:
1662 uip = filter->rr_subject.rs_uip;
1667 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1669 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1670 lc = filter->rr_subject.rs_loginclass;
1675 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1677 case RCTL_SUBJECT_TYPE_JAIL:
1678 prr = filter->rr_subject.rs_prison_racct;
1683 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1689 rctl_rule_release(filter);
1690 sx_sunlock(&allproc_lock);
/* outputsbuf starts out NULL; rctl_write_outbuf() tests for that itself. */
1694 error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
/*
 * Per-racct callback used by sys_rctl_get_rules() with the
 * loginclass/ui/prison racct_foreach() iterators.
 *
 * racct - container whose r_rule_links list is walked.
 * arg2  - the filter, a struct rctl_rule pointer passed as void *.
 * arg3  - the output sbuf, passed as void *.
 *
 * Each rule on the list is tested with rctl_rule_matches(); rendered rules
 * are appended to the sbuf, each followed by a comma.
 * NOTE(review): the statement guarded by the match test is not visible in
 * this extract; presumably non-matching rules are skipped -- confirm.
 */
1700 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1702 struct rctl_rule *filter = (struct rctl_rule *)arg2;
1703 struct rctl_rule_link *link;
1704 struct sbuf *sb = (struct sbuf *)arg3;
1706 ASSERT_RACCT_ENABLED();
1709 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1710 if (!rctl_rule_matches(link->rrl_rule, filter))
1712 rctl_rule_to_sbuf(sb, link->rrl_rule);
1713 sbuf_printf(sb, ",");
/*
 * System call handler: lists the rules selected by a filter string
 * supplied from user space.
 *
 * td  - calling thread; used for the PRIV_RCTL_GET_RULES privilege check.
 * uap - system call arguments: inbufp/inbuflen describe the filter string,
 *       outbufp/outbuflen the buffer that receives the reply.
 *
 * The filter string is parsed while allproc_lock is held shared.  The
 * reply is built in a fixed-length sbuf of outbuflen bytes: first the
 * process-subject rules found on the per-process lists, then the rules
 * attached to login class, uidinfo and prison raccts.  Every exit taken
 * after the filter has been parsed must release the filter before
 * dropping allproc_lock.
 */
1718 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1721 struct rctl_rule *filter;
1722 struct rctl_rule_link *link;
1724 char *inputstr, *buf;
1731 error = priv_check(td, PRIV_RCTL_GET_RULES);
1735 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1739 sx_slock(&allproc_lock);
1740 error = rctl_string_to_rule(inputstr, &filter);
1741 free(inputstr, M_RCTL);
1743 sx_sunlock(&allproc_lock);
1747 bufsize = uap->outbuflen;
1748 if (bufsize > rctl_maxbufsize) {
/*
 * The filter was successfully parsed above and is still referenced here;
 * drop that reference before bailing out, exactly as the same check in
 * sys_rctl_get_limits() does.  Without this the filter is leaked whenever
 * the caller asks for an oversized buffer.
 */
rctl_rule_release(filter);
1749 sx_sunlock(&allproc_lock);
1753 buf = malloc(bufsize, M_RCTL, M_WAITOK);
1754 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1755 KASSERT(sb != NULL, ("sbuf_new failed"));
1757 FOREACH_PROC_IN_SYSTEM(p) {
1759 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1761 * Non-process rules will be added to the buffer later.
1762 * Adding them here would result in duplicated output.
1764 if (link->rrl_rule->rr_subject_type !=
1765 RCTL_SUBJECT_TYPE_PROCESS)
1767 if (!rctl_rule_matches(link->rrl_rule, filter))
1769 rctl_rule_to_sbuf(sb, link->rrl_rule);
1770 sbuf_printf(sb, ",");
/* Rules attached to the shared raccts are collected by the callback. */
1775 loginclass_racct_foreach(rctl_get_rules_callback,
1776 rctl_rule_pre_callback, rctl_rule_post_callback,
1778 ui_racct_foreach(rctl_get_rules_callback,
1779 rctl_rule_pre_callback, rctl_rule_post_callback,
1781 prison_racct_foreach(rctl_get_rules_callback,
1782 rctl_rule_pre_callback, rctl_rule_post_callback,
/* ENOMEM from the fixed-length sbuf means the reply did not fit. */
1784 if (sbuf_error(sb) == ENOMEM) {
1790 * Remove trailing ",".
1792 if (sbuf_len(sb) > 0)
1793 sbuf_setpos(sb, sbuf_len(sb) - 1);
1795 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1797 rctl_rule_release(filter);
1798 sx_sunlock(&allproc_lock);
/*
 * System call handler: lists every rule linked to the racct of one
 * process.
 *
 * td  - calling thread; used for the PRIV_RCTL_GET_LIMITS privilege check.
 * uap - system call arguments: inbufp/inbuflen describe the filter string,
 *       outbufp/outbuflen the buffer that receives the reply.
 *
 * The filter string is parsed while allproc_lock is held shared.  A
 * subject type other than process yields EOPNOTSUPP; an undefined subject
 * type, a NULL process pointer and an outbuflen above rctl_maxbufsize are
 * rejected as well (those return values are not visible in this extract).
 * Each of these early exits releases the filter and drops the lock.
 */
1804 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1807 struct rctl_rule *filter;
1808 struct rctl_rule_link *link;
1809 char *inputstr, *buf;
1816 error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1820 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1824 sx_slock(&allproc_lock);
1825 error = rctl_string_to_rule(inputstr, &filter);
1826 free(inputstr, M_RCTL);
1828 sx_sunlock(&allproc_lock);
1832 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1833 rctl_rule_release(filter);
1834 sx_sunlock(&allproc_lock);
1837 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1838 rctl_rule_release(filter);
1839 sx_sunlock(&allproc_lock);
1840 return (EOPNOTSUPP);
1842 if (filter->rr_subject.rs_proc == NULL) {
1843 rctl_rule_release(filter);
1844 sx_sunlock(&allproc_lock);
/* The reply buffer is sized by the caller, bounded by rctl_maxbufsize. */
1848 bufsize = uap->outbuflen;
1849 if (bufsize > rctl_maxbufsize) {
1850 rctl_rule_release(filter);
1851 sx_sunlock(&allproc_lock);
1855 buf = malloc(bufsize, M_RCTL, M_WAITOK);
1856 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1857 KASSERT(sb != NULL, ("sbuf_new failed"));
/* Every rule on the list of the process is emitted; no matching is done. */
1860 LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1862 rctl_rule_to_sbuf(sb, link->rrl_rule);
1863 sbuf_printf(sb, ",");
/* ENOMEM from the fixed-length sbuf means the reply did not fit. */
1866 if (sbuf_error(sb) == ENOMEM) {
1872 * Remove trailing ",".
1874 if (sbuf_len(sb) > 0)
1875 sbuf_setpos(sb, sbuf_len(sb) - 1);
1877 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1879 rctl_rule_release(filter);
1880 sx_sunlock(&allproc_lock);
/*
 * System call handler: installs the rule described by a string supplied
 * from user space.
 *
 * td  - calling thread; used for the PRIV_RCTL_ADD_RULE privilege check.
 * uap - system call arguments; inbufp/inbuflen describe the rule string.
 *
 * The string is parsed while allproc_lock is held shared.  A rule without
 * a per part inherits it from its subject type.  The rule is then tested
 * with rctl_rule_fully_specified() (the handling of a failed test is not
 * visible in this extract) and handed to rctl_rule_add().  The reference
 * held on the parsed rule is released before the lock is dropped.
 */
1886 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1888 struct rctl_rule *rule;
1895 error = priv_check(td, PRIV_RCTL_ADD_RULE);
1899 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1903 sx_slock(&allproc_lock);
1904 error = rctl_string_to_rule(inputstr, &rule);
1905 free(inputstr, M_RCTL);
1907 sx_sunlock(&allproc_lock);
1911 * The 'per' part of a rule is optional.
1913 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1914 rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1915 rule->rr_per = rule->rr_subject_type;
1917 if (!rctl_rule_fully_specified(rule)) {
1922 error = rctl_rule_add(rule);
1925 rctl_rule_release(rule);
1926 sx_sunlock(&allproc_lock);
/*
 * System call handler: removes the rules selected by a filter string
 * supplied from user space.
 *
 * td  - calling thread; used for the PRIV_RCTL_REMOVE_RULE privilege check.
 * uap - system call arguments; inbufp/inbuflen describe the filter string.
 *
 * The string is parsed and rctl_rule_remove() is run while allproc_lock is
 * held shared.  The filter is released before the lock is dropped, and the
 * result of rctl_rule_remove() is kept in error.
 */
1931 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1933 struct rctl_rule *filter;
1940 error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1944 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1948 sx_slock(&allproc_lock);
1949 error = rctl_string_to_rule(inputstr, &filter);
1950 free(inputstr, M_RCTL);
1952 sx_sunlock(&allproc_lock);
1956 error = rctl_rule_remove(filter);
1957 rctl_rule_release(filter);
1958 sx_sunlock(&allproc_lock);
1964 * Update RCTL rule list after credential change.
/*
 * p       - process whose rule list is rebuilt.
 * newcred - credentials the process is switching to.
 *
 * The replacement list is assembled from four sources: the rules on the
 * current list of the process whose subject type is process, plus the
 * rules linked to the uidinfo (cr_ruidinfo), login class and prison racct
 * referenced by newcred.
 *
 * Phases, in the order they appear below:
 *   1. Walk the four source lists to count the rules.
 *   2. Allocate that many empty links with M_WAITOK on a temporary list.
 *   3. Walk the sources again, taking a reference on every rule and
 *      copying its rrl_exceeded state into the next preallocated link.
 *   4. Free the old list of the process and move the new links onto it.
 *
 * If the preallocated links run out during phase 3, the rule lists changed
 * in the meantime; per the comment near the end, the temporary list is
 * freed (dropping any references already taken) and the procedure is tried
 * again.  NOTE(review): the locking calls and the jump statements are not
 * visible in this extract -- confirm the exact lock hand-off there.
 */
1967 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1969 LIST_HEAD(, rctl_rule_link) newrules;
1970 struct rctl_rule_link *link, *newlink;
1971 struct uidinfo *newuip;
1972 struct loginclass *newlc;
1973 struct prison_racct *newprr;
1976 ASSERT_RACCT_ENABLED();
/* Look up the containers the new credentials refer to. */
1978 newuip = newcred->cr_ruidinfo;
1979 newlc = newcred->cr_loginclass;
1980 newprr = newcred->cr_prison->pr_prison_racct;
1982 LIST_INIT(&newrules);
1986 * First, count the rules that apply to the process with new
1991 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1992 if (link->rrl_rule->rr_subject_type ==
1993 RCTL_SUBJECT_TYPE_PROCESS)
1996 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1998 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
2000 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
2005 * Create temporary list. We've dropped the rctl_lock in order
2008 for (i = 0; i < rulecnt; i++) {
2009 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
2010 newlink->rrl_rule = NULL;
2011 newlink->rrl_exceeded = 0;
2012 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
2015 newlink = LIST_FIRST(&newrules);
2018 * Assign rules to the newly allocated list entries.
2021 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
2022 if (link->rrl_rule->rr_subject_type ==
2023 RCTL_SUBJECT_TYPE_PROCESS) {
2024 if (newlink == NULL)
2026 rctl_rule_acquire(link->rrl_rule);
2027 newlink->rrl_rule = link->rrl_rule;
2028 newlink->rrl_exceeded = link->rrl_exceeded;
2029 newlink = LIST_NEXT(newlink, rrl_next);
2034 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
2035 if (newlink == NULL)
2037 rctl_rule_acquire(link->rrl_rule);
2038 newlink->rrl_rule = link->rrl_rule;
2039 newlink->rrl_exceeded = link->rrl_exceeded;
2040 newlink = LIST_NEXT(newlink, rrl_next);
2044 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
2045 if (newlink == NULL)
2047 rctl_rule_acquire(link->rrl_rule);
2048 newlink->rrl_rule = link->rrl_rule;
2049 newlink->rrl_exceeded = link->rrl_exceeded;
2050 newlink = LIST_NEXT(newlink, rrl_next);
2054 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
2055 if (newlink == NULL)
2057 rctl_rule_acquire(link->rrl_rule);
2058 newlink->rrl_rule = link->rrl_rule;
2059 newlink->rrl_exceeded = link->rrl_exceeded;
2060 newlink = LIST_NEXT(newlink, rrl_next);
2066 * Free the old rule list.
2068 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
2069 link = LIST_FIRST(&p->p_racct->r_rule_links);
2070 LIST_REMOVE(link, rrl_next);
2071 rctl_rule_release(link->rrl_rule);
2072 uma_zfree(rctl_rule_link_zone, link);
2076 * Replace lists and we're done.
2078 * XXX: Is there any way to switch list heads instead
2079 * of iterating here?
2081 while (!LIST_EMPTY(&newrules)) {
2082 newlink = LIST_FIRST(&newrules);
2083 LIST_REMOVE(newlink, rrl_next);
2084 LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
2097 * Rule list changed while we were not holding the rctl_lock.
2098 * Free the new list and try again.
2100 while (!LIST_EMPTY(&newrules)) {
2101 newlink = LIST_FIRST(&newrules);
2102 LIST_REMOVE(newlink, rrl_next);
/* Links that were never filled in still carry a NULL rule pointer. */
2103 if (newlink->rrl_rule != NULL)
2104 rctl_rule_release(newlink->rrl_rule);
2105 uma_zfree(rctl_rule_link_zone, newlink);
2112 * Assign RCTL rules to the newly created process.
/*
 * parent - process being forked; its racct must exist (asserted).
 * child  - new process; its rule list is initialized empty here and then
 *          populated from the list of the parent.
 *
 * Rules whose subject is the parent process are duplicated with M_NOWAIT
 * and retargeted at the child; the reference held on the duplicate is
 * dropped again once it has been handed to rctl_racct_add_rule_locked().
 * Rules of every other subject type also go through
 * rctl_racct_add_rule_locked(), whose arguments are only partly visible
 * in this extract.
 *
 * NOTE(review): the checks on the results of the M_NOWAIT duplication and
 * of rctl_racct_add_rule_locked(), as well as the return statements, are
 * not visible in this extract; the loop at the end presumably is the
 * failure path that undoes a partially built child list -- confirm.
 */
2115 rctl_proc_fork(struct proc *parent, struct proc *child)
2117 struct rctl_rule *rule;
2118 struct rctl_rule_link *link;
2121 LIST_INIT(&child->p_racct->r_rule_links);
2123 ASSERT_RACCT_ENABLED();
2124 KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
2129 * Go through limits applicable to the parent and assign them
2130 * to the child. Rules with 'process' subject have to be duplicated
2131 * in order to make their rr_subject point to the new process.
2133 LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
2134 if (link->rrl_rule->rr_subject_type ==
2135 RCTL_SUBJECT_TYPE_PROCESS) {
2136 rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
2139 KASSERT(rule->rr_subject.rs_proc == parent,
2140 ("rule->rr_subject.rs_proc != parent"));
2141 rule->rr_subject.rs_proc = child;
2142 error = rctl_racct_add_rule_locked(child->p_racct,
2144 rctl_rule_release(rule);
2148 error = rctl_racct_add_rule_locked(child->p_racct,
/* Drop every rule linked to the child so far and free the links. */
2159 while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
2160 link = LIST_FIRST(&child->p_racct->r_rule_links);
2161 LIST_REMOVE(link, rrl_next);
2162 rctl_rule_release(link->rrl_rule);
2163 uma_zfree(rctl_rule_link_zone, link);
2170 * Release rules attached to the racct.
/*
 * racct - container whose rule list is emptied.
 *
 * Every link is unlinked from r_rule_links, the reference it holds on its
 * rule is dropped, and the link itself is returned to rctl_rule_link_zone.
 * The list is empty when the loop finishes.
 */
2173 rctl_racct_release(struct racct *racct)
2175 struct rctl_rule_link *link;
2177 ASSERT_RACCT_ENABLED();
2180 while (!LIST_EMPTY(&racct->r_rule_links)) {
2181 link = LIST_FIRST(&racct->r_rule_links);
2182 LIST_REMOVE(link, rrl_next);
2183 rctl_rule_release(link->rrl_rule);
2184 uma_zfree(rctl_rule_link_zone, link);
/*
 * Create the UMA zones for struct rctl_rule and struct rctl_rule_link
 * objects.  NOTE(review): the signature of the enclosing function is not
 * visible in this extract.
 */
2196 rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
2197 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2198 rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
2199 sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
2203 * Set default values, making sure not to overwrite the ones
2204 * fetched from tunables. Most of those could be set at the
2205 * declaration, except for the rctl_throttle_max - we cannot
2206 * set it there due to hz not being compile time constant.
/* The lower throttle bound is raised to at least 1. */
2208 if (rctl_throttle_min < 1)
2209 rctl_throttle_min = 1;
/* An upper bound below the lower bound is replaced by 2 * hz. */
2210 if (rctl_throttle_max < rctl_throttle_min)
2211 rctl_throttle_max = 2 * hz;
/* Negative percentages are replaced by 100. */
2212 if (rctl_throttle_pct < 0)
2213 rctl_throttle_pct = 100;
2214 if (rctl_throttle_pct2 < 0)
2215 rctl_throttle_pct2 = 100;
/*
 * NOTE(review): second definition of sys_rctl_get_racct in this file; only
 * its signature is visible in this extract.  Presumably this is the
 * variant built when the RCTL option is off -- confirm against the
 * surrounding preprocessor conditionals.
 */
2221 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
/*
 * NOTE(review): second definition of sys_rctl_get_rules in this file; only
 * its signature is visible in this extract.  Presumably this is the
 * variant built when the RCTL option is off -- confirm against the
 * surrounding preprocessor conditionals.
 */
2228 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
/*
 * NOTE(review): second definition of sys_rctl_get_limits in this file;
 * only its signature is visible in this extract.  Presumably this is the
 * variant built when the RCTL option is off -- confirm against the
 * surrounding preprocessor conditionals.
 */
2235 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
/*
 * NOTE(review): second definition of sys_rctl_add_rule in this file; only
 * its signature is visible in this extract.  Presumably this is the
 * variant built when the RCTL option is off -- confirm against the
 * surrounding preprocessor conditionals.
 */
2242 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
/*
 * NOTE(review): second definition of sys_rctl_remove_rule in this file;
 * only its signature is visible in this extract.  Presumably this is the
 * variant built when the RCTL option is off -- confirm against the
 * surrounding preprocessor conditionals.
 */
2249 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)