2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2010 The FreeBSD Foundation
7 * This software was developed by Edward Tomasz Napierala under sponsorship
8 * from the FreeBSD Foundation.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
37 #include <sys/param.h>
38 #include <sys/devctl.h>
39 #include <sys/malloc.h>
40 #include <sys/queue.h>
41 #include <sys/refcount.h>
43 #include <sys/kernel.h>
44 #include <sys/limits.h>
45 #include <sys/loginclass.h>
48 #include <sys/racct.h>
50 #include <sys/resourcevar.h>
52 #include <sys/sysent.h>
53 #include <sys/sysproto.h>
54 #include <sys/systm.h>
55 #include <sys/types.h>
56 #include <sys/eventhandler.h>
58 #include <sys/mutex.h>
59 #include <sys/rwlock.h>
61 #include <sys/taskqueue.h>
67 #error "The RCTL option requires the RACCT option"
70 FEATURE(rctl, "Resource Limits");
/*
 * HRF_* flag values and the sizing constants used by this file.
 *
 * Every macro whose replacement list is an expression is fully
 * parenthesized, so it expands correctly inside a larger expression
 * (for example "len / RCTL_MAX_INBUFSIZE" or "n % RCTL_MAX_OUTBUFSIZE").
 * The numeric values are unchanged.
 */
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

#define	RCTL_MAX_INBUFSIZE	(4 * 1024)
#define	RCTL_MAX_OUTBUFSIZE	(16 * 1024 * 1024)
#define	RCTL_LOG_BUFSIZE	128

#define	RCTL_PCPU_SHIFT		(10 * 1000000)
/*
 * Run-time knobs for this file.  rctl_log_rate_limit and
 * rctl_devctl_rate_limit are handed to ppsratecheck() in rctl_enforce();
 * the rctl_throttle_* values feed the throttle sleep computation there and
 * are updated through the rctl_throttle_*_sysctl() handlers.
 * NOTE(review): the two rate limits are declared int but are exported
 * with SYSCTL_UINT() further down - confirm the intended signedness.
 */
82 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE;
83 static int rctl_log_rate_limit = 10;
84 static int rctl_devctl_rate_limit = 10;
87 * Values below are initialized in rctl_init().
89 static int rctl_throttle_min = -1;
90 static int rctl_throttle_max = -1;
91 static int rctl_throttle_pct = -1;
92 static int rctl_throttle_pct2 = -1;
/*
 * Forward declarations of the handlers named by the SYSCTL_PROC() entries
 * for throttle_min, throttle_max, throttle_pct and throttle_pct2.
 */
94 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS);
95 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS);
96 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS);
97 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS);
/*
 * Sysctl and tunable declarations for the kern.racct.rctl node.
 * maxbufsize, log_rate_limit and devctl_rate_limit are bound directly to
 * their variables; the four throttle_* entries go through handler
 * functions and are additionally registered as loader tunables with
 * TUNABLE_INT().
 */
99 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
101 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN,
102 &rctl_maxbufsize, 0, "Maximum output buffer size");
103 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW,
104 &rctl_log_rate_limit, 0, "Maximum number of log messages per second");
105 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN,
106 &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second");
107 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min,
108 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
109 &rctl_throttle_min_sysctl, "IU",
110 "Shortest throttling duration, in hz");
111 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min);
112 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max,
113 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
114 &rctl_throttle_max_sysctl, "IU",
115 "Longest throttling duration, in hz");
116 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max);
117 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct,
118 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
119 &rctl_throttle_pct_sysctl, "IU",
120 "Throttling penalty for process consumption, in percent");
121 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct);
122 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2,
123 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
124 &rctl_throttle_pct2_sysctl, "IU",
125 "Throttling penalty for container consumption, in percent");
126 TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2);
/*
 * List node tying one rule to one racct.  The members visible in this
 * excerpt are the list linkage (rrl_next) and the rule pointer (rrl_rule);
 * the rest of this file also reads and writes an rrl_exceeded member whose
 * declaration is not visible here.
 */
129 * 'rctl_rule_link' connects a rule with every racct it's related to.
130 * For example, rule 'user:X:openfiles:deny=N/process' is linked
131 * with uidinfo for user X, and to each process of that user.
133 struct rctl_rule_link {
134 LIST_ENTRY(rctl_rule_link) rrl_next;
135 struct rctl_rule *rrl_rule;
/*
 * Name/value table for rule subject types.  It is searched by value in
 * rctl_subject_type_name() and by name (through str2value()) when a rule
 * string is parsed.  Both searches stop at an entry whose d_name is NULL;
 * that terminating entry is not visible in this excerpt.
 */
144 static struct dict subjectnames[] = {
145 { "process", RCTL_SUBJECT_TYPE_PROCESS },
146 { "user", RCTL_SUBJECT_TYPE_USER },
147 { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
148 { "jail", RCTL_SUBJECT_TYPE_JAIL },
/*
 * Name/value table mapping the resource names accepted in rule strings to
 * RACCT_* resource identifiers.  Searched by value in rctl_resource_name()
 * and by name through str2value(); both stop at an entry whose d_name is
 * NULL (terminating entry not visible in this excerpt).
 */
151 static struct dict resourcenames[] = {
152 { "cputime", RACCT_CPU },
153 { "datasize", RACCT_DATA },
154 { "stacksize", RACCT_STACK },
155 { "coredumpsize", RACCT_CORE },
156 { "memoryuse", RACCT_RSS },
157 { "memorylocked", RACCT_MEMLOCK },
158 { "maxproc", RACCT_NPROC },
159 { "openfiles", RACCT_NOFILE },
160 { "vmemoryuse", RACCT_VMEM },
161 { "pseudoterminals", RACCT_NPTS },
162 { "swapuse", RACCT_SWAP },
163 { "nthr", RACCT_NTHR },
164 { "msgqqueued", RACCT_MSGQQUEUED },
165 { "msgqsize", RACCT_MSGQSIZE },
166 { "nmsgq", RACCT_NMSGQ },
167 { "nsem", RACCT_NSEM },
168 { "nsemop", RACCT_NSEMOP },
169 { "nshm", RACCT_NSHM },
170 { "shmsize", RACCT_SHMSIZE },
171 { "wallclock", RACCT_WALLCLOCK },
172 { "pcpu", RACCT_PCTCPU },
173 { "readbps", RACCT_READBPS },
174 { "writebps", RACCT_WRITEBPS },
175 { "readiops", RACCT_READIOPS },
176 { "writeiops", RACCT_WRITEIOPS },
/*
 * Name/value table mapping action names accepted in rule strings to
 * RCTL_ACTION_* values: one entry per signal action plus deny, log, devctl
 * and throttle.  Searched by value in rctl_action_name() and by name
 * through str2value(); both stop at an entry whose d_name is NULL
 * (terminating entry not visible in this excerpt).
 */
179 static struct dict actionnames[] = {
180 { "sighup", RCTL_ACTION_SIGHUP },
181 { "sigint", RCTL_ACTION_SIGINT },
182 { "sigquit", RCTL_ACTION_SIGQUIT },
183 { "sigill", RCTL_ACTION_SIGILL },
184 { "sigtrap", RCTL_ACTION_SIGTRAP },
185 { "sigabrt", RCTL_ACTION_SIGABRT },
186 { "sigemt", RCTL_ACTION_SIGEMT },
187 { "sigfpe", RCTL_ACTION_SIGFPE },
188 { "sigkill", RCTL_ACTION_SIGKILL },
189 { "sigbus", RCTL_ACTION_SIGBUS },
190 { "sigsegv", RCTL_ACTION_SIGSEGV },
191 { "sigsys", RCTL_ACTION_SIGSYS },
192 { "sigpipe", RCTL_ACTION_SIGPIPE },
193 { "sigalrm", RCTL_ACTION_SIGALRM },
194 { "sigterm", RCTL_ACTION_SIGTERM },
195 { "sigurg", RCTL_ACTION_SIGURG },
196 { "sigstop", RCTL_ACTION_SIGSTOP },
197 { "sigtstp", RCTL_ACTION_SIGTSTP },
198 { "sigchld", RCTL_ACTION_SIGCHLD },
199 { "sigttin", RCTL_ACTION_SIGTTIN },
200 { "sigttou", RCTL_ACTION_SIGTTOU },
201 { "sigio", RCTL_ACTION_SIGIO },
202 { "sigxcpu", RCTL_ACTION_SIGXCPU },
203 { "sigxfsz", RCTL_ACTION_SIGXFSZ },
204 { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
205 { "sigprof", RCTL_ACTION_SIGPROF },
206 { "sigwinch", RCTL_ACTION_SIGWINCH },
207 { "siginfo", RCTL_ACTION_SIGINFO },
208 { "sigusr1", RCTL_ACTION_SIGUSR1 },
209 { "sigusr2", RCTL_ACTION_SIGUSR2 },
210 { "sigthr", RCTL_ACTION_SIGTHR },
211 { "deny", RCTL_ACTION_DENY },
212 { "log", RCTL_ACTION_LOG },
213 { "devctl", RCTL_ACTION_DEVCTL },
214 { "throttle", RCTL_ACTION_THROTTLE },
/*
 * File-scope plumbing: registration of rctl_init() with SYSINIT, the two
 * UMA zones that rules and rule links are allocated from, forward
 * declarations of helpers used before their definitions, and the M_RCTL
 * malloc type used for the temporary log buffers in rctl_enforce().
 */
217 static void rctl_init(void);
218 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
220 static uma_zone_t rctl_rule_zone;
221 static uma_zone_t rctl_rule_link_zone;
223 static int rctl_rule_fully_specified(const struct rctl_rule *rule);
224 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
226 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
/*
 * Sysctl handler for throttle_min.  The current value is copied into a
 * local, passed through sysctl_handle_int(), and - when the request
 * carries a new value - checked to lie in [1, rctl_throttle_max] before
 * it is stored.  What is returned on each path is not visible in this
 * excerpt.
 */
228 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS)
230 int error, val = rctl_throttle_min;
232 error = sysctl_handle_int(oidp, &val, 0, req);
233 if (error || !req->newptr)
235 if (val < 1 || val > rctl_throttle_max)
239 rctl_throttle_min = val;
/*
 * Sysctl handler for throttle_max.  Works on a local copy via
 * sysctl_handle_int(); a new value is compared against rctl_throttle_min
 * before it is stored.  What is returned on each path is not visible in
 * this excerpt.
 */
245 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS)
247 int error, val = rctl_throttle_max;
249 error = sysctl_handle_int(oidp, &val, 0, req);
250 if (error || !req->newptr)
252 if (val < rctl_throttle_min)
256 rctl_throttle_max = val;
/*
 * Sysctl handler for throttle_pct.  Works on a local copy via
 * sysctl_handle_int() and stores the new value into rctl_throttle_pct.
 * NOTE(review): any range check between the read and the store is not
 * visible in this excerpt.
 */
262 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS)
264 int error, val = rctl_throttle_pct;
266 error = sysctl_handle_int(oidp, &val, 0, req);
267 if (error || !req->newptr)
273 rctl_throttle_pct = val;
/*
 * Sysctl handler for throttle_pct2.  Works on a local copy via
 * sysctl_handle_int() and stores the new value into rctl_throttle_pct2.
 * NOTE(review): any range check between the read and the store is not
 * visible in this excerpt.
 */
279 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS)
281 int error, val = rctl_throttle_pct2;
283 error = sysctl_handle_int(oidp, &val, 0, req);
284 if (error || !req->newptr)
290 rctl_throttle_pct2 = val;
/*
 * Returns the d_name of the subjectnames[] entry whose d_value equals
 * 'subject'.  Panics when no entry matches.
 */
297 rctl_subject_type_name(int subject)
301 for (i = 0; subjectnames[i].d_name != NULL; i++) {
302 if (subjectnames[i].d_value == subject)
303 return (subjectnames[i].d_name);
306 panic("rctl_subject_type_name: unknown subject type %d", subject);
/*
 * Returns the d_name of the actionnames[] entry whose d_value equals
 * 'action'.  Panics when no entry matches.
 */
310 rctl_action_name(int action)
314 for (i = 0; actionnames[i].d_name != NULL; i++) {
315 if (actionnames[i].d_value == action)
316 return (actionnames[i].d_name);
319 panic("rctl_action_name: unknown action %d", action);
/*
 * Returns the d_name of the resourcenames[] entry whose d_value equals
 * 'resource'.  Panics when no entry matches.
 */
323 rctl_resource_name(int resource)
327 for (i = 0; resourcenames[i].d_name != NULL; i++) {
328 if (resourcenames[i].d_value == resource)
329 return (resourcenames[i].d_name);
332 panic("rctl_resource_name: unknown resource %d", resource);
/*
 * Selects the racct that the rule's rr_per field refers to for process
 * 'p': the real-uid uidinfo racct, the login class racct or the jail
 * racct, all reached through p->p_ucred.  The value returned for the
 * per-process case is not visible in this excerpt.  Panics on an unknown
 * rr_per value.
 */
335 static struct racct *
336 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule)
338 struct ucred *cred = p->p_ucred;
340 ASSERT_RACCT_ENABLED();
343 switch (rule->rr_per) {
344 case RCTL_SUBJECT_TYPE_PROCESS:
346 case RCTL_SUBJECT_TYPE_USER:
347 return (cred->cr_ruidinfo->ui_racct);
348 case RCTL_SUBJECT_TYPE_LOGINCLASS:
349 return (cred->cr_loginclass->lc_racct);
350 case RCTL_SUBJECT_TYPE_JAIL:
351 return (cred->cr_prison->pr_prison_racct->prr_racct);
353 panic("%s: unknown per %d", __func__, rule->rr_per);
/*
 * Computes rule->rr_amount minus the current usage of rule->rr_resource
 * in the racct chosen by rctl_proc_rule_to_racct().  The difference is
 * signed, so it is negative when usage already exceeds the rule amount.
 */
358 * Return the amount of resource that can be allocated by 'p' before
362 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
364 const struct racct *racct;
367 ASSERT_RACCT_ENABLED();
370 racct = rctl_proc_rule_to_racct(p, rule);
371 available = rule->rr_amount - racct->r_resources[rule->rr_resource];
/*
 * Decays the usage counter of 'resource' in 'racct'.  The smallest
 * rr_amount among the linked throttle rules for that resource is found
 * first (INT64_MAX when there is none).  Usage below that amount is reset
 * to zero; otherwise usage is capped at ten times the amount and then
 * reduced by the amount.
 */
377 * Called every second for proc, uidinfo, loginclass, and jail containers.
378 * If the limit isn't exceeded, it decreases the usage amount to zero.
379 * Otherwise, it decreases it by the value of the limit. This way
380 * resource consumption exceeding the limit "carries over" to the next
384 rctl_throttle_decay(struct racct *racct, int resource)
386 struct rctl_rule *rule;
387 struct rctl_rule_link *link;
388 int64_t minavailable;
390 ASSERT_RACCT_ENABLED();
393 minavailable = INT64_MAX;
395 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
396 rule = link->rrl_rule;
398 if (rule->rr_resource != resource)
400 if (rule->rr_action != RCTL_ACTION_THROTTLE)
403 if (rule->rr_amount < minavailable)
404 minavailable = rule->rr_amount;
407 if (racct->r_resources[resource] < minavailable) {
408 racct->r_resources[resource] = 0;
411 * Cap utilization counter at ten times the limit. Otherwise,
412 * if we changed the rule lowering the allowed amount, it could
413 * take unreasonably long time for the accumulated resource
416 if (racct->r_resources[resource] > minavailable * 10)
417 racct->r_resources[resource] = minavailable * 10;
419 racct->r_resources[resource] -= minavailable;
/*
 * Finds the smallest rctl_available_resource() value among the deny rules
 * for RACCT_PCTCPU linked to p->p_racct, remembers the rr_amount of that
 * rule in 'limit', and then lowers the result by RCTL_PCPU_SHIFT or by
 * half of 'limit'.
 * NOTE(review): in the visible lines 'limit' is assigned only inside the
 * loop; confirm it is initialized before the loop for the case where no
 * rule matches.
 */
424 * Special version of rctl_get_available() for the %CPU resource.
425 * We slightly cheat here and return less than we normally would.
428 rctl_pcpu_available(const struct proc *p) {
429 struct rctl_rule *rule;
430 struct rctl_rule_link *link;
431 int64_t available, minavailable, limit;
433 ASSERT_RACCT_ENABLED();
436 minavailable = INT64_MAX;
439 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
440 rule = link->rrl_rule;
441 if (rule->rr_resource != RACCT_PCTCPU)
443 if (rule->rr_action != RCTL_ACTION_DENY)
445 available = rctl_available_resource(p, rule);
446 if (available < minavailable) {
447 minavailable = available;
448 limit = rule->rr_amount;
453 * Return slightly less than actual value of the available
454 * %cpu resource. This makes %cpu throttling more aggressive
455 * and lets us act sooner than the limits are already exceeded.
458 if (limit > 2 * RCTL_PCPU_SHIFT)
459 minavailable -= RCTL_PCPU_SHIFT;
461 minavailable -= (limit / 2);
464 return (minavailable);
/*
 * Addition helper for two uint64_t values, used by the throttle sleep
 * computation in rctl_enforce().
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * handles overflow the way xmul() does - confirm.
 */
468 xadd(uint64_t a, uint64_t b)
/*
 * Multiplication helper for two uint64_t values, used by the throttle
 * sleep computation in rctl_enforce().  The visible test is true exactly
 * when a * b would not fit in a uint64_t; the values returned on either
 * path are not visible in this excerpt.
 */
484 xmul(uint64_t a, uint64_t b)
487 if (b != 0 && a > UINT64_MAX / b)
/*
 * Walks every rule linked to p->p_racct that targets 'resource'.  A rule
 * whose remaining allowance (rctl_available_resource()) covers 'amount'
 * gets its rrl_exceeded flag cleared.  Otherwise the rule action is
 * applied: log (printf), devctl notification, throttle (a sleep duration
 * is computed from usage, the rule amount and the rctl_throttle_* knobs
 * and passed to racct_proc_throttle()), or a signal sent with
 * kern_psignal().  Log and devctl output is rate-limited with
 * ppsratecheck() and formatted into a buffer of RCTL_LOG_BUFSIZE bytes
 * allocated with M_NOWAIT.  Log, devctl and signal actions are skipped
 * while rrl_exceeded is already set and set it once they fire.
 * Handling of the deny action and the final return value are not visible
 * in this excerpt.
 */
494 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
495 * to what it keeps allocated now. Returns non-zero if the allocation should
496 * be denied, 0 otherwise.
499 rctl_enforce(struct proc *p, int resource, uint64_t amount)
501 static struct timeval log_lasttime, devctl_lasttime;
502 static int log_curtime = 0, devctl_curtime = 0;
503 struct rctl_rule *rule;
504 struct rctl_rule_link *link;
508 uint64_t sleep_ms, sleep_ratio;
511 ASSERT_RACCT_ENABLED();
515 * There may be more than one matching rule; go through all of them.
516 * Denial should be done last, after logging and sending signals.
518 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
519 rule = link->rrl_rule;
520 if (rule->rr_resource != resource)
523 available = rctl_available_resource(p, rule);
524 if (available >= (int64_t)amount) {
525 link->rrl_exceeded = 0;
529 switch (rule->rr_action) {
530 case RCTL_ACTION_DENY:
533 case RCTL_ACTION_LOG:
535 * If rrl_exceeded != 0, it means we've already
536 * logged a warning for this process.
538 if (link->rrl_exceeded != 0)
542 * If the process state is not fully initialized yet,
543 * we can't access most of the required fields, e.g.
544 * p->p_comm. This happens when called from fork1().
545 * Ignore this rule for now; it will be processed just
546 * after fork, when called from racct_proc_fork_done().
548 if (p->p_state != PRS_NORMAL)
551 if (!ppsratecheck(&log_lasttime, &log_curtime,
552 rctl_log_rate_limit))
555 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
557 printf("rctl_enforce: out of memory\n");
560 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
561 rctl_rule_to_sbuf(&sb, rule);
563 printf("rctl: rule \"%s\" matched by pid %d "
564 "(%s), uid %d, jail %s\n", sbuf_data(&sb),
565 p->p_pid, p->p_comm, p->p_ucred->cr_uid,
566 p->p_ucred->cr_prison->pr_prison_racct->prr_name);
569 link->rrl_exceeded = 1;
571 case RCTL_ACTION_DEVCTL:
572 if (link->rrl_exceeded != 0)
575 if (p->p_state != PRS_NORMAL)
578 if (!ppsratecheck(&devctl_lasttime, &devctl_curtime,
579 rctl_devctl_rate_limit))
582 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
584 printf("rctl_enforce: out of memory\n");
587 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
588 sbuf_printf(&sb, "rule=");
589 rctl_rule_to_sbuf(&sb, rule);
590 sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
591 p->p_pid, p->p_ucred->cr_ruid,
592 p->p_ucred->cr_prison->pr_prison_racct->prr_name);
594 devctl_notify("RCTL", "rule", "matched",
598 link->rrl_exceeded = 1;
600 case RCTL_ACTION_THROTTLE:
601 if (p->p_state != PRS_NORMAL)
605 * Make the process sleep for a fraction of second
606 * proportional to the ratio of process' resource
607 * utilization compared to the limit. The point is
608 * to penalize resource hogs: processes that consume
609 * more of the available resources sleep for longer.
611 * We're trying to defer division until the very end,
612 * to minimize the rounding effects. The following
613 * calculation could have been written in a clearer
616 * sleep_ms = hz * p->p_racct->r_resources[resource] /
618 * sleep_ms *= rctl_throttle_pct / 100;
619 * if (sleep_ms < rctl_throttle_min)
620 * sleep_ms = rctl_throttle_min;
623 sleep_ms = xmul(hz, p->p_racct->r_resources[resource]);
624 sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100;
625 if (sleep_ms < rctl_throttle_min * rule->rr_amount)
626 sleep_ms = rctl_throttle_min * rule->rr_amount;
629 * Multiply that by the ratio of the resource
630 * consumption for the container compared to the limit,
631 * squared. In other words, a process in a container
632 * that is two times over the limit will be throttled
633 * four times as much for hitting the same rule. The
634 * point is to penalize processes more if the container
635 * itself (eg certain UID or jail) is above the limit.
638 sleep_ratio = -available / rule->rr_amount;
641 sleep_ratio = xmul(sleep_ratio, sleep_ratio);
642 sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100;
643 sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio));
646 * Finally the division.
648 sleep_ms /= rule->rr_amount;
650 if (sleep_ms > rctl_throttle_max)
651 sleep_ms = rctl_throttle_max;
653 printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n",
654 __func__, p->p_pid, p->p_comm,
655 p->p_racct->r_resources[resource],
656 rule->rr_amount, (uintmax_t)sleep_ms,
657 (uintmax_t)sleep_ratio, (intmax_t)available);
660 KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n",
661 __func__, (uintmax_t)sleep_ms, rctl_throttle_min));
662 racct_proc_throttle(p, sleep_ms);
665 if (link->rrl_exceeded != 0)
668 if (p->p_state != PRS_NORMAL)
671 KASSERT(rule->rr_action > 0 &&
672 rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
673 ("rctl_enforce: unknown action %d",
677 * We're using the fact that RCTL_ACTION_SIG* values
678 * are equal to their counterparts from sys/signal.h.
680 kern_psignal(p, rule->rr_action);
681 link->rrl_exceeded = 1;
688 * Return fake error code; the caller should change it
689 * into one proper for the situation - EFSIZ, ENOMEM etc.
/*
 * Scans the rules linked to p->p_racct and tracks, starting from
 * UINT64_MAX, the smallest rr_amount among the deny rules for 'resource'.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the tracked minimum is returned.  The remark about denial
 * ordering inside the body appears to be carried over from rctl_enforce()
 * and does not describe anything this loop does - confirm.
 */
698 rctl_get_limit(struct proc *p, int resource)
700 struct rctl_rule *rule;
701 struct rctl_rule_link *link;
702 uint64_t amount = UINT64_MAX;
704 ASSERT_RACCT_ENABLED();
708 * There may be more than one matching rule; go through all of them.
709 * Denial should be done last, after logging and sending signals.
711 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
712 rule = link->rrl_rule;
713 if (rule->rr_resource != resource)
715 if (rule->rr_action != RCTL_ACTION_DENY)
717 if (rule->rr_amount < amount)
718 amount = rule->rr_amount;
/*
 * Finds the smallest rctl_available_resource() value among the deny rules
 * for 'resource' linked to p->p_racct (INT64_MAX when there is none),
 * then adds the process' own current usage of that resource when doing so
 * cannot exceed INT64_MAX.  The handling of a negative result is not
 * visible in this excerpt.
 */
725 rctl_get_available(struct proc *p, int resource)
727 struct rctl_rule *rule;
728 struct rctl_rule_link *link;
729 int64_t available, minavailable, allocated;
731 minavailable = INT64_MAX;
733 ASSERT_RACCT_ENABLED();
737 * There may be more than one matching rule; go through all of them.
738 * Denial should be done last, after logging and sending signals.
740 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
741 rule = link->rrl_rule;
742 if (rule->rr_resource != resource)
744 if (rule->rr_action != RCTL_ACTION_DENY)
746 available = rctl_available_resource(p, rule);
747 if (available < minavailable)
748 minavailable = available;
752 * XXX: Think about this _hard_.
754 allocated = p->p_racct->r_resources[resource];
755 if (minavailable < INT64_MAX - allocated)
756 minavailable += allocated;
757 if (minavailable < 0)
760 return (minavailable);
/*
 * Compares 'rule' against 'filter' field by field: subject type, subject
 * pointer, resource, action, amount and per.  A filter field left at its
 * *_UNDEFINED value, or a NULL subject pointer in the filter, is not
 * compared.  The values returned for match and mismatch are not visible
 * in this excerpt.  Panics on an unknown filter subject type.
 */
764 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
767 ASSERT_RACCT_ENABLED();
769 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
770 if (rule->rr_subject_type != filter->rr_subject_type)
773 switch (filter->rr_subject_type) {
774 case RCTL_SUBJECT_TYPE_PROCESS:
775 if (filter->rr_subject.rs_proc != NULL &&
776 rule->rr_subject.rs_proc !=
777 filter->rr_subject.rs_proc)
780 case RCTL_SUBJECT_TYPE_USER:
781 if (filter->rr_subject.rs_uip != NULL &&
782 rule->rr_subject.rs_uip !=
783 filter->rr_subject.rs_uip)
786 case RCTL_SUBJECT_TYPE_LOGINCLASS:
787 if (filter->rr_subject.rs_loginclass != NULL &&
788 rule->rr_subject.rs_loginclass !=
789 filter->rr_subject.rs_loginclass)
792 case RCTL_SUBJECT_TYPE_JAIL:
793 if (filter->rr_subject.rs_prison_racct != NULL &&
794 rule->rr_subject.rs_prison_racct !=
795 filter->rr_subject.rs_prison_racct)
799 panic("rctl_rule_matches: unknown subject type %d",
800 filter->rr_subject_type);
804 if (filter->rr_resource != RACCT_UNDEFINED) {
805 if (rule->rr_resource != filter->rr_resource)
809 if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
810 if (rule->rr_action != filter->rr_action)
814 if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
815 if (rule->rr_amount != filter->rr_amount)
819 if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
820 if (rule->rr_per != filter->rr_per)
/*
 * Case-insensitive lookup of 'str' in 'table'.  On a hit the entry's
 * d_value is stored in *value.  The scan stops at the first entry whose
 * d_name is NULL.  Return values are not visible in this excerpt.
 */
828 str2value(const char *str, int *value, struct dict *table)
835 for (i = 0; table[i].d_name != NULL; i++) {
836 if (strcasecmp(table[i].d_name, str) == 0) {
837 *value = table[i].d_value;
/*
 * Parses 'str' as a base-10 number with strtoul() and stores the result
 * in *value.  The comparison that follows checks whether the conversion
 * consumed the whole string.
 * NOTE(review): no range check on the converted value is visible in this
 * excerpt.
 */
846 str2id(const char *str, id_t *value)
853 *value = strtoul(str, &end, 10);
854 if ((size_t)(end - str) != strlen(str))
/*
 * Parses 'str' as a base-10 number with strtoul() and stores the result
 * in *value.  The comparison that follows checks whether the conversion
 * consumed the whole string.
 * NOTE(review): strtoul() yields an unsigned long; on platforms where
 * that type is narrower than 64 bits, amounts above its range cannot be
 * represented here - confirm whether a 64-bit conversion is needed.
 */
861 str2int64(const char *str, int64_t *value)
868 *value = strtoul(str, &end, 10);
869 if ((size_t)(end - str) != strlen(str))
/*
 * Takes a reference on 'rule', allocates a link with M_WAITOK, and
 * inserts it at the head of racct->r_rule_links with rrl_exceeded
 * cleared.  The rule must be fully specified (asserted).  Any locking
 * around the list insertion is not visible in this excerpt.
 */
879 * Connect the rule to the racct, increasing refcount for the rule.
882 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
884 struct rctl_rule_link *link;
886 ASSERT_RACCT_ENABLED();
887 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
889 rctl_rule_acquire(rule);
890 link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
891 link->rrl_rule = rule;
892 link->rrl_exceeded = 0;
895 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
/*
 * Variant of rctl_racct_add_rule() that allocates the link with M_NOWAIT
 * and takes the rule reference only after the allocation.  What happens
 * when the allocation fails, and the return value, are not visible in
 * this excerpt.
 */
900 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
902 struct rctl_rule_link *link;
904 ASSERT_RACCT_ENABLED();
905 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
908 link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
911 rctl_rule_acquire(rule);
912 link->rrl_rule = rule;
913 link->rrl_exceeded = 0;
915 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
/*
 * Walks racct->r_rule_links with the removal-safe iterator; every link
 * whose rule matches 'filter' (rctl_rule_matches()) is unlinked, the rule
 * reference it held is released, and the link is returned to
 * rctl_rule_link_zone.  The counter that produces the return value is
 * not visible in this excerpt.
 */
921 * Remove limits for a rules matching the filter and release
922 * the refcounts for the rules, possibly freeing them. Returns
923 * the number of limit structures removed.
926 rctl_racct_remove_rules(struct racct *racct,
927 const struct rctl_rule *filter)
929 struct rctl_rule_link *link, *linktmp;
932 ASSERT_RACCT_ENABLED();
935 LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
936 if (!rctl_rule_matches(link->rrl_rule, filter))
939 LIST_REMOVE(link, rrl_next);
940 rctl_rule_release(link->rrl_rule);
941 uma_zfree(rctl_rule_link_zone, link);
/*
 * Takes a hold on the object the rule's subject refers to, when that
 * pointer is non-NULL: prison_racct_hold() for jails, uihold() for users,
 * loginclass_hold() for login classes.  No hold call is visible for the
 * undefined and process subject types.  Panics on an unknown subject
 * type.
 */
948 rctl_rule_acquire_subject(struct rctl_rule *rule)
951 ASSERT_RACCT_ENABLED();
953 switch (rule->rr_subject_type) {
954 case RCTL_SUBJECT_TYPE_UNDEFINED:
955 case RCTL_SUBJECT_TYPE_PROCESS:
957 case RCTL_SUBJECT_TYPE_JAIL:
958 if (rule->rr_subject.rs_prison_racct != NULL)
959 prison_racct_hold(rule->rr_subject.rs_prison_racct);
961 case RCTL_SUBJECT_TYPE_USER:
962 if (rule->rr_subject.rs_uip != NULL)
963 uihold(rule->rr_subject.rs_uip);
965 case RCTL_SUBJECT_TYPE_LOGINCLASS:
966 if (rule->rr_subject.rs_loginclass != NULL)
967 loginclass_hold(rule->rr_subject.rs_loginclass);
970 panic("rctl_rule_acquire_subject: unknown subject type %d",
971 rule->rr_subject_type);
/*
 * Counterpart of rctl_rule_acquire_subject(): drops the hold on the
 * rule's subject when that pointer is non-NULL, using prison_racct_free(),
 * uifree() or loginclass_free().  No release call is visible for the
 * undefined and process subject types.  Panics on an unknown subject
 * type.
 */
976 rctl_rule_release_subject(struct rctl_rule *rule)
979 ASSERT_RACCT_ENABLED();
981 switch (rule->rr_subject_type) {
982 case RCTL_SUBJECT_TYPE_UNDEFINED:
983 case RCTL_SUBJECT_TYPE_PROCESS:
985 case RCTL_SUBJECT_TYPE_JAIL:
986 if (rule->rr_subject.rs_prison_racct != NULL)
987 prison_racct_free(rule->rr_subject.rs_prison_racct);
989 case RCTL_SUBJECT_TYPE_USER:
990 if (rule->rr_subject.rs_uip != NULL)
991 uifree(rule->rr_subject.rs_uip);
993 case RCTL_SUBJECT_TYPE_LOGINCLASS:
994 if (rule->rr_subject.rs_loginclass != NULL)
995 loginclass_free(rule->rr_subject.rs_loginclass);
998 panic("rctl_rule_release_subject: unknown subject type %d",
999 rule->rr_subject_type);
/*
 * Allocates a rule from rctl_rule_zone using 'flags' and initializes it
 * to the empty state: subject type and per undefined, all subject
 * pointers NULL, resource, action and amount undefined, reference count
 * one.  Handling of a failed allocation and the return statement are not
 * visible in this excerpt.
 */
1004 rctl_rule_alloc(int flags)
1006 struct rctl_rule *rule;
1008 ASSERT_RACCT_ENABLED();
1010 rule = uma_zalloc(rctl_rule_zone, flags);
1013 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1014 rule->rr_subject.rs_proc = NULL;
1015 rule->rr_subject.rs_uip = NULL;
1016 rule->rr_subject.rs_loginclass = NULL;
1017 rule->rr_subject.rs_prison_racct = NULL;
1018 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1019 rule->rr_resource = RACCT_UNDEFINED;
1020 rule->rr_action = RCTL_ACTION_UNDEFINED;
1021 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1022 refcount_init(&rule->rr_refcount, 1);
1028 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
1030 struct rctl_rule *copy;
1032 ASSERT_RACCT_ENABLED();
1034 copy = uma_zalloc(rctl_rule_zone, flags);
1037 copy->rr_subject_type = rule->rr_subject_type;
1038 copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
1039 copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
1040 copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
1041 copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
1042 copy->rr_per = rule->rr_per;
1043 copy->rr_resource = rule->rr_resource;
1044 copy->rr_action = rule->rr_action;
1045 copy->rr_amount = rule->rr_amount;
1046 refcount_init(©->rr_refcount, 1);
1047 rctl_rule_acquire_subject(copy);
/*
 * Adds one reference to 'rule'.  Asserts that the rule already has a
 * positive reference count.
 */
1053 rctl_rule_acquire(struct rctl_rule *rule)
1056 ASSERT_RACCT_ENABLED();
1057 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1059 refcount_acquire(&rule->rr_refcount);
/*
 * Task handler installed by rctl_rule_release() with TASK_INIT(); the
 * 'context' argument is the rule to destroy.  Asserts that the reference
 * count has reached zero, drops the hold on the rule's subject and
 * returns the rule to rctl_rule_zone.  'pending' is not used in the
 * visible lines.
 */
1063 rctl_rule_free(void *context, int pending)
1065 struct rctl_rule *rule;
1067 rule = (struct rctl_rule *)context;
1069 ASSERT_RACCT_ENABLED();
1070 KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
1073 * We don't need locking here; rule is guaranteed to be inaccessible.
1076 rctl_rule_release_subject(rule);
1077 uma_zfree(rctl_rule_zone, rule);
/*
 * Drops one reference on 'rule'.  When refcount_release() reports that
 * this was the last reference, destruction is not done inline: a task
 * running rctl_rule_free() is queued on taskqueue_thread instead.
 */
1081 rctl_rule_release(struct rctl_rule *rule)
1084 ASSERT_RACCT_ENABLED();
1085 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1087 if (refcount_release(&rule->rr_refcount)) {
1089 * rctl_rule_release() is often called when iterating
1090 * over all the uidinfo structures in the system,
1091 * holding uihashtbl_lock. Since rctl_rule_free()
1092 * might end up calling uifree(), this would lead
1093 * to lock recursion. Use taskqueue to avoid this.
1095 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
1096 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
/*
 * Checks that no part of 'rule' is left unset: the subject type, the
 * subject pointer belonging to that type, the resource, the action, the
 * amount and the per field.  The values returned for each outcome are
 * not visible in this excerpt.  Panics on an unknown subject type.
 */
1101 rctl_rule_fully_specified(const struct rctl_rule *rule)
1104 ASSERT_RACCT_ENABLED();
1106 switch (rule->rr_subject_type) {
1107 case RCTL_SUBJECT_TYPE_UNDEFINED:
1109 case RCTL_SUBJECT_TYPE_PROCESS:
1110 if (rule->rr_subject.rs_proc == NULL)
1113 case RCTL_SUBJECT_TYPE_USER:
1114 if (rule->rr_subject.rs_uip == NULL)
1117 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1118 if (rule->rr_subject.rs_loginclass == NULL)
1121 case RCTL_SUBJECT_TYPE_JAIL:
1122 if (rule->rr_subject.rs_prison_racct == NULL)
1126 panic("rctl_rule_fully_specified: unknown subject type %d",
1127 rule->rr_subject_type);
1129 if (rule->rr_resource == RACCT_UNDEFINED)
1131 if (rule->rr_action == RCTL_ACTION_UNDEFINED)
1133 if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
1135 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
/*
 * Parses a rule string of the form
 * subject:subject-id:resource:action=amount/per into a rule freshly
 * allocated with rctl_rule_alloc(M_WAITOK).  The input buffer is split in
 * place with strsep(), so 'rulestr' is modified.  A missing or empty
 * field leaves the corresponding rule member undefined (or NULL for the
 * subject pointers).
 *
 * Subject ids are resolved per subject type: pfind() for processes (with
 * allproc_lock asserted held, and the process unlocked again afterwards),
 * uifind() for users, loginclass_find() and prison_racct_find() for login
 * classes and jails; a NULL result from either of the last two sets the
 * error to ENAMETOOLONG.
 *
 * For resources flagged by RACCT_IS_IN_MILLIONS() the amount is
 * multiplied by 1000000 after checking against INT64_MAX / 1000000.
 *
 * The assignment of 'perstr', the error exits and the success return are
 * not visible in this excerpt; the one visible cleanup step releases the
 * rule.
 */
1142 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
1144 struct rctl_rule *rule;
1145 char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
1146 *amountstr, *perstr;
1150 ASSERT_RACCT_ENABLED();
1152 rule = rctl_rule_alloc(M_WAITOK);
1154 subjectstr = strsep(&rulestr, ":");
1155 subject_idstr = strsep(&rulestr, ":");
1156 resourcestr = strsep(&rulestr, ":");
1157 actionstr = strsep(&rulestr, "=/");
1158 amountstr = strsep(&rulestr, "/");
1161 if (subjectstr == NULL || subjectstr[0] == '\0')
1162 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1164 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
1169 if (subject_idstr == NULL || subject_idstr[0] == '\0') {
1170 rule->rr_subject.rs_proc = NULL;
1171 rule->rr_subject.rs_uip = NULL;
1172 rule->rr_subject.rs_loginclass = NULL;
1173 rule->rr_subject.rs_prison_racct = NULL;
1175 switch (rule->rr_subject_type) {
1176 case RCTL_SUBJECT_TYPE_UNDEFINED:
1179 case RCTL_SUBJECT_TYPE_PROCESS:
1180 error = str2id(subject_idstr, &id);
1183 sx_assert(&allproc_lock, SA_LOCKED);
1184 rule->rr_subject.rs_proc = pfind(id);
1185 if (rule->rr_subject.rs_proc == NULL) {
1189 PROC_UNLOCK(rule->rr_subject.rs_proc);
1191 case RCTL_SUBJECT_TYPE_USER:
1192 error = str2id(subject_idstr, &id);
1195 rule->rr_subject.rs_uip = uifind(id);
1197 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1198 rule->rr_subject.rs_loginclass =
1199 loginclass_find(subject_idstr);
1200 if (rule->rr_subject.rs_loginclass == NULL) {
1201 error = ENAMETOOLONG;
1205 case RCTL_SUBJECT_TYPE_JAIL:
1206 rule->rr_subject.rs_prison_racct =
1207 prison_racct_find(subject_idstr);
1208 if (rule->rr_subject.rs_prison_racct == NULL) {
1209 error = ENAMETOOLONG;
1214 panic("rctl_string_to_rule: unknown subject type %d",
1215 rule->rr_subject_type);
1219 if (resourcestr == NULL || resourcestr[0] == '\0')
1220 rule->rr_resource = RACCT_UNDEFINED;
1222 error = str2value(resourcestr, &rule->rr_resource,
1228 if (actionstr == NULL || actionstr[0] == '\0')
1229 rule->rr_action = RCTL_ACTION_UNDEFINED;
1231 error = str2value(actionstr, &rule->rr_action, actionnames);
1236 if (amountstr == NULL || amountstr[0] == '\0')
1237 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1239 error = str2int64(amountstr, &rule->rr_amount);
1242 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) {
1243 if (rule->rr_amount > INT64_MAX / 1000000) {
1247 rule->rr_amount *= 1000000;
1251 if (perstr == NULL || perstr[0] == '\0')
1252 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1254 error = str2value(perstr, &rule->rr_per, subjectnames);
1263 rctl_rule_release(rule);
/*
 * Installs a fully specified rule.
 *
 * Combinations rejected with EOPNOTSUPP: a deny action on a resource that
 * is not deniable (RSS and %CPU excepted), a throttle action on a
 * resource that is not decaying or on %CPU, and a per-process rule on a
 * sloppy resource.
 *
 * Existing rules matching the new one are removed first; for a deny rule
 * a duplicate with the amount left undefined is used as the filter, so
 * rules differing only in amount are removed as well.
 *
 * The rule is then linked to the racct of its subject (process, user,
 * login class or jail).  For the non-process subject types every process
 * in the system is examined, with allproc_lock asserted held, and the
 * rule is also linked to the racct of each process whose credentials
 * match the subject; for jails the match walks up the pr_parent chain.
 * Locking calls and return statements other than the EOPNOTSUPP ones are
 * not visible in this excerpt.
 */
1269 * Link a rule with all the subjects it applies to.
1272 rctl_rule_add(struct rctl_rule *rule)
1276 struct uidinfo *uip;
1278 struct prison_racct *prr;
1279 struct loginclass *lc;
1280 struct rctl_rule *rule2;
1283 ASSERT_RACCT_ENABLED();
1284 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
1287 * Some rules just don't make sense, like "deny" rule for an undeniable
1288 * resource. The exception are the RSS and %CPU resources - they are
1289 * not deniable in the racct sense, but the limit is enforced in
1292 if (rule->rr_action == RCTL_ACTION_DENY &&
1293 !RACCT_IS_DENIABLE(rule->rr_resource) &&
1294 rule->rr_resource != RACCT_RSS &&
1295 rule->rr_resource != RACCT_PCTCPU) {
1296 return (EOPNOTSUPP);
1299 if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1300 !RACCT_IS_DECAYING(rule->rr_resource)) {
1301 return (EOPNOTSUPP);
1304 if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1305 rule->rr_resource == RACCT_PCTCPU) {
1306 return (EOPNOTSUPP);
1309 if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
1310 RACCT_IS_SLOPPY(rule->rr_resource)) {
1311 return (EOPNOTSUPP);
1315 * Make sure there are no duplicated rules. Also, for the "deny"
1316 * rules, remove ones differing only by "amount".
1318 if (rule->rr_action == RCTL_ACTION_DENY) {
1319 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
1320 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
1321 rctl_rule_remove(rule2);
1322 rctl_rule_release(rule2);
1324 rctl_rule_remove(rule);
1326 switch (rule->rr_subject_type) {
1327 case RCTL_SUBJECT_TYPE_PROCESS:
1328 p = rule->rr_subject.rs_proc;
1329 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
1331 rctl_racct_add_rule(p->p_racct, rule);
1333 * In case of per-process rule, we don't have anything more
1338 case RCTL_SUBJECT_TYPE_USER:
1339 uip = rule->rr_subject.rs_uip;
1340 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1341 rctl_racct_add_rule(uip->ui_racct, rule);
1344 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1345 lc = rule->rr_subject.rs_loginclass;
1346 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1347 rctl_racct_add_rule(lc->lc_racct, rule);
1350 case RCTL_SUBJECT_TYPE_JAIL:
1351 prr = rule->rr_subject.rs_prison_racct;
1352 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1353 rctl_racct_add_rule(prr->prr_racct, rule);
1357 panic("rctl_rule_add: unknown subject type %d",
1358 rule->rr_subject_type);
1362 * Now go through all the processes and add the new rule to the ones
1365 sx_assert(&allproc_lock, SA_LOCKED);
1366 FOREACH_PROC_IN_SYSTEM(p) {
1368 switch (rule->rr_subject_type) {
1369 case RCTL_SUBJECT_TYPE_USER:
1370 if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1371 cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1374 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1375 if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1378 case RCTL_SUBJECT_TYPE_JAIL:
1380 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1381 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1390 panic("rctl_rule_add: unknown subject type %d",
1391 rule->rr_subject_type);
1394 rctl_racct_add_rule(p->p_racct, rule);
/*
 * Argument-less hook.  In this file it is passed, together with
 * rctl_rule_post_callback, to loginclass_racct_foreach(),
 * ui_racct_foreach() and prison_racct_foreach().
 * NOTE(review): the body is not visible in this excerpt.
 */
1401 rctl_rule_pre_callback(void)
/*
 * Argument-less hook, the counterpart of rctl_rule_pre_callback.  In this
 * file the two are always passed as a pair to the *_racct_foreach()
 * iterators.
 * NOTE(review): the body is not visible in this excerpt.
 */
1408 rctl_rule_post_callback(void)
/*
 * Per-racct callback handed to the *_racct_foreach() iterators by
 * rctl_rule_remove().
 *
 * racct: container whose rule list is filtered.
 * arg2:  the filter rule, passed as an opaque pointer.
 * arg3:  pointer to an int running total kept by the caller.
 */
1415 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1417 struct rctl_rule *filter = (struct rctl_rule *)arg2;
1420 ASSERT_RACCT_ENABLED();
1421 RACCT_LOCK_ASSERT();
/* Presumably the helper returns the number of rules it removed - confirm. */
1423 found += rctl_racct_remove_rules(racct, filter);
/* Fold this container into the total the caller is accumulating. */
1425 *((int *)arg3) += found;
1429 * Remove all rules that match the filter.
/*
 * Removes every rule matching the filter.
 *
 * A process-typed filter that names a specific process is applied to that
 * process racct directly.  Otherwise the filter is applied to every
 * loginclass, user and jail racct through the foreach iterators, and then
 * to the racct of every process in the system.  The number of removals is
 * accumulated in found.
 * NOTE(review): the lines that end the single-process branch and the
 * final return are not visible in this excerpt.
 */
1432 rctl_rule_remove(struct rctl_rule *filter)
1437 ASSERT_RACCT_ENABLED();
/* Single-process case: only one racct needs to be examined. */
1439 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1440 filter->rr_subject.rs_proc != NULL) {
1441 p = filter->rr_subject.rs_proc;
1443 found = rctl_racct_remove_rules(p->p_racct, filter);
/* General case: each iterator receives the filter and a pointer to found. */
1450 loginclass_racct_foreach(rctl_rule_remove_callback,
1451 rctl_rule_pre_callback, rctl_rule_post_callback,
1452 filter, (void *)&found);
1453 ui_racct_foreach(rctl_rule_remove_callback,
1454 rctl_rule_pre_callback, rctl_rule_post_callback,
1455 filter, (void *)&found);
1456 prison_racct_foreach(rctl_rule_remove_callback,
1457 rctl_rule_pre_callback, rctl_rule_post_callback,
1458 filter, (void *)&found);
/* The process list walk relies on the caller holding allproc_lock. */
1460 sx_assert(&allproc_lock, SA_LOCKED);
1462 FOREACH_PROC_IN_SYSTEM(p) {
1463 found += rctl_racct_remove_rules(p->p_racct, filter);
1473 * Appends a rule to the sbuf.
/*
 * Renders one rule as text into sb.
 *
 * The pieces written, in order: the subject type name and a colon; the
 * subject identifier and a colon (pid, uid, login class name or jail racct
 * name, or just the colon when the subject pointer is NULL); then the
 * resource name, a colon, the action name, an equals sign and the amount;
 * and finally a slash plus the rr_per type name, but only when rr_per
 * differs from the subject type.
 */
1476 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1480 ASSERT_RACCT_ENABLED();
1482 sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
/* A NULL subject means an unspecified one and prints as an empty field. */
1484 switch (rule->rr_subject_type) {
1485 case RCTL_SUBJECT_TYPE_PROCESS:
1486 if (rule->rr_subject.rs_proc == NULL)
1487 sbuf_printf(sb, ":");
1489 sbuf_printf(sb, "%d:",
1490 rule->rr_subject.rs_proc->p_pid);
1492 case RCTL_SUBJECT_TYPE_USER:
1493 if (rule->rr_subject.rs_uip == NULL)
1494 sbuf_printf(sb, ":");
1496 sbuf_printf(sb, "%d:",
1497 rule->rr_subject.rs_uip->ui_uid);
1499 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1500 if (rule->rr_subject.rs_loginclass == NULL)
1501 sbuf_printf(sb, ":");
1503 sbuf_printf(sb, "%s:",
1504 rule->rr_subject.rs_loginclass->lc_name);
1506 case RCTL_SUBJECT_TYPE_JAIL:
1507 if (rule->rr_subject.rs_prison_racct == NULL)
1508 sbuf_printf(sb, ":");
1510 sbuf_printf(sb, "%s:",
1511 rule->rr_subject.rs_prison_racct->prr_name);
/* Any other subject type is treated as a fatal inconsistency. */
1514 panic("rctl_rule_to_sbuf: unknown subject type %d",
1515 rule->rr_subject_type);
/*
 * NOTE(review): the statement guarded by the test below is not visible in
 * this excerpt; presumably it rescales amounts that are kept in millions
 * before printing - confirm.
 */
1518 amount = rule->rr_amount;
1519 if (amount != RCTL_AMOUNT_UNDEFINED &&
1520 RACCT_IS_IN_MILLIONS(rule->rr_resource))
1523 sbuf_printf(sb, "%s:%s=%jd",
1524 rctl_resource_name(rule->rr_resource),
1525 rctl_action_name(rule->rr_action),
/* The per-type suffix is omitted when it equals the subject type. */
1528 if (rule->rr_per != rule->rr_subject_type)
1529 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1533 * Routine used by RCTL syscalls to read in input string.
/*
 * Fetches the input string of an RCTL syscall.
 *
 * inputstr: out parameter (presumably receives the allocated copy - the
 *           assignment is not visible in this excerpt).
 * inbufp:   source address handed to copyinstr().
 * inbuflen: caller-supplied length, checked against RCTL_MAX_INBUFSIZE.
 */
1536 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1541 ASSERT_RACCT_ENABLED();
/* Oversized requests are singled out before any allocation takes place. */
1545 if (inbuflen > RCTL_MAX_INBUFSIZE)
/* One byte more than inbuflen is allocated; the copy is capped at inbuflen. */
1548 str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1549 error = copyinstr(inbufp, str, inbuflen, NULL);
1561 * Routine used by RCTL syscalls to write out output string.
/*
 * Delivers the finished sbuf contents to the output buffer of a syscall.
 *
 * The sbuf is finished and then deleted on both visible paths: when
 * outbuflen cannot hold the text plus its terminating byte, and after the
 * copyout() of sbuf_len() + 1 bytes.
 * NOTE(review): the values returned for a NULL sbuf and for a too-small
 * buffer are not visible in this excerpt.
 */
1564 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1568 ASSERT_RACCT_ENABLED();
1570 if (outputsbuf == NULL)
1573 sbuf_finish(outputsbuf);
/* The + 1 accounts for the terminator that is copied out with the text. */
1574 if (outbuflen < sbuf_len(outputsbuf) + 1) {
1575 sbuf_delete(outputsbuf);
1578 error = copyout(sbuf_data(outputsbuf), outbufp,
1579 sbuf_len(outputsbuf) + 1);
1580 sbuf_delete(outputsbuf);
/*
 * Builds a new auto-extending sbuf holding name=amount pairs, separated by
 * commas, for the resources of the given racct.
 *
 * racct:  container whose r_resources[] values are printed.
 * sloppy: when 0, resources flagged RACCT_IS_SLOPPY are singled out by the
 *         test inside the loop (presumably skipped - the guarded statement
 *         is not visible in this excerpt).
 */
1584 static struct sbuf *
1585 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1591 ASSERT_RACCT_ENABLED();
1593 sb = sbuf_new_auto();
/* The bound is inclusive: index RACCT_MAX itself is visited. */
1594 for (i = 0; i <= RACCT_MAX; i++) {
1595 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1598 amount = racct->r_resources[i];
/* NOTE(review): the adjustment guarded by this test is not visible here. */
1600 if (RACCT_IS_IN_MILLIONS(i))
1602 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
/* Step back one character, dropping the comma written after the last pair. */
1604 sbuf_setpos(sb, sbuf_len(sb) - 1);
/*
 * Syscall: report the resource usage of the subject named by the input
 * string.
 *
 * Visible flow: privilege check (PRIV_RCTL_GET_RACCT), read the input
 * string, parse it into a filter rule under a shared allproc_lock, render
 * the racct of the subject selected by the filter, release the filter and
 * the lock, then copy the text out.
 * NOTE(review): the checks made on each subject pointer and the error
 * returns are not visible in this excerpt.
 */
1609 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1611 struct rctl_rule *filter;
1612 struct sbuf *outputsbuf = NULL;
1614 struct uidinfo *uip;
1615 struct loginclass *lc;
1616 struct prison_racct *prr;
1623 error = priv_check(td, PRIV_RCTL_GET_RACCT);
1627 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
/* The input copy is freed right after parsing, whatever the result. */
1631 sx_slock(&allproc_lock);
1632 error = rctl_string_to_rule(inputstr, &filter);
1633 free(inputstr, M_RCTL);
1635 sx_sunlock(&allproc_lock);
/*
 * The second argument is the sloppy flag of rctl_racct_to_sbuf(): 0 for a
 * process, 1 for the user, login class and jail containers.
 */
1639 switch (filter->rr_subject_type) {
1640 case RCTL_SUBJECT_TYPE_PROCESS:
1641 p = filter->rr_subject.rs_proc;
1646 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1648 case RCTL_SUBJECT_TYPE_USER:
1649 uip = filter->rr_subject.rs_uip;
1654 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1656 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1657 lc = filter->rr_subject.rs_loginclass;
1662 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1664 case RCTL_SUBJECT_TYPE_JAIL:
1665 prr = filter->rr_subject.rs_prison_racct;
1670 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
/* The filter reference and the lock are dropped before the copy-out. */
1676 rctl_rule_release(filter);
1677 sx_sunlock(&allproc_lock);
1681 error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
/*
 * Per-racct callback used by sys_rctl_get_rules(): appends to the sbuf
 * each rule linked to racct that is selected by the filter, followed by a
 * comma.
 *
 * arg2: the filter rule, as an opaque pointer.
 * arg3: the destination sbuf, as an opaque pointer.
 */
1687 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1689 struct rctl_rule *filter = (struct rctl_rule *)arg2;
1690 struct rctl_rule_link *link;
1691 struct sbuf *sb = (struct sbuf *)arg3;
1693 ASSERT_RACCT_ENABLED();
1694 RACCT_LOCK_ASSERT();
/* Rules failing rctl_rule_matches() are singled out before printing. */
1696 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1697 if (!rctl_rule_matches(link->rrl_rule, filter))
1699 rctl_rule_to_sbuf(sb, link->rrl_rule);
1700 sbuf_printf(sb, ",");
/*
 * Syscall: list the rules selected by the filter given in the input
 * string.
 *
 * Visible flow: privilege check (PRIV_RCTL_GET_RULES), read and parse the
 * input under a shared allproc_lock, bound the output size by
 * rctl_maxbufsize, collect matching rules into a fixed-length sbuf (first
 * process-typed rules from every process, then rules from the login class,
 * user and jail containers), strip the trailing comma and copy the text
 * out.
 */
1705 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1708 struct rctl_rule *filter;
1709 struct rctl_rule_link *link;
1711 char *inputstr, *buf;
1718 error = priv_check(td, PRIV_RCTL_GET_RULES);
1722 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1726 sx_slock(&allproc_lock);
1727 error = rctl_string_to_rule(inputstr, &filter);
1728 free(inputstr, M_RCTL);
1730 sx_sunlock(&allproc_lock);
/*
 * NOTE(review): unlike the same size check in sys_rctl_get_limits(), no
 * rctl_rule_release(filter) is visible on this path before the lock is
 * dropped.  It looks like the filter reference may be leaked when
 * outbuflen exceeds rctl_maxbufsize - confirm against the full source.
 */
1734 bufsize = uap->outbuflen;
1735 if (bufsize > rctl_maxbufsize) {
1736 sx_sunlock(&allproc_lock);
/* The sbuf is laid over a caller-sized buffer and cannot grow. */
1740 buf = malloc(bufsize, M_RCTL, M_WAITOK);
1741 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1742 KASSERT(sb != NULL, ("sbuf_new failed"));
1744 FOREACH_PROC_IN_SYSTEM(p) {
1746 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1748 * Non-process rules will be added to the buffer later.
1749 * Adding them here would result in duplicated output.
1751 if (link->rrl_rule->rr_subject_type !=
1752 RCTL_SUBJECT_TYPE_PROCESS)
1754 if (!rctl_rule_matches(link->rrl_rule, filter))
1756 rctl_rule_to_sbuf(sb, link->rrl_rule);
1757 sbuf_printf(sb, ",");
/* Container rules are gathered through the shared callback. */
1762 loginclass_racct_foreach(rctl_get_rules_callback,
1763 rctl_rule_pre_callback, rctl_rule_post_callback,
1765 ui_racct_foreach(rctl_get_rules_callback,
1766 rctl_rule_pre_callback, rctl_rule_post_callback,
1768 prison_racct_foreach(rctl_get_rules_callback,
1769 rctl_rule_pre_callback, rctl_rule_post_callback,
/*
 * ENOMEM from the fixed-length sbuf means the text did not fit.
 * NOTE(review): the handling of that case is not visible in this excerpt.
 */
1771 if (sbuf_error(sb) == ENOMEM) {
1777 * Remove trailing ",".
1779 if (sbuf_len(sb) > 0)
1780 sbuf_setpos(sb, sbuf_len(sb) - 1);
1782 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1784 rctl_rule_release(filter);
1785 sx_sunlock(&allproc_lock);
/*
 * Syscall: list every rule linked to the racct of one process.
 *
 * The filter parsed from the input string must name a specific process.
 * An undefined subject type and a NULL process each take an early exit
 * (return values not visible in this excerpt); any other non-process
 * subject type yields EOPNOTSUPP.  Every early exit shown releases the
 * filter and drops allproc_lock.  The rules are written to a fixed-length
 * sbuf sized by outbuflen, which is bounded by rctl_maxbufsize.
 */
1791 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1794 struct rctl_rule *filter;
1795 struct rctl_rule_link *link;
1796 char *inputstr, *buf;
1803 error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1807 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1811 sx_slock(&allproc_lock);
1812 error = rctl_string_to_rule(inputstr, &filter);
1813 free(inputstr, M_RCTL);
1815 sx_sunlock(&allproc_lock);
/* Validation of the subject: type first, then the process pointer. */
1819 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1820 rctl_rule_release(filter);
1821 sx_sunlock(&allproc_lock);
1824 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1825 rctl_rule_release(filter);
1826 sx_sunlock(&allproc_lock);
1827 return (EOPNOTSUPP);
1829 if (filter->rr_subject.rs_proc == NULL) {
1830 rctl_rule_release(filter);
1831 sx_sunlock(&allproc_lock);
/* Requests for more than rctl_maxbufsize bytes of output are refused. */
1835 bufsize = uap->outbuflen;
1836 if (bufsize > rctl_maxbufsize) {
1837 rctl_rule_release(filter);
1838 sx_sunlock(&allproc_lock);
1842 buf = malloc(bufsize, M_RCTL, M_WAITOK);
1843 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1844 KASSERT(sb != NULL, ("sbuf_new failed"));
/* No rctl_rule_matches() call appears here: every linked rule is printed. */
1847 LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1849 rctl_rule_to_sbuf(sb, link->rrl_rule);
1850 sbuf_printf(sb, ",");
/* NOTE(review): the handling of an overflowed sbuf is not visible here. */
1853 if (sbuf_error(sb) == ENOMEM) {
1860 * Remove trailing ",".
1862 if (sbuf_len(sb) > 0)
1863 sbuf_setpos(sb, sbuf_len(sb) - 1);
1865 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1867 rctl_rule_release(filter);
1868 sx_sunlock(&allproc_lock);
/*
 * Syscall: add the rule described by the input string.
 *
 * Visible flow: privilege check (PRIV_RCTL_ADD_RULE), read and parse the
 * input under a shared allproc_lock, default rr_per to the subject type
 * when it was left out, test that the rule is fully specified, call
 * rctl_rule_add(), then release the rule reference and the lock.
 * NOTE(review): the outcome of the fully-specified test is not visible in
 * this excerpt.
 */
1874 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1876 struct rctl_rule *rule;
1883 error = priv_check(td, PRIV_RCTL_ADD_RULE);
1887 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1891 sx_slock(&allproc_lock);
1892 error = rctl_string_to_rule(inputstr, &rule);
1893 free(inputstr, M_RCTL);
1895 sx_sunlock(&allproc_lock);
1899 * The 'per' part of a rule is optional.
1901 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1902 rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1903 rule->rr_per = rule->rr_subject_type;
/* Filters with unspecified fields are singled out before the add. */
1905 if (!rctl_rule_fully_specified(rule)) {
1910 error = rctl_rule_add(rule);
/* This function gives up its own reference to the parsed rule. */
1913 rctl_rule_release(rule);
1914 sx_sunlock(&allproc_lock);
/*
 * Syscall: remove every rule matching the filter in the input string.
 *
 * Visible flow: privilege check (PRIV_RCTL_REMOVE_RULE), read and parse
 * the input under a shared allproc_lock, call rctl_rule_remove() with the
 * filter, then release the filter and the lock.
 */
1919 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1921 struct rctl_rule *filter;
1928 error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1932 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1936 sx_slock(&allproc_lock);
1937 error = rctl_string_to_rule(inputstr, &filter);
1938 free(inputstr, M_RCTL);
1940 sx_sunlock(&allproc_lock);
/* The removal runs with allproc_lock still held, as it asserts. */
1944 error = rctl_rule_remove(filter);
1945 rctl_rule_release(filter);
1946 sx_sunlock(&allproc_lock);
1952 * Update RCTL rule list after credential change.
/*
 * Rebuilds the rule list of process p to match the credentials in newcred.
 *
 * Visible steps, in order:
 *  1. Walk the process-typed rules currently on p and the rule lists of
 *     the new user (cr_ruidinfo), login class and jail racct; per the
 *     comment below this is the counting pass.
 *  2. Allocate rulecnt blank links with M_WAITOK onto a temporary list.
 *  3. Fill the blank links: process-typed rules already on p first, then
 *     every rule of the new user, login class and jail racct.  Each rule
 *     gets a new reference and its rrl_exceeded value is carried over.
 *     Every fill loop tests for running out of blank links.
 *  4. Release and free every link of the old list.
 *  5. Move the filled links from the temporary list onto p->p_racct.
 * The last loop frees the temporary list; the comment preceding it ties
 * that to the rule list having changed while the lock was not held.
 * NOTE(review): the counter updates, the locking calls and the retry jump
 * are not visible in this excerpt.
 */
1955 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1957 LIST_HEAD(, rctl_rule_link) newrules;
1958 struct rctl_rule_link *link, *newlink;
1959 struct uidinfo *newuip;
1960 struct loginclass *newlc;
1961 struct prison_racct *newprr;
/* Must be entered without the process lock held. */
1967 PROC_LOCK_ASSERT(p, MA_NOTOWNED);
/* Containers the process belongs to under the new credentials. */
1969 newuip = newcred->cr_ruidinfo;
1970 newlc = newcred->cr_loginclass;
1971 newprr = newcred->cr_prison->pr_prison_racct;
1973 LIST_INIT(&newrules);
1977 * First, count the rules that apply to the process with new
/* Of the rules now on the process, only process-typed ones are kept. */
1982 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1983 if (link->rrl_rule->rr_subject_type ==
1984 RCTL_SUBJECT_TYPE_PROCESS)
1987 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1989 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1991 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1996 * Create temporary list. We've dropped the rctl_lock in order
1999 for (i = 0; i < rulecnt; i++) {
2000 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
2001 newlink->rrl_rule = NULL;
2002 newlink->rrl_exceeded = 0;
2003 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
2006 newlink = LIST_FIRST(&newrules);
2009 * Assign rules to the newly allocated list entries.
2012 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
2013 if (link->rrl_rule->rr_subject_type ==
2014 RCTL_SUBJECT_TYPE_PROCESS) {
2015 if (newlink == NULL)
2017 rctl_rule_acquire(link->rrl_rule);
2018 newlink->rrl_rule = link->rrl_rule;
2019 newlink->rrl_exceeded = link->rrl_exceeded;
2020 newlink = LIST_NEXT(newlink, rrl_next);
2025 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
2026 if (newlink == NULL)
2028 rctl_rule_acquire(link->rrl_rule);
2029 newlink->rrl_rule = link->rrl_rule;
2030 newlink->rrl_exceeded = link->rrl_exceeded;
2031 newlink = LIST_NEXT(newlink, rrl_next);
2035 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
2036 if (newlink == NULL)
2038 rctl_rule_acquire(link->rrl_rule);
2039 newlink->rrl_rule = link->rrl_rule;
2040 newlink->rrl_exceeded = link->rrl_exceeded;
2041 newlink = LIST_NEXT(newlink, rrl_next);
2045 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
2046 if (newlink == NULL)
2048 rctl_rule_acquire(link->rrl_rule);
2049 newlink->rrl_rule = link->rrl_rule;
2050 newlink->rrl_exceeded = link->rrl_exceeded;
2051 newlink = LIST_NEXT(newlink, rrl_next);
2057 * Free the old rule list.
2059 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
2060 link = LIST_FIRST(&p->p_racct->r_rule_links);
2061 LIST_REMOVE(link, rrl_next);
2062 rctl_rule_release(link->rrl_rule);
2063 uma_zfree(rctl_rule_link_zone, link);
2067 * Replace lists and we're done.
2069 * XXX: Is there any way to switch list heads instead
2070 * of iterating here?
/* Each filled link is unhooked and pushed onto the head of the process list. */
2072 while (!LIST_EMPTY(&newrules)) {
2073 newlink = LIST_FIRST(&newrules);
2074 LIST_REMOVE(newlink, rrl_next);
2075 LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
2088 * Rule list changed while we were not holding the rctl_lock.
2089 * Free the new list and try again.
2091 while (!LIST_EMPTY(&newrules)) {
2092 newlink = LIST_FIRST(&newrules);
2093 LIST_REMOVE(newlink, rrl_next);
/* Links that never received a rule have no reference to give back. */
2094 if (newlink->rrl_rule != NULL)
2095 rctl_rule_release(newlink->rrl_rule);
2096 uma_zfree(rctl_rule_link_zone, newlink);
2103 * Assign RCTL rules to the newly created process.
/*
 * Builds the rule list of a newly created child from the list of its
 * parent.
 *
 * The child list is initialised empty, then every link of the parent is
 * visited.  A process-typed rule is duplicated with M_NOWAIT, its rs_proc
 * is redirected to the child, and the copy is added to the child racct.
 * The second rctl_racct_add_rule_locked() call presumably handles all
 * other rules by adding the shared rule itself (its arguments are cut off
 * in this excerpt - confirm).
 * The trailing loop empties the child list again, releasing each rule and
 * freeing each link; presumably this is the failure path - confirm.
 */
2106 rctl_proc_fork(struct proc *parent, struct proc *child)
2108 struct rctl_rule *rule;
2109 struct rctl_rule_link *link;
2112 ASSERT_RACCT_ENABLED();
2113 RACCT_LOCK_ASSERT();
2114 KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
2116 LIST_INIT(&child->p_racct->r_rule_links);
2119 * Go through limits applicable to the parent and assign them
2120 * to the child. Rules with 'process' subject have to be duplicated
2121 * in order to make their rr_subject point to the new process.
2123 LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
2124 if (link->rrl_rule->rr_subject_type ==
2125 RCTL_SUBJECT_TYPE_PROCESS) {
/* NOTE(review): no check of the M_NOWAIT result is visible in this excerpt. */
2126 rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
2129 KASSERT(rule->rr_subject.rs_proc == parent,
2130 ("rule->rr_subject.rs_proc != parent"));
2131 rule->rr_subject.rs_proc = child;
2132 error = rctl_racct_add_rule_locked(child->p_racct,
/* The local reference to the duplicate is given up right after the add. */
2134 rctl_rule_release(rule);
2138 error = rctl_racct_add_rule_locked(child->p_racct,
2148 while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
2149 link = LIST_FIRST(&child->p_racct->r_rule_links);
2150 LIST_REMOVE(link, rrl_next);
2151 rctl_rule_release(link->rrl_rule);
2152 uma_zfree(rctl_rule_link_zone, link);
2159 * Release rules attached to the racct.
/*
 * Empties the rule list of the given racct: each link is unhooked, the
 * reference it holds on its rule is released, and the link is returned to
 * rctl_rule_link_zone.  The racct lock is asserted on entry.
 */
2162 rctl_racct_release(struct racct *racct)
2164 struct rctl_rule_link *link;
2166 ASSERT_RACCT_ENABLED();
2167 RACCT_LOCK_ASSERT();
/* Always take the current head, so removal during the walk is safe. */
2169 while (!LIST_EMPTY(&racct->r_rule_links)) {
2170 link = LIST_FIRST(&racct->r_rule_links);
2171 LIST_REMOVE(link, rrl_next);
2172 rctl_rule_release(link->rrl_rule);
2173 uma_zfree(rctl_rule_link_zone, link);
/*
 * NOTE(review): the signature of the enclosing function is not visible in
 * this excerpt; presumably this is the RCTL initialisation routine -
 * confirm.  The visible statements create the two UMA zones used for
 * rules and rule links, then clamp the throttle tunables.
 */
2184 rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
2185 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2186 rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
2187 sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
2191 * Set default values, making sure not to overwrite the ones
2192 * fetched from tunables. Most of those could be set at the
2193 * declaration, except for the rctl_throttle_max - we cannot
2194 * set it there due to hz not being compile time constant.
/* A minimum below 1 is raised to 1. */
2196 if (rctl_throttle_min < 1)
2197 rctl_throttle_min = 1;
/* A maximum below the minimum is replaced by 2 * hz. */
2198 if (rctl_throttle_max < rctl_throttle_min)
2199 rctl_throttle_max = 2 * hz;
/* Negative percentages are replaced by 100. */
2200 if (rctl_throttle_pct < 0)
2201 rctl_throttle_pct = 100;
2202 if (rctl_throttle_pct2 < 0)
2203 rctl_throttle_pct2 = 100;
/*
 * Second definition of sys_rctl_get_racct() in this file, with the same
 * parameter list as the earlier one.
 * NOTE(review): the body is not visible in this excerpt; presumably this
 * is the variant built when the feature is compiled out - confirm.
 */
2209 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
/*
 * Second definition of sys_rctl_get_rules() in this file, with the same
 * parameter list as the earlier one.
 * NOTE(review): the body is not visible in this excerpt; presumably this
 * is the variant built when the feature is compiled out - confirm.
 */
2216 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
/*
 * Second definition of sys_rctl_get_limits() in this file, with the same
 * parameter list as the earlier one.
 * NOTE(review): the body is not visible in this excerpt; presumably this
 * is the variant built when the feature is compiled out - confirm.
 */
2223 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
/*
 * Second definition of sys_rctl_add_rule() in this file, with the same
 * parameter list as the earlier one.
 * NOTE(review): the body is not visible in this excerpt; presumably this
 * is the variant built when the feature is compiled out - confirm.
 */
2230 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
2237 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)