2 * SPDX-License-Identifier: BSD-2-Clause
4 * Copyright (c) 2010 The FreeBSD Foundation
6 * This software was developed by Edward Tomasz Napierala under sponsorship
7 * from the FreeBSD Foundation.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/devctl.h>
34 #include <sys/malloc.h>
35 #include <sys/queue.h>
36 #include <sys/refcount.h>
38 #include <sys/kernel.h>
39 #include <sys/limits.h>
40 #include <sys/loginclass.h>
43 #include <sys/racct.h>
45 #include <sys/resourcevar.h>
47 #include <sys/sysproto.h>
48 #include <sys/systm.h>
49 #include <sys/types.h>
50 #include <sys/eventhandler.h>
52 #include <sys/mutex.h>
53 #include <sys/rwlock.h>
55 #include <sys/taskqueue.h>
61 #error "The RCTL option requires the RACCT option"
64 FEATURE(rctl, "Resource Limits");
/*
 * Flag values.
 */
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/*
 * Buffer size limits.  The replacement lists are parenthesized so that
 * the macros behave as single values inside larger expressions, e.g.
 * "x / RCTL_MAX_INBUFSIZE" or "x % RCTL_MAX_OUTBUFSIZE".
 *
 * RCTL_MAX_OUTBUFSIZE is the default for rctl_maxbufsize;
 * RCTL_LOG_BUFSIZE is the size of the scratch buffer used when a rule
 * is formatted for the log and devctl actions in rctl_enforce().
 */
#define	RCTL_MAX_INBUFSIZE	(4 * 1024)
#define	RCTL_MAX_OUTBUFSIZE	(16 * 1024 * 1024)
#define	RCTL_LOG_BUFSIZE	128

/*
 * Safety margin subtracted from the available %CPU amount in
 * rctl_pcpu_available().
 */
#define	RCTL_PCPU_SHIFT		(10 * 1000000)
/*
 * Run-time knobs and their sysctl/tunable registration.
 *
 * rctl_maxbufsize defaults to RCTL_MAX_OUTBUFSIZE.  The two rate limits
 * default to 10 and are the values handed to ppsratecheck() by the log
 * and devctl actions in rctl_enforce().
 *
 * NOTE(review): rctl_log_rate_limit and rctl_devctl_rate_limit are
 * declared "int" but exported with SYSCTL_UINT - confirm this is
 * intended.
 */
76 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE;
77 static int rctl_log_rate_limit = 10;
78 static int rctl_devctl_rate_limit = 10;
/*
 * The four throttle parameters start out as -1; per the note below
 * they receive their real values in rctl_init().
 */
81 * Values below are initialized in rctl_init().
83 static int rctl_throttle_min = -1;
84 static int rctl_throttle_max = -1;
85 static int rctl_throttle_pct = -1;
86 static int rctl_throttle_pct2 = -1;
/* Forward declarations of the handlers referenced by SYSCTL_PROC below. */
88 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS);
89 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS);
90 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS);
91 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS);
/*
 * Everything below hangs off the kern.racct.rctl node.
 * NOTE(review): the continuation line of this SYSCTL_NODE() invocation
 * is not visible in this excerpt.
 */
93 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
95 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN,
96 &rctl_maxbufsize, 0, "Maximum output buffer size");
97 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW,
98 &rctl_log_rate_limit, 0, "Maximum number of log messages per second");
99 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN,
100 &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second");
/*
 * Each throttle parameter is exposed through its own handler function
 * (so new values can be validated) and is additionally bound to a
 * tunable of the same name.
 */
101 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min,
102 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
103 &rctl_throttle_min_sysctl, "IU",
104 "Shortest throttling duration, in hz");
105 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min);
106 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max,
107 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
108 &rctl_throttle_max_sysctl, "IU",
109 "Longest throttling duration, in hz");
110 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max);
111 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct,
112 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
113 &rctl_throttle_pct_sysctl, "IU",
114 "Throttling penalty for process consumption, in percent");
115 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct);
116 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2,
117 CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
118 &rctl_throttle_pct2_sysctl, "IU",
119 "Throttling penalty for container consumption, in percent");
120 TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2);
/*
 * List node tying one rule to one racct.  Nodes are chained through
 * rrl_next on a racct's r_rule_links list and point at the rule via
 * rrl_rule.
 * NOTE(review): the remainder of the structure is not visible in this
 * excerpt; other code in this file also uses an rrl_exceeded member.
 */
123 * 'rctl_rule_link' connects a rule with every racct it's related to.
124 * For example, rule 'user:X:openfiles:deny=N/process' is linked
125 * with uidinfo for user X, and to each process of that user.
127 struct rctl_rule_link {
128 LIST_ENTRY(rctl_rule_link) rrl_next;
129 struct rctl_rule *rrl_rule;
/*
 * Subject-type keywords and their RCTL_SUBJECT_TYPE_* values.  Used
 * for both directions: str2value() when parsing the subject and "per"
 * fields of a rule, and rctl_subject_type_name() when printing.
 * NOTE(review): the lookup loops stop at an entry whose d_name is
 * NULL; that terminator is not visible in this excerpt.
 */
138 static struct dict subjectnames[] = {
139 { "process", RCTL_SUBJECT_TYPE_PROCESS },
140 { "user", RCTL_SUBJECT_TYPE_USER },
141 { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
142 { "jail", RCTL_SUBJECT_TYPE_JAIL },
/*
 * Resource keywords and their RACCT_* identifiers.  Consulted by
 * str2value() when parsing a rule and by rctl_resource_name() when
 * printing one.
 * NOTE(review): the NULL-named terminator entry expected by the lookup
 * loops is not visible in this excerpt.
 */
145 static struct dict resourcenames[] = {
146 { "cputime", RACCT_CPU },
147 { "datasize", RACCT_DATA },
148 { "stacksize", RACCT_STACK },
149 { "coredumpsize", RACCT_CORE },
150 { "memoryuse", RACCT_RSS },
151 { "memorylocked", RACCT_MEMLOCK },
152 { "maxproc", RACCT_NPROC },
153 { "openfiles", RACCT_NOFILE },
154 { "vmemoryuse", RACCT_VMEM },
155 { "pseudoterminals", RACCT_NPTS },
156 { "swapuse", RACCT_SWAP },
157 { "nthr", RACCT_NTHR },
158 { "msgqqueued", RACCT_MSGQQUEUED },
159 { "msgqsize", RACCT_MSGQSIZE },
160 { "nmsgq", RACCT_NMSGQ },
161 { "nsem", RACCT_NSEM },
162 { "nsemop", RACCT_NSEMOP },
163 { "nshm", RACCT_NSHM },
164 { "shmsize", RACCT_SHMSIZE },
165 { "wallclock", RACCT_WALLCLOCK },
166 { "pcpu", RACCT_PCTCPU },
167 { "readbps", RACCT_READBPS },
168 { "writebps", RACCT_WRITEBPS },
169 { "readiops", RACCT_READIOPS },
170 { "writeiops", RACCT_WRITEIOPS },
/*
 * Action keywords and their RCTL_ACTION_* values.  The "sig*" entries
 * are the signal-delivering actions; rctl_enforce() passes their value
 * straight to kern_psignal().  The last four entries are the
 * non-signal actions handled explicitly in rctl_enforce().
 * NOTE(review): the NULL-named terminator entry expected by the lookup
 * loops is not visible in this excerpt.
 */
173 static struct dict actionnames[] = {
174 { "sighup", RCTL_ACTION_SIGHUP },
175 { "sigint", RCTL_ACTION_SIGINT },
176 { "sigquit", RCTL_ACTION_SIGQUIT },
177 { "sigill", RCTL_ACTION_SIGILL },
178 { "sigtrap", RCTL_ACTION_SIGTRAP },
179 { "sigabrt", RCTL_ACTION_SIGABRT },
180 { "sigemt", RCTL_ACTION_SIGEMT },
181 { "sigfpe", RCTL_ACTION_SIGFPE },
182 { "sigkill", RCTL_ACTION_SIGKILL },
183 { "sigbus", RCTL_ACTION_SIGBUS },
184 { "sigsegv", RCTL_ACTION_SIGSEGV },
185 { "sigsys", RCTL_ACTION_SIGSYS },
186 { "sigpipe", RCTL_ACTION_SIGPIPE },
187 { "sigalrm", RCTL_ACTION_SIGALRM },
188 { "sigterm", RCTL_ACTION_SIGTERM },
189 { "sigurg", RCTL_ACTION_SIGURG },
190 { "sigstop", RCTL_ACTION_SIGSTOP },
191 { "sigtstp", RCTL_ACTION_SIGTSTP },
192 { "sigchld", RCTL_ACTION_SIGCHLD },
193 { "sigttin", RCTL_ACTION_SIGTTIN },
194 { "sigttou", RCTL_ACTION_SIGTTOU },
195 { "sigio", RCTL_ACTION_SIGIO },
196 { "sigxcpu", RCTL_ACTION_SIGXCPU },
197 { "sigxfsz", RCTL_ACTION_SIGXFSZ },
198 { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
199 { "sigprof", RCTL_ACTION_SIGPROF },
200 { "sigwinch", RCTL_ACTION_SIGWINCH },
201 { "siginfo", RCTL_ACTION_SIGINFO },
202 { "sigusr1", RCTL_ACTION_SIGUSR1 },
203 { "sigusr2", RCTL_ACTION_SIGUSR2 },
204 { "sigthr", RCTL_ACTION_SIGTHR },
205 { "deny", RCTL_ACTION_DENY },
206 { "log", RCTL_ACTION_LOG },
207 { "devctl", RCTL_ACTION_DEVCTL },
208 { "throttle", RCTL_ACTION_THROTTLE },
/*
 * File-scope plumbing: rctl_init() is registered through SYSINIT at
 * SI_SUB_RACCT / SI_ORDER_FIRST; two UMA zones back the rule and
 * rule-link allocations made with uma_zalloc()/uma_zfree() in this
 * file; M_RCTL is the malloc type used for the temporary message
 * buffers in rctl_enforce().
 */
211 static void rctl_init(void);
212 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
214 static uma_zone_t rctl_rule_zone;
215 static uma_zone_t rctl_rule_link_zone;
/* Forward declarations for helpers defined further down in the file. */
217 static int rctl_rule_fully_specified(const struct rctl_rule *rule);
218 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
220 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
/*
 * Sysctl handler for kern.racct.rctl.throttle_min.
 *
 * sysctl_handle_int() operates on a local copy of rctl_throttle_min;
 * the global is only overwritten after the checks below.
 * NOTE(review): the return statements are not visible in this excerpt,
 * so the error codes produced by the two checks cannot be stated here.
 */
222 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS)
224 int error, val = rctl_throttle_min;
226 error = sysctl_handle_int(oidp, &val, 0, req);
/* Handler error, or a request that carries no new value. */
227 if (error || !req->newptr)
/* New minimum must be at least 1 and not above the current maximum. */
229 if (val < 1 || val > rctl_throttle_max)
233 rctl_throttle_min = val;
/*
 * Sysctl handler for kern.racct.rctl.throttle_max.
 *
 * Same shape as the throttle_min handler: work on a local copy, check
 * it, then store it.
 * NOTE(review): return statements are not visible in this excerpt.
 */
239 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS)
241 int error, val = rctl_throttle_max;
243 error = sysctl_handle_int(oidp, &val, 0, req);
/* Handler error, or a request that carries no new value. */
244 if (error || !req->newptr)
/* New maximum must not drop below the current minimum. */
246 if (val < rctl_throttle_min)
250 rctl_throttle_max = val;
/*
 * Sysctl handler for kern.racct.rctl.throttle_pct (described in its
 * sysctl registration as the penalty for process consumption, in
 * percent).
 * NOTE(review): any range check between the handler call and the
 * store, and the return statements, are not visible in this excerpt.
 */
256 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS)
258 int error, val = rctl_throttle_pct;
260 error = sysctl_handle_int(oidp, &val, 0, req);
/* Handler error, or a request that carries no new value. */
261 if (error || !req->newptr)
267 rctl_throttle_pct = val;
/*
 * Sysctl handler for kern.racct.rctl.throttle_pct2 (described in its
 * sysctl registration as the penalty for container consumption, in
 * percent).
 * NOTE(review): any range check between the handler call and the
 * store, and the return statements, are not visible in this excerpt.
 */
273 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS)
275 int error, val = rctl_throttle_pct2;
277 error = sysctl_handle_int(oidp, &val, 0, req);
/* Handler error, or a request that carries no new value. */
278 if (error || !req->newptr)
284 rctl_throttle_pct2 = val;
/*
 * Translate a subject-type value into its keyword.
 *
 * Scans subjectnames[] up to the entry with a NULL d_name and returns
 * the d_name of the first entry whose d_value equals 'subject'.
 * Panics when the value is not in the table.
 */
291 rctl_subject_type_name(int subject)
295 for (i = 0; subjectnames[i].d_name != NULL; i++) {
296 if (subjectnames[i].d_value == subject)
297 return (subjectnames[i].d_name);
300 panic("rctl_subject_type_name: unknown subject type %d", subject);
/*
 * Translate an action value into its keyword.
 *
 * Scans actionnames[] up to the entry with a NULL d_name and returns
 * the d_name of the first entry whose d_value equals 'action'.
 * Panics when the value is not in the table.
 */
304 rctl_action_name(int action)
308 for (i = 0; actionnames[i].d_name != NULL; i++) {
309 if (actionnames[i].d_value == action)
310 return (actionnames[i].d_name);
313 panic("rctl_action_name: unknown action %d", action);
/*
 * Translate a resource identifier into its keyword.
 *
 * Scans resourcenames[] up to the entry with a NULL d_name and returns
 * the d_name of the first entry whose d_value equals 'resource'.
 * Panics when the value is not in the table.
 */
317 rctl_resource_name(int resource)
321 for (i = 0; resourcenames[i].d_name != NULL; i++) {
322 if (resourcenames[i].d_value == resource)
323 return (resourcenames[i].d_name);
326 panic("rctl_resource_name: unknown resource %d", resource);
/*
 * Pick the racct against which 'rule' is measured for process 'p',
 * based on the rule's "per" field (rr_per):
 *   user       -> racct of the real-uid uidinfo in p's credentials
 *   loginclass -> racct of the credentials' login class
 *   jail       -> racct of the credentials' prison
 * Panics on an unrecognized rr_per.
 * NOTE(review): the value returned for the per-process case is not
 * visible in this excerpt (presumably the process's own racct).
 */
329 static struct racct *
330 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule)
332 struct ucred *cred = p->p_ucred;
334 ASSERT_RACCT_ENABLED();
337 switch (rule->rr_per) {
338 case RCTL_SUBJECT_TYPE_PROCESS:
340 case RCTL_SUBJECT_TYPE_USER:
341 return (cred->cr_ruidinfo->ui_racct);
342 case RCTL_SUBJECT_TYPE_LOGINCLASS:
343 return (cred->cr_loginclass->lc_racct);
344 case RCTL_SUBJECT_TYPE_JAIL:
345 return (cred->cr_prison->pr_prison_racct->prr_racct);
347 panic("%s: unknown per %d", __func__, rule->rr_per);
/*
 * Headroom left under a single rule: the rule's limit (rr_amount)
 * minus the current usage of rr_resource in the racct selected by
 * rctl_proc_rule_to_racct().  The subtraction is not clamped here, so
 * the result is negative when usage already exceeds the limit
 * (rctl_enforce() relies on that when it negates 'available').
 */
352 * Return the amount of resource that can be allocated by 'p' before
356 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
358 const struct racct *racct;
361 ASSERT_RACCT_ENABLED();
364 racct = rctl_proc_rule_to_racct(p, rule);
365 available = rule->rr_amount - racct->r_resources[rule->rr_resource];
/*
 * Decay the usage counter of 'resource' in 'racct'.
 *
 * First the smallest limit (rr_amount) among the linked rules is
 * determined, then the counter is either reset to zero (usage below
 * that limit) or capped at ten times the limit and reduced by one
 * limit's worth.
 * NOTE(review): the statements guarded by the two rule filters in the
 * loop are not visible in this excerpt; presumably they skip rules for
 * other resources and rules whose action is not "throttle".
 */
371 * Called every second for proc, uidinfo, loginclass, and jail containers.
372 * If the limit isn't exceeded, it decreases the usage amount to zero.
373 * Otherwise, it decreases it by the value of the limit. This way
374 * resource consumption exceeding the limit "carries over" to the next
378 rctl_throttle_decay(struct racct *racct, int resource)
380 struct rctl_rule *rule;
381 struct rctl_rule_link *link;
382 int64_t minavailable;
384 ASSERT_RACCT_ENABLED();
/* INT64_MAX acts as "no applicable rule seen yet". */
387 minavailable = INT64_MAX;
389 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
390 rule = link->rrl_rule;
392 if (rule->rr_resource != resource)
394 if (rule->rr_action != RCTL_ACTION_THROTTLE)
397 if (rule->rr_amount < minavailable)
398 minavailable = rule->rr_amount;
401 if (racct->r_resources[resource] < minavailable) {
402 racct->r_resources[resource] = 0;
405 * Cap utilization counter at ten times the limit. Otherwise,
406 * if we changed the rule lowering the allowed amount, it could
407 * take unreasonably long time for the accumulated resource
/*
 * NOTE(review): "minavailable * 10" is a signed 64-bit multiply; it
 * would overflow if minavailable were still INT64_MAX on this path -
 * confirm that cannot happen.
 */
410 if (racct->r_resources[resource] > minavailable * 10)
411 racct->r_resources[resource] = minavailable * 10;
413 racct->r_resources[resource] -= minavailable;
/*
 * Available %CPU for process 'p', deliberately under-reported.
 *
 * Finds the rule with the least headroom (as computed by
 * rctl_available_resource()) among the rules linked to p->p_racct,
 * remembering that rule's limit, and then subtracts a margin: either
 * RCTL_PCPU_SHIFT or half of the limit, selected by the comparison of
 * the limit against twice RCTL_PCPU_SHIFT.
 * NOTE(review): the statements guarded by the two rule filters
 * (presumably "continue"), the "else" between the two subtractions and
 * any initialization of 'limit' are not visible in this excerpt.
 */
418 * Special version of rctl_get_available() for the %CPU resource.
419 * We slightly cheat here and return less than we normally would.
422 rctl_pcpu_available(const struct proc *p) {
423 struct rctl_rule *rule;
424 struct rctl_rule_link *link;
425 int64_t available, minavailable, limit;
427 ASSERT_RACCT_ENABLED();
430 minavailable = INT64_MAX;
433 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
434 rule = link->rrl_rule;
435 if (rule->rr_resource != RACCT_PCTCPU)
437 if (rule->rr_action != RCTL_ACTION_DENY)
439 available = rctl_available_resource(p, rule);
/* Track the tightest rule and the limit that belongs to it. */
440 if (available < minavailable) {
441 minavailable = available;
442 limit = rule->rr_amount;
447 * Return slightly less than actual value of the available
448 * %cpu resource. This makes %cpu throttling more aggressive
449 * and lets us act sooner than the limits are already exceeded.
452 if (limit > 2 * RCTL_PCPU_SHIFT)
453 minavailable -= RCTL_PCPU_SHIFT;
455 minavailable -= (limit / 2);
458 return (minavailable);
/*
 * Addition helper for two uint64_t operands, used by the throttle
 * computation in rctl_enforce().
 * NOTE(review): the body is not visible in this excerpt; judging by
 * its sibling xmul() it presumably guards against wrap-around -
 * confirm.
 */
462 xadd(uint64_t a, uint64_t b)
/*
 * Multiplication helper for two uint64_t operands, used by the
 * throttle computation in rctl_enforce().
 *
 * The visible test is true exactly when a * b would not fit in 64
 * bits; the "b != 0" part keeps the division safe.
 * NOTE(review): what is returned in the overflow and non-overflow
 * cases is not visible in this excerpt (presumably UINT64_MAX and
 * a * b respectively).
 */
478 xmul(uint64_t a, uint64_t b)
481 if (b != 0 && a > UINT64_MAX / b)
/*
 * rctl_enforce(): apply every rule linked to p->p_racct that concerns
 * 'resource' and would be exceeded by allocating 'amount' more.
 *
 * For each such rule the action is dispatched on rr_action:
 *   deny     - handling not visible in this excerpt
 *   log      - print the rule and the offending process, once per
 *              excess (tracked in link->rrl_exceeded), rate limited
 *   devctl   - send a devctl notification, once per excess, rate
 *              limited
 *   throttle - put the process to sleep via racct_proc_throttle()
 *   other    - treated as a signal number and delivered with
 *              kern_psignal(), once per excess
 * A rule whose headroom covers 'amount' has its rrl_exceeded flag
 * cleared instead.
 *
 * NOTE(review): "continue"/"break"/"return" statements, several local
 * declarations and the final return are not visible in this excerpt.
 */
488 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
489 * to what it keeps allocated now. Returns non-zero if the allocation should
490 * be denied, 0 otherwise.
493 rctl_enforce(struct proc *p, int resource, uint64_t amount)
/* Rate-limiter state for ppsratecheck(); static, so shared by all callers. */
495 static struct timeval log_lasttime, devctl_lasttime;
496 static int log_curtime = 0, devctl_curtime = 0;
497 struct rctl_rule *rule;
498 struct rctl_rule_link *link;
502 uint64_t sleep_ms, sleep_ratio;
505 ASSERT_RACCT_ENABLED();
509 * There may be more than one matching rule; go through all of them.
510 * Denial should be done last, after logging and sending signals.
512 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
513 rule = link->rrl_rule;
514 if (rule->rr_resource != resource)
517 available = rctl_available_resource(p, rule);
/* Enough headroom: the rule is not (or no longer) exceeded. */
518 if (available >= (int64_t)amount) {
519 link->rrl_exceeded = 0;
523 switch (rule->rr_action) {
524 case RCTL_ACTION_DENY:
527 case RCTL_ACTION_LOG:
529 * If rrl_exceeded != 0, it means we've already
530 * logged a warning for this process.
532 if (link->rrl_exceeded != 0)
536 * If the process state is not fully initialized yet,
537 * we can't access most of the required fields, e.g.
538 * p->p_comm. This happens when called from fork1().
539 * Ignore this rule for now; it will be processed just
540 * after fork, when called from racct_proc_fork_done().
542 if (p->p_state != PRS_NORMAL)
545 if (!ppsratecheck(&log_lasttime, &log_curtime,
546 rctl_log_rate_limit))
/* M_NOWAIT: the allocation may fail instead of sleeping. */
549 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
551 printf("rctl_enforce: out of memory\n");
/* Fixed-length sbuf on top of the buffer; the rule is rendered into it. */
554 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
555 rctl_rule_to_sbuf(&sb, rule);
557 printf("rctl: rule \"%s\" matched by pid %d "
558 "(%s), uid %d, jail %s\n", sbuf_data(&sb),
559 p->p_pid, p->p_comm, p->p_ucred->cr_uid,
560 p->p_ucred->cr_prison->pr_prison_racct->prr_name);
563 link->rrl_exceeded = 1;
565 case RCTL_ACTION_DEVCTL:
/* Same once-per-excess, process-state and rate-limit gating as "log". */
566 if (link->rrl_exceeded != 0)
569 if (p->p_state != PRS_NORMAL)
572 if (!ppsratecheck(&devctl_lasttime, &devctl_curtime,
573 rctl_devctl_rate_limit))
576 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
578 printf("rctl_enforce: out of memory\n");
581 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
582 sbuf_printf(&sb, "rule=");
583 rctl_rule_to_sbuf(&sb, rule);
584 sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
585 p->p_pid, p->p_ucred->cr_ruid,
586 p->p_ucred->cr_prison->pr_prison_racct->prr_name);
588 devctl_notify("RCTL", "rule", "matched",
592 link->rrl_exceeded = 1;
594 case RCTL_ACTION_THROTTLE:
595 if (p->p_state != PRS_NORMAL)
/* A zero limit is throttled with the maximum duration; this also keeps
 * the divisions by rr_amount further down away from zero. */
598 if (rule->rr_amount == 0) {
599 racct_proc_throttle(p, rctl_throttle_max);
604 * Make the process sleep for a fraction of second
605 * proportional to the ratio of process' resource
606 * utilization compared to the limit. The point is
607 * to penalize resource hogs: processes that consume
608 * more of the available resources sleep for longer.
610 * We're trying to defer division until the very end,
611 * to minimize the rounding effects. The following
612 * calculation could have been written in a clearer
615 * sleep_ms = hz * p->p_racct->r_resources[resource] /
617 * sleep_ms *= rctl_throttle_pct / 100;
618 * if (sleep_ms < rctl_throttle_min)
619 * sleep_ms = rctl_throttle_min;
/* Until the final division, sleep_ms is still scaled by rr_amount,
 * which is why the minimum is multiplied by rr_amount as well. */
622 sleep_ms = xmul(hz, p->p_racct->r_resources[resource]);
623 sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100;
624 if (sleep_ms < rctl_throttle_min * rule->rr_amount)
625 sleep_ms = rctl_throttle_min * rule->rr_amount;
628 * Multiply that by the ratio of the resource
629 * consumption for the container compared to the limit,
630 * squared. In other words, a process in a container
631 * that is two times over the limit will be throttled
632 * four times as much for hitting the same rule. The
633 * point is to penalize processes more if the container
634 * itself (eg certain UID or jail) is above the limit.
/* NOTE(review): the condition guarding this assignment is not visible;
 * "-available" is the amount by which the limit is exceeded only when
 * 'available' is negative. */
637 sleep_ratio = -available / rule->rr_amount;
640 sleep_ratio = xmul(sleep_ratio, sleep_ratio);
641 sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100;
642 sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio));
645 * Finally the division.
647 sleep_ms /= rule->rr_amount;
649 if (sleep_ms > rctl_throttle_max)
650 sleep_ms = rctl_throttle_max;
/* NOTE(review): diagnostic output; whether it is compiled in is not
 * visible in this excerpt. */
652 printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n",
653 __func__, p->p_pid, p->p_comm,
654 p->p_racct->r_resources[resource],
655 rule->rr_amount, (uintmax_t)sleep_ms,
656 (uintmax_t)sleep_ratio, (intmax_t)available);
659 KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n",
660 __func__, (uintmax_t)sleep_ms, rctl_throttle_min));
661 racct_proc_throttle(p, sleep_ms);
/* Remaining actions: deliver a signal, once per excess. */
664 if (link->rrl_exceeded != 0)
667 if (p->p_state != PRS_NORMAL)
670 KASSERT(rule->rr_action > 0 &&
671 rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
672 ("rctl_enforce: unknown action %d",
676 * We're using the fact that RCTL_ACTION_SIG* values
677 * are equal to their counterparts from sys/signal.h.
679 kern_psignal(p, rule->rr_action);
680 link->rrl_exceeded = 1;
687 * Return fake error code; the caller should change it
688 * into one proper for the situation - EFSIZ, ENOMEM etc.
/*
 * Smallest limit (rr_amount) among the rules linked to p->p_racct for
 * 'resource'.  The running minimum starts at UINT64_MAX.
 * NOTE(review): the statements guarded by the two filters (presumably
 * "continue", restricting the scan to "deny" rules for 'resource') and
 * the return statement are not visible in this excerpt.  The inherited
 * remark below about logging and signals does not describe anything
 * this function does in the visible code.
 */
697 rctl_get_limit(struct proc *p, int resource)
699 struct rctl_rule *rule;
700 struct rctl_rule_link *link;
701 uint64_t amount = UINT64_MAX;
703 ASSERT_RACCT_ENABLED();
707 * There may be more than one matching rule; go through all of them.
708 * Denial should be done last, after logging and sending signals.
710 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
711 rule = link->rrl_rule;
712 if (rule->rr_resource != resource)
714 if (rule->rr_action != RCTL_ACTION_DENY)
/* rr_amount is compared against the unsigned running minimum. */
716 if (rule->rr_amount < amount)
717 amount = rule->rr_amount;
/*
 * Amount of 'resource' usable by process 'p'.
 *
 * Takes the least headroom reported by rctl_available_resource() over
 * the linked rules, then adds the amount the process itself already
 * has accounted (guarded against exceeding INT64_MAX), and returns the
 * result.
 * NOTE(review): the statements guarded by the two rule filters
 * (presumably "continue") and the statement executed when the result
 * is negative are not visible in this excerpt.
 */
724 rctl_get_available(struct proc *p, int resource)
726 struct rctl_rule *rule;
727 struct rctl_rule_link *link;
728 int64_t available, minavailable, allocated;
730 minavailable = INT64_MAX;
732 ASSERT_RACCT_ENABLED();
736 * There may be more than one matching rule; go through all of them.
737 * Denial should be done last, after logging and sending signals.
739 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
740 rule = link->rrl_rule;
741 if (rule->rr_resource != resource)
743 if (rule->rr_action != RCTL_ACTION_DENY)
745 available = rctl_available_resource(p, rule);
746 if (available < minavailable)
747 minavailable = available;
751 * XXX: Think about this _hard_.
753 allocated = p->p_racct->r_resources[resource];
/* Only add when the sum cannot exceed INT64_MAX. */
754 if (minavailable < INT64_MAX - allocated)
755 minavailable += allocated;
756 if (minavailable < 0)
759 return (minavailable);
/*
 * Compare 'rule' against 'filter', field by field.
 *
 * A filter field holding its "undefined" value (or, for the subject
 * pointer, NULL) is not compared at all, i.e. it acts as a wildcard.
 * Fields compared: subject type and subject pointer, resource, action,
 * amount and "per".  Panics on an unknown filter subject type.
 * NOTE(review): the return statements are not visible in this excerpt;
 * the caller treats a non-zero result as "matches".
 */
763 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
766 ASSERT_RACCT_ENABLED();
768 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
769 if (rule->rr_subject_type != filter->rr_subject_type)
/* Types agree; compare the subject pointer that belongs to the type. */
772 switch (filter->rr_subject_type) {
773 case RCTL_SUBJECT_TYPE_PROCESS:
774 if (filter->rr_subject.rs_proc != NULL &&
775 rule->rr_subject.rs_proc !=
776 filter->rr_subject.rs_proc)
779 case RCTL_SUBJECT_TYPE_USER:
780 if (filter->rr_subject.rs_uip != NULL &&
781 rule->rr_subject.rs_uip !=
782 filter->rr_subject.rs_uip)
785 case RCTL_SUBJECT_TYPE_LOGINCLASS:
786 if (filter->rr_subject.rs_loginclass != NULL &&
787 rule->rr_subject.rs_loginclass !=
788 filter->rr_subject.rs_loginclass)
791 case RCTL_SUBJECT_TYPE_JAIL:
792 if (filter->rr_subject.rs_prison_racct != NULL &&
793 rule->rr_subject.rs_prison_racct !=
794 filter->rr_subject.rs_prison_racct)
798 panic("rctl_rule_matches: unknown subject type %d",
799 filter->rr_subject_type);
803 if (filter->rr_resource != RACCT_UNDEFINED) {
804 if (rule->rr_resource != filter->rr_resource)
808 if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
809 if (rule->rr_action != filter->rr_action)
813 if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
814 if (rule->rr_amount != filter->rr_amount)
818 if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
819 if (rule->rr_per != filter->rr_per)
/*
 * Look 'str' up in a keyword table.
 *
 * The table is scanned up to the entry with a NULL d_name; the
 * comparison is case-insensitive (strcasecmp).  On a hit the entry's
 * d_value is stored through 'value'.
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers store the result in 'error'.
 */
827 str2value(const char *str, int *value, struct dict *table)
834 for (i = 0; table[i].d_name != NULL; i++) {
835 if (strcasecmp(table[i].d_name, str) == 0) {
836 *value = table[i].d_value;
/*
 * Parse a decimal identifier from 'str' into '*value' using strtoul().
 *
 * The comparison of the consumed length against strlen(str) detects
 * input that was not parsed to the end (trailing junk).
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers store the result in 'error'.
 */
845 str2id(const char *str, id_t *value)
852 *value = strtoul(str, &end, 10);
853 if ((size_t)(end - str) != strlen(str))
/*
 * Parse a decimal amount from 'str' into '*value'.
 *
 * The comparison of the consumed length against strlen(str) detects
 * input that was not parsed to the end (trailing junk).
 * NOTE(review): the conversion goes through strtoul(), whose result is
 * an unsigned long, before being stored in an int64_t; where unsigned
 * long is 32 bits wide the accepted range is narrower than the
 * destination, and large unsigned results become negative int64_t
 * values - confirm how the (not visible) remainder of the function
 * deals with that.
 */
860 str2int64(const char *str, int64_t *value)
867 *value = strtoul(str, &end, 10);
868 if ((size_t)(end - str) != strlen(str))
/*
 * Sleepable variant: takes a reference on the rule, allocates a link
 * with M_WAITOK, initializes it (rule pointer set, "exceeded" flag
 * cleared) and inserts it at the head of racct->r_rule_links.
 * The rule must be fully specified (asserted).
 * NOTE(review): any locking around the list insertion is not visible
 * in this excerpt.
 */
878 * Connect the rule to the racct, increasing refcount for the rule.
881 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
883 struct rctl_rule_link *link;
885 ASSERT_RACCT_ENABLED();
886 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
888 rctl_rule_acquire(rule);
889 link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
890 link->rrl_rule = rule;
891 link->rrl_exceeded = 0;
894 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
/*
 * Non-sleeping variant of rctl_racct_add_rule(): the link is allocated
 * with M_NOWAIT, and the rule reference is taken only after the
 * allocation.  The link is then initialized and inserted at the head
 * of racct->r_rule_links.
 * NOTE(review): the handling of a failed allocation and the return
 * statements are not visible in this excerpt.
 */
899 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
901 struct rctl_rule_link *link;
903 ASSERT_RACCT_ENABLED();
904 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
907 link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
910 rctl_rule_acquire(rule);
911 link->rrl_rule = rule;
912 link->rrl_exceeded = 0;
914 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
/*
 * Walk racct->r_rule_links and, for links whose rule is matched by
 * 'filter' (see rctl_rule_matches()), unlink the node, drop the rule
 * reference it held and free the node.  The _SAFE list iterator is
 * used because nodes are removed during the walk.
 * NOTE(review): the counter updates and the return statement are not
 * visible in this excerpt.
 */
920 * Remove limits for a rules matching the filter and release
921 * the refcounts for the rules, possibly freeing them. Returns
922 * the number of limit structures removed.
925 rctl_racct_remove_rules(struct racct *racct,
926 const struct rctl_rule *filter)
928 struct rctl_rule_link *link, *linktmp;
931 ASSERT_RACCT_ENABLED();
934 LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
935 if (!rctl_rule_matches(link->rrl_rule, filter))
938 LIST_REMOVE(link, rrl_next);
939 rctl_rule_release(link->rrl_rule);
940 uma_zfree(rctl_rule_link_zone, link);
/*
 * Take a hold on the object a rule's subject points at, so it stays
 * valid while the rule exists:
 *   jail       -> prison_racct_hold()
 *   user       -> uihold()
 *   loginclass -> loginclass_hold()
 * NULL subject pointers are skipped.  No hold call is visible for the
 * undefined and process subject types.  Panics on an unknown type.
 * Counterpart of rctl_rule_release_subject().
 */
947 rctl_rule_acquire_subject(struct rctl_rule *rule)
950 ASSERT_RACCT_ENABLED();
952 switch (rule->rr_subject_type) {
953 case RCTL_SUBJECT_TYPE_UNDEFINED:
954 case RCTL_SUBJECT_TYPE_PROCESS:
956 case RCTL_SUBJECT_TYPE_JAIL:
957 if (rule->rr_subject.rs_prison_racct != NULL)
958 prison_racct_hold(rule->rr_subject.rs_prison_racct);
960 case RCTL_SUBJECT_TYPE_USER:
961 if (rule->rr_subject.rs_uip != NULL)
962 uihold(rule->rr_subject.rs_uip);
964 case RCTL_SUBJECT_TYPE_LOGINCLASS:
965 if (rule->rr_subject.rs_loginclass != NULL)
966 loginclass_hold(rule->rr_subject.rs_loginclass);
969 panic("rctl_rule_acquire_subject: unknown subject type %d",
970 rule->rr_subject_type);
/*
 * Drop the hold taken on a rule's subject:
 *   jail       -> prison_racct_free()
 *   user       -> uifree()
 *   loginclass -> loginclass_free()
 * NULL subject pointers are skipped.  No release call is visible for
 * the undefined and process subject types.  Panics on an unknown type.
 * Counterpart of rctl_rule_acquire_subject().
 */
975 rctl_rule_release_subject(struct rctl_rule *rule)
978 ASSERT_RACCT_ENABLED();
980 switch (rule->rr_subject_type) {
981 case RCTL_SUBJECT_TYPE_UNDEFINED:
982 case RCTL_SUBJECT_TYPE_PROCESS:
984 case RCTL_SUBJECT_TYPE_JAIL:
985 if (rule->rr_subject.rs_prison_racct != NULL)
986 prison_racct_free(rule->rr_subject.rs_prison_racct);
988 case RCTL_SUBJECT_TYPE_USER:
989 if (rule->rr_subject.rs_uip != NULL)
990 uifree(rule->rr_subject.rs_uip);
992 case RCTL_SUBJECT_TYPE_LOGINCLASS:
993 if (rule->rr_subject.rs_loginclass != NULL)
994 loginclass_free(rule->rr_subject.rs_loginclass);
997 panic("rctl_rule_release_subject: unknown subject type %d",
998 rule->rr_subject_type);
/*
 * Allocate a rule from rctl_rule_zone ('flags' is passed through to
 * uma_zalloc()) and put it into the "nothing specified" state: every
 * field gets its *_UNDEFINED value, all subject pointers are NULL and
 * the reference count starts at 1.
 * NOTE(review): the handling of a failed allocation and the return
 * statement are not visible in this excerpt.
 */
1003 rctl_rule_alloc(int flags)
1005 struct rctl_rule *rule;
1007 ASSERT_RACCT_ENABLED();
1009 rule = uma_zalloc(rctl_rule_zone, flags);
1012 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1013 rule->rr_subject.rs_proc = NULL;
1014 rule->rr_subject.rs_uip = NULL;
1015 rule->rr_subject.rs_loginclass = NULL;
1016 rule->rr_subject.rs_prison_racct = NULL;
1017 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1018 rule->rr_resource = RACCT_UNDEFINED;
1019 rule->rr_action = RCTL_ACTION_UNDEFINED;
1020 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1021 refcount_init(&rule->rr_refcount, 1);
1027 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
1029 struct rctl_rule *copy;
1031 ASSERT_RACCT_ENABLED();
1033 copy = uma_zalloc(rctl_rule_zone, flags);
1036 copy->rr_subject_type = rule->rr_subject_type;
1037 copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
1038 copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
1039 copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
1040 copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
1041 copy->rr_per = rule->rr_per;
1042 copy->rr_resource = rule->rr_resource;
1043 copy->rr_action = rule->rr_action;
1044 copy->rr_amount = rule->rr_amount;
1045 refcount_init(©->rr_refcount, 1);
1046 rctl_rule_acquire_subject(copy);
/*
 * Take an additional reference on 'rule'.  The rule must already be
 * referenced (asserted: rr_refcount > 0).
 */
1052 rctl_rule_acquire(struct rctl_rule *rule)
1055 ASSERT_RACCT_ENABLED();
1056 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1058 refcount_acquire(&rule->rr_refcount);
/*
 * Task handler that destroys a rule: 'context' is the rule, whose
 * reference count must already be zero (asserted).  Drops the hold on
 * the rule's subject and returns the rule to rctl_rule_zone.
 * Scheduled from rctl_rule_release(); 'pending' is not used in the
 * visible code.
 */
1062 rctl_rule_free(void *context, int pending)
1064 struct rctl_rule *rule;
1066 rule = (struct rctl_rule *)context;
1068 ASSERT_RACCT_ENABLED();
1069 KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
1072 * We don't need locking here; rule is guaranteed to be inaccessible.
1075 rctl_rule_release_subject(rule);
1076 uma_zfree(rctl_rule_zone, rule);
/*
 * Drop one reference on 'rule'.  When refcount_release() reports that
 * this was the last one, destruction is not done inline: a task
 * running rctl_rule_free() is queued on taskqueue_thread instead, for
 * the lock-recursion reason given below.
 */
1080 rctl_rule_release(struct rctl_rule *rule)
1083 ASSERT_RACCT_ENABLED();
1084 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1086 if (refcount_release(&rule->rr_refcount)) {
1088 * rctl_rule_release() is often called when iterating
1089 * over all the uidinfo structures in the system,
1090 * holding uihashtbl_lock. Since rctl_rule_free()
1091 * might end up calling uifree(), this would lead
1092 * to lock recursion. Use taskqueue to avoid this.
1094 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
1095 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
/*
 * Check whether every component of 'rule' has been filled in: the
 * subject type must be defined and the subject pointer belonging to
 * that type non-NULL, and resource, action, amount and "per" must not
 * hold their *_UNDEFINED values.  Panics on an unknown subject type.
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers use the result as a boolean inside KASSERT().
 */
1100 rctl_rule_fully_specified(const struct rctl_rule *rule)
1103 ASSERT_RACCT_ENABLED();
1105 switch (rule->rr_subject_type) {
1106 case RCTL_SUBJECT_TYPE_UNDEFINED:
1108 case RCTL_SUBJECT_TYPE_PROCESS:
1109 if (rule->rr_subject.rs_proc == NULL)
1112 case RCTL_SUBJECT_TYPE_USER:
1113 if (rule->rr_subject.rs_uip == NULL)
1116 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1117 if (rule->rr_subject.rs_loginclass == NULL)
1120 case RCTL_SUBJECT_TYPE_JAIL:
1121 if (rule->rr_subject.rs_prison_racct == NULL)
1125 panic("rctl_rule_fully_specified: unknown subject type %d",
1126 rule->rr_subject_type);
1128 if (rule->rr_resource == RACCT_UNDEFINED)
1130 if (rule->rr_action == RCTL_ACTION_UNDEFINED)
1132 if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
1134 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
/*
 * Parse the textual form of a rule,
 *	subject:subject-id:resource:action=amount/per
 * into a freshly allocated rule.
 *
 * 'rulestr' is split in place with strsep(), so the caller's buffer is
 * modified.  A missing or empty field leaves the corresponding rule
 * field undefined, which lets the result serve as a filter.  Subject
 * ids are resolved to kernel objects: pfind() for processes, uifind()
 * for users, loginclass_find() and prison_racct_find() for login
 * classes and jails.  For resources accounted in millions
 * (RACCT_IS_IN_MILLIONS) the amount is scaled by 1000000 after an
 * overflow check.
 *
 * NOTE(review): the error checks following each conversion, the
 * assignment of 'perstr', the store through 'rulep', the cleanup label
 * and the return statements are not visible in this excerpt.
 */
1141 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
1143 struct rctl_rule *rule;
1144 char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
1145 *amountstr, *perstr;
1149 ASSERT_RACCT_ENABLED();
1151 rule = rctl_rule_alloc(M_WAITOK);
/* The action field ends at '=' or '/', the amount field at '/'. */
1153 subjectstr = strsep(&rulestr, ":");
1154 subject_idstr = strsep(&rulestr, ":");
1155 resourcestr = strsep(&rulestr, ":");
1156 actionstr = strsep(&rulestr, "=/");
1157 amountstr = strsep(&rulestr, "/");
1160 if (subjectstr == NULL || subjectstr[0] == '\0')
1161 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1163 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
1168 if (subject_idstr == NULL || subject_idstr[0] == '\0') {
1169 rule->rr_subject.rs_proc = NULL;
1170 rule->rr_subject.rs_uip = NULL;
1171 rule->rr_subject.rs_loginclass = NULL;
1172 rule->rr_subject.rs_prison_racct = NULL;
1174 switch (rule->rr_subject_type) {
1175 case RCTL_SUBJECT_TYPE_UNDEFINED:
1178 case RCTL_SUBJECT_TYPE_PROCESS:
1179 error = str2id(subject_idstr, &id);
/* The caller is expected to hold allproc_lock (asserted). */
1182 sx_assert(&allproc_lock, SA_LOCKED);
1183 rule->rr_subject.rs_proc = pfind(id);
1184 if (rule->rr_subject.rs_proc == NULL) {
/* The process is unlocked again here; only the pointer is kept. */
1188 PROC_UNLOCK(rule->rr_subject.rs_proc);
1190 case RCTL_SUBJECT_TYPE_USER:
1191 error = str2id(subject_idstr, &id);
1194 rule->rr_subject.rs_uip = uifind(id);
1196 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1197 rule->rr_subject.rs_loginclass =
1198 loginclass_find(subject_idstr);
/* A failed lookup is reported as ENAMETOOLONG. */
1199 if (rule->rr_subject.rs_loginclass == NULL) {
1200 error = ENAMETOOLONG;
1204 case RCTL_SUBJECT_TYPE_JAIL:
1205 rule->rr_subject.rs_prison_racct =
1206 prison_racct_find(subject_idstr);
1207 if (rule->rr_subject.rs_prison_racct == NULL) {
1208 error = ENAMETOOLONG;
1213 panic("rctl_string_to_rule: unknown subject type %d",
1214 rule->rr_subject_type);
1218 if (resourcestr == NULL || resourcestr[0] == '\0')
1219 rule->rr_resource = RACCT_UNDEFINED;
1221 error = str2value(resourcestr, &rule->rr_resource,
1227 if (actionstr == NULL || actionstr[0] == '\0')
1228 rule->rr_action = RCTL_ACTION_UNDEFINED;
1230 error = str2value(actionstr, &rule->rr_action, actionnames);
1235 if (amountstr == NULL || amountstr[0] == '\0')
1236 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1238 error = str2int64(amountstr, &rule->rr_amount);
/* Scale to the internal unit; refuse amounts that would overflow. */
1241 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) {
1242 if (rule->rr_amount > INT64_MAX / 1000000) {
1246 rule->rr_amount *= 1000000;
1250 if (perstr == NULL || perstr[0] == '\0')
1251 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1253 error = str2value(perstr, &rule->rr_per, subjectnames);
1262 rctl_rule_release(rule);
/*
 * Install a fully specified rule.
 *
 * Steps visible here:
 *  1. Reject combinations that make no sense with EOPNOTSUPP: "deny"
 *     on a resource that is not deniable (RSS and %CPU excepted),
 *     "throttle" on a non-decaying resource or on %CPU, and a
 *     per-process amount for a "sloppy" resource.
 *  2. Remove rules this one supersedes: for "deny", any rule differing
 *     only in amount (done with a duplicate whose amount is set to
 *     undefined and used as a filter); in every case, an identical
 *     rule.
 *  3. Link the rule to the racct of its subject.
 *  4. For user, loginclass and jail subjects, walk all processes
 *     (allproc_lock asserted locked) and link the rule to the racct of
 *     each process belonging to the subject; for jails the process's
 *     prison and all of its ancestors are checked.
 *
 * NOTE(review): "break"/"continue"/"return" statements, the locking of
 * individual processes and the final return are not visible in this
 * excerpt.
 */
1268 * Link a rule with all the subjects it applies to.
1271 rctl_rule_add(struct rctl_rule *rule)
1275 struct uidinfo *uip;
1277 struct prison_racct *prr;
1278 struct loginclass *lc;
1279 struct rctl_rule *rule2;
1282 ASSERT_RACCT_ENABLED();
1283 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
1286 * Some rules just don't make sense, like "deny" rule for an undeniable
1287 * resource. The exception are the RSS and %CPU resources - they are
1288 * not deniable in the racct sense, but the limit is enforced in
1291 if (rule->rr_action == RCTL_ACTION_DENY &&
1292 !RACCT_IS_DENIABLE(rule->rr_resource) &&
1293 rule->rr_resource != RACCT_RSS &&
1294 rule->rr_resource != RACCT_PCTCPU) {
1295 return (EOPNOTSUPP);
1298 if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1299 !RACCT_IS_DECAYING(rule->rr_resource)) {
1300 return (EOPNOTSUPP);
1303 if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1304 rule->rr_resource == RACCT_PCTCPU) {
1305 return (EOPNOTSUPP);
1308 if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
1309 RACCT_IS_SLOPPY(rule->rr_resource)) {
1310 return (EOPNOTSUPP);
1314 * Make sure there are no duplicated rules. Also, for the "deny"
1315 * rules, remove ones differing only by "amount".
1317 if (rule->rr_action == RCTL_ACTION_DENY) {
/* Temporary filter: same rule with the amount wildcarded. */
1318 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
1319 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
1320 rctl_rule_remove(rule2);
1321 rctl_rule_release(rule2);
1323 rctl_rule_remove(rule);
1325 switch (rule->rr_subject_type) {
1326 case RCTL_SUBJECT_TYPE_PROCESS:
1327 p = rule->rr_subject.rs_proc;
1328 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
1330 rctl_racct_add_rule(p->p_racct, rule);
1332 * In case of per-process rule, we don't have anything more
1337 case RCTL_SUBJECT_TYPE_USER:
1338 uip = rule->rr_subject.rs_uip;
1339 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1340 rctl_racct_add_rule(uip->ui_racct, rule);
1343 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1344 lc = rule->rr_subject.rs_loginclass;
1345 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1346 rctl_racct_add_rule(lc->lc_racct, rule);
1349 case RCTL_SUBJECT_TYPE_JAIL:
1350 prr = rule->rr_subject.rs_prison_racct;
1351 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1352 rctl_racct_add_rule(prr->prr_racct, rule);
1356 panic("rctl_rule_add: unknown subject type %d",
1357 rule->rr_subject_type);
1361 * Now go through all the processes and add the new rule to the ones
1364 sx_assert(&allproc_lock, SA_LOCKED);
1365 FOREACH_PROC_IN_SYSTEM(p) {
1367 switch (rule->rr_subject_type) {
1368 case RCTL_SUBJECT_TYPE_USER:
/* Either the effective or the real uidinfo may match. */
1369 if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1370 cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1373 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1374 if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1377 case RCTL_SUBJECT_TYPE_JAIL:
/* Walk up the prison hierarchy starting at the process's own prison. */
1379 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1380 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1389 panic("rctl_rule_add: unknown subject type %d",
1390 rule->rr_subject_type);
1393 rctl_racct_add_rule(p->p_racct, rule);
/*
 * Hook passed as the second argument to loginclass_racct_foreach(),
 * ui_racct_foreach() and prison_racct_foreach() by the rule walkers in
 * this file.  Takes no arguments.
 * NOTE(review): only the signature is visible in this listing; the body
 * (presumably lock acquisition before the walk) needs to be confirmed.
 */
1400 rctl_rule_pre_callback(void)
/*
 * Hook passed as the third argument to loginclass_racct_foreach(),
 * ui_racct_foreach() and prison_racct_foreach() by the rule walkers in
 * this file.  Takes no arguments.
 * NOTE(review): only the signature is visible in this listing; the body
 * (presumably the counterpart of rctl_rule_pre_callback) needs to be
 * confirmed.
 */
1407 rctl_rule_post_callback(void)
/*
 * Per-racct callback handed to the *_racct_foreach() iterators by
 * rctl_rule_remove().
 *
 * racct: container whose rule links are searched.
 * arg2:  the filter rule (struct rctl_rule *) selecting what to remove.
 * arg3:  pointer to an int running total owned by the caller.
 *
 * RACCT_LOCK_ASSERT() is evaluated before the rule list is modified.
 */
1414 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1416 struct rctl_rule *filter = (struct rctl_rule *)arg2;
1419 ASSERT_RACCT_ENABLED();
1420 RACCT_LOCK_ASSERT();
/* Result of rctl_racct_remove_rules() is presumably the number of links removed. */
1422 found += rctl_racct_remove_rules(racct, filter);
/* Fold the count for this racct into the total kept by the caller. */
1424 *((int *)arg3) += found;
/*
 * rctl_rule_remove(filter): detach every rule matching filter and tally the
 * matches in found.
 *
 * Visible flow: when the filter names one specific process, only the racct
 * of that process is searched.  Otherwise the login-class, user and jail
 * raccts are walked through rctl_rule_remove_callback(), followed by every
 * process in the system.
 *
 * Locking: the all-process walk asserts that allproc_lock is held, so the
 * caller has to acquire it (sys_rctl_remove_rule() takes it shared).
 *
 * NOTE(review): the return statements are missing from this listing;
 * sys_rctl_remove_rule() uses the result as its error value.
 */
1428 * Remove all rules that match the filter.
1431 rctl_rule_remove(struct rctl_rule *filter)
1436 ASSERT_RACCT_ENABLED();
/* Single-process case: the filter carries a concrete process pointer. */
1438 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1439 filter->rr_subject.rs_proc != NULL) {
1440 p = filter->rr_subject.rs_proc;
1442 found = rctl_racct_remove_rules(p->p_racct, filter);
/* General case: found is passed by address so each callback can add to it. */
1449 loginclass_racct_foreach(rctl_rule_remove_callback,
1450 rctl_rule_pre_callback, rctl_rule_post_callback,
1451 filter, (void *)&found);
1452 ui_racct_foreach(rctl_rule_remove_callback,
1453 rctl_rule_pre_callback, rctl_rule_post_callback,
1454 filter, (void *)&found);
1455 prison_racct_foreach(rctl_rule_remove_callback,
1456 rctl_rule_pre_callback, rctl_rule_post_callback,
1457 filter, (void *)&found);
/* Finally strip matching links from the racct of every process. */
1459 sx_assert(&allproc_lock, SA_LOCKED);
1461 FOREACH_PROC_IN_SYSTEM(p) {
1462 found += rctl_racct_remove_rules(p->p_racct, filter);
/*
 * rctl_rule_to_sbuf(sb, rule): append the textual form of rule to sb.
 *
 * Output layout, as built by the sbuf_printf() calls below:
 *   subject-type : subject-id : resource : action = amount [/ per]
 * The subject-id is a pid, a uid, a login class name or a jail racct name
 * depending on rr_subject_type; a NULL subject pointer yields an empty
 * field.  The trailing per part is written only when rr_per differs from
 * rr_subject_type.
 *
 * An unrecognized subject type panics.  The rule is not modified (it is
 * taken through a const pointer).
 */
1472 * Appends a rule to the sbuf.
1475 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1479 ASSERT_RACCT_ENABLED();
1481 sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
/* Subject identifier field; the format depends on the subject type. */
1483 switch (rule->rr_subject_type) {
1484 case RCTL_SUBJECT_TYPE_PROCESS:
1485 if (rule->rr_subject.rs_proc == NULL)
1486 sbuf_printf(sb, ":");
1488 sbuf_printf(sb, "%d:",
1489 rule->rr_subject.rs_proc->p_pid);
1491 case RCTL_SUBJECT_TYPE_USER:
1492 if (rule->rr_subject.rs_uip == NULL)
1493 sbuf_printf(sb, ":");
1495 sbuf_printf(sb, "%d:",
1496 rule->rr_subject.rs_uip->ui_uid);
1498 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1499 if (rule->rr_subject.rs_loginclass == NULL)
1500 sbuf_printf(sb, ":");
1502 sbuf_printf(sb, "%s:",
1503 rule->rr_subject.rs_loginclass->lc_name);
1505 case RCTL_SUBJECT_TYPE_JAIL:
1506 if (rule->rr_subject.rs_prison_racct == NULL)
1507 sbuf_printf(sb, ":");
1509 sbuf_printf(sb, "%s:",
1510 rule->rr_subject.rs_prison_racct->prr_name);
1513 panic("rctl_rule_to_sbuf: unknown subject type %d",
1514 rule->rr_subject_type);
/*
 * A defined amount for a resource flagged RACCT_IS_IN_MILLIONS is adjusted
 * before printing.  NOTE(review): the adjusting statement itself is not
 * visible in this listing (presumably a division by 1000000).
 */
1517 amount = rule->rr_amount;
1518 if (amount != RCTL_AMOUNT_UNDEFINED &&
1519 RACCT_IS_IN_MILLIONS(rule->rr_resource))
1522 sbuf_printf(sb, "%s:%s=%jd",
1523 rctl_resource_name(rule->rr_resource),
1524 rctl_action_name(rule->rr_action),
/* The per suffix is omitted when it would just repeat the subject type. */
1527 if (rule->rr_per != rule->rr_subject_type)
1528 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
/*
 * rctl_read_inbuf(inputstr, inbufp, inbuflen): fetch the input string of a
 * syscall from user space.
 *
 * inputstr: out parameter; presumably receives the allocated kernel copy
 *           on success (the assignment is not visible here).  The syscall
 *           handlers release it with free(inputstr, M_RCTL).
 * inbufp:   source pointer handed to copyinstr().
 * inbuflen: maximum number of bytes copyinstr() may copy.
 *
 * Lengths above RCTL_MAX_INBUFSIZE are rejected before any allocation.
 * NOTE(review): the errno values and the copyinstr() failure path are not
 * visible in this listing.
 */
1532 * Routine used by RCTL syscalls to read in input string.
1535 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1540 ASSERT_RACCT_ENABLED();
1544 if (inbuflen > RCTL_MAX_INBUFSIZE)
/* One byte more than the copyinstr() limit is allocated; M_WAITOK may sleep. */
1547 str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1548 error = copyinstr(inbufp, str, inbuflen, NULL);
/*
 * rctl_write_outbuf(outputsbuf, outbufp, outbuflen): copy a finished sbuf
 * out to the result buffer of a syscall.
 *
 * outputsbuf: sbuf holding the reply; it is deleted here on both visible
 *             paths, so the caller must not use it afterwards.
 * outbufp:    destination pointer handed to copyout().
 * outbuflen:  size of the destination; it must hold the string plus its
 *             terminating NUL (sbuf_len() + 1 bytes).
 *
 * NOTE(review): what is returned for a NULL outputsbuf and for a too-small
 * buffer is not visible in this listing.
 */
1560 * Routine used by RCTL syscalls to write out output string.
1563 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1567 ASSERT_RACCT_ENABLED();
1569 if (outputsbuf == NULL)
1572 sbuf_finish(outputsbuf);
/* Destination too small for the data plus NUL: discard the sbuf. */
1573 if (outbuflen < sbuf_len(outputsbuf) + 1) {
1574 sbuf_delete(outputsbuf);
1577 error = copyout(sbuf_data(outputsbuf), outbufp,
1578 sbuf_len(outputsbuf) + 1);
1579 sbuf_delete(outputsbuf);
/*
 * rctl_racct_to_sbuf(racct, sloppy): format the resource usage stored in
 * racct as a comma-separated list of name=amount pairs.
 *
 * racct:  container whose r_resources[] values are printed.
 * sloppy: when 0, resources for which RACCT_IS_SLOPPY() is true get
 *         special treatment (presumably they are skipped; the statement is
 *         not visible here).
 *
 * Returns a newly created auto-extending sbuf (sbuf_new_auto()); the
 * syscall path passes it to rctl_write_outbuf(), which deletes it.
 */
1583 static struct sbuf *
1584 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1590 ASSERT_RACCT_ENABLED();
1592 sb = sbuf_new_auto();
/* Resource indices run from 0 to RACCT_MAX inclusive. */
1593 for (i = 0; i <= RACCT_MAX; i++) {
1594 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1597 amount = racct->r_resources[i];
/* NOTE(review): the scaling applied to in-millions resources is not visible here. */
1599 if (RACCT_IS_IN_MILLIONS(i))
1601 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
/* Step back one character to drop the comma written after the last pair. */
1603 sbuf_setpos(sb, sbuf_len(sb) - 1);
/*
 * rctl_get_racct system call: report the resource usage of one subject.
 *
 * td:  calling thread, used for the PRIV_RCTL_GET_RACCT privilege check.
 * uap: syscall arguments; inbufp/inbuflen carry the filter string and
 *      outbufp/outbuflen describe the buffer that receives the result.
 *
 * The filter string is parsed with allproc_lock held shared, and the lock
 * stays held while the racct of the subject is formatted.
 * NOTE(review): the error checks between the visible steps, including the
 * handling of a NULL subject pointer, are not shown in this listing.
 */
1608 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1610 struct rctl_rule *filter;
1611 struct sbuf *outputsbuf = NULL;
1613 struct uidinfo *uip;
1614 struct loginclass *lc;
1615 struct prison_racct *prr;
1622 error = priv_check(td, PRIV_RCTL_GET_RACCT);
1626 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1630 sx_slock(&allproc_lock);
1631 error = rctl_string_to_rule(inputstr, &filter);
/* The kernel copy of the input is no longer needed once it is parsed. */
1632 free(inputstr, M_RCTL);
1634 sx_sunlock(&allproc_lock);
/*
 * Format the racct that belongs to the subject named by the filter.
 * Process usage is formatted with sloppy == 0, every other subject type
 * with sloppy == 1.
 */
1638 switch (filter->rr_subject_type) {
1639 case RCTL_SUBJECT_TYPE_PROCESS:
1640 p = filter->rr_subject.rs_proc;
1645 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1647 case RCTL_SUBJECT_TYPE_USER:
1648 uip = filter->rr_subject.rs_uip;
1653 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1655 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1656 lc = filter->rr_subject.rs_loginclass;
1661 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1663 case RCTL_SUBJECT_TYPE_JAIL:
1664 prr = filter->rr_subject.rs_prison_racct;
1669 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
/* The filter reference and the lock are dropped before the copy-out. */
1675 rctl_rule_release(filter);
1676 sx_sunlock(&allproc_lock);
1680 error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
/*
 * Per-racct callback handed to the *_racct_foreach() iterators by
 * sys_rctl_get_rules().
 *
 * racct: container whose rule links are listed.
 * arg2:  the filter rule (struct rctl_rule *).
 * arg3:  the output sbuf (struct sbuf *).
 *
 * Every rule checked with rctl_rule_matches() and accepted is appended to
 * the sbuf followed by a comma.  RACCT_LOCK_ASSERT() is evaluated before
 * the list is read.
 */
1686 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1688 struct rctl_rule *filter = (struct rctl_rule *)arg2;
1689 struct rctl_rule_link *link;
1690 struct sbuf *sb = (struct sbuf *)arg3;
1692 ASSERT_RACCT_ENABLED();
1693 RACCT_LOCK_ASSERT();
1695 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
/* Non-matching rules are presumably skipped; the statement is not visible. */
1696 if (!rctl_rule_matches(link->rrl_rule, filter))
1698 rctl_rule_to_sbuf(sb, link->rrl_rule);
1699 sbuf_printf(sb, ",");
/*
 * rctl_get_rules system call: list every rule matching a filter.
 *
 * td:  calling thread, used for the PRIV_RCTL_GET_RULES privilege check.
 * uap: syscall arguments; inbufp/inbuflen carry the filter string and
 *      outbufp/outbuflen describe the buffer that receives the list.
 *
 * The reply is assembled in a fixed-length sbuf of exactly outbuflen
 * bytes: first the per-process rules of every process, then the rules
 * attached to login classes, users and jails.  allproc_lock is held
 * shared from parsing the filter until the function is done.
 * NOTE(review): error returns between the visible steps are not shown in
 * this listing.
 */
1704 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1707 struct rctl_rule *filter;
1708 struct rctl_rule_link *link;
1710 char *inputstr, *buf;
1717 error = priv_check(td, PRIV_RCTL_GET_RULES);
1721 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1725 sx_slock(&allproc_lock);
1726 error = rctl_string_to_rule(inputstr, &filter);
1727 free(inputstr, M_RCTL);
1729 sx_sunlock(&allproc_lock);
/*
 * Output buffers larger than rctl_maxbufsize are refused.
 * NOTE(review): unlike the same check in sys_rctl_get_limits(), no
 * rctl_rule_release(filter) is visible on this path before the unlock, so
 * the filter reference appears to leak here - confirm and fix.
 */
1733 bufsize = uap->outbuflen;
1734 if (bufsize > rctl_maxbufsize) {
1735 sx_sunlock(&allproc_lock);
/* SBUF_FIXEDLEN: the sbuf cannot grow past the size given by the caller. */
1739 buf = malloc(bufsize, M_RCTL, M_WAITOK);
1740 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1741 KASSERT(sb != NULL, ("sbuf_new failed"));
/* Pass 1: rules whose subject is an individual process. */
1743 FOREACH_PROC_IN_SYSTEM(p) {
1745 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1747 * Non-process rules will be added to the buffer later.
1748 * Adding them here would result in duplicated output.
1750 if (link->rrl_rule->rr_subject_type !=
1751 RCTL_SUBJECT_TYPE_PROCESS)
1753 if (!rctl_rule_matches(link->rrl_rule, filter))
1755 rctl_rule_to_sbuf(sb, link->rrl_rule);
1756 sbuf_printf(sb, ",");
/* Pass 2: rules attached to login classes, users and jails. */
1761 loginclass_racct_foreach(rctl_get_rules_callback,
1762 rctl_rule_pre_callback, rctl_rule_post_callback,
1764 ui_racct_foreach(rctl_get_rules_callback,
1765 rctl_rule_pre_callback, rctl_rule_post_callback,
1767 prison_racct_foreach(rctl_get_rules_callback,
1768 rctl_rule_pre_callback, rctl_rule_post_callback,
/* ENOMEM from the fixed-length sbuf presumably means the list did not fit. */
1770 if (sbuf_error(sb) == ENOMEM) {
1776 * Remove trailing ",".
1778 if (sbuf_len(sb) > 0)
1779 sbuf_setpos(sb, sbuf_len(sb) - 1);
1781 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1783 rctl_rule_release(filter);
1784 sx_sunlock(&allproc_lock);
/*
 * rctl_get_limits system call: list the rules linked to one process.
 *
 * td:  calling thread, used for the PRIV_RCTL_GET_LIMITS privilege check.
 * uap: syscall arguments; inbufp/inbuflen carry the filter string and
 *      outbufp/outbuflen describe the buffer that receives the list.
 *
 * Only filters whose subject is a concrete process are accepted: any other
 * defined subject type yields EOPNOTSUPP, and an undefined subject type or
 * a NULL process pointer is rejected as well (those errno values are not
 * visible here).  Every rejection path releases the filter and drops
 * allproc_lock.
 *
 * Every rule linked to the racct of the process is emitted; no
 * rctl_rule_matches() check is applied in this function.
 */
1790 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1793 struct rctl_rule *filter;
1794 struct rctl_rule_link *link;
1795 char *inputstr, *buf;
1802 error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1806 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1810 sx_slock(&allproc_lock);
1811 error = rctl_string_to_rule(inputstr, &filter);
1812 free(inputstr, M_RCTL);
1814 sx_sunlock(&allproc_lock);
/* Validation of the subject named by the filter. */
1818 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1819 rctl_rule_release(filter);
1820 sx_sunlock(&allproc_lock);
1823 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1824 rctl_rule_release(filter);
1825 sx_sunlock(&allproc_lock);
1826 return (EOPNOTSUPP);
1828 if (filter->rr_subject.rs_proc == NULL) {
1829 rctl_rule_release(filter);
1830 sx_sunlock(&allproc_lock);
/* Output buffers larger than rctl_maxbufsize are refused. */
1834 bufsize = uap->outbuflen;
1835 if (bufsize > rctl_maxbufsize) {
1836 rctl_rule_release(filter);
1837 sx_sunlock(&allproc_lock);
/* SBUF_FIXEDLEN: the sbuf cannot grow past the size given by the caller. */
1841 buf = malloc(bufsize, M_RCTL, M_WAITOK);
1842 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1843 KASSERT(sb != NULL, ("sbuf_new failed"));
1846 LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1848 rctl_rule_to_sbuf(sb, link->rrl_rule);
1849 sbuf_printf(sb, ",");
/* ENOMEM from the fixed-length sbuf presumably means the list did not fit. */
1852 if (sbuf_error(sb) == ENOMEM) {
1859 * Remove trailing ",".
1861 if (sbuf_len(sb) > 0)
1862 sbuf_setpos(sb, sbuf_len(sb) - 1);
1864 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1866 rctl_rule_release(filter);
1867 sx_sunlock(&allproc_lock);
/*
 * rctl_add_rule system call: parse a rule string and install the rule.
 *
 * td:  calling thread, used for the PRIV_RCTL_ADD_RULE privilege check.
 * uap: syscall arguments; inbufp/inbuflen carry the rule string.
 *
 * The rule is parsed and added with allproc_lock held shared.  A rule that
 * is not fully specified is rejected (the errno is not visible here).  The
 * reference obtained from rctl_string_to_rule() is released before the
 * lock is dropped.
 */
1873 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1875 struct rctl_rule *rule;
1882 error = priv_check(td, PRIV_RCTL_ADD_RULE);
1886 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1890 sx_slock(&allproc_lock);
1891 error = rctl_string_to_rule(inputstr, &rule);
1892 free(inputstr, M_RCTL);
1894 sx_sunlock(&allproc_lock);
1898 * The 'per' part of a rule is optional.
/* When it is left out, it defaults to the subject type of the rule. */
1900 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1901 rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1902 rule->rr_per = rule->rr_subject_type;
1904 if (!rctl_rule_fully_specified(rule)) {
1909 error = rctl_rule_add(rule);
1912 rctl_rule_release(rule);
1913 sx_sunlock(&allproc_lock);
/*
 * rctl_remove_rule system call: remove every rule matching a filter.
 *
 * td:  calling thread, used for the PRIV_RCTL_REMOVE_RULE privilege check.
 * uap: syscall arguments; inbufp/inbuflen carry the filter string.
 *
 * The filter is parsed and applied with allproc_lock held shared, which
 * satisfies the lock assertion inside rctl_rule_remove().  The value
 * returned by rctl_rule_remove() becomes the error value of the syscall.
 */
1918 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1920 struct rctl_rule *filter;
1927 error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1931 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1935 sx_slock(&allproc_lock);
1936 error = rctl_string_to_rule(inputstr, &filter);
1937 free(inputstr, M_RCTL);
1939 sx_sunlock(&allproc_lock);
1943 error = rctl_rule_remove(filter);
/* The filter reference is released before the lock is dropped. */
1944 rctl_rule_release(filter);
1945 sx_sunlock(&allproc_lock);
/*
 * rctl_proc_ucred_changed(p, newcred): rebuild the rule list of process p
 * so that it matches the credential newcred.
 *
 * The new list consists of the rules already linked to p whose subject is
 * a process, plus every rule linked to the raccts of the new real-uid
 * uidinfo (cr_ruidinfo), the new login class and the prison racct of the
 * new jail.
 *
 * Visible phases: count the applicable rules, allocate that many empty
 * links with M_WAITOK (which may sleep), fill them in while taking a
 * reference on each rule, free the old list, then move the new links onto
 * p->p_racct->r_rule_links.  If the pre-allocated links run out because
 * the rule lists changed in the meantime, the new list is freed and the
 * whole procedure is retried.
 *
 * The process lock must not be held by the caller (PROC_LOCK_ASSERT with
 * MA_NOTOWNED).
 * NOTE(review): the statements that take and drop the rctl lock and the
 * retry jump are not visible in this listing.
 */
1951 * Update RCTL rule list after credential change.
1954 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1956 LIST_HEAD(, rctl_rule_link) newrules;
1957 struct rctl_rule_link *link, *newlink;
1958 struct uidinfo *newuip;
1959 struct loginclass *newlc;
1960 struct prison_racct *newprr;
1966 PROC_LOCK_ASSERT(p, MA_NOTOWNED);
/* Subjects derived from the new credential. */
1968 newuip = newcred->cr_ruidinfo;
1969 newlc = newcred->cr_loginclass;
1970 newprr = newcred->cr_prison->pr_prison_racct;
1972 LIST_INIT(&newrules);
1976 * First, count the rules that apply to the process with new
1981 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1982 if (link->rrl_rule->rr_subject_type ==
1983 RCTL_SUBJECT_TYPE_PROCESS)
1986 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1988 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1990 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1995 * Create temporary list. We've dropped the rctl_lock in order
1998 for (i = 0; i < rulecnt; i++) {
1999 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
2000 newlink->rrl_rule = NULL;
2001 newlink->rrl_exceeded = 0;
2002 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
2005 newlink = LIST_FIRST(&newrules);
2008 * Assign rules to the newly allocated list entries.
2011 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
2012 if (link->rrl_rule->rr_subject_type ==
2013 RCTL_SUBJECT_TYPE_PROCESS) {
2014 if (newlink == NULL)
2016 rctl_rule_acquire(link->rrl_rule);
2017 newlink->rrl_rule = link->rrl_rule;
2018 newlink->rrl_exceeded = link->rrl_exceeded;
2019 newlink = LIST_NEXT(newlink, rrl_next);
2024 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
2025 if (newlink == NULL)
2027 rctl_rule_acquire(link->rrl_rule);
2028 newlink->rrl_rule = link->rrl_rule;
2029 newlink->rrl_exceeded = link->rrl_exceeded;
2030 newlink = LIST_NEXT(newlink, rrl_next);
2034 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
2035 if (newlink == NULL)
2037 rctl_rule_acquire(link->rrl_rule);
2038 newlink->rrl_rule = link->rrl_rule;
2039 newlink->rrl_exceeded = link->rrl_exceeded;
2040 newlink = LIST_NEXT(newlink, rrl_next);
2044 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
2045 if (newlink == NULL)
2047 rctl_rule_acquire(link->rrl_rule);
2048 newlink->rrl_rule = link->rrl_rule;
2049 newlink->rrl_exceeded = link->rrl_exceeded;
2050 newlink = LIST_NEXT(newlink, rrl_next);
2056 * Free the old rule list.
2058 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
2059 link = LIST_FIRST(&p->p_racct->r_rule_links);
2060 LIST_REMOVE(link, rrl_next);
2061 rctl_rule_release(link->rrl_rule);
2062 uma_zfree(rctl_rule_link_zone, link);
2066 * Replace lists and we're done.
2068 * XXX: Is there any way to switch list heads instead
2069 * of iterating here?
2071 while (!LIST_EMPTY(&newrules)) {
2072 newlink = LIST_FIRST(&newrules);
2073 LIST_REMOVE(newlink, rrl_next);
2074 LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
2087 * Rule list changed while we were not holding the rctl_lock.
2088 * Free the new list and try again.
/* Links that were never filled in still have rrl_rule == NULL. */
2090 while (!LIST_EMPTY(&newrules)) {
2091 newlink = LIST_FIRST(&newrules);
2092 LIST_REMOVE(newlink, rrl_next);
2093 if (newlink->rrl_rule != NULL)
2094 rctl_rule_release(newlink->rrl_rule);
2095 uma_zfree(rctl_rule_link_zone, newlink);
/*
 * rctl_proc_fork(parent, child): give child its own rule list derived from
 * the rule list of parent.
 *
 * Rules whose subject is the parent process are duplicated (M_NOWAIT, so
 * the duplication does not sleep) and retargeted at child; the temporary
 * reference on the duplicate is released after it has been linked.  The
 * second rctl_racct_add_rule_locked() call presumably links all other
 * rules to child without copying them (its argument line is not visible).
 *
 * The caller must hold the RACCT lock (RACCT_LOCK_ASSERT).  The final loop
 * tears the rule list of child down again; presumably this is the failure
 * path, but the branch leading to it is not visible in this listing.
 */
2102 * Assign RCTL rules to the newly created process.
2105 rctl_proc_fork(struct proc *parent, struct proc *child)
2107 struct rctl_rule *rule;
2108 struct rctl_rule_link *link;
2111 ASSERT_RACCT_ENABLED();
2112 RACCT_LOCK_ASSERT();
2113 KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
/* The child starts out with an empty rule list. */
2115 LIST_INIT(&child->p_racct->r_rule_links);
2118 * Go through limits applicable to the parent and assign them
2119 * to the child. Rules with 'process' subject have to be duplicated
2120 * in order to make their rr_subject point to the new process.
2122 LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
2123 if (link->rrl_rule->rr_subject_type ==
2124 RCTL_SUBJECT_TYPE_PROCESS) {
2125 rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
2128 KASSERT(rule->rr_subject.rs_proc == parent,
2129 ("rule->rr_subject.rs_proc != parent"));
2130 rule->rr_subject.rs_proc = child;
2131 error = rctl_racct_add_rule_locked(child->p_racct,
2133 rctl_rule_release(rule);
2137 error = rctl_racct_add_rule_locked(child->p_racct,
/* Teardown: unlink, drop the rule reference and free every link of child. */
2147 while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
2148 link = LIST_FIRST(&child->p_racct->r_rule_links);
2149 LIST_REMOVE(link, rrl_next);
2150 rctl_rule_release(link->rrl_rule);
2151 uma_zfree(rctl_rule_link_zone, link);
/*
 * rctl_racct_release(racct): empty the rule list of racct.
 *
 * Each link is unlinked, the reference it held on its rule is dropped and
 * the link is returned to rctl_rule_link_zone.  The caller must hold the
 * RACCT lock (RACCT_LOCK_ASSERT).
 */
2158 * Release rules attached to the racct.
2161 rctl_racct_release(struct racct *racct)
2163 struct rctl_rule_link *link;
2165 ASSERT_RACCT_ENABLED();
2166 RACCT_LOCK_ASSERT();
/* Pop from the head until the list is empty. */
2168 while (!LIST_EMPTY(&racct->r_rule_links)) {
2169 link = LIST_FIRST(&racct->r_rule_links);
2170 LIST_REMOVE(link, rrl_next);
2171 rctl_rule_release(link->rrl_rule);
2172 uma_zfree(rctl_rule_link_zone, link);
/*
 * Initialization code: creates the UMA zones used for rules and rule links
 * and fills in defaults for the throttle tunables.
 * NOTE(review): the header of the enclosing function is not visible in
 * this listing (presumably the RCTL init routine) - confirm.
 */
2183 rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
2184 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2185 rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
2186 sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
2190 * Set default values, making sure not to overwrite the ones
2191 * fetched from tunables. Most of those could be set at the
2192 * declaration, except for the rctl_throttle_max - we cannot
2193 * set it there due to hz not being compile time constant.
/* Each default is applied only when the current value is out of range. */
2195 if (rctl_throttle_min < 1)
2196 rctl_throttle_min = 1;
/* A maximum below the minimum is replaced by two seconds worth of ticks. */
2197 if (rctl_throttle_max < rctl_throttle_min)
2198 rctl_throttle_max = 2 * hz;
2199 if (rctl_throttle_pct < 0)
2200 rctl_throttle_pct = 100;
2201 if (rctl_throttle_pct2 < 0)
2202 rctl_throttle_pct2 = 100;
/*
 * Second set of definitions for the five RCTL system calls, with the same
 * signatures as the full implementations earlier in the file.
 * NOTE(review): the bodies and the surrounding preprocessor conditionals
 * are not visible in this listing; presumably these are the stubs compiled
 * when RCTL support is not configured - confirm.
 */
2208 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
2215 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
2222 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
2229 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
2236 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)