2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2010 The FreeBSD Foundation
7 * This software was developed by Edward Tomasz Napierala under sponsorship
8 * from the FreeBSD Foundation.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
37 #include <sys/param.h>
39 #include <sys/malloc.h>
40 #include <sys/queue.h>
41 #include <sys/refcount.h>
43 #include <sys/kernel.h>
44 #include <sys/limits.h>
45 #include <sys/loginclass.h>
48 #include <sys/racct.h>
50 #include <sys/resourcevar.h>
52 #include <sys/sysent.h>
53 #include <sys/sysproto.h>
54 #include <sys/systm.h>
55 #include <sys/types.h>
56 #include <sys/eventhandler.h>
58 #include <sys/mutex.h>
59 #include <sys/rwlock.h>
61 #include <sys/taskqueue.h>
67 #error "The RCTL option requires the RACCT option"
70 FEATURE(rctl, "Resource Limits");
/*
 * HRF_* flag bits.  NOTE(review): their consumers are outside this part of
 * the file; confirm the meaning against the users before relying on it.
 */
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/*
 * Buffer size limits.  The expansions are parenthesized so that the macros
 * behave as single values inside larger expressions (e.g. division).
 * RCTL_MAX_OUTBUFSIZE is the default for rctl_maxbufsize; RCTL_LOG_BUFSIZE
 * sizes the scratch buffer used for log and devctl messages.
 */
#define	RCTL_MAX_INBUFSIZE	(4 * 1024)
#define	RCTL_MAX_OUTBUFSIZE	(16 * 1024 * 1024)
#define	RCTL_LOG_BUFSIZE	128

/* Safety margin subtracted from the available %cpu, see rctl_pcpu_available(). */
#define	RCTL_PCPU_SHIFT		(10 * 1000000)
/*
 * Values backing the kern.racct.rctl sysctl nodes, followed by forward
 * declarations of the four throttle_* sysctl handlers.  rctl_maxbufsize
 * defaults to RCTL_MAX_OUTBUFSIZE; the two rate limits are passed to
 * ppsratecheck() in rctl_enforce(); the throttle values start at -1.
 */
82 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE;
83 static int rctl_log_rate_limit = 10;
84 static int rctl_devctl_rate_limit = 10;
87 * Values below are initialized in rctl_init().
89 static int rctl_throttle_min = -1;
90 static int rctl_throttle_max = -1;
91 static int rctl_throttle_pct = -1;
92 static int rctl_throttle_pct2 = -1;
94 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS);
95 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS);
96 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS);
97 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS);
/*
 * kern.racct.rctl sysctl subtree.  maxbufsize, log_rate_limit and
 * devctl_rate_limit export the variables directly; the four throttle_*
 * nodes go through handler functions and each also has a loader tunable
 * of the same name.
 * NOTE(review): log_rate_limit and devctl_rate_limit are declared as int
 * but exported with SYSCTL_UINT - confirm this is intended.
 */
99 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW, 0, "Resource Limits");
100 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN,
101 &rctl_maxbufsize, 0, "Maximum output buffer size");
102 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW,
103 &rctl_log_rate_limit, 0, "Maximum number of log messages per second");
104 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN,
105 &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second");
106 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min,
107 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_min_sysctl, "IU",
108 "Shortest throttling duration, in hz");
109 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min);
110 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max,
111 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_max_sysctl, "IU",
112 "Longest throttling duration, in hz");
113 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max);
114 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct,
115 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct_sysctl, "IU",
116 "Throttling penalty for process consumption, in percent");
117 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct);
118 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2,
119 CTLTYPE_UINT | CTLFLAG_RWTUN, 0, 0, &rctl_throttle_pct2_sysctl, "IU",
120 "Throttling penalty for container consumption, in percent");
121 TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2);
/*
 * List node kept on the r_rule_links list of a racct.  rrl_rule points at
 * the rule; rctl_racct_add_rule() takes a rule reference when creating a
 * link and rctl_racct_remove_rules() drops it when the link is removed.
 */
124 * 'rctl_rule_link' connects a rule with every racct it's related to.
125 * For example, rule 'user:X:openfiles:deny=N/process' is linked
126 * with uidinfo for user X, and to each process of that user.
128 struct rctl_rule_link {
129 LIST_ENTRY(rctl_rule_link) rrl_next;
130 struct rctl_rule *rrl_rule;
/*
 * Subject type keywords and their RCTL_SUBJECT_TYPE_* values.  Searched by
 * str2value() when parsing the subject and per fields of a rule string and
 * by rctl_subject_type_name() for the reverse mapping.  Lookups stop at the
 * first entry whose d_name is NULL.
 */
139 static struct dict subjectnames[] = {
140 { "process", RCTL_SUBJECT_TYPE_PROCESS },
141 { "user", RCTL_SUBJECT_TYPE_USER },
142 { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
143 { "jail", RCTL_SUBJECT_TYPE_JAIL },
/*
 * Resource keywords and their RACCT_* identifiers.  Used by
 * rctl_resource_name() for the value to name direction; lookups stop at
 * the first entry whose d_name is NULL.
 */
146 static struct dict resourcenames[] = {
147 { "cputime", RACCT_CPU },
148 { "datasize", RACCT_DATA },
149 { "stacksize", RACCT_STACK },
150 { "coredumpsize", RACCT_CORE },
151 { "memoryuse", RACCT_RSS },
152 { "memorylocked", RACCT_MEMLOCK },
153 { "maxproc", RACCT_NPROC },
154 { "openfiles", RACCT_NOFILE },
155 { "vmemoryuse", RACCT_VMEM },
156 { "pseudoterminals", RACCT_NPTS },
157 { "swapuse", RACCT_SWAP },
158 { "nthr", RACCT_NTHR },
159 { "msgqqueued", RACCT_MSGQQUEUED },
160 { "msgqsize", RACCT_MSGQSIZE },
161 { "nmsgq", RACCT_NMSGQ },
162 { "nsem", RACCT_NSEM },
163 { "nsemop", RACCT_NSEMOP },
164 { "nshm", RACCT_NSHM },
165 { "shmsize", RACCT_SHMSIZE },
166 { "wallclock", RACCT_WALLCLOCK },
167 { "pcpu", RACCT_PCTCPU },
168 { "readbps", RACCT_READBPS },
169 { "writebps", RACCT_WRITEBPS },
170 { "readiops", RACCT_READIOPS },
171 { "writeiops", RACCT_WRITEIOPS },
/*
 * Action keywords and their RCTL_ACTION_* values: one entry per signal
 * action plus deny, log, devctl and throttle.  Searched by str2value() when
 * parsing the action field of a rule string and by rctl_action_name() for
 * the reverse mapping.
 */
174 static struct dict actionnames[] = {
175 { "sighup", RCTL_ACTION_SIGHUP },
176 { "sigint", RCTL_ACTION_SIGINT },
177 { "sigquit", RCTL_ACTION_SIGQUIT },
178 { "sigill", RCTL_ACTION_SIGILL },
179 { "sigtrap", RCTL_ACTION_SIGTRAP },
180 { "sigabrt", RCTL_ACTION_SIGABRT },
181 { "sigemt", RCTL_ACTION_SIGEMT },
182 { "sigfpe", RCTL_ACTION_SIGFPE },
183 { "sigkill", RCTL_ACTION_SIGKILL },
184 { "sigbus", RCTL_ACTION_SIGBUS },
185 { "sigsegv", RCTL_ACTION_SIGSEGV },
186 { "sigsys", RCTL_ACTION_SIGSYS },
187 { "sigpipe", RCTL_ACTION_SIGPIPE },
188 { "sigalrm", RCTL_ACTION_SIGALRM },
189 { "sigterm", RCTL_ACTION_SIGTERM },
190 { "sigurg", RCTL_ACTION_SIGURG },
191 { "sigstop", RCTL_ACTION_SIGSTOP },
192 { "sigtstp", RCTL_ACTION_SIGTSTP },
193 { "sigchld", RCTL_ACTION_SIGCHLD },
194 { "sigttin", RCTL_ACTION_SIGTTIN },
195 { "sigttou", RCTL_ACTION_SIGTTOU },
196 { "sigio", RCTL_ACTION_SIGIO },
197 { "sigxcpu", RCTL_ACTION_SIGXCPU },
198 { "sigxfsz", RCTL_ACTION_SIGXFSZ },
199 { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
200 { "sigprof", RCTL_ACTION_SIGPROF },
201 { "sigwinch", RCTL_ACTION_SIGWINCH },
202 { "siginfo", RCTL_ACTION_SIGINFO },
203 { "sigusr1", RCTL_ACTION_SIGUSR1 },
204 { "sigusr2", RCTL_ACTION_SIGUSR2 },
205 { "sigthr", RCTL_ACTION_SIGTHR },
206 { "deny", RCTL_ACTION_DENY },
207 { "log", RCTL_ACTION_LOG },
208 { "devctl", RCTL_ACTION_DEVCTL },
209 { "throttle", RCTL_ACTION_THROTTLE },
/*
 * Module plumbing: rctl_init() is registered to run at SI_SUB_RACCT;
 * the two UMA zones back struct rctl_rule and struct rctl_rule_link
 * allocations; M_RCTL is the malloc type used for message buffers.
 */
212 static void rctl_init(void);
213 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
215 static uma_zone_t rctl_rule_zone;
216 static uma_zone_t rctl_rule_link_zone;
218 static int rctl_rule_fully_specified(const struct rctl_rule *rule);
219 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
221 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
223 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS)
225 int error, val = rctl_throttle_min;
227 error = sysctl_handle_int(oidp, &val, 0, req);
228 if (error || !req->newptr)
230 if (val < 1 || val > rctl_throttle_max)
234 rctl_throttle_min = val;
240 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS)
242 int error, val = rctl_throttle_max;
244 error = sysctl_handle_int(oidp, &val, 0, req);
245 if (error || !req->newptr)
247 if (val < rctl_throttle_min)
251 rctl_throttle_max = val;
257 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS)
259 int error, val = rctl_throttle_pct;
261 error = sysctl_handle_int(oidp, &val, 0, req);
262 if (error || !req->newptr)
268 rctl_throttle_pct = val;
274 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS)
276 int error, val = rctl_throttle_pct2;
278 error = sysctl_handle_int(oidp, &val, 0, req);
279 if (error || !req->newptr)
285 rctl_throttle_pct2 = val;
292 rctl_subject_type_name(int subject)
296 for (i = 0; subjectnames[i].d_name != NULL; i++) {
297 if (subjectnames[i].d_value == subject)
298 return (subjectnames[i].d_name);
301 panic("rctl_subject_type_name: unknown subject type %d", subject);
305 rctl_action_name(int action)
309 for (i = 0; actionnames[i].d_name != NULL; i++) {
310 if (actionnames[i].d_value == action)
311 return (actionnames[i].d_name);
314 panic("rctl_action_name: unknown action %d", action);
318 rctl_resource_name(int resource)
322 for (i = 0; resourcenames[i].d_name != NULL; i++) {
323 if (resourcenames[i].d_value == resource)
324 return (resourcenames[i].d_name);
327 panic("rctl_resource_name: unknown resource %d", resource);
330 static struct racct *
331 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule)
333 struct ucred *cred = p->p_ucred;
335 ASSERT_RACCT_ENABLED();
338 switch (rule->rr_per) {
339 case RCTL_SUBJECT_TYPE_PROCESS:
341 case RCTL_SUBJECT_TYPE_USER:
342 return (cred->cr_ruidinfo->ui_racct);
343 case RCTL_SUBJECT_TYPE_LOGINCLASS:
344 return (cred->cr_loginclass->lc_racct);
345 case RCTL_SUBJECT_TYPE_JAIL:
346 return (cred->cr_prison->pr_prison_racct->prr_racct);
348 panic("%s: unknown per %d", __func__, rule->rr_per);
353 * Return the amount of resource that can be allocated by 'p' before
357 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
359 const struct racct *racct;
362 ASSERT_RACCT_ENABLED();
365 racct = rctl_proc_rule_to_racct(p, rule);
366 available = rule->rr_amount - racct->r_resources[rule->rr_resource];
372 * Called every second for proc, uidinfo, loginclass, and jail containers.
373 * If the limit isn't exceeded, it decreases the usage amount to zero.
374 * Otherwise, it decreases it by the value of the limit. This way
375 * resource consumption exceeding the limit "carries over" to the next
379 rctl_throttle_decay(struct racct *racct, int resource)
381 struct rctl_rule *rule;
382 struct rctl_rule_link *link;
383 int64_t minavailable;
385 ASSERT_RACCT_ENABLED();
388 minavailable = INT64_MAX;
390 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
391 rule = link->rrl_rule;
393 if (rule->rr_resource != resource)
395 if (rule->rr_action != RCTL_ACTION_THROTTLE)
398 if (rule->rr_amount < minavailable)
399 minavailable = rule->rr_amount;
402 if (racct->r_resources[resource] < minavailable) {
403 racct->r_resources[resource] = 0;
406 * Cap utilization counter at ten times the limit. Otherwise,
407 * if we changed the rule lowering the allowed amount, it could
408 * take unreasonably long time for the accumulated resource
411 if (racct->r_resources[resource] > minavailable * 10)
412 racct->r_resources[resource] = minavailable * 10;
414 racct->r_resources[resource] -= minavailable;
419 * Special version of rctl_get_available() for the %CPU resource.
420 * We slightly cheat here and return less than we normally would.
423 rctl_pcpu_available(const struct proc *p) {
424 struct rctl_rule *rule;
425 struct rctl_rule_link *link;
426 int64_t available, minavailable, limit;
428 ASSERT_RACCT_ENABLED();
431 minavailable = INT64_MAX;
434 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
435 rule = link->rrl_rule;
436 if (rule->rr_resource != RACCT_PCTCPU)
438 if (rule->rr_action != RCTL_ACTION_DENY)
440 available = rctl_available_resource(p, rule);
441 if (available < minavailable) {
442 minavailable = available;
443 limit = rule->rr_amount;
448 * Return slightly less than actual value of the available
449 * %cpu resource. This makes %cpu throttling more aggressive
450 * and lets us act sooner than the limits are already exceeded.
453 if (limit > 2 * RCTL_PCPU_SHIFT)
454 minavailable -= RCTL_PCPU_SHIFT;
456 minavailable -= (limit / 2);
459 return (minavailable);
/*
 * Saturating addition: returns a + b, or UINT64_MAX if the sum does not
 * fit in 64 bits.
 */
static uint64_t
xadd(uint64_t a, uint64_t b)
{
	uint64_t c;

	c = a + b;

	/*
	 * Detect overflow: an unsigned sum that wrapped is smaller than
	 * either operand.
	 */
	if (c < a || c < b)
		return (UINT64_MAX);

	return (c);
}
/*
 * Saturating multiplication: returns a * b, or UINT64_MAX if the product
 * does not fit in 64 bits.
 */
static uint64_t
xmul(uint64_t a, uint64_t b)
{

	/* a * b overflows exactly when a exceeds UINT64_MAX / b. */
	if (b != 0 && a > UINT64_MAX / b)
		return (UINT64_MAX);

	return (a * b);
}
/*
 * rctl_enforce(p, resource, amount)
 *
 * Walks every rule linked to p->p_racct that targets 'resource'.  A rule
 * whose headroom, as computed by rctl_available_resource(), covers
 * 'amount' has the rrl_exceeded marker on its link cleared.  Otherwise
 * the rule action is carried out:
 *   - log:      prints the rule and process identity, rate-limited by
 *               ppsratecheck() with rctl_log_rate_limit;
 *   - devctl:   sends an "RCTL"/"rule"/"matched" notification, rate-limited
 *               with rctl_devctl_rate_limit;
 *   - throttle: computes a sleep duration with the saturating helpers
 *               xmul()/xadd(), clamps it to rctl_throttle_max and hands it
 *               to racct_proc_throttle();
 *   - signal:   delivers rule->rr_action as a signal via kern_psignal().
 * Log, devctl and signal actions fire once per excess episode, tracked by
 * rrl_exceeded, and are skipped while p->p_state is not PRS_NORMAL.
 * NOTE(review): the deny bookkeeping and the final return statement are
 * not present in this text; confirm against the complete function.
 */
489 * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
490 * to what it keeps allocated now. Returns non-zero if the allocation should
491 * be denied, 0 otherwise.
494 rctl_enforce(struct proc *p, int resource, uint64_t amount)
496 static struct timeval log_lasttime, devctl_lasttime;
497 static int log_curtime = 0, devctl_curtime = 0;
498 struct rctl_rule *rule;
499 struct rctl_rule_link *link;
503 uint64_t sleep_ms, sleep_ratio;
506 ASSERT_RACCT_ENABLED();
510 * There may be more than one matching rule; go through all of them.
511 * Denial should be done last, after logging and sending signals.
513 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
514 rule = link->rrl_rule;
515 if (rule->rr_resource != resource)
518 available = rctl_available_resource(p, rule);
519 if (available >= (int64_t)amount) {
520 link->rrl_exceeded = 0;
524 switch (rule->rr_action) {
525 case RCTL_ACTION_DENY:
528 case RCTL_ACTION_LOG:
530 * If rrl_exceeded != 0, it means we've already
531 * logged a warning for this process.
533 if (link->rrl_exceeded != 0)
537 * If the process state is not fully initialized yet,
538 * we can't access most of the required fields, e.g.
539 * p->p_comm. This happens when called from fork1().
540 * Ignore this rule for now; it will be processed just
541 * after fork, when called from racct_proc_fork_done().
543 if (p->p_state != PRS_NORMAL)
546 if (!ppsratecheck(&log_lasttime, &log_curtime,
547 rctl_log_rate_limit))
550 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
552 printf("rctl_enforce: out of memory\n");
555 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
556 rctl_rule_to_sbuf(&sb, rule);
558 printf("rctl: rule \"%s\" matched by pid %d "
559 "(%s), uid %d, jail %s\n", sbuf_data(&sb),
560 p->p_pid, p->p_comm, p->p_ucred->cr_uid,
561 p->p_ucred->cr_prison->pr_prison_racct->prr_name);
564 link->rrl_exceeded = 1;
566 case RCTL_ACTION_DEVCTL:
567 if (link->rrl_exceeded != 0)
570 if (p->p_state != PRS_NORMAL)
573 if (!ppsratecheck(&devctl_lasttime, &devctl_curtime,
574 rctl_devctl_rate_limit))
577 buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
579 printf("rctl_enforce: out of memory\n");
582 sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
583 sbuf_printf(&sb, "rule=");
584 rctl_rule_to_sbuf(&sb, rule);
585 sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
586 p->p_pid, p->p_ucred->cr_ruid,
587 p->p_ucred->cr_prison->pr_prison_racct->prr_name);
589 devctl_notify_f("RCTL", "rule", "matched",
590 sbuf_data(&sb), M_NOWAIT);
593 link->rrl_exceeded = 1;
595 case RCTL_ACTION_THROTTLE:
596 if (p->p_state != PRS_NORMAL)
600 * Make the process sleep for a fraction of second
601 * proportional to the ratio of process' resource
602 * utilization compared to the limit. The point is
603 * to penalize resource hogs: processes that consume
604 * more of the available resources sleep for longer.
606 * We're trying to defer division until the very end,
607 * to minimize the rounding effects. The following
608 * calculation could have been written in a clearer
611 * sleep_ms = hz * p->p_racct->r_resources[resource] /
613 * sleep_ms *= rctl_throttle_pct / 100;
614 * if (sleep_ms < rctl_throttle_min)
615 * sleep_ms = rctl_throttle_min;
618 sleep_ms = xmul(hz, p->p_racct->r_resources[resource]);
619 sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100;
620 if (sleep_ms < rctl_throttle_min * rule->rr_amount)
621 sleep_ms = rctl_throttle_min * rule->rr_amount;
624 * Multiply that by the ratio of the resource
625 * consumption for the container compared to the limit,
626 * squared. In other words, a process in a container
627 * that is two times over the limit will be throttled
628 * four times as much for hitting the same rule. The
629 * point is to penalize processes more if the container
630 * itself (eg certain UID or jail) is above the limit.
633 sleep_ratio = -available / rule->rr_amount;
636 sleep_ratio = xmul(sleep_ratio, sleep_ratio);
637 sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100;
638 sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio));
641 * Finally the division.
643 sleep_ms /= rule->rr_amount;
645 if (sleep_ms > rctl_throttle_max)
646 sleep_ms = rctl_throttle_max;
648 printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n",
649 __func__, p->p_pid, p->p_comm,
650 p->p_racct->r_resources[resource],
651 rule->rr_amount, (uintmax_t)sleep_ms,
652 (uintmax_t)sleep_ratio, (intmax_t)available);
655 KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n",
656 __func__, (uintmax_t)sleep_ms, rctl_throttle_min));
657 racct_proc_throttle(p, sleep_ms);
660 if (link->rrl_exceeded != 0)
663 if (p->p_state != PRS_NORMAL)
666 KASSERT(rule->rr_action > 0 &&
667 rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
668 ("rctl_enforce: unknown action %d",
672 * We're using the fact that RCTL_ACTION_SIG* values
673 * are equal to their counterparts from sys/signal.h.
675 kern_psignal(p, rule->rr_action);
676 link->rrl_exceeded = 1;
683 * Return fake error code; the caller should change it
684 * into one proper for the situation - EFSIZ, ENOMEM etc.
693 rctl_get_limit(struct proc *p, int resource)
695 struct rctl_rule *rule;
696 struct rctl_rule_link *link;
697 uint64_t amount = UINT64_MAX;
699 ASSERT_RACCT_ENABLED();
703 * There may be more than one matching rule; go through all of them.
704 * Denial should be done last, after logging and sending signals.
706 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
707 rule = link->rrl_rule;
708 if (rule->rr_resource != resource)
710 if (rule->rr_action != RCTL_ACTION_DENY)
712 if (rule->rr_amount < amount)
713 amount = rule->rr_amount;
720 rctl_get_available(struct proc *p, int resource)
722 struct rctl_rule *rule;
723 struct rctl_rule_link *link;
724 int64_t available, minavailable, allocated;
726 minavailable = INT64_MAX;
728 ASSERT_RACCT_ENABLED();
732 * There may be more than one matching rule; go through all of them.
733 * Denial should be done last, after logging and sending signals.
735 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
736 rule = link->rrl_rule;
737 if (rule->rr_resource != resource)
739 if (rule->rr_action != RCTL_ACTION_DENY)
741 available = rctl_available_resource(p, rule);
742 if (available < minavailable)
743 minavailable = available;
747 * XXX: Think about this _hard_.
749 allocated = p->p_racct->r_resources[resource];
750 if (minavailable < INT64_MAX - allocated)
751 minavailable += allocated;
752 if (minavailable < 0)
755 return (minavailable);
759 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
762 ASSERT_RACCT_ENABLED();
764 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
765 if (rule->rr_subject_type != filter->rr_subject_type)
768 switch (filter->rr_subject_type) {
769 case RCTL_SUBJECT_TYPE_PROCESS:
770 if (filter->rr_subject.rs_proc != NULL &&
771 rule->rr_subject.rs_proc !=
772 filter->rr_subject.rs_proc)
775 case RCTL_SUBJECT_TYPE_USER:
776 if (filter->rr_subject.rs_uip != NULL &&
777 rule->rr_subject.rs_uip !=
778 filter->rr_subject.rs_uip)
781 case RCTL_SUBJECT_TYPE_LOGINCLASS:
782 if (filter->rr_subject.rs_loginclass != NULL &&
783 rule->rr_subject.rs_loginclass !=
784 filter->rr_subject.rs_loginclass)
787 case RCTL_SUBJECT_TYPE_JAIL:
788 if (filter->rr_subject.rs_prison_racct != NULL &&
789 rule->rr_subject.rs_prison_racct !=
790 filter->rr_subject.rs_prison_racct)
794 panic("rctl_rule_matches: unknown subject type %d",
795 filter->rr_subject_type);
799 if (filter->rr_resource != RACCT_UNDEFINED) {
800 if (rule->rr_resource != filter->rr_resource)
804 if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
805 if (rule->rr_action != filter->rr_action)
809 if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
810 if (rule->rr_amount != filter->rr_amount)
814 if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
815 if (rule->rr_per != filter->rr_per)
823 str2value(const char *str, int *value, struct dict *table)
830 for (i = 0; table[i].d_name != NULL; i++) {
831 if (strcasecmp(table[i].d_name, str) == 0) {
832 *value = table[i].d_value;
841 str2id(const char *str, id_t *value)
848 *value = strtoul(str, &end, 10);
849 if ((size_t)(end - str) != strlen(str))
856 str2int64(const char *str, int64_t *value)
863 *value = strtoul(str, &end, 10);
864 if ((size_t)(end - str) != strlen(str))
874 * Connect the rule to the racct, increasing refcount for the rule.
877 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
879 struct rctl_rule_link *link;
881 ASSERT_RACCT_ENABLED();
882 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
884 rctl_rule_acquire(rule);
885 link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
886 link->rrl_rule = rule;
887 link->rrl_exceeded = 0;
890 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
895 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
897 struct rctl_rule_link *link;
899 ASSERT_RACCT_ENABLED();
900 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
903 link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
906 rctl_rule_acquire(rule);
907 link->rrl_rule = rule;
908 link->rrl_exceeded = 0;
910 LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
916 * Remove limits for a rules matching the filter and release
917 * the refcounts for the rules, possibly freeing them. Returns
918 * the number of limit structures removed.
921 rctl_racct_remove_rules(struct racct *racct,
922 const struct rctl_rule *filter)
924 struct rctl_rule_link *link, *linktmp;
927 ASSERT_RACCT_ENABLED();
930 LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
931 if (!rctl_rule_matches(link->rrl_rule, filter))
934 LIST_REMOVE(link, rrl_next);
935 rctl_rule_release(link->rrl_rule);
936 uma_zfree(rctl_rule_link_zone, link);
943 rctl_rule_acquire_subject(struct rctl_rule *rule)
946 ASSERT_RACCT_ENABLED();
948 switch (rule->rr_subject_type) {
949 case RCTL_SUBJECT_TYPE_UNDEFINED:
950 case RCTL_SUBJECT_TYPE_PROCESS:
952 case RCTL_SUBJECT_TYPE_JAIL:
953 if (rule->rr_subject.rs_prison_racct != NULL)
954 prison_racct_hold(rule->rr_subject.rs_prison_racct);
956 case RCTL_SUBJECT_TYPE_USER:
957 if (rule->rr_subject.rs_uip != NULL)
958 uihold(rule->rr_subject.rs_uip);
960 case RCTL_SUBJECT_TYPE_LOGINCLASS:
961 if (rule->rr_subject.rs_loginclass != NULL)
962 loginclass_hold(rule->rr_subject.rs_loginclass);
965 panic("rctl_rule_acquire_subject: unknown subject type %d",
966 rule->rr_subject_type);
971 rctl_rule_release_subject(struct rctl_rule *rule)
974 ASSERT_RACCT_ENABLED();
976 switch (rule->rr_subject_type) {
977 case RCTL_SUBJECT_TYPE_UNDEFINED:
978 case RCTL_SUBJECT_TYPE_PROCESS:
980 case RCTL_SUBJECT_TYPE_JAIL:
981 if (rule->rr_subject.rs_prison_racct != NULL)
982 prison_racct_free(rule->rr_subject.rs_prison_racct);
984 case RCTL_SUBJECT_TYPE_USER:
985 if (rule->rr_subject.rs_uip != NULL)
986 uifree(rule->rr_subject.rs_uip);
988 case RCTL_SUBJECT_TYPE_LOGINCLASS:
989 if (rule->rr_subject.rs_loginclass != NULL)
990 loginclass_free(rule->rr_subject.rs_loginclass);
993 panic("rctl_rule_release_subject: unknown subject type %d",
994 rule->rr_subject_type);
999 rctl_rule_alloc(int flags)
1001 struct rctl_rule *rule;
1003 ASSERT_RACCT_ENABLED();
1005 rule = uma_zalloc(rctl_rule_zone, flags);
1008 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1009 rule->rr_subject.rs_proc = NULL;
1010 rule->rr_subject.rs_uip = NULL;
1011 rule->rr_subject.rs_loginclass = NULL;
1012 rule->rr_subject.rs_prison_racct = NULL;
1013 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1014 rule->rr_resource = RACCT_UNDEFINED;
1015 rule->rr_action = RCTL_ACTION_UNDEFINED;
1016 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1017 refcount_init(&rule->rr_refcount, 1);
1023 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
1025 struct rctl_rule *copy;
1027 ASSERT_RACCT_ENABLED();
1029 copy = uma_zalloc(rctl_rule_zone, flags);
1032 copy->rr_subject_type = rule->rr_subject_type;
1033 copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
1034 copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
1035 copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
1036 copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
1037 copy->rr_per = rule->rr_per;
1038 copy->rr_resource = rule->rr_resource;
1039 copy->rr_action = rule->rr_action;
1040 copy->rr_amount = rule->rr_amount;
1041 refcount_init(©->rr_refcount, 1);
1042 rctl_rule_acquire_subject(copy);
1048 rctl_rule_acquire(struct rctl_rule *rule)
1051 ASSERT_RACCT_ENABLED();
1052 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1054 refcount_acquire(&rule->rr_refcount);
1058 rctl_rule_free(void *context, int pending)
1060 struct rctl_rule *rule;
1062 rule = (struct rctl_rule *)context;
1064 ASSERT_RACCT_ENABLED();
1065 KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
1068 * We don't need locking here; rule is guaranteed to be inaccessible.
1071 rctl_rule_release_subject(rule);
1072 uma_zfree(rctl_rule_zone, rule);
1076 rctl_rule_release(struct rctl_rule *rule)
1079 ASSERT_RACCT_ENABLED();
1080 KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1082 if (refcount_release(&rule->rr_refcount)) {
1084 * rctl_rule_release() is often called when iterating
1085 * over all the uidinfo structures in the system,
1086 * holding uihashtbl_lock. Since rctl_rule_free()
1087 * might end up calling uifree(), this would lead
1088 * to lock recursion. Use taskqueue to avoid this.
1090 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
1091 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
1096 rctl_rule_fully_specified(const struct rctl_rule *rule)
1099 ASSERT_RACCT_ENABLED();
1101 switch (rule->rr_subject_type) {
1102 case RCTL_SUBJECT_TYPE_UNDEFINED:
1104 case RCTL_SUBJECT_TYPE_PROCESS:
1105 if (rule->rr_subject.rs_proc == NULL)
1108 case RCTL_SUBJECT_TYPE_USER:
1109 if (rule->rr_subject.rs_uip == NULL)
1112 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1113 if (rule->rr_subject.rs_loginclass == NULL)
1116 case RCTL_SUBJECT_TYPE_JAIL:
1117 if (rule->rr_subject.rs_prison_racct == NULL)
1121 panic("rctl_rule_fully_specified: unknown subject type %d",
1122 rule->rr_subject_type);
1124 if (rule->rr_resource == RACCT_UNDEFINED)
1126 if (rule->rr_action == RCTL_ACTION_UNDEFINED)
1128 if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
1130 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
/*
 * rctl_string_to_rule(rulestr, rulep)
 *
 * Builds a rule from text of the form
 * subject:subject-id:resource:action=amount/per.  The string is split in
 * place with strsep(), so 'rulestr' is modified.  A missing or empty field
 * leaves the corresponding rule field undefined, which lets the result
 * serve as a filter.  Keywords are resolved through str2value(); process
 * and user ids through str2id(); login classes and jails are looked up by
 * name.  For resources flagged by RACCT_IS_IN_MILLIONS() the amount is
 * multiplied by 1000000 after checking that it does not exceed
 * INT64_MAX / 1000000.  The process lookup asserts that allproc_lock is
 * held.
 * NOTE(review): the error-path control flow and the assignment of 'perstr'
 * are not present in this text; the trailing rctl_rule_release() presumably
 * belongs to the failure path - confirm against the complete function.
 */
1137 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
1139 struct rctl_rule *rule;
1140 char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
1141 *amountstr, *perstr;
1145 ASSERT_RACCT_ENABLED();
1147 rule = rctl_rule_alloc(M_WAITOK);
1149 subjectstr = strsep(&rulestr, ":");
1150 subject_idstr = strsep(&rulestr, ":");
1151 resourcestr = strsep(&rulestr, ":");
1152 actionstr = strsep(&rulestr, "=/");
1153 amountstr = strsep(&rulestr, "/");
1156 if (subjectstr == NULL || subjectstr[0] == '\0')
1157 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1159 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
1164 if (subject_idstr == NULL || subject_idstr[0] == '\0') {
1165 rule->rr_subject.rs_proc = NULL;
1166 rule->rr_subject.rs_uip = NULL;
1167 rule->rr_subject.rs_loginclass = NULL;
1168 rule->rr_subject.rs_prison_racct = NULL;
1170 switch (rule->rr_subject_type) {
1171 case RCTL_SUBJECT_TYPE_UNDEFINED:
1174 case RCTL_SUBJECT_TYPE_PROCESS:
1175 error = str2id(subject_idstr, &id);
1178 sx_assert(&allproc_lock, SA_LOCKED);
1179 rule->rr_subject.rs_proc = pfind(id);
1180 if (rule->rr_subject.rs_proc == NULL) {
1184 PROC_UNLOCK(rule->rr_subject.rs_proc);
1186 case RCTL_SUBJECT_TYPE_USER:
1187 error = str2id(subject_idstr, &id);
1190 rule->rr_subject.rs_uip = uifind(id);
1192 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1193 rule->rr_subject.rs_loginclass =
1194 loginclass_find(subject_idstr);
1195 if (rule->rr_subject.rs_loginclass == NULL) {
1196 error = ENAMETOOLONG;
1200 case RCTL_SUBJECT_TYPE_JAIL:
1201 rule->rr_subject.rs_prison_racct =
1202 prison_racct_find(subject_idstr);
1203 if (rule->rr_subject.rs_prison_racct == NULL) {
1204 error = ENAMETOOLONG;
1209 panic("rctl_string_to_rule: unknown subject type %d",
1210 rule->rr_subject_type);
1214 if (resourcestr == NULL || resourcestr[0] == '\0')
1215 rule->rr_resource = RACCT_UNDEFINED;
1217 error = str2value(resourcestr, &rule->rr_resource,
1223 if (actionstr == NULL || actionstr[0] == '\0')
1224 rule->rr_action = RCTL_ACTION_UNDEFINED;
1226 error = str2value(actionstr, &rule->rr_action, actionnames);
1231 if (amountstr == NULL || amountstr[0] == '\0')
1232 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1234 error = str2int64(amountstr, &rule->rr_amount);
1237 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) {
1238 if (rule->rr_amount > INT64_MAX / 1000000) {
1242 rule->rr_amount *= 1000000;
1246 if (perstr == NULL || perstr[0] == '\0')
1247 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1249 error = str2value(perstr, &rule->rr_per, subjectnames);
1258 rctl_rule_release(rule);
/*
 * rctl_rule_add(rule)
 *
 * Installs a fully specified rule.  Returns EOPNOTSUPP for combinations
 * that make no sense: deny on a resource that is neither deniable nor RSS
 * nor %CPU, throttle on a resource that is not decaying or on %CPU, and a
 * per-process rule on a sloppy resource.  Existing identical rules are
 * removed first; for deny rules, rules differing only in amount are removed
 * as well, using a duplicate with the amount undefined as the filter.  The
 * rule is then linked to the racct of its subject and, for user, login
 * class and jail subjects, to processes belonging to that subject (for
 * jails the parent chain of the process prison is examined).  The process
 * walk asserts that allproc_lock is held.
 * NOTE(review): the lines deciding which processes match and the return
 * statements are not present in this text; confirm against the complete
 * function.
 */
1264 * Link a rule with all the subjects it applies to.
1267 rctl_rule_add(struct rctl_rule *rule)
1271 struct uidinfo *uip;
1273 struct prison_racct *prr;
1274 struct loginclass *lc;
1275 struct rctl_rule *rule2;
1278 ASSERT_RACCT_ENABLED();
1279 KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
1282 * Some rules just don't make sense, like "deny" rule for an undeniable
1283 * resource. The exception are the RSS and %CPU resources - they are
1284 * not deniable in the racct sense, but the limit is enforced in
1287 if (rule->rr_action == RCTL_ACTION_DENY &&
1288 !RACCT_IS_DENIABLE(rule->rr_resource) &&
1289 rule->rr_resource != RACCT_RSS &&
1290 rule->rr_resource != RACCT_PCTCPU) {
1291 return (EOPNOTSUPP);
1294 if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1295 !RACCT_IS_DECAYING(rule->rr_resource)) {
1296 return (EOPNOTSUPP);
1299 if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1300 rule->rr_resource == RACCT_PCTCPU) {
1301 return (EOPNOTSUPP);
1304 if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
1305 RACCT_IS_SLOPPY(rule->rr_resource)) {
1306 return (EOPNOTSUPP);
1310 * Make sure there are no duplicated rules. Also, for the "deny"
1311 * rules, remove ones differing only by "amount".
1313 if (rule->rr_action == RCTL_ACTION_DENY) {
1314 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
1315 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
1316 rctl_rule_remove(rule2);
1317 rctl_rule_release(rule2);
1319 rctl_rule_remove(rule);
1321 switch (rule->rr_subject_type) {
1322 case RCTL_SUBJECT_TYPE_PROCESS:
1323 p = rule->rr_subject.rs_proc;
1324 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
1326 rctl_racct_add_rule(p->p_racct, rule);
1328 * In case of per-process rule, we don't have anything more
1333 case RCTL_SUBJECT_TYPE_USER:
1334 uip = rule->rr_subject.rs_uip;
1335 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1336 rctl_racct_add_rule(uip->ui_racct, rule);
1339 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1340 lc = rule->rr_subject.rs_loginclass;
1341 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1342 rctl_racct_add_rule(lc->lc_racct, rule);
1345 case RCTL_SUBJECT_TYPE_JAIL:
1346 prr = rule->rr_subject.rs_prison_racct;
1347 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1348 rctl_racct_add_rule(prr->prr_racct, rule);
1352 panic("rctl_rule_add: unknown subject type %d",
1353 rule->rr_subject_type);
1357 * Now go through all the processes and add the new rule to the ones
1360 sx_assert(&allproc_lock, SA_LOCKED);
1361 FOREACH_PROC_IN_SYSTEM(p) {
1363 switch (rule->rr_subject_type) {
1364 case RCTL_SUBJECT_TYPE_USER:
1365 if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1366 cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1369 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1370 if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1373 case RCTL_SUBJECT_TYPE_JAIL:
1375 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1376 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1385 panic("rctl_rule_add: unknown subject type %d",
1386 rule->rr_subject_type);
1389 rctl_racct_add_rule(p->p_racct, rule);
/*
 * "Pre" hook handed to loginclass_racct_foreach(), ui_racct_foreach() and
 * prison_racct_foreach() together with the per-racct callbacks in this
 * file.  Takes no arguments.
 * NOTE(review): the per-racct callbacks assert RACCT_LOCK_ASSERT(), so this
 * hook presumably acquires the racct lock before the walk; its body is not
 * visible here -- confirm.
 */
1396 rctl_rule_pre_callback(void)
/*
 * "Post" hook handed to loginclass_racct_foreach(), ui_racct_foreach() and
 * prison_racct_foreach() together with the per-racct callbacks in this
 * file.  Takes no arguments.
 * NOTE(review): presumably undoes whatever rctl_rule_pre_callback() set up
 * (likely releasing the racct lock); its body is not visible here --
 * confirm.
 */
1403 rctl_rule_post_callback(void)
/*
 * Per-racct callback used by rctl_rule_remove().
 *
 * racct: container whose rules are filtered.
 * arg2:  the struct rctl_rule filter, passed as void *.
 * arg3:  pointer to an int running total; the value returned by
 *        rctl_racct_remove_rules() for this racct is added to it.
 *
 * Asserts that racct is enabled and that the racct lock assertion holds.
 */
1410 rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
1412 struct rctl_rule *filter = (struct rctl_rule *)arg2;
1415 ASSERT_RACCT_ENABLED();
1416 RACCT_LOCK_ASSERT();
1418 found += rctl_racct_remove_rules(racct, filter);
/* Propagate this racct's count to the caller's accumulator. */
1420 *((int *)arg3) += found;
1424 * Remove all rules that match the filter.
/*
 * Remove the rules matching "filter" and count them in "found".
 *
 * When the filter names a specific process (subject type PROCESS with a
 * non-NULL rs_proc), only that process' racct is searched.  The remaining
 * code walks the login class, user and prison raccts through their
 * foreach iterators and then the racct of every process in the system;
 * that walk asserts that allproc_lock is held.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 * sys_rctl_remove_rule() stores the result in "error", so this presumably
 * returns 0 or an errno value depending on "found" -- confirm.
 */
1427 rctl_rule_remove(struct rctl_rule *filter)
1432 ASSERT_RACCT_ENABLED();
/* Fast path: the filter pins down a single process. */
1434 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1435 filter->rr_subject.rs_proc != NULL) {
1436 p = filter->rr_subject.rs_proc;
1438 found = rctl_racct_remove_rules(p->p_racct, filter);
/* Each iterator adds its removals to "found" via the callback's arg3. */
1445 loginclass_racct_foreach(rctl_rule_remove_callback,
1446 rctl_rule_pre_callback, rctl_rule_post_callback,
1447 filter, (void *)&found);
1448 ui_racct_foreach(rctl_rule_remove_callback,
1449 rctl_rule_pre_callback, rctl_rule_post_callback,
1450 filter, (void *)&found);
1451 prison_racct_foreach(rctl_rule_remove_callback,
1452 rctl_rule_pre_callback, rctl_rule_post_callback,
1453 filter, (void *)&found);
/* The process list is walked below, so allproc_lock must be held. */
1455 sx_assert(&allproc_lock, SA_LOCKED);
1457 FOREACH_PROC_IN_SYSTEM(p) {
1458 found += rctl_racct_remove_rules(p->p_racct, filter);
1468 * Appends a rule to the sbuf.
/*
 * Append the textual form of "rule" to the sbuf "sb".
 *
 * Layout, as produced by the sbuf_printf() calls below:
 *   <subject type>:<subject id>:<resource>:<action>=<amount>[/<per>]
 * The subject id is the pid, uid, login class name or prison racct name,
 * and is left empty when the corresponding rr_subject pointer is NULL.
 * The "/<per>" suffix is written only when rr_per differs from
 * rr_subject_type.
 *
 * An unrecognized subject type ends in panic().
 */
1471 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1475 ASSERT_RACCT_ENABLED();
1477 sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
/* Second field: the subject identifier, or nothing for a wildcard. */
1479 switch (rule->rr_subject_type) {
1480 case RCTL_SUBJECT_TYPE_PROCESS:
1481 if (rule->rr_subject.rs_proc == NULL)
1482 sbuf_printf(sb, ":");
1484 sbuf_printf(sb, "%d:",
1485 rule->rr_subject.rs_proc->p_pid);
1487 case RCTL_SUBJECT_TYPE_USER:
1488 if (rule->rr_subject.rs_uip == NULL)
1489 sbuf_printf(sb, ":");
1491 sbuf_printf(sb, "%d:",
1492 rule->rr_subject.rs_uip->ui_uid);
1494 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1495 if (rule->rr_subject.rs_loginclass == NULL)
1496 sbuf_printf(sb, ":");
1498 sbuf_printf(sb, "%s:",
1499 rule->rr_subject.rs_loginclass->lc_name);
1501 case RCTL_SUBJECT_TYPE_JAIL:
1502 if (rule->rr_subject.rs_prison_racct == NULL)
1503 sbuf_printf(sb, ":");
1505 sbuf_printf(sb, "%s:",
1506 rule->rr_subject.rs_prison_racct->prr_name);
1509 panic("rctl_rule_to_sbuf: unknown subject type %d",
1510 rule->rr_subject_type);
/*
 * NOTE(review): the statement guarded by the condition below is not
 * visible here; presumably it rescales amounts of resources kept in
 * millions before printing -- confirm.
 */
1513 amount = rule->rr_amount;
1514 if (amount != RCTL_AMOUNT_UNDEFINED &&
1515 RACCT_IS_IN_MILLIONS(rule->rr_resource))
1518 sbuf_printf(sb, "%s:%s=%jd",
1519 rctl_resource_name(rule->rr_resource),
1520 rctl_action_name(rule->rr_action),
/* The "per" part is redundant when it equals the subject type. */
1523 if (rule->rr_per != rule->rr_subject_type)
1524 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1528 * Routine used by RCTL syscalls to read in input string.
/*
 * Copy the input string of an RCTL syscall into a newly allocated kernel
 * buffer.
 *
 * inputstr: output pointer for the buffer.  NOTE(review): the store to
 *           *inputstr is not visible in this excerpt -- confirm.
 * inbufp:   source address handed to copyinstr().
 * inbuflen: maximum number of bytes copyinstr() may copy.  A length above
 *           RCTL_MAX_INBUFSIZE is special-cased before allocating
 *           (presumably rejected with an error -- confirm).
 *
 * The buffer is inbuflen + 1 bytes of M_RCTL memory allocated with
 * M_WAITOK; the syscall handlers in this file release it with
 * free(inputstr, M_RCTL).
 */
1531 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1536 ASSERT_RACCT_ENABLED();
1540 if (inbuflen > RCTL_MAX_INBUFSIZE)
1543 str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1544 error = copyinstr(inbufp, str, inbuflen, NULL);
1556 * Routine used by RCTL syscalls to write out output string.
/*
 * Copy the contents of "outputsbuf" to the output buffer of an RCTL
 * syscall.
 *
 * outputsbuf: sbuf holding the reply; a NULL sbuf is special-cased first
 *             (the action taken is not visible in this excerpt).
 * outbufp:    destination address handed to copyout().
 * outbuflen:  size of the destination; it has to hold the sbuf contents
 *             plus one extra byte.
 *
 * The sbuf is finished here and deleted on both the too-small path and the
 * copyout path, so callers must not touch it after this call.
 */
1559 rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
1563 ASSERT_RACCT_ENABLED();
1565 if (outputsbuf == NULL)
1568 sbuf_finish(outputsbuf);
/* "+ 1" accounts for the byte following the sbuf data that is copied too. */
1569 if (outbuflen < sbuf_len(outputsbuf) + 1) {
1570 sbuf_delete(outputsbuf);
1573 error = copyout(sbuf_data(outputsbuf), outbufp,
1574 sbuf_len(outputsbuf) + 1);
1575 sbuf_delete(outputsbuf);
/*
 * Format the amounts stored in racct->r_resources[] into a new sbuf
 * created with sbuf_new_auto(), as "<resource name>=<amount>," entries for
 * resource indices 0 through RACCT_MAX inclusive.
 *
 * racct:  source of the amounts.
 * sloppy: when 0, resources for which RACCT_IS_SLOPPY() is true are
 *         special-cased.  NOTE(review): the guarded statement is not
 *         visible here; presumably such resources are skipped -- confirm.
 *
 * The "," left behind by the last entry is dropped with sbuf_setpos().
 */
1579 static struct sbuf *
1580 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1586 ASSERT_RACCT_ENABLED();
1588 sb = sbuf_new_auto();
1589 for (i = 0; i <= RACCT_MAX; i++) {
1590 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1593 amount = racct->r_resources[i];
/*
 * NOTE(review): the statement guarded by RACCT_IS_IN_MILLIONS() is not
 * visible here; presumably it rescales the amount -- confirm.
 */
1595 if (RACCT_IS_IN_MILLIONS(i))
1597 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
/* Strip the trailing ",". */
1599 sbuf_setpos(sb, sbuf_len(sb) - 1);
/*
 * System call handler reporting the resource amounts accounted to the
 * subject named in the input string.
 *
 * Steps: check PRIV_RCTL_GET_RACCT, read the input string, parse it into a
 * filter rule while holding allproc_lock shared, format the racct of the
 * filter's subject with rctl_racct_to_sbuf(), release the filter, drop the
 * lock, and copy the result out with rctl_write_outbuf().
 *
 * Process subjects are formatted with sloppy == 0; user, login class and
 * jail subjects with sloppy == 1.
 * NOTE(review): the checks between each subject lookup and the
 * rctl_racct_to_sbuf() call are not visible in this excerpt.
 */
1604 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1606 struct rctl_rule *filter;
1607 struct sbuf *outputsbuf = NULL;
1609 struct uidinfo *uip;
1610 struct loginclass *lc;
1611 struct prison_racct *prr;
1618 error = priv_check(td, PRIV_RCTL_GET_RACCT);
1622 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1626 sx_slock(&allproc_lock);
1627 error = rctl_string_to_rule(inputstr, &filter);
/* The input string is not needed once it has been parsed. */
1628 free(inputstr, M_RCTL);
1630 sx_sunlock(&allproc_lock);
1634 switch (filter->rr_subject_type) {
1635 case RCTL_SUBJECT_TYPE_PROCESS:
1636 p = filter->rr_subject.rs_proc;
1641 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1643 case RCTL_SUBJECT_TYPE_USER:
1644 uip = filter->rr_subject.rs_uip;
1649 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1651 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1652 lc = filter->rr_subject.rs_loginclass;
1657 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1659 case RCTL_SUBJECT_TYPE_JAIL:
1660 prr = filter->rr_subject.rs_prison_racct;
1665 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1671 rctl_rule_release(filter);
1672 sx_sunlock(&allproc_lock);
/* rctl_write_outbuf() deletes the sbuf once it is done with it. */
1676 error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
/*
 * Per-racct callback used by sys_rctl_get_rules().
 *
 * racct: container whose r_rule_links list is scanned.
 * arg2:  the struct rctl_rule filter, passed as void *.
 * arg3:  the struct sbuf receiving the output, passed as void *.
 *
 * Rules are tested against the filter with rctl_rule_matches() and written
 * to the sbuf with rctl_rule_to_sbuf(), each followed by a ",".
 * NOTE(review): the statement guarded by the !rctl_rule_matches() test is
 * not visible here; presumably non-matching rules are skipped -- confirm.
 */
1682 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1684 struct rctl_rule *filter = (struct rctl_rule *)arg2;
1685 struct rctl_rule_link *link;
1686 struct sbuf *sb = (struct sbuf *)arg3;
1688 ASSERT_RACCT_ENABLED();
1689 RACCT_LOCK_ASSERT();
1691 LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1692 if (!rctl_rule_matches(link->rrl_rule, filter))
1694 rctl_rule_to_sbuf(sb, link->rrl_rule);
1695 sbuf_printf(sb, ",");
/*
 * System call handler listing the rules that match the filter given in the
 * input string.
 *
 * Steps: check PRIV_RCTL_GET_RULES, read the input string, parse it into a
 * filter rule while holding allproc_lock shared, and collect the matching
 * rules into a fixed-length sbuf backed by a buffer of uap->outbuflen bytes
 * (sizes above rctl_maxbufsize are refused before allocating).
 *
 * Per-process rules are gathered from every process' racct; rules with
 * other subject types are gathered afterwards through the login class,
 * user and prison racct iterators, which avoids listing them once per
 * process.  The trailing "," is removed before the result is copied out
 * with rctl_write_outbuf().
 *
 * Every exit taken after the filter has been parsed has to drop the
 * reference on it and release allproc_lock.
 */
1700 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1703 struct rctl_rule *filter;
1704 struct rctl_rule_link *link;
1706 char *inputstr, *buf;
1713 error = priv_check(td, PRIV_RCTL_GET_RULES);
1717 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1721 sx_slock(&allproc_lock);
1722 error = rctl_string_to_rule(inputstr, &filter);
/* The input string is not needed once it has been parsed. */
1723 free(inputstr, M_RCTL);
1725 sx_sunlock(&allproc_lock);
1729 bufsize = uap->outbuflen;
1730 if (bufsize > rctl_maxbufsize) {
/*
 * Drop the reference on the parsed filter before bailing out, as
 * sys_rctl_get_limits() does on its equivalent path; without this the
 * filter is leaked whenever the requested buffer is too large.
 */
rctl_rule_release(filter);
1731 sx_sunlock(&allproc_lock);
1735 buf = malloc(bufsize, M_RCTL, M_WAITOK);
1736 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1737 KASSERT(sb != NULL, ("sbuf_new failed"));
1739 FOREACH_PROC_IN_SYSTEM(p) {
1741 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1743 * Non-process rules will be added to the buffer later.
1744 * Adding them here would result in duplicated output.
1746 if (link->rrl_rule->rr_subject_type !=
1747 RCTL_SUBJECT_TYPE_PROCESS)
1749 if (!rctl_rule_matches(link->rrl_rule, filter))
1751 rctl_rule_to_sbuf(sb, link->rrl_rule);
1752 sbuf_printf(sb, ",");
1757 loginclass_racct_foreach(rctl_get_rules_callback,
1758 rctl_rule_pre_callback, rctl_rule_post_callback,
1760 ui_racct_foreach(rctl_get_rules_callback,
1761 rctl_rule_pre_callback, rctl_rule_post_callback,
1763 prison_racct_foreach(rctl_get_rules_callback,
1764 rctl_rule_pre_callback, rctl_rule_post_callback,
/* ENOMEM from a fixed-length sbuf means the output did not fit. */
1766 if (sbuf_error(sb) == ENOMEM) {
1772 * Remove trailing ",".
1774 if (sbuf_len(sb) > 0)
1775 sbuf_setpos(sb, sbuf_len(sb) - 1);
1777 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1779 rctl_rule_release(filter);
1780 sx_sunlock(&allproc_lock);
/*
 * System call handler listing every rule attached to one process.
 *
 * After the PRIV_RCTL_GET_LIMITS check and input parsing (done with
 * allproc_lock held shared), the filter has to name a process: an
 * undefined subject type and a NULL rs_proc are special-cased (the errors
 * returned are not visible in this excerpt), and any other non-process
 * subject type fails with EOPNOTSUPP.  Each of these early exits, and the
 * oversized-buffer exit, releases the filter and drops allproc_lock.
 *
 * The output is built in a fixed-length sbuf backed by a buffer of
 * uap->outbuflen bytes: every rule linked from the process' racct is
 * formatted followed by a ",", and the trailing "," is removed before
 * rctl_write_outbuf() copies the result out.
 */
1786 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1789 struct rctl_rule *filter;
1790 struct rctl_rule_link *link;
1791 char *inputstr, *buf;
1798 error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1802 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1806 sx_slock(&allproc_lock);
1807 error = rctl_string_to_rule(inputstr, &filter);
/* The input string is not needed once it has been parsed. */
1808 free(inputstr, M_RCTL);
1810 sx_sunlock(&allproc_lock);
1814 if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1815 rctl_rule_release(filter);
1816 sx_sunlock(&allproc_lock);
1819 if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1820 rctl_rule_release(filter);
1821 sx_sunlock(&allproc_lock);
1822 return (EOPNOTSUPP);
1824 if (filter->rr_subject.rs_proc == NULL) {
1825 rctl_rule_release(filter);
1826 sx_sunlock(&allproc_lock);
1830 bufsize = uap->outbuflen;
1831 if (bufsize > rctl_maxbufsize) {
1832 rctl_rule_release(filter);
1833 sx_sunlock(&allproc_lock);
1837 buf = malloc(bufsize, M_RCTL, M_WAITOK);
1838 sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1839 KASSERT(sb != NULL, ("sbuf_new failed"));
1842 LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1844 rctl_rule_to_sbuf(sb, link->rrl_rule);
1845 sbuf_printf(sb, ",");
/* ENOMEM from a fixed-length sbuf means the output did not fit. */
1848 if (sbuf_error(sb) == ENOMEM) {
1855 * Remove trailing ",".
1857 if (sbuf_len(sb) > 0)
1858 sbuf_setpos(sb, sbuf_len(sb) - 1);
1860 error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1862 rctl_rule_release(filter);
1863 sx_sunlock(&allproc_lock);
/*
 * System call handler adding the rule described by the input string.
 *
 * Checks PRIV_RCTL_ADD_RULE, reads the input and parses it into a rule
 * while holding allproc_lock shared, fills in a missing "per" part from
 * the subject type, and hands the rule to rctl_rule_add().  The reference
 * obtained from parsing is released and the lock dropped afterwards.
 * NOTE(review): what happens to a rule that fails
 * rctl_rule_fully_specified() is not visible in this excerpt.
 */
1869 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1871 struct rctl_rule *rule;
1878 error = priv_check(td, PRIV_RCTL_ADD_RULE);
1882 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1886 sx_slock(&allproc_lock);
1887 error = rctl_string_to_rule(inputstr, &rule);
/* The input string is not needed once it has been parsed. */
1888 free(inputstr, M_RCTL);
1890 sx_sunlock(&allproc_lock);
1894 * The 'per' part of a rule is optional.
1896 if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1897 rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1898 rule->rr_per = rule->rr_subject_type;
1900 if (!rctl_rule_fully_specified(rule)) {
1905 error = rctl_rule_add(rule);
1908 rctl_rule_release(rule);
1909 sx_sunlock(&allproc_lock);
/*
 * System call handler removing the rules that match the filter given in
 * the input string.
 *
 * Checks PRIV_RCTL_REMOVE_RULE, reads the input and parses it into a
 * filter while holding allproc_lock shared, and passes the filter to
 * rctl_rule_remove(), whose result becomes "error".  The filter reference
 * is released and the lock dropped afterwards.
 */
1914 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1916 struct rctl_rule *filter;
1923 error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1927 error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1931 sx_slock(&allproc_lock);
1932 error = rctl_string_to_rule(inputstr, &filter);
/* The input string is not needed once it has been parsed. */
1933 free(inputstr, M_RCTL);
1935 sx_sunlock(&allproc_lock);
1939 error = rctl_rule_remove(filter);
1940 rctl_rule_release(filter);
1941 sx_sunlock(&allproc_lock);
1947 * Update RCTL rule list after credential change.
/*
 * Rebuild the rule list of process "p" to match the credentials "newcred".
 *
 * The new list is made of the per-process rules already linked to the
 * process plus the rules linked to the raccts of the new credential's
 * cr_ruidinfo, login class and prison racct.  It is assembled in stages:
 *   1. the four source lists are walked to size the new list (the counter
 *      updates are not visible in this excerpt);
 *   2. "rulecnt" empty links are allocated with M_WAITOK onto the
 *      temporary list "newrules";
 *   3. the links are filled in, taking a reference on each rule and
 *      carrying over its rrl_exceeded value;
 *   4. the old links are released and freed, and the new links are moved
 *      onto p->p_racct->r_rule_links.
 * The final loop frees the temporary list instead; per its comment, that
 * path is taken when the rule lists changed while the lock was dropped,
 * after which the procedure is retried.
 *
 * Asserts that the process lock is not held by the caller.
 */
1950 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1952 LIST_HEAD(, rctl_rule_link) newrules;
1953 struct rctl_rule_link *link, *newlink;
1954 struct uidinfo *newuip;
1955 struct loginclass *newlc;
1956 struct prison_racct *newprr;
1962 PROC_LOCK_ASSERT(p, MA_NOTOWNED);
1964 newuip = newcred->cr_ruidinfo;
1965 newlc = newcred->cr_loginclass;
1966 newprr = newcred->cr_prison->pr_prison_racct;
1968 LIST_INIT(&newrules);
1972 * First, count the rules that apply to the process with new
1977 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1978 if (link->rrl_rule->rr_subject_type ==
1979 RCTL_SUBJECT_TYPE_PROCESS)
1982 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1984 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1986 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1991 * Create temporary list. We've dropped the rctl_lock in order
1994 for (i = 0; i < rulecnt; i++) {
1995 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
/* Links start out empty; the cleanup loop relies on the NULL rule. */
1996 newlink->rrl_rule = NULL;
1997 newlink->rrl_exceeded = 0;
1998 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
2001 newlink = LIST_FIRST(&newrules);
2004 * Assign rules to the newly allocated list entries.
/*
 * NOTE(review): in each loop below, the statement guarded by
 * "newlink == NULL" is not visible; presumably it bails out to the retry
 * path when more rules exist than were preallocated -- confirm.
 */
2007 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
2008 if (link->rrl_rule->rr_subject_type ==
2009 RCTL_SUBJECT_TYPE_PROCESS) {
2010 if (newlink == NULL)
2012 rctl_rule_acquire(link->rrl_rule);
2013 newlink->rrl_rule = link->rrl_rule;
2014 newlink->rrl_exceeded = link->rrl_exceeded;
2015 newlink = LIST_NEXT(newlink, rrl_next);
2020 LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
2021 if (newlink == NULL)
2023 rctl_rule_acquire(link->rrl_rule);
2024 newlink->rrl_rule = link->rrl_rule;
2025 newlink->rrl_exceeded = link->rrl_exceeded;
2026 newlink = LIST_NEXT(newlink, rrl_next);
2030 LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
2031 if (newlink == NULL)
2033 rctl_rule_acquire(link->rrl_rule);
2034 newlink->rrl_rule = link->rrl_rule;
2035 newlink->rrl_exceeded = link->rrl_exceeded;
2036 newlink = LIST_NEXT(newlink, rrl_next);
2040 LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
2041 if (newlink == NULL)
2043 rctl_rule_acquire(link->rrl_rule);
2044 newlink->rrl_rule = link->rrl_rule;
2045 newlink->rrl_exceeded = link->rrl_exceeded;
2046 newlink = LIST_NEXT(newlink, rrl_next);
2052 * Free the old rule list.
2054 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
2055 link = LIST_FIRST(&p->p_racct->r_rule_links);
2056 LIST_REMOVE(link, rrl_next);
2057 rctl_rule_release(link->rrl_rule);
2058 uma_zfree(rctl_rule_link_zone, link);
2062 * Replace lists and we're done.
2064 * XXX: Is there any way to switch list heads instead
2065 * of iterating here?
2067 while (!LIST_EMPTY(&newrules)) {
2068 newlink = LIST_FIRST(&newrules);
2069 LIST_REMOVE(newlink, rrl_next);
2070 LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
2083 * Rule list changed while we were not holding the rctl_lock.
2084 * Free the new list and try again.
2086 while (!LIST_EMPTY(&newrules)) {
2087 newlink = LIST_FIRST(&newrules);
2088 LIST_REMOVE(newlink, rrl_next);
/* Only links that were already filled in hold a rule reference. */
2089 if (newlink->rrl_rule != NULL)
2090 rctl_rule_release(newlink->rrl_rule);
2091 uma_zfree(rctl_rule_link_zone, newlink);
2098 * Assign RCTL rules to the newly created process.
/*
 * Give the newly created process "child" the rules linked to "parent".
 *
 * The child's rule list is initialized empty and the parent's list is
 * walked.  Rules whose subject type is PROCESS are duplicated with
 * M_NOWAIT, re-pointed at the child and linked through
 * rctl_racct_add_rule_locked(); the duplicate's local reference is dropped
 * afterwards.  Other rules also go through rctl_racct_add_rule_locked()
 * (its arguments are not visible in this excerpt).
 *
 * The final loop empties the child's list again, releasing each rule and
 * freeing each link.
 * NOTE(review): presumably that loop is the failure path taken when the
 * duplication or a link allocation fails; the checks and return statements
 * are not visible here -- confirm.
 *
 * Asserts that racct is enabled and that the racct lock assertion holds.
 */
2101 rctl_proc_fork(struct proc *parent, struct proc *child)
2103 struct rctl_rule *rule;
2104 struct rctl_rule_link *link;
2107 ASSERT_RACCT_ENABLED();
2108 RACCT_LOCK_ASSERT();
2109 KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
2111 LIST_INIT(&child->p_racct->r_rule_links);
2114 * Go through limits applicable to the parent and assign them
2115 * to the child. Rules with 'process' subject have to be duplicated
2116 * in order to make their rr_subject point to the new process.
2118 LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
2119 if (link->rrl_rule->rr_subject_type ==
2120 RCTL_SUBJECT_TYPE_PROCESS) {
2121 rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
2124 KASSERT(rule->rr_subject.rs_proc == parent,
2125 ("rule->rr_subject.rs_proc != parent"));
2126 rule->rr_subject.rs_proc = child;
2127 error = rctl_racct_add_rule_locked(child->p_racct,
/* Drop the local reference taken by the duplication above. */
2129 rctl_rule_release(rule);
2133 error = rctl_racct_add_rule_locked(child->p_racct,
2143 while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
2144 link = LIST_FIRST(&child->p_racct->r_rule_links);
2145 LIST_REMOVE(link, rrl_next);
2146 rctl_rule_release(link->rrl_rule);
2147 uma_zfree(rctl_rule_link_zone, link);
2154 * Release rules attached to the racct.
/*
 * Detach every rule from "racct".
 *
 * Links are popped from the head of racct->r_rule_links one at a time;
 * for each, the reference on the rule is released and the link is returned
 * to rctl_rule_link_zone.  The list is empty when the loop finishes.
 *
 * Asserts that racct is enabled and that the racct lock assertion holds.
 */
2157 rctl_racct_release(struct racct *racct)
2159 struct rctl_rule_link *link;
2161 ASSERT_RACCT_ENABLED();
2162 RACCT_LOCK_ASSERT();
2164 while (!LIST_EMPTY(&racct->r_rule_links)) {
2165 link = LIST_FIRST(&racct->r_rule_links);
2166 LIST_REMOVE(link, rrl_next);
2167 rctl_rule_release(link->rrl_rule);
2168 uma_zfree(rctl_rule_link_zone, link);
/*
 * Create the UMA zones that back struct rctl_rule and struct
 * rctl_rule_link, then sanitize the throttle settings.
 * NOTE(review): the header of the enclosing function is not visible in
 * this excerpt; presumably this is the RCTL initialization routine --
 * confirm.
 */
2179 rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
2180 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2181 rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
2182 sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
2186 * Set default values, making sure not to overwrite the ones
2187 * fetched from tunables. Most of those could be set at the
2188 * declaration, except for the rctl_throttle_max - we cannot
2189 * set it there due to hz not being compile time constant.
/* The minimum is clamped to at least 1. */
2191 if (rctl_throttle_min < 1)
2192 rctl_throttle_min = 1;
/* A maximum below the minimum is replaced by 2 * hz. */
2193 if (rctl_throttle_max < rctl_throttle_min)
2194 rctl_throttle_max = 2 * hz;
/* Negative percentages fall back to 100. */
2195 if (rctl_throttle_pct < 0)
2196 rctl_throttle_pct = 100;
2197 if (rctl_throttle_pct2 < 0)
2198 rctl_throttle_pct2 = 100;
/*
 * Second definition of the sys_rctl_get_racct() handler in this file.
 * NOTE(review): presumably the stub built when RCTL support is compiled
 * out; the preprocessor conditional and the body are not visible here --
 * confirm.
 */
2204 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
/*
 * Second definition of the sys_rctl_get_rules() handler in this file.
 * NOTE(review): presumably the stub built when RCTL support is compiled
 * out; the preprocessor conditional and the body are not visible here --
 * confirm.
 */
2211 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
/*
 * Second definition of the sys_rctl_get_limits() handler in this file.
 * NOTE(review): presumably the stub built when RCTL support is compiled
 * out; the preprocessor conditional and the body are not visible here --
 * confirm.
 */
2218 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
/*
 * Second definition of the sys_rctl_add_rule() handler in this file.
 * NOTE(review): presumably the stub built when RCTL support is compiled
 * out; the preprocessor conditional and the body are not visible here --
 * confirm.
 */
2225 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
/*
 * Second definition of the sys_rctl_remove_rule() handler in this file.
 * NOTE(review): presumably the stub built when RCTL support is compiled
 * out; the preprocessor conditional and the body are not visible here --
 * confirm.
 */
2232 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)