]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/kern/kern_rctl.c
sigtimedwait: Prevent timeout math overflows.
[FreeBSD/FreeBSD.git] / sys / kern / kern_rctl.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2010 The FreeBSD Foundation
5  *
6  * This software was developed by Edward Tomasz Napierala under sponsorship
7  * from the FreeBSD Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  * $FreeBSD$
31  */
32
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35
36 #include <sys/param.h>
37 #include <sys/devctl.h>
38 #include <sys/malloc.h>
39 #include <sys/queue.h>
40 #include <sys/refcount.h>
41 #include <sys/jail.h>
42 #include <sys/kernel.h>
43 #include <sys/limits.h>
44 #include <sys/loginclass.h>
45 #include <sys/priv.h>
46 #include <sys/proc.h>
47 #include <sys/racct.h>
48 #include <sys/rctl.h>
49 #include <sys/resourcevar.h>
50 #include <sys/sx.h>
51 #include <sys/sysent.h>
52 #include <sys/sysproto.h>
53 #include <sys/systm.h>
54 #include <sys/types.h>
55 #include <sys/eventhandler.h>
56 #include <sys/lock.h>
57 #include <sys/mutex.h>
58 #include <sys/rwlock.h>
59 #include <sys/sbuf.h>
60 #include <sys/taskqueue.h>
61 #include <sys/tree.h>
62 #include <vm/uma.h>
63
64 #ifdef RCTL
65 #ifndef RACCT
66 #error "The RCTL option requires the RACCT option"
67 #endif
68
69 FEATURE(rctl, "Resource Limits");
70
#define HRF_DEFAULT             0
#define HRF_DONT_INHERIT        1
#define HRF_DONT_ACCUMULATE     2

/*
 * Buffer size constants.  Every replacement list that is an expression is
 * parenthesized so that the macro keeps its value when it is expanded inside
 * a larger expression (e.g. "len / RCTL_MAX_INBUFSIZE").
 */
#define RCTL_MAX_INBUFSIZE      (4 * 1024)
#define RCTL_MAX_OUTBUFSIZE     (16 * 1024 * 1024)
#define RCTL_LOG_BUFSIZE        128

/*
 * Safety margin subtracted from the available %cpu by rctl_pcpu_available().
 */
#define RCTL_PCPU_SHIFT         (10 * 1000000)
80
81 static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE;
82 static int rctl_log_rate_limit = 10;
83 static int rctl_devctl_rate_limit = 10;
84
85 /*
86  * Values below are initialized in rctl_init().
87  */
88 static int rctl_throttle_min = -1;
89 static int rctl_throttle_max = -1;
90 static int rctl_throttle_pct = -1;
91 static int rctl_throttle_pct2 = -1;
92
93 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS);
94 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS);
95 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS);
96 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS);
97
98 SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
99     "Resource Limits");
100 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN,
101     &rctl_maxbufsize, 0, "Maximum output buffer size");
102 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW,
103     &rctl_log_rate_limit, 0, "Maximum number of log messages per second");
104 SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN,
105     &rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second");
106 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min,
107     CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
108     &rctl_throttle_min_sysctl, "IU",
109     "Shortest throttling duration, in hz");
110 TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min);
111 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max,
112     CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
113     &rctl_throttle_max_sysctl, "IU",
114     "Longest throttling duration, in hz");
115 TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max);
116 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct,
117     CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
118     &rctl_throttle_pct_sysctl, "IU",
119     "Throttling penalty for process consumption, in percent");
120 TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct);
121 SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2,
122     CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
123     &rctl_throttle_pct2_sysctl, "IU",
124     "Throttling penalty for container consumption, in percent");
125 TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2);
126
127 /*
128  * 'rctl_rule_link' connects a rule with every racct it's related to.
129  * For example, rule 'user:X:openfiles:deny=N/process' is linked
130  * with uidinfo for user X, and to each process of that user.
131  */
132 struct rctl_rule_link {
133         LIST_ENTRY(rctl_rule_link)      rrl_next;
134         struct rctl_rule                *rrl_rule;
135         int                             rrl_exceeded;
136 };
137
/*
 * One entry of a name <-> value translation table.  Tables are arrays of
 * these, terminated by an entry whose d_name is NULL.
 */
struct dict {
        const char      *d_name;        /* textual name */
        int             d_value;        /* numeric constant for that name */
};
142
143 static struct dict subjectnames[] = {
144         { "process", RCTL_SUBJECT_TYPE_PROCESS },
145         { "user", RCTL_SUBJECT_TYPE_USER },
146         { "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
147         { "jail", RCTL_SUBJECT_TYPE_JAIL },
148         { NULL, -1 }};
149
150 static struct dict resourcenames[] = {
151         { "cputime", RACCT_CPU },
152         { "datasize", RACCT_DATA },
153         { "stacksize", RACCT_STACK },
154         { "coredumpsize", RACCT_CORE },
155         { "memoryuse", RACCT_RSS },
156         { "memorylocked", RACCT_MEMLOCK },
157         { "maxproc", RACCT_NPROC },
158         { "openfiles", RACCT_NOFILE },
159         { "vmemoryuse", RACCT_VMEM },
160         { "pseudoterminals", RACCT_NPTS },
161         { "swapuse", RACCT_SWAP },
162         { "nthr", RACCT_NTHR },
163         { "msgqqueued", RACCT_MSGQQUEUED },
164         { "msgqsize", RACCT_MSGQSIZE },
165         { "nmsgq", RACCT_NMSGQ },
166         { "nsem", RACCT_NSEM },
167         { "nsemop", RACCT_NSEMOP },
168         { "nshm", RACCT_NSHM },
169         { "shmsize", RACCT_SHMSIZE },
170         { "wallclock", RACCT_WALLCLOCK },
171         { "pcpu", RACCT_PCTCPU },
172         { "readbps", RACCT_READBPS },
173         { "writebps", RACCT_WRITEBPS },
174         { "readiops", RACCT_READIOPS },
175         { "writeiops", RACCT_WRITEIOPS },
176         { NULL, -1 }};
177
178 static struct dict actionnames[] = {
179         { "sighup", RCTL_ACTION_SIGHUP },
180         { "sigint", RCTL_ACTION_SIGINT },
181         { "sigquit", RCTL_ACTION_SIGQUIT },
182         { "sigill", RCTL_ACTION_SIGILL },
183         { "sigtrap", RCTL_ACTION_SIGTRAP },
184         { "sigabrt", RCTL_ACTION_SIGABRT },
185         { "sigemt", RCTL_ACTION_SIGEMT },
186         { "sigfpe", RCTL_ACTION_SIGFPE },
187         { "sigkill", RCTL_ACTION_SIGKILL },
188         { "sigbus", RCTL_ACTION_SIGBUS },
189         { "sigsegv", RCTL_ACTION_SIGSEGV },
190         { "sigsys", RCTL_ACTION_SIGSYS },
191         { "sigpipe", RCTL_ACTION_SIGPIPE },
192         { "sigalrm", RCTL_ACTION_SIGALRM },
193         { "sigterm", RCTL_ACTION_SIGTERM },
194         { "sigurg", RCTL_ACTION_SIGURG },
195         { "sigstop", RCTL_ACTION_SIGSTOP },
196         { "sigtstp", RCTL_ACTION_SIGTSTP },
197         { "sigchld", RCTL_ACTION_SIGCHLD },
198         { "sigttin", RCTL_ACTION_SIGTTIN },
199         { "sigttou", RCTL_ACTION_SIGTTOU },
200         { "sigio", RCTL_ACTION_SIGIO },
201         { "sigxcpu", RCTL_ACTION_SIGXCPU },
202         { "sigxfsz", RCTL_ACTION_SIGXFSZ },
203         { "sigvtalrm", RCTL_ACTION_SIGVTALRM },
204         { "sigprof", RCTL_ACTION_SIGPROF },
205         { "sigwinch", RCTL_ACTION_SIGWINCH },
206         { "siginfo", RCTL_ACTION_SIGINFO },
207         { "sigusr1", RCTL_ACTION_SIGUSR1 },
208         { "sigusr2", RCTL_ACTION_SIGUSR2 },
209         { "sigthr", RCTL_ACTION_SIGTHR },
210         { "deny", RCTL_ACTION_DENY },
211         { "log", RCTL_ACTION_LOG },
212         { "devctl", RCTL_ACTION_DEVCTL },
213         { "throttle", RCTL_ACTION_THROTTLE },
214         { NULL, -1 }};
215
216 static void rctl_init(void);
217 SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
218
219 static uma_zone_t rctl_rule_zone;
220 static uma_zone_t rctl_rule_link_zone;
221
222 static int rctl_rule_fully_specified(const struct rctl_rule *rule);
223 static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
224
225 static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
226
227 static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS)
228 {
229         int error, val = rctl_throttle_min;
230
231         error = sysctl_handle_int(oidp, &val, 0, req);
232         if (error || !req->newptr)
233                 return (error);
234         if (val < 1 || val > rctl_throttle_max)
235                 return (EINVAL);
236
237         RACCT_LOCK();
238         rctl_throttle_min = val;
239         RACCT_UNLOCK();
240
241         return (0);
242 }
243
244 static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS)
245 {
246         int error, val = rctl_throttle_max;
247
248         error = sysctl_handle_int(oidp, &val, 0, req);
249         if (error || !req->newptr)
250                 return (error);
251         if (val < rctl_throttle_min)
252                 return (EINVAL);
253
254         RACCT_LOCK();
255         rctl_throttle_max = val;
256         RACCT_UNLOCK();
257
258         return (0);
259 }
260
261 static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS)
262 {
263         int error, val = rctl_throttle_pct;
264
265         error = sysctl_handle_int(oidp, &val, 0, req);
266         if (error || !req->newptr)
267                 return (error);
268         if (val < 0)
269                 return (EINVAL);
270
271         RACCT_LOCK();
272         rctl_throttle_pct = val;
273         RACCT_UNLOCK();
274
275         return (0);
276 }
277
278 static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS)
279 {
280         int error, val = rctl_throttle_pct2;
281
282         error = sysctl_handle_int(oidp, &val, 0, req);
283         if (error || !req->newptr)
284                 return (error);
285         if (val < 0)
286                 return (EINVAL);
287
288         RACCT_LOCK();
289         rctl_throttle_pct2 = val;
290         RACCT_UNLOCK();
291
292         return (0);
293 }
294
295 static const char *
296 rctl_subject_type_name(int subject)
297 {
298         int i;
299
300         for (i = 0; subjectnames[i].d_name != NULL; i++) {
301                 if (subjectnames[i].d_value == subject)
302                         return (subjectnames[i].d_name);
303         }
304
305         panic("rctl_subject_type_name: unknown subject type %d", subject);
306 }
307
308 static const char *
309 rctl_action_name(int action)
310 {
311         int i;
312
313         for (i = 0; actionnames[i].d_name != NULL; i++) {
314                 if (actionnames[i].d_value == action)
315                         return (actionnames[i].d_name);
316         }
317
318         panic("rctl_action_name: unknown action %d", action);
319 }
320
321 const char *
322 rctl_resource_name(int resource)
323 {
324         int i;
325
326         for (i = 0; resourcenames[i].d_name != NULL; i++) {
327                 if (resourcenames[i].d_value == resource)
328                         return (resourcenames[i].d_name);
329         }
330
331         panic("rctl_resource_name: unknown resource %d", resource);
332 }
333
334 static struct racct *
335 rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule)
336 {
337         struct ucred *cred = p->p_ucred;
338
339         ASSERT_RACCT_ENABLED();
340         RACCT_LOCK_ASSERT();
341
342         switch (rule->rr_per) {
343         case RCTL_SUBJECT_TYPE_PROCESS:
344                 return (p->p_racct);
345         case RCTL_SUBJECT_TYPE_USER:
346                 return (cred->cr_ruidinfo->ui_racct);
347         case RCTL_SUBJECT_TYPE_LOGINCLASS:
348                 return (cred->cr_loginclass->lc_racct);
349         case RCTL_SUBJECT_TYPE_JAIL:
350                 return (cred->cr_prison->pr_prison_racct->prr_racct);
351         default:
352                 panic("%s: unknown per %d", __func__, rule->rr_per);
353         }
354 }
355
356 /*
357  * Return the amount of resource that can be allocated by 'p' before
358  * hitting 'rule'.
359  */
360 static int64_t
361 rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
362 {
363         const struct racct *racct;
364         int64_t available;
365
366         ASSERT_RACCT_ENABLED();
367         RACCT_LOCK_ASSERT();
368
369         racct = rctl_proc_rule_to_racct(p, rule);
370         available = rule->rr_amount - racct->r_resources[rule->rr_resource];
371
372         return (available);
373 }
374
375 /*
376  * Called every second for proc, uidinfo, loginclass, and jail containers.
377  * If the limit isn't exceeded, it decreases the usage amount to zero.
378  * Otherwise, it decreases it by the value of the limit.  This way
379  * resource consumption exceeding the limit "carries over" to the next
380  * period.
381  */
382 void
383 rctl_throttle_decay(struct racct *racct, int resource)
384 {
385         struct rctl_rule *rule;
386         struct rctl_rule_link *link;
387         int64_t minavailable;
388
389         ASSERT_RACCT_ENABLED();
390         RACCT_LOCK_ASSERT();
391
392         minavailable = INT64_MAX;
393
394         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
395                 rule = link->rrl_rule;
396
397                 if (rule->rr_resource != resource)
398                         continue;
399                 if (rule->rr_action != RCTL_ACTION_THROTTLE)
400                         continue;
401
402                 if (rule->rr_amount < minavailable)
403                         minavailable = rule->rr_amount;
404         }
405
406         if (racct->r_resources[resource] < minavailable) {
407                 racct->r_resources[resource] = 0;
408         } else {
409                 /*
410                  * Cap utilization counter at ten times the limit.  Otherwise,
411                  * if we changed the rule lowering the allowed amount, it could
412                  * take unreasonably long time for the accumulated resource
413                  * usage to drop.
414                  */
415                 if (racct->r_resources[resource] > minavailable * 10)
416                         racct->r_resources[resource] = minavailable * 10;
417
418                 racct->r_resources[resource] -= minavailable;
419         }
420 }
421
422 /*
423  * Special version of rctl_get_available() for the %CPU resource.
424  * We slightly cheat here and return less than we normally would.
425  */
426 int64_t
427 rctl_pcpu_available(const struct proc *p) {
428         struct rctl_rule *rule;
429         struct rctl_rule_link *link;
430         int64_t available, minavailable, limit;
431
432         ASSERT_RACCT_ENABLED();
433         RACCT_LOCK_ASSERT();
434
435         minavailable = INT64_MAX;
436         limit = 0;
437
438         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
439                 rule = link->rrl_rule;
440                 if (rule->rr_resource != RACCT_PCTCPU)
441                         continue;
442                 if (rule->rr_action != RCTL_ACTION_DENY)
443                         continue;
444                 available = rctl_available_resource(p, rule);
445                 if (available < minavailable) {
446                         minavailable = available;
447                         limit = rule->rr_amount;
448                 }
449         }
450
451         /*
452          * Return slightly less than actual value of the available
453          * %cpu resource.  This makes %cpu throttling more aggressive
454          * and lets us act sooner than the limits are already exceeded.
455          */
456         if (limit != 0) {
457                 if (limit > 2 * RCTL_PCPU_SHIFT)
458                         minavailable -= RCTL_PCPU_SHIFT;
459                 else
460                         minavailable -= (limit / 2);
461         }
462
463         return (minavailable);
464 }
465
/*
 * Saturating unsigned addition: returns a + b, or UINT64_MAX if the sum
 * does not fit in 64 bits.
 */
static uint64_t
xadd(uint64_t a, uint64_t b)
{

        /* The sum wraps exactly when b exceeds the headroom above a. */
        if (b > UINT64_MAX - a)
                return (UINT64_MAX);

        return (a + b);
}
481
/*
 * Saturating unsigned multiplication: returns a * b, or UINT64_MAX if the
 * product does not fit in 64 bits.
 */
static uint64_t
xmul(uint64_t a, uint64_t b)
{

        if (a == 0 || b == 0)
                return (0);

        /* a * b overflows exactly when a exceeds UINT64_MAX / b. */
        if (a > UINT64_MAX / b)
                return (UINT64_MAX);

        return (a * b);
}
491
492 /*
493  * Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
494  * to what it keeps allocated now.  Returns non-zero if the allocation should
495  * be denied, 0 otherwise.
496  */
497 int
498 rctl_enforce(struct proc *p, int resource, uint64_t amount)
499 {
500         static struct timeval log_lasttime, devctl_lasttime;
501         static int log_curtime = 0, devctl_curtime = 0;
502         struct rctl_rule *rule;
503         struct rctl_rule_link *link;
504         struct sbuf sb;
505         char *buf;
506         int64_t available;
507         uint64_t sleep_ms, sleep_ratio;
508         int should_deny = 0;
509
510         ASSERT_RACCT_ENABLED();
511         RACCT_LOCK_ASSERT();
512
513         /*
514          * There may be more than one matching rule; go through all of them.
515          * Denial should be done last, after logging and sending signals.
516          */
517         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
518                 rule = link->rrl_rule;
519                 if (rule->rr_resource != resource)
520                         continue;
521
522                 available = rctl_available_resource(p, rule);
523                 if (available >= (int64_t)amount) {
524                         link->rrl_exceeded = 0;
525                         continue;
526                 }
527
528                 switch (rule->rr_action) {
529                 case RCTL_ACTION_DENY:
530                         should_deny = 1;
531                         continue;
532                 case RCTL_ACTION_LOG:
533                         /*
534                          * If rrl_exceeded != 0, it means we've already
535                          * logged a warning for this process.
536                          */
537                         if (link->rrl_exceeded != 0)
538                                 continue;
539
540                         /*
541                          * If the process state is not fully initialized yet,
542                          * we can't access most of the required fields, e.g.
543                          * p->p_comm.  This happens when called from fork1().
544                          * Ignore this rule for now; it will be processed just
545                          * after fork, when called from racct_proc_fork_done().
546                          */
547                         if (p->p_state != PRS_NORMAL)
548                                 continue;
549
550                         if (!ppsratecheck(&log_lasttime, &log_curtime,
551                             rctl_log_rate_limit))
552                                 continue;
553
554                         buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
555                         if (buf == NULL) {
556                                 printf("rctl_enforce: out of memory\n");
557                                 continue;
558                         }
559                         sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
560                         rctl_rule_to_sbuf(&sb, rule);
561                         sbuf_finish(&sb);
562                         printf("rctl: rule \"%s\" matched by pid %d "
563                             "(%s), uid %d, jail %s\n", sbuf_data(&sb),
564                             p->p_pid, p->p_comm, p->p_ucred->cr_uid,
565                             p->p_ucred->cr_prison->pr_prison_racct->prr_name);
566                         sbuf_delete(&sb);
567                         free(buf, M_RCTL);
568                         link->rrl_exceeded = 1;
569                         continue;
570                 case RCTL_ACTION_DEVCTL:
571                         if (link->rrl_exceeded != 0)
572                                 continue;
573
574                         if (p->p_state != PRS_NORMAL)
575                                 continue;
576
577                         if (!ppsratecheck(&devctl_lasttime, &devctl_curtime,
578                             rctl_devctl_rate_limit))
579                                 continue;
580
581                         buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
582                         if (buf == NULL) {
583                                 printf("rctl_enforce: out of memory\n");
584                                 continue;
585                         }
586                         sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
587                         sbuf_printf(&sb, "rule=");
588                         rctl_rule_to_sbuf(&sb, rule);
589                         sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
590                             p->p_pid, p->p_ucred->cr_ruid,
591                             p->p_ucred->cr_prison->pr_prison_racct->prr_name);
592                         sbuf_finish(&sb);
593                         devctl_notify("RCTL", "rule", "matched",
594                             sbuf_data(&sb));
595                         sbuf_delete(&sb);
596                         free(buf, M_RCTL);
597                         link->rrl_exceeded = 1;
598                         continue;
599                 case RCTL_ACTION_THROTTLE:
600                         if (p->p_state != PRS_NORMAL)
601                                 continue;
602
603                         if (rule->rr_amount == 0) {
604                                 racct_proc_throttle(p, rctl_throttle_max);
605                                 continue;
606                         }
607
608                         /*
609                          * Make the process sleep for a fraction of second
610                          * proportional to the ratio of process' resource
611                          * utilization compared to the limit.  The point is
612                          * to penalize resource hogs: processes that consume
613                          * more of the available resources sleep for longer.
614                          *
615                          * We're trying to defer division until the very end,
616                          * to minimize the rounding effects.  The following
617                          * calculation could have been written in a clearer
618                          * way like this:
619                          *
620                          * sleep_ms = hz * p->p_racct->r_resources[resource] /
621                          *     rule->rr_amount;
622                          * sleep_ms *= rctl_throttle_pct / 100;
623                          * if (sleep_ms < rctl_throttle_min)
624                          *         sleep_ms = rctl_throttle_min;
625                          *
626                          */
627                         sleep_ms = xmul(hz, p->p_racct->r_resources[resource]);
628                         sleep_ms = xmul(sleep_ms,  rctl_throttle_pct) / 100;
629                         if (sleep_ms < rctl_throttle_min * rule->rr_amount)
630                                 sleep_ms = rctl_throttle_min * rule->rr_amount;
631
632                         /*
633                          * Multiply that by the ratio of the resource
634                          * consumption for the container compared to the limit,
635                          * squared.  In other words, a process in a container
636                          * that is two times over the limit will be throttled
637                          * four times as much for hitting the same rule.  The
638                          * point is to penalize processes more if the container
639                          * itself (eg certain UID or jail) is above the limit.
640                          */
641                         if (available < 0)
642                                 sleep_ratio = -available / rule->rr_amount;
643                         else
644                                 sleep_ratio = 0;
645                         sleep_ratio = xmul(sleep_ratio, sleep_ratio);
646                         sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100;
647                         sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio));
648
649                         /*
650                          * Finally the division.
651                          */
652                         sleep_ms /= rule->rr_amount;
653
654                         if (sleep_ms > rctl_throttle_max)
655                                 sleep_ms = rctl_throttle_max;
656 #if 0
657                         printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n",
658                            __func__, p->p_pid, p->p_comm,
659                            p->p_racct->r_resources[resource],
660                            rule->rr_amount, (uintmax_t)sleep_ms,
661                            (uintmax_t)sleep_ratio, (intmax_t)available);
662 #endif
663
664                         KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n",
665                             __func__, (uintmax_t)sleep_ms, rctl_throttle_min));
666                         racct_proc_throttle(p, sleep_ms);
667                         continue;
668                 default:
669                         if (link->rrl_exceeded != 0)
670                                 continue;
671
672                         if (p->p_state != PRS_NORMAL)
673                                 continue;
674
675                         KASSERT(rule->rr_action > 0 &&
676                             rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
677                             ("rctl_enforce: unknown action %d",
678                              rule->rr_action));
679
680                         /*
681                          * We're using the fact that RCTL_ACTION_SIG* values
682                          * are equal to their counterparts from sys/signal.h.
683                          */
684                         kern_psignal(p, rule->rr_action);
685                         link->rrl_exceeded = 1;
686                         continue;
687                 }
688         }
689
690         if (should_deny) {
691                 /*
692                  * Return fake error code; the caller should change it
693                  * into one proper for the situation - EFSIZ, ENOMEM etc.
694                  */
695                 return (EDOOFUS);
696         }
697
698         return (0);
699 }
700
701 uint64_t
702 rctl_get_limit(struct proc *p, int resource)
703 {
704         struct rctl_rule *rule;
705         struct rctl_rule_link *link;
706         uint64_t amount = UINT64_MAX;
707
708         ASSERT_RACCT_ENABLED();
709         RACCT_LOCK_ASSERT();
710
711         /*
712          * There may be more than one matching rule; go through all of them.
713          * Denial should be done last, after logging and sending signals.
714          */
715         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
716                 rule = link->rrl_rule;
717                 if (rule->rr_resource != resource)
718                         continue;
719                 if (rule->rr_action != RCTL_ACTION_DENY)
720                         continue;
721                 if (rule->rr_amount < amount)
722                         amount = rule->rr_amount;
723         }
724
725         return (amount);
726 }
727
728 uint64_t
729 rctl_get_available(struct proc *p, int resource)
730 {
731         struct rctl_rule *rule;
732         struct rctl_rule_link *link;
733         int64_t available, minavailable, allocated;
734
735         minavailable = INT64_MAX;
736
737         ASSERT_RACCT_ENABLED();
738         RACCT_LOCK_ASSERT();
739
740         /*
741          * There may be more than one matching rule; go through all of them.
742          * Denial should be done last, after logging and sending signals.
743          */
744         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
745                 rule = link->rrl_rule;
746                 if (rule->rr_resource != resource)
747                         continue;
748                 if (rule->rr_action != RCTL_ACTION_DENY)
749                         continue;
750                 available = rctl_available_resource(p, rule);
751                 if (available < minavailable)
752                         minavailable = available;
753         }
754
755         /*
756          * XXX: Think about this _hard_.
757          */
758         allocated = p->p_racct->r_resources[resource];
759         if (minavailable < INT64_MAX - allocated)
760                 minavailable += allocated;
761         if (minavailable < 0)
762                 minavailable = 0;
763
764         return (minavailable);
765 }
766
767 static int
768 rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
769 {
770
771         ASSERT_RACCT_ENABLED();
772
773         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
774                 if (rule->rr_subject_type != filter->rr_subject_type)
775                         return (0);
776
777                 switch (filter->rr_subject_type) {
778                 case RCTL_SUBJECT_TYPE_PROCESS:
779                         if (filter->rr_subject.rs_proc != NULL &&
780                             rule->rr_subject.rs_proc !=
781                             filter->rr_subject.rs_proc)
782                                 return (0);
783                         break;
784                 case RCTL_SUBJECT_TYPE_USER:
785                         if (filter->rr_subject.rs_uip != NULL &&
786                             rule->rr_subject.rs_uip !=
787                             filter->rr_subject.rs_uip)
788                                 return (0);
789                         break;
790                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
791                         if (filter->rr_subject.rs_loginclass != NULL &&
792                             rule->rr_subject.rs_loginclass !=
793                             filter->rr_subject.rs_loginclass)
794                                 return (0);
795                         break;
796                 case RCTL_SUBJECT_TYPE_JAIL:
797                         if (filter->rr_subject.rs_prison_racct != NULL &&
798                             rule->rr_subject.rs_prison_racct !=
799                             filter->rr_subject.rs_prison_racct)
800                                 return (0);
801                         break;
802                 default:
803                         panic("rctl_rule_matches: unknown subject type %d",
804                             filter->rr_subject_type);
805                 }
806         }
807
808         if (filter->rr_resource != RACCT_UNDEFINED) {
809                 if (rule->rr_resource != filter->rr_resource)
810                         return (0);
811         }
812
813         if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
814                 if (rule->rr_action != filter->rr_action)
815                         return (0);
816         }
817
818         if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
819                 if (rule->rr_amount != filter->rr_amount)
820                         return (0);
821         }
822
823         if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
824                 if (rule->rr_per != filter->rr_per)
825                         return (0);
826         }
827
828         return (1);
829 }
830
831 static int
832 str2value(const char *str, int *value, struct dict *table)
833 {
834         int i;
835
836         if (value == NULL)
837                 return (EINVAL);
838
839         for (i = 0; table[i].d_name != NULL; i++) {
840                 if (strcasecmp(table[i].d_name, str) == 0) {
841                         *value =  table[i].d_value;
842                         return (0);
843                 }
844         }
845
846         return (EINVAL);
847 }
848
849 static int
850 str2id(const char *str, id_t *value)
851 {
852         char *end;
853
854         if (str == NULL)
855                 return (EINVAL);
856
857         *value = strtoul(str, &end, 10);
858         if ((size_t)(end - str) != strlen(str))
859                 return (EINVAL);
860
861         return (0);
862 }
863
864 static int
865 str2int64(const char *str, int64_t *value)
866 {
867         char *end;
868
869         if (str == NULL)
870                 return (EINVAL);
871
872         *value = strtoul(str, &end, 10);
873         if ((size_t)(end - str) != strlen(str))
874                 return (EINVAL);
875
876         if (*value < 0)
877                 return (ERANGE);
878
879         return (0);
880 }
881
882 /*
883  * Connect the rule to the racct, increasing refcount for the rule.
884  */
885 static void
886 rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
887 {
888         struct rctl_rule_link *link;
889
890         ASSERT_RACCT_ENABLED();
891         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
892
893         rctl_rule_acquire(rule);
894         link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
895         link->rrl_rule = rule;
896         link->rrl_exceeded = 0;
897
898         RACCT_LOCK();
899         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
900         RACCT_UNLOCK();
901 }
902
903 static int
904 rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
905 {
906         struct rctl_rule_link *link;
907
908         ASSERT_RACCT_ENABLED();
909         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
910         RACCT_LOCK_ASSERT();
911
912         link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
913         if (link == NULL)
914                 return (ENOMEM);
915         rctl_rule_acquire(rule);
916         link->rrl_rule = rule;
917         link->rrl_exceeded = 0;
918
919         LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
920
921         return (0);
922 }
923
924 /*
925  * Remove limits for a rules matching the filter and release
926  * the refcounts for the rules, possibly freeing them.  Returns
927  * the number of limit structures removed.
928  */
929 static int
930 rctl_racct_remove_rules(struct racct *racct,
931     const struct rctl_rule *filter)
932 {
933         struct rctl_rule_link *link, *linktmp;
934         int removed = 0;
935
936         ASSERT_RACCT_ENABLED();
937         RACCT_LOCK_ASSERT();
938
939         LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
940                 if (!rctl_rule_matches(link->rrl_rule, filter))
941                         continue;
942
943                 LIST_REMOVE(link, rrl_next);
944                 rctl_rule_release(link->rrl_rule);
945                 uma_zfree(rctl_rule_link_zone, link);
946                 removed++;
947         }
948         return (removed);
949 }
950
951 static void
952 rctl_rule_acquire_subject(struct rctl_rule *rule)
953 {
954
955         ASSERT_RACCT_ENABLED();
956
957         switch (rule->rr_subject_type) {
958         case RCTL_SUBJECT_TYPE_UNDEFINED:
959         case RCTL_SUBJECT_TYPE_PROCESS:
960                 break;
961         case RCTL_SUBJECT_TYPE_JAIL:
962                 if (rule->rr_subject.rs_prison_racct != NULL)
963                         prison_racct_hold(rule->rr_subject.rs_prison_racct);
964                 break;
965         case RCTL_SUBJECT_TYPE_USER:
966                 if (rule->rr_subject.rs_uip != NULL)
967                         uihold(rule->rr_subject.rs_uip);
968                 break;
969         case RCTL_SUBJECT_TYPE_LOGINCLASS:
970                 if (rule->rr_subject.rs_loginclass != NULL)
971                         loginclass_hold(rule->rr_subject.rs_loginclass);
972                 break;
973         default:
974                 panic("rctl_rule_acquire_subject: unknown subject type %d",
975                     rule->rr_subject_type);
976         }
977 }
978
979 static void
980 rctl_rule_release_subject(struct rctl_rule *rule)
981 {
982
983         ASSERT_RACCT_ENABLED();
984
985         switch (rule->rr_subject_type) {
986         case RCTL_SUBJECT_TYPE_UNDEFINED:
987         case RCTL_SUBJECT_TYPE_PROCESS:
988                 break;
989         case RCTL_SUBJECT_TYPE_JAIL:
990                 if (rule->rr_subject.rs_prison_racct != NULL)
991                         prison_racct_free(rule->rr_subject.rs_prison_racct);
992                 break;
993         case RCTL_SUBJECT_TYPE_USER:
994                 if (rule->rr_subject.rs_uip != NULL)
995                         uifree(rule->rr_subject.rs_uip);
996                 break;
997         case RCTL_SUBJECT_TYPE_LOGINCLASS:
998                 if (rule->rr_subject.rs_loginclass != NULL)
999                         loginclass_free(rule->rr_subject.rs_loginclass);
1000                 break;
1001         default:
1002                 panic("rctl_rule_release_subject: unknown subject type %d",
1003                     rule->rr_subject_type);
1004         }
1005 }
1006
1007 struct rctl_rule *
1008 rctl_rule_alloc(int flags)
1009 {
1010         struct rctl_rule *rule;
1011
1012         ASSERT_RACCT_ENABLED();
1013
1014         rule = uma_zalloc(rctl_rule_zone, flags);
1015         if (rule == NULL)
1016                 return (NULL);
1017         rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1018         rule->rr_subject.rs_proc = NULL;
1019         rule->rr_subject.rs_uip = NULL;
1020         rule->rr_subject.rs_loginclass = NULL;
1021         rule->rr_subject.rs_prison_racct = NULL;
1022         rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1023         rule->rr_resource = RACCT_UNDEFINED;
1024         rule->rr_action = RCTL_ACTION_UNDEFINED;
1025         rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1026         refcount_init(&rule->rr_refcount, 1);
1027
1028         return (rule);
1029 }
1030
1031 struct rctl_rule *
1032 rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
1033 {
1034         struct rctl_rule *copy;
1035
1036         ASSERT_RACCT_ENABLED();
1037
1038         copy = uma_zalloc(rctl_rule_zone, flags);
1039         if (copy == NULL)
1040                 return (NULL);
1041         copy->rr_subject_type = rule->rr_subject_type;
1042         copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
1043         copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
1044         copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
1045         copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
1046         copy->rr_per = rule->rr_per;
1047         copy->rr_resource = rule->rr_resource;
1048         copy->rr_action = rule->rr_action;
1049         copy->rr_amount = rule->rr_amount;
1050         refcount_init(&copy->rr_refcount, 1);
1051         rctl_rule_acquire_subject(copy);
1052
1053         return (copy);
1054 }
1055
1056 void
1057 rctl_rule_acquire(struct rctl_rule *rule)
1058 {
1059
1060         ASSERT_RACCT_ENABLED();
1061         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1062
1063         refcount_acquire(&rule->rr_refcount);
1064 }
1065
1066 static void
1067 rctl_rule_free(void *context, int pending)
1068 {
1069         struct rctl_rule *rule;
1070
1071         rule = (struct rctl_rule *)context;
1072
1073         ASSERT_RACCT_ENABLED();
1074         KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
1075
1076         /*
1077          * We don't need locking here; rule is guaranteed to be inaccessible.
1078          */
1079
1080         rctl_rule_release_subject(rule);
1081         uma_zfree(rctl_rule_zone, rule);
1082 }
1083
1084 void
1085 rctl_rule_release(struct rctl_rule *rule)
1086 {
1087
1088         ASSERT_RACCT_ENABLED();
1089         KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
1090
1091         if (refcount_release(&rule->rr_refcount)) {
1092                 /*
1093                  * rctl_rule_release() is often called when iterating
1094                  * over all the uidinfo structures in the system,
1095                  * holding uihashtbl_lock.  Since rctl_rule_free()
1096                  * might end up calling uifree(), this would lead
1097                  * to lock recursion.  Use taskqueue to avoid this.
1098                  */
1099                 TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
1100                 taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
1101         }
1102 }
1103
1104 static int
1105 rctl_rule_fully_specified(const struct rctl_rule *rule)
1106 {
1107
1108         ASSERT_RACCT_ENABLED();
1109
1110         switch (rule->rr_subject_type) {
1111         case RCTL_SUBJECT_TYPE_UNDEFINED:
1112                 return (0);
1113         case RCTL_SUBJECT_TYPE_PROCESS:
1114                 if (rule->rr_subject.rs_proc == NULL)
1115                         return (0);
1116                 break;
1117         case RCTL_SUBJECT_TYPE_USER:
1118                 if (rule->rr_subject.rs_uip == NULL)
1119                         return (0);
1120                 break;
1121         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1122                 if (rule->rr_subject.rs_loginclass == NULL)
1123                         return (0);
1124                 break;
1125         case RCTL_SUBJECT_TYPE_JAIL:
1126                 if (rule->rr_subject.rs_prison_racct == NULL)
1127                         return (0);
1128                 break;
1129         default:
1130                 panic("rctl_rule_fully_specified: unknown subject type %d",
1131                     rule->rr_subject_type);
1132         }
1133         if (rule->rr_resource == RACCT_UNDEFINED)
1134                 return (0);
1135         if (rule->rr_action == RCTL_ACTION_UNDEFINED)
1136                 return (0);
1137         if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
1138                 return (0);
1139         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
1140                 return (0);
1141
1142         return (1);
1143 }
1144
1145 static int
1146 rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
1147 {
1148         struct rctl_rule *rule;
1149         char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
1150              *amountstr, *perstr;
1151         id_t id;
1152         int error = 0;
1153
1154         ASSERT_RACCT_ENABLED();
1155
1156         rule = rctl_rule_alloc(M_WAITOK);
1157
1158         subjectstr = strsep(&rulestr, ":");
1159         subject_idstr = strsep(&rulestr, ":");
1160         resourcestr = strsep(&rulestr, ":");
1161         actionstr = strsep(&rulestr, "=/");
1162         amountstr = strsep(&rulestr, "/");
1163         perstr = rulestr;
1164
1165         if (subjectstr == NULL || subjectstr[0] == '\0')
1166                 rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
1167         else {
1168                 error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
1169                 if (error != 0)
1170                         goto out;
1171         }
1172
1173         if (subject_idstr == NULL || subject_idstr[0] == '\0') {
1174                 rule->rr_subject.rs_proc = NULL;
1175                 rule->rr_subject.rs_uip = NULL;
1176                 rule->rr_subject.rs_loginclass = NULL;
1177                 rule->rr_subject.rs_prison_racct = NULL;
1178         } else {
1179                 switch (rule->rr_subject_type) {
1180                 case RCTL_SUBJECT_TYPE_UNDEFINED:
1181                         error = EINVAL;
1182                         goto out;
1183                 case RCTL_SUBJECT_TYPE_PROCESS:
1184                         error = str2id(subject_idstr, &id);
1185                         if (error != 0)
1186                                 goto out;
1187                         sx_assert(&allproc_lock, SA_LOCKED);
1188                         rule->rr_subject.rs_proc = pfind(id);
1189                         if (rule->rr_subject.rs_proc == NULL) {
1190                                 error = ESRCH;
1191                                 goto out;
1192                         }
1193                         PROC_UNLOCK(rule->rr_subject.rs_proc);
1194                         break;
1195                 case RCTL_SUBJECT_TYPE_USER:
1196                         error = str2id(subject_idstr, &id);
1197                         if (error != 0)
1198                                 goto out;
1199                         rule->rr_subject.rs_uip = uifind(id);
1200                         break;
1201                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1202                         rule->rr_subject.rs_loginclass =
1203                             loginclass_find(subject_idstr);
1204                         if (rule->rr_subject.rs_loginclass == NULL) {
1205                                 error = ENAMETOOLONG;
1206                                 goto out;
1207                         }
1208                         break;
1209                 case RCTL_SUBJECT_TYPE_JAIL:
1210                         rule->rr_subject.rs_prison_racct =
1211                             prison_racct_find(subject_idstr);
1212                         if (rule->rr_subject.rs_prison_racct == NULL) {
1213                                 error = ENAMETOOLONG;
1214                                 goto out;
1215                         }
1216                         break;
1217                default:
1218                        panic("rctl_string_to_rule: unknown subject type %d",
1219                            rule->rr_subject_type);
1220                }
1221         }
1222
1223         if (resourcestr == NULL || resourcestr[0] == '\0')
1224                 rule->rr_resource = RACCT_UNDEFINED;
1225         else {
1226                 error = str2value(resourcestr, &rule->rr_resource,
1227                     resourcenames);
1228                 if (error != 0)
1229                         goto out;
1230         }
1231
1232         if (actionstr == NULL || actionstr[0] == '\0')
1233                 rule->rr_action = RCTL_ACTION_UNDEFINED;
1234         else {
1235                 error = str2value(actionstr, &rule->rr_action, actionnames);
1236                 if (error != 0)
1237                         goto out;
1238         }
1239
1240         if (amountstr == NULL || amountstr[0] == '\0')
1241                 rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
1242         else {
1243                 error = str2int64(amountstr, &rule->rr_amount);
1244                 if (error != 0)
1245                         goto out;
1246                 if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) {
1247                         if (rule->rr_amount > INT64_MAX / 1000000) {
1248                                 error = ERANGE;
1249                                 goto out;
1250                         }
1251                         rule->rr_amount *= 1000000;
1252                 }
1253         }
1254
1255         if (perstr == NULL || perstr[0] == '\0')
1256                 rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
1257         else {
1258                 error = str2value(perstr, &rule->rr_per, subjectnames);
1259                 if (error != 0)
1260                         goto out;
1261         }
1262
1263 out:
1264         if (error == 0)
1265                 *rulep = rule;
1266         else
1267                 rctl_rule_release(rule);
1268
1269         return (error);
1270 }
1271
1272 /*
1273  * Link a rule with all the subjects it applies to.
1274  */
1275 int
1276 rctl_rule_add(struct rctl_rule *rule)
1277 {
1278         struct proc *p;
1279         struct ucred *cred;
1280         struct uidinfo *uip;
1281         struct prison *pr;
1282         struct prison_racct *prr;
1283         struct loginclass *lc;
1284         struct rctl_rule *rule2;
1285         int match;
1286
1287         ASSERT_RACCT_ENABLED();
1288         KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
1289
1290         /*
1291          * Some rules just don't make sense, like "deny" rule for an undeniable
1292          * resource.  The exception are the RSS and %CPU resources - they are
1293          * not deniable in the racct sense, but the limit is enforced in
1294          * a different way.
1295          */
1296         if (rule->rr_action == RCTL_ACTION_DENY &&
1297             !RACCT_IS_DENIABLE(rule->rr_resource) &&
1298             rule->rr_resource != RACCT_RSS &&
1299             rule->rr_resource != RACCT_PCTCPU) {
1300                 return (EOPNOTSUPP);
1301         }
1302
1303         if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1304             !RACCT_IS_DECAYING(rule->rr_resource)) {
1305                 return (EOPNOTSUPP);
1306         }
1307
1308         if (rule->rr_action == RCTL_ACTION_THROTTLE &&
1309             rule->rr_resource == RACCT_PCTCPU) {
1310                 return (EOPNOTSUPP);
1311         }
1312
1313         if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
1314             RACCT_IS_SLOPPY(rule->rr_resource)) {
1315                 return (EOPNOTSUPP);
1316         }
1317
1318         /*
1319          * Make sure there are no duplicated rules.  Also, for the "deny"
1320          * rules, remove ones differing only by "amount".
1321          */
1322         if (rule->rr_action == RCTL_ACTION_DENY) {
1323                 rule2 = rctl_rule_duplicate(rule, M_WAITOK);
1324                 rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
1325                 rctl_rule_remove(rule2);
1326                 rctl_rule_release(rule2);
1327         } else
1328                 rctl_rule_remove(rule);
1329
1330         switch (rule->rr_subject_type) {
1331         case RCTL_SUBJECT_TYPE_PROCESS:
1332                 p = rule->rr_subject.rs_proc;
1333                 KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
1334
1335                 rctl_racct_add_rule(p->p_racct, rule);
1336                 /*
1337                  * In case of per-process rule, we don't have anything more
1338                  * to do.
1339                  */
1340                 return (0);
1341
1342         case RCTL_SUBJECT_TYPE_USER:
1343                 uip = rule->rr_subject.rs_uip;
1344                 KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
1345                 rctl_racct_add_rule(uip->ui_racct, rule);
1346                 break;
1347
1348         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1349                 lc = rule->rr_subject.rs_loginclass;
1350                 KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
1351                 rctl_racct_add_rule(lc->lc_racct, rule);
1352                 break;
1353
1354         case RCTL_SUBJECT_TYPE_JAIL:
1355                 prr = rule->rr_subject.rs_prison_racct;
1356                 KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
1357                 rctl_racct_add_rule(prr->prr_racct, rule);
1358                 break;
1359
1360         default:
1361                 panic("rctl_rule_add: unknown subject type %d",
1362                     rule->rr_subject_type);
1363         }
1364
1365         /*
1366          * Now go through all the processes and add the new rule to the ones
1367          * it applies to.
1368          */
1369         sx_assert(&allproc_lock, SA_LOCKED);
1370         FOREACH_PROC_IN_SYSTEM(p) {
1371                 cred = p->p_ucred;
1372                 switch (rule->rr_subject_type) {
1373                 case RCTL_SUBJECT_TYPE_USER:
1374                         if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
1375                             cred->cr_ruidinfo == rule->rr_subject.rs_uip)
1376                                 break;
1377                         continue;
1378                 case RCTL_SUBJECT_TYPE_LOGINCLASS:
1379                         if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
1380                                 break;
1381                         continue;
1382                 case RCTL_SUBJECT_TYPE_JAIL:
1383                         match = 0;
1384                         for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
1385                                 if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
1386                                         match = 1;
1387                                         break;
1388                                 }
1389                         }
1390                         if (match)
1391                                 break;
1392                         continue;
1393                 default:
1394                         panic("rctl_rule_add: unknown subject type %d",
1395                             rule->rr_subject_type);
1396                 }
1397
1398                 rctl_racct_add_rule(p->p_racct, rule);
1399         }
1400
1401         return (0);
1402 }
1403
/*
 * "Pre" hook handed to the *_racct_foreach() iterators by
 * rctl_rule_remove(); it takes the RACCT lock.
 */
static void
rctl_rule_pre_callback(void)
{

	RACCT_LOCK();
}
1410
/*
 * "Post" hook handed to the *_racct_foreach() iterators by
 * rctl_rule_remove(); it drops the RACCT lock.
 */
static void
rctl_rule_post_callback(void)
{

	RACCT_UNLOCK();
}
1417
/*
 * Per-racct callback used by rctl_rule_remove().  'arg2' is the filter
 * and 'arg3' points to an int counter that is increased by the number
 * of rule links removed from this racct.  Runs with the RACCT lock held.
 */
static void
rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
{
	struct rctl_rule *filter = arg2;
	int *foundp = arg3;

	ASSERT_RACCT_ENABLED();
	RACCT_LOCK_ASSERT();

	*foundp += rctl_racct_remove_rules(racct, filter);
}
1431
1432 /*
1433  * Remove all rules that match the filter.
1434  */
1435 int
1436 rctl_rule_remove(struct rctl_rule *filter)
1437 {
1438         struct proc *p;
1439         int found = 0;
1440
1441         ASSERT_RACCT_ENABLED();
1442
1443         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
1444             filter->rr_subject.rs_proc != NULL) {
1445                 p = filter->rr_subject.rs_proc;
1446                 RACCT_LOCK();
1447                 found = rctl_racct_remove_rules(p->p_racct, filter);
1448                 RACCT_UNLOCK();
1449                 if (found)
1450                         return (0);
1451                 return (ESRCH);
1452         }
1453
1454         loginclass_racct_foreach(rctl_rule_remove_callback,
1455             rctl_rule_pre_callback, rctl_rule_post_callback,
1456             filter, (void *)&found);
1457         ui_racct_foreach(rctl_rule_remove_callback,
1458             rctl_rule_pre_callback, rctl_rule_post_callback,
1459             filter, (void *)&found);
1460         prison_racct_foreach(rctl_rule_remove_callback,
1461             rctl_rule_pre_callback, rctl_rule_post_callback,
1462             filter, (void *)&found);
1463
1464         sx_assert(&allproc_lock, SA_LOCKED);
1465         RACCT_LOCK();
1466         FOREACH_PROC_IN_SYSTEM(p) {
1467                 found += rctl_racct_remove_rules(p->p_racct, filter);
1468         }
1469         RACCT_UNLOCK();
1470
1471         if (found)
1472                 return (0);
1473         return (ESRCH);
1474 }
1475
1476 /*
1477  * Appends a rule to the sbuf.
1478  */
1479 static void
1480 rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
1481 {
1482         int64_t amount;
1483
1484         ASSERT_RACCT_ENABLED();
1485
1486         sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
1487
1488         switch (rule->rr_subject_type) {
1489         case RCTL_SUBJECT_TYPE_PROCESS:
1490                 if (rule->rr_subject.rs_proc == NULL)
1491                         sbuf_printf(sb, ":");
1492                 else
1493                         sbuf_printf(sb, "%d:",
1494                             rule->rr_subject.rs_proc->p_pid);
1495                 break;
1496         case RCTL_SUBJECT_TYPE_USER:
1497                 if (rule->rr_subject.rs_uip == NULL)
1498                         sbuf_printf(sb, ":");
1499                 else
1500                         sbuf_printf(sb, "%d:",
1501                             rule->rr_subject.rs_uip->ui_uid);
1502                 break;
1503         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1504                 if (rule->rr_subject.rs_loginclass == NULL)
1505                         sbuf_printf(sb, ":");
1506                 else
1507                         sbuf_printf(sb, "%s:",
1508                             rule->rr_subject.rs_loginclass->lc_name);
1509                 break;
1510         case RCTL_SUBJECT_TYPE_JAIL:
1511                 if (rule->rr_subject.rs_prison_racct == NULL)
1512                         sbuf_printf(sb, ":");
1513                 else
1514                         sbuf_printf(sb, "%s:",
1515                             rule->rr_subject.rs_prison_racct->prr_name);
1516                 break;
1517         default:
1518                 panic("rctl_rule_to_sbuf: unknown subject type %d",
1519                     rule->rr_subject_type);
1520         }
1521
1522         amount = rule->rr_amount;
1523         if (amount != RCTL_AMOUNT_UNDEFINED &&
1524             RACCT_IS_IN_MILLIONS(rule->rr_resource))
1525                 amount /= 1000000;
1526
1527         sbuf_printf(sb, "%s:%s=%jd",
1528             rctl_resource_name(rule->rr_resource),
1529             rctl_action_name(rule->rr_action),
1530             amount);
1531
1532         if (rule->rr_per != rule->rr_subject_type)
1533                 sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
1534 }
1535
1536 /*
1537  * Routine used by RCTL syscalls to read in input string.
1538  */
1539 static int
1540 rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
1541 {
1542         char *str;
1543         int error;
1544
1545         ASSERT_RACCT_ENABLED();
1546
1547         if (inbuflen <= 0)
1548                 return (EINVAL);
1549         if (inbuflen > RCTL_MAX_INBUFSIZE)
1550                 return (E2BIG);
1551
1552         str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
1553         error = copyinstr(inbufp, str, inbuflen, NULL);
1554         if (error != 0) {
1555                 free(str, M_RCTL);
1556                 return (error);
1557         }
1558
1559         *inputstr = str;
1560
1561         return (0);
1562 }
1563
/*
 * Routine used by the RCTL syscalls to write out the output string.
 * The sbuf is finished, copied out together with its terminating NUL
 * and then deleted, whether or not the copy succeeded.
 *
 * Returns 0 if there is no sbuf, ERANGE if the user buffer is too small
 * to hold the string, or the result of copyout().
 */
static int
rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
{
	size_t needed;
	int error;

	ASSERT_RACCT_ENABLED();

	if (outputsbuf == NULL)
		return (0);

	sbuf_finish(outputsbuf);

	/* String length plus the terminating NUL. */
	needed = sbuf_len(outputsbuf) + 1;
	if (outbuflen < needed)
		error = ERANGE;
	else
		error = copyout(sbuf_data(outputsbuf), outbufp, needed);

	sbuf_delete(outputsbuf);
	return (error);
}
1587
1588 static struct sbuf *
1589 rctl_racct_to_sbuf(struct racct *racct, int sloppy)
1590 {
1591         struct sbuf *sb;
1592         int64_t amount;
1593         int i;
1594
1595         ASSERT_RACCT_ENABLED();
1596
1597         sb = sbuf_new_auto();
1598         for (i = 0; i <= RACCT_MAX; i++) {
1599                 if (sloppy == 0 && RACCT_IS_SLOPPY(i))
1600                         continue;
1601                 RACCT_LOCK();
1602                 amount = racct->r_resources[i];
1603                 RACCT_UNLOCK();
1604                 if (RACCT_IS_IN_MILLIONS(i))
1605                         amount /= 1000000;
1606                 sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
1607         }
1608         sbuf_setpos(sb, sbuf_len(sb) - 1);
1609         return (sb);
1610 }
1611
1612 int
1613 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
1614 {
1615         struct rctl_rule *filter;
1616         struct sbuf *outputsbuf = NULL;
1617         struct proc *p;
1618         struct uidinfo *uip;
1619         struct loginclass *lc;
1620         struct prison_racct *prr;
1621         char *inputstr;
1622         int error;
1623
1624         if (!racct_enable)
1625                 return (ENOSYS);
1626
1627         error = priv_check(td, PRIV_RCTL_GET_RACCT);
1628         if (error != 0)
1629                 return (error);
1630
1631         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1632         if (error != 0)
1633                 return (error);
1634
1635         sx_slock(&allproc_lock);
1636         error = rctl_string_to_rule(inputstr, &filter);
1637         free(inputstr, M_RCTL);
1638         if (error != 0) {
1639                 sx_sunlock(&allproc_lock);
1640                 return (error);
1641         }
1642
1643         switch (filter->rr_subject_type) {
1644         case RCTL_SUBJECT_TYPE_PROCESS:
1645                 p = filter->rr_subject.rs_proc;
1646                 if (p == NULL) {
1647                         error = EINVAL;
1648                         goto out;
1649                 }
1650                 outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
1651                 break;
1652         case RCTL_SUBJECT_TYPE_USER:
1653                 uip = filter->rr_subject.rs_uip;
1654                 if (uip == NULL) {
1655                         error = EINVAL;
1656                         goto out;
1657                 }
1658                 outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
1659                 break;
1660         case RCTL_SUBJECT_TYPE_LOGINCLASS:
1661                 lc = filter->rr_subject.rs_loginclass;
1662                 if (lc == NULL) {
1663                         error = EINVAL;
1664                         goto out;
1665                 }
1666                 outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
1667                 break;
1668         case RCTL_SUBJECT_TYPE_JAIL:
1669                 prr = filter->rr_subject.rs_prison_racct;
1670                 if (prr == NULL) {
1671                         error = EINVAL;
1672                         goto out;
1673                 }
1674                 outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
1675                 break;
1676         default:
1677                 error = EINVAL;
1678         }
1679 out:
1680         rctl_rule_release(filter);
1681         sx_sunlock(&allproc_lock);
1682         if (error != 0)
1683                 return (error);
1684
1685         error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
1686
1687         return (error);
1688 }
1689
1690 static void
1691 rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
1692 {
1693         struct rctl_rule *filter = (struct rctl_rule *)arg2;
1694         struct rctl_rule_link *link;
1695         struct sbuf *sb = (struct sbuf *)arg3;
1696
1697         ASSERT_RACCT_ENABLED();
1698         RACCT_LOCK_ASSERT();
1699
1700         LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
1701                 if (!rctl_rule_matches(link->rrl_rule, filter))
1702                         continue;
1703                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1704                 sbuf_printf(sb, ",");
1705         }
1706 }
1707
1708 int
1709 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
1710 {
1711         struct sbuf *sb;
1712         struct rctl_rule *filter;
1713         struct rctl_rule_link *link;
1714         struct proc *p;
1715         char *inputstr, *buf;
1716         size_t bufsize;
1717         int error;
1718
1719         if (!racct_enable)
1720                 return (ENOSYS);
1721
1722         error = priv_check(td, PRIV_RCTL_GET_RULES);
1723         if (error != 0)
1724                 return (error);
1725
1726         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1727         if (error != 0)
1728                 return (error);
1729
1730         sx_slock(&allproc_lock);
1731         error = rctl_string_to_rule(inputstr, &filter);
1732         free(inputstr, M_RCTL);
1733         if (error != 0) {
1734                 sx_sunlock(&allproc_lock);
1735                 return (error);
1736         }
1737
1738         bufsize = uap->outbuflen;
1739         if (bufsize > rctl_maxbufsize) {
1740                 sx_sunlock(&allproc_lock);
1741                 return (E2BIG);
1742         }
1743
1744         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1745         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1746         KASSERT(sb != NULL, ("sbuf_new failed"));
1747
1748         FOREACH_PROC_IN_SYSTEM(p) {
1749                 RACCT_LOCK();
1750                 LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1751                         /*
1752                          * Non-process rules will be added to the buffer later.
1753                          * Adding them here would result in duplicated output.
1754                          */
1755                         if (link->rrl_rule->rr_subject_type !=
1756                             RCTL_SUBJECT_TYPE_PROCESS)
1757                                 continue;
1758                         if (!rctl_rule_matches(link->rrl_rule, filter))
1759                                 continue;
1760                         rctl_rule_to_sbuf(sb, link->rrl_rule);
1761                         sbuf_printf(sb, ",");
1762                 }
1763                 RACCT_UNLOCK();
1764         }
1765
1766         loginclass_racct_foreach(rctl_get_rules_callback,
1767             rctl_rule_pre_callback, rctl_rule_post_callback,
1768             filter, sb);
1769         ui_racct_foreach(rctl_get_rules_callback,
1770             rctl_rule_pre_callback, rctl_rule_post_callback,
1771             filter, sb);
1772         prison_racct_foreach(rctl_get_rules_callback,
1773             rctl_rule_pre_callback, rctl_rule_post_callback,
1774             filter, sb);
1775         if (sbuf_error(sb) == ENOMEM) {
1776                 error = ERANGE;
1777                 goto out;
1778         }
1779
1780         /*
1781          * Remove trailing ",".
1782          */
1783         if (sbuf_len(sb) > 0)
1784                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1785
1786         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1787 out:
1788         rctl_rule_release(filter);
1789         sx_sunlock(&allproc_lock);
1790         free(buf, M_RCTL);
1791         return (error);
1792 }
1793
1794 int
1795 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
1796 {
1797         struct sbuf *sb;
1798         struct rctl_rule *filter;
1799         struct rctl_rule_link *link;
1800         char *inputstr, *buf;
1801         size_t bufsize;
1802         int error;
1803
1804         if (!racct_enable)
1805                 return (ENOSYS);
1806
1807         error = priv_check(td, PRIV_RCTL_GET_LIMITS);
1808         if (error != 0)
1809                 return (error);
1810
1811         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1812         if (error != 0)
1813                 return (error);
1814
1815         sx_slock(&allproc_lock);
1816         error = rctl_string_to_rule(inputstr, &filter);
1817         free(inputstr, M_RCTL);
1818         if (error != 0) {
1819                 sx_sunlock(&allproc_lock);
1820                 return (error);
1821         }
1822
1823         if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
1824                 rctl_rule_release(filter);
1825                 sx_sunlock(&allproc_lock);
1826                 return (EINVAL);
1827         }
1828         if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
1829                 rctl_rule_release(filter);
1830                 sx_sunlock(&allproc_lock);
1831                 return (EOPNOTSUPP);
1832         }
1833         if (filter->rr_subject.rs_proc == NULL) {
1834                 rctl_rule_release(filter);
1835                 sx_sunlock(&allproc_lock);
1836                 return (EINVAL);
1837         }
1838
1839         bufsize = uap->outbuflen;
1840         if (bufsize > rctl_maxbufsize) {
1841                 rctl_rule_release(filter);
1842                 sx_sunlock(&allproc_lock);
1843                 return (E2BIG);
1844         }
1845
1846         buf = malloc(bufsize, M_RCTL, M_WAITOK);
1847         sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
1848         KASSERT(sb != NULL, ("sbuf_new failed"));
1849
1850         RACCT_LOCK();
1851         LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
1852             rrl_next) {
1853                 rctl_rule_to_sbuf(sb, link->rrl_rule);
1854                 sbuf_printf(sb, ",");
1855         }
1856         RACCT_UNLOCK();
1857         if (sbuf_error(sb) == ENOMEM) {
1858                 error = ERANGE;
1859                 sbuf_delete(sb);
1860                 goto out;
1861         }
1862
1863         /*
1864          * Remove trailing ",".
1865          */
1866         if (sbuf_len(sb) > 0)
1867                 sbuf_setpos(sb, sbuf_len(sb) - 1);
1868
1869         error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
1870 out:
1871         rctl_rule_release(filter);
1872         sx_sunlock(&allproc_lock);
1873         free(buf, M_RCTL);
1874         return (error);
1875 }
1876
1877 int
1878 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
1879 {
1880         struct rctl_rule *rule;
1881         char *inputstr;
1882         int error;
1883
1884         if (!racct_enable)
1885                 return (ENOSYS);
1886
1887         error = priv_check(td, PRIV_RCTL_ADD_RULE);
1888         if (error != 0)
1889                 return (error);
1890
1891         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1892         if (error != 0)
1893                 return (error);
1894
1895         sx_slock(&allproc_lock);
1896         error = rctl_string_to_rule(inputstr, &rule);
1897         free(inputstr, M_RCTL);
1898         if (error != 0) {
1899                 sx_sunlock(&allproc_lock);
1900                 return (error);
1901         }
1902         /*
1903          * The 'per' part of a rule is optional.
1904          */
1905         if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
1906             rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
1907                 rule->rr_per = rule->rr_subject_type;
1908
1909         if (!rctl_rule_fully_specified(rule)) {
1910                 error = EINVAL;
1911                 goto out;
1912         }
1913
1914         error = rctl_rule_add(rule);
1915
1916 out:
1917         rctl_rule_release(rule);
1918         sx_sunlock(&allproc_lock);
1919         return (error);
1920 }
1921
1922 int
1923 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
1924 {
1925         struct rctl_rule *filter;
1926         char *inputstr;
1927         int error;
1928
1929         if (!racct_enable)
1930                 return (ENOSYS);
1931
1932         error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
1933         if (error != 0)
1934                 return (error);
1935
1936         error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
1937         if (error != 0)
1938                 return (error);
1939
1940         sx_slock(&allproc_lock);
1941         error = rctl_string_to_rule(inputstr, &filter);
1942         free(inputstr, M_RCTL);
1943         if (error != 0) {
1944                 sx_sunlock(&allproc_lock);
1945                 return (error);
1946         }
1947
1948         error = rctl_rule_remove(filter);
1949         rctl_rule_release(filter);
1950         sx_sunlock(&allproc_lock);
1951
1952         return (error);
1953 }
1954
1955 /*
1956  * Update RCTL rule list after credential change.
1957  */
1958 void
1959 rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
1960 {
1961         LIST_HEAD(, rctl_rule_link) newrules;
1962         struct rctl_rule_link *link, *newlink;
1963         struct uidinfo *newuip;
1964         struct loginclass *newlc;
1965         struct prison_racct *newprr;
1966         int rulecnt, i;
1967
1968         if (!racct_enable)
1969                 return;
1970
1971         PROC_LOCK_ASSERT(p, MA_NOTOWNED);
1972
1973         newuip = newcred->cr_ruidinfo;
1974         newlc = newcred->cr_loginclass;
1975         newprr = newcred->cr_prison->pr_prison_racct;
1976
1977         LIST_INIT(&newrules);
1978
1979 again:
1980         /*
1981          * First, count the rules that apply to the process with new
1982          * credentials.
1983          */
1984         rulecnt = 0;
1985         RACCT_LOCK();
1986         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
1987                 if (link->rrl_rule->rr_subject_type ==
1988                     RCTL_SUBJECT_TYPE_PROCESS)
1989                         rulecnt++;
1990         }
1991         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
1992                 rulecnt++;
1993         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
1994                 rulecnt++;
1995         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
1996                 rulecnt++;
1997         RACCT_UNLOCK();
1998
1999         /*
2000          * Create temporary list.  We've dropped the rctl_lock in order
2001          * to use M_WAITOK.
2002          */
2003         for (i = 0; i < rulecnt; i++) {
2004                 newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
2005                 newlink->rrl_rule = NULL;
2006                 newlink->rrl_exceeded = 0;
2007                 LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
2008         }
2009
2010         newlink = LIST_FIRST(&newrules);
2011
2012         /*
2013          * Assign rules to the newly allocated list entries.
2014          */
2015         RACCT_LOCK();
2016         LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
2017                 if (link->rrl_rule->rr_subject_type ==
2018                     RCTL_SUBJECT_TYPE_PROCESS) {
2019                         if (newlink == NULL)
2020                                 goto goaround;
2021                         rctl_rule_acquire(link->rrl_rule);
2022                         newlink->rrl_rule = link->rrl_rule;
2023                         newlink->rrl_exceeded = link->rrl_exceeded;
2024                         newlink = LIST_NEXT(newlink, rrl_next);
2025                         rulecnt--;
2026                 }
2027         }
2028
2029         LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
2030                 if (newlink == NULL)
2031                         goto goaround;
2032                 rctl_rule_acquire(link->rrl_rule);
2033                 newlink->rrl_rule = link->rrl_rule;
2034                 newlink->rrl_exceeded = link->rrl_exceeded;
2035                 newlink = LIST_NEXT(newlink, rrl_next);
2036                 rulecnt--;
2037         }
2038
2039         LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
2040                 if (newlink == NULL)
2041                         goto goaround;
2042                 rctl_rule_acquire(link->rrl_rule);
2043                 newlink->rrl_rule = link->rrl_rule;
2044                 newlink->rrl_exceeded = link->rrl_exceeded;
2045                 newlink = LIST_NEXT(newlink, rrl_next);
2046                 rulecnt--;
2047         }
2048
2049         LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
2050                 if (newlink == NULL)
2051                         goto goaround;
2052                 rctl_rule_acquire(link->rrl_rule);
2053                 newlink->rrl_rule = link->rrl_rule;
2054                 newlink->rrl_exceeded = link->rrl_exceeded;
2055                 newlink = LIST_NEXT(newlink, rrl_next);
2056                 rulecnt--;
2057         }
2058
2059         if (rulecnt == 0) {
2060                 /*
2061                  * Free the old rule list.
2062                  */
2063                 while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
2064                         link = LIST_FIRST(&p->p_racct->r_rule_links);
2065                         LIST_REMOVE(link, rrl_next);
2066                         rctl_rule_release(link->rrl_rule);
2067                         uma_zfree(rctl_rule_link_zone, link);
2068                 }
2069
2070                 /*
2071                  * Replace lists and we're done.
2072                  *
2073                  * XXX: Is there any way to switch list heads instead
2074                  *      of iterating here?
2075                  */
2076                 while (!LIST_EMPTY(&newrules)) {
2077                         newlink = LIST_FIRST(&newrules);
2078                         LIST_REMOVE(newlink, rrl_next);
2079                         LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
2080                             newlink, rrl_next);
2081                 }
2082
2083                 RACCT_UNLOCK();
2084
2085                 return;
2086         }
2087
2088 goaround:
2089         RACCT_UNLOCK();
2090
2091         /*
2092          * Rule list changed while we were not holding the rctl_lock.
2093          * Free the new list and try again.
2094          */
2095         while (!LIST_EMPTY(&newrules)) {
2096                 newlink = LIST_FIRST(&newrules);
2097                 LIST_REMOVE(newlink, rrl_next);
2098                 if (newlink->rrl_rule != NULL)
2099                         rctl_rule_release(newlink->rrl_rule);
2100                 uma_zfree(rctl_rule_link_zone, newlink);
2101         }
2102
2103         goto again;
2104 }
2105
2106 /*
2107  * Assign RCTL rules to the newly created process.
2108  */
2109 int
2110 rctl_proc_fork(struct proc *parent, struct proc *child)
2111 {
2112         struct rctl_rule *rule;
2113         struct rctl_rule_link *link;
2114         int error;
2115
2116         ASSERT_RACCT_ENABLED();
2117         RACCT_LOCK_ASSERT();
2118         KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
2119
2120         LIST_INIT(&child->p_racct->r_rule_links);
2121
2122         /*
2123          * Go through limits applicable to the parent and assign them
2124          * to the child.  Rules with 'process' subject have to be duplicated
2125          * in order to make their rr_subject point to the new process.
2126          */
2127         LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
2128                 if (link->rrl_rule->rr_subject_type ==
2129                     RCTL_SUBJECT_TYPE_PROCESS) {
2130                         rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
2131                         if (rule == NULL)
2132                                 goto fail;
2133                         KASSERT(rule->rr_subject.rs_proc == parent,
2134                             ("rule->rr_subject.rs_proc != parent"));
2135                         rule->rr_subject.rs_proc = child;
2136                         error = rctl_racct_add_rule_locked(child->p_racct,
2137                             rule);
2138                         rctl_rule_release(rule);
2139                         if (error != 0)
2140                                 goto fail;
2141                 } else {
2142                         error = rctl_racct_add_rule_locked(child->p_racct,
2143                             link->rrl_rule);
2144                         if (error != 0)
2145                                 goto fail;
2146                 }
2147         }
2148
2149         return (0);
2150
2151 fail:
2152         while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
2153                 link = LIST_FIRST(&child->p_racct->r_rule_links);
2154                 LIST_REMOVE(link, rrl_next);
2155                 rctl_rule_release(link->rrl_rule);
2156                 uma_zfree(rctl_rule_link_zone, link);
2157         }
2158
2159         return (EAGAIN);
2160 }
2161
2162 /*
2163  * Release rules attached to the racct.
2164  */
2165 void
2166 rctl_racct_release(struct racct *racct)
2167 {
2168         struct rctl_rule_link *link;
2169
2170         ASSERT_RACCT_ENABLED();
2171         RACCT_LOCK_ASSERT();
2172
2173         while (!LIST_EMPTY(&racct->r_rule_links)) {
2174                 link = LIST_FIRST(&racct->r_rule_links);
2175                 LIST_REMOVE(link, rrl_next);
2176                 rctl_rule_release(link->rrl_rule);
2177                 uma_zfree(rctl_rule_link_zone, link);
2178         }
2179 }
2180
2181 static void
2182 rctl_init(void)
2183 {
2184
2185         if (!racct_enable)
2186                 return;
2187
2188         rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
2189             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2190         rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
2191             sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
2192             UMA_ALIGN_PTR, 0);
2193
2194         /*
2195          * Set default values, making sure not to overwrite the ones
2196          * fetched from tunables.  Most of those could be set at the
2197          * declaration, except for the rctl_throttle_max - we cannot
2198          * set it there due to hz not being compile time constant.
2199          */
2200         if (rctl_throttle_min < 1)
2201                 rctl_throttle_min = 1;
2202         if (rctl_throttle_max < rctl_throttle_min)
2203                 rctl_throttle_max = 2 * hz;
2204         if (rctl_throttle_pct < 0)
2205                 rctl_throttle_pct = 100;
2206         if (rctl_throttle_pct2 < 0)
2207                 rctl_throttle_pct2 = 100;
2208 }
2209
2210 #else /* !RCTL */
2211
2212 int
2213 sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
2214 {
2215
2216         return (ENOSYS);
2217 }
2218
2219 int
2220 sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
2221 {
2222
2223         return (ENOSYS);
2224 }
2225
2226 int
2227 sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
2228 {
2229
2230         return (ENOSYS);
2231 }
2232
2233 int
2234 sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
2235 {
2236
2237         return (ENOSYS);
2238 }
2239
2240 int
2241 sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
2242 {
2243
2244         return (ENOSYS);
2245 }
2246
2247 #endif /* !RCTL */