2 * Copyright (c) 2010 The FreeBSD Foundation
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include "opt_kdtrace.h"
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/eventhandler.h>
41 #include <sys/kernel.h>
42 #include <sys/kthread.h>
44 #include <sys/loginclass.h>
45 #include <sys/malloc.h>
46 #include <sys/mutex.h>
48 #include <sys/racct.h>
49 #include <sys/resourcevar.h>
51 #include <sys/sched.h>
54 #include <sys/sysent.h>
55 #include <sys/sysproto.h>
64 FEATURE(racct, "Resource Accounting");
/*
 * Mutex protecting the racct counters updated in this file: the helpers
 * that modify counters assert it with mtx_assert(), and the public
 * wrappers acquire it with mtx_lock()/mtx_unlock().
 */
66 static struct mtx racct_lock;
67 MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
/*
 * Zone backing struct racct: set up with uma_zcreate(), allocated from in
 * racct_create() and released to in racct_destroy_locked().
 */
69 static uma_zone_t racct_zone;
/*
 * Forward declarations of static helpers that are defined further down.
 * NOTE(review): the continuation lines of the last two prototypes are not
 * visible in this view.
 */
71 static void racct_sub_racct(struct racct *dest, const struct racct *src);
72 static void racct_sub_cred_locked(struct ucred *cred, int resource,
74 static void racct_add_cred_locked(struct ucred *cred, int resource,
/*
 * Statically defined tracing probes for the "racct" provider.  The rusage
 * probes are fired with SDT_PROBE() by the add/set/sub paths below, and the
 * racct create/destroy probes by racct_create() and racct_destroy_locked().
 * NOTE(review): several definitions are cut off in this view (trailing
 * argument-type lines are not visible), and no SDT_PROBE() call for the
 * join, join_failure or leave probes is visible here; confirm where those
 * are fired.
 */
77 SDT_PROVIDER_DEFINE(racct);
78 SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
80 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
81 "struct proc *", "int", "uint64_t");
82 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
84 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
86 SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
88 SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
89 "struct proc *", "int", "uint64_t");
90 SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
92 SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
94 SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
95 SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
96 SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
98 SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
99 "struct racct *", "struct racct *");
100 SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
/*
 * Table of property flags, one entry per resource.  Each visible entry is
 * a combination of RACCT_RECLAIMABLE, RACCT_INHERITABLE, RACCT_DENIABLE
 * and RACCT_SLOPPY.
 * NOTE(review): the lines naming the resource each entry belongs to are
 * not visible in this view; presumably the RACCT_IS_*() predicates used
 * throughout this file consult this table -- confirm against the header
 * that defines them.
 */
103 int racct_types[] = {
107 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
109 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
115 RACCT_RECLAIMABLE | RACCT_DENIABLE,
117 RACCT_RECLAIMABLE | RACCT_DENIABLE,
119 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
121 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
123 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
125 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
127 RACCT_RECLAIMABLE | RACCT_DENIABLE,
129 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
131 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
133 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
135 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
137 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
139 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
141 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
/*
 * Add every resource counter of 'src' to the matching counter of 'dest'.
 * The caller must hold racct_lock (asserted).  For each index both
 * counters are asserted to be non-negative before the addition.
 */
146 racct_add_racct(struct racct *dest, const struct racct *src)
150 mtx_assert(&racct_lock, MA_OWNED);
153 * Update resource usage in dest.
/* The bound is inclusive: index RACCT_MAX is processed as well. */
155 for (i = 0; i <= RACCT_MAX; i++) {
156 KASSERT(dest->r_resources[i] >= 0,
157 ("racct propagation meltdown: dest < 0"));
158 KASSERT(src->r_resources[i] >= 0,
159 ("racct propagation meltdown: src < 0"));
160 dest->r_resources[i] += src->r_resources[i];
/*
 * Subtract the resource counters of 'src' from those of 'dest'.  The
 * caller must hold racct_lock (asserted).  Only resources for which
 * RACCT_IS_RECLAIMABLE() holds are subtracted.  For resources that are
 * not RACCT_IS_SLOPPY(), both counters are asserted non-negative and
 * 'src' is asserted not to exceed 'dest'.
 */
165 racct_sub_racct(struct racct *dest, const struct racct *src)
169 mtx_assert(&racct_lock, MA_OWNED);
172 * Update resource usage in dest.
174 for (i = 0; i <= RACCT_MAX; i++) {
175 if (!RACCT_IS_SLOPPY(i)) {
176 KASSERT(dest->r_resources[i] >= 0,
177 ("racct propagation meltdown: dest < 0"));
178 KASSERT(src->r_resources[i] >= 0,
179 ("racct propagation meltdown: src < 0"));
180 KASSERT(src->r_resources[i] <= dest->r_resources[i],
181 ("racct propagation meltdown: src > dest"));
183 if (RACCT_IS_RECLAIMABLE(i)) {
184 dest->r_resources[i] -= src->r_resources[i];
/*
 * A negative result is tolerated only for sloppy resources (asserted)
 * and is clamped to zero.
 */
185 if (dest->r_resources[i] < 0) {
186 KASSERT(RACCT_IS_SLOPPY(i),
187 ("racct_sub_racct: usage < 0"));
188 dest->r_resources[i] = 0;
/*
 * Allocate a zero-filled struct racct from racct_zone and store it in
 * the location 'racctp' points to, which must hold NULL on entry
 * (asserted).  The allocation is made with M_WAITOK | M_ZERO.
 * NOTE(review): the create probe is handed 'racctp' (a struct racct **)
 * although the probe is declared with a "struct racct *" argument, and it
 * fires before the allocation has happened -- confirm this is intended.
 */
195 racct_create(struct racct **racctp)
198 SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);
200 KASSERT(*racctp == NULL, ("racct already allocated"));
202 *racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
/*
 * Release a racct back to racct_zone.  The caller must hold racct_lock,
 * and both 'racctp' and the pointer it refers to must be non-NULL (all
 * asserted).  Before the racct is freed its counters are checked: a
 * non-zero counter trips the "destroying non-empty racct" assertion.
 * NOTE(review): the statements controlled by the RACCT_IS_SLOPPY() and
 * !RACCT_IS_RECLAIMABLE() tests are not visible in this view; presumably
 * they skip the resource so that only reclaimable, non-sloppy counters
 * are checked.  The assignment of the local 'racct' is not visible
 * either.
 * NOTE(review): the destroy probe receives 'racctp' (struct racct **)
 * while the probe is declared with "struct racct *".  Also, "%ju" is given
 * r_resources[i] without a cast, whereas racct_sub() prints the same field
 * with "%jd" and an (intmax_t) cast.
 */
206 racct_destroy_locked(struct racct **racctp)
211 SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
213 mtx_assert(&racct_lock, MA_OWNED);
214 KASSERT(racctp != NULL, ("NULL racctp"));
215 KASSERT(*racctp != NULL, ("NULL racct"));
219 for (i = 0; i <= RACCT_MAX; i++) {
220 if (RACCT_IS_SLOPPY(i))
222 if (!RACCT_IS_RECLAIMABLE(i))
224 KASSERT(racct->r_resources[i] == 0,
225 ("destroying non-empty racct: "
226 "%ju allocated for resource %d\n",
227 racct->r_resources[i], i));
229 uma_zfree(racct_zone, racct);
/*
 * Locking wrapper: acquires racct_lock, calls racct_destroy_locked() on
 * 'racct' and releases the lock again.
 */
234 racct_destroy(struct racct **racct)
237 mtx_lock(&racct_lock);
238 racct_destroy_locked(racct);
239 mtx_unlock(&racct_lock);
243 * Increase consumption of 'resource' by 'amount' for 'racct'
244 * and all its parents. Unlike in the other functions, 'amount' here
245 * may be less than zero.
/*
 * Adjust the counter of 'resource' in 'racct' by 'amount'.  The caller
 * must hold racct_lock and 'racct' must be non-NULL (both asserted).
 * NOTE(review): the rest of the parameter list is not visible in this
 * view; callers pass negated uint64_t values, so 'amount' is presumably
 * a signed 64-bit type -- confirm.  The visible body updates only the
 * given racct; walking related containers is done by the callers.
 */
248 racct_alloc_resource(struct racct *racct, int resource,
252 mtx_assert(&racct_lock, MA_OWNED);
253 KASSERT(racct != NULL, ("NULL racct"));
255 racct->r_resources[resource] += amount;
/*
 * Going below zero is tolerated only for sloppy resources (asserted);
 * the counter is then clamped to zero.
 */
256 if (racct->r_resources[resource] < 0) {
257 KASSERT(RACCT_IS_SLOPPY(resource),
258 ("racct_alloc_resource: usage < 0"));
259 racct->r_resources[resource] = 0;
/*
 * Charge 'amount' of 'resource' to process 'p': first to p->p_racct, then
 * to the containers reachable from p->p_ucred via racct_add_cred_locked().
 * The proc lock must be held (asserted).  racct_lock is not taken here;
 * racct_alloc_resource() asserts it and racct_add() acquires it around
 * this call.
 * rctl_enforce() is consulted before anything is charged; a non-zero
 * result for a resource that is RACCT_IS_DENIABLE() fires the add_failure
 * probe.
 * NOTE(review): the statements controlled by the P_SYSTEM test and by the
 * failure branch, and the return statements, are not visible in this
 * view; presumably system processes are skipped and the failure branch
 * returns the error without charging.
 */
264 racct_add_locked(struct proc *p, int resource, uint64_t amount)
270 if (p->p_flag & P_SYSTEM)
273 SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
276 * We need proc lock to dereference p->p_ucred.
278 PROC_LOCK_ASSERT(p, MA_OWNED);
281 error = rctl_enforce(p, resource, amount);
282 if (error && RACCT_IS_DENIABLE(resource)) {
283 SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
288 racct_alloc_resource(p->p_racct, resource, amount);
289 racct_add_cred_locked(p->p_ucred, resource, amount);
295 * Increase allocation of 'resource' by 'amount' for process 'p'.
296 * Return 0 if it's below limits, or an errno value if it's not.
/*
 * Locking wrapper: holds racct_lock around racct_add_locked() and keeps
 * its result in 'error'.
 */
299 racct_add(struct proc *p, int resource, uint64_t amount)
303 mtx_lock(&racct_lock);
304 error = racct_add_locked(p, resource, amount);
305 mtx_unlock(&racct_lock);
/*
 * Charge 'amount' of 'resource' to every container reachable from 'cred':
 * the racct of cred->cr_ruidinfo, the racct of cred->cr_prison and of each
 * prison found by following pr_parent, and the racct of
 * cred->cr_loginclass.  No lock is taken here; racct_alloc_resource()
 * asserts that racct_lock is held.
 */
310 racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
314 SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
317 racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
318 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
319 racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
321 racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
325 * Increase allocation of 'resource' by 'amount' for credential 'cred'.
326 * Doesn't check for limits and never fails.
328 * XXX: Should this ever return an error?
/*
 * Locking wrapper: holds racct_lock around racct_add_cred_locked().
 */
331 racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
334 mtx_lock(&racct_lock);
335 racct_add_cred_locked(cred, resource, amount);
336 mtx_unlock(&racct_lock);
340 * Increase allocation of 'resource' by 'amount' for process 'p'.
341 * Doesn't check for limits and never fails.
/*
 * Charge 'amount' of 'resource' to process 'p' and to the containers of
 * its credential without calling rctl_enforce().  The proc lock must be
 * held (asserted).
 * NOTE(review): the statement controlled by the P_SYSTEM test is not
 * visible in this view; presumably system processes are skipped.
 * NOTE(review): racct_lock is released after p->p_racct is updated and is
 * taken again inside racct_add_cred(), so the process and credential
 * counters are not updated under one hold of the lock.  racct_set_force()
 * instead calls racct_add_cred_locked() while still holding it; consider
 * doing the same here.
 */
344 racct_add_force(struct proc *p, int resource, uint64_t amount)
347 if (p->p_flag & P_SYSTEM)
350 SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);
353 * We need proc lock to dereference p->p_ucred.
355 PROC_LOCK_ASSERT(p, MA_OWNED);
357 mtx_lock(&racct_lock);
358 racct_alloc_resource(p->p_racct, resource, amount);
359 mtx_unlock(&racct_lock);
360 racct_add_cred(p->p_ucred, resource, amount);
/*
 * Set the usage of 'resource' for process 'p' to 'amount'.  The change
 * relative to the current p->p_racct counter is computed as 'diff' and
 * applied to p->p_racct and to the credential's containers.  The proc
 * lock must be held (asserted); racct_lock is asserted by
 * racct_alloc_resource() and is acquired by racct_set() around this call.
 * A decrease is only expected for RACCT_IS_RECLAIMABLE() resources
 * (asserted).  A non-zero rctl_enforce() result for a
 * RACCT_IS_DENIABLE() resource fires the set_failure probe.
 * NOTE(review): several controlling lines are not visible in this view:
 * the statement under the P_SYSTEM test, any condition guarding the
 * rctl_enforce() call, the body of the failure branch, and the test that
 * selects between racct_add_cred_locked(diff) and
 * racct_sub_cred_locked(-diff) -- presumably the sign of 'diff'.
 */
364 racct_set_locked(struct proc *p, int resource, uint64_t amount)
371 if (p->p_flag & P_SYSTEM)
374 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
377 * We need proc lock to dereference p->p_ucred.
379 PROC_LOCK_ASSERT(p, MA_OWNED);
381 diff = amount - p->p_racct->r_resources[resource];
383 KASSERT(diff >= 0 || RACCT_IS_RECLAIMABLE(resource),
384 ("racct_set: usage of non-reclaimable resource %d dropping",
389 error = rctl_enforce(p, resource, diff);
390 if (error && RACCT_IS_DENIABLE(resource)) {
391 SDT_PROBE(racct, kernel, rusage, set_failure, p,
392 resource, amount, 0, 0);
397 racct_alloc_resource(p->p_racct, resource, diff);
399 racct_add_cred_locked(p->p_ucred, resource, diff);
401 racct_sub_cred_locked(p->p_ucred, resource, -diff);
407 * Set allocation of 'resource' to 'amount' for process 'p'.
408 * Return 0 if it's below limits, or an errno value if it's not.
410 * Note that decreasing the allocation always returns 0,
411 * even if it's above the limit.
/*
 * Locking wrapper: holds racct_lock around racct_set_locked() and keeps
 * its result in 'error'.
 */
414 racct_set(struct proc *p, int resource, uint64_t amount)
418 mtx_lock(&racct_lock);
419 error = racct_set_locked(p, resource, amount);
420 mtx_unlock(&racct_lock);
/*
 * Set the usage of 'resource' for process 'p' to 'amount' without calling
 * rctl_enforce().  The proc lock must be held (asserted); racct_lock is
 * acquired here and held across the update of p->p_racct and of the
 * credential's containers.  This fires the same "set" probe as
 * racct_set_locked().
 * NOTE(review): the statement under the P_SYSTEM test and the test that
 * selects between racct_add_cred_locked(diff) and
 * racct_sub_cred_locked(-diff) are not visible in this view; presumably
 * system processes are skipped and the choice follows the sign of 'diff'.
 */
425 racct_set_force(struct proc *p, int resource, uint64_t amount)
429 if (p->p_flag & P_SYSTEM)
432 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
435 * We need proc lock to dereference p->p_ucred.
437 PROC_LOCK_ASSERT(p, MA_OWNED);
439 mtx_lock(&racct_lock);
440 diff = amount - p->p_racct->r_resources[resource];
441 racct_alloc_resource(p->p_racct, resource, diff);
443 racct_add_cred_locked(p->p_ucred, resource, diff);
445 racct_sub_cred_locked(p->p_ucred, resource, -diff);
446 mtx_unlock(&racct_lock);
450 * Returns amount of 'resource' the process 'p' can keep allocated.
451 * Allocating more than that would be denied, unless the resource
452 * is marked undeniable. Amount of already allocated resource does
/*
 * Thin wrapper: the visible body returns rctl_get_limit(p, resource).
 * NOTE(review): lines around the return are not visible in this view
 * (possibly conditional compilation with an alternative return value).
 */
456 racct_get_limit(struct proc *p, int resource)
460 return (rctl_get_limit(p, resource));
467 * Returns amount of 'resource' the process 'p' can keep allocated.
468 * Allocating more than that would be denied, unless the resource
469 * is marked undeniable. Amount of already allocated resource does
/*
 * Thin wrapper: the visible body returns rctl_get_available(p, resource).
 * NOTE(review): lines around the return are not visible in this view
 * (possibly conditional compilation with an alternative return value).
 */
473 racct_get_available(struct proc *p, int resource)
477 return (rctl_get_available(p, resource));
484 * Decrease allocation of 'resource' by 'amount' for process 'p'.
/*
 * Give back 'amount' of 'resource' previously charged to process 'p':
 * the amount is removed from p->p_racct and, via racct_sub_cred_locked(),
 * from the containers of p->p_ucred, all under racct_lock.  The proc lock
 * must be held, 'resource' must be RACCT_IS_RECLAIMABLE(), and 'amount'
 * must not exceed the current p->p_racct counter (all asserted).
 * NOTE(review): the statement under the P_SYSTEM test is not visible in
 * this view; presumably system processes are skipped.
 * NOTE(review): in the assertion message "%ju" is given the uint64_t
 * 'amount' without a (uintmax_t) cast, while the "%jd" argument is cast
 * to (intmax_t).
 */
487 racct_sub(struct proc *p, int resource, uint64_t amount)
490 if (p->p_flag & P_SYSTEM)
493 SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
496 * We need proc lock to dereference p->p_ucred.
498 PROC_LOCK_ASSERT(p, MA_OWNED);
499 KASSERT(RACCT_IS_RECLAIMABLE(resource),
500 ("racct_sub: called for non-reclaimable resource %d", resource));
502 mtx_lock(&racct_lock);
503 KASSERT(amount <= p->p_racct->r_resources[resource],
504 ("racct_sub: freeing %ju of resource %d, which is more "
505 "than allocated %jd for %s (pid %d)", amount, resource,
506 (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
/*
 * 'amount' is unsigned, so -amount wraps modulo 2^64; getting a negative
 * adjustment relies on the conversion to racct_alloc_resource()'s
 * parameter type (presumably signed -- its declaration line is not
 * visible here).
 */
508 racct_alloc_resource(p->p_racct, resource, -amount);
509 racct_sub_cred_locked(p->p_ucred, resource, amount);
510 mtx_unlock(&racct_lock);
/*
 * Remove 'amount' of 'resource' from every container reachable from
 * 'cred': the racct of cred->cr_ruidinfo, the racct of cred->cr_prison
 * and of each prison found by following pr_parent, and the racct of
 * cred->cr_loginclass.  'resource' must be RACCT_IS_RECLAIMABLE()
 * (asserted).  No lock is taken here; racct_alloc_resource() asserts that
 * racct_lock is held.  The reduction is expressed by passing -amount,
 * computed on the unsigned 'amount'.
 */
514 racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
518 SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
522 KASSERT(RACCT_IS_RECLAIMABLE(resource),
523 ("racct_sub_cred: called for non-reclaimable resource %d",
527 racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
528 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
529 racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
531 racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
535 * Decrease allocation of 'resource' by 'amount' for credential 'cred'.
/*
 * Locking wrapper: holds racct_lock around racct_sub_cred_locked().
 */
538 racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
541 mtx_lock(&racct_lock);
542 racct_sub_cred_locked(cred, resource, amount);
543 mtx_unlock(&racct_lock);
547 * Inherit resource usage information from the parent process.
/*
 * Set up accounting for a newly forked 'child'.  A racct is created for
 * the child before the P_SYSTEM test, so it is allocated for system
 * processes as well.  Under racct_lock, rctl_proc_fork() is called, the
 * parent's non-zero counters of RACCT_IS_INHERITABLE() resources are
 * copied to the child with racct_set_locked(), and one RACCT_NPROC and
 * one RACCT_NTHR are charged to the child.
 * NOTE(review): the statements that react to 'error' after
 * rctl_proc_fork() and racct_set_locked(), the statement under the
 * P_SYSTEM test, the statement under the inheritance filter, and any
 * proc locking are not visible in this view.
 * NOTE(review): the results of the two racct_add_locked() calls are
 * combined with "+=", so 'error' may end up holding the sum of two error
 * values rather than a single errno.
 */
550 racct_proc_fork(struct proc *parent, struct proc *child)
555 * Create racct for the child process.
557 racct_create(&child->p_racct);
560 * No resource accounting for kernel processes.
562 if (child->p_flag & P_SYSTEM)
567 mtx_lock(&racct_lock);
570 error = rctl_proc_fork(parent, child);
576 * Inherit resource usage.
578 for (i = 0; i <= RACCT_MAX; i++) {
579 if (parent->p_racct->r_resources[i] == 0 ||
580 !RACCT_IS_INHERITABLE(i))
583 error = racct_set_locked(child, i,
584 parent->p_racct->r_resources[i]);
589 error = racct_add_locked(child, RACCT_NPROC, 1);
590 error += racct_add_locked(child, RACCT_NTHR, 1);
593 mtx_unlock(&racct_lock);
601 * Called at the end of fork1(), to handle rules that require the process
602 * to be fully initialized.
/*
 * Under racct_lock, call rctl_enforce() for RACCT_NPROC and RACCT_NTHR
 * with an amount of zero, so nothing additional is charged.  The return
 * values of both calls are ignored.
 */
605 racct_proc_fork_done(struct proc *child)
610 mtx_lock(&racct_lock);
611 rctl_enforce(child, RACCT_NPROC, 0);
612 rctl_enforce(child, RACCT_NTHR, 0);
613 mtx_unlock(&racct_lock);
/*
 * Tear down accounting for an exiting process.  The final CPU time, taken
 * from p->p_rux.rux_runtime via cputick2usec(), is recorded as RACCT_CPU.
 * Then every non-zero counter of a RACCT_IS_RECLAIMABLE() resource is set
 * to zero with racct_set_locked(), which also takes the amounts out of
 * the credential's containers.  After racct_lock is dropped,
 * rctl_racct_release() is called on the racct and the racct is destroyed.
 * NOTE(review): the KASSERT and the clamp that follows it test the same
 * condition; presumably they are alternatives selected by preprocessor
 * conditionals that are not visible in this view.  The statements under
 * the two filtering tests in the loop and any proc locking are not
 * visible either.
 */
619 racct_proc_exit(struct proc *p)
626 * We don't need to calculate rux, proc_reap() has already done this.
628 runtime = cputick2usec(p->p_rux.rux_runtime);
630 KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
632 if (runtime < p->p_prev_runtime)
633 runtime = p->p_prev_runtime;
635 mtx_lock(&racct_lock);
636 racct_set_locked(p, RACCT_CPU, runtime);
638 for (i = 0; i <= RACCT_MAX; i++) {
639 if (p->p_racct->r_resources[i] == 0)
641 if (!RACCT_IS_RECLAIMABLE(i))
643 racct_set_locked(p, i, 0);
646 mtx_unlock(&racct_lock);
650 rctl_racct_release(p->p_racct);
652 racct_destroy(&p->p_racct);
656 * Called after credentials change, to move resource utilisation
/*
 * Move the usage recorded in p->p_racct from the containers of 'oldcred'
 * to those of 'newcred'.  The proc lock must not be held by the caller
 * (asserted with MA_NOTOWNED).  Under racct_lock, for each kind of
 * container (cr_ruidinfo, cr_loginclass, cr_prison) whose pointer differs
 * between the two credentials, the process usage is subtracted from the
 * old container and added to the new one.  For prisons this is done for
 * the prison itself and for every prison reached through pr_parent.
 * After the lock is dropped, rctl_proc_ucred_changed() is called with the
 * new credential.
 */
660 racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
661 struct ucred *newcred)
663 struct uidinfo *olduip, *newuip;
664 struct loginclass *oldlc, *newlc;
665 struct prison *oldpr, *newpr, *pr;
667 PROC_LOCK_ASSERT(p, MA_NOTOWNED);
669 newuip = newcred->cr_ruidinfo;
670 olduip = oldcred->cr_ruidinfo;
671 newlc = newcred->cr_loginclass;
672 oldlc = oldcred->cr_loginclass;
673 newpr = newcred->cr_prison;
674 oldpr = oldcred->cr_prison;
676 mtx_lock(&racct_lock);
677 if (newuip != olduip) {
678 racct_sub_racct(olduip->ui_racct, p->p_racct);
679 racct_add_racct(newuip->ui_racct, p->p_racct);
681 if (newlc != oldlc) {
682 racct_sub_racct(oldlc->lc_racct, p->p_racct);
683 racct_add_racct(newlc->lc_racct, p->p_racct);
685 if (newpr != oldpr) {
686 for (pr = oldpr; pr != NULL; pr = pr->pr_parent)
687 racct_sub_racct(pr->pr_prison_racct->prr_racct,
689 for (pr = newpr; pr != NULL; pr = pr->pr_parent)
690 racct_add_racct(pr->pr_prison_racct->prr_racct,
693 mtx_unlock(&racct_lock);
696 rctl_proc_ucred_changed(p, newcred);
/*
 * Body of the periodic accounting pass.
 * NOTE(review): the function header is not visible in this view;
 * presumably this is the body of the "racctd" kernel process registered
 * below.  The statements under the p_state and P_SYSTEM tests, the body of
 * the per-thread loop, any enclosing loop or sleep, and any proc locking
 * are not visible either.
 *
 * With allproc_lock held shared, each process is visited.  For a process
 * that passes the visible filters, the time since p->p_stats->p_start is
 * computed from microuptime(), the CPU time is taken from
 * p->p_rux.rux_runtime via cputick2usec() and remembered in
 * p->p_prev_runtime, and then RACCT_CPU and RACCT_WALLCLOCK are updated
 * with racct_set_locked() under racct_lock.  The return values of those
 * two calls are ignored.
 */
705 struct timeval wallclock;
709 sx_slock(&allproc_lock);
711 FOREACH_PROC_IN_SYSTEM(p) {
712 if (p->p_state != PRS_NORMAL)
714 if (p->p_flag & P_SYSTEM)
717 microuptime(&wallclock);
718 timevalsub(&wallclock, &p->p_stats->p_start);
721 FOREACH_THREAD_IN_PROC(p, td) {
726 runtime = cputick2usec(p->p_rux.rux_runtime);
729 KASSERT(runtime >= p->p_prev_runtime,
730 ("runtime < p_prev_runtime"));
732 if (runtime < p->p_prev_runtime)
733 runtime = p->p_prev_runtime;
735 p->p_prev_runtime = runtime;
736 mtx_lock(&racct_lock);
737 racct_set_locked(p, RACCT_CPU, runtime);
738 racct_set_locked(p, RACCT_WALLCLOCK,
739 (uint64_t)wallclock.tv_sec * 1000000 +
741 mtx_unlock(&racct_lock);
744 sx_sunlock(&allproc_lock);
/*
 * Kernel process descriptor passed to kproc_start() by the SYSINIT below,
 * which is registered at SI_SUB_RACCTD / SI_ORDER_FIRST.
 * NOTE(review): the initializer fields are not visible in this view.
 */
749 static struct kproc_desc racctd_kp = {
754 SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
/*
 * One-time initialization, registered through the SYSINIT below at
 * SI_SUB_RACCT / SI_ORDER_FIRST.  It creates the "racct" UMA zone sized
 * for struct racct (pointer aligned, UMA_ZONE_NOFREE, with no
 * constructor/destructor callbacks) and sets prison0.pr_prison_racct to
 * the result of prison_racct_find("0").
 * NOTE(review): the function header is not visible in this view; the name
 * racct_init is taken from the SYSINIT line.
 */
760 racct_zone = uma_zcreate("racct", sizeof(struct racct),
761 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
763 * XXX: Move this somewhere.
765 prison0.pr_prison_racct = prison_racct_find("0");
767 SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
/*
 * Second set of definitions repeating the signatures of the public
 * functions defined earlier in this file.
 * NOTE(review): neither the bodies nor any preprocessor conditionals are
 * visible in this view; presumably these are the stub versions compiled
 * when resource accounting is configured out -- confirm against the full
 * file before relying on their behavior.
 */
772 racct_add(struct proc *p, int resource, uint64_t amount)
779 racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
784 racct_add_force(struct proc *p, int resource, uint64_t amount)
791 racct_set(struct proc *p, int resource, uint64_t amount)
798 racct_set_force(struct proc *p, int resource, uint64_t amount)
803 racct_sub(struct proc *p, int resource, uint64_t amount)
808 racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
813 racct_get_limit(struct proc *p, int resource)
820 racct_get_available(struct proc *p, int resource)
827 racct_create(struct racct **racctp)
832 racct_destroy(struct racct **racctp)
837 racct_proc_fork(struct proc *parent, struct proc *child)
844 racct_proc_fork_done(struct proc *child)
849 racct_proc_exit(struct proc *p)