2 * Copyright (c) 2010 The FreeBSD Foundation
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include "opt_kdtrace.h"
37 #include <sys/param.h>
38 #include <sys/eventhandler.h>
39 #include <sys/param.h>
41 #include <sys/kernel.h>
42 #include <sys/kthread.h>
44 #include <sys/loginclass.h>
45 #include <sys/malloc.h>
46 #include <sys/mutex.h>
48 #include <sys/racct.h>
49 #include <sys/resourcevar.h>
51 #include <sys/sched.h>
54 #include <sys/sysent.h>
55 #include <sys/sysproto.h>
56 #include <sys/systm.h>
/* Advertise the "racct" feature together with its description string. */
65 FEATURE(racct, "Resource Accounting");
/* Mutex that the functions in this file take or assert around r_resources updates. */
67 static struct mtx racct_lock;
68 MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
/* UMA zone used by racct_create() and racct_destroy_locked() for struct racct. */
70 static uma_zone_t racct_zone;
/* Prototypes for static helpers that are called before they are defined. */
72 static void racct_sub_racct(struct racct *dest, const struct racct *src);
73 static void racct_sub_cred_locked(struct ucred *cred, int resource,
75 static void racct_add_cred_locked(struct ucred *cred, int resource,
/*
 * SDT provider "racct" and its probes.  The rusage probes are fired with
 * SDT_PROBE() from racct_add(), racct_add_cred_locked(), racct_add_force(),
 * racct_set_locked(), racct_set_force(), racct_sub() and
 * racct_sub_cred_locked().
 */
78 SDT_PROVIDER_DEFINE(racct);
79 SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
81 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
82 "struct proc *", "int", "uint64_t");
83 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
85 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
87 SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
89 SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
90 "struct proc *", "int", "uint64_t");
91 SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
93 SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
/*
 * Container lifecycle probes.  create and destroy are fired by
 * racct_create() and racct_destroy_locked(); no SDT_PROBE() call for join,
 * join_failure or leave appears in this part of the file.
 */
95 SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
96 SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
97 SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
99 SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
100 "struct racct *", "struct racct *");
101 SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
/*
 * Table of property flags (RACCT_RECLAIMABLE, RACCT_INHERITABLE,
 * RACCT_DENIABLE, RACCT_SLOPPY), one initializer per resource.
 * NOTE(review): presumably the RACCT_IS_*() checks used throughout this
 * file index this table by resource number -- confirm against sys/racct.h.
 */
104 int racct_types[] = {
108 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
110 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
116 RACCT_RECLAIMABLE | RACCT_DENIABLE,
118 RACCT_RECLAIMABLE | RACCT_DENIABLE,
120 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
122 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
124 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
126 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
128 RACCT_RECLAIMABLE | RACCT_DENIABLE,
130 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
132 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
134 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
136 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
138 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
140 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
142 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
147 racct_add_racct(struct racct *dest, const struct racct *src)
151 mtx_assert(&racct_lock, MA_OWNED);
154 * Update resource usage in dest.
156 for (i = 0; i <= RACCT_MAX; i++) {
157 KASSERT(dest->r_resources[i] >= 0,
158 ("racct propagation meltdown: dest < 0"));
159 KASSERT(src->r_resources[i] >= 0,
160 ("racct propagation meltdown: src < 0"));
161 dest->r_resources[i] += src->r_resources[i];
166 racct_sub_racct(struct racct *dest, const struct racct *src)
170 mtx_assert(&racct_lock, MA_OWNED);
173 * Update resource usage in dest.
175 for (i = 0; i <= RACCT_MAX; i++) {
176 if (!RACCT_IS_SLOPPY(i)) {
177 KASSERT(dest->r_resources[i] >= 0,
178 ("racct propagation meltdown: dest < 0"));
179 KASSERT(src->r_resources[i] >= 0,
180 ("racct propagation meltdown: src < 0"));
181 KASSERT(src->r_resources[i] <= dest->r_resources[i],
182 ("racct propagation meltdown: src > dest"));
184 if (RACCT_IS_RECLAIMABLE(i)) {
185 dest->r_resources[i] -= src->r_resources[i];
186 if (dest->r_resources[i] < 0) {
187 KASSERT(RACCT_IS_SLOPPY(i),
188 ("racct_sub_racct: usage < 0"));
189 dest->r_resources[i] = 0;
196 racct_create(struct racct **racctp)
199 SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);
201 KASSERT(*racctp == NULL, ("racct already allocated"));
203 *racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
207 racct_destroy_locked(struct racct **racctp)
212 SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
214 mtx_assert(&racct_lock, MA_OWNED);
215 KASSERT(racctp != NULL, ("NULL racctp"));
216 KASSERT(*racctp != NULL, ("NULL racct"));
220 for (i = 0; i <= RACCT_MAX; i++) {
221 if (RACCT_IS_SLOPPY(i))
223 if (!RACCT_IS_RECLAIMABLE(i))
225 KASSERT(racct->r_resources[i] == 0,
226 ("destroying non-empty racct: "
227 "%ju allocated for resource %d\n",
228 racct->r_resources[i], i));
230 uma_zfree(racct_zone, racct);
235 racct_destroy(struct racct **racct)
238 mtx_lock(&racct_lock);
239 racct_destroy_locked(racct);
240 mtx_unlock(&racct_lock);
244 * Increase consumption of 'resource' by 'amount' for 'racct'
245 * and all its parents. Differently from other cases, 'amount' here
246 * may be less than zero.
/*
 * Adjusts a single counter of one racct; racct_lock must be held.
 * NOTE(review): only racct->r_resources[resource] is touched below; the
 * uid, prison and login-class containers are walked by
 * racct_add_cred_locked() and racct_sub_cred_locked() instead, so the
 * "and all its parents" wording above does not match the visible code.
 */
249 racct_alloc_resource(struct racct *racct, int resource,
253 mtx_assert(&racct_lock, MA_OWNED);
254 KASSERT(racct != NULL, ("NULL racct"));
/* Apply the delta; a negative result is only acceptable for sloppy resources and is clamped to 0. */
256 racct->r_resources[resource] += amount;
257 if (racct->r_resources[resource] < 0) {
258 KASSERT(RACCT_IS_SLOPPY(resource),
259 ("racct_alloc_resource: usage < 0"));
260 racct->r_resources[resource] = 0;
265 * Increase allocation of 'resource' by 'amount' for process 'p'.
266 * Return 0 if it's below limits, or errno, if it's not.
/*
 * The caller must hold the proc lock of 'p' (asserted below); racct_lock is
 * taken and released inside this function.
 */
269 racct_add(struct proc *p, int resource, uint64_t amount)
/* Processes flagged P_SYSTEM are checked for first, before any accounting. */
275 if (p->p_flag & P_SYSTEM)
278 SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
281 * We need proc lock to dereference p->p_ucred.
283 PROC_LOCK_ASSERT(p, MA_OWNED);
285 mtx_lock(&racct_lock);
/*
 * Ask rctl first.  A non-zero result stops the charge only when the
 * resource is deniable; in that case the failure probe fires and the lock
 * is dropped.
 */
287 error = rctl_enforce(p, resource, amount);
288 if (error && RACCT_IS_DENIABLE(resource)) {
289 SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
291 mtx_unlock(&racct_lock);
/* Charge the process and then its credential containers under the same lock hold. */
295 racct_alloc_resource(p->p_racct, resource, amount);
296 racct_add_cred_locked(p->p_ucred, resource, amount);
297 mtx_unlock(&racct_lock);
303 racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
307 SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
310 racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
311 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
312 racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
314 racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
318 * Increase allocation of 'resource' by 'amount' for credential 'cred'.
319 * Doesn't check for limits and never fails.
321 * XXX: Shouldn't this ever return an error?
324 racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
327 mtx_lock(&racct_lock);
328 racct_add_cred_locked(cred, resource, amount);
329 mtx_unlock(&racct_lock);
333 * Increase allocation of 'resource' by 'amount' for process 'p'.
334 * Doesn't check for limits and never fails.
337 racct_add_force(struct proc *p, int resource, uint64_t amount)
340 if (p->p_flag & P_SYSTEM)
343 SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);
346 * We need proc lock to dereference p->p_ucred.
348 PROC_LOCK_ASSERT(p, MA_OWNED);
350 mtx_lock(&racct_lock);
351 racct_alloc_resource(p->p_racct, resource, amount);
352 mtx_unlock(&racct_lock);
353 racct_add_cred(p->p_ucred, resource, amount);
/*
 * Set the usage of 'resource' for process 'p' to 'amount' by applying the
 * difference from the currently recorded value.  No lock is taken here;
 * the callers in this file (racct_set(), racct_proc_fork(),
 * racct_proc_exit() and the periodic update loop) take racct_lock first.
 */
357 racct_set_locked(struct proc *p, int resource, uint64_t amount)
364 if (p->p_flag & P_SYSTEM)
367 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
370 * We need proc lock to dereference p->p_ucred.
372 PROC_LOCK_ASSERT(p, MA_OWNED);
/* diff = requested value minus what the process is currently charged. */
374 diff = amount - p->p_racct->r_resources[resource];
376 KASSERT(diff >= 0 || RACCT_IS_RECLAIMABLE(resource),
377 ("racct_set: usage of non-reclaimable resource %d dropping",
/* rctl_enforce() is handed the delta, not the new total; only deniable resources can fail. */
382 error = rctl_enforce(p, resource, diff);
383 if (error && RACCT_IS_DENIABLE(resource)) {
384 SDT_PROBE(racct, kernel, rusage, set_failure, p,
385 resource, amount, 0, 0);
/* Apply the delta to the process itself. */
390 racct_alloc_resource(p->p_racct, resource, diff);
/* NOTE(review): presumably the add is used for diff > 0 and the sub (with -diff) for diff < 0 -- confirm. */
392 racct_add_cred_locked(p->p_ucred, resource, diff);
394 racct_sub_cred_locked(p->p_ucred, resource, -diff);
400 * Set allocation of 'resource' to 'amount' for process 'p'.
401 * Return 0 if it's below limits, or errno, if it's not.
403 * Note that decreasing the allocation always returns 0,
404 * even if it's above the limit.
407 racct_set(struct proc *p, int resource, uint64_t amount)
411 mtx_lock(&racct_lock);
412 error = racct_set_locked(p, resource, amount);
413 mtx_unlock(&racct_lock);
/*
 * Set the usage of 'resource' for process 'p' to 'amount'.  Unlike
 * racct_set_locked(), no rctl_enforce() call is made here.  Takes
 * racct_lock itself; the caller must hold the proc lock of 'p'.
 */
418 racct_set_force(struct proc *p, int resource, uint64_t amount)
422 if (p->p_flag & P_SYSTEM)
/* Fires the same "set" probe as racct_set_locked(). */
425 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
428 * We need proc lock to dereference p->p_ucred.
430 PROC_LOCK_ASSERT(p, MA_OWNED);
432 mtx_lock(&racct_lock);
/* Compute and apply the delta between the requested and the recorded value. */
433 diff = amount - p->p_racct->r_resources[resource];
434 racct_alloc_resource(p->p_racct, resource, diff);
/* NOTE(review): presumably the add is used for diff > 0 and the sub (with -diff) for diff < 0 -- confirm. */
436 racct_add_cred_locked(p->p_ucred, resource, diff);
438 racct_sub_cred_locked(p->p_ucred, resource, -diff);
439 mtx_unlock(&racct_lock);
443 * Returns amount of 'resource' the process 'p' can keep allocated.
444 * Allocating more than that would be denied, unless the resource
445 * is marked undeniable. Amount of already allocated resource does
/* Thin wrapper: the value comes straight from rctl_get_limit(). */
449 racct_get_limit(struct proc *p, int resource)
453 return (rctl_get_limit(p, resource));
460 * Returns amount of 'resource' the process 'p' can keep allocated.
461 * Allocating more than that would be denied, unless the resource
462 * is marked undeniable. Amount of already allocated resource does
/*
 * Thin wrapper: the value comes straight from rctl_get_available().
 * NOTE(review): the header text above reads the same as the one on
 * racct_get_limit(); confirm it states how the two functions differ.
 */
466 racct_get_available(struct proc *p, int resource)
470 return (rctl_get_available(p, resource));
477 * Decrease allocation of 'resource' by 'amount' for process 'p'.
480 racct_sub(struct proc *p, int resource, uint64_t amount)
483 if (p->p_flag & P_SYSTEM)
486 SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
489 * We need proc lock to dereference p->p_ucred.
491 PROC_LOCK_ASSERT(p, MA_OWNED);
492 KASSERT(RACCT_IS_RECLAIMABLE(resource),
493 ("racct_sub: called for non-reclaimable resource %d", resource));
495 mtx_lock(&racct_lock);
496 KASSERT(amount <= p->p_racct->r_resources[resource],
497 ("racct_sub: freeing %ju of resource %d, which is more "
498 "than allocated %jd for %s (pid %d)", amount, resource,
499 (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
501 racct_alloc_resource(p->p_racct, resource, -amount);
502 racct_sub_cred_locked(p->p_ucred, resource, amount);
503 mtx_unlock(&racct_lock);
/*
 * Give back 'amount' of 'resource' in every container reachable from
 * 'cred'.  Mirrors racct_add_cred_locked() with a negated amount;
 * racct_alloc_resource() asserts that racct_lock is held.
 */
507 racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
511 SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
515 KASSERT(RACCT_IS_RECLAIMABLE(resource),
516 ("racct_sub_cred: called for non-reclaimable resource %d",
/* Real-uid container first, then the prison chain up through pr_parent, then the login class. */
520 racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
521 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
522 racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
524 racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
528 * Decrease allocation of 'resource' by 'amount' for credential 'cred'.
531 racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
534 mtx_lock(&racct_lock);
535 racct_sub_cred_locked(cred, resource, amount);
536 mtx_unlock(&racct_lock);
540 * Inherit resource usage information from the parent process.
/*
 * Creates the child's racct and, under racct_lock, copies the parent's
 * inheritable usage into it before calling rctl_proc_fork().
 */
543 racct_proc_fork(struct proc *parent, struct proc *child)
548 * Create racct for the child process.
550 racct_create(&child->p_racct);
553 * No resource accounting for kernel processes.
555 if (child->p_flag & P_SYSTEM)
560 mtx_lock(&racct_lock);
563 * Inherit resource usage.
565 for (i = 0; i <= RACCT_MAX; i++) {
/* Only resources with non-zero parent usage that are marked inheritable are copied. */
566 if (parent->p_racct->r_resources[i] == 0 ||
567 !RACCT_IS_INHERITABLE(i))
/* Goes through racct_set_locked(), so the copy is subject to its rctl_enforce() check. */
570 error = racct_set_locked(child, i,
571 parent->p_racct->r_resources[i]);
574 * XXX: The only purpose of these two lines is
575 * to prevent from tripping checks in racct_destroy().
577 for (i = 0; i <= RACCT_MAX; i++)
578 racct_set_locked(child, i, 0);
584 error = rctl_proc_fork(parent, child);
587 * XXX: The only purpose of these two lines is to prevent from
588 * tripping checks in racct_destroy().
590 for (i = 0; i <= RACCT_MAX; i++)
591 racct_set_locked(child, i, 0);
/* NOTE(review): presumably the two reset loops above and this destroy run only when an earlier step failed -- confirm. */
597 racct_destroy_locked(&child->p_racct);
598 mtx_unlock(&racct_lock);
/*
 * Final accounting for an exiting process: record its CPU time, release
 * all reclaimable usage, then drop the rctl state and destroy the racct.
 */
606 racct_proc_exit(struct proc *p)
613 * We don't need to calculate rux, proc_reap() has already done this.
615 runtime = cputick2usec(p->p_rux.rux_runtime);
/*
 * NOTE(review): the assertion and the clamp below test the same condition;
 * confirm which of the two is active in a given build.
 */
617 KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
619 if (runtime < p->p_prev_runtime)
620 runtime = p->p_prev_runtime;
622 mtx_lock(&racct_lock);
623 racct_set_locked(p, RACCT_CPU, runtime);
/* Zero every reclaimable resource that still has usage recorded. */
625 for (i = 0; i <= RACCT_MAX; i++) {
626 if (p->p_racct->r_resources[i] == 0)
628 if (!RACCT_IS_RECLAIMABLE(i))
630 racct_set_locked(p, i, 0);
633 mtx_unlock(&racct_lock);
/* Done after racct_lock is dropped; racct_destroy() takes the lock again itself. */
637 rctl_racct_release(p->p_racct);
639 racct_destroy(&p->p_racct);
643 * Called after credentials change, to move resource utilisation
/*
 * For each container kind (real uid, login class, prison chain) that
 * differs between 'oldcred' and 'newcred', the process's usage is
 * subtracted from the old container and added to the new one.  Must be
 * called without the proc lock of 'p' held (asserted below).
 */
647 racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
648 struct ucred *newcred)
650 struct uidinfo *olduip, *newuip;
651 struct loginclass *oldlc, *newlc;
652 struct prison *oldpr, *newpr, *pr;
654 PROC_LOCK_ASSERT(p, MA_NOTOWNED);
656 newuip = newcred->cr_ruidinfo;
657 olduip = oldcred->cr_ruidinfo;
658 newlc = newcred->cr_loginclass;
659 oldlc = oldcred->cr_loginclass;
660 newpr = newcred->cr_prison;
661 oldpr = oldcred->cr_prison;
/* All three moves happen under a single hold of racct_lock. */
663 mtx_lock(&racct_lock);
664 if (newuip != olduip) {
665 racct_sub_racct(olduip->ui_racct, p->p_racct);
666 racct_add_racct(newuip->ui_racct, p->p_racct);
668 if (newlc != oldlc) {
669 racct_sub_racct(oldlc->lc_racct, p->p_racct);
670 racct_add_racct(newlc->lc_racct, p->p_racct);
/* Prisons: subtract along the whole old chain, then add along the whole new chain. */
672 if (newpr != oldpr) {
673 for (pr = oldpr; pr != NULL; pr = pr->pr_parent)
674 racct_sub_racct(pr->pr_prison_racct->prr_racct,
676 for (pr = newpr; pr != NULL; pr = pr->pr_parent)
677 racct_add_racct(pr->pr_prison_racct->prr_racct,
680 mtx_unlock(&racct_lock);
/* rctl is told about the new credential after racct_lock has been released. */
683 rctl_proc_ucred_changed(p, newcred);
/* Holds the elapsed time computed below: current uptime minus the process start time. */
692 struct timeval wallclock;
696 sx_slock(&allproc_lock);
/* Walk all processes under the shared allproc lock, testing state and the P_SYSTEM flag first. */
698 FOREACH_PROC_IN_SYSTEM(p) {
699 if (p->p_state != PRS_NORMAL)
701 if (p->p_flag & P_SYSTEM)
704 microuptime(&wallclock);
705 timevalsub(&wallclock, &p->p_stats->p_start);
708 FOREACH_THREAD_IN_PROC(p, td) {
/* Accumulated CPU time of the process, converted to microseconds. */
713 runtime = cputick2usec(p->p_rux.rux_runtime);
/*
 * NOTE(review): the assertion and the clamp below test the same condition;
 * confirm which of the two is active in a given build.
 */
716 KASSERT(runtime >= p->p_prev_runtime,
717 ("runtime < p_prev_runtime"));
719 if (runtime < p->p_prev_runtime)
720 runtime = p->p_prev_runtime;
/* Remember the value for the next pass, then publish CPU and wall-clock usage. */
722 p->p_prev_runtime = runtime;
723 mtx_lock(&racct_lock);
724 racct_set_locked(p, RACCT_CPU, runtime);
/*
 * NOTE(review): tv_sec * 1000000 is evaluated in the type of tv_sec; if
 * that type is 32 bits wide the product overflows once the process is
 * older than about 2147 seconds.  Confirm, and consider widening tv_sec
 * to 64 bits before the multiplication.
 */
725 racct_set_locked(p, RACCT_WALLCLOCK,
726 wallclock.tv_sec * 1000000 + wallclock.tv_usec);
727 mtx_unlock(&racct_lock);
730 sx_sunlock(&allproc_lock);
/* Descriptor handed to kproc_start() by the SYSINIT below. */
735 static struct kproc_desc racctd_kp = {
/* Invoke kproc_start(&racctd_kp) at SI_SUB_RACCTD, SI_ORDER_FIRST. */
740 SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
/* Create the UMA zone that racct_create() allocates struct racct objects from. */
746 racct_zone = uma_zcreate("racct", sizeof(struct racct),
747 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
749 * XXX: Move this somewhere.
/* Give prison0 the prison racct looked up under the name "0". */
751 prison0.pr_prison_racct = prison_racct_find("0");
/* Run racct_init at SI_SUB_RACCT, SI_ORDER_FIRST. */
753 SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
/*
 * Second set of definitions for the public racct entry points, with the
 * same names and parameter lists as the ones earlier in this file.
 * NOTE(review): presumably these are the no-op variants built when
 * resource accounting is compiled out -- confirm against the surrounding
 * preprocessor conditionals.
 */
758 racct_add(struct proc *p, int resource, uint64_t amount)
765 racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
770 racct_add_force(struct proc *p, int resource, uint64_t amount)
777 racct_set(struct proc *p, int resource, uint64_t amount)
784 racct_set_force(struct proc *p, int resource, uint64_t amount)
789 racct_sub(struct proc *p, int resource, uint64_t amount)
794 racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
799 racct_get_limit(struct proc *p, int resource)
806 racct_get_available(struct proc *p, int resource)
813 racct_create(struct racct **racctp)
818 racct_destroy(struct racct **racctp)
823 racct_proc_fork(struct proc *parent, struct proc *child)
830 racct_proc_exit(struct proc *p)