2 * Copyright (c) 2010 The FreeBSD Foundation
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include "opt_kdtrace.h"
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/eventhandler.h>
41 #include <sys/kernel.h>
42 #include <sys/kthread.h>
44 #include <sys/loginclass.h>
45 #include <sys/malloc.h>
46 #include <sys/mutex.h>
48 #include <sys/racct.h>
49 #include <sys/resourcevar.h>
51 #include <sys/sched.h>
54 #include <sys/sysent.h>
55 #include <sys/sysproto.h>
/*
 * File-scope state: the "racct" kernel feature declaration, the mutex
 * that the functions in this file assert or acquire around accounting
 * updates, and the UMA zone from which struct racct objects are allocated.
 */
64 FEATURE(racct, "Resource Accounting");
66 static struct mtx racct_lock;
67 MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
69 static uma_zone_t racct_zone;
/* Forward declarations of static helpers that are used before they are defined. */
71 static void racct_sub_racct(struct racct *dest, const struct racct *src);
72 static void racct_sub_cred_locked(struct ucred *cred, int resource,
74 static void racct_add_cred_locked(struct ucred *cred, int resource,
/*
 * SDT provider "racct" and its probe definitions.  The rusage probes take
 * a subject pointer, a resource number and an amount; the racct probes
 * take one or two struct racct pointers.  Several of these probes are
 * fired through SDT_PROBE() calls in the functions in this file.
 */
77 SDT_PROVIDER_DEFINE(racct);
78 SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
80 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
81 "struct proc *", "int", "uint64_t");
82 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
84 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
86 SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
88 SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
89 "struct proc *", "int", "uint64_t");
90 SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
92 SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
94 SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
95 SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
96 SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
98 SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
99 "struct racct *", "struct racct *");
100 SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
/*
 * Table of per-resource property flags.  Every visible initializer is a
 * combination of RACCT_RECLAIMABLE, RACCT_INHERITABLE, RACCT_DENIABLE and
 * RACCT_SLOPPY.
 * NOTE(review): the lines that say which resource each entry belongs to
 * are not visible in this excerpt; presumably the RACCT_IS_*() tests used
 * elsewhere in this file consult this table by resource number -- confirm
 * against the header that defines those macros.
 */
103 int racct_types[] = {
107 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
109 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
115 RACCT_RECLAIMABLE | RACCT_DENIABLE,
117 RACCT_RECLAIMABLE | RACCT_DENIABLE,
119 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
121 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
123 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
125 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
127 RACCT_RECLAIMABLE | RACCT_DENIABLE,
129 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
131 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
133 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
135 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
137 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
139 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
141 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
/*
 * Add every resource counter of 'src' to the matching counter of 'dest',
 * for resource numbers 0 through RACCT_MAX inclusive.  The caller must
 * hold racct_lock (asserted).  Both counters are asserted to be
 * non-negative before each addition.
 */
146 racct_add_racct(struct racct *dest, const struct racct *src)
150 mtx_assert(&racct_lock, MA_OWNED);
153 * Update resource usage in dest.
155 for (i = 0; i <= RACCT_MAX; i++) {
156 KASSERT(dest->r_resources[i] >= 0,
157 ("racct propagation meltdown: dest < 0"));
158 KASSERT(src->r_resources[i] >= 0,
159 ("racct propagation meltdown: src < 0"));
160 dest->r_resources[i] += src->r_resources[i];
/*
 * Subtract the resource counters of 'src' from those of 'dest', for
 * resource numbers 0 through RACCT_MAX inclusive.  The caller must hold
 * racct_lock (asserted).
 */
165 racct_sub_racct(struct racct *dest, const struct racct *src)
169 mtx_assert(&racct_lock, MA_OWNED);
172 * Update resource usage in dest.
174 for (i = 0; i <= RACCT_MAX; i++) {
/* Strict consistency checks apply only to resources not marked sloppy. */
175 if (!RACCT_IS_SLOPPY(i)) {
176 KASSERT(dest->r_resources[i] >= 0,
177 ("racct propagation meltdown: dest < 0"));
178 KASSERT(src->r_resources[i] >= 0,
179 ("racct propagation meltdown: src < 0"));
180 KASSERT(src->r_resources[i] <= dest->r_resources[i],
181 ("racct propagation meltdown: src > dest"));
/*
 * The subtraction itself is done for reclaimable resources.  A negative
 * result is asserted to happen only for sloppy resources and is reset to 0.
 */
183 if (RACCT_IS_RECLAIMABLE(i)) {
184 dest->r_resources[i] -= src->r_resources[i];
185 if (dest->r_resources[i] < 0) {
186 KASSERT(RACCT_IS_SLOPPY(i),
187 ("racct_sub_racct: usage < 0"));
188 dest->r_resources[i] = 0;
/*
 * Allocate a zero-filled struct racct from racct_zone and store it in
 * *racctp.  *racctp must be NULL on entry (asserted).  The allocation is
 * requested with M_WAITOK.
 */
195 racct_create(struct racct **racctp)
198 SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);
200 KASSERT(*racctp == NULL, ("racct already allocated"));
202 *racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
/*
 * Return a racct to racct_zone.  The caller must hold racct_lock, and both
 * racctp and *racctp must be non-NULL (all asserted).  Before the free, a
 * loop over all resource numbers asserts that counters are zero.
 * NOTE(review): the statements that follow the sloppy and non-reclaimable
 * tests are not visible in this excerpt; presumably those resources are
 * skipped by the zero check -- confirm.
 * NOTE(review): the assertion message prints r_resources[i] with "%ju"
 * and no cast, while racct_sub() casts the same field to intmax_t and
 * prints it with "%jd" -- confirm the intended format.
 */
206 racct_destroy_locked(struct racct **racctp)
211 SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
213 mtx_assert(&racct_lock, MA_OWNED);
214 KASSERT(racctp != NULL, ("NULL racctp"));
215 KASSERT(*racctp != NULL, ("NULL racct"));
219 for (i = 0; i <= RACCT_MAX; i++) {
220 if (RACCT_IS_SLOPPY(i))
222 if (!RACCT_IS_RECLAIMABLE(i))
224 KASSERT(racct->r_resources[i] == 0,
225 ("destroying non-empty racct: "
226 "%ju allocated for resource %d\n",
227 racct->r_resources[i], i));
229 uma_zfree(racct_zone, racct);
/*
 * Locking wrapper: acquires racct_lock, calls racct_destroy_locked() on
 * the given racct pointer, and releases the lock.
 */
234 racct_destroy(struct racct **racct)
237 mtx_lock(&racct_lock);
238 racct_destroy_locked(racct);
239 mtx_unlock(&racct_lock);
243 * Increase consumption of 'resource' by 'amount' for 'racct'
244 * and all its parents. Unlike in other cases, 'amount' here
245 * may be less than zero.
/*
 * Adjust racct->r_resources[resource] by 'amount'.  The caller must hold
 * racct_lock and 'racct' must be non-NULL (both asserted).  If the counter
 * becomes negative it is reset to 0; that is asserted to happen only for
 * resources marked sloppy.
 */
248 racct_alloc_resource(struct racct *racct, int resource,
252 mtx_assert(&racct_lock, MA_OWNED);
253 KASSERT(racct != NULL, ("NULL racct"));
255 racct->r_resources[resource] += amount;
256 if (racct->r_resources[resource] < 0) {
257 KASSERT(RACCT_IS_SLOPPY(resource),
258 ("racct_alloc_resource: usage < 0"));
259 racct->r_resources[resource] = 0;
/*
 * Charge 'amount' of 'resource' to process 'p'.  The proc lock must be
 * held (asserted).  rctl_enforce() is consulted first; when it reports an
 * error for a deniable resource, the add_failure probe fires.
 * NOTE(review): the remainder of that failure branch is not visible in
 * this excerpt; presumably it returns the error without charging --
 * confirm.
 * The visible charging step updates p->p_racct and then the containers
 * reachable from p->p_ucred via racct_add_cred_locked().
 */
264 racct_add_locked(struct proc *p, int resource, uint64_t amount)
270 SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
273 * We need proc lock to dereference p->p_ucred.
275 PROC_LOCK_ASSERT(p, MA_OWNED);
278 error = rctl_enforce(p, resource, amount);
279 if (error && RACCT_IS_DENIABLE(resource)) {
280 SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
285 racct_alloc_resource(p->p_racct, resource, amount);
286 racct_add_cred_locked(p->p_ucred, resource, amount);
292 * Increase allocation of 'resource' by 'amount' for process 'p'.
293 * Return 0 if it's below limits, or errno, if it's not.
/*
 * Locking wrapper: runs racct_add_locked() with racct_lock held and keeps
 * its result in 'error'.
 */
296 racct_add(struct proc *p, int resource, uint64_t amount)
300 mtx_lock(&racct_lock);
301 error = racct_add_locked(p, resource, amount);
302 mtx_unlock(&racct_lock);
/*
 * Charge 'amount' of 'resource' to every container reachable from 'cred':
 * the real-uid uidinfo racct, the racct of each prison from cr_prison up
 * through the pr_parent chain, and the login class racct.
 */
307 racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
311 SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
314 racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
315 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
316 racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
318 racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
322 * Increase allocation of 'resource' by 'amount' for credential 'cred'.
323 * Doesn't check for limits and never fails.
325 * XXX: Shouldn't this ever return an error?
/*
 * Locking wrapper: runs racct_add_cred_locked() with racct_lock held.
 */
328 racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
331 mtx_lock(&racct_lock);
332 racct_add_cred_locked(cred, resource, amount);
333 mtx_unlock(&racct_lock);
337 * Increase allocation of 'resource' by 'amount' for process 'p'.
338 * Doesn't check for limits and never fails.
/*
 * Charge 'amount' of 'resource' to process 'p' with no rctl_enforce()
 * call visible.  The proc lock must be held (asserted).
 * NOTE(review): p->p_racct is updated under racct_lock, the lock is
 * dropped, and racct_add_cred() then takes racct_lock again for the
 * credential containers.  The two updates are therefore not performed in
 * a single critical section -- confirm this is intended.
 */
341 racct_add_force(struct proc *p, int resource, uint64_t amount)
344 SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);
347 * We need proc lock to dereference p->p_ucred.
349 PROC_LOCK_ASSERT(p, MA_OWNED);
351 mtx_lock(&racct_lock);
352 racct_alloc_resource(p->p_racct, resource, amount);
353 mtx_unlock(&racct_lock);
354 racct_add_cred(p->p_ucred, resource, amount);
/*
 * Set the usage of 'resource' for process 'p' to 'amount'.  The proc lock
 * must be held (asserted).  'diff' is the requested amount minus the
 * current per-process counter; a negative diff is asserted to occur only
 * for reclaimable resources.  rctl_enforce() is called with 'diff', and
 * the set_failure probe fires when it reports an error for a deniable
 * resource.  'diff' is then applied to p->p_racct and propagated to the
 * credential containers.
 * NOTE(review): the conditions choosing between
 * racct_add_cred_locked(diff) and racct_sub_cred_locked(-diff) are not
 * visible in this excerpt; presumably they test the sign of diff --
 * confirm.
 */
358 racct_set_locked(struct proc *p, int resource, uint64_t amount)
365 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
368 * We need proc lock to dereference p->p_ucred.
370 PROC_LOCK_ASSERT(p, MA_OWNED);
372 diff = amount - p->p_racct->r_resources[resource];
374 KASSERT(diff >= 0 || RACCT_IS_RECLAIMABLE(resource),
375 ("racct_set: usage of non-reclaimable resource %d dropping",
380 error = rctl_enforce(p, resource, diff);
381 if (error && RACCT_IS_DENIABLE(resource)) {
382 SDT_PROBE(racct, kernel, rusage, set_failure, p,
383 resource, amount, 0, 0);
388 racct_alloc_resource(p->p_racct, resource, diff);
390 racct_add_cred_locked(p->p_ucred, resource, diff);
392 racct_sub_cred_locked(p->p_ucred, resource, -diff);
398 * Set allocation of 'resource' to 'amount' for process 'p'.
399 * Return 0 if it's below limits, or errno, if it's not.
401 * Note that decreasing the allocation always returns 0,
402 * even if it's above the limit.
/*
 * Locking wrapper: runs racct_set_locked() with racct_lock held and keeps
 * its result in 'error'.
 */
405 racct_set(struct proc *p, int resource, uint64_t amount)
409 mtx_lock(&racct_lock);
410 error = racct_set_locked(p, resource, amount);
411 mtx_unlock(&racct_lock);
/*
 * Set the usage of 'resource' for process 'p' to 'amount'; no
 * rctl_enforce() call is visible in this variant.  The proc lock must be
 * held (asserted).  Under racct_lock, the difference from the current
 * per-process counter is applied to p->p_racct and propagated to the
 * credential containers.  This fires the same "set" probe as
 * racct_set_locked().
 * NOTE(review): the conditions choosing between the add and sub
 * credential calls are not visible in this excerpt; presumably they test
 * the sign of diff -- confirm.
 */
416 racct_set_force(struct proc *p, int resource, uint64_t amount)
420 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
423 * We need proc lock to dereference p->p_ucred.
425 PROC_LOCK_ASSERT(p, MA_OWNED);
427 mtx_lock(&racct_lock);
428 diff = amount - p->p_racct->r_resources[resource];
429 racct_alloc_resource(p->p_racct, resource, diff);
431 racct_add_cred_locked(p->p_ucred, resource, diff);
433 racct_sub_cred_locked(p->p_ucred, resource, -diff);
434 mtx_unlock(&racct_lock);
438 * Returns amount of 'resource' the process 'p' can keep allocated.
439 * Allocating more than that would be denied, unless the resource
440 * is marked undeniable. Amount of already allocated resource does
/*
 * The visible body returns the value of rctl_get_limit(p, resource).
 * NOTE(review): any alternative return path is not visible in this excerpt.
 */
444 racct_get_limit(struct proc *p, int resource)
448 return (rctl_get_limit(p, resource));
455 * Returns amount of 'resource' the process 'p' can keep allocated.
456 * Allocating more than that would be denied, unless the resource
457 * is marked undeniable. Amount of already allocated resource does
/*
 * The visible body returns the value of rctl_get_available(p, resource).
 * NOTE(review): any alternative return path is not visible in this excerpt.
 */
461 racct_get_available(struct proc *p, int resource)
465 return (rctl_get_available(p, resource));
472 * Decrease allocation of 'resource' by 'amount' for process 'p'.
/*
 * Release 'amount' of 'resource' previously charged to process 'p'.  The
 * proc lock must be held and the resource must be reclaimable (both
 * asserted).  Under racct_lock, the amount is asserted not to exceed the
 * current per-process usage, then it is removed from p->p_racct and from
 * the credential containers.
 * NOTE(review): 'amount' is uint64_t, so '-amount' is a modular negation;
 * this appears to rely on racct_alloc_resource() adding it into a signed
 * counter -- confirm against that function's parameter type.
 */
475 racct_sub(struct proc *p, int resource, uint64_t amount)
478 SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
481 * We need proc lock to dereference p->p_ucred.
483 PROC_LOCK_ASSERT(p, MA_OWNED);
484 KASSERT(RACCT_IS_RECLAIMABLE(resource),
485 ("racct_sub: called for non-reclaimable resource %d", resource));
487 mtx_lock(&racct_lock);
488 KASSERT(amount <= p->p_racct->r_resources[resource],
489 ("racct_sub: freeing %ju of resource %d, which is more "
490 "than allocated %jd for %s (pid %d)", amount, resource,
491 (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
493 racct_alloc_resource(p->p_racct, resource, -amount);
494 racct_sub_cred_locked(p->p_ucred, resource, amount);
495 mtx_unlock(&racct_lock);
/*
 * Remove 'amount' of 'resource' from every container reachable from
 * 'cred': the real-uid uidinfo racct, the racct of each prison from
 * cr_prison up through the pr_parent chain, and the login class racct.
 * The resource must be reclaimable (asserted).  The removal is expressed
 * by passing '-amount' to racct_alloc_resource().
 */
499 racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
503 SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
507 KASSERT(RACCT_IS_RECLAIMABLE(resource),
508 ("racct_sub_cred: called for non-reclaimable resource %d",
512 racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
513 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
514 racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
516 racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
520 * Decrease allocation of 'resource' by 'amount' for credential 'cred'.
/*
 * Locking wrapper: runs racct_sub_cred_locked() with racct_lock held.
 */
523 racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
526 mtx_lock(&racct_lock);
527 racct_sub_cred_locked(cred, resource, amount);
528 mtx_unlock(&racct_lock);
532 * Inherit resource usage information from the parent process.
/*
 * Set up accounting for a newly forked 'child'.  Creates the child racct,
 * then, under racct_lock, calls rctl_proc_fork(), copies the parent's
 * usage of inheritable resources into the child with racct_set_locked(),
 * and charges one RACCT_NPROC and one RACCT_NTHR to the child.
 * NOTE(review): the statement after the zero/non-inheritable test, the
 * error checks after rctl_proc_fork() and racct_set_locked(), and the
 * condition guarding the final racct_proc_exit(child) are not visible in
 * this excerpt; presumably the latter runs only on failure -- confirm.
 * NOTE(review): "error +=" adds two return codes together.  If both
 * racct_add_locked() calls fail, the sum is not one of the returned error
 * values; consider checking each call separately.
 */
535 racct_proc_fork(struct proc *parent, struct proc *child)
540 * Create racct for the child process.
542 racct_create(&child->p_racct);
546 mtx_lock(&racct_lock);
549 error = rctl_proc_fork(parent, child);
555 * Inherit resource usage.
557 for (i = 0; i <= RACCT_MAX; i++) {
558 if (parent->p_racct->r_resources[i] == 0 ||
559 !RACCT_IS_INHERITABLE(i))
562 error = racct_set_locked(child, i,
563 parent->p_racct->r_resources[i]);
568 error = racct_add_locked(child, RACCT_NPROC, 1);
569 error += racct_add_locked(child, RACCT_NTHR, 1);
572 mtx_unlock(&racct_lock);
577 racct_proc_exit(child);
583 * Called at the end of fork1(), to handle rules that require the process
584 * to be fully initialized.
/*
 * Under racct_lock, calls rctl_enforce() for RACCT_NPROC and RACCT_NTHR
 * with an amount of 0, so no additional usage is requested.  The return
 * values of both calls are ignored.
 */
587 racct_proc_fork_done(struct proc *child)
592 mtx_lock(&racct_lock);
593 rctl_enforce(child, RACCT_NPROC, 0);
594 rctl_enforce(child, RACCT_NTHR, 0);
595 mtx_unlock(&racct_lock);
/*
 * Tear down accounting for process 'p'.  Converts p->p_rux.rux_runtime
 * to microseconds; the result is asserted to be at least p_prev_runtime
 * and is also clamped up to that value.  Under racct_lock, RACCT_CPU is
 * set to that runtime and the loop calls racct_set_locked(p, i, 0) to
 * zero resource counters.  Afterwards rctl_racct_release() is called on
 * the racct and it is destroyed with racct_destroy().
 * NOTE(review): the statements following the zero-usage and
 * non-reclaimable tests are not visible in this excerpt; presumably those
 * resources are skipped -- confirm.  Return values of racct_set_locked()
 * are ignored here.
 */
601 racct_proc_exit(struct proc *p)
608 * We don't need to calculate rux, proc_reap() has already done this.
610 runtime = cputick2usec(p->p_rux.rux_runtime);
612 KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
614 if (runtime < p->p_prev_runtime)
615 runtime = p->p_prev_runtime;
617 mtx_lock(&racct_lock);
618 racct_set_locked(p, RACCT_CPU, runtime);
620 for (i = 0; i <= RACCT_MAX; i++) {
621 if (p->p_racct->r_resources[i] == 0)
623 if (!RACCT_IS_RECLAIMABLE(i))
625 racct_set_locked(p, i, 0);
628 mtx_unlock(&racct_lock);
632 rctl_racct_release(p->p_racct);
634 racct_destroy(&p->p_racct);
638 * Called after credentials change, to move resource utilisation
/*
 * Move the usage recorded in p->p_racct from the containers of 'oldcred'
 * to those of 'newcred'.  The proc lock must NOT be held (asserted).
 * Under racct_lock, for each of the real-uid uidinfo, the login class and
 * the prison: if the old and new container differ, the process usage is
 * subtracted from the old one and added to the new one.  For prisons this
 * is done along the whole pr_parent chain of both the old and the new
 * prison.  After the lock is released, rctl_proc_ucred_changed() is called
 * with the new credential.
 */
642 racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
643 struct ucred *newcred)
645 struct uidinfo *olduip, *newuip;
646 struct loginclass *oldlc, *newlc;
647 struct prison *oldpr, *newpr, *pr;
649 PROC_LOCK_ASSERT(p, MA_NOTOWNED);
651 newuip = newcred->cr_ruidinfo;
652 olduip = oldcred->cr_ruidinfo;
653 newlc = newcred->cr_loginclass;
654 oldlc = oldcred->cr_loginclass;
655 newpr = newcred->cr_prison;
656 oldpr = oldcred->cr_prison;
658 mtx_lock(&racct_lock);
659 if (newuip != olduip) {
660 racct_sub_racct(olduip->ui_racct, p->p_racct);
661 racct_add_racct(newuip->ui_racct, p->p_racct);
663 if (newlc != oldlc) {
664 racct_sub_racct(oldlc->lc_racct, p->p_racct);
665 racct_add_racct(newlc->lc_racct, p->p_racct);
667 if (newpr != oldpr) {
668 for (pr = oldpr; pr != NULL; pr = pr->pr_parent)
669 racct_sub_racct(pr->pr_prison_racct->prr_racct,
671 for (pr = newpr; pr != NULL; pr = pr->pr_parent)
672 racct_add_racct(pr->pr_prison_racct->prr_racct,
675 mtx_unlock(&racct_lock);
678 rctl_proc_ucred_changed(p, newcred);
/*
 * Under racct_lock, add all of src's counters into dest and then subtract
 * src from itself.  Because racct_sub_racct() performs its subtraction
 * only for reclaimable resources, only those counters of src are cleared
 * by the second step.
 */
683 racct_move(struct racct *dest, struct racct *src)
686 mtx_lock(&racct_lock);
688 racct_add_racct(dest, src);
689 racct_sub_racct(src, src);
691 mtx_unlock(&racct_lock);
/*
 * Body of a pass over all processes that refreshes CPU and wall-clock
 * accounting.
 * NOTE(review): the function header and any enclosing loop are not
 * visible in this excerpt; the racctd_kp descriptor and its SYSINIT
 * suggest this is the main routine of the "racctd" kernel process --
 * confirm.
 * With allproc_lock held shared, each process is examined.  Wall-clock
 * time is computed as current uptime minus p_stats->p_start.  CPU runtime
 * is converted from p_rux.rux_runtime, asserted and clamped to be at
 * least p_prev_runtime, and stored back into p_prev_runtime.  Both values
 * are then recorded with racct_set_locked() under racct_lock; the return
 * values of those calls are ignored.
 * NOTE(review): the statement following the PRS_NORMAL test and the body
 * of the per-thread loop are not visible in this excerpt.
 */
699 struct timeval wallclock;
703 sx_slock(&allproc_lock);
705 FOREACH_PROC_IN_SYSTEM(p) {
706 if (p->p_state != PRS_NORMAL)
709 microuptime(&wallclock);
710 timevalsub(&wallclock, &p->p_stats->p_start);
713 FOREACH_THREAD_IN_PROC(p, td)
715 runtime = cputick2usec(p->p_rux.rux_runtime);
718 KASSERT(runtime >= p->p_prev_runtime,
719 ("runtime < p_prev_runtime"));
721 if (runtime < p->p_prev_runtime)
722 runtime = p->p_prev_runtime;
724 p->p_prev_runtime = runtime;
725 mtx_lock(&racct_lock);
726 racct_set_locked(p, RACCT_CPU, runtime);
727 racct_set_locked(p, RACCT_WALLCLOCK,
728 (uint64_t)wallclock.tv_sec * 1000000 +
730 mtx_unlock(&racct_lock);
733 sx_sunlock(&allproc_lock);
/*
 * Kernel process descriptor handed to kproc_start() by the SYSINIT below,
 * at SI_SUB_RACCTD / SI_ORDER_FIRST.
 * NOTE(review): the initializer fields are not visible in this excerpt.
 */
738 static struct kproc_desc racctd_kp = {
743 SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
/*
 * Initialisation: creates the "racct" UMA zone sized for struct racct
 * (pointer-aligned, UMA_ZONE_NOFREE) and assigns prison0's prison racct
 * from prison_racct_find("0").
 * NOTE(review): the function header is not visible in this excerpt; the
 * SYSINIT below registers racct_init at SI_SUB_RACCT / SI_ORDER_FIRST, so
 * these statements presumably form its body -- confirm.
 */
749 racct_zone = uma_zcreate("racct", sizeof(struct racct),
750 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
752 * XXX: Move this somewhere.
754 prison0.pr_prison_racct = prison_racct_find("0");
756 SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
/*
 * Second set of definitions with the same names and parameter lists as
 * functions defined earlier in this file.
 * NOTE(review): only the signature lines are visible in this excerpt.
 * Presumably these are alternative implementations selected by a
 * preprocessor conditional that is not visible here -- confirm, and check
 * what each body returns.
 */
761 racct_add(struct proc *p, int resource, uint64_t amount)
768 racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
773 racct_add_force(struct proc *p, int resource, uint64_t amount)
780 racct_set(struct proc *p, int resource, uint64_t amount)
787 racct_set_force(struct proc *p, int resource, uint64_t amount)
792 racct_sub(struct proc *p, int resource, uint64_t amount)
797 racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
802 racct_get_limit(struct proc *p, int resource)
809 racct_get_available(struct proc *p, int resource)
816 racct_create(struct racct **racctp)
821 racct_destroy(struct racct **racctp)
826 racct_proc_fork(struct proc *parent, struct proc *child)
833 racct_proc_fork_done(struct proc *child)
838 racct_proc_exit(struct proc *p)