2 * Copyright (c) 2010 The FreeBSD Foundation
5 * This software was developed by Edward Tomasz Napierala under sponsorship
6 * from the FreeBSD Foundation.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include "opt_kdtrace.h"
37 #include <sys/param.h>
38 #include <sys/eventhandler.h>
39 #include <sys/param.h>
41 #include <sys/kernel.h>
42 #include <sys/kthread.h>
44 #include <sys/loginclass.h>
45 #include <sys/malloc.h>
46 #include <sys/mutex.h>
48 #include <sys/racct.h>
49 #include <sys/resourcevar.h>
51 #include <sys/sched.h>
54 #include <sys/sysent.h>
55 #include <sys/sysproto.h>
56 #include <sys/systm.h>
/*
 * File-scope state for resource accounting: the "racct" feature
 * declaration, the mutex that the functions in this file assert or
 * acquire before touching racct counters, the UMA zone that struct racct
 * objects are allocated from and freed to, and forward declarations of
 * static helpers that are used before their definitions.
 */
65 FEATURE(racct, "Resource Accounting");
67 static struct mtx racct_lock;
68 MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
70 static uma_zone_t racct_zone;
72 static void racct_sub_racct(struct racct *dest, const struct racct *src);
73 static void racct_sub_cred_locked(struct ucred *cred, int resource,
75 static void racct_add_cred_locked(struct ucred *cred, int resource,
/*
 * Statically defined tracing provider "racct" and its probes.
 *
 * The "rusage" probes are fired by the add/set/sub entry points in this
 * file; the "racct" create and destroy probes are fired by racct_create()
 * and racct_destroy_locked().
 *
 * NOTE(review): no SDT_PROBE() call for the join, join_failure or leave
 * probes is visible in this view of the file -- confirm whether they are
 * fired elsewhere or are unused.
 */
78 SDT_PROVIDER_DEFINE(racct);
79 SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
81 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
82 "struct proc *", "int", "uint64_t");
83 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
85 SDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
87 SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
89 SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
90 "struct proc *", "int", "uint64_t");
91 SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
93 SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
95 SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
96 SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
97 SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
99 SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
100 "struct racct *", "struct racct *");
101 SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
/*
 * Table of property flags, one entry per resource, built from
 * RACCT_RECLAIMABLE, RACCT_INHERITABLE, RACCT_DENIABLE and RACCT_SLOPPY.
 *
 * NOTE(review): presumably indexed by resource number and consulted by
 * the RACCT_IS_*() macros used throughout this file; the index
 * designators and the end of the initializer are not visible in this
 * view, so which entry belongs to which resource cannot be confirmed
 * from here.
 */
104 int racct_types[] = {
108 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
110 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
116 RACCT_RECLAIMABLE | RACCT_DENIABLE,
118 RACCT_RECLAIMABLE | RACCT_DENIABLE,
120 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
122 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
124 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
126 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
128 RACCT_RECLAIMABLE | RACCT_DENIABLE,
130 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
132 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
134 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
136 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
138 RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
140 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
142 RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
/*
 * Add every resource counter of 'src' to the matching counter of 'dest',
 * for resource indices 0 through RACCT_MAX inclusive.
 *
 * The caller must hold racct_lock.  Both counters are asserted to be
 * non-negative before each addition.
 */
147 racct_add_racct(struct racct *dest, const struct racct *src)
151 mtx_assert(&racct_lock, MA_OWNED);
154 * Update resource usage in dest.
156 for (i = 0; i <= RACCT_MAX; i++) {
157 KASSERT(dest->r_resources[i] >= 0,
158 ("racct propagation meltdown: dest < 0"));
159 KASSERT(src->r_resources[i] >= 0,
160 ("racct propagation meltdown: src < 0"));
161 dest->r_resources[i] += src->r_resources[i];
/*
 * Subtract the resource counters of 'src' from those of 'dest', for
 * resource indices 0 through RACCT_MAX inclusive.  Only resources for
 * which RACCT_IS_RECLAIMABLE() holds are actually decremented.
 *
 * The caller must hold racct_lock.
 */
166 racct_sub_racct(struct racct *dest, const struct racct *src)
170 mtx_assert(&racct_lock, MA_OWNED);
173 * Update resource usage in dest.
175 for (i = 0; i <= RACCT_MAX; i++) {
/*
 * Exact bookkeeping is only asserted for non-sloppy resources: both
 * counters non-negative and 'src' not larger than 'dest'.
 */
176 if (!RACCT_IS_SLOPPY(i)) {
177 KASSERT(dest->r_resources[i] >= 0,
178 ("racct propagation meltdown: dest < 0"));
179 KASSERT(src->r_resources[i] >= 0,
180 ("racct propagation meltdown: src < 0"));
181 KASSERT(src->r_resources[i] <= dest->r_resources[i],
182 ("racct propagation meltdown: src > dest"));
184 if (RACCT_IS_RECLAIMABLE(i)) {
185 dest->r_resources[i] -= src->r_resources[i];
/*
 * A result below zero is clamped to 0; the assertion documents that
 * this is only expected for sloppy resources.
 */
186 if (dest->r_resources[i] < 0) {
187 KASSERT(RACCT_IS_SLOPPY(i),
188 ("racct_sub_racct: usage < 0"));
189 dest->r_resources[i] = 0;
/*
 * Allocate a zero-filled struct racct from racct_zone and store the
 * pointer in *racctp.  *racctp is asserted to be NULL on entry.  The
 * allocation is requested with M_WAITOK | M_ZERO.
 *
 * NOTE(review): the create probe is declared with a "struct racct *"
 * argument but is passed 'racctp' (a struct racct **), and it fires
 * before the allocation -- confirm that this is intended.
 */
196 racct_create(struct racct **racctp)
199 SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);
201 KASSERT(*racctp == NULL, ("racct already allocated"));
203 *racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
/*
 * Return a racct to racct_zone.  The caller must hold racct_lock, and
 * both 'racctp' and '*racctp' are asserted to be non-NULL.
 *
 * Before freeing, every resource that is reclaimable and not sloppy is
 * asserted to have a zero counter; sloppy and non-reclaimable resources
 * are exempt from that check.
 *
 * NOTE(review): the assignment of the local 'racct' (presumably from
 * *racctp) and any reset of *racctp are not visible in this view.
 * NOTE(review): the assertion message uses %ju for r_resources[i]
 * without a cast, while other code in this file compares the same
 * counters against 0 and racct_sub() casts them to intmax_t for %jd --
 * confirm the format matches the field's type.
 */
207 racct_destroy_locked(struct racct **racctp)
212 SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
214 mtx_assert(&racct_lock, MA_OWNED);
215 KASSERT(racctp != NULL, ("NULL racctp"));
216 KASSERT(*racctp != NULL, ("NULL racct"));
220 for (i = 0; i <= RACCT_MAX; i++) {
221 if (RACCT_IS_SLOPPY(i))
223 if (!RACCT_IS_RECLAIMABLE(i))
225 KASSERT(racct->r_resources[i] == 0,
226 ("destroying non-empty racct: "
227 "%ju allocated for resource %d\n",
228 racct->r_resources[i], i));
230 uma_zfree(racct_zone, racct);
/*
 * Locking wrapper: acquires racct_lock, calls racct_destroy_locked() on
 * 'racct', and releases the lock.
 */
235 racct_destroy(struct racct **racct)
238 mtx_lock(&racct_lock);
239 racct_destroy_locked(racct);
240 mtx_unlock(&racct_lock);
244 * Increase consumption of 'resource' by 'amount' for 'racct'
245 * and all its parents. Unlike in the other cases, 'amount' here
246 * may be less than zero.
/*
 * Adjust racct->r_resources[resource] by 'amount'.  The caller must hold
 * racct_lock and 'racct' is asserted to be non-NULL.
 *
 * If the counter ends up below zero it is clamped to 0; the assertion
 * documents that this is only expected for sloppy resources.
 *
 * NOTE(review): the visible body updates only the racct passed in; no
 * walk over parent containers appears here.  Callers in this file
 * (racct_add_cred_locked(), racct_sub_cred_locked()) invoke this once per
 * container instead.  The declaration of 'amount' is not visible in this
 * view.
 */
249 racct_alloc_resource(struct racct *racct, int resource,
253 mtx_assert(&racct_lock, MA_OWNED);
254 KASSERT(racct != NULL, ("NULL racct"));
256 racct->r_resources[resource] += amount;
257 if (racct->r_resources[resource] < 0) {
258 KASSERT(RACCT_IS_SLOPPY(resource),
259 ("racct_alloc_resource: usage < 0"));
260 racct->r_resources[resource] = 0;
/*
 * Charge 'amount' of 'resource' to process 'p', subject to rctl limits.
 *
 * Processes flagged P_SYSTEM are special-cased first (the statement
 * taken is not visible in this view).  The process lock must be held.
 * rctl_enforce() is consulted; when it reports an error and the resource
 * is deniable, the add_failure probe fires (presumably followed by
 * returning that error -- the return is not visible here).  Otherwise the
 * amount is added to the process's own racct and to the containers of
 * its credential via racct_add_cred_locked().
 *
 * NOTE(review): no racct_lock assertion is visible in this function; the
 * callers visible in this file acquire racct_lock around it.
 */
265 racct_add_locked(struct proc *p, int resource, uint64_t amount)
271 if (p->p_flag & P_SYSTEM)
274 SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
277 * We need proc lock to dereference p->p_ucred.
279 PROC_LOCK_ASSERT(p, MA_OWNED);
282 error = rctl_enforce(p, resource, amount);
283 if (error && RACCT_IS_DENIABLE(resource)) {
284 SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
289 racct_alloc_resource(p->p_racct, resource, amount);
290 racct_add_cred_locked(p->p_ucred, resource, amount);
296 * Increase allocation of 'resource' by 'amount' for process 'p'.
297 * Return 0 if it's below limits, or errno, if it's not.
/*
 * Locking wrapper: runs racct_add_locked() with racct_lock held and
 * captures its result in 'error' (the return statement is not visible in
 * this view).
 */
300 racct_add(struct proc *p, int resource, uint64_t amount)
304 mtx_lock(&racct_lock);
305 error = racct_add_locked(p, resource, amount);
306 mtx_unlock(&racct_lock);
/*
 * Add 'amount' of 'resource' to each container referenced by 'cred':
 * the racct of cred->cr_ruidinfo, the racct of every prison from
 * cred->cr_prison up the pr_parent chain, and the racct of
 * cred->cr_loginclass.
 *
 * Each update goes through racct_alloc_resource(), which asserts that
 * racct_lock is held.
 */
311 racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
315 SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
318 racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
319 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
320 racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
322 racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
326 * Increase allocation of 'resource' by 'amount' for credential 'cred'.
327 * Doesn't check for limits and never fails.
329 * XXX: Shouldn't this be able to return an error?
/*
 * Locking wrapper: acquires racct_lock, calls racct_add_cred_locked(),
 * and releases the lock.
 */
332 racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
335 mtx_lock(&racct_lock);
336 racct_add_cred_locked(cred, resource, amount);
337 mtx_unlock(&racct_lock);
341 * Increase allocation of 'resource' by 'amount' for process 'p'.
342 * Doesn't check for limits and never fails.
/*
 * Charge 'amount' of 'resource' to process 'p' without calling
 * rctl_enforce().  Processes flagged P_SYSTEM are special-cased first
 * (the statement taken is not visible in this view).  The process lock
 * must be held.
 *
 * NOTE(review): racct_lock is released after updating p->p_racct and
 * then re-acquired inside racct_add_cred(), so the process counter and
 * the credential counters are not updated within a single critical
 * section.  racct_set_force() instead calls the *_cred_locked() variants
 * under one hold of the lock -- confirm whether the difference matters.
 */
345 racct_add_force(struct proc *p, int resource, uint64_t amount)
348 if (p->p_flag & P_SYSTEM)
351 SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);
354 * We need proc lock to dereference p->p_ucred.
356 PROC_LOCK_ASSERT(p, MA_OWNED);
358 mtx_lock(&racct_lock);
359 racct_alloc_resource(p->p_racct, resource, amount);
360 mtx_unlock(&racct_lock);
361 racct_add_cred(p->p_ucred, resource, amount);
/*
 * Set the usage of 'resource' for process 'p' to 'amount', subject to
 * rctl limits.
 *
 * Processes flagged P_SYSTEM are special-cased first (the statement
 * taken is not visible in this view).  The process lock must be held.
 * The change is expressed as diff = amount - current usage; a negative
 * diff is asserted to occur only for reclaimable resources.
 * rctl_enforce() is consulted with the diff; when it reports an error
 * and the resource is deniable, the set_failure probe fires.  Otherwise
 * the diff is applied to the process's racct and the credential's
 * containers are adjusted by diff (add) or -diff (subtract).
 *
 * NOTE(review): the conditions guarding rctl_enforce() and selecting
 * between the add and subtract calls are not visible in this view;
 * presumably they test the sign of 'diff'.
 */
365 racct_set_locked(struct proc *p, int resource, uint64_t amount)
372 if (p->p_flag & P_SYSTEM)
375 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
378 * We need proc lock to dereference p->p_ucred.
380 PROC_LOCK_ASSERT(p, MA_OWNED);
382 diff = amount - p->p_racct->r_resources[resource];
384 KASSERT(diff >= 0 || RACCT_IS_RECLAIMABLE(resource),
385 ("racct_set: usage of non-reclaimable resource %d dropping",
390 error = rctl_enforce(p, resource, diff);
391 if (error && RACCT_IS_DENIABLE(resource)) {
392 SDT_PROBE(racct, kernel, rusage, set_failure, p,
393 resource, amount, 0, 0);
398 racct_alloc_resource(p->p_racct, resource, diff);
400 racct_add_cred_locked(p->p_ucred, resource, diff);
402 racct_sub_cred_locked(p->p_ucred, resource, -diff);
408 * Set allocation of 'resource' to 'amount' for process 'p'.
409 * Return 0 if it's below limits, or errno, if it's not.
411 * Note that decreasing the allocation always returns 0,
412 * even if it's above the limit.
/*
 * Locking wrapper: runs racct_set_locked() with racct_lock held and
 * captures its result in 'error' (the return statement is not visible in
 * this view).
 */
415 racct_set(struct proc *p, int resource, uint64_t amount)
419 mtx_lock(&racct_lock);
420 error = racct_set_locked(p, resource, amount);
421 mtx_unlock(&racct_lock);
/*
 * Set the usage of 'resource' for process 'p' to 'amount' without
 * calling rctl_enforce().  Processes flagged P_SYSTEM are special-cased
 * first (the statement taken is not visible in this view).  The process
 * lock must be held.
 *
 * With racct_lock held throughout, the difference from the current usage
 * is applied to the process's racct, and the credential's containers are
 * adjusted by diff (add) or -diff (subtract).  This fires the same "set"
 * probe as racct_set_locked().
 *
 * NOTE(review): the condition selecting between the add and subtract
 * calls is not visible in this view; presumably it tests the sign of
 * 'diff'.
 */
426 racct_set_force(struct proc *p, int resource, uint64_t amount)
430 if (p->p_flag & P_SYSTEM)
433 SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
436 * We need proc lock to dereference p->p_ucred.
438 PROC_LOCK_ASSERT(p, MA_OWNED);
440 mtx_lock(&racct_lock);
441 diff = amount - p->p_racct->r_resources[resource];
442 racct_alloc_resource(p->p_racct, resource, diff);
444 racct_add_cred_locked(p->p_ucred, resource, diff);
446 racct_sub_cred_locked(p->p_ucred, resource, -diff);
447 mtx_unlock(&racct_lock);
451 * Returns amount of 'resource' the process 'p' can keep allocated.
452 * Allocating more than that would be denied, unless the resource
453 * is marked undeniable. Amount of already allocated resource does
/*
 * Returns rctl_get_limit(p, resource).
 *
 * NOTE(review): other lines of this function (return type and anything
 * between the signature and this return) are not visible in this view.
 */
457 racct_get_limit(struct proc *p, int resource)
461 return (rctl_get_limit(p, resource));
468 * Returns amount of 'resource' the process 'p' can keep allocated.
469 * Allocating more than that would be denied, unless the resource
470 * is marked undeniable. Amount of already allocated resource does
/*
 * Returns rctl_get_available(p, resource).
 *
 * NOTE(review): other lines of this function (return type and anything
 * between the signature and this return) are not visible in this view.
 */
474 racct_get_available(struct proc *p, int resource)
478 return (rctl_get_available(p, resource));
485 * Decrease allocation of 'resource' by 'amount' for process 'p'.
/*
 * Release 'amount' of 'resource' previously charged to process 'p'.
 *
 * Processes flagged P_SYSTEM are special-cased first (the statement
 * taken is not visible in this view).  The process lock must be held and
 * the resource is asserted to be reclaimable.  Under racct_lock, the
 * amount is asserted not to exceed the process's current usage, then it
 * is removed from the process's racct and from the credential's
 * containers.
 *
 * NOTE(review): 'amount' is a uint64_t, so the '-amount' passed to
 * racct_alloc_resource() is negated in unsigned arithmetic before being
 * converted to that function's parameter type (not visible here) --
 * confirm this yields the intended negative value.
 */
488 racct_sub(struct proc *p, int resource, uint64_t amount)
491 if (p->p_flag & P_SYSTEM)
494 SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);
497 * We need proc lock to dereference p->p_ucred.
499 PROC_LOCK_ASSERT(p, MA_OWNED);
500 KASSERT(RACCT_IS_RECLAIMABLE(resource),
501 ("racct_sub: called for non-reclaimable resource %d", resource));
503 mtx_lock(&racct_lock);
504 KASSERT(amount <= p->p_racct->r_resources[resource],
505 ("racct_sub: freeing %ju of resource %d, which is more "
506 "than allocated %jd for %s (pid %d)", amount, resource,
507 (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
509 racct_alloc_resource(p->p_racct, resource, -amount);
510 racct_sub_cred_locked(p->p_ucred, resource, amount);
511 mtx_unlock(&racct_lock);
/*
 * Remove 'amount' of 'resource' from each container referenced by
 * 'cred': the racct of cred->cr_ruidinfo, the racct of every prison from
 * cred->cr_prison up the pr_parent chain, and the racct of
 * cred->cr_loginclass.  The resource is asserted to be reclaimable.
 *
 * Each update goes through racct_alloc_resource() with a negated amount;
 * that function asserts that racct_lock is held.
 */
515 racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
519 SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
523 KASSERT(RACCT_IS_RECLAIMABLE(resource),
524 ("racct_sub_cred: called for non-reclaimable resource %d",
528 racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
529 for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
530 racct_alloc_resource(pr->pr_prison_racct->prr_racct, resource,
532 racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
536 * Decrease allocation of 'resource' by 'amount' for credential 'cred'.
/*
 * Locking wrapper: acquires racct_lock, calls racct_sub_cred_locked(),
 * and releases the lock.
 */
539 racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
542 mtx_lock(&racct_lock);
543 racct_sub_cred_locked(cred, resource, amount);
544 mtx_unlock(&racct_lock);
548 * Inherit resource usage information from the parent process.
/*
 * Set up resource accounting for a newly forked 'child' of 'parent'.
 *
 * A racct is created for the child unconditionally.  Children flagged
 * P_SYSTEM are special-cased next (the statement taken is not visible in
 * this view).  Under racct_lock, rctl_proc_fork() is called, then every
 * inheritable resource with non-zero usage in the parent is copied to
 * the child with racct_set_locked(), and finally the child is charged
 * one RACCT_NPROC and one RACCT_NTHR.
 *
 * NOTE(review): the results of the last two racct_add_locked() calls are
 * combined with '+=', so if both fail the sum is not a meaningful errno
 * value -- confirm how callers interpret the result.  The error checks
 * after rctl_proc_fork() and inside the loop, any process locking, and
 * the return statement are not visible in this view.
 */
551 racct_proc_fork(struct proc *parent, struct proc *child)
556 * Create racct for the child process.
558 racct_create(&child->p_racct);
561 * No resource accounting for kernel processes.
563 if (child->p_flag & P_SYSTEM)
568 mtx_lock(&racct_lock);
571 error = rctl_proc_fork(parent, child);
577 * Inherit resource usage.
579 for (i = 0; i <= RACCT_MAX; i++) {
580 if (parent->p_racct->r_resources[i] == 0 ||
581 !RACCT_IS_INHERITABLE(i))
584 error = racct_set_locked(child, i,
585 parent->p_racct->r_resources[i]);
590 error = racct_add_locked(child, RACCT_NPROC, 1);
591 error += racct_add_locked(child, RACCT_NTHR, 1);
594 mtx_unlock(&racct_lock);
602 * Called at the end of fork1(), to handle rules that require the process
603 * to be fully initialized.
/*
 * With racct_lock held, calls rctl_enforce() on 'child' for RACCT_NPROC
 * and RACCT_NTHR with an amount of 0.  The return values of both calls
 * are ignored.
 */
606 racct_proc_fork_done(struct proc *child)
611 mtx_lock(&racct_lock);
612 rctl_enforce(child, RACCT_NPROC, 0);
613 rctl_enforce(child, RACCT_NTHR, 0);
614 mtx_unlock(&racct_lock);
/*
 * Final accounting for an exiting process 'p'.
 *
 * The CPU time is taken from p->p_rux.rux_runtime, converted to
 * microseconds, and raised to p->p_prev_runtime if it is smaller.  Under
 * racct_lock, RACCT_CPU is set to that value and every reclaimable
 * resource with non-zero usage is set to 0.  Afterwards
 * rctl_racct_release() is called on the process's racct and the racct is
 * destroyed.
 *
 * NOTE(review): the return values of the racct_set_locked() calls are
 * ignored.  Whether the assertion and the clamping below it are selected
 * by a preprocessor conditional is not visible in this view.
 */
620 racct_proc_exit(struct proc *p)
627 * We don't need to calculate rux, proc_reap() has already done this.
629 runtime = cputick2usec(p->p_rux.rux_runtime);
631 KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
633 if (runtime < p->p_prev_runtime)
634 runtime = p->p_prev_runtime;
636 mtx_lock(&racct_lock);
637 racct_set_locked(p, RACCT_CPU, runtime);
639 for (i = 0; i <= RACCT_MAX; i++) {
640 if (p->p_racct->r_resources[i] == 0)
642 if (!RACCT_IS_RECLAIMABLE(i))
644 racct_set_locked(p, i, 0);
647 mtx_unlock(&racct_lock);
651 rctl_racct_release(p->p_racct);
653 racct_destroy(&p->p_racct);
657 * Called after credentials change, to move resource utilisation
/*
 * Move the usage recorded in p->p_racct from the containers of 'oldcred'
 * to the containers of 'newcred'.
 *
 * The process lock must NOT be held (asserted with MA_NOTOWNED).  Under
 * racct_lock, for each container kind that differs between the two
 * credentials -- cr_ruidinfo, cr_loginclass, cr_prison -- the process's
 * usage is subtracted from the old container and added to the new one.
 * For prisons this is done for every prison along the pr_parent chain of
 * the old and new prison respectively.  After the lock is dropped,
 * rctl_proc_ucred_changed() is called with the new credential.
 */
661 racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
662 struct ucred *newcred)
664 struct uidinfo *olduip, *newuip;
665 struct loginclass *oldlc, *newlc;
666 struct prison *oldpr, *newpr, *pr;
668 PROC_LOCK_ASSERT(p, MA_NOTOWNED);
670 newuip = newcred->cr_ruidinfo;
671 olduip = oldcred->cr_ruidinfo;
672 newlc = newcred->cr_loginclass;
673 oldlc = oldcred->cr_loginclass;
674 newpr = newcred->cr_prison;
675 oldpr = oldcred->cr_prison;
677 mtx_lock(&racct_lock);
678 if (newuip != olduip) {
679 racct_sub_racct(olduip->ui_racct, p->p_racct);
680 racct_add_racct(newuip->ui_racct, p->p_racct);
682 if (newlc != oldlc) {
683 racct_sub_racct(oldlc->lc_racct, p->p_racct);
684 racct_add_racct(newlc->lc_racct, p->p_racct);
686 if (newpr != oldpr) {
687 for (pr = oldpr; pr != NULL; pr = pr->pr_parent)
688 racct_sub_racct(pr->pr_prison_racct->prr_racct,
690 for (pr = newpr; pr != NULL; pr = pr->pr_parent)
691 racct_add_racct(pr->pr_prison_racct->prr_racct,
694 mtx_unlock(&racct_lock);
697 rctl_proc_ucred_changed(p, newcred);
/*
 * NOTE(review): the function header for this body is not visible in this
 * view; presumably it is the kernel process registered through racctd_kp
 * below -- confirm.
 *
 * With allproc_lock held shared, walks every process in the system.
 * Processes not in state PRS_NORMAL and processes flagged P_SYSTEM are
 * special-cased (the statements taken are not visible; presumably they
 * are skipped).  For the others it computes the time elapsed since
 * p->p_stats->p_start and the CPU time in microseconds from
 * p->p_rux.rux_runtime (raised to p->p_prev_runtime if smaller), stores
 * the CPU time back into p->p_prev_runtime, and records RACCT_CPU and
 * RACCT_WALLCLOCK (in microseconds) under racct_lock.
 *
 * NOTE(review): the body of the per-thread loop, any process locking,
 * and any outer loop or sleep are not visible in this view.
 */
706 struct timeval wallclock;
710 sx_slock(&allproc_lock);
712 FOREACH_PROC_IN_SYSTEM(p) {
713 if (p->p_state != PRS_NORMAL)
715 if (p->p_flag & P_SYSTEM)
718 microuptime(&wallclock);
719 timevalsub(&wallclock, &p->p_stats->p_start);
722 FOREACH_THREAD_IN_PROC(p, td) {
727 runtime = cputick2usec(p->p_rux.rux_runtime);
730 KASSERT(runtime >= p->p_prev_runtime,
731 ("runtime < p_prev_runtime"));
733 if (runtime < p->p_prev_runtime)
734 runtime = p->p_prev_runtime;
736 p->p_prev_runtime = runtime;
737 mtx_lock(&racct_lock);
738 racct_set_locked(p, RACCT_CPU, runtime);
739 racct_set_locked(p, RACCT_WALLCLOCK,
740 wallclock.tv_sec * 1000000 + wallclock.tv_usec);
741 mtx_unlock(&racct_lock);
744 sx_sunlock(&allproc_lock);
/*
 * Kernel process descriptor handed to kproc_start() by the SYSINIT
 * below, at SI_SUB_RACCTD / SI_ORDER_FIRST.
 *
 * NOTE(review): the descriptor's initializer fields are not visible in
 * this view.
 */
749 static struct kproc_desc racctd_kp = {
754 SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
/*
 * NOTE(review): the function header for this body is not visible in this
 * view; the SYSINIT below registers racct_init at SI_SUB_RACCT /
 * SI_ORDER_FIRST, so this is presumably its body -- confirm.
 *
 * Creates the "racct" UMA zone (element size sizeof(struct racct),
 * pointer alignment, UMA_ZONE_NOFREE) and assigns prison0's
 * pr_prison_racct from prison_racct_find("0").
 */
760 racct_zone = uma_zcreate("racct", sizeof(struct racct),
761 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
763 * XXX: Move this somewhere.
765 prison0.pr_prison_racct = prison_racct_find("0");
767 SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
/*
 * NOTE(review): only the signature lines of the following definitions
 * are visible in this view.  They repeat the names of functions defined
 * earlier in this file, so they are presumably alternate implementations
 * selected by a preprocessor conditional that is not visible here; their
 * bodies and return values cannot be confirmed from this view.
 */
772 racct_add(struct proc *p, int resource, uint64_t amount)
779 racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
784 racct_add_force(struct proc *p, int resource, uint64_t amount)
791 racct_set(struct proc *p, int resource, uint64_t amount)
798 racct_set_force(struct proc *p, int resource, uint64_t amount)
803 racct_sub(struct proc *p, int resource, uint64_t amount)
808 racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
813 racct_get_limit(struct proc *p, int resource)
820 racct_get_available(struct proc *p, int resource)
827 racct_create(struct racct **racctp)
832 racct_destroy(struct racct **racctp)
837 racct_proc_fork(struct proc *parent, struct proc *child)
844 racct_proc_fork_done(struct proc *child)
849 racct_proc_exit(struct proc *p)