sys/kern/kern_exit.c

   1 /*-
   2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  * (c) UNIX System Laboratories, Inc.
   5  * All or some portions of this file are derived from material licensed
   6  * to the University of California by American Telephone and Telegraph
   7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   8  * the permission of UNIX System Laboratories, Inc.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  * 4. Neither the name of the University nor the names of its contributors
  19  *    may be used to endorse or promote products derived from this software
  20  *    without specific prior written permission.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  *
  34  *      @(#)kern_exit.c 8.7 (Berkeley) 2/12/94
  35  */
  36
  37 #include <sys/cdefs.h>
  38 __FBSDID("$FreeBSD$");
  39
  40 #include "opt_compat.h"
  41 #include "opt_ktrace.h"
  42 #include "opt_mac.h"
  43
  44 #include <sys/param.h>
  45 #include <sys/systm.h>
  46 #include <sys/sysproto.h>
  47 #include <sys/eventhandler.h>
  48 #include <sys/kernel.h>
  49 #include <sys/malloc.h>
  50 #include <sys/lock.h>
  51 #include <sys/mutex.h>
  52 #include <sys/proc.h>
  53 #include <sys/pioctl.h>
  54 #include <sys/tty.h>
  55 #include <sys/wait.h>
  56 #include <sys/vmmeter.h>
  57 #include <sys/vnode.h>
  58 #include <sys/resourcevar.h>
  59 #include <sys/sbuf.h>
  60 #include <sys/signalvar.h>
  61 #include <sys/sched.h>
  62 #include <sys/sx.h>
  63 #include <sys/syscallsubr.h>
  64 #include <sys/syslog.h>
  65 #include <sys/ptrace.h>
  66 #include <sys/acct.h>           /* for acct_process() function prototype */
  67 #include <sys/filedesc.h>
  68 #include <sys/mac.h>
  69 #include <sys/shm.h>
  70 #include <sys/sem.h>
  71 #ifdef KTRACE
  72 #include <sys/ktrace.h>
  73 #endif
  74
  75 #include <security/audit/audit.h>
  76
  77 #include <vm/vm.h>
  78 #include <vm/vm_extern.h>
  79 #include <vm/vm_param.h>
  80 #include <vm/pmap.h>
  81 #include <vm/vm_map.h>
  82 #include <vm/vm_page.h>
  83 #include <vm/uma.h>
  84
  85 /* Required to be non-static for SysVR4 emulator */
     /* Type of the p_ru buffer: MALLOC'd in exit1(), FREE'd in kern_wait(). */
  86 MALLOC_DEFINE(M_ZOMBIE, "zombie", "zombie proc status");
  87
  88 /* Hook for NFS teardown procedure. */
     /* When set, exit1() calls it for a process whose p_nlminfo is non-NULL. */
  89 void (*nlminfo_release_p)(struct proc *p);
  90
  91 /*
  92  * exit --
  93  *      The exit system call: build the status word from the caller's
  94  *      value with W_EXITCODE(uap->rval, 0) and hand it to exit1().
  95  * MPSAFE
  96  */
  97 void
  98 sys_exit(struct thread *td, struct sys_exit_args *uap)
  99 {
 100         int status = W_EXITCODE(uap->rval, 0);
 101
 102         exit1(td, status);      /* NOTREACHED */
 103 }
 104
 105 /*
 106  * Exit: deallocate address space and other resources, change proc state
 107  * to zombie, and unlink proc from allproc and parent's lists.  Save exit
 108  * status and rusage for wait().  Check for child processes and orphan them.
      *
      * td is the exiting thread; the process torn down is td->td_proc.  rv is
      * the status word: it is decoded with WTERMSIG()/WEXITSTATUS() and stored
      * in p_xstat, from where kern_wait() hands it to the parent.
      *
      * Giant is released first if the caller holds it, and the kernel panics
      * if the exiting process is initproc.  The last statement is
      * thread_exit(), and sys_exit() marks its call to us as NOTREACHED.
 109  */
 110 void
 111 exit1(struct thread *td, int rv)
 112 {
 113         uint64_t new_switchtime;
 114         struct proc *p, *nq, *q;
 115         struct tty *tp;
 116         struct vnode *ttyvp;
 117         struct vmspace *vm;
 118         struct vnode *vtmp;
 119 #ifdef KTRACE
 120         struct vnode *tracevp;
 121         struct ucred *tracecred;
 122 #endif
 123         struct plimit *plim;
 124         int locked, refcnt;
 125
 126         /*
 127          * Drop Giant if caller has it.  Eventually we should warn about
 128          * being called with Giant held.
 129          */
 130         while (mtx_owned(&Giant))
 131                 mtx_unlock(&Giant);
 132
 133         p = td->td_proc;
 134         if (p == initproc) {
 135                 printf("init died (signal %d, exit %d)\n",
 136                     WTERMSIG(rv), WEXITSTATUS(rv));
 137                 panic("Going nowhere without my init!");
 138         }
 139
 140         /*
 141          * MUST abort all other threads before proceeding past here.
 142          */
 143         PROC_LOCK(p);
 144         if (p->p_flag & P_HADTHREADS) {
 145 retry:
 146                 /*
 147                  * First check if some other thread got here before us..
 148                  * if so, act appropriately, (exit or suspend);
 149                  */
 150                 thread_suspend_check(0);
 151
 152                 /*
 153                  * Kill off the other threads. This requires
 154                  * some co-operation from other parts of the kernel
 155                  * so it may not be instantaneous.  With this state set
 156                  * any thread entering the kernel from userspace will
 157                  * thread_exit() in trap().  Any thread attempting to
 158                  * sleep will return immediately with EINTR or EWOULDBLOCK
 159                  * which will hopefully force them to back out to userland
 160                  * freeing resources as they go.  Any thread attempting
 161                  * to return to userland will thread_exit() from userret().
 162                  * thread_exit() will unsuspend us when the last of the
 163                  * other threads exits.
 164                  * If there is already a thread singler after resumption,
 165                  * calling thread_single will fail; in that case, we just
 166                  * re-check all suspension request, the thread should
 167                  * either be suspended there or exit.
 168                  */
 169                 if (thread_single(SINGLE_EXIT))
 170                         goto retry;
 171
 172                 /*
 173                  * All other activity in this process is now stopped.
 174                  * Threading support has been turned off.
 175                  */
 176         }
 177
 178         p->p_flag |= P_WEXIT;
 179
             /*
              * NOTE(review): presumably this dequeues our p_ksi if it is still
              * pending on the parent; it is done with the parent's proc lock
              * held on top of our own -- confirm against the other p_ksi users.
              */
 180         PROC_LOCK(p->p_pptr);
 181         sigqueue_take(p->p_ksi);
 182         PROC_UNLOCK(p->p_pptr);
 183
 184         PROC_UNLOCK(p);
 185
 186 #ifdef AUDIT
 187         /*
 188          * The Sun BSM exit token contains two components: an exit status as
 189          * passed to exit(), and a return value to indicate what sort of exit
 190          * it was.  The exit status is WEXITSTATUS(rv), but it's not clear
 191          * what the return value is.
 192          */
 193         AUDIT_ARG(exit, WEXITSTATUS(rv), 0);
 194         AUDIT_SYSCALL_EXIT(0, td);
 195 #endif
 196
 197         /* Are we a task leader? */
             /*
              * If so, SIGKILL every peer and sleep until the peer list is
              * empty; each peer unlinks itself from the list and wakes its
              * leader further down in this function.
              */
 198         if (p == p->p_leader) {
 199                 mtx_lock(&ppeers_lock);
 200                 q = p->p_peers;
 201                 while (q != NULL) {
 202                         PROC_LOCK(q);
 203                         psignal(q, SIGKILL);
 204                         PROC_UNLOCK(q);
 205                         q = q->p_peers;
 206                 }
 207                 while (p->p_peers != NULL)
 208                         msleep(p, &ppeers_lock, PWAIT, "exit1", 0);
 209                 mtx_unlock(&ppeers_lock);
 210         }
 211
 212         PROC_LOCK(p);
 213         _STOPEVENT(p, S_EXIT, rv);
 214         wakeup(&p->p_stype);    /* Wakeup anyone in procfs' PIOCWAIT */
 215         PROC_UNLOCK(p);
 216
 217         /*
 218          * Check if any loadable modules need anything done at process exit.
 219          * E.g. SYSV IPC stuff
 220          * XXX what if one of these generates an error?
 221          */
 222         EVENTHANDLER_INVOKE(process_exit, p);
 223
             /* p_ru is freed again by kern_wait() when the zombie is reaped. */
 224         MALLOC(p->p_ru, struct rusage *, sizeof(struct rusage),
 225                 M_ZOMBIE, M_WAITOK);
 226         /*
 227          * If parent is waiting for us to exit or exec,
 228          * P_PPWAIT is set; we will wakeup the parent below.
 229          */
 230         PROC_LOCK(p);
 231         stopprofclock(p);
 232         p->p_flag &= ~(P_TRACED | P_PPWAIT);
 233
 234         /*
 235          * Stop the real interval timer.  If the handler is currently
 236          * executing, prevent it from rearming itself and let it finish.
 237          */
 238         if (timevalisset(&p->p_realtimer.it_value) &&
 239             callout_stop(&p->p_itcallout) == 0) {
 240                 timevalclear(&p->p_realtimer.it_interval);
 241                 msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0);
 242                 KASSERT(!timevalisset(&p->p_realtimer.it_value),
 243                     ("realtime timer is still armed"));
 244         }
 245         sigqueue_flush(&p->p_sigqueue);
 246         sigqueue_flush(&td->td_sigqueue);
 247         PROC_UNLOCK(p);
 248
 249         /*
 250          * Reset any sigio structures pointing to us as a result of
 251          * F_SETOWN with our pid.
 252          */
 253         mtx_lock(&Giant);       /* XXX: not sure if needed */
 254         funsetownlst(&p->p_sigiolst);
 255         mtx_unlock(&Giant);
 256
 257         /*
 258          * If this process has an nlminfo data area (for lockd), release it
 259          */
 260         if (nlminfo_release_p != NULL && p->p_nlminfo != NULL)
 261                 (*nlminfo_release_p)(p);
 262
 263         /*
 264          * Close open files and release open-file table.
 265          * This may block!
 266          */
 267         fdfree(td);
 268
 269         /*
 270          * If this thread tickled GEOM, we need to wait for the giggling to
 271          * stop before we return to userland
 272          */
 273         if (td->td_pflags & TDP_GEOM)
 274                 g_waitidle();
 275
 276         /*
 277          * Remove ourself from our leader's peer list and wake our leader.
 278          */
 279         mtx_lock(&ppeers_lock);
 280         if (p->p_leader->p_peers) {
 281                 q = p->p_leader;
 282                 while (q->p_peers != p)
 283                         q = q->p_peers;
 284                 q->p_peers = p->p_peers;
 285                 wakeup(p->p_leader);
 286         }
 287         mtx_unlock(&ppeers_lock);
 288
 289         /* The next two chunks should probably be moved to vmspace_exit. */
 290         vm = p->p_vmspace;
 291         /*
 292          * Release user portion of address space.
 293          * This releases references to vnodes,
 294          * which could cause I/O if the file has been unlinked.
 295          * Need to do this early enough that we can still sleep.
 296          * Can't free the entire vmspace as the kernel stack
 297          * may be mapped within that space also.
 298          *
 299          * Processes sharing the same vmspace may exit in one order, and
 300          * get cleaned up by vmspace_exit() in a different order.  The
 301          * last exiting process to reach this point releases as much of
 302          * the environment as it can, and the last process cleaned up
 303          * by vmspace_exit() (which decrements exitingcnt) cleans up the
 304          * remainder.
 305          */
 306         atomic_add_int(&vm->vm_exitingcnt, 1);
             /*
              * Drop our reference with a compare-and-set loop.  refcnt keeps
              * the value seen before the decrement, so 1 means ours was the
              * last reference.
              */
 307         do
 308                 refcnt = vm->vm_refcnt;
 309         while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt - 1));
 310         if (refcnt == 1) {
 311                 shmexit(vm);
 312                 pmap_remove_pages(vmspace_pmap(vm), vm_map_min(&vm->vm_map),
 313                     vm_map_max(&vm->vm_map));
 314                 (void) vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map),
 315                     vm_map_max(&vm->vm_map));
 316         }
 317
 318         sx_xlock(&proctree_lock);
 319         if (SESS_LEADER(p)) {
 320                 struct session *sp;
 321
 322                 sp = p->p_session;
 323                 if (sp->s_ttyvp) {
 324                         locked = VFS_LOCK_GIANT(sp->s_ttyvp->v_mount);
 325                         /*
 326                          * Controlling process.
 327                          * Signal foreground pgrp,
 328                          * drain controlling terminal
 329                          * and revoke access to controlling terminal.
 330                          */
 331                         if (sp->s_ttyp && (sp->s_ttyp->t_session == sp)) {
 332                                 tp = sp->s_ttyp;
 333                                 if (sp->s_ttyp->t_pgrp) {
 334                                         PGRP_LOCK(sp->s_ttyp->t_pgrp);
 335                                         pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1);
 336                                         PGRP_UNLOCK(sp->s_ttyp->t_pgrp);
 337                                 }
 338                                 /* XXX tp should be locked. */
 339                                 sx_xunlock(&proctree_lock);
 340                                 (void) ttywait(tp);
 341                                 sx_xlock(&proctree_lock);
 342                                 /*
 343                                  * The tty could have been revoked
 344                                  * if we blocked.
 345                                  */
 346                                 if (sp->s_ttyvp) {
 347                                         ttyvp = sp->s_ttyvp;
 348                                         SESS_LOCK(p->p_session);
 349                                         sp->s_ttyvp = NULL;
 350                                         SESS_UNLOCK(p->p_session);
 351                                         sx_xunlock(&proctree_lock);
 352                                         VOP_LOCK(ttyvp, LK_EXCLUSIVE, td);
 353                                         VOP_REVOKE(ttyvp, REVOKEALL);
 354                                         vput(ttyvp);
 355                                         sx_xlock(&proctree_lock);
 356                                 }
 357                         }
 358                         if (sp->s_ttyvp) {
 359                                 ttyvp = sp->s_ttyvp;
 360                                 SESS_LOCK(p->p_session);
 361                                 sp->s_ttyvp = NULL;
 362                                 SESS_UNLOCK(p->p_session);
 363                                 vrele(ttyvp);
 364                         }
 365                         /*
 366                          * s_ttyp is not zero'd; we use this to indicate
 367                          * that the session once had a controlling terminal.
 368                          * (for logging and informational purposes)
 369                          */
 370                         VFS_UNLOCK_GIANT(locked);
 371                 }
 372                 SESS_LOCK(p->p_session);
 373                 sp->s_leader = NULL;
 374                 SESS_UNLOCK(p->p_session);
 375         }
 376         fixjobc(p, p->p_pgrp, 0);
 377         sx_xunlock(&proctree_lock);
 378         (void)acct_process(td);
 379 #ifdef KTRACE
 380         /*
 381          * Drain any pending records on the thread and release the trace
 382          * file.  It might be better if drain-and-clear were atomic.
 383          */
 384         ktrprocexit(td);
 385         PROC_LOCK(p);
 386         mtx_lock(&ktrace_mtx);
 387         p->p_traceflag = 0;     /* don't trace the vrele() */
 388         tracevp = p->p_tracevp;
 389         p->p_tracevp = NULL;
 390         tracecred = p->p_tracecred;
 391         p->p_tracecred = NULL;
 392         mtx_unlock(&ktrace_mtx);
 393         PROC_UNLOCK(p);
 394         if (tracevp != NULL) {
 395                 locked = VFS_LOCK_GIANT(tracevp->v_mount);
 396                 vrele(tracevp);
 397                 VFS_UNLOCK_GIANT(locked);
 398         }
 399         if (tracecred != NULL)
 400                 crfree(tracecred);
 401 #endif
 402         /*
 403          * Release reference to text vnode
 404          */
 405         if ((vtmp = p->p_textvp) != NULL) {
 406                 p->p_textvp = NULL;
 407                 locked = VFS_LOCK_GIANT(vtmp->v_mount);
 408                 vrele(vtmp);
 409                 VFS_UNLOCK_GIANT(locked);
 410         }
 411
 412         /*
 413          * Release our limits structure.
 414          */
 415         PROC_LOCK(p);
 416         plim = p->p_limit;
 417         p->p_limit = NULL;
 418         PROC_UNLOCK(p);
 419         lim_free(plim);
 420
 421         /*
 422          * Remove proc from allproc queue and pidhash chain.
 423          * Place onto zombproc.  Unlink from parent's child list.
 424          */
 425         sx_xlock(&allproc_lock);
 426         LIST_REMOVE(p, p_list);
 427         LIST_INSERT_HEAD(&zombproc, p, p_list);
 428         LIST_REMOVE(p, p_hash);
 429         sx_xunlock(&allproc_lock);
 430
 431         /*
 432          * Reparent all of our children to init.
 433          */
 434         sx_xlock(&proctree_lock);
 435         q = LIST_FIRST(&p->p_children);
 436         if (q != NULL)          /* only need this if any child is S_ZOMB */
 437                 wakeup(initproc);
 438         for (; q != NULL; q = nq) {
 439                 nq = LIST_NEXT(q, p_sibling);
 440                 PROC_LOCK(q);
 441                 proc_reparent(q, initproc);
 442                 q->p_sigparent = SIGCHLD;
 443                 /*
 444                  * Traced processes are killed
 445                  * since their existence means someone is screwing up.
 446                  */
 447                 if (q->p_flag & P_TRACED) {
 448                         q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE);
 449                         psignal(q, SIGKILL);
 450                 }
 451                 PROC_UNLOCK(q);
 452         }
 453
 454         /*
 455          * Save exit status and finalize rusage info except for times,
 456          * adding in child rusage info later when our time is locked.
 457          */
 458         PROC_LOCK(p);
 459         p->p_xstat = rv;
 460         p->p_xthread = td;
 461         p->p_stats->p_ru.ru_nvcsw++;
 462         *p->p_ru = p->p_stats->p_ru;
 463
 464         /*
 465          * Notify interested parties of our demise.
 466          */
 467         KNOTE_LOCKED(&p->p_klist, NOTE_EXIT);
 468
 469         /*
 470          * Just delete all entries in the p_klist. At this point we won't
 471          * report any more events, and there are nasty race conditions that
 472          * can beat us if we don't.
 473          */
 474         knlist_clear(&p->p_klist, 1);
 475
 476         /*
 477          * Notify parent that we're gone.  If parent has the PS_NOCLDWAIT
 478          * flag set, or if the handler is set to SIG_IGN, notify process
 479          * 1 instead (and hope it will handle this situation).
 480          */
 481         PROC_LOCK(p->p_pptr);
 482         mtx_lock(&p->p_pptr->p_sigacts->ps_mtx);
 483         if (p->p_pptr->p_sigacts->ps_flag & (PS_NOCLDWAIT | PS_CLDSIGIGN)) {
 484                 struct proc *pp;
 485
 486                 mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
 487                 pp = p->p_pptr;
 488                 PROC_UNLOCK(pp);
 489                 proc_reparent(p, initproc);
 490                 p->p_sigparent = SIGCHLD;
 491                 PROC_LOCK(p->p_pptr);
 492                 /*
 493                  * If this was the last child of our parent, notify
 494                  * parent, so in case he was wait(2)ing, he will
 495                  * continue.
 496                  */
 497                 if (LIST_EMPTY(&pp->p_children))
 498                         wakeup(pp);
 499         } else
 500                 mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
 501
 502         if (p->p_pptr == initproc)
 503                 psignal(p->p_pptr, SIGCHLD);
 504         else if (p->p_sigparent != 0) {
 505                 if (p->p_sigparent == SIGCHLD)
 506                         childproc_exited(p);
 507                 else    /* LINUX thread */
 508                         psignal(p->p_pptr, p->p_sigparent);
 509         }
 510         PROC_UNLOCK(p->p_pptr);
 511         PROC_UNLOCK(p);
 512
 513         /*
 514          * Finally, call machine-dependent code to release the remaining
 515          * resources including address space.
 516          * The address space is released by "vmspace_exitfree(p)" in
 517          * vm_waitproc().
 518          */
 519         cpu_exit(td);
 520
 521         WITNESS_WARN(WARN_PANIC, &proctree_lock.sx_object,
 522             "process (pid %d) exiting", p->p_pid);
 523
 524         PROC_LOCK(p);
 525         PROC_LOCK(p->p_pptr);
 526         sx_xunlock(&proctree_lock);
 527
 528         /*
 529          * We have to wait until after acquiring all locks before
 530          * changing p_state.  We need to avoid all possible context
 531          * switches (including ones from blocking on a mutex) while
 532          * marked as a zombie.  We also have to set the zombie state
 533          * before we release the parent process' proc lock to avoid
 534          * a lost wakeup.  So, we first call wakeup, then we grab the
 535          * sched lock, update the state, and release the parent process'
 536          * proc lock.
 537          */
 538         wakeup(p->p_pptr);
 539         mtx_lock_spin(&sched_lock);
 540         p->p_state = PRS_ZOMBIE;
 541         PROC_UNLOCK(p->p_pptr);
 542
 543         ruadd(p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux);
 544
 545         /* Do the same timestamp bookkeeping that mi_switch() would do. */
 546         new_switchtime = cpu_ticks();
 547         p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime));
 548         p->p_rux.rux_uticks += td->td_uticks;
 549         p->p_rux.rux_sticks += td->td_sticks;
 550         p->p_rux.rux_iticks += td->td_iticks;
 551         PCPU_SET(switchtime, new_switchtime);
 552         PCPU_SET(switchticks, ticks);
 553         cnt.v_swtch++;
 554
 555         sched_exit(p->p_pptr, td);
 556
 557         /*
 558          * Hopefully no one will try to deliver a signal to the process this
 559          * late in the game.
 560          */
 561         knlist_destroy(&p->p_klist);
 562
 563         /*
 564          * Make sure the scheduler takes this thread out of its tables etc.
 565          * This will also release this thread's reference to the ucred.
 566          * Other thread parts to release include pcb bits and such.
 567          */
 568         thread_exit();
 569 }
 570
 571
 572 #ifndef _SYS_SYSPROTO_H_
     /*
      * Arguments of abort2(), declared here only when _SYS_SYSPROTO_H_ is not
      * defined (presumably <sys/sysproto.h> normally supplies the structure).
      */
 573 struct abort2_args {
 574         char *why;      /* user-space reason string, may be NULL */
 575         int nargs;      /* number of entries in args; abort2() accepts 0..16 */
 576         void **args;    /* user-space array of pointers, logged with %p */
 577 };
 578 #endif
 579
 580 /*
      * abort2 -- log why the calling process is aborting, then kill it.
      *
      * The message "<comm>(pid N uid N) aborted: <why>(<args>)" is built in a
      * fixed 512-byte sbuf, logged at LOG_INFO, and the process then exits
      * through exit1().  uap->why is a user-space string (limited to 128, NULL
      * is logged as "(null)"); uap->args is a user-space array of uap->nargs
      * pointers (0..16) that are logged with %p.
      *
      * A well-formed call ends with SIGABRT.  An out-of-range nargs, a NULL
      * args together with nargs > 0, or a failed copy from user space ends
      * with SIGKILL and a "(Reason text inaccessible)" note instead.  args is
      * not looked at when nargs is 0, so abort2(why, 0, NULL) is a valid call.
      *
 581  * MPSAFE.
 582  */
 583 int
 584 abort2(struct thread *td, struct abort2_args *uap)
 585 {
 586         struct proc *p = td->td_proc;
 587         struct sbuf *sb;
 588         void *uargs[16];
 589         int error, i, sig;
 590
 591         error = 0;      /* satisfy compiler */
 592
 593         /*
 594          * Do it right now so we can log either proper call of abort2(), or
 595          * note, that invalid argument was passed. 512 is big enough to
 596          * handle 16 arguments' descriptions with additional comments.
 597          */
 598         sb = sbuf_new(NULL, NULL, 512, SBUF_FIXEDLEN);
 599         sbuf_clear(sb);
 600         sbuf_printf(sb, "%s(pid %d uid %d) aborted: ",
 601             p->p_comm, p->p_pid, td->td_ucred->cr_uid);
 602         /*
 603          * Since we can't return from abort2(), send SIGKILL in cases, where
 604          * abort2() was called improperly
 605          */
 606         sig = SIGKILL;
 607         /* Prevent from DoSes from user-space. */
 608         if (uap->nargs < 0 || uap->nargs > 16)
 609                 goto out;
             /* args is only read when nargs > 0, so NULL is fine with 0. */
 610         if (uap->nargs > 0 && uap->args == NULL)
 611                 goto out;
 612         if (uap->nargs > 0)
 613                 error = copyin(uap->args, uargs, uap->nargs * sizeof(void *));
 614         if (error != 0)
 615                 goto out;
 616         /*
 617          * Limit 'reason' to 128; it still fits with the maximal 16 arguments.
 618          */
 619         if (uap->why != NULL) {
 620                 error = sbuf_copyin(sb, uap->why, 128);
 621                 if (error < 0)
 622                         goto out;
 623         } else {
 624                 sbuf_printf(sb, "(null)");
 625         }
 626         if (uap->nargs) {
 627                 sbuf_printf(sb, "(");
 628                 for (i = 0;i < uap->nargs; i++)
 629                         sbuf_printf(sb, "%s%p", i == 0 ? "" : ", ", uargs[i]);
 630                 sbuf_printf(sb, ")");
 631         }
 632         /*
 633          * Final stage: arguments were proper, string has been
 634          * successfully copied from userspace, and copying pointers
 635          * from user-space succeed.
 636          */
 637         sig = SIGABRT;
 638 out:
 639         if (sig == SIGKILL) {
 640                 sbuf_trim(sb);
 641                 sbuf_printf(sb, " (Reason text inaccessible)");
 642         }
 643         sbuf_cat(sb, "\n");
 644         sbuf_finish(sb);
 645         log(LOG_INFO, "%s", sbuf_data(sb));
 646         sbuf_delete(sb);
 647         exit1(td, W_EXITCODE(0, sig));
 648         return (0);
 649 }
 650
 651
 652 #ifdef COMPAT_43
 653 /*
 654  * COMPAT_43 wait: any child, no options, no rusage; the status word is
 655  * returned in td_retval[1].  kern_wait() does the real work.  MPSAFE.
 656  */
 657 int
 658 owait(struct thread *td, struct owait_args *uap __unused)
 659 {
 660         int rc, wstatus;
 661
 662         rc = kern_wait(td, WAIT_ANY, &wstatus, 0, NULL);
 663         if (rc != 0)
 664                 return (rc);
 665         td->td_retval[1] = wstatus;
 666         return (0);
 667 }
 668 #endif /* COMPAT_43 */
 669
 670 /*
 671  * wait4 system call: kern_wait() picks the child, then the status word
 672  * and the resource usage are copied out to whichever of the two user
 673  * buffers the caller supplied.  MPSAFE.
 674  */
 675 int
 676 wait4(struct thread *td, struct wait_args *uap)
 677 {
 678         struct rusage usage;
 679         int rc, wstatus;
 680
 681         /* Only have kern_wait() fill in rusage if the caller wants it. */
 682         rc = kern_wait(td, uap->pid, &wstatus, uap->options,
 683             uap->rusage != NULL ? &usage : NULL);
 684         if (rc != 0)
 685                 return (rc);
 686         if (uap->status != NULL)
 687                 rc = copyout(&wstatus, uap->status, sizeof(wstatus));
 688         if (rc == 0 && uap->rusage != NULL)
 689                 rc = copyout(&usage, uap->rusage, sizeof(usage));
 690         return (rc);
 691 }
 692
 693 int
 694 kern_wait(struct thread *td, pid_t pid, int *status, int options,
 695     struct rusage *rusage)
 696 {
 697         struct proc *p, *q, *t;
 698         int error, nfound;
 699
 700         AUDIT_ARG(pid, pid);
 701
 702         q = td->td_proc;
 703         if (pid == 0) {
 704                 PROC_LOCK(q);
 705                 pid = -q->p_pgid;
 706                 PROC_UNLOCK(q);
 707         }
 708         if (options &~ (WUNTRACED|WNOHANG|WCONTINUED|WLINUXCLONE))
 709                 return (EINVAL);
 710 loop:
 711         if (q->p_flag & P_STATCHILD) {
 712                 PROC_LOCK(q);
 713                 q->p_flag &= ~P_STATCHILD;
 714                 PROC_UNLOCK(q);
 715         }
 716         nfound = 0;
 717         sx_xlock(&proctree_lock);
 718         LIST_FOREACH(p, &q->p_children, p_sibling) {
 719                 PROC_LOCK(p);
 720                 if (pid != WAIT_ANY &&
 721                     p->p_pid != pid && p->p_pgid != -pid) {
 722                         PROC_UNLOCK(p);
 723                         continue;
 724                 }
 725                 if (p_canwait(td, p)) {
 726                         PROC_UNLOCK(p);
 727                         continue;
 728                 }
 729
 730                 /*
 731                  * This special case handles a kthread spawned by linux_clone
 732                  * (see linux_misc.c).  The linux_wait4 and linux_waitpid
 733                  * functions need to be able to distinguish between waiting
 734                  * on a process and waiting on a thread.  It is a thread if
 735                  * p_sigparent is not SIGCHLD, and the WLINUXCLONE option
 736                  * signifies we want to wait for threads and not processes.
 737                  */
 738                 if ((p->p_sigparent != SIGCHLD) ^
 739                     ((options & WLINUXCLONE) != 0)) {
 740                         PROC_UNLOCK(p);
 741                         continue;
 742                 }
 743
 744                 nfound++;
 745                 if (p->p_state == PRS_ZOMBIE) {
 746
 747                         /*
 748                          * It is possible that the last thread of this
 749                          * process is still running on another CPU
 750                          * in thread_exit() after having dropped the process
 751                          * lock via PROC_UNLOCK() but before it has completed
 752                          * cpu_throw().  In that case, the other thread must
 753                          * still hold sched_lock, so simply by acquiring
 754                          * sched_lock once we will wait long enough for the
 755                          * thread to exit in that case.
 756                          */
 757                         mtx_lock_spin(&sched_lock);
 758                         mtx_unlock_spin(&sched_lock);
 759
 760                         td->td_retval[0] = p->p_pid;
 761                         if (status)
 762                                 *status = p->p_xstat;   /* convert to int */
 763                         if (rusage) {
 764                                 *rusage = *p->p_ru;
 765                                 calcru(p, &rusage->ru_utime, &rusage->ru_stime);
 766                         }
 767
 768                         PROC_LOCK(q);
 769                         sigqueue_take(p->p_ksi);
 770                         PROC_UNLOCK(q);
 771
 772                         /*
 773                          * If we got the child via a ptrace 'attach',
 774                          * we need to give it back to the old parent.
 775                          */
 776                         PROC_UNLOCK(p);
 777                         if (p->p_oppid && (t = pfind(p->p_oppid)) != NULL) {
 778                                 PROC_LOCK(p);
 779                                 p->p_oppid = 0;
 780                                 proc_reparent(p, t);
 781                                 PROC_UNLOCK(p);
 782                                 tdsignal(t, NULL, SIGCHLD, p->p_ksi);
 783                                 wakeup(t);
 784                                 PROC_UNLOCK(t);
 785                                 sx_xunlock(&proctree_lock);
 786                                 return (0);
 787                         }
 788
 789                         /*
 790                          * Remove other references to this process to ensure
 791                          * we have an exclusive reference.
 792                          */
 793                         sx_xlock(&allproc_lock);
 794                         LIST_REMOVE(p, p_list); /* off zombproc */
 795                         sx_xunlock(&allproc_lock);
 796                         LIST_REMOVE(p, p_sibling);
 797                         leavepgrp(p);
 798                         sx_xunlock(&proctree_lock);
 799
 800                         /*
 801                          * As a side effect of this lock, we know that
 802                          * all other writes to this proc are visible now, so
 803                          * no more locking is needed for p.
 804                          */
 805                         PROC_LOCK(p);
 806                         p->p_xstat = 0;         /* XXX: why? */
 807                         PROC_UNLOCK(p);
 808                         PROC_LOCK(q);
 809                         ruadd(&q->p_stats->p_cru, &q->p_crux, p->p_ru,
 810                             &p->p_rux);
 811                         PROC_UNLOCK(q);
 812                         FREE(p->p_ru, M_ZOMBIE);
 813                         p->p_ru = NULL;
 814
 815                         /*
 816                          * Decrement the count of procs running with this uid.
 817                          */
 818                         (void)chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0);
 819
 820                         /*
 821                          * Free credentials, arguments, and sigacts.
 822                          */
 823                         crfree(p->p_ucred);
 824                         p->p_ucred = NULL;
 825                         pargs_drop(p->p_args);
 826                         p->p_args = NULL;
 827                         sigacts_free(p->p_sigacts);
 828                         p->p_sigacts = NULL;
 829
 830                         /*
 831                          * Do any thread-system specific cleanups.
 832                          */
 833                         thread_wait(p);
 834
 835                         /*
 836                          * Give vm and machine-dependent layer a chance
 837                          * to free anything that cpu_exit couldn't
 838                          * release while still running in process context.
 839                          */
 840                         vm_waitproc(p);
 841 #ifdef MAC
 842                         mac_destroy_proc(p);
 843 #endif
 844 #ifdef AUDIT
 845                         audit_proc_free(p);
 846 #endif
 847                         KASSERT(FIRST_THREAD_IN_PROC(p),
 848                             ("kern_wait: no residual thread!"));
 849                         uma_zfree(proc_zone, p);
 850                         sx_xlock(&allproc_lock);
 851                         nprocs--;
 852                         sx_xunlock(&allproc_lock);
 853                         return (0);
 854                 }
 855                 mtx_lock_spin(&sched_lock);
 856                 if ((p->p_flag & P_STOPPED_SIG) &&
 857                     (p->p_suspcount == p->p_numthreads) &&
 858                     (p->p_flag & P_WAITED) == 0 &&
 859                     (p->p_flag & P_TRACED || options & WUNTRACED)) {
 860                         mtx_unlock_spin(&sched_lock);
 861                         p->p_flag |= P_WAITED;
 862                         sx_xunlock(&proctree_lock);
 863                         td->td_retval[0] = p->p_pid;
 864                         if (status)
 865                                 *status = W_STOPCODE(p->p_xstat);
 866                         PROC_UNLOCK(p);
 867
 868                         PROC_LOCK(q);
 869                         sigqueue_take(p->p_ksi);
 870                         PROC_UNLOCK(q);
 871
 872                         return (0);
 873                 }
 874                 mtx_unlock_spin(&sched_lock);
 875                 if (options & WCONTINUED && (p->p_flag & P_CONTINUED)) {
 876                         sx_xunlock(&proctree_lock);
 877                         td->td_retval[0] = p->p_pid;
 878                         p->p_flag &= ~P_CONTINUED;
 879                         PROC_UNLOCK(p);
 880
 881                         PROC_LOCK(q);
 882                         sigqueue_take(p->p_ksi);
 883                         PROC_UNLOCK(q);
 884
 885                         if (status)
 886                                 *status = SIGCONT;
 887                         return (0);
 888                 }
 889                 PROC_UNLOCK(p);
 890         }
 891         if (nfound == 0) {
 892                 sx_xunlock(&proctree_lock);
 893                 return (ECHILD);
 894         }
 895         if (options & WNOHANG) {
 896                 sx_xunlock(&proctree_lock);
 897                 td->td_retval[0] = 0;
 898                 return (0);
 899         }
 900         PROC_LOCK(q);
 901         sx_xunlock(&proctree_lock);
 902         if (q->p_flag & P_STATCHILD) {
 903                 q->p_flag &= ~P_STATCHILD;
 904                 error = 0;
 905         } else
 906                 error = msleep(q, &q->p_mtx, PWAIT | PCATCH, "wait", 0);
 907         PROC_UNLOCK(q);
 908         if (error)
 909                 return (error);
 910         goto loop;
 911 }
 912
 913 /*
 914  * Make process 'parent' the new parent of process 'child'.
 915  * Must be called with an exclusive hold of proctree lock.
 916  */
 917 void
 918 proc_reparent(struct proc *child, struct proc *parent)
 919 {
 920
 921         sx_assert(&proctree_lock, SX_XLOCKED);
 922         PROC_LOCK_ASSERT(child, MA_OWNED);
 923         if (child->p_pptr == parent)
 924                 return;
 925
 926         LIST_REMOVE(child, p_sibling);
 927         LIST_INSERT_HEAD(&parent->p_children, child, p_sibling);
 928         child->p_pptr = parent;
 929 }