sys/compat/linux/linux_misc.c

   1 /*-
   2  * SPDX-License-Identifier: BSD-3-Clause
   3  *
   4  * Copyright (c) 2002 Doug Rabson
   5  * Copyright (c) 1994-1995 Søren Schmidt
   6  * All rights reserved.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer
  13  *    in this position and unchanged.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  * 3. The name of the author may not be used to endorse or promote products
  18  *    derived from this software without specific prior written permission
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  23  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30  */
  31
  32 #include <sys/cdefs.h>
  33 __FBSDID("$FreeBSD$");
  34
  35 #include "opt_compat.h"
  36
  37 #include <sys/param.h>
  38 #include <sys/blist.h>
  39 #include <sys/fcntl.h>
  40 #if defined(__i386__)
  41 #include <sys/imgact_aout.h>
  42 #endif
  43 #include <sys/jail.h>
  44 #include <sys/kernel.h>
  45 #include <sys/limits.h>
  46 #include <sys/lock.h>
  47 #include <sys/malloc.h>
  48 #include <sys/mman.h>
  49 #include <sys/mount.h>
  50 #include <sys/msgbuf.h>
  51 #include <sys/mutex.h>
  52 #include <sys/namei.h>
  53 #include <sys/poll.h>
  54 #include <sys/priv.h>
  55 #include <sys/proc.h>
  56 #include <sys/procctl.h>
  57 #include <sys/reboot.h>
  58 #include <sys/racct.h>
  59 #include <sys/random.h>
  60 #include <sys/resourcevar.h>
  61 #include <sys/sched.h>
  62 #include <sys/sdt.h>
  63 #include <sys/signalvar.h>
  64 #include <sys/stat.h>
  65 #include <sys/syscallsubr.h>
  66 #include <sys/sysctl.h>
  67 #include <sys/sysproto.h>
  68 #include <sys/systm.h>
  69 #include <sys/time.h>
  70 #include <sys/vmmeter.h>
  71 #include <sys/vnode.h>
  72 #include <sys/wait.h>
  73 #include <sys/cpuset.h>
  74 #include <sys/uio.h>
  75
  76 #include <security/mac/mac_framework.h>
  77
  78 #include <vm/vm.h>
  79 #include <vm/pmap.h>
  80 #include <vm/vm_kern.h>
  81 #include <vm/vm_map.h>
  82 #include <vm/vm_extern.h>
  83 #include <vm/swap_pager.h>
  84
  85 #ifdef COMPAT_LINUX32
  86 #include <machine/../linux32/linux.h>
  87 #include <machine/../linux32/linux32_proto.h>
  88 #else
  89 #include <machine/../linux/linux.h>
  90 #include <machine/../linux/linux_proto.h>
  91 #endif
  92
  93 #include <compat/linux/linux_common.h>
  94 #include <compat/linux/linux_dtrace.h>
  95 #include <compat/linux/linux_file.h>
  96 #include <compat/linux/linux_mib.h>
  97 #include <compat/linux/linux_signal.h>
  98 #include <compat/linux/linux_timer.h>
  99 #include <compat/linux/linux_util.h>
 100 #include <compat/linux/linux_sysproto.h>
 101 #include <compat/linux/linux_emul.h>
 102 #include <compat/linux/linux_misc.h>
 103
int stclohz;				/* Statistics clock frequency */

/*
 * Table of native RLIMIT_* identifiers, sized by LINUX_RLIM_NLIMITS.
 * NOTE(review): presumably indexed by the Linux resource number to obtain
 * the matching native resource; the users of this table are outside this
 * part of the file -- confirm against them.
 */
static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, RLIMIT_AS
};
 111
/*
 * Record copied out to the caller by linux_sysinfo().  The field order and
 * types define the layout seen by the Linux binary, so they must not be
 * rearranged.
 */
struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
#define LINUX_SYSINFO_LOADS_SCALE 65536	/* Fixed-point scale of loads[] */
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ushort	pads;		/* Never assigned in this file; left zeroed */
	l_ulong		totalhigh;	/* High memory total; always reported as 0 */
	l_ulong		freehigh;	/* High memory free; always reported as 0 */
	l_uint		mem_unit;	/* Reported as 1 by linux_sysinfo() */
	char		_f[20-2*sizeof(l_long)-sizeof(l_int)];	/* padding */
};
 129
/*
 * Pair of a user-space address and a length.
 * NOTE(review): judging by the names this is the signal-mask argument of
 * pselect6 (pointer to the set plus its size); the consumer is not visible
 * in this part of the file -- confirm there.
 */
struct l_pselect6arg {
	l_uintptr_t	ss;
	l_size_t	ss_len;
};
 134
/*
 * Forward declarations of file-local helpers.  The 64-bit timespec
 * converter is only declared for i386 and for 32-bit emulation on amd64.
 */
static int	linux_utimensat_lts_to_ts(struct l_timespec *,
			struct timespec *);
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
static int	linux_utimensat_lts64_to_ts(struct l_timespec64 *,
			struct timespec *);
#endif
static int	linux_common_utimensat(struct thread *, int,
			const char *, struct timespec *, int);
static int	linux_common_pselect6(struct thread *, l_int,
			l_fd_set *, l_fd_set *, l_fd_set *,
			struct timespec *, l_uintptr_t *);
static int	linux_common_ppoll(struct thread *, struct pollfd *,
			uint32_t, struct timespec *, l_sigset_t *,
			l_size_t);
static int	linux_pollin(struct thread *, struct pollfd *,
			struct pollfd *, u_int);
static int	linux_pollout(struct thread *, struct pollfd *,
			struct pollfd *, u_int);

 153
 154 int
 155 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
 156 {
 157         struct l_sysinfo sysinfo;
 158         int i, j;
 159         struct timespec ts;
 160
 161         bzero(&sysinfo, sizeof(sysinfo));
 162         getnanouptime(&ts);
 163         if (ts.tv_nsec != 0)
 164                 ts.tv_sec++;
 165         sysinfo.uptime = ts.tv_sec;
 166
 167         /* Use the information from the mib to get our load averages */
 168         for (i = 0; i < 3; i++)
 169                 sysinfo.loads[i] = averunnable.ldavg[i] *
 170                     LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;
 171
 172         sysinfo.totalram = physmem * PAGE_SIZE;
 173         sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE;
 174
 175         /*
 176          * sharedram counts pages allocated to named, swap-backed objects such
 177          * as shared memory segments and tmpfs files.  There is no cheap way to
 178          * compute this, so just leave the field unpopulated.  Linux itself only
 179          * started setting this field in the 3.x timeframe.
 180          */
 181         sysinfo.sharedram = 0;
 182         sysinfo.bufferram = 0;
 183
 184         swap_pager_status(&i, &j);
 185         sysinfo.totalswap = i * PAGE_SIZE;
 186         sysinfo.freeswap = (i - j) * PAGE_SIZE;
 187
 188         sysinfo.procs = nprocs;
 189
 190         /*
 191          * Platforms supported by the emulation layer do not have a notion of
 192          * high memory.
 193          */
 194         sysinfo.totalhigh = 0;
 195         sysinfo.freehigh = 0;
 196
 197         sysinfo.mem_unit = 1;
 198
 199         return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
 200 }
 201
 202 #ifdef LINUX_LEGACY_SYSCALLS
 203 int
 204 linux_alarm(struct thread *td, struct linux_alarm_args *args)
 205 {
 206         struct itimerval it, old_it;
 207         u_int secs;
 208         int error __diagused;
 209
 210         secs = args->secs;
 211         /*
 212          * Linux alarm() is always successful. Limit secs to INT32_MAX / 2
 213          * to match kern_setitimer()'s limit to avoid error from it.
 214          *
 215          * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit
 216          * platforms.
 217          */
 218         if (secs > INT32_MAX / 2)
 219                 secs = INT32_MAX / 2;
 220
 221         it.it_value.tv_sec = secs;
 222         it.it_value.tv_usec = 0;
 223         timevalclear(&it.it_interval);
 224         error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
 225         KASSERT(error == 0, ("kern_setitimer returns %d", error));
 226
 227         if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) ||
 228             old_it.it_value.tv_usec >= 500000)
 229                 old_it.it_value.tv_sec++;
 230         td->td_retval[0] = old_it.it_value.tv_sec;
 231         return (0);
 232 }
 233 #endif
 234
 235 int
 236 linux_brk(struct thread *td, struct linux_brk_args *args)
 237 {
 238         struct vmspace *vm = td->td_proc->p_vmspace;
 239         uintptr_t new, old;
 240
 241         old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize);
 242         new = (uintptr_t)args->dsend;
 243         if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new))
 244                 td->td_retval[0] = (register_t)new;
 245         else
 246                 td->td_retval[0] = (register_t)old;
 247
 248         return (0);
 249 }
 250
#if defined(__i386__)
/* XXX: what about amd64/linux32? */

/*
 * Linux uselib(2): load the a.out shared library named by args->library
 * into the address space of the calling process.
 *
 * The file is looked up, checked for execute permission, opened, and its
 * first page is mapped into exec_map so that the a.out header can be
 * inspected.  Text and data are then either read into anonymous memory or
 * mapped from the vnode, and anonymous memory is added for the bss.
 *
 * Three flags track what the cleanup code has to undo:
 *   locked  - the vnode lock is held;
 *   opened  - VOP_OPEN() succeeded and VOP_CLOSE() is owed;
 *   textset - VOP_SET_TEXT() succeeded and has not been handed over to a
 *             map entry.
 *
 * Returns 0 on success or an errno value.
 */
int
linux_uselib(struct thread *td, struct linux_uselib_args *args)
{
	struct nameidata ni;
	struct vnode *vp;
	struct exec *a_out;
	vm_map_t map;
	vm_map_entry_t entry;
	struct vattr attr;
	vm_offset_t vmaddr;
	unsigned long file_offset;
	unsigned long bss_size;
	char *library;
	ssize_t aresid;
	int error;
	bool locked, opened, textset;

	a_out = NULL;
	vp = NULL;
	locked = false;
	textset = false;
	opened = false;

	/*
	 * Look the library up, either directly from the user-supplied path
	 * or from a converted kernel-space copy of it.
	 */
	if (!LUSECONVPATH(td)) {
		NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
		    UIO_USERSPACE, args->library, td);
		error = namei(&ni);
	} else {
		LCONVPATHEXIST(args->library, &library);
		NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
		    UIO_SYSSPACE, library, td);
		error = namei(&ni);
		LFREEPATH(library);
	}
	if (error)
		goto cleanup;

	vp = ni.ni_vp;
	NDFREE(&ni, NDF_ONLY_PNBUF);

	/*
	 * From here on down, we have a locked vnode that must be unlocked.
	 * XXX: The code below largely duplicates exec_check_permissions().
	 */
	locked = true;

	/* Executable? */
	error = VOP_GETATTR(vp, &attr, td->td_ucred);
	if (error)
		goto cleanup;

	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
		/*
		 * EACCESS is what exec(2) returns.
		 * NOTE(review): the code nevertheless reports ENOEXEC here;
		 * confirm which of the two is intended.
		 */
		error = ENOEXEC;
		goto cleanup;
	}

	/* Sensible size? */
	if (attr.va_size == 0) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Can we access it? */
	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	if (error)
		goto cleanup;

	/*
	 * XXX: This should use vn_open() so that it is properly authorized,
	 * and to reduce code redundancy all over the place here.
	 * XXX: Not really, it duplicates far more of exec_check_permissions()
	 * than vn_open().
	 */
#ifdef MAC
	error = mac_vnode_check_open(td->td_ucred, vp, VREAD);
	if (error)
		goto cleanup;
#endif
	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
	if (error)
		goto cleanup;
	opened = true;

	/* Pull in executable header into exec_map */
	error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE,
	    VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0);
	if (error)
		goto cleanup;

	/* Is it a Linux binary ? */
	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * While we are here, we should REALLY do some more checks
	 */

	/* Set file/virtual offset based on a.out variant. */
	switch ((int)(a_out->a_magic & 0xffff)) {
	case 0413:			/* ZMAGIC */
		file_offset = 1024;
		break;
	case 0314:			/* QMAGIC */
		file_offset = 0;
		break;
	default:
		error = ENOEXEC;
		goto cleanup;
	}

	bss_size = round_page(a_out->a_bss);

	/* Check various fields in header for validity/bounds. */
	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* text + data can't exceed file size */
	if (a_out->a_data + a_out->a_text > attr.va_size) {
		error = EFAULT;
		goto cleanup;
	}

	/*
	 * text/data/bss must not exceed limits
	 * XXX - this is not complete. it should check current usage PLUS
	 * the resources needed by this library.
	 */
	PROC_LOCK(td->td_proc);
	if (a_out->a_text > maxtsiz ||
	    a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) ||
	    racct_set(td->td_proc, RACCT_DATA, a_out->a_data +
	    bss_size) != 0) {
		PROC_UNLOCK(td->td_proc);
		error = ENOMEM;
		goto cleanup;
	}
	PROC_UNLOCK(td->td_proc);

	/*
	 * Prevent more writers.
	 */
	error = VOP_SET_TEXT(vp);
	if (error != 0)
		goto cleanup;
	textset = true;

	/*
	 * Lock no longer needed
	 */
	locked = false;
	VOP_UNLOCK(vp);

	/*
	 * Check if file_offset page aligned. Currently we cannot handle
	 * misaligned file offsets, and so we read in the entire image
	 * (what a waste).
	 */
	if (file_offset & PAGE_MASK) {
		/* Map text+data read/write/execute */

		/* a_entry is the load address and is page aligned */
		vmaddr = trunc_page(a_out->a_entry);

		/* get anon user mapping, read+write+execute */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE,
		    VM_PROT_ALL, VM_PROT_ALL, 0);
		if (error)
			goto cleanup;

		/* Fill the anonymous mapping; a short read is an error. */
		error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset,
		    a_out->a_text + a_out->a_data, UIO_USERSPACE, 0,
		    td->td_ucred, NOCRED, &aresid, td);
		if (error != 0)
			goto cleanup;
		if (aresid != 0) {
			error = ENOEXEC;
			goto cleanup;
		}
	} else {
		/*
		 * for QMAGIC, a_entry is 20 bytes beyond the load address
		 * to skip the executable header
		 */
		vmaddr = trunc_page(a_out->a_entry);

		/*
		 * Map it all into the process's space as a single
		 * copy-on-write "data" segment.
		 */
		map = &td->td_proc->p_vmspace->vm_map;
		error = vm_mmap(map, &vmaddr,
		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
		    MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
		if (error)
			goto cleanup;
		vm_map_lock(map);
		if (!vm_map_lookup_entry(map, vmaddr, &entry)) {
			vm_map_unlock(map);
			error = EDOOFUS;
			goto cleanup;
		}
		/*
		 * Tag the new entry and clear textset so that the cleanup
		 * code below does not undo VOP_SET_TEXT().
		 */
		entry->eflags |= MAP_ENTRY_VN_EXEC;
		vm_map_unlock(map);
		textset = false;
	}

	if (bss_size != 0) {
		/* Calculate BSS start address */
		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
		    a_out->a_data;

		/* allocate some 'anon' space */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL,
		    VM_PROT_ALL, 0);
		if (error)
			goto cleanup;
	}

cleanup:
	/* Reached on success as well as on failure. */
	if (opened) {
		if (locked)
			VOP_UNLOCK(vp);
		locked = false;
		VOP_CLOSE(vp, FREAD, td->td_ucred, td);
	}
	if (textset) {
		/* The vnode lock is taken (again) around the text unset. */
		if (!locked) {
			locked = true;
			VOP_LOCK(vp, LK_SHARED | LK_RETRY);
		}
		VOP_UNSET_TEXT_CHECKED(vp);
	}
	if (locked)
		VOP_UNLOCK(vp);

	/* Release the temporary mapping. */
	if (a_out)
		kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE);

	return (error);
}

#endif	/* __i386__ */

 505
 506 #ifdef LINUX_LEGACY_SYSCALLS
 507 int
 508 linux_select(struct thread *td, struct linux_select_args *args)
 509 {
 510         l_timeval ltv;
 511         struct timeval tv0, tv1, utv, *tvp;
 512         int error;
 513
 514         /*
 515          * Store current time for computation of the amount of
 516          * time left.
 517          */
 518         if (args->timeout) {
 519                 if ((error = copyin(args->timeout, &ltv, sizeof(ltv))))
 520                         goto select_out;
 521                 utv.tv_sec = ltv.tv_sec;
 522                 utv.tv_usec = ltv.tv_usec;
 523
 524                 if (itimerfix(&utv)) {
 525                         /*
 526                          * The timeval was invalid.  Convert it to something
 527                          * valid that will act as it does under Linux.
 528                          */
 529                         utv.tv_sec += utv.tv_usec / 1000000;
 530                         utv.tv_usec %= 1000000;
 531                         if (utv.tv_usec < 0) {
 532                                 utv.tv_sec -= 1;
 533                                 utv.tv_usec += 1000000;
 534                         }
 535                         if (utv.tv_sec < 0)
 536                                 timevalclear(&utv);
 537                 }
 538                 microtime(&tv0);
 539                 tvp = &utv;
 540         } else
 541                 tvp = NULL;
 542
 543         error = kern_select(td, args->nfds, args->readfds, args->writefds,
 544             args->exceptfds, tvp, LINUX_NFDBITS);
 545         if (error)
 546                 goto select_out;
 547
 548         if (args->timeout) {
 549                 if (td->td_retval[0]) {
 550                         /*
 551                          * Compute how much time was left of the timeout,
 552                          * by subtracting the current time and the time
 553                          * before we started the call, and subtracting
 554                          * that result from the user-supplied value.
 555                          */
 556                         microtime(&tv1);
 557                         timevalsub(&tv1, &tv0);
 558                         timevalsub(&utv, &tv1);
 559                         if (utv.tv_sec < 0)
 560                                 timevalclear(&utv);
 561                 } else
 562                         timevalclear(&utv);
 563                 ltv.tv_sec = utv.tv_sec;
 564                 ltv.tv_usec = utv.tv_usec;
 565                 if ((error = copyout(&ltv, args->timeout, sizeof(ltv))))
 566                         goto select_out;
 567         }
 568
 569 select_out:
 570         return (error);
 571 }
 572 #endif
 573
 574 int
 575 linux_mremap(struct thread *td, struct linux_mremap_args *args)
 576 {
 577         uintptr_t addr;
 578         size_t len;
 579         int error = 0;
 580
 581         if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
 582                 td->td_retval[0] = 0;
 583                 return (EINVAL);
 584         }
 585
 586         /*
 587          * Check for the page alignment.
 588          * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK.
 589          */
 590         if (args->addr & PAGE_MASK) {
 591                 td->td_retval[0] = 0;
 592                 return (EINVAL);
 593         }
 594
 595         args->new_len = round_page(args->new_len);
 596         args->old_len = round_page(args->old_len);
 597
 598         if (args->new_len > args->old_len) {
 599                 td->td_retval[0] = 0;
 600                 return (ENOMEM);
 601         }
 602
 603         if (args->new_len < args->old_len) {
 604                 addr = args->addr + args->new_len;
 605                 len = args->old_len - args->new_len;
 606                 error = kern_munmap(td, addr, len);
 607         }
 608
 609         td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
 610         return (error);
 611 }
 612
 613 #define LINUX_MS_ASYNC       0x0001
 614 #define LINUX_MS_INVALIDATE  0x0002
 615 #define LINUX_MS_SYNC        0x0004
 616
 617 int
 618 linux_msync(struct thread *td, struct linux_msync_args *args)
 619 {
 620
 621         return (kern_msync(td, args->addr, args->len,
 622             args->fl & ~LINUX_MS_SYNC));
 623 }
 624
 625 #ifdef LINUX_LEGACY_SYSCALLS
 626 int
 627 linux_time(struct thread *td, struct linux_time_args *args)
 628 {
 629         struct timeval tv;
 630         l_time_t tm;
 631         int error;
 632
 633         microtime(&tv);
 634         tm = tv.tv_sec;
 635         if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
 636                 return (error);
 637         td->td_retval[0] = tm;
 638         return (0);
 639 }
 640 #endif
 641
/*
 * Record copied out by linux_times(): process and child times, each
 * expressed in clock ticks.
 */
struct l_times_argv {
	l_clock_t	tms_utime;	/* User time of the process */
	l_clock_t	tms_stime;	/* System time of the process */
	l_clock_t	tms_cutime;	/* User time of the children */
	l_clock_t	tms_cstime;	/* System time of the children */
};

 648
 649 /*
 650  * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value.
 651  * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK
 652  * auxiliary vector entry.
 653  */
 654 #define CLK_TCK         100
 655
 656 #define CONVOTCK(r)     (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
 657 #define CONVNTCK(r)     (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz))
 658
 659 #define CONVTCK(r)      (linux_kernver(td) >= LINUX_KERNVER_2004000 ?           \
 660                             CONVNTCK(r) : CONVOTCK(r))
 661
 662 int
 663 linux_times(struct thread *td, struct linux_times_args *args)
 664 {
 665         struct timeval tv, utime, stime, cutime, cstime;
 666         struct l_times_argv tms;
 667         struct proc *p;
 668         int error;
 669
 670         if (args->buf != NULL) {
 671                 p = td->td_proc;
 672                 PROC_LOCK(p);
 673                 PROC_STATLOCK(p);
 674                 calcru(p, &utime, &stime);
 675                 PROC_STATUNLOCK(p);
 676                 calccru(p, &cutime, &cstime);
 677                 PROC_UNLOCK(p);
 678
 679                 tms.tms_utime = CONVTCK(utime);
 680                 tms.tms_stime = CONVTCK(stime);
 681
 682                 tms.tms_cutime = CONVTCK(cutime);
 683                 tms.tms_cstime = CONVTCK(cstime);
 684
 685                 if ((error = copyout(&tms, args->buf, sizeof(tms))))
 686                         return (error);
 687         }
 688
 689         microuptime(&tv);
 690         td->td_retval[0] = (int)CONVTCK(tv);
 691         return (0);
 692 }
 693
 694 int
 695 linux_newuname(struct thread *td, struct linux_newuname_args *args)
 696 {
 697         struct l_new_utsname utsname;
 698         char osname[LINUX_MAX_UTSNAME];
 699         char osrelease[LINUX_MAX_UTSNAME];
 700         char *p;
 701
 702         linux_get_osname(td, osname);
 703         linux_get_osrelease(td, osrelease);
 704
 705         bzero(&utsname, sizeof(utsname));
 706         strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
 707         getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
 708         getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME);
 709         strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
 710         strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
 711         for (p = utsname.version; *p != '\0'; ++p)
 712                 if (*p == '\n') {
 713                         *p = '\0';
 714                         break;
 715                 }
 716 #if defined(__amd64__)
 717         /*
 718          * On amd64, Linux uname(2) needs to return "x86_64"
 719          * for both 64-bit and 32-bit applications.  On 32-bit,
 720          * the string returned by getauxval(AT_PLATFORM) needs
 721          * to remain "i686", though.
 722          */
 723 #if defined(COMPAT_LINUX32)
 724         if (linux32_emulate_i386)
 725                 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
 726         else
 727 #endif
 728         strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME);
 729 #elif defined(__aarch64__)
 730         strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME);
 731 #elif defined(__i386__)
 732         strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
 733 #endif
 734
 735         return (copyout(&utsname, args->buf, sizeof(utsname)));
 736 }
 737
/* Argument record read from user space by linux_utime(); whole seconds. */
struct l_utimbuf {
	l_time_t l_actime;	/* Access time */
	l_time_t l_modtime;	/* Modification time */
};
 742
 743 #ifdef LINUX_LEGACY_SYSCALLS
 744 int
 745 linux_utime(struct thread *td, struct linux_utime_args *args)
 746 {
 747         struct timeval tv[2], *tvp;
 748         struct l_utimbuf lut;
 749         char *fname;
 750         int error;
 751
 752         if (args->times) {
 753                 if ((error = copyin(args->times, &lut, sizeof lut)) != 0)
 754                         return (error);
 755                 tv[0].tv_sec = lut.l_actime;
 756                 tv[0].tv_usec = 0;
 757                 tv[1].tv_sec = lut.l_modtime;
 758                 tv[1].tv_usec = 0;
 759                 tvp = tv;
 760         } else
 761                 tvp = NULL;
 762
 763         if (!LUSECONVPATH(td)) {
 764                 error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
 765                     tvp, UIO_SYSSPACE);
 766         } else {
 767                 LCONVPATHEXIST(args->fname, &fname);
 768                 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp,
 769                     UIO_SYSSPACE);
 770                 LFREEPATH(fname);
 771         }
 772         return (error);
 773 }
 774 #endif
 775
 776 #ifdef LINUX_LEGACY_SYSCALLS
 777 int
 778 linux_utimes(struct thread *td, struct linux_utimes_args *args)
 779 {
 780         l_timeval ltv[2];
 781         struct timeval tv[2], *tvp = NULL;
 782         char *fname;
 783         int error;
 784
 785         if (args->tptr != NULL) {
 786                 if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0)
 787                         return (error);
 788                 tv[0].tv_sec = ltv[0].tv_sec;
 789                 tv[0].tv_usec = ltv[0].tv_usec;
 790                 tv[1].tv_sec = ltv[1].tv_sec;
 791                 tv[1].tv_usec = ltv[1].tv_usec;
 792                 tvp = tv;
 793         }
 794
 795         if (!LUSECONVPATH(td)) {
 796                 error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
 797                     tvp, UIO_SYSSPACE);
 798         } else {
 799                 LCONVPATHEXIST(args->fname, &fname);
 800                 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE,
 801                     tvp, UIO_SYSSPACE);
 802                 LFREEPATH(fname);
 803         }
 804         return (error);
 805 }
 806 #endif
 807
 808 static int
 809 linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times)
 810 {
 811
 812         if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
 813             l_times->tv_nsec != LINUX_UTIME_NOW &&
 814             (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
 815                 return (EINVAL);
 816
 817         times->tv_sec = l_times->tv_sec;
 818         switch (l_times->tv_nsec)
 819         {
 820         case LINUX_UTIME_OMIT:
 821                 times->tv_nsec = UTIME_OMIT;
 822                 break;
 823         case LINUX_UTIME_NOW:
 824                 times->tv_nsec = UTIME_NOW;
 825                 break;
 826         default:
 827                 times->tv_nsec = l_times->tv_nsec;
 828         }
 829
 830         return (0);
 831 }
 832
 833 static int
 834 linux_common_utimensat(struct thread *td, int ldfd, const char *pathname,
 835     struct timespec *timesp, int lflags)
 836 {
 837         char *path = NULL;
 838         int error, dfd, flags = 0;
 839
 840         dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd;
 841
 842         if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH))
 843                 return (EINVAL);
 844
 845         if (timesp != NULL) {
 846                 /* This breaks POSIX, but is what the Linux kernel does
 847                  * _on purpose_ (documented in the man page for utimensat(2)),
 848                  * so we must follow that behaviour. */
 849                 if (timesp[0].tv_nsec == UTIME_OMIT &&
 850                     timesp[1].tv_nsec == UTIME_OMIT)
 851                         return (0);
 852         }
 853
 854         if (lflags & LINUX_AT_SYMLINK_NOFOLLOW)
 855                 flags |= AT_SYMLINK_NOFOLLOW;
 856         if (lflags & LINUX_AT_EMPTY_PATH)
 857                 flags |= AT_EMPTY_PATH;
 858
 859         if (!LUSECONVPATH(td)) {
 860                 if (pathname != NULL) {
 861                         return (kern_utimensat(td, dfd, pathname,
 862                             UIO_USERSPACE, timesp, UIO_SYSSPACE, flags));
 863                 }
 864         }
 865
 866         if (pathname != NULL)
 867                 LCONVPATHEXIST_AT(pathname, &path, dfd);
 868         else if (lflags != 0)
 869                 return (EINVAL);
 870
 871         if (path == NULL)
 872                 error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE);
 873         else {
 874                 error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp,
 875                         UIO_SYSSPACE, flags);
 876                 LFREEPATH(path);
 877         }
 878
 879         return (error);
 880 }
 881
 882 int
 883 linux_utimensat(struct thread *td, struct linux_utimensat_args *args)
 884 {
 885         struct l_timespec l_times[2];
 886         struct timespec times[2], *timesp;
 887         int error;
 888
 889         if (args->times != NULL) {
 890                 error = copyin(args->times, l_times, sizeof(l_times));
 891                 if (error != 0)
 892                         return (error);
 893
 894                 error = linux_utimensat_lts_to_ts(&l_times[0], &times[0]);
 895                 if (error != 0)
 896                         return (error);
 897                 error = linux_utimensat_lts_to_ts(&l_times[1], &times[1]);
 898                 if (error != 0)
 899                         return (error);
 900                 timesp = times;
 901         } else
 902                 timesp = NULL;
 903
 904         return (linux_common_utimensat(td, args->dfd, args->pathname,
 905             timesp, args->flags));
 906 }
 907
 908 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 909 static int
 910 linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times)
 911 {
 912
 913         if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
 914             l_times->tv_nsec != LINUX_UTIME_NOW &&
 915             (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
 916                 return (EINVAL);
 917
 918         times->tv_sec = l_times->tv_sec;
 919         switch (l_times->tv_nsec)
 920         {
 921         case LINUX_UTIME_OMIT:
 922                 times->tv_nsec = UTIME_OMIT;
 923                 break;
 924         case LINUX_UTIME_NOW:
 925                 times->tv_nsec = UTIME_NOW;
 926                 break;
 927         default:
 928                 times->tv_nsec = l_times->tv_nsec;
 929         }
 930
 931         return (0);
 932 }
 933
 934 int
 935 linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args)
 936 {
 937         struct l_timespec64 l_times[2];
 938         struct timespec times[2], *timesp;
 939         int error;
 940
 941         if (args->times64 != NULL) {
 942                 error = copyin(args->times64, l_times, sizeof(l_times));
 943                 if (error != 0)
 944                         return (error);
 945
 946                 error = linux_utimensat_lts64_to_ts(&l_times[0], &times[0]);
 947                 if (error != 0)
 948                         return (error);
 949                 error = linux_utimensat_lts64_to_ts(&l_times[1], &times[1]);
 950                 if (error != 0)
 951                         return (error);
 952                 timesp = times;
 953         } else
 954                 timesp = NULL;
 955
 956         return (linux_common_utimensat(td, args->dfd, args->pathname,
 957             timesp, args->flags));
 958 }
 959 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 960
 961 #ifdef LINUX_LEGACY_SYSCALLS
 962 int
 963 linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
 964 {
 965         l_timeval ltv[2];
 966         struct timeval tv[2], *tvp = NULL;
 967         char *fname;
 968         int error, dfd;
 969
 970         dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
 971
 972         if (args->utimes != NULL) {
 973                 if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0)
 974                         return (error);
 975                 tv[0].tv_sec = ltv[0].tv_sec;
 976                 tv[0].tv_usec = ltv[0].tv_usec;
 977                 tv[1].tv_sec = ltv[1].tv_sec;
 978                 tv[1].tv_usec = ltv[1].tv_usec;
 979                 tvp = tv;
 980         }
 981
 982         if (!LUSECONVPATH(td)) {
 983                 error = kern_utimesat(td, dfd, args->filename, UIO_USERSPACE,
 984                     tvp, UIO_SYSSPACE);
 985         } else {
 986                 LCONVPATHEXIST_AT(args->filename, &fname, dfd);
 987                 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE,
 988                     tvp, UIO_SYSSPACE);
 989                 LFREEPATH(fname);
 990         }
 991         return (error);
 992 }
 993 #endif
 994
 995 static int
 996 linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp,
 997     int options, void *rup, l_siginfo_t *infop)
 998 {
 999         l_siginfo_t lsi;
1000         siginfo_t siginfo;
1001         struct __wrusage wru;
1002         int error, status, tmpstat, sig;
1003
1004         error = kern_wait6(td, idtype, id, &status, options,
1005             rup != NULL ? &wru : NULL, &siginfo);
1006
1007         if (error == 0 && statusp) {
1008                 tmpstat = status & 0xffff;
1009                 if (WIFSIGNALED(tmpstat)) {
1010                         tmpstat = (tmpstat & 0xffffff80) |
1011                             bsd_to_linux_signal(WTERMSIG(tmpstat));
1012                 } else if (WIFSTOPPED(tmpstat)) {
1013                         tmpstat = (tmpstat & 0xffff00ff) |
1014                             (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
1015 #if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32))
1016                         if (WSTOPSIG(status) == SIGTRAP) {
1017                                 tmpstat = linux_ptrace_status(td,
1018                                     siginfo.si_pid, tmpstat);
1019                         }
1020 #endif
1021                 } else if (WIFCONTINUED(tmpstat)) {
1022                         tmpstat = 0xffff;
1023                 }
1024                 error = copyout(&tmpstat, statusp, sizeof(int));
1025         }
1026         if (error == 0 && rup != NULL)
1027                 error = linux_copyout_rusage(&wru.wru_self, rup);
1028         if (error == 0 && infop != NULL && td->td_retval[0] != 0) {
1029                 sig = bsd_to_linux_signal(siginfo.si_signo);
1030                 siginfo_to_lsiginfo(&siginfo, &lsi, sig);
1031                 error = copyout(&lsi, infop, sizeof(lsi));
1032         }
1033
1034         return (error);
1035 }
1036
1037 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1038 int
1039 linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
1040 {
1041         struct linux_wait4_args wait4_args;
1042
1043         wait4_args.pid = args->pid;
1044         wait4_args.status = args->status;
1045         wait4_args.options = args->options;
1046         wait4_args.rusage = NULL;
1047
1048         return (linux_wait4(td, &wait4_args));
1049 }
1050 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1051
1052 int
1053 linux_wait4(struct thread *td, struct linux_wait4_args *args)
1054 {
1055         struct proc *p;
1056         int options, id, idtype;
1057
1058         if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
1059             LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
1060                 return (EINVAL);
1061
1062         /* -INT_MIN is not defined. */
1063         if (args->pid == INT_MIN)
1064                 return (ESRCH);
1065
1066         options = 0;
1067         linux_to_bsd_waitopts(args->options, &options);
1068
1069         /*
1070          * For backward compatibility we implicitly add flags WEXITED
1071          * and WTRAPPED here.
1072          */
1073         options |= WEXITED | WTRAPPED;
1074
1075         /*
1076          * As FreeBSD does not have __WALL option bit analogue explicitly set all
1077          * possible option bits to emulate Linux __WALL wait option bit. The same
1078          * for waitid system call.
1079          */
1080         if ((args->options & __WALL) != 0)
1081                 options |= WUNTRACED | WCONTINUED | WLINUXCLONE;
1082
1083         if (args->pid == WAIT_ANY) {
1084                 idtype = P_ALL;
1085                 id = 0;
1086         } else if (args->pid < 0) {
1087                 idtype = P_PGID;
1088                 id = (id_t)-args->pid;
1089         } else if (args->pid == 0) {
1090                 idtype = P_PGID;
1091                 p = td->td_proc;
1092                 PROC_LOCK(p);
1093                 id = p->p_pgid;
1094                 PROC_UNLOCK(p);
1095         } else {
1096                 idtype = P_PID;
1097                 id = (id_t)args->pid;
1098         }
1099
1100         return (linux_common_wait(td, idtype, id, args->status, options,
1101             args->rusage, NULL));
1102 }
1103
1104 int
1105 linux_waitid(struct thread *td, struct linux_waitid_args *args)
1106 {
1107         idtype_t idtype;
1108         int error, options;
1109         struct proc *p;
1110         pid_t id;
1111
1112         if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED |
1113             LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
1114                 return (EINVAL);
1115
1116         options = 0;
1117         linux_to_bsd_waitopts(args->options, &options);
1118         if ((args->options & __WALL) != 0)
1119                 options |= WEXITED | WTRAPPED | WUNTRACED |
1120                     WCONTINUED | WLINUXCLONE;
1121
1122         id = args->id;
1123         switch (args->idtype) {
1124         case LINUX_P_ALL:
1125                 idtype = P_ALL;
1126                 break;
1127         case LINUX_P_PID:
1128                 if (args->id <= 0)
1129                         return (EINVAL);
1130                 idtype = P_PID;
1131                 break;
1132         case LINUX_P_PGID:
1133                 if (linux_use54(td) && args->id == 0) {
1134                         p = td->td_proc;
1135                         PROC_LOCK(p);
1136                         id = p->p_pgid;
1137                         PROC_UNLOCK(p);
1138                 } else if (args->id <= 0)
1139                         return (EINVAL);
1140                 idtype = P_PGID;
1141                 break;
1142         case LINUX_P_PIDFD:
1143                 LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype");
1144                 return (ENOSYS);
1145         default:
1146                 return (EINVAL);
1147         }
1148
1149         error = linux_common_wait(td, idtype, id, NULL, options,
1150             args->rusage, args->info);
1151         td->td_retval[0] = 0;
1152
1153         return (error);
1154 }
1155
1156 #ifdef LINUX_LEGACY_SYSCALLS
1157 int
1158 linux_mknod(struct thread *td, struct linux_mknod_args *args)
1159 {
1160         char *path;
1161         int error;
1162         enum uio_seg seg;
1163         bool convpath;
1164
1165         convpath = LUSECONVPATH(td);
1166         if (!convpath) {
1167                 path = args->path;
1168                 seg = UIO_USERSPACE;
1169         } else {
1170                 LCONVPATHCREAT(args->path, &path);
1171                 seg = UIO_SYSSPACE;
1172         }
1173
1174         switch (args->mode & S_IFMT) {
1175         case S_IFIFO:
1176         case S_IFSOCK:
1177                 error = kern_mkfifoat(td, AT_FDCWD, path, seg,
1178                     args->mode);
1179                 break;
1180
1181         case S_IFCHR:
1182         case S_IFBLK:
1183                 error = kern_mknodat(td, AT_FDCWD, path, seg,
1184                     args->mode, args->dev);
1185                 break;
1186
1187         case S_IFDIR:
1188                 error = EPERM;
1189                 break;
1190
1191         case 0:
1192                 args->mode |= S_IFREG;
1193                 /* FALLTHROUGH */
1194         case S_IFREG:
1195                 error = kern_openat(td, AT_FDCWD, path, seg,
1196                     O_WRONLY | O_CREAT | O_TRUNC, args->mode);
1197                 if (error == 0)
1198                         kern_close(td, td->td_retval[0]);
1199                 break;
1200
1201         default:
1202                 error = EINVAL;
1203                 break;
1204         }
1205         if (convpath)
1206                 LFREEPATH(path);
1207         return (error);
1208 }
1209 #endif
1210
1211 int
1212 linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
1213 {
1214         char *path;
1215         int error, dfd;
1216         enum uio_seg seg;
1217         bool convpath;
1218
1219         dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
1220
1221         convpath = LUSECONVPATH(td);
1222         if (!convpath) {
1223                 path = __DECONST(char *, args->filename);
1224                 seg = UIO_USERSPACE;
1225         } else {
1226                 LCONVPATHCREAT_AT(args->filename, &path, dfd);
1227                 seg = UIO_SYSSPACE;
1228         }
1229
1230         switch (args->mode & S_IFMT) {
1231         case S_IFIFO:
1232         case S_IFSOCK:
1233                 error = kern_mkfifoat(td, dfd, path, seg, args->mode);
1234                 break;
1235
1236         case S_IFCHR:
1237         case S_IFBLK:
1238                 error = kern_mknodat(td, dfd, path, seg, args->mode,
1239                     args->dev);
1240                 break;
1241
1242         case S_IFDIR:
1243                 error = EPERM;
1244                 break;
1245
1246         case 0:
1247                 args->mode |= S_IFREG;
1248                 /* FALLTHROUGH */
1249         case S_IFREG:
1250                 error = kern_openat(td, dfd, path, seg,
1251                     O_WRONLY | O_CREAT | O_TRUNC, args->mode);
1252                 if (error == 0)
1253                         kern_close(td, td->td_retval[0]);
1254                 break;
1255
1256         default:
1257                 error = EINVAL;
1258                 break;
1259         }
1260         if (convpath)
1261                 LFREEPATH(path);
1262         return (error);
1263 }
1264
1265 /*
1266  * UGH! This is just about the dumbest idea I've ever heard!!
1267  */
1268 int
1269 linux_personality(struct thread *td, struct linux_personality_args *args)
1270 {
1271         struct linux_pemuldata *pem;
1272         struct proc *p = td->td_proc;
1273         uint32_t old;
1274
1275         PROC_LOCK(p);
1276         pem = pem_find(p);
1277         old = pem->persona;
1278         if (args->per != 0xffffffff)
1279                 pem->persona = args->per;
1280         PROC_UNLOCK(p);
1281
1282         td->td_retval[0] = old;
1283         return (0);
1284 }
1285
/*
 * Linux-side layout of an interval timer value, as copied to and from user
 * space by linux_setitimer() and linux_getitimer().  The members carry the
 * same names as those of the native struct itimerval so that
 * B2L_ITIMERVAL() can copy field by field.
 */
struct l_itimerval {
        l_timeval it_interval;
        l_timeval it_value;
};
1290
/*
 * Copy the four itimerval fields from *lip into *bip.
 *
 * Despite the name, the copy is direction-agnostic: the first argument is
 * always the destination and the second the source, and both only need
 * it_interval and it_value members with tv_sec and tv_usec.  Each argument
 * is evaluated four times, so pass expressions without side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and stays correct under an unbraced if/else.  Invoke it
 * with a trailing semicolon.
 */
#define B2L_ITIMERVAL(bip, lip)                                         \
do {                                                                    \
        (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;          \
        (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec;        \
        (bip)->it_value.tv_sec = (lip)->it_value.tv_sec;                \
        (bip)->it_value.tv_usec = (lip)->it_value.tv_usec;              \
} while (0)
1296
1297 int
1298 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
1299 {
1300         int error;
1301         struct l_itimerval ls;
1302         struct itimerval aitv, oitv;
1303
1304         if (uap->itv == NULL) {
1305                 uap->itv = uap->oitv;
1306                 return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
1307         }
1308
1309         error = copyin(uap->itv, &ls, sizeof(ls));
1310         if (error != 0)
1311                 return (error);
1312         B2L_ITIMERVAL(&aitv, &ls);
1313         error = kern_setitimer(td, uap->which, &aitv, &oitv);
1314         if (error != 0 || uap->oitv == NULL)
1315                 return (error);
1316         B2L_ITIMERVAL(&ls, &oitv);
1317
1318         return (copyout(&ls, uap->oitv, sizeof(ls)));
1319 }
1320
1321 int
1322 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
1323 {
1324         int error;
1325         struct l_itimerval ls;
1326         struct itimerval aitv;
1327
1328         error = kern_getitimer(td, uap->which, &aitv);
1329         if (error != 0)
1330                 return (error);
1331         B2L_ITIMERVAL(&ls, &aitv);
1332         return (copyout(&ls, uap->itv, sizeof(ls)));
1333 }
1334
1335 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Linux nice() entry point: forwards args->inc to kern_setpriority() for
 * PRIO_PROCESS with a "who" of 0 and returns its result.
 *
 * NOTE(review): inc is handed over as the priority value itself rather
 * than being added to the current one - confirm this is intended.
 */
int
linux_nice(struct thread *td, struct linux_nice_args *args)
{

        return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc));
}
1342 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1343
1344 int
1345 linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
1346 {
1347         struct ucred *newcred, *oldcred;
1348         l_gid_t *linux_gidset;
1349         gid_t *bsd_gidset;
1350         int ngrp, error;
1351         struct proc *p;
1352
1353         ngrp = args->gidsetsize;
1354         if (ngrp < 0 || ngrp >= ngroups_max + 1)
1355                 return (EINVAL);
1356         linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
1357         error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
1358         if (error)
1359                 goto out;
1360         newcred = crget();
1361         crextend(newcred, ngrp + 1);
1362         p = td->td_proc;
1363         PROC_LOCK(p);
1364         oldcred = p->p_ucred;
1365         crcopy(newcred, oldcred);
1366
1367         /*
1368          * cr_groups[0] holds egid. Setting the whole set from
1369          * the supplied set will cause egid to be changed too.
1370          * Keep cr_groups[0] unchanged to prevent that.
1371          */
1372
1373         if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) {
1374                 PROC_UNLOCK(p);
1375                 crfree(newcred);
1376                 goto out;
1377         }
1378
1379         if (ngrp > 0) {
1380                 newcred->cr_ngroups = ngrp + 1;
1381
1382                 bsd_gidset = newcred->cr_groups;
1383                 ngrp--;
1384                 while (ngrp >= 0) {
1385                         bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
1386                         ngrp--;
1387                 }
1388         } else
1389                 newcred->cr_ngroups = 1;
1390
1391         setsugid(p);
1392         proc_set_cred(p, newcred);
1393         PROC_UNLOCK(p);
1394         crfree(oldcred);
1395         error = 0;
1396 out:
1397         free(linux_gidset, M_LINUX);
1398         return (error);
1399 }
1400
1401 int
1402 linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
1403 {
1404         struct ucred *cred;
1405         l_gid_t *linux_gidset;
1406         gid_t *bsd_gidset;
1407         int bsd_gidsetsz, ngrp, error;
1408
1409         cred = td->td_ucred;
1410         bsd_gidset = cred->cr_groups;
1411         bsd_gidsetsz = cred->cr_ngroups - 1;
1412
1413         /*
1414          * cr_groups[0] holds egid. Returning the whole set
1415          * here will cause a duplicate. Exclude cr_groups[0]
1416          * to prevent that.
1417          */
1418
1419         if ((ngrp = args->gidsetsize) == 0) {
1420                 td->td_retval[0] = bsd_gidsetsz;
1421                 return (0);
1422         }
1423
1424         if (ngrp < bsd_gidsetsz)
1425                 return (EINVAL);
1426
1427         ngrp = 0;
1428         linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
1429             M_LINUX, M_WAITOK);
1430         while (ngrp < bsd_gidsetsz) {
1431                 linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
1432                 ngrp++;
1433         }
1434
1435         error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t));
1436         free(linux_gidset, M_LINUX);
1437         if (error)
1438                 return (error);
1439
1440         td->td_retval[0] = ngrp;
1441         return (0);
1442 }
1443
1444 static bool
1445 linux_get_dummy_limit(l_uint resource, struct rlimit *rlim)
1446 {
1447
1448         if (linux_dummy_rlimits == 0)
1449                 return (false);
1450
1451         switch (resource) {
1452         case LINUX_RLIMIT_LOCKS:
1453         case LINUX_RLIMIT_SIGPENDING:
1454         case LINUX_RLIMIT_MSGQUEUE:
1455         case LINUX_RLIMIT_RTTIME:
1456                 rlim->rlim_cur = LINUX_RLIM_INFINITY;
1457                 rlim->rlim_max = LINUX_RLIM_INFINITY;
1458                 return (true);
1459         case LINUX_RLIMIT_NICE:
1460         case LINUX_RLIMIT_RTPRIO:
1461                 rlim->rlim_cur = 0;
1462                 rlim->rlim_max = 0;
1463                 return (true);
1464         default:
1465                 return (false);
1466         }
1467 }
1468
1469 int
1470 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
1471 {
1472         struct rlimit bsd_rlim;
1473         struct l_rlimit rlim;
1474         u_int which;
1475         int error;
1476
1477         if (args->resource >= LINUX_RLIM_NLIMITS)
1478                 return (EINVAL);
1479
1480         which = linux_to_bsd_resource[args->resource];
1481         if (which == -1)
1482                 return (EINVAL);
1483
1484         error = copyin(args->rlim, &rlim, sizeof(rlim));
1485         if (error)
1486                 return (error);
1487
1488         bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
1489         bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
1490         return (kern_setrlimit(td, which, &bsd_rlim));
1491 }
1492
1493 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1494 int
1495 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
1496 {
1497         struct l_rlimit rlim;
1498         struct rlimit bsd_rlim;
1499         u_int which;
1500
1501         if (linux_get_dummy_limit(args->resource, &bsd_rlim)) {
1502                 rlim.rlim_cur = bsd_rlim.rlim_cur;
1503                 rlim.rlim_max = bsd_rlim.rlim_max;
1504                 return (copyout(&rlim, args->rlim, sizeof(rlim)));
1505         }
1506
1507         if (args->resource >= LINUX_RLIM_NLIMITS)
1508                 return (EINVAL);
1509
1510         which = linux_to_bsd_resource[args->resource];
1511         if (which == -1)
1512                 return (EINVAL);
1513
1514         lim_rlimit(td, which, &bsd_rlim);
1515
1516 #ifdef COMPAT_LINUX32
1517         rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur;
1518         if (rlim.rlim_cur == UINT_MAX)
1519                 rlim.rlim_cur = INT_MAX;
1520         rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max;
1521         if (rlim.rlim_max == UINT_MAX)
1522                 rlim.rlim_max = INT_MAX;
1523 #else
1524         rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur;
1525         if (rlim.rlim_cur == ULONG_MAX)
1526                 rlim.rlim_cur = LONG_MAX;
1527         rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max;
1528         if (rlim.rlim_max == ULONG_MAX)
1529                 rlim.rlim_max = LONG_MAX;
1530 #endif
1531         return (copyout(&rlim, args->rlim, sizeof(rlim)));
1532 }
1533 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1534
1535 int
1536 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
1537 {
1538         struct l_rlimit rlim;
1539         struct rlimit bsd_rlim;
1540         u_int which;
1541
1542         if (linux_get_dummy_limit(args->resource, &bsd_rlim)) {
1543                 rlim.rlim_cur = bsd_rlim.rlim_cur;
1544                 rlim.rlim_max = bsd_rlim.rlim_max;
1545                 return (copyout(&rlim, args->rlim, sizeof(rlim)));
1546         }
1547
1548         if (args->resource >= LINUX_RLIM_NLIMITS)
1549                 return (EINVAL);
1550
1551         which = linux_to_bsd_resource[args->resource];
1552         if (which == -1)
1553                 return (EINVAL);
1554
1555         lim_rlimit(td, which, &bsd_rlim);
1556
1557         rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
1558         rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
1559         return (copyout(&rlim, args->rlim, sizeof(rlim)));
1560 }
1561
1562 int
1563 linux_sched_setscheduler(struct thread *td,
1564     struct linux_sched_setscheduler_args *args)
1565 {
1566         struct sched_param sched_param;
1567         struct thread *tdt;
1568         int error, policy;
1569
1570         switch (args->policy) {
1571         case LINUX_SCHED_OTHER:
1572                 policy = SCHED_OTHER;
1573                 break;
1574         case LINUX_SCHED_FIFO:
1575                 policy = SCHED_FIFO;
1576                 break;
1577         case LINUX_SCHED_RR:
1578                 policy = SCHED_RR;
1579                 break;
1580         default:
1581                 return (EINVAL);
1582         }
1583
1584         error = copyin(args->param, &sched_param, sizeof(sched_param));
1585         if (error)
1586                 return (error);
1587
1588         if (linux_map_sched_prio) {
1589                 switch (policy) {
1590                 case SCHED_OTHER:
1591                         if (sched_param.sched_priority != 0)
1592                                 return (EINVAL);
1593
1594                         sched_param.sched_priority =
1595                             PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
1596                         break;
1597                 case SCHED_FIFO:
1598                 case SCHED_RR:
1599                         if (sched_param.sched_priority < 1 ||
1600                             sched_param.sched_priority >= LINUX_MAX_RT_PRIO)
1601                                 return (EINVAL);
1602
1603                         /*
1604                          * Map [1, LINUX_MAX_RT_PRIO - 1] to
1605                          * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
1606                          */
1607                         sched_param.sched_priority =
1608                             (sched_param.sched_priority - 1) *
1609                             (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
1610                             (LINUX_MAX_RT_PRIO - 1);
1611                         break;
1612                 }
1613         }
1614
1615         tdt = linux_tdfind(td, args->pid, -1);
1616         if (tdt == NULL)
1617                 return (ESRCH);
1618
1619         error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
1620         PROC_UNLOCK(tdt->td_proc);
1621         return (error);
1622 }
1623
1624 int
1625 linux_sched_getscheduler(struct thread *td,
1626     struct linux_sched_getscheduler_args *args)
1627 {
1628         struct thread *tdt;
1629         int error, policy;
1630
1631         tdt = linux_tdfind(td, args->pid, -1);
1632         if (tdt == NULL)
1633                 return (ESRCH);
1634
1635         error = kern_sched_getscheduler(td, tdt, &policy);
1636         PROC_UNLOCK(tdt->td_proc);
1637
1638         switch (policy) {
1639         case SCHED_OTHER:
1640                 td->td_retval[0] = LINUX_SCHED_OTHER;
1641                 break;
1642         case SCHED_FIFO:
1643                 td->td_retval[0] = LINUX_SCHED_FIFO;
1644                 break;
1645         case SCHED_RR:
1646                 td->td_retval[0] = LINUX_SCHED_RR;
1647                 break;
1648         }
1649         return (error);
1650 }
1651
1652 int
1653 linux_sched_get_priority_max(struct thread *td,
1654     struct linux_sched_get_priority_max_args *args)
1655 {
1656         struct sched_get_priority_max_args bsd;
1657
1658         if (linux_map_sched_prio) {
1659                 switch (args->policy) {
1660                 case LINUX_SCHED_OTHER:
1661                         td->td_retval[0] = 0;
1662                         return (0);
1663                 case LINUX_SCHED_FIFO:
1664                 case LINUX_SCHED_RR:
1665                         td->td_retval[0] = LINUX_MAX_RT_PRIO - 1;
1666                         return (0);
1667                 default:
1668                         return (EINVAL);
1669                 }
1670         }
1671
1672         switch (args->policy) {
1673         case LINUX_SCHED_OTHER:
1674                 bsd.policy = SCHED_OTHER;
1675                 break;
1676         case LINUX_SCHED_FIFO:
1677                 bsd.policy = SCHED_FIFO;
1678                 break;
1679         case LINUX_SCHED_RR:
1680                 bsd.policy = SCHED_RR;
1681                 break;
1682         default:
1683                 return (EINVAL);
1684         }
1685         return (sys_sched_get_priority_max(td, &bsd));
1686 }
1687
1688 int
1689 linux_sched_get_priority_min(struct thread *td,
1690     struct linux_sched_get_priority_min_args *args)
1691 {
1692         struct sched_get_priority_min_args bsd;
1693
1694         if (linux_map_sched_prio) {
1695                 switch (args->policy) {
1696                 case LINUX_SCHED_OTHER:
1697                         td->td_retval[0] = 0;
1698                         return (0);
1699                 case LINUX_SCHED_FIFO:
1700                 case LINUX_SCHED_RR:
1701                         td->td_retval[0] = 1;
1702                         return (0);
1703                 default:
1704                         return (EINVAL);
1705                 }
1706         }
1707
1708         switch (args->policy) {
1709         case LINUX_SCHED_OTHER:
1710                 bsd.policy = SCHED_OTHER;
1711                 break;
1712         case LINUX_SCHED_FIFO:
1713                 bsd.policy = SCHED_FIFO;
1714                 break;
1715         case LINUX_SCHED_RR:
1716                 bsd.policy = SCHED_RR;
1717                 break;
1718         default:
1719                 return (EINVAL);
1720         }
1721         return (sys_sched_get_priority_min(td, &bsd));
1722 }
1723
/*
 * Constants checked by linux_reboot().
 *
 * The first six are the values accepted in the cmd argument.
 * REBOOT_MAGIC1 is the only value accepted in magic1, and REBOOT_MAGIC2,
 * REBOOT_MAGIC2A and REBOOT_MAGIC2B are the values accepted in magic2.
 */
#define REBOOT_CAD_ON   0x89abcdef
#define REBOOT_CAD_OFF  0
#define REBOOT_HALT     0xcdef0123
#define REBOOT_RESTART  0x01234567
#define REBOOT_RESTART2 0xA1B2C3D4
#define REBOOT_POWEROFF 0x4321FEDC
#define REBOOT_MAGIC1   0xfee1dead
#define REBOOT_MAGIC2   0x28121969
#define REBOOT_MAGIC2A  0x05121996
#define REBOOT_MAGIC2B  0x16041998
1734
1735 int
1736 linux_reboot(struct thread *td, struct linux_reboot_args *args)
1737 {
1738         struct reboot_args bsd_args;
1739
1740         if (args->magic1 != REBOOT_MAGIC1)
1741                 return (EINVAL);
1742
1743         switch (args->magic2) {
1744         case REBOOT_MAGIC2:
1745         case REBOOT_MAGIC2A:
1746         case REBOOT_MAGIC2B:
1747                 break;
1748         default:
1749                 return (EINVAL);
1750         }
1751
1752         switch (args->cmd) {
1753         case REBOOT_CAD_ON:
1754         case REBOOT_CAD_OFF:
1755                 return (priv_check(td, PRIV_REBOOT));
1756         case REBOOT_HALT:
1757                 bsd_args.opt = RB_HALT;
1758                 break;
1759         case REBOOT_RESTART:
1760         case REBOOT_RESTART2:
1761                 bsd_args.opt = 0;
1762                 break;
1763         case REBOOT_POWEROFF:
1764                 bsd_args.opt = RB_POWEROFF;
1765                 break;
1766         default:
1767                 return (EINVAL);
1768         }
1769         return (sys_reboot(td, &bsd_args));
1770 }
1771
1772 int
1773 linux_getpid(struct thread *td, struct linux_getpid_args *args)
1774 {
1775
1776         td->td_retval[0] = td->td_proc->p_pid;
1777
1778         return (0);
1779 }
1780
/*
 * Linux gettid(): returns the em_tid stored in the calling thread's
 * emulation data in td->td_retval[0].  Always returns 0.
 */
int
linux_gettid(struct thread *td, struct linux_gettid_args *args)
{
        struct linux_emuldata *em;

        em = em_find(td);
        /* The emulation data is expected to exist for the caller. */
        KASSERT(em != NULL, ("gettid: emuldata not found.\n"));

        td->td_retval[0] = em->em_tid;

        return (0);
}
1793
1794 int
1795 linux_getppid(struct thread *td, struct linux_getppid_args *args)
1796 {
1797
1798         td->td_retval[0] = kern_getppid(td);
1799         return (0);
1800 }
1801
1802 int
1803 linux_getgid(struct thread *td, struct linux_getgid_args *args)
1804 {
1805
1806         td->td_retval[0] = td->td_ucred->cr_rgid;
1807         return (0);
1808 }
1809
1810 int
1811 linux_getuid(struct thread *td, struct linux_getuid_args *args)
1812 {
1813
1814         td->td_retval[0] = td->td_ucred->cr_ruid;
1815         return (0);
1816 }
1817
1818 int
1819 linux_getsid(struct thread *td, struct linux_getsid_args *args)
1820 {
1821
1822         return (kern_getsid(td, args->pid));
1823 }
1824
/*
 * Handler that unconditionally fails with ENOSYS; neither argument is
 * examined.
 */
1825 int
1826 linux_nosys(struct thread *td, struct nosys_args *ignore)
1827 {
1828
1829         return (ENOSYS);
1830 }
1831
1832 int
1833 linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
1834 {
1835         int error;
1836
1837         error = kern_getpriority(td, args->which, args->who);
1838         td->td_retval[0] = 20 - td->td_retval[0];
1839         return (error);
1840 }
1841
1842 int
1843 linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
1844 {
1845         int name[2];
1846
1847         name[0] = CTL_KERN;
1848         name[1] = KERN_HOSTNAME;
1849         return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
1850             args->len, 0, 0));
1851 }
1852
1853 int
1854 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
1855 {
1856         int name[2];
1857
1858         name[0] = CTL_KERN;
1859         name[1] = KERN_NISDOMAINNAME;
1860         return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
1861             args->len, 0, 0));
1862 }
1863
1864 int
1865 linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
1866 {
1867
1868         LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
1869             args->error_code);
1870
1871         /*
1872          * XXX: we should send a signal to the parent if
1873          * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?)
1874          * as it doesnt occur often.
1875          */
1876         exit1(td, args->error_code, 0);
1877                 /* NOTREACHED */
1878 }
1879
/*
 * Capability ABI revisions accepted in l_user_cap_header.version.
 * linux_capget() and linux_capset() transfer one l_user_cap_data element
 * for version 1 and two elements for versions 2 and 3; any other value is
 * answered with EINVAL after version 1 is written back to the header.
 */
1880 #define _LINUX_CAPABILITY_VERSION_1  0x19980330
1881 #define _LINUX_CAPABILITY_VERSION_2  0x20071026
1882 #define _LINUX_CAPABILITY_VERSION_3  0x20080522
1883
/*
 * Header exchanged with userland through the hdrp argument of
 * linux_capget() and linux_capset().
 */
1884 struct l_user_cap_header {
1885         l_int   version;        /* one of _LINUX_CAPABILITY_VERSION_* */
1886         l_int   pid;            /* any non-zero value is refused with EPERM */
1887 };
1888
/*
 * One element of the capability data array exchanged through the datap
 * argument.  linux_capget() always reports every field as zero;
 * linux_capset() refuses any element that has a non-zero field.
 */
1889 struct l_user_cap_data {
1890         l_int   effective;
1891         l_int   permitted;
1892         l_int   inheritable;
1893 };
1894
1895 int
1896 linux_capget(struct thread *td, struct linux_capget_args *uap)
1897 {
1898         struct l_user_cap_header luch;
1899         struct l_user_cap_data lucd[2];
1900         int error, u32s;
1901
1902         if (uap->hdrp == NULL)
1903                 return (EFAULT);
1904
1905         error = copyin(uap->hdrp, &luch, sizeof(luch));
1906         if (error != 0)
1907                 return (error);
1908
1909         switch (luch.version) {
1910         case _LINUX_CAPABILITY_VERSION_1:
1911                 u32s = 1;
1912                 break;
1913         case _LINUX_CAPABILITY_VERSION_2:
1914         case _LINUX_CAPABILITY_VERSION_3:
1915                 u32s = 2;
1916                 break;
1917         default:
1918                 luch.version = _LINUX_CAPABILITY_VERSION_1;
1919                 error = copyout(&luch, uap->hdrp, sizeof(luch));
1920                 if (error)
1921                         return (error);
1922                 return (EINVAL);
1923         }
1924
1925         if (luch.pid)
1926                 return (EPERM);
1927
1928         if (uap->datap) {
1929                 /*
1930                  * The current implementation doesn't support setting
1931                  * a capability (it's essentially a stub) so indicate
1932                  * that no capabilities are currently set or available
1933                  * to request.
1934                  */
1935                 memset(&lucd, 0, u32s * sizeof(lucd[0]));
1936                 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0]));
1937         }
1938
1939         return (error);
1940 }
1941
1942 int
1943 linux_capset(struct thread *td, struct linux_capset_args *uap)
1944 {
1945         struct l_user_cap_header luch;
1946         struct l_user_cap_data lucd[2];
1947         int error, i, u32s;
1948
1949         if (uap->hdrp == NULL || uap->datap == NULL)
1950                 return (EFAULT);
1951
1952         error = copyin(uap->hdrp, &luch, sizeof(luch));
1953         if (error != 0)
1954                 return (error);
1955
1956         switch (luch.version) {
1957         case _LINUX_CAPABILITY_VERSION_1:
1958                 u32s = 1;
1959                 break;
1960         case _LINUX_CAPABILITY_VERSION_2:
1961         case _LINUX_CAPABILITY_VERSION_3:
1962                 u32s = 2;
1963                 break;
1964         default:
1965                 luch.version = _LINUX_CAPABILITY_VERSION_1;
1966                 error = copyout(&luch, uap->hdrp, sizeof(luch));
1967                 if (error)
1968                         return (error);
1969                 return (EINVAL);
1970         }
1971
1972         if (luch.pid)
1973                 return (EPERM);
1974
1975         error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0]));
1976         if (error != 0)
1977                 return (error);
1978
1979         /* We currently don't support setting any capabilities. */
1980         for (i = 0; i < u32s; i++) {
1981                 if (lucd[i].effective || lucd[i].permitted ||
1982                     lucd[i].inheritable) {
1983                         linux_msg(td,
1984                             "capset[%d] effective=0x%x, permitted=0x%x, "
1985                             "inheritable=0x%x is not implemented", i,
1986                             (int)lucd[i].effective, (int)lucd[i].permitted,
1987                             (int)lucd[i].inheritable);
1988                         return (EPERM);
1989                 }
1990         }
1991
1992         return (0);
1993 }
1994
1995 int
1996 linux_prctl(struct thread *td, struct linux_prctl_args *args)
1997 {
1998         int error = 0, max_size, arg;
1999         struct proc *p = td->td_proc;
2000         char comm[LINUX_MAX_COMM_LEN];
2001         int pdeath_signal, trace_state;
2002
2003         switch (args->option) {
2004         case LINUX_PR_SET_PDEATHSIG:
2005                 if (!LINUX_SIG_VALID(args->arg2))
2006                         return (EINVAL);
2007                 pdeath_signal = linux_to_bsd_signal(args->arg2);
2008                 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL,
2009                     &pdeath_signal));
2010         case LINUX_PR_GET_PDEATHSIG:
2011                 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS,
2012                     &pdeath_signal);
2013                 if (error != 0)
2014                         return (error);
2015                 pdeath_signal = bsd_to_linux_signal(pdeath_signal);
2016                 return (copyout(&pdeath_signal,
2017                     (void *)(register_t)args->arg2,
2018                     sizeof(pdeath_signal)));
2019         /*
2020          * In Linux, this flag controls if set[gu]id processes can coredump.
2021          * There are additional semantics imposed on processes that cannot
2022          * coredump:
2023          * - Such processes can not be ptraced.
2024          * - There are some semantics around ownership of process-related files
2025          *   in the /proc namespace.
2026          *
2027          * In FreeBSD, we can (and by default, do) disable setuid coredump
2028          * system-wide with 'sugid_coredump.'  We control tracability on a
2029          * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag).
2030          * By happy coincidence, P2_NOTRACE also prevents coredumping.  So the
2031          * procctl is roughly analogous to Linux's DUMPABLE.
2032          *
2033          * So, proxy these knobs to the corresponding PROC_TRACE setting.
2034          */
2035         case LINUX_PR_GET_DUMPABLE:
2036                 error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS,
2037                     &trace_state);
2038                 if (error != 0)
2039                         return (error);
2040                 td->td_retval[0] = (trace_state != -1);
2041                 return (0);
2042         case LINUX_PR_SET_DUMPABLE:
2043                 /*
2044                  * It is only valid for userspace to set one of these two
2045                  * flags, and only one at a time.
2046                  */
2047                 switch (args->arg2) {
2048                 case LINUX_SUID_DUMP_DISABLE:
2049                         trace_state = PROC_TRACE_CTL_DISABLE_EXEC;
2050                         break;
2051                 case LINUX_SUID_DUMP_USER:
2052                         trace_state = PROC_TRACE_CTL_ENABLE;
2053                         break;
2054                 default:
2055                         return (EINVAL);
2056                 }
2057                 return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL,
2058                     &trace_state));
2059         case LINUX_PR_GET_KEEPCAPS:
2060                 /*
2061                  * Indicate that we always clear the effective and
2062                  * permitted capability sets when the user id becomes
2063                  * non-zero (actually the capability sets are simply
2064                  * always zero in the current implementation).
2065                  */
2066                 td->td_retval[0] = 0;
2067                 break;
2068         case LINUX_PR_SET_KEEPCAPS:
2069                 /*
2070                  * Ignore requests to keep the effective and permitted
2071                  * capability sets when the user id becomes non-zero.
2072                  */
2073                 break;
2074         case LINUX_PR_SET_NAME:
2075                 /*
2076                  * To be on the safe side we need to make sure to not
2077                  * overflow the size a Linux program expects. We already
2078                  * do this here in the copyin, so that we don't need to
2079                  * check on copyout.
2080                  */
2081                 max_size = MIN(sizeof(comm), sizeof(p->p_comm));
2082                 error = copyinstr((void *)(register_t)args->arg2, comm,
2083                     max_size, NULL);
2084
2085                 /* Linux silently truncates the name if it is too long. */
2086                 if (error == ENAMETOOLONG) {
2087                         /*
2088                          * XXX: copyinstr() isn't documented to populate the
2089                          * array completely, so do a copyin() to be on the
2090                          * safe side. This should be changed in case
2091                          * copyinstr() is changed to guarantee this.
2092                          */
2093                         error = copyin((void *)(register_t)args->arg2, comm,
2094                             max_size - 1);
2095                         comm[max_size - 1] = '\0';
2096                 }
2097                 if (error)
2098                         return (error);
2099
2100                 PROC_LOCK(p);
2101                 strlcpy(p->p_comm, comm, sizeof(p->p_comm));
2102                 PROC_UNLOCK(p);
2103                 break;
2104         case LINUX_PR_GET_NAME:
2105                 PROC_LOCK(p);
2106                 strlcpy(comm, p->p_comm, sizeof(comm));
2107                 PROC_UNLOCK(p);
2108                 error = copyout(comm, (void *)(register_t)args->arg2,
2109                     strlen(comm) + 1);
2110                 break;
2111         case LINUX_PR_GET_SECCOMP:
2112         case LINUX_PR_SET_SECCOMP:
2113                 /*
2114                  * Same as returned by Linux without CONFIG_SECCOMP enabled.
2115                  */
2116                 error = EINVAL;
2117                 break;
2118         case LINUX_PR_CAPBSET_READ:
2119 #if 0
2120                 /*
2121                  * This makes too much noise with Ubuntu Focal.
2122                  */
2123                 linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d",
2124                     (int)args->arg2);
2125 #endif
2126                 error = EINVAL;
2127                 break;
2128         case LINUX_PR_SET_NO_NEW_PRIVS:
2129                 arg = args->arg2 == 1 ?
2130                     PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE;
2131                 error = kern_procctl(td, P_PID, p->p_pid,
2132                     PROC_NO_NEW_PRIVS_CTL, &arg);
2133                 break;
2134         case LINUX_PR_SET_PTRACER:
2135                 linux_msg(td, "unsupported prctl PR_SET_PTRACER");
2136                 error = EINVAL;
2137                 break;
2138         default:
2139                 linux_msg(td, "unsupported prctl option %d", args->option);
2140                 error = EINVAL;
2141                 break;
2142         }
2143
2144         return (error);
2145 }
2146
2147 int
2148 linux_sched_setparam(struct thread *td,
2149     struct linux_sched_setparam_args *uap)
2150 {
2151         struct sched_param sched_param;
2152         struct thread *tdt;
2153         int error, policy;
2154
2155         error = copyin(uap->param, &sched_param, sizeof(sched_param));
2156         if (error)
2157                 return (error);
2158
2159         tdt = linux_tdfind(td, uap->pid, -1);
2160         if (tdt == NULL)
2161                 return (ESRCH);
2162
2163         if (linux_map_sched_prio) {
2164                 error = kern_sched_getscheduler(td, tdt, &policy);
2165                 if (error)
2166                         goto out;
2167
2168                 switch (policy) {
2169                 case SCHED_OTHER:
2170                         if (sched_param.sched_priority != 0) {
2171                                 error = EINVAL;
2172                                 goto out;
2173                         }
2174                         sched_param.sched_priority =
2175                             PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
2176                         break;
2177                 case SCHED_FIFO:
2178                 case SCHED_RR:
2179                         if (sched_param.sched_priority < 1 ||
2180                             sched_param.sched_priority >= LINUX_MAX_RT_PRIO) {
2181                                 error = EINVAL;
2182                                 goto out;
2183                         }
2184                         /*
2185                          * Map [1, LINUX_MAX_RT_PRIO - 1] to
2186                          * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
2187                          */
2188                         sched_param.sched_priority =
2189                             (sched_param.sched_priority - 1) *
2190                             (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
2191                             (LINUX_MAX_RT_PRIO - 1);
2192                         break;
2193                 }
2194         }
2195
2196         error = kern_sched_setparam(td, tdt, &sched_param);
2197 out:    PROC_UNLOCK(tdt->td_proc);
2198         return (error);
2199 }
2200
2201 int
2202 linux_sched_getparam(struct thread *td,
2203     struct linux_sched_getparam_args *uap)
2204 {
2205         struct sched_param sched_param;
2206         struct thread *tdt;
2207         int error, policy;
2208
2209         tdt = linux_tdfind(td, uap->pid, -1);
2210         if (tdt == NULL)
2211                 return (ESRCH);
2212
2213         error = kern_sched_getparam(td, tdt, &sched_param);
2214         if (error) {
2215                 PROC_UNLOCK(tdt->td_proc);
2216                 return (error);
2217         }
2218
2219         if (linux_map_sched_prio) {
2220                 error = kern_sched_getscheduler(td, tdt, &policy);
2221                 PROC_UNLOCK(tdt->td_proc);
2222                 if (error)
2223                         return (error);
2224
2225                 switch (policy) {
2226                 case SCHED_OTHER:
2227                         sched_param.sched_priority = 0;
2228                         break;
2229                 case SCHED_FIFO:
2230                 case SCHED_RR:
2231                         /*
2232                          * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to
2233                          * [1, LINUX_MAX_RT_PRIO - 1] (rounding up).
2234                          */
2235                         sched_param.sched_priority =
2236                             (sched_param.sched_priority *
2237                             (LINUX_MAX_RT_PRIO - 1) +
2238                             (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) /
2239                             (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1;
2240                         break;
2241                 }
2242         } else
2243                 PROC_UNLOCK(tdt->td_proc);
2244
2245         error = copyout(&sched_param, uap->param, sizeof(sched_param));
2246         return (error);
2247 }
2248
/*
 * Copy callbacks handed to kern_cpuset_getaffinity() and
 * kern_cpuset_setaffinity() by the Linux affinity system calls; both
 * members are set to the plain copyin()/copyout() routines.
 */
2249 static const struct cpuset_copy_cb copy_set = {
2250         .cpuset_copyin = copyin,
2251         .cpuset_copyout = copyout
2252 };
2253
2254 /*
2255  * Get affinity of a process.
2256  */
2257 int
2258 linux_sched_getaffinity(struct thread *td,
2259     struct linux_sched_getaffinity_args *args)
2260 {
2261         int error;
2262         struct thread *tdt;
2263
2264         if (args->len < sizeof(cpuset_t))
2265                 return (EINVAL);
2266
2267         tdt = linux_tdfind(td, args->pid, -1);
2268         if (tdt == NULL)
2269                 return (ESRCH);
2270
2271         PROC_UNLOCK(tdt->td_proc);
2272
2273         error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
2274             tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr,
2275             &copy_set);
2276         if (error == 0)
2277                 td->td_retval[0] = sizeof(cpuset_t);
2278
2279         return (error);
2280 }
2281
2282 /*
2283  *  Set affinity of a process.
2284  */
2285 int
2286 linux_sched_setaffinity(struct thread *td,
2287     struct linux_sched_setaffinity_args *args)
2288 {
2289         struct thread *tdt;
2290
2291         if (args->len < sizeof(cpuset_t))
2292                 return (EINVAL);
2293
2294         tdt = linux_tdfind(td, args->pid, -1);
2295         if (tdt == NULL)
2296                 return (ESRCH);
2297
2298         PROC_UNLOCK(tdt->td_proc);
2299
2300         return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
2301             tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr,
2302             &copy_set));
2303 }
2304
/*
 * Resource limit pair with unsigned 64-bit members, as copied out to
 * args->old by linux_prlimit64().
 */
2305 struct linux_rlimit64 {
2306         uint64_t        rlim_cur;
2307         uint64_t        rlim_max;
2308 };
2309
2310 int
2311 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
2312 {
2313         struct rlimit rlim, nrlim;
2314         struct linux_rlimit64 lrlim;
2315         struct proc *p;
2316         u_int which;
2317         int flags;
2318         int error;
2319
2320         if (args->new == NULL && args->old != NULL) {
2321                 if (linux_get_dummy_limit(args->resource, &rlim)) {
2322                         lrlim.rlim_cur = rlim.rlim_cur;
2323                         lrlim.rlim_max = rlim.rlim_max;
2324                         return (copyout(&lrlim, args->old, sizeof(lrlim)));
2325                 }
2326         }
2327
2328         if (args->resource >= LINUX_RLIM_NLIMITS)
2329                 return (EINVAL);
2330
2331         which = linux_to_bsd_resource[args->resource];
2332         if (which == -1)
2333                 return (EINVAL);
2334
2335         if (args->new != NULL) {
2336                 /*
2337                  * Note. Unlike FreeBSD where rlim is signed 64-bit Linux
2338                  * rlim is unsigned 64-bit. FreeBSD treats negative limits
2339                  * as INFINITY so we do not need a conversion even.
2340                  */
2341                 error = copyin(args->new, &nrlim, sizeof(nrlim));
2342                 if (error != 0)
2343                         return (error);
2344         }
2345
2346         flags = PGET_HOLD | PGET_NOTWEXIT;
2347         if (args->new != NULL)
2348                 flags |= PGET_CANDEBUG;
2349         else
2350                 flags |= PGET_CANSEE;
2351         if (args->pid == 0) {
2352                 p = td->td_proc;
2353                 PHOLD(p);
2354         } else {
2355                 error = pget(args->pid, flags, &p);
2356                 if (error != 0)
2357                         return (error);
2358         }
2359         if (args->old != NULL) {
2360                 PROC_LOCK(p);
2361                 lim_rlimit_proc(p, which, &rlim);
2362                 PROC_UNLOCK(p);
2363                 if (rlim.rlim_cur == RLIM_INFINITY)
2364                         lrlim.rlim_cur = LINUX_RLIM_INFINITY;
2365                 else
2366                         lrlim.rlim_cur = rlim.rlim_cur;
2367                 if (rlim.rlim_max == RLIM_INFINITY)
2368                         lrlim.rlim_max = LINUX_RLIM_INFINITY;
2369                 else
2370                         lrlim.rlim_max = rlim.rlim_max;
2371                 error = copyout(&lrlim, args->old, sizeof(lrlim));
2372                 if (error != 0)
2373                         goto out;
2374         }
2375
2376         if (args->new != NULL)
2377                 error = kern_proc_setrlimit(td, p, which, &nrlim);
2378
2379  out:
2380         PRELE(p);
2381         return (error);
2382 }
2383
2384 int
2385 linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
2386 {
2387         struct l_timespec lts;
2388         struct timespec ts, *tsp;
2389         int error;
2390
2391         if (args->tsp != NULL) {
2392                 error = copyin(args->tsp, &lts, sizeof(lts));
2393                 if (error != 0)
2394                         return (error);
2395                 error = linux_to_native_timespec(&ts, &lts);
2396                 if (error != 0)
2397                         return (error);
2398                 tsp = &ts;
2399         } else
2400                 tsp = NULL;
2401
2402         error = linux_common_pselect6(td, args->nfds, args->readfds,
2403             args->writefds, args->exceptfds, tsp, args->sig);
2404         if (error != 0)
2405                 return (error);
2406
2407         if (args->tsp != NULL) {
2408                 error = native_to_linux_timespec(&lts, tsp);
2409                 if (error == 0)
2410                         error = copyout(&lts, args->tsp, sizeof(lts));
2411         }
2412         return (error);
2413 }
2414
2415 static int
2416 linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds,
2417     l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp,
2418     l_uintptr_t *sig)
2419 {
2420         struct timeval utv, tv0, tv1, *tvp;
2421         struct l_pselect6arg lpse6;
2422         l_sigset_t l_ss;
2423         sigset_t *ssp;
2424         sigset_t ss;
2425         int error;
2426
2427         ssp = NULL;
2428         if (sig != NULL) {
2429                 error = copyin(sig, &lpse6, sizeof(lpse6));
2430                 if (error != 0)
2431                         return (error);
2432                 if (lpse6.ss_len != sizeof(l_ss))
2433                         return (EINVAL);
2434                 if (lpse6.ss != 0) {
2435                         error = copyin(PTRIN(lpse6.ss), &l_ss,
2436                             sizeof(l_ss));
2437                         if (error != 0)
2438                                 return (error);
2439                         linux_to_bsd_sigset(&l_ss, &ss);
2440                         ssp = &ss;
2441                 }
2442         } else
2443                 ssp = NULL;
2444
2445         /*
2446          * Currently glibc changes nanosecond number to microsecond.
2447          * This mean losing precision but for now it is hardly seen.
2448          */
2449         if (tsp != NULL) {
2450                 TIMESPEC_TO_TIMEVAL(&utv, tsp);
2451                 if (itimerfix(&utv))
2452                         return (EINVAL);
2453
2454                 microtime(&tv0);
2455                 tvp = &utv;
2456         } else
2457                 tvp = NULL;
2458
2459         error = kern_pselect(td, nfds, readfds, writefds,
2460             exceptfds, tvp, ssp, LINUX_NFDBITS);
2461
2462         if (error == 0 && tsp != NULL) {
2463                 if (td->td_retval[0] != 0) {
2464                         /*
2465                          * Compute how much time was left of the timeout,
2466                          * by subtracting the current time and the time
2467                          * before we started the call, and subtracting
2468                          * that result from the user-supplied value.
2469                          */
2470
2471                         microtime(&tv1);
2472                         timevalsub(&tv1, &tv0);
2473                         timevalsub(&utv, &tv1);
2474                         if (utv.tv_sec < 0)
2475                                 timevalclear(&utv);
2476                 } else
2477                         timevalclear(&utv);
2478                 TIMEVAL_TO_TIMESPEC(&utv, tsp);
2479         }
2480         return (error);
2481 }
2482
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Linux pselect6_time64(2): same as linux_pselect6() except that the
 * timeout is exchanged with userland as an l_timespec64.
 */
int
linux_pselect6_time64(struct thread *td,
    struct linux_pselect6_time64_args *args)
{
	struct l_timespec64 lts;
	struct timespec ts, *tsp;
	int error;

	tsp = NULL;
	if (args->tsp != NULL) {
		error = copyin(args->tsp, &lts, sizeof(lts));
		if (error == 0)
			error = linux_to_native_timespec64(&ts, &lts);
		if (error != 0)
			return (error);
		tsp = &ts;
	}

	error = linux_common_pselect6(td, args->nfds, args->readfds,
	    args->writefds, args->exceptfds, tsp, args->sig);
	if (error != 0 || args->tsp == NULL)
		return (error);

	error = native_to_linux_timespec64(&lts, tsp);
	if (error != 0)
		return (error);
	return (copyout(&lts, args->tsp, sizeof(lts)));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
2516
2517 int
2518 linux_ppoll(struct thread *td, struct linux_ppoll_args *args)
2519 {
2520         struct timespec uts, *tsp;
2521         struct l_timespec lts;
2522         int error;
2523
2524         if (args->tsp != NULL) {
2525                 error = copyin(args->tsp, &lts, sizeof(lts));
2526                 if (error)
2527                         return (error);
2528                 error = linux_to_native_timespec(&uts, &lts);
2529                 if (error != 0)
2530                         return (error);
2531                 tsp = &uts;
2532         } else
2533                 tsp = NULL;
2534
2535         error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
2536             args->sset, args->ssize);
2537         if (error != 0)
2538                 return (error);
2539         if (tsp != NULL) {
2540                 error = native_to_linux_timespec(&lts, tsp);
2541                 if (error == 0)
2542                         error = copyout(&lts, args->tsp, sizeof(lts));
2543         }
2544         return (error);
2545 }
2546
2547 static int
2548 linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds,
2549     struct timespec *tsp, l_sigset_t *sset, l_size_t ssize)
2550 {
2551         struct timespec ts0, ts1;
2552         struct pollfd stackfds[32];
2553         struct pollfd *kfds;
2554         l_sigset_t l_ss;
2555         sigset_t *ssp;
2556         sigset_t ss;
2557         int error;
2558
2559         if (kern_poll_maxfds(nfds))
2560                 return (EINVAL);
2561         if (sset != NULL) {
2562                 if (ssize != sizeof(l_ss))
2563                         return (EINVAL);
2564                 error = copyin(sset, &l_ss, sizeof(l_ss));
2565                 if (error)
2566                         return (error);
2567                 linux_to_bsd_sigset(&l_ss, &ss);
2568                 ssp = &ss;
2569         } else
2570                 ssp = NULL;
2571         if (tsp != NULL)
2572                 nanotime(&ts0);
2573
2574         if (nfds > nitems(stackfds))
2575                 kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK);
2576         else
2577                 kfds = stackfds;
2578         error = linux_pollin(td, kfds, fds, nfds);
2579         if (error != 0)
2580                 goto out;
2581
2582         error = kern_poll_kfds(td, kfds, nfds, tsp, ssp);
2583         if (error == 0)
2584                 error = linux_pollout(td, kfds, fds, nfds);
2585
2586         if (error == 0 && tsp != NULL) {
2587                 if (td->td_retval[0]) {
2588                         nanotime(&ts1);
2589                         timespecsub(&ts1, &ts0, &ts1);
2590                         timespecsub(tsp, &ts1, tsp);
2591                         if (tsp->tv_sec < 0)
2592                                 timespecclear(tsp);
2593                 } else
2594                         timespecclear(tsp);
2595         }
2596
2597 out:
2598         if (nfds > nitems(stackfds))
2599                 free(kfds, M_TEMP);
2600         return (error);
2601 }
2602
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Linux ppoll_time64(2): same as linux_ppoll() except that the timeout is
 * exchanged with userland as an l_timespec64.
 */
int
linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args)
{
	struct timespec uts, *tsp;
	struct l_timespec64 lts;
	int error;

	tsp = NULL;
	if (args->tsp != NULL) {
		error = copyin(args->tsp, &lts, sizeof(lts));
		if (error == 0)
			error = linux_to_native_timespec64(&uts, &lts);
		if (error != 0)
			return (error);
		tsp = &uts;
	}

	error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
	    args->sset, args->ssize);
	if (error != 0 || tsp == NULL)
		return (error);

	error = native_to_linux_timespec64(&lts, tsp);
	if (error != 0)
		return (error);
	return (copyout(&lts, args->tsp, sizeof(lts)));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
2633
2634 static int
2635 linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
2636 {
2637         int error;
2638         u_int i;
2639
2640         error = copyin(ufds, fds, nfd * sizeof(*fds));
2641         if (error != 0)
2642                 return (error);
2643
2644         for (i = 0; i < nfd; i++) {
2645                 if (fds->events != 0)
2646                         linux_to_bsd_poll_events(td, fds->fd,
2647                             fds->events, &fds->events);
2648                 fds++;
2649         }
2650         return (0);
2651 }
2652
2653 static int
2654 linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
2655 {
2656         int error = 0;
2657         u_int i, n = 0;
2658
2659         for (i = 0; i < nfd; i++) {
2660                 if (fds->revents != 0) {
2661                         bsd_to_linux_poll_events(fds->revents,
2662                             &fds->revents);
2663                         n++;
2664                 }
2665                 error = copyout(&fds->revents, &ufds->revents,
2666                     sizeof(ufds->revents));
2667                 if (error)
2668                         return (error);
2669                 fds++;
2670                 ufds++;
2671         }
2672         td->td_retval[0] = n;
2673         return (0);
2674 }
2675
2676 int
2677 linux_sched_rr_get_interval(struct thread *td,
2678     struct linux_sched_rr_get_interval_args *uap)
2679 {
2680         struct timespec ts;
2681         struct l_timespec lts;
2682         struct thread *tdt;
2683         int error;
2684
2685         /*
2686          * According to man in case the invalid pid specified
2687          * EINVAL should be returned.
2688          */
2689         if (uap->pid < 0)
2690                 return (EINVAL);
2691
2692         tdt = linux_tdfind(td, uap->pid, -1);
2693         if (tdt == NULL)
2694                 return (ESRCH);
2695
2696         error = kern_sched_rr_get_interval_td(td, tdt, &ts);
2697         PROC_UNLOCK(tdt->td_proc);
2698         if (error != 0)
2699                 return (error);
2700         error = native_to_linux_timespec(&lts, &ts);
2701         if (error != 0)
2702                 return (error);
2703         return (copyout(&lts, uap->interval, sizeof(lts)));
2704 }
2705
2706 /*
2707  * In case when the Linux thread is the initial thread in
2708  * the thread group thread id is equal to the process id.
2709  * Glibc depends on this magic (assert in pthread_getattr_np.c).
2710  */
2711 struct thread *
2712 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
2713 {
2714         struct linux_emuldata *em;
2715         struct thread *tdt;
2716         struct proc *p;
2717
2718         tdt = NULL;
2719         if (tid == 0 || tid == td->td_tid) {
2720                 tdt = td;
2721                 PROC_LOCK(tdt->td_proc);
2722         } else if (tid > PID_MAX)
2723                 tdt = tdfind(tid, pid);
2724         else {
2725                 /*
2726                  * Initial thread where the tid equal to the pid.
2727                  */
2728                 p = pfind(tid);
2729                 if (p != NULL) {
2730                         if (SV_PROC_ABI(p) != SV_ABI_LINUX) {
2731                                 /*
2732                                  * p is not a Linuxulator process.
2733                                  */
2734                                 PROC_UNLOCK(p);
2735                                 return (NULL);
2736                         }
2737                         FOREACH_THREAD_IN_PROC(p, tdt) {
2738                                 em = em_find(tdt);
2739                                 if (tid == em->em_tid)
2740                                         return (tdt);
2741                         }
2742                         PROC_UNLOCK(p);
2743                 }
2744                 return (NULL);
2745         }
2746
2747         return (tdt);
2748 }
2749
2750 void
2751 linux_to_bsd_waitopts(int options, int *bsdopts)
2752 {
2753
2754         if (options & LINUX_WNOHANG)
2755                 *bsdopts |= WNOHANG;
2756         if (options & LINUX_WUNTRACED)
2757                 *bsdopts |= WUNTRACED;
2758         if (options & LINUX_WEXITED)
2759                 *bsdopts |= WEXITED;
2760         if (options & LINUX_WCONTINUED)
2761                 *bsdopts |= WCONTINUED;
2762         if (options & LINUX_WNOWAIT)
2763                 *bsdopts |= WNOWAIT;
2764
2765         if (options & __WCLONE)
2766                 *bsdopts |= WLINUXCLONE;
2767 }
2768
2769 int
2770 linux_getrandom(struct thread *td, struct linux_getrandom_args *args)
2771 {
2772         struct uio uio;
2773         struct iovec iov;
2774         int error;
2775
2776         if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM))
2777                 return (EINVAL);
2778         if (args->count > INT_MAX)
2779                 args->count = INT_MAX;
2780
2781         iov.iov_base = args->buf;
2782         iov.iov_len = args->count;
2783
2784         uio.uio_iov = &iov;
2785         uio.uio_iovcnt = 1;
2786         uio.uio_resid = iov.iov_len;
2787         uio.uio_segflg = UIO_USERSPACE;
2788         uio.uio_rw = UIO_READ;
2789         uio.uio_td = td;
2790
2791         error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK);
2792         if (error == 0)
2793                 td->td_retval[0] = args->count - uio.uio_resid;
2794         return (error);
2795 }
2796
2797 int
2798 linux_mincore(struct thread *td, struct linux_mincore_args *args)
2799 {
2800
2801         /* Needs to be page-aligned */
2802         if (args->start & PAGE_MASK)
2803                 return (EINVAL);
2804         return (kern_mincore(td, args->start, args->len, args->vec));
2805 }
2806
2807 #define SYSLOG_TAG      "<6>"
2808
2809 int
2810 linux_syslog(struct thread *td, struct linux_syslog_args *args)
2811 {
2812         char buf[128], *src, *dst;
2813         u_int seq;
2814         int buflen, error;
2815
2816         if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) {
2817                 linux_msg(td, "syslog unsupported type 0x%x", args->type);
2818                 return (EINVAL);
2819         }
2820
2821         if (args->len < 6) {
2822                 td->td_retval[0] = 0;
2823                 return (0);
2824         }
2825
2826         error = priv_check(td, PRIV_MSGBUF);
2827         if (error)
2828                 return (error);
2829
2830         mtx_lock(&msgbuf_lock);
2831         msgbuf_peekbytes(msgbufp, NULL, 0, &seq);
2832         mtx_unlock(&msgbuf_lock);
2833
2834         dst = args->buf;
2835         error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG));
2836         /* The -1 is to skip the trailing '\0'. */
2837         dst += sizeof(SYSLOG_TAG) - 1;
2838
2839         while (error == 0) {
2840                 mtx_lock(&msgbuf_lock);
2841                 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq);
2842                 mtx_unlock(&msgbuf_lock);
2843
2844                 if (buflen == 0)
2845                         break;
2846
2847                 for (src = buf; src < buf + buflen && error == 0; src++) {
2848                         if (*src == '\0')
2849                                 continue;
2850
2851                         if (dst >= args->buf + args->len)
2852                                 goto out;
2853
2854                         error = copyout(src, dst, 1);
2855                         dst++;
2856
2857                         if (*src == '\n' && *(src + 1) != '<' &&
2858                             dst + sizeof(SYSLOG_TAG) < args->buf + args->len) {
2859                                 error = copyout(&SYSLOG_TAG,
2860                                     dst, sizeof(SYSLOG_TAG));
2861                                 dst += sizeof(SYSLOG_TAG) - 1;
2862                         }
2863                 }
2864         }
2865 out:
2866         td->td_retval[0] = dst - args->buf;
2867         return (error);
2868 }
2869
2870 int
2871 linux_getcpu(struct thread *td, struct linux_getcpu_args *args)
2872 {
2873         int cpu, error, node;
2874
2875         cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */
2876         error = 0;
2877         node = cpuid_to_pcpu[cpu]->pc_domain;
2878
2879         if (args->cpu != NULL)
2880                 error = copyout(&cpu, args->cpu, sizeof(l_int));
2881         if (args->node != NULL)
2882                 error = copyout(&node, args->node, sizeof(l_int));
2883         return (error);
2884 }
2885
2886 #if defined(__i386__) || defined(__amd64__)
2887 int
2888 linux_poll(struct thread *td, struct linux_poll_args *args)
2889 {
2890         struct timespec ts, *tsp;
2891
2892         if (args->timeout != INFTIM) {
2893                 if (args->timeout < 0)
2894                         return (EINVAL);
2895                 ts.tv_sec = args->timeout / 1000;
2896                 ts.tv_nsec = (args->timeout % 1000) * 1000000;
2897                 tsp = &ts;
2898         } else
2899                 tsp = NULL;
2900
2901         return (linux_common_ppoll(td, args->fds, args->nfds,
2902             tsp, NULL, 0));
2903 }
2904 #endif /* __i386__ || __amd64__ */
2905
2906 int
2907 linux_seccomp(struct thread *td, struct linux_seccomp_args *args)
2908 {
2909
2910         switch (args->op) {
2911         case LINUX_SECCOMP_GET_ACTION_AVAIL:
2912                 return (EOPNOTSUPP);
2913         default:
2914                 /*
2915                  * Ignore unknown operations, just like Linux kernel built
2916                  * without CONFIG_SECCOMP.
2917                  */
2918                 return (EINVAL);
2919         }
2920 }