2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 1994-1995 Søren Schmidt
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include "opt_compat.h"
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/capsicum.h>
38 #include <sys/dirent.h>
39 #include <sys/fcntl.h>
41 #include <sys/filedesc.h>
43 #include <sys/malloc.h>
45 #include <sys/mount.h>
46 #include <sys/mutex.h>
47 #include <sys/namei.h>
51 #include <sys/syscallsubr.h>
52 #include <sys/sysproto.h>
54 #include <sys/unistd.h>
55 #include <sys/vnode.h>
58 #include <compat/freebsd32/freebsd32_misc.h>
59 #include <machine/../linux32/linux.h>
60 #include <machine/../linux32/linux32_proto.h>
62 #include <machine/../linux/linux.h>
63 #include <machine/../linux/linux_proto.h>
65 #include <compat/linux/linux_misc.h>
66 #include <compat/linux/linux_util.h>
67 #include <compat/linux/linux_file.h>
/* Forward declarations for file-local helpers that are defined further down in this file. */
69 static int linux_common_open(struct thread *, int, char *, int, int);
70 static int linux_getdents_error(struct thread *, int, int);
/*
 * Translation table for the F_SEAL_* bits; each entry is built with
 * BITMAP_1t1_LINUX.  The table is handed to linux_to_bsd_bits() by the
 * LINUX_F_ADD_SEALS handler in fcntl_common().
 */
72 static struct bsd_to_linux_bitmap seal_bitmap[] = {
73 BITMAP_1t1_LINUX(F_SEAL_SEAL),
74 BITMAP_1t1_LINUX(F_SEAL_SHRINK),
75 BITMAP_1t1_LINUX(F_SEAL_GROW),
76 BITMAP_1t1_LINUX(F_SEAL_WRITE),
/*
 * MFD_HUGETLB_ENTRY(size) pairs MFD_HUGE_<size> (bsd_value) with
 * LINUX_HUGETLB_FLAG_ENCODE_<size> (linux_value).  The macro is only
 * needed while mfd_bitmap is being defined and is #undef'd afterwards.
 */
79 #define MFD_HUGETLB_ENTRY(_size) \
81 .bsd_value = MFD_HUGE_##_size, \
82 .linux_value = LINUX_HUGETLB_FLAG_ENCODE_##_size \
/*
 * memfd flag translation table: MFD_CLOEXEC, MFD_ALLOW_SEALING and
 * MFD_HUGETLB use BITMAP_1t1_LINUX, followed by one entry per huge page size.
 */
84 static struct bsd_to_linux_bitmap mfd_bitmap[] = {
85 BITMAP_1t1_LINUX(MFD_CLOEXEC),
86 BITMAP_1t1_LINUX(MFD_ALLOW_SEALING),
87 BITMAP_1t1_LINUX(MFD_HUGETLB),
88 MFD_HUGETLB_ENTRY(64KB),
89 MFD_HUGETLB_ENTRY(512KB),
90 MFD_HUGETLB_ENTRY(1MB),
91 MFD_HUGETLB_ENTRY(2MB),
92 MFD_HUGETLB_ENTRY(8MB),
93 MFD_HUGETLB_ENTRY(16MB),
94 MFD_HUGETLB_ENTRY(32MB),
95 MFD_HUGETLB_ENTRY(256MB),
96 MFD_HUGETLB_ENTRY(512MB),
97 MFD_HUGETLB_ENTRY(1GB),
98 MFD_HUGETLB_ENTRY(2GB),
99 MFD_HUGETLB_ENTRY(16GB),
101 #undef MFD_HUGETLB_ENTRY
103 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * creat emulation: converts args->path with LCONVPATHEXIST and opens the
 * result through kern_openat() relative to AT_FDCWD with
 * O_WRONLY | O_CREAT | O_TRUNC and the caller-supplied mode.
 */
105 linux_creat(struct thread *td, struct linux_creat_args *args)
110 LCONVPATHEXIST(td, args->path, &path);
112 error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE,
113 O_WRONLY | O_CREAT | O_TRUNC, args->mode);
/*
 * Shared open path used by linux_open() and linux_openat().
 *
 * td      - calling thread
 * dirfd   - directory descriptor forwarded to kern_openat()
 * path    - kernel-space path (opened with UIO_SYSSPACE)
 * l_flags - Linux open flags, translated bit by bit into bsd_flags
 * mode    - creation mode forwarded to kern_openat()
 *
 * After the open, the new descriptor (td_retval[0]) is looked up with
 * fget() and, for a session leader without a controlling terminal,
 * TIOCSCTTY is issued on it.
 */
120 linux_common_open(struct thread *td, int dirfd, char *path, int l_flags, int mode)
122 struct proc *p = td->td_proc;
125 int bsd_flags, error;
/* Access mode is selected from the LINUX_O_ACCMODE bits. */
128 switch (l_flags & LINUX_O_ACCMODE) {
130 bsd_flags |= O_WRONLY;
136 bsd_flags |= O_RDONLY;
/* LINUX_O_NDELAY and LINUX_O_NONBLOCK (below) both map to O_NONBLOCK. */
138 if (l_flags & LINUX_O_NDELAY)
139 bsd_flags |= O_NONBLOCK;
140 if (l_flags & LINUX_O_APPEND)
141 bsd_flags |= O_APPEND;
142 if (l_flags & LINUX_O_SYNC)
143 bsd_flags |= O_FSYNC;
144 if (l_flags & LINUX_O_CLOEXEC)
145 bsd_flags |= O_CLOEXEC;
146 if (l_flags & LINUX_O_NONBLOCK)
147 bsd_flags |= O_NONBLOCK;
148 if (l_flags & LINUX_O_ASYNC)
149 bsd_flags |= O_ASYNC;
150 if (l_flags & LINUX_O_CREAT)
151 bsd_flags |= O_CREAT;
152 if (l_flags & LINUX_O_TRUNC)
153 bsd_flags |= O_TRUNC;
154 if (l_flags & LINUX_O_EXCL)
156 if (l_flags & LINUX_O_NOCTTY)
157 bsd_flags |= O_NOCTTY;
158 if (l_flags & LINUX_O_DIRECT)
159 bsd_flags |= O_DIRECT;
160 if (l_flags & LINUX_O_NOFOLLOW)
161 bsd_flags |= O_NOFOLLOW;
162 if (l_flags & LINUX_O_DIRECTORY)
163 bsd_flags |= O_DIRECTORY;
164 /* XXX LINUX_O_NOATIME: unable to be easily implemented. */
166 error = kern_openat(td, dirfd, path, UIO_SYSSPACE, bsd_flags, mode);
/* Controlling-terminal handling: checks P_CONTROLT and O_NOCTTY first. */
172 if (p->p_flag & P_CONTROLT)
174 if (bsd_flags & O_NOCTTY)
178 * XXX In between kern_openat() and fget(), another process
179 * having the same filedesc could use that fd without
182 fd = td->td_retval[0];
183 if (fget(td, fd, &cap_ioctl_rights, &fp) == 0) {
/* Only vnode-backed descriptors are considered. */
184 if (fp->f_type != DTYPE_VNODE) {
/* proctree_lock is held shared while the session state is inspected. */
188 sx_slock(&proctree_lock);
190 if (SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) {
/* The lock is dropped before issuing the ioctl; its result is ignored. */
192 sx_sunlock(&proctree_lock);
193 /* XXXPJD: Verify if TIOCSCTTY is allowed. */
194 (void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0,
198 sx_sunlock(&proctree_lock);
/*
 * openat emulation: maps LINUX_AT_FDCWD to AT_FDCWD, converts the filename
 * with LCONVPATH_AT (fourth argument 1 when LINUX_O_CREAT is set, 0
 * otherwise) and delegates to linux_common_open().
 */
209 linux_openat(struct thread *td, struct linux_openat_args *args)
214 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
215 if (args->flags & LINUX_O_CREAT)
216 LCONVPATH_AT(td, args->filename, &path, 1, dfd);
218 LCONVPATH_AT(td, args->filename, &path, 0, dfd);
220 return (linux_common_open(td, dfd, path, args->flags, args->mode));
223 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * open emulation: converts the path with LCONVPATHCREAT when LINUX_O_CREAT
 * is requested and with LCONVPATHEXIST otherwise, then delegates to
 * linux_common_open() relative to AT_FDCWD.
 */
225 linux_open(struct thread *td, struct linux_open_args *args)
229 if (args->flags & LINUX_O_CREAT)
230 LCONVPATHCREAT(td, args->path, &path);
232 LCONVPATHEXIST(td, args->path, &path);
234 return (linux_common_open(td, AT_FDCWD, path, args->flags, args->mode));
/* lseek emulation: forwards fdes, off and whence unchanged to kern_lseek(). */
239 linux_lseek(struct thread *td, struct linux_lseek_args *args)
242 return (kern_lseek(td, args->fdes, args->off, args->whence));
245 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * llseek emulation: the 64-bit offset arrives split into olow/ohigh.
 * The resulting position is copied from td_retval to the user pointer
 * args->res and td_retval[0] is then reset to 0.
 */
247 linux_llseek(struct thread *td, struct linux_llseek_args *args)
/* Combine the halves: ohigh supplies the upper 32 bits. */
252 off = (args->olow) | (((off_t) args->ohigh) << 32);
254 error = kern_lseek(td, args->fd, off, args->whence);
258 error = copyout(td->td_retval, args->res, sizeof(off_t));
262 td->td_retval[0] = 0;
265 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
268 * Note that linux_getdents(2) and linux_getdents64(2) have the same
269 * arguments. They only differ in the definition of struct dirent they
271 * Note that linux_readdir(2) is a special case of linux_getdents(2)
272 * where count always equals 1, meaning that the buffer is one
273 * dirent-structure in size and that the code can't handle more anyway.
274 * Note that linux_readdir(2) can't be implemented by means of linux_getdents(2)
275 * as in case when the *dent buffer size is equal to 1 linux_getdents(2) will
/*
 * Error post-processing for the getdents family.  Callers pass the error
 * returned by kern_getdirentries() as err; the descriptor's vnode is
 * obtained with getvnode() and its type is compared against VDIR.
 */
280 linux_getdents_error(struct thread *td, int fd, int err)
286 /* Linux returns ENOTDIR when fd is not a directory. */
287 error = getvnode(td, fd, &cap_read_rights, &fp);
291 if (vp->v_type != VDIR) {
303 char d_name[LINUX_NAME_MAX + 1];
311 char d_name[LINUX_NAME_MAX + 1];
315 * Linux uses the last byte in the dirent buffer to store d_type,
316 * at least glibc-2.7 requires it. That is why l_dirent is padded with 2 bytes.
/*
 * Record length of a struct l_dirent whose name is namlen bytes long:
 * offset of d_name, plus the name, plus 2 extra bytes, rounded up to a
 * multiple of sizeof(l_ulong).
 */
318 #define LINUX_RECLEN(namlen) \
319 roundup(offsetof(struct l_dirent, d_name) + (namlen) + 2, sizeof(l_ulong))
/* Same computation for struct l_dirent64, with 1 extra byte instead of 2. */
321 #define LINUX_RECLEN64(namlen) \
322 roundup(offsetof(struct l_dirent64, d_name) + (namlen) + 1, \
325 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * getdents emulation.  Native directory entries are read into a kernel
 * buffer (at most MAXBSIZE bytes) with kern_getdirentries(); each
 * struct dirent is converted into a struct l_dirent staged in lbuf and
 * copied out to args->dent.  td_retval[0] receives retval, the sum of the
 * Linux record lengths written.
 */
327 linux_getdents(struct thread *td, struct linux_getdents_args *args)
330 caddr_t inp, buf; /* BSD-format */
331 int len, reclen; /* BSD-format */
332 caddr_t outp; /* Linux-format */
333 int resid, linuxreclen; /* Linux-format */
334 caddr_t lbuf; /* Linux-format */
336 struct l_dirent *linux_dirent;
340 buflen = min(args->count, MAXBSIZE);
341 buf = malloc(buflen, M_TEMP, M_WAITOK);
343 error = kern_getdirentries(td, args->fd, buf, buflen,
344 &base, NULL, UIO_SYSSPACE);
346 error = linux_getdents_error(td, args->fd, error);
/* Staging buffer sized for the longest possible Linux record, zero-filled. */
350 lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
/* Number of bytes kern_getdirentries() reported in td_retval[0]. */
352 len = td->td_retval[0];
354 outp = (caddr_t)args->dent;
359 bdp = (struct dirent *) inp;
360 reclen = bdp->d_reclen;
361 linuxreclen = LINUX_RECLEN(bdp->d_namlen);
363 * No more space in the user supplied dirent buffer.
366 if (resid < linuxreclen) {
371 linux_dirent = (struct l_dirent*)lbuf;
372 linux_dirent->d_ino = bdp->d_fileno;
373 linux_dirent->d_off = base + reclen;
374 linux_dirent->d_reclen = linuxreclen;
376 * Copy d_type to last byte of l_dirent buffer
378 lbuf[linuxreclen - 1] = bdp->d_type;
/* The size limit excludes the final byte, which holds d_type (set above). */
379 strlcpy(linux_dirent->d_name, bdp->d_name,
380 linuxreclen - offsetof(struct l_dirent, d_name)-1);
381 error = copyout(linux_dirent, outp, linuxreclen);
389 retval += linuxreclen;
391 resid -= linuxreclen;
393 td->td_retval[0] = retval;
/*
 * getdents64 emulation.  Same flow as linux_getdents(), but records are
 * built as struct l_dirent64 (sized with LINUX_RECLEN64), d_type is stored
 * in its own field, and output goes to args->dirent.  td_retval[0]
 * receives the sum of the Linux record lengths written.
 */
404 linux_getdents64(struct thread *td, struct linux_getdents64_args *args)
407 caddr_t inp, buf; /* BSD-format */
408 int len, reclen; /* BSD-format */
409 caddr_t outp; /* Linux-format */
410 int resid, linuxreclen; /* Linux-format */
411 caddr_t lbuf; /* Linux-format */
413 struct l_dirent64 *linux_dirent64;
417 buflen = min(args->count, MAXBSIZE);
418 buf = malloc(buflen, M_TEMP, M_WAITOK);
420 error = kern_getdirentries(td, args->fd, buf, buflen,
421 &base, NULL, UIO_SYSSPACE);
423 error = linux_getdents_error(td, args->fd, error);
/* Staging buffer sized for the longest possible Linux record, zero-filled. */
427 lbuf = malloc(LINUX_RECLEN64(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
429 len = td->td_retval[0];
431 outp = (caddr_t)args->dirent;
436 bdp = (struct dirent *) inp;
437 reclen = bdp->d_reclen;
438 linuxreclen = LINUX_RECLEN64(bdp->d_namlen);
440 * No more space in the user supplied dirent buffer.
443 if (resid < linuxreclen) {
448 linux_dirent64 = (struct l_dirent64*)lbuf;
449 linux_dirent64->d_ino = bdp->d_fileno;
450 linux_dirent64->d_off = base + reclen;
451 linux_dirent64->d_reclen = linuxreclen;
452 linux_dirent64->d_type = bdp->d_type;
453 strlcpy(linux_dirent64->d_name, bdp->d_name,
454 linuxreclen - offsetof(struct l_dirent64, d_name));
455 error = copyout(linux_dirent64, outp, linuxreclen);
463 retval += linuxreclen;
465 resid -= linuxreclen;
467 td->td_retval[0] = retval;
476 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * readdir emulation: reads into a buffer sized for a single maximal
 * record, converts only the first native entry to a struct l_dirent and
 * copies it to args->dent.  td_retval[0] is set to the Linux record length.
 */
478 linux_readdir(struct thread *td, struct linux_readdir_args *args)
481 caddr_t buf; /* BSD-format */
482 int linuxreclen; /* Linux-format */
483 caddr_t lbuf; /* Linux-format */
485 struct l_dirent *linux_dirent;
488 buflen = LINUX_RECLEN(LINUX_NAME_MAX);
489 buf = malloc(buflen, M_TEMP, M_WAITOK);
491 error = kern_getdirentries(td, args->fd, buf, buflen,
492 &base, NULL, UIO_SYSSPACE);
494 error = linux_getdents_error(td, args->fd, error);
/* A zero byte count from kern_getdirentries() is handled separately. */
497 if (td->td_retval[0] == 0)
500 lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
502 bdp = (struct dirent *) buf;
503 linuxreclen = LINUX_RECLEN(bdp->d_namlen);
505 linux_dirent = (struct l_dirent*)lbuf;
506 linux_dirent->d_ino = bdp->d_fileno;
/*
 * NOTE(review): unlike linux_getdents(), d_off receives the record length
 * and d_reclen receives the name length - presumably the layout the old
 * Linux readdir ABI expects; confirm before changing.
 */
507 linux_dirent->d_off = linuxreclen;
508 linux_dirent->d_reclen = bdp->d_namlen;
509 strlcpy(linux_dirent->d_name, bdp->d_name,
510 linuxreclen - offsetof(struct l_dirent, d_name));
511 error = copyout(linux_dirent, args->dent, linuxreclen);
513 td->td_retval[0] = linuxreclen;
520 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
524 * These exist mainly for hooks for doing /compat/linux translation.
527 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * access emulation: checks amode for bits outside
 * F_OK | X_OK | W_OK | R_OK, converts the path with LCONVPATHEXIST and
 * calls kern_accessat() relative to AT_FDCWD.
 */
529 linux_access(struct thread *td, struct linux_access_args *args)
534 /* Linux convention. */
535 if (args->amode & ~(F_OK | X_OK | W_OK | R_OK))
538 LCONVPATHEXIST(td, args->path, &path);
540 error = kern_accessat(td, AT_FDCWD, path, UIO_SYSSPACE, 0,
/*
 * faccessat emulation: same amode check as linux_access(), with
 * LINUX_AT_FDCWD mapped to AT_FDCWD and the path resolved relative to dfd.
 * kern_accessat() is called with a flag argument of 0.
 */
549 linux_faccessat(struct thread *td, struct linux_faccessat_args *args)
554 /* Linux convention. */
555 if (args->amode & ~(F_OK | X_OK | W_OK | R_OK))
558 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
559 LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
561 error = kern_accessat(td, dfd, path, UIO_SYSSPACE, 0, args->amode);
567 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * unlink emulation via kern_funlinkat() relative to AT_FDCWD.  When that
 * fails with EPERM the path is stat'ed and tested with S_ISDIR so the
 * Linux-specific error for directories can be produced.
 */
569 linux_unlink(struct thread *td, struct linux_unlink_args *args)
575 LCONVPATHEXIST(td, args->path, &path);
577 error = kern_funlinkat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0, 0);
578 if (error == EPERM) {
579 /* Introduce POSIX noncompliant behaviour of Linux */
580 if (kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &st,
582 if (S_ISDIR(st.st_mode))
/*
 * unlinkat emulation.  Only LINUX_AT_REMOVEDIR is accepted in args->flag;
 * with it the call goes to kern_frmdirat(), otherwise to kern_funlinkat().
 * An EPERM from the non-REMOVEDIR path triggers a no-follow stat and an
 * S_ISDIR test, mirroring linux_unlink().
 */
592 linux_unlinkat(struct thread *td, struct linux_unlinkat_args *args)
598 if (args->flag & ~LINUX_AT_REMOVEDIR)
601 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
602 LCONVPATHEXIST_AT(td, args->pathname, &path, dfd);
604 if (args->flag & LINUX_AT_REMOVEDIR)
605 error = kern_frmdirat(td, dfd, path, FD_NONE, UIO_SYSSPACE, 0);
607 error = kern_funlinkat(td, dfd, path, FD_NONE, UIO_SYSSPACE, 0,
609 if (error == EPERM && !(args->flag & LINUX_AT_REMOVEDIR)) {
610 /* Introduce POSIX noncompliant behaviour of Linux */
611 if (kern_statat(td, AT_SYMLINK_NOFOLLOW, dfd, path,
612 UIO_SYSSPACE, &st, NULL) == 0 && S_ISDIR(st.st_mode))
/* chdir emulation: converts the path with LCONVPATHEXIST and calls kern_chdir(). */
619 linux_chdir(struct thread *td, struct linux_chdir_args *args)
624 LCONVPATHEXIST(td, args->path, &path);
626 error = kern_chdir(td, path, UIO_SYSSPACE);
631 #ifdef LINUX_LEGACY_SYSCALLS
/* chmod emulation: converted path is passed to kern_fchmodat() relative to AT_FDCWD. */
633 linux_chmod(struct thread *td, struct linux_chmod_args *args)
638 LCONVPATHEXIST(td, args->path, &path);
640 error = kern_fchmodat(td, AT_FDCWD, path, UIO_SYSSPACE,
/*
 * fchmodat emulation: LINUX_AT_FDCWD is mapped to AT_FDCWD, the filename is
 * converted relative to dfd, and kern_fchmodat() is called with flag 0.
 */
648 linux_fchmodat(struct thread *td, struct linux_fchmodat_args *args)
653 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
654 LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
656 error = kern_fchmodat(td, dfd, path, UIO_SYSSPACE, args->mode, 0);
661 #ifdef LINUX_LEGACY_SYSCALLS
/* mkdir emulation: path converted with LCONVPATHCREAT, then kern_mkdirat() relative to AT_FDCWD. */
663 linux_mkdir(struct thread *td, struct linux_mkdir_args *args)
668 LCONVPATHCREAT(td, args->path, &path);
670 error = kern_mkdirat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode);
/* mkdirat emulation: as linux_mkdir(), but relative to dfd (LINUX_AT_FDCWD mapped to AT_FDCWD). */
677 linux_mkdirat(struct thread *td, struct linux_mkdirat_args *args)
682 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
683 LCONVPATHCREAT_AT(td, args->pathname, &path, dfd);
685 error = kern_mkdirat(td, dfd, path, UIO_SYSSPACE, args->mode);
690 #ifdef LINUX_LEGACY_SYSCALLS
/* rmdir emulation: converted path is removed with kern_frmdirat() relative to AT_FDCWD. */
692 linux_rmdir(struct thread *td, struct linux_rmdir_args *args)
697 LCONVPATHEXIST(td, args->path, &path);
699 error = kern_frmdirat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0);
/*
 * rename emulation: the source path is converted with LCONVPATHEXIST, the
 * destination with a direct linux_emul_convpath() call (fifth argument 1),
 * and both are passed to kern_renameat() relative to AT_FDCWD.
 */
705 linux_rename(struct thread *td, struct linux_rename_args *args)
710 LCONVPATHEXIST(td, args->from, &from);
711 /* Expand LCONVPATHCREAT so that `from' can be freed on errors */
712 error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
718 error = kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, UIO_SYSSPACE);
/*
 * renameat emulation: repackages the four arguments into a
 * struct linux_renameat2_args and forwards to linux_renameat2().
 */
726 linux_renameat(struct thread *td, struct linux_renameat_args *args)
728 struct linux_renameat2_args renameat2_args = {
729 .olddfd = args->olddfd,
730 .oldname = args->oldname,
731 .newdfd = args->newdfd,
732 .newname = args->newname,
736 return (linux_renameat2(td, &renameat2_args));
/*
 * renameat2 emulation.  Non-zero flags are validated first: bits outside
 * EXCHANGE | NOREPLACE | WHITEOUT, and EXCHANGE combined with either of
 * the other two, are checked separately; otherwise the flags are logged
 * through linux_msg() as unsupported.  With flags == 0 the rename is done
 * by kern_renameat() using the translated directory descriptors.
 */
740 linux_renameat2(struct thread *td, struct linux_renameat2_args *args)
743 int error, olddfd, newdfd;
745 if (args->flags != 0) {
746 if (args->flags & ~(LINUX_RENAME_EXCHANGE |
747 LINUX_RENAME_NOREPLACE | LINUX_RENAME_WHITEOUT))
749 if (args->flags & LINUX_RENAME_EXCHANGE &&
750 args->flags & (LINUX_RENAME_NOREPLACE |
751 LINUX_RENAME_WHITEOUT))
753 linux_msg(td, "renameat2 unsupported flags 0x%x",
758 olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd;
759 newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
760 LCONVPATHEXIST_AT(td, args->oldname, &from, olddfd);
761 /* Expand LCONVPATHCREAT so that `from' can be freed on errors */
762 error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd);
768 error = kern_renameat(td, olddfd, from, newdfd, to, UIO_SYSSPACE);
774 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * symlink emulation: args->path is converted with LCONVPATHEXIST, args->to
 * with linux_emul_convpath() (fifth argument 1), and kern_symlinkat()
 * is called relative to AT_FDCWD.
 */
776 linux_symlink(struct thread *td, struct linux_symlink_args *args)
781 LCONVPATHEXIST(td, args->path, &path);
782 /* Expand LCONVPATHCREAT so that `path' can be freed on errors */
783 error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
789 error = kern_symlinkat(td, path, AT_FDCWD, to, UIO_SYSSPACE);
/*
 * symlinkat emulation: only the new name is resolved relative to newdfd;
 * the old name is converted with the plain LCONVPATHEXIST macro.
 */
797 linux_symlinkat(struct thread *td, struct linux_symlinkat_args *args)
802 dfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
803 LCONVPATHEXIST(td, args->oldname, &path);
804 /* Expand LCONVPATHCREAT so that `path' can be freed on errors */
805 error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, dfd);
811 error = kern_symlinkat(td, path, dfd, to, UIO_SYSSPACE);
817 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * readlink emulation: the converted name lives in kernel space
 * (UIO_SYSSPACE) while the result buffer args->buf is a user pointer
 * (UIO_USERSPACE) of args->count bytes.
 */
819 linux_readlink(struct thread *td, struct linux_readlink_args *args)
824 LCONVPATHEXIST(td, args->name, &name);
826 error = kern_readlinkat(td, AT_FDCWD, name, UIO_SYSSPACE,
827 args->buf, UIO_USERSPACE, args->count);
/* readlinkat emulation: as linux_readlink(), resolved relative to dfd with a user buffer of args->bufsiz bytes. */
834 linux_readlinkat(struct thread *td, struct linux_readlinkat_args *args)
839 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
840 LCONVPATHEXIST_AT(td, args->path, &name, dfd);
842 error = kern_readlinkat(td, dfd, name, UIO_SYSSPACE, args->buf,
843 UIO_USERSPACE, args->bufsiz);
/* truncate emulation: converted path and args->length are handed to kern_truncate(). */
849 linux_truncate(struct thread *td, struct linux_truncate_args *args)
854 LCONVPATHEXIST(td, args->path, &path);
855 error = kern_truncate(td, path, UIO_SYSSPACE, args->length);
860 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * truncate64 emulation.  On amd64 with COMPAT_LINUX32 the length is
 * assembled with PAIR32TO64; otherwise args->length is used directly.
 */
862 linux_truncate64(struct thread *td, struct linux_truncate64_args *args)
868 #if defined(__amd64__) && defined(COMPAT_LINUX32)
869 length = PAIR32TO64(off_t, args->length);
871 length = args->length;
874 LCONVPATHEXIST(td, args->path, &path);
875 error = kern_truncate(td, path, UIO_SYSSPACE, length);
879 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
/* ftruncate emulation: forwards fd and length unchanged to kern_ftruncate(). */
882 linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args)
885 return (kern_ftruncate(td, args->fd, args->length));
888 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * ftruncate64 emulation.  On amd64 with COMPAT_LINUX32 the length is
 * assembled with PAIR32TO64; otherwise args->length is used directly.
 */
890 linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
894 #if defined(__amd64__) && defined(COMPAT_LINUX32)
895 length = PAIR32TO64(off_t, args->length);
897 length = args->length;
900 return (kern_ftruncate(td, args->fd, length));
904 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * link emulation: existing path via LCONVPATHEXIST, new path via
 * linux_emul_convpath() (fifth argument 1), then kern_linkat() with both
 * directory descriptors set to AT_FDCWD.
 */
906 linux_link(struct thread *td, struct linux_link_args *args)
911 LCONVPATHEXIST(td, args->path, &path);
912 /* Expand LCONVPATHCREAT so that `path' can be freed on errors */
913 error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD);
919 error = kern_linkat(td, AT_FDCWD, AT_FDCWD, path, to, UIO_SYSSPACE,
/*
 * linkat emulation.  Only LINUX_AT_SYMLINK_FOLLOW is accepted in
 * args->flag; when it is absent the follow argument passed to
 * kern_linkat() is NOFOLLOW.
 */
928 linux_linkat(struct thread *td, struct linux_linkat_args *args)
931 int error, olddfd, newdfd, follow;
933 if (args->flag & ~LINUX_AT_SYMLINK_FOLLOW)
936 olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd;
937 newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd;
938 LCONVPATHEXIST_AT(td, args->oldname, &path, olddfd);
939 /* Expand LCONVPATHCREAT so that `path' can be freed on errors */
940 error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd);
946 follow = (args->flag & LINUX_AT_SYMLINK_FOLLOW) == 0 ? NOFOLLOW :
948 error = kern_linkat(td, olddfd, newdfd, path, to, UIO_SYSSPACE, follow);
/* fdatasync emulation: calls kern_fsync() on uap->fd with a third argument of false. */
955 linux_fdatasync(struct thread *td, struct linux_fdatasync_args *uap)
958 return (kern_fsync(td, uap->fd, false));
/*
 * sync_file_range emulation.  offset, nbytes and flags are validated
 * (negative values and unknown flag bits are checked), but the range is
 * not otherwise used: the whole descriptor is synced with kern_fsync().
 */
962 linux_sync_file_range(struct thread *td, struct linux_sync_file_range_args *uap)
964 off_t nbytes, offset;
966 #if defined(__amd64__) && defined(COMPAT_LINUX32)
967 nbytes = PAIR32TO64(off_t, uap->nbytes);
968 offset = PAIR32TO64(off_t, uap->offset);
970 nbytes = uap->nbytes;
971 offset = uap->offset;
974 if (offset < 0 || nbytes < 0 ||
975 (uap->flags & ~(LINUX_SYNC_FILE_RANGE_WAIT_BEFORE |
976 LINUX_SYNC_FILE_RANGE_WRITE |
977 LINUX_SYNC_FILE_RANGE_WAIT_AFTER)) != 0) {
981 return (kern_fsync(td, uap->fd, false));
/*
 * pread emulation via kern_pread().  Afterwards the descriptor's vnode is
 * fetched with fgetvp() and tested for VDIR to apply the Linux-specific
 * behaviour mentioned in the comment below.
 */
985 linux_pread(struct thread *td, struct linux_pread_args *uap)
991 #if defined(__amd64__) && defined(COMPAT_LINUX32)
992 offset = PAIR32TO64(off_t, uap->offset);
994 offset = uap->offset;
997 error = kern_pread(td, uap->fd, uap->buf, uap->nbyte, offset);
999 /* This seems to violate POSIX but Linux does it. */
1000 error = fgetvp(td, uap->fd, &cap_pread_rights, &vp);
1003 if (vp->v_type == VDIR)
/* pwrite emulation: offset is assembled with PAIR32TO64 on amd64/COMPAT_LINUX32, then kern_pwrite() is called. */
1011 linux_pwrite(struct thread *td, struct linux_pwrite_args *uap)
1015 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1016 offset = PAIR32TO64(off_t, uap->offset);
1018 offset = uap->offset;
1021 return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, offset));
/*
 * preadv emulation: builds the file offset from pos_h/pos_l, copies the
 * iovec array in (linux32_copyinuio() under COMPAT_LINUX32, copyinuio()
 * otherwise) and calls kern_preadv().
 */
1025 linux_preadv(struct thread *td, struct linux_preadv_args *uap)
1032 * According http://man7.org/linux/man-pages/man2/preadv.2.html#NOTES
1033 * pos_l and pos_h, respectively, contain the
1034 * low order and high order 32 bits of offset.
/* pos_h is shifted left in two steps of sizeof(offset) * 4 bits each. */
1036 offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) <<
1037 (sizeof(offset) * 4)) | uap->pos_l;
1040 #ifdef COMPAT_LINUX32
1041 error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio);
1043 error = copyinuio(uap->vec, uap->vlen, &auio);
1047 error = kern_preadv(td, uap->fd, auio, offset);
/*
 * pwritev emulation: builds the file offset from pos_h/pos_l, copies the
 * iovec array in (linux32_copyinuio() under COMPAT_LINUX32, copyinuio()
 * otherwise) and calls kern_pwritev().
 */
1053 linux_pwritev(struct thread *td, struct linux_pwritev_args *uap)
1060 * According http://man7.org/linux/man-pages/man2/pwritev.2.html#NOTES
1061 * pos_l and pos_h, respectively, contain the
1062 * low order and high order 32 bits of offset.
/* pos_h is shifted left in two steps of sizeof(offset) * 4 bits each. */
1064 offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) <<
1065 (sizeof(offset) * 4)) | uap->pos_l;
1068 #ifdef COMPAT_LINUX32
1069 error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio);
1071 error = copyinuio(uap->vec, uap->vlen, &auio);
1075 error = kern_pwritev(td, uap->fd, auio, offset);
/*
 * mount emulation.  Copies the filesystem type, the optional special file
 * and the mount point in from user space, renames a few Linux filesystem
 * types to their native names, translates LINUX_MS_* flags into MNT_*
 * flags and calls kernel_vmount().  Both temporary name buffers are freed
 * before returning.
 */
1081 linux_mount(struct thread *td, struct linux_mount_args *args)
1083 char fstypename[MFSNAMELEN];
1084 char *mntonname, *mntfromname;
1087 mntonname = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1088 mntfromname = malloc(MNAMELEN, M_TEMP, M_WAITOK);
1089 error = copyinstr(args->filesystemtype, fstypename, MFSNAMELEN - 1,
/* A NULL specialfile yields an empty "from" string. */
1093 if (args->specialfile != NULL) {
1094 error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL);
1098 mntfromname[0] = '\0';
1100 error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL);
/* Filesystem type renames: ext2 -> ext2fs, proc -> linprocfs, vfat -> msdosfs. */
1104 if (strcmp(fstypename, "ext2") == 0) {
1105 strcpy(fstypename, "ext2fs");
1106 } else if (strcmp(fstypename, "proc") == 0) {
1107 strcpy(fstypename, "linprocfs");
1108 } else if (strcmp(fstypename, "vfat") == 0) {
1109 strcpy(fstypename, "msdosfs");
1115 * Linux SYNC flag is not included; the closest equivalent
1116 * FreeBSD has is !ASYNC, which is our default.
1118 if (args->rwflag & LINUX_MS_RDONLY)
1119 fsflags |= MNT_RDONLY;
1120 if (args->rwflag & LINUX_MS_NOSUID)
1121 fsflags |= MNT_NOSUID;
1122 if (args->rwflag & LINUX_MS_NOEXEC)
1123 fsflags |= MNT_NOEXEC;
1124 if (args->rwflag & LINUX_MS_REMOUNT)
1125 fsflags |= MNT_UPDATE;
1127 error = kernel_vmount(fsflags,
1128 "fstype", fstypename,
1129 "fspath", mntonname,
1130 "from", mntfromname,
1133 free(mntonname, M_TEMP);
1134 free(mntfromname, M_TEMP);
1138 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* Old umount emulation: calls kern_unmount() on args->path with flags 0. */
1140 linux_oldumount(struct thread *td, struct linux_oldumount_args *args)
1143 return (kern_unmount(td, args->path, 0));
1145 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1147 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * umount emulation.  LINUX_MNT_FORCE is recognised and cleared from
 * args->flags; any bit still set afterwards is reported through
 * linux_msg() as unsupported.  kern_unmount() receives the local flags.
 */
1149 linux_umount(struct thread *td, struct linux_umount_args *args)
1154 if ((args->flags & LINUX_MNT_FORCE) != 0) {
1155 args->flags &= ~LINUX_MNT_FORCE;
1158 if (args->flags != 0) {
1159 linux_msg(td, "unsupported umount2 flags %#x", args->flags);
1163 return (kern_unmount(td, args->path, flags));
1168 * fcntl family of syscalls
1178 #if defined(__amd64__) && defined(COMPAT_LINUX32)
/*
 * Converts a struct l_flock into a native struct flock.  l_type is mapped
 * to F_RDLCK / F_WRLCK / F_UNLCK, or -1 when no mapping applies; the
 * remaining fields are copied with casts and l_sysid is set to 0.
 */
1184 linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock)
1186 switch (linux_flock->l_type) {
1188 bsd_flock->l_type = F_RDLCK;
1191 bsd_flock->l_type = F_WRLCK;
1194 bsd_flock->l_type = F_UNLCK;
1197 bsd_flock->l_type = -1;
1200 bsd_flock->l_whence = linux_flock->l_whence;
1201 bsd_flock->l_start = (off_t)linux_flock->l_start;
1202 bsd_flock->l_len = (off_t)linux_flock->l_len;
1203 bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1204 bsd_flock->l_sysid = 0;
/*
 * Converts a native struct flock into a struct l_flock: l_type becomes
 * LINUX_F_RDLCK / LINUX_F_WRLCK / LINUX_F_UNLCK and the remaining fields
 * are copied with casts to the Linux types.
 */
1208 bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock)
1210 switch (bsd_flock->l_type) {
1212 linux_flock->l_type = LINUX_F_RDLCK;
1215 linux_flock->l_type = LINUX_F_WRLCK;
1218 linux_flock->l_type = LINUX_F_UNLCK;
1221 linux_flock->l_whence = bsd_flock->l_whence;
1222 linux_flock->l_start = (l_off_t)bsd_flock->l_start;
1223 linux_flock->l_len = (l_off_t)bsd_flock->l_len;
1224 linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1227 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
1235 #if defined(__amd64__) && defined(COMPAT_LINUX32)
/*
 * Converts a struct l_flock64 into a native struct flock.  l_type is
 * mapped to F_RDLCK / F_WRLCK / F_UNLCK, or -1 when no mapping applies;
 * the remaining fields are copied with casts and l_sysid is set to 0.
 */
1241 linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock)
1243 switch (linux_flock->l_type) {
1245 bsd_flock->l_type = F_RDLCK;
1248 bsd_flock->l_type = F_WRLCK;
1251 bsd_flock->l_type = F_UNLCK;
1254 bsd_flock->l_type = -1;
1257 bsd_flock->l_whence = linux_flock->l_whence;
1258 bsd_flock->l_start = (off_t)linux_flock->l_start;
1259 bsd_flock->l_len = (off_t)linux_flock->l_len;
1260 bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
1261 bsd_flock->l_sysid = 0;
/*
 * Converts a native struct flock into a struct l_flock64: l_type becomes
 * LINUX_F_RDLCK / LINUX_F_WRLCK / LINUX_F_UNLCK; l_start and l_len are
 * cast to l_loff_t and l_pid to l_pid_t.
 */
1265 bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock)
1267 switch (bsd_flock->l_type) {
1269 linux_flock->l_type = LINUX_F_RDLCK;
1272 linux_flock->l_type = LINUX_F_WRLCK;
1275 linux_flock->l_type = LINUX_F_UNLCK;
1278 linux_flock->l_whence = bsd_flock->l_whence;
1279 linux_flock->l_start = (l_loff_t)bsd_flock->l_start;
1280 linux_flock->l_len = (l_loff_t)bsd_flock->l_len;
1281 linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
1283 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
/*
 * Common fcntl dispatcher shared by linux_fcntl() and linux_fcntl64().
 * Switches on args->cmd and forwards each supported command to
 * kern_fcntl(), translating status flags, struct l_flock records and seal
 * bits between the Linux and native representations.  Commands without a
 * handler are logged through linux_msg().
 */
1286 fcntl_common(struct thread *td, struct linux_fcntl_args *args)
1288 struct l_flock linux_flock;
1289 struct flock bsd_flock;
1294 switch (args->cmd) {
1296 return (kern_fcntl(td, args->fd, F_DUPFD, args->arg));
1299 return (kern_fcntl(td, args->fd, F_GETFD, 0));
1302 return (kern_fcntl(td, args->fd, F_SETFD, args->arg));
/* F_GETFL: the native flag word is rebuilt in td_retval[0] using Linux bit values. */
1305 error = kern_fcntl(td, args->fd, F_GETFL, 0);
1306 result = td->td_retval[0];
1307 td->td_retval[0] = 0;
/* NOTE(review): if O_RDONLY is 0 this test can never be true - confirm it is intentional. */
1308 if (result & O_RDONLY)
1309 td->td_retval[0] |= LINUX_O_RDONLY;
1310 if (result & O_WRONLY)
1311 td->td_retval[0] |= LINUX_O_WRONLY;
1312 if (result & O_RDWR)
1313 td->td_retval[0] |= LINUX_O_RDWR;
1314 if (result & O_NDELAY)
1315 td->td_retval[0] |= LINUX_O_NONBLOCK;
1316 if (result & O_APPEND)
1317 td->td_retval[0] |= LINUX_O_APPEND;
1318 if (result & O_FSYNC)
1319 td->td_retval[0] |= LINUX_O_SYNC;
1320 if (result & O_ASYNC)
1321 td->td_retval[0] |= LINUX_O_ASYNC;
1322 #ifdef LINUX_O_NOFOLLOW
1323 if (result & O_NOFOLLOW)
1324 td->td_retval[0] |= LINUX_O_NOFOLLOW;
1326 #ifdef LINUX_O_DIRECT
1327 if (result & O_DIRECT)
1328 td->td_retval[0] |= LINUX_O_DIRECT;
/* F_SETFL: the reverse translation, from args->arg into the local arg. */
1334 if (args->arg & LINUX_O_NDELAY)
1336 if (args->arg & LINUX_O_APPEND)
1338 if (args->arg & LINUX_O_SYNC)
1340 if (args->arg & LINUX_O_ASYNC)
1342 #ifdef LINUX_O_NOFOLLOW
1343 if (args->arg & LINUX_O_NOFOLLOW)
1346 #ifdef LINUX_O_DIRECT
1347 if (args->arg & LINUX_O_DIRECT)
1350 return (kern_fcntl(td, args->fd, F_SETFL, arg));
/* F_GETLK: copy the lock in, convert, query, convert back, copy out. */
1353 error = copyin((void *)args->arg, &linux_flock,
1354 sizeof(linux_flock));
1357 linux_to_bsd_flock(&linux_flock, &bsd_flock);
1358 error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1361 bsd_to_linux_flock(&bsd_flock, &linux_flock);
1362 return (copyout(&linux_flock, (void *)args->arg,
1363 sizeof(linux_flock)));
/* F_SETLK: copy the lock in, convert, and apply it. */
1366 error = copyin((void *)args->arg, &linux_flock,
1367 sizeof(linux_flock));
1370 linux_to_bsd_flock(&linux_flock, &bsd_flock);
1371 return (kern_fcntl(td, args->fd, F_SETLK,
1372 (intptr_t)&bsd_flock));
1374 case LINUX_F_SETLKW:
1375 error = copyin((void *)args->arg, &linux_flock,
1376 sizeof(linux_flock));
1379 linux_to_bsd_flock(&linux_flock, &bsd_flock);
1380 return (kern_fcntl(td, args->fd, F_SETLKW,
1381 (intptr_t)&bsd_flock));
1383 case LINUX_F_GETOWN:
1384 return (kern_fcntl(td, args->fd, F_GETOWN, 0));
1386 case LINUX_F_SETOWN:
1388 * XXX some Linux applications depend on F_SETOWN having no
1389 * significant effect for pipes (SIGIO is not delivered for
1390 * pipes under Linux-2.2.35 at least).
1392 error = fget(td, args->fd,
1393 &cap_fcntl_rights, &fp);
/* Pipes are special-cased before the generic F_SETOWN call. */
1396 if (fp->f_type == DTYPE_PIPE) {
1402 return (kern_fcntl(td, args->fd, F_SETOWN, args->arg));
1404 case LINUX_F_DUPFD_CLOEXEC:
1405 return (kern_fcntl(td, args->fd, F_DUPFD_CLOEXEC, args->arg));
1407 * Our F_SEAL_* values match Linux one for maximum compatibility. So we
1408 * only needed to account for different values for fcntl(2) commands.
1410 case LINUX_F_GET_SEALS:
1411 error = kern_fcntl(td, args->fd, F_GET_SEALS, 0);
1414 td->td_retval[0] = bsd_to_linux_bits(td->td_retval[0],
1418 case LINUX_F_ADD_SEALS:
1419 return (kern_fcntl(td, args->fd, F_ADD_SEALS,
1420 linux_to_bsd_bits(args->arg, seal_bitmap, 0)));
/*
 * NOTE(review): the other linux_msg() calls in this file pass format
 * strings without a trailing "\n"; confirm whether linux_msg() appends
 * its own newline, in which case the "\n" below is redundant.
 */
1422 linux_msg(td, "unsupported fcntl cmd %d\n", args->cmd);
/* fcntl entry point: delegates directly to fcntl_common(). */
1428 linux_fcntl(struct thread *td, struct linux_fcntl_args *args)
1431 return (fcntl_common(td, args));
1434 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * fcntl64 entry point.  The three 64-bit locking commands are handled here
 * using struct l_flock64 and the *_flock64 converters; every other command
 * is repackaged into a struct linux_fcntl_args and passed to fcntl_common().
 */
1436 linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args)
1438 struct l_flock64 linux_flock;
1439 struct flock bsd_flock;
1440 struct linux_fcntl_args fcntl_args;
1443 switch (args->cmd) {
1444 case LINUX_F_GETLK64:
1445 error = copyin((void *)args->arg, &linux_flock,
1446 sizeof(linux_flock));
1449 linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1450 error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
1453 bsd_to_linux_flock64(&bsd_flock, &linux_flock);
1454 return (copyout(&linux_flock, (void *)args->arg,
1455 sizeof(linux_flock)));
1457 case LINUX_F_SETLK64:
1458 error = copyin((void *)args->arg, &linux_flock,
1459 sizeof(linux_flock));
1462 linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1463 return (kern_fcntl(td, args->fd, F_SETLK,
1464 (intptr_t)&bsd_flock));
1466 case LINUX_F_SETLKW64:
1467 error = copyin((void *)args->arg, &linux_flock,
1468 sizeof(linux_flock));
1471 linux_to_bsd_flock64(&linux_flock, &bsd_flock);
1472 return (kern_fcntl(td, args->fd, F_SETLKW,
1473 (intptr_t)&bsd_flock));
/* Fallback: hand the request to the common dispatcher. */
1476 fcntl_args.fd = args->fd;
1477 fcntl_args.cmd = args->cmd;
1478 fcntl_args.arg = args->arg;
1479 return (fcntl_common(td, &fcntl_args));
1481 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1483 #ifdef LINUX_LEGACY_SYSCALLS
/* chown emulation: converted path is passed to kern_fchownat() relative to AT_FDCWD. */
1485 linux_chown(struct thread *td, struct linux_chown_args *args)
1490 LCONVPATHEXIST(td, args->path, &path);
1492 error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid,
/*
 * fchownat emulation.  Only LINUX_AT_SYMLINK_NOFOLLOW is accepted in
 * args->flag; it is translated to AT_SYMLINK_NOFOLLOW (or 0 when absent)
 * for kern_fchownat().
 */
1500 linux_fchownat(struct thread *td, struct linux_fchownat_args *args)
1503 int error, dfd, flag;
1505 if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW)
1508 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
1509 LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
1511 flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) == 0 ? 0 :
1512 AT_SYMLINK_NOFOLLOW;
1513 error = kern_fchownat(td, dfd, path, UIO_SYSSPACE, args->uid, args->gid,
1519 #ifdef LINUX_LEGACY_SYSCALLS
/* lchown emulation: like linux_chown(), but kern_fchownat() is called with AT_SYMLINK_NOFOLLOW. */
1521 linux_lchown(struct thread *td, struct linux_lchown_args *args)
1526 LCONVPATHEXIST(td, args->path, &path);
1528 error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid,
1529 args->gid, AT_SYMLINK_NOFOLLOW);
/*
 * Maps a LINUX_POSIX_FADV_* advice value to the native POSIX_FADV_*
 * constant of the same name.
 */
1536 convert_fadvice(int advice)
1539 case LINUX_POSIX_FADV_NORMAL:
1540 return (POSIX_FADV_NORMAL);
1541 case LINUX_POSIX_FADV_RANDOM:
1542 return (POSIX_FADV_RANDOM);
1543 case LINUX_POSIX_FADV_SEQUENTIAL:
1544 return (POSIX_FADV_SEQUENTIAL);
1545 case LINUX_POSIX_FADV_WILLNEED:
1546 return (POSIX_FADV_WILLNEED);
1547 case LINUX_POSIX_FADV_DONTNEED:
1548 return (POSIX_FADV_DONTNEED);
1549 case LINUX_POSIX_FADV_NOREUSE:
1550 return (POSIX_FADV_NOREUSE);
/*
 * fadvise64 emulation: offset is assembled with PAIR32TO64 on
 * amd64/COMPAT_LINUX32, the advice is translated with convert_fadvice(),
 * and args->len is passed through as-is to kern_posix_fadvise().
 */
1557 linux_fadvise64(struct thread *td, struct linux_fadvise64_args *args)
1562 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1563 offset = PAIR32TO64(off_t, args->offset);
1565 offset = args->offset;
1568 advice = convert_fadvice(args->advice);
1571 return (kern_posix_fadvise(td, args->fd, offset, args->len, advice));
1574 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * linux_fadvise64_64: handler for the Linux fadvise64_64 call.  Like
 * linux_fadvise64(), but the length is also assembled locally before the
 * request is forwarded to kern_posix_fadvise().
 *
 * NOTE(review): the #else/#endif lines, the non-compat assignment of `len`
 * and any check on the converted advice are not visible in this excerpt.
 */
1576 linux_fadvise64_64(struct thread *td, struct linux_fadvise64_64_args *args)
/* 32-bit Linux ABI on amd64: len and offset are assembled with PAIR32TO64(). */
1581 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1582 len = PAIR32TO64(off_t, args->len);
1583 offset = PAIR32TO64(off_t, args->offset);
/* Presumably the #else arm: the offset is used as supplied -- TODO confirm. */
1586 offset = args->offset;
1589 advice = convert_fadvice(args->advice);
1592 return (kern_posix_fadvise(td, args->fd, offset, len, advice));
1594 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
1596 #ifdef LINUX_LEGACY_SYSCALLS
/*
 * linux_pipe: handler for the Linux pipe call (compiled only under
 * LINUX_LEGACY_SYSCALLS).  Creates a pipe with kern_pipe() using no flags
 * and copies the two descriptors out to args->pipefds.
 *
 * NOTE(review): the conditions guarding the copyout and the two
 * kern_close() calls are not visible in this excerpt; presumably the
 * descriptors are closed only when the copyout fails -- TODO confirm.
 */
1598 linux_pipe(struct thread *td, struct linux_pipe_args *args)
/* Flags argument is 0; the last two arguments are passed as NULL. */
1603 error = kern_pipe(td, fildes, 0, NULL, NULL);
/* Hand both descriptors back to the caller's array. */
1607 error = copyout(fildes, args->pipefds, sizeof(fildes));
/* Close both ends; the kern_close() results are deliberately discarded. */
1609 (void)kern_close(td, fildes[0]);
1610 (void)kern_close(td, fildes[1]);
/*
 * linux_pipe2: handler for the Linux pipe2 call.  Checks the Linux flag
 * word, translates the supported flags into `flags`, creates the pipe with
 * kern_pipe() and copies the two descriptors out to args->pipefds.
 *
 * NOTE(review): the statement run when the flag check fails, the
 * initialisation of `flags`, the statement under the LINUX_O_CLOEXEC test
 * and the conditions guarding the copyout/close calls are not visible in
 * this excerpt.
 */
1618 linux_pipe2(struct thread *td, struct linux_pipe2_args *args)
/* Only LINUX_O_NONBLOCK and LINUX_O_CLOEXEC are accepted. */
1623 if ((args->flags & ~(LINUX_O_NONBLOCK | LINUX_O_CLOEXEC)) != 0)
1627 if ((args->flags & LINUX_O_NONBLOCK) != 0)
1628 flags |= O_NONBLOCK;
/* Presumably sets the native close-on-exec flag -- body not visible. */
1629 if ((args->flags & LINUX_O_CLOEXEC) != 0)
1631 error = kern_pipe(td, fildes, flags, NULL, NULL);
/* Hand both descriptors back to the caller's array. */
1635 error = copyout(fildes, args->pipefds, sizeof(fildes));
/* Close both ends; the kern_close() results are deliberately discarded. */
1637 (void)kern_close(td, fildes[0]);
1638 (void)kern_close(td, fildes[1]);
/*
 * linux_dup3: handler for the Linux dup3 call.  Checks the arguments,
 * selects a native fcntl command and duplicates args->oldfd onto
 * args->newfd through kern_fcntl().
 *
 * NOTE(review): the statements run when either check fails, and the
 * command chosen when LINUX_O_CLOEXEC is absent, are not visible in this
 * excerpt.
 */
1645 linux_dup3(struct thread *td, struct linux_dup3_args *args)
/* Identical source and target descriptors are treated as a special case. */
1650 if (args->oldfd == args->newfd)
/* LINUX_O_CLOEXEC is the only flag accepted. */
1652 if ((args->flags & ~LINUX_O_CLOEXEC) != 0)
1654 if (args->flags & LINUX_O_CLOEXEC)
1655 cmd = F_DUP2FD_CLOEXEC;
/* The target descriptor is passed as the fcntl argument. */
1659 newfd = args->newfd;
1660 return (kern_fcntl(td, args->oldfd, cmd, newfd));
/*
 * linux_fallocate: handler for the Linux fallocate call.  Builds offset
 * and length and forwards the request to kern_posix_fallocate().  A
 * non-zero args->mode is treated separately by the check below.
 *
 * NOTE(review): the statement run for a non-zero mode, the #else/#endif
 * lines and the non-compat assignment of `len` are not visible in this
 * excerpt.
 */
1664 linux_fallocate(struct thread *td, struct linux_fallocate_args *args)
1669 * We emulate only posix_fallocate system call for which
1672 if (args->mode != 0)
/* 32-bit Linux ABI on amd64: len and offset are assembled with PAIR32TO64(). */
1675 #if defined(__amd64__) && defined(COMPAT_LINUX32)
1676 len = PAIR32TO64(off_t, args->len);
1677 offset = PAIR32TO64(off_t, args->offset);
/* Presumably the #else arm: the offset is used as supplied -- TODO confirm. */
1680 offset = args->offset;
1683 return (kern_posix_fallocate(td, args->fd, offset, len));
/*
 * linux_copy_file_range: handler for the Linux copy_file_range call.
 * Logs a message for non-zero flags, copies in the optional input/output
 * offsets, calls kern_copy_file_range() and, on success, copies the
 * offsets back out to the caller.
 *
 * NOTE(review): the rest of the parameter list, the initialisation of the
 * local `flags`, the statement following the linux_msg() call, the
 * copyin error handling, the assignments that presumably point
 * inoffp/outoffp at inoff/outoff, and the final return are not visible in
 * this excerpt.
 */
1687 linux_copy_file_range(struct thread *td, struct linux_copy_file_range_args
1690 l_loff_t inoff, outoff, *inoffp, *outoffp;
1694 * copy_file_range(2) on Linux doesn't define any flags (yet), so is
1695 * the native implementation. Enforce it.
1697 if (args->flags != 0) {
1698 linux_msg(td, "copy_file_range unsupported flags 0x%x",
/* NULL means "no explicit offset" for the corresponding descriptor. */
1703 inoffp = outoffp = NULL;
/* Fetch the caller's input offset when one was supplied. */
1704 if (args->off_in != NULL) {
1705 error = copyin(args->off_in, &inoff, sizeof(l_loff_t));
/* Fetch the caller's output offset when one was supplied. */
1710 if (args->off_out != NULL) {
1711 error = copyin(args->off_out, &outoff, sizeof(l_loff_t));
1717 error = kern_copy_file_range(td, args->fd_in, inoffp, args->fd_out,
1718 outoffp, args->len, flags);
/* Write the offsets back only on success and only where one was given. */
1719 if (error == 0 && args->off_in != NULL)
1720 error = copyout(inoffp, args->off_in, sizeof(l_loff_t));
1721 if (error == 0 && args->off_out != NULL)
1722 error = copyout(outoffp, args->off_out, sizeof(l_loff_t));
1726 #define LINUX_MEMFD_PREFIX "memfd:"
1729 linux_memfd_create(struct thread *td, struct linux_memfd_create_args *args)
1731 char memfd_name[LINUX_NAME_MAX + 1];
1732 int error, flags, shmflags, oflags;
1735 * This is our clever trick to avoid the heap allocation to copy in the
1736 * uname. We don't really need to go this far out of our way, but it
1737 * does keep the rest of this function fairly clean as they don't have
1738 * to worry about cleanup on the way out.
1740 error = copyinstr(args->uname_ptr,
1741 memfd_name + sizeof(LINUX_MEMFD_PREFIX) - 1,
1742 LINUX_NAME_MAX - sizeof(LINUX_MEMFD_PREFIX) - 1, NULL);
1744 if (error == ENAMETOOLONG)
1749 memcpy(memfd_name, LINUX_MEMFD_PREFIX, sizeof(LINUX_MEMFD_PREFIX) - 1);
1750 flags = linux_to_bsd_bits(args->flags, mfd_bitmap, 0);
1751 if ((flags & ~(MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB |
1752 MFD_HUGE_MASK)) != 0)
1754 /* Size specified but no HUGETLB. */
1755 if ((flags & MFD_HUGE_MASK) != 0 && (flags & MFD_HUGETLB) == 0)
1757 /* We don't actually support HUGETLB. */
1758 if ((flags & MFD_HUGETLB) != 0)
1761 shmflags = SHM_GROW_ON_WRITE;
1762 if ((flags & MFD_CLOEXEC) != 0)
1763 oflags |= O_CLOEXEC;
1764 if ((flags & MFD_ALLOW_SEALING) != 0)
1765 shmflags |= SHM_ALLOW_SEALING;
1766 return (kern_shm_open2(td, SHM_ANON, oflags, 0, shmflags, NULL,