2 * Copyright (c) 1994-1996 Søren Schmidt
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
60 #include <machine/cpu.h>
61 #include <machine/cputypes.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 #include <compat/linux/linux_emul.h>
68 #include <compat/linux/linux_futex.h>
69 #include <compat/linux/linux_mib.h>
70 #include <compat/linux/linux_misc.h>
71 #include <compat/linux/linux_signal.h>
72 #include <compat/linux/linux_util.h>
74 MODULE_VERSION(linux, 1);
76 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
/*
 * Value of the first two bytes of a "#!" script when read as a short, for
 * each host byte order; exec_linux_imgact_try() compares the start of the
 * image header against it.
 */
78 #if BYTE_ORDER == LITTLE_ENDIAN
79 #define SHELLMAGIC 0x2123 /* #! */
81 #define SHELLMAGIC 0x2321
85 * Allow the sendsig functions to use the ldebug() facility
86 * even though they are not syscalls themselves. Map them
87 * to syscall 0. This is slightly less bogus than using
90 #define LINUX_SYS_linux_rt_sendsig 0
91 #define LINUX_SYS_linux_sendsig 0
/* ps_strings address used by elf_linux_sysvec: the structure ends at LINUX_USRSTACK. */
93 #define LINUX_PS_STRINGS (LINUX_USRSTACK - sizeof(struct ps_strings))
/* Signal trampoline code and its size, installed as sv_sigcode / sv_szsigcode. */
95 extern char linux_sigcode[];
96 extern int linux_szsigcode;
/* Linux system call table, installed as sv_table in both sysentvecs. */
98 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
/* Linker sets of ioctl and device handlers walked by linux_elf_modevent(). */
100 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
101 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
/* Forward declarations of the sysentvec / brand-note callbacks defined in this file. */
103 static int linux_fixup(register_t **stack_base,
104 struct image_params *iparams);
105 static int elf_linux_fixup(register_t **stack_base,
106 struct image_params *iparams);
107 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
108 static void exec_linux_setregs(struct thread *td,
109 struct image_params *imgp, u_long stack);
110 static register_t *linux_copyout_strings(struct image_params *imgp);
111 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
/*
 * Platform string handed to Linux binaries and the amount of stack space
 * reserved for it; both are assigned in linux_elf_modevent().
 */
113 static int linux_szplatform;
114 const char *linux_platform;
/* Tags returned by EVENTHANDLER_REGISTER(), kept so the handlers can be deregistered. */
116 static eventhandler_tag linux_exit_tag;
117 static eventhandler_tag linux_exec_tag;
120 * Linux syscalls return negative errnos; we use positive ones and map them
122 * FreeBSD: src/sys/sys/errno.h
123 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
124 * linux-2.6.17.8/include/asm-generic/errno.h
/*
 * Error translation table installed as sv_errtbl in both sysentvecs:
 * entry i holds the negated Linux errno reported for FreeBSD errno i,
 * for i in 0..ELAST.
 */
126 static int bsd_to_linux_errno[ELAST + 1] = {
127 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
128 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
129 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
130 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
131 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
132 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
133 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
134 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
135 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
/*
 * FreeBSD -> Linux signal number table, installed as sv_sigtbl and read by
 * the sendsig routines as sv_sigtbl[_SIG_IDX(sig)].
 * NOTE(review): the two 0 entries presumably mark FreeBSD signals that have
 * no Linux equivalent -- confirm against the signal headers.
 */
139 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
140 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
141 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
142 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
143 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
144 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
145 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
146 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
147 0, LINUX_SIGUSR1, LINUX_SIGUSR2
/*
 * Linux -> FreeBSD signal number table, the reverse of bsd_to_linux_signal.
 * The mapping is not one-to-one: SIGBUS and SIGURG each appear twice.
 * NOTE(review): presumably indexed the same way as bsd_to_linux_signal
 * (signal number through _SIG_IDX()) -- no lookup is visible in this file.
 */
150 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
151 SIGHUP, SIGINT, SIGQUIT, SIGILL,
152 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
153 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
154 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
155 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
156 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
157 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
158 SIGIO, SIGURG, SIGSYS
/*
 * FreeBSD trap number -> Linux trap number, indexed by the FreeBSD T_*
 * value named in each entry's comment.  The result is stored in the
 * sc_trapno field of the signal context built by the sendsig routines.
 * LINUX_T_UNKNOWN fills the slots that have no translation.
 */
161 #define LINUX_T_UNKNOWN 255
162 static int _bsd_to_linux_trapcode[] = {
163 LINUX_T_UNKNOWN, /* 0 */
164 6, /* 1 T_PRIVINFLT */
165 LINUX_T_UNKNOWN, /* 2 */
167 LINUX_T_UNKNOWN, /* 4 */
168 LINUX_T_UNKNOWN, /* 5 */
169 16, /* 6 T_ARITHTRAP */
170 254, /* 7 T_ASTFLT */
171 LINUX_T_UNKNOWN, /* 8 */
172 13, /* 9 T_PROTFLT */
173 1, /* 10 T_TRCTRAP */
174 LINUX_T_UNKNOWN, /* 11 */
175 14, /* 12 T_PAGEFLT */
176 LINUX_T_UNKNOWN, /* 13 */
177 17, /* 14 T_ALIGNFLT */
178 LINUX_T_UNKNOWN, /* 15 */
179 LINUX_T_UNKNOWN, /* 16 */
180 LINUX_T_UNKNOWN, /* 17 */
186 8, /* 23 T_DOUBLEFLT */
187 9, /* 24 T_FPOPFLT */
188 10, /* 25 T_TSSFLT */
189 11, /* 26 T_SEGNPFLT */
190 12, /* 27 T_STKFLT */
192 19, /* 29 T_XMMFLT */
193 15 /* 30 T_RESERVED */
/*
 * Bounds-checked lookup in _bsd_to_linux_trapcode[]: a code smaller than
 * the number of table entries is translated through the table.
 * NOTE(review): the fallback arm for out-of-range codes is presumably
 * LINUX_T_UNKNOWN -- confirm.  The argument is evaluated more than once.
 */
195 #define bsd_to_linux_trapcode(code) \
196 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
197 _bsd_to_linux_trapcode[(code)]: \
201 * If FreeBSD & Linux have a difference of opinion about what a trap
202 * means, deal with it here.
/*
 * Trap translation hook, installed as sv_transtrap in both sysentvecs.
 * Takes the signal about to be delivered and the trap code that caused it.
 * Only SIGBUS is a candidate for translation; the check below singles out
 * every other signal (NOTE(review): presumably returning it unchanged).
 */
207 translate_traps(int signal, int trap_code)
209 if (signal != SIGBUS)
/*
 * Stack fixup for a.out images (sv_fixup of linux_sysvec).
 * Computes envp as the slot argc + 1 entries past *stack_base, i.e. just
 * after the argv pointers and their terminator, then stores envp, argv and
 * argc through *stack_base so the program finds them on its stack.
 */
223 linux_fixup(register_t **stack_base, struct image_params *imgp)
225 register_t *argv, *envp;
228 envp = *stack_base + (imgp->args->argc + 1);
230 **stack_base = (intptr_t)(void *)envp;
232 **stack_base = (intptr_t)(void *)argv;
234 **stack_base = imgp->args->argc;
/*
 * Stack fixup for ELF images (sv_fixup of elf_linux_sysvec).
 * Writes the ELF auxiliary vector after the argv/envp pointer arrays,
 * frees the kernel-side imgp->auxargs and stores argc through *stack_base.
 * Must run in the context of the process being exec'ed (asserted below).
 */
239 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
243 Elf32_Addr *uplatform;
244 struct ps_strings *arginfo;
247 KASSERT(curthread->td_proc == imgp->proc,
248 ("unsafe elf_linux_fixup(), should be curproc"));
/*
 * The platform string sits linux_szplatform bytes below ps_strings, which is
 * where linux_copyout_strings() copies it.
 */
251 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
252 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
253 args = (Elf32_Auxargs *)imgp->auxargs;
/* Skip the argv and envp pointer arrays plus their two NULL terminators. */
254 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
256 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
259 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
260 * as it has appeared in the 2.4.0-rc7 first time.
261 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
262 * glibc falls back to the hard-coded CLK_TCK value when aux entry
264 * Also see linux_times() implementation.
266 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
267 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
268 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
269 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
270 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
271 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
272 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
273 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
274 AUXARGS_ENTRY(pos, AT_BASE, args->base);
275 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
/*
 * NOTE(review): the "effective" entries are filled from the saved ids
 * (cr_svuid / cr_svgid), not cr_uid / cr_gid -- confirm this is intended.
 */
276 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
277 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
278 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
279 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
280 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
/* AT_EXECFD is only emitted when an exec descriptor was recorded. */
281 if (args->execfd != -1)
282 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
283 AUXARGS_ENTRY(pos, AT_NULL, 0);
/* The vector has been written out; release the kernel copy and clear the pointer. */
285 free(imgp->auxargs, M_TEMP);
286 imgp->auxargs = NULL;
289 **stack_base = (register_t)imgp->args->argc;
294 * Copied from kern/kern_exec.c
/*
 * sv_copyout_strings implementation for Linux ELF images.
 * Lays out the top of the new user stack: the platform string directly
 * below ps_strings, then the argument/environment strings, then the
 * argv/envp pointer vectors (with extra room for the aux vector when one
 * is pending).  Also fills in the ps_strings fields.
 * Per the prototype the result is a register_t *; the visible code sets
 * stack_base to the start of the vector table.
 */
297 linux_copyout_strings(struct image_params *imgp)
301 char *stringp, *destp;
302 register_t *stack_base;
303 struct ps_strings *arginfo;
307 * Calculate string base and vector table pointers.
308 * Also deal with signal trampoline code for this exec type.
/*
 * destp is the start of the string area: below ps_strings, the spare user
 * space, the platform string and the pointer-aligned size of the strings
 * actually in use (ARG_MAX minus the unused stringspace).
 */
311 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
312 destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
313 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
316 * install LINUX_PLATFORM
318 copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
322 * If we have a valid auxargs ptr, prepare some room
327 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
328 * lower compatibility.
330 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
331 (LINUX_AT_COUNT * 2);
333 * The '+ 2' is for the null pointers at the end of each of
334 * the arg and env vector sets,and imgp->auxarg_size is room
335 * for argument of Runtime loader.
337 vectp = (char **)(destp - (imgp->args->argc +
338 imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
341 * The '+ 2' is for the null pointers at the end of each of
342 * the arg and env vector sets
344 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
349 * vectp also becomes our initial stack base
351 stack_base = (register_t *)vectp;
353 stringp = imgp->args->begin_argv;
354 argc = imgp->args->argc;
355 envc = imgp->args->envc;
358 * Copy out strings - arguments and environment.
360 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
363 * Fill in "ps_strings" struct for ps, w, etc.
365 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
366 suword(&arginfo->ps_nargvstr, argc);
369 * Fill in argument portion of vector table.
371 for (; argc > 0; --argc) {
372 suword(vectp++, (long)(intptr_t)destp);
373 while (*stringp++ != 0)
378 /* a null vector table pointer separates the argp's from the envp's */
381 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
382 suword(&arginfo->ps_nenvstr, envc);
385 * Fill in environment portion of vector table.
387 for (; envc > 0; --envc) {
388 suword(vectp++, (long)(intptr_t)destp);
389 while (*stringp++ != 0)
394 /* end of vector table is a null pointer */
/*
 * User code/data segment selectors loaded into the trapframe when a signal
 * handler is entered, and the offset added to the sigcode base to reach the
 * entry point used by linux_rt_sendsig().
 */
402 extern int _ucodesel, _udatasel;
403 extern unsigned long linux_sznonrtsigcode;
/*
 * Deliver a signal to a handler that was installed with SA_SIGINFO
 * (linux_sendsig() dispatches here for such handlers).
 *
 * Builds a struct l_rt_sigframe holding the handler, the Linux siginfo and a
 * ucontext with the interrupted register state, copies it out to the user
 * stack (or the alternate signal stack) and redirects the trapframe to the
 * signal trampoline.
 *
 * catcher - user handler address, stored in the frame
 * ksi     - kernel signal info (signal number, code, fault address)
 * mask    - signal mask to save in the frame
 *
 * Entered with the process lock and ps_mtx held (asserted); ps_mtx is
 * dropped while the frame is built and re-taken at the end.
 */
406 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
408 struct thread *td = curthread;
409 struct proc *p = td->td_proc;
411 struct trapframe *regs;
412 struct l_rt_sigframe *fp, frame;
416 sig = ksi->ksi_signo;
417 code = ksi->ksi_code;
418 PROC_LOCK_ASSERT(p, MA_OWNED);
420 mtx_assert(&psp->ps_mtx, MA_OWNED);
/* Remember whether we are already running on the alternate signal stack. */
422 oonstack = sigonstack(regs->tf_esp);
425 if (ldebug(rt_sendsig))
426 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
427 catcher, sig, (void*)mask, code);
430 * Allocate space for the signal handler context.
/*
 * Use the top of the alternate stack when one is configured, we are not on
 * it yet and this signal asked for it; otherwise put the frame just below
 * the current user stack pointer.
 */
432 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
433 SIGISMEMBER(psp->ps_sigonstack, sig)) {
434 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
435 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
437 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
438 mtx_unlock(&psp->ps_mtx);
441 * Build the argument list for the signal handler.
/* Translate the FreeBSD signal number to the Linux one via the ABI table. */
443 if (p->p_sysent->sv_sigtbl)
444 if (sig <= p->p_sysent->sv_sigsize)
445 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
447 bzero(&frame, sizeof(frame));
449 frame.sf_handler = catcher;
/* These pointers refer to the user-space copy of the frame, not the local one. */
451 frame.sf_siginfo = &fp->sf_si;
452 frame.sf_ucontext = &fp->sf_sc;
454 /* Fill in POSIX parts */
455 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
458 * Build the signal context to be used by sigreturn.
460 frame.sf_sc.uc_flags = 0; /* XXX ??? */
461 frame.sf_sc.uc_link = NULL; /* XXX ??? */
463 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
464 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
465 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
466 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
469 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
/* Save the interrupted register state; %gs is read directly via rgs(). */
471 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
472 frame.sf_sc.uc_mcontext.sc_gs = rgs();
473 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
474 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
475 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
476 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
477 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
478 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
479 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
480 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
481 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
482 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
483 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
484 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
485 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
486 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
487 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
488 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
489 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr;
/*
 * NOTE(review): this translates ksi_code, whereas linux_sendsig() translates
 * ksi->ksi_trapno -- confirm which value is intended here.
 */
490 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
493 if (ldebug(rt_sendsig))
494 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
495 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
496 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
499 if (copyout(&frame, fp, sizeof(frame)) != 0) {
501 * Process has trashed its stack; give it an illegal
502 * instruction to halt it in its tracks.
505 if (ldebug(rt_sendsig))
506 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
514 * Build context to run handler in.
/*
 * Resume in user mode on the new frame, entering the sigcode at offset
 * linux_sznonrtsigcode, with trace/VM86/direction flags cleared and the
 * user selectors loaded.
 */
516 regs->tf_esp = (int)fp;
517 regs->tf_eip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
518 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
519 regs->tf_cs = _ucodesel;
520 regs->tf_ds = _udatasel;
521 regs->tf_es = _udatasel;
522 regs->tf_fs = _udatasel;
523 regs->tf_ss = _udatasel;
525 mtx_lock(&psp->ps_mtx);
530 * Send an interrupt to process.
532 * Stack is set up to allow sigcode stored
533 * in u. to call routine, followed by kcall
534 * to sigreturn routine below. After sigreturn
535 * resets the signal mask, the stack, and the
536 * frame pointer, it returns to the user
/*
 * sv_sendsig implementation shared by both Linux sysentvecs.
 *
 * Handlers registered with SA_SIGINFO are handed to linux_rt_sendsig().
 * For all others this builds a struct l_sigframe (handler, Linux
 * sigcontext, extra mask words), copies it to the user or alternate signal
 * stack and redirects the trapframe to the start of the signal trampoline.
 *
 * catcher - user handler address, stored in the frame
 * ksi     - kernel signal info (signal number, code, trap number, address)
 * mask    - signal mask to save in the frame
 *
 * Entered with the process lock and ps_mtx held (asserted); ps_mtx is
 * dropped while the frame is built and re-taken at the end.
 */
540 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
542 struct thread *td = curthread;
543 struct proc *p = td->td_proc;
545 struct trapframe *regs;
546 struct l_sigframe *fp, frame;
551 PROC_LOCK_ASSERT(p, MA_OWNED);
553 sig = ksi->ksi_signo;
554 code = ksi->ksi_code;
555 mtx_assert(&psp->ps_mtx, MA_OWNED);
556 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
557 /* Signal handler installed with SA_SIGINFO. */
558 linux_rt_sendsig(catcher, ksi, mask);
/* Remember whether we are already running on the alternate signal stack. */
562 oonstack = sigonstack(regs->tf_esp);
566 printf(ARGS(sendsig, "%p, %d, %p, %u"),
567 catcher, sig, (void*)mask, code);
571 * Allocate space for the signal handler context.
/*
 * Top of the alternate stack if it is configured, not yet in use and
 * requested for this signal; otherwise just below the current user %esp.
 */
573 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
574 SIGISMEMBER(psp->ps_sigonstack, sig)) {
575 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
576 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
578 fp = (struct l_sigframe *)regs->tf_esp - 1;
579 mtx_unlock(&psp->ps_mtx);
583 * Build the argument list for the signal handler.
/* Translate the FreeBSD signal number to the Linux one via the ABI table. */
585 if (p->p_sysent->sv_sigtbl)
586 if (sig <= p->p_sysent->sv_sigsize)
587 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
589 bzero(&frame, sizeof(frame));
591 frame.sf_handler = catcher;
594 bsd_to_linux_sigset(mask, &lmask);
597 * Build the signal context to be used by sigreturn.
/* The first mask word lives in the sigcontext; the rest go to sf_extramask below. */
599 frame.sf_sc.sc_mask = lmask.__bits[0];
600 frame.sf_sc.sc_gs = rgs();
601 frame.sf_sc.sc_fs = regs->tf_fs;
602 frame.sf_sc.sc_es = regs->tf_es;
603 frame.sf_sc.sc_ds = regs->tf_ds;
604 frame.sf_sc.sc_edi = regs->tf_edi;
605 frame.sf_sc.sc_esi = regs->tf_esi;
606 frame.sf_sc.sc_ebp = regs->tf_ebp;
607 frame.sf_sc.sc_ebx = regs->tf_ebx;
608 frame.sf_sc.sc_edx = regs->tf_edx;
609 frame.sf_sc.sc_ecx = regs->tf_ecx;
610 frame.sf_sc.sc_eax = regs->tf_eax;
611 frame.sf_sc.sc_eip = regs->tf_eip;
612 frame.sf_sc.sc_cs = regs->tf_cs;
613 frame.sf_sc.sc_eflags = regs->tf_eflags;
614 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
615 frame.sf_sc.sc_ss = regs->tf_ss;
616 frame.sf_sc.sc_err = regs->tf_err;
617 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr;
618 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
620 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
621 frame.sf_extramask[i] = lmask.__bits[i+1];
623 if (copyout(&frame, fp, sizeof(frame)) != 0) {
625 * Process has trashed its stack; give it an illegal
626 * instruction to halt it in its tracks.
633 * Build context to run handler in.
/*
 * Resume in user mode on the new frame at the start of the sigcode, with
 * trace/VM86/direction flags cleared and the user selectors loaded.
 */
635 regs->tf_esp = (int)fp;
636 regs->tf_eip = p->p_sysent->sv_sigcode_base;
637 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
638 regs->tf_cs = _ucodesel;
639 regs->tf_ds = _udatasel;
640 regs->tf_es = _udatasel;
641 regs->tf_fs = _udatasel;
642 regs->tf_ss = _udatasel;
644 mtx_lock(&psp->ps_mtx);
648 * System call to cleanup state after a signal
649 * has been taken. Reset signal mask and
650 * stack state from context left by sendsig (above).
651 * Return to previous pc and psl as specified by
652 * context left by sendsig. Check carefully to
653 * make sure that the user has not modified the
654 * psl to gain improper privileges or to cause
/*
 * linux_sigreturn system call: undo what linux_sendsig() set up.
 *
 * Copies the struct l_sigframe back in from args->sfp, rejects frames whose
 * eflags or %cs would raise privilege, restores the thread's signal mask
 * from sc_mask plus sf_extramask, and reloads the trapframe registers from
 * the saved sigcontext.
 *
 * Returns EJUSTRETURN on success so the restored register values are left
 * untouched.  A %cs that is not at user privilege level raises SIGBUS
 * (BUS_OBJERR / T_PROTFLT) via trapsignal().
 */
658 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
660 struct l_sigframe frame;
661 struct trapframe *regs;
670 if (ldebug(sigreturn))
671 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
674 * The trampoline code hands us the sigframe.
675 * It is unsafe to keep track of it ourselves, in the event that a
676 * program jumps out of a signal handler.
678 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
682 * Check for security violations.
/* The saved and current eflags may differ only in bits covered by PSL_USERCHANGE. */
684 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
685 eflags = frame.sf_sc.sc_eflags;
687 * XXX do allow users to change the privileged flag PSL_RF. The
688 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
689 * sometimes set it there too. tf_eflags is kept in the signal
690 * context during signal handling and there is no other place
691 * to remember it, so the PSL_RF bit may be corrupted by the
692 * signal handler without us knowing. Corruption of the PSL_RF
693 * bit at worst causes one more or one less debugger trap, so
694 * allowing it is fairly harmless.
696 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
700 * Don't allow users to load a valid privileged %cs. Let the
701 * hardware check for invalid selectors, excess privilege in
702 * other selectors, invalid %eip's and invalid %esp's.
704 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
705 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
706 ksiginfo_init_trap(&ksi);
707 ksi.ksi_signo = SIGBUS;
708 ksi.ksi_code = BUS_OBJERR;
709 ksi.ksi_trapno = T_PROTFLT;
710 ksi.ksi_addr = (void *)regs->tf_eip;
711 trapsignal(td, &ksi);
715 lmask.__bits[0] = frame.sf_sc.sc_mask;
716 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
717 lmask.__bits[i+1] = frame.sf_extramask[i];
718 linux_to_bsd_sigset(&lmask, &bmask);
719 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
722 * Restore signal context.
724 /* %gs was restored by the trampoline. */
725 regs->tf_fs = frame.sf_sc.sc_fs;
726 regs->tf_es = frame.sf_sc.sc_es;
727 regs->tf_ds = frame.sf_sc.sc_ds;
728 regs->tf_edi = frame.sf_sc.sc_edi;
729 regs->tf_esi = frame.sf_sc.sc_esi;
730 regs->tf_ebp = frame.sf_sc.sc_ebp;
731 regs->tf_ebx = frame.sf_sc.sc_ebx;
732 regs->tf_edx = frame.sf_sc.sc_edx;
733 regs->tf_ecx = frame.sf_sc.sc_ecx;
734 regs->tf_eax = frame.sf_sc.sc_eax;
735 regs->tf_eip = frame.sf_sc.sc_eip;
736 regs->tf_cs = frame.sf_sc.sc_cs;
737 regs->tf_eflags = eflags;
738 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
739 regs->tf_ss = frame.sf_sc.sc_ss;
741 return (EJUSTRETURN);
745 * System call to cleanup state after a signal
746 * has been taken. Reset signal mask and
747 * stack state from context left by rt_sendsig (above).
748 * Return to previous pc and psl as specified by
749 * context left by rt_sendsig. Check carefully to
750 * make sure that the user has not modified the
751 * psl to gain improper privileges or to cause
/*
 * linux_rt_sigreturn system call: undo what linux_rt_sendsig() set up.
 *
 * Copies the struct l_ucontext back in from args->ucp, applies the same
 * eflags and %cs privilege checks as linux_sigreturn(), restores the signal
 * mask from uc_sigmask, reloads the trapframe registers from uc_mcontext
 * and finally re-installs the alternate signal stack settings.
 *
 * Returns EJUSTRETURN on success.  A %cs that is not at user privilege
 * level raises SIGBUS (BUS_OBJERR / T_PROTFLT) via trapsignal().
 */
755 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
757 struct l_ucontext uc;
758 struct l_sigcontext *context;
762 struct trapframe *regs;
769 if (ldebug(rt_sigreturn))
770 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
773 * The trampoline code hands us the ucontext.
774 * It is unsafe to keep track of it ourselves, in the event that a
775 * program jumps out of a signal handler.
777 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
/* All register state below is read from the machine context inside uc. */
780 context = &uc.uc_mcontext;
783 * Check for security violations.
/* The saved and current eflags may differ only in bits covered by PSL_USERCHANGE. */
785 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
786 eflags = context->sc_eflags;
788 * XXX do allow users to change the privileged flag PSL_RF. The
789 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
790 * sometimes set it there too. tf_eflags is kept in the signal
791 * context during signal handling and there is no other place
792 * to remember it, so the PSL_RF bit may be corrupted by the
793 * signal handler without us knowing. Corruption of the PSL_RF
794 * bit at worst causes one more or one less debugger trap, so
795 * allowing it is fairly harmless.
797 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
801 * Don't allow users to load a valid privileged %cs. Let the
802 * hardware check for invalid selectors, excess privilege in
803 * other selectors, invalid %eip's and invalid %esp's.
805 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
806 if (!CS_SECURE(context->sc_cs)) {
807 ksiginfo_init_trap(&ksi);
808 ksi.ksi_signo = SIGBUS;
809 ksi.ksi_code = BUS_OBJERR;
810 ksi.ksi_trapno = T_PROTFLT;
811 ksi.ksi_addr = (void *)regs->tf_eip;
812 trapsignal(td, &ksi);
816 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
817 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
820 * Restore signal context
822 /* %gs was restored by the trampoline. */
823 regs->tf_fs = context->sc_fs;
824 regs->tf_es = context->sc_es;
825 regs->tf_ds = context->sc_ds;
826 regs->tf_edi = context->sc_edi;
827 regs->tf_esi = context->sc_esi;
828 regs->tf_ebp = context->sc_ebp;
829 regs->tf_ebx = context->sc_ebx;
830 regs->tf_edx = context->sc_edx;
831 regs->tf_ecx = context->sc_ecx;
832 regs->tf_eax = context->sc_eax;
833 regs->tf_eip = context->sc_eip;
834 regs->tf_cs = context->sc_cs;
835 regs->tf_eflags = eflags;
836 regs->tf_esp = context->sc_esp_at_signal;
837 regs->tf_ss = context->sc_ss;
840 * call sigaltstack & ignore results..
843 ss.ss_sp = lss->ss_sp;
844 ss.ss_size = lss->ss_size;
845 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
848 if (ldebug(rt_sigreturn))
849 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
850 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
852 (void)kern_sigaltstack(td, &ss, NULL);
854 return (EJUSTRETURN);
/*
 * sv_fetch_syscall_args implementation for the Linux ABI.
 * The syscall number is read from %eax and the six argument slots from
 * %ebx, %ecx, %edx, %esi, %edi and %ebp of the saved user frame.
 * Fills in sa->code, sa->args[], sa->callp and sa->narg, and seeds the
 * thread's return values (retval[0] = 0, retval[1] = current %edx).
 */
858 linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
861 struct trapframe *frame;
864 frame = td->td_frame;
866 sa->code = frame->tf_eax;
867 sa->args[0] = frame->tf_ebx;
868 sa->args[1] = frame->tf_ecx;
869 sa->args[2] = frame->tf_edx;
870 sa->args[3] = frame->tf_esi;
871 sa->args[4] = frame->tf_edi;
872 sa->args[5] = frame->tf_ebp; /* Unconfirmed */
/* Syscall numbers beyond the table are routed to table entry 0. */
874 if (sa->code >= p->p_sysent->sv_size)
875 sa->callp = &p->p_sysent->sv_table[0];
877 sa->callp = &p->p_sysent->sv_table[sa->code];
878 sa->narg = sa->callp->sy_narg;
880 td->td_retval[0] = 0;
881 td->td_retval[1] = frame->tf_edx;
887 * If a linux binary is exec'ing something, try this image activator
888 * first. We override standard shell script execution in order to
889 * be able to modify the interpreter path. We only do this if a linux
890 * binary is doing the exec, so we do not create an EXEC module for it.
/*
 * sv_imgact_try hook of both Linux sysentvecs.
 * When the image header starts with the "#!" magic, runs the regular shell
 * image activator and, if that succeeds, asks linux_emul_convpath() for an
 * alternate location of the interpreter.  A path found that way replaces
 * imgp->interpreter_name and is also recorded in imgp->args->fname_buf.
 */
892 static int exec_linux_imgact_try(struct image_params *iparams);
895 exec_linux_imgact_try(struct image_params *imgp)
897 const char *head = (const char *)imgp->image_header;
902 * The interpreter for shell scripts run from a linux binary needs
903 * to be located in /compat/linux if possible in order to recursively
904 * maintain linux path emulation.
906 if (((const short *)head)[0] == SHELLMAGIC) {
908 * Run our normal shell image activator. If it succeeds attempt
909 * to use the alternate path for the interpreter. If an alternate
910 * path is found, use our stringspace to store it.
912 if ((error = exec_shell_imgact(imgp)) == 0) {
913 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
914 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
916 imgp->args->fname_buf =
917 imgp->interpreter_name = rpath;
924 * exec_setregs may initialize some registers differently than Linux
925 * does, thus potentially confusing Linux binaries. If necessary, we
926 * override the exec_setregs default(s) here.
/*
 * sv_setregs hook of both Linux sysentvecs.
 * Runs the generic exec_setregs() first and then applies the Linux-specific
 * overrides; the visible one sets the initial FPU control word in the pcb
 * to __LINUX_NPXCW__.
 */
929 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
931 struct pcb *pcb = td->td_pcb;
933 exec_setregs(td, imgp, stack);
935 /* Linux sets %gs to 0, we default to _udatasel */
939 pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
/*
 * Hands back a machine/platform name through *dst; linux_elf_modevent()
 * uses it to initialise linux_platform.
 * NOTE(review): the body is not visible here -- how the string is chosen
 * should be confirmed against the full source.
 */
943 linux_get_machine(const char **dst)
/*
 * System entry vector for Linux a.out binaries.
 * Shares the syscall table, signal/errno translation tables, sendsig
 * routine and trampoline with the ELF vector, but uses linux_fixup(), the
 * native PS_STRINGS address and the generic exec_copyout_strings().
 */
961 struct sysentvec linux_sysvec = {
962 .sv_size = LINUX_SYS_MAXSYSCALL,
963 .sv_table = linux_sysent,
965 .sv_sigsize = LINUX_SIGTBLSZ,
966 .sv_sigtbl = bsd_to_linux_signal,
967 .sv_errsize = ELAST + 1,
968 .sv_errtbl = bsd_to_linux_errno,
969 .sv_transtrap = translate_traps,
970 .sv_fixup = linux_fixup,
971 .sv_sendsig = linux_sendsig,
972 .sv_sigcode = linux_sigcode,
973 .sv_szsigcode = &linux_szsigcode,
974 .sv_prepsyscall = NULL,
975 .sv_name = "Linux a.out",
977 .sv_imgact_try = exec_linux_imgact_try,
978 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
979 .sv_pagesize = PAGE_SIZE,
980 .sv_minuser = VM_MIN_ADDRESS,
981 .sv_maxuser = VM_MAXUSER_ADDRESS,
982 .sv_usrstack = LINUX_USRSTACK,
983 .sv_psstrings = PS_STRINGS,
984 .sv_stackprot = VM_PROT_ALL,
985 .sv_copyout_strings = exec_copyout_strings,
986 .sv_setregs = exec_linux_setregs,
989 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
990 .sv_set_syscall_retval = cpu_set_syscall_retval,
991 .sv_fetch_syscall_args = linux_fetch_syscall_args,
992 .sv_syscallnames = NULL,
993 .sv_shared_page_base = LINUX_SHAREDPAGE,
994 .sv_shared_page_len = PAGE_SIZE,
995 .sv_schedtail = linux_schedtail,
/* Registers the vector for initialisation during boot/module load. */
997 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
/*
 * System entry vector for Linux ELF binaries; referenced by both ELF brand
 * descriptors below.  Differs from the a.out vector in using
 * elf_linux_fixup(), LINUX_PS_STRINGS, linux_copyout_strings(), the ELF
 * core dumper and the SV_SHP flag instead of SV_AOUT.
 */
999 struct sysentvec elf_linux_sysvec = {
1000 .sv_size = LINUX_SYS_MAXSYSCALL,
1001 .sv_table = linux_sysent,
1003 .sv_sigsize = LINUX_SIGTBLSZ,
1004 .sv_sigtbl = bsd_to_linux_signal,
1005 .sv_errsize = ELAST + 1,
1006 .sv_errtbl = bsd_to_linux_errno,
1007 .sv_transtrap = translate_traps,
1008 .sv_fixup = elf_linux_fixup,
1009 .sv_sendsig = linux_sendsig,
1010 .sv_sigcode = linux_sigcode,
1011 .sv_szsigcode = &linux_szsigcode,
1012 .sv_prepsyscall = NULL,
1013 .sv_name = "Linux ELF",
1014 .sv_coredump = elf32_coredump,
1015 .sv_imgact_try = exec_linux_imgact_try,
1016 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1017 .sv_pagesize = PAGE_SIZE,
1018 .sv_minuser = VM_MIN_ADDRESS,
1019 .sv_maxuser = VM_MAXUSER_ADDRESS,
1020 .sv_usrstack = LINUX_USRSTACK,
1021 .sv_psstrings = LINUX_PS_STRINGS,
1022 .sv_stackprot = VM_PROT_ALL,
1023 .sv_copyout_strings = linux_copyout_strings,
1024 .sv_setregs = exec_linux_setregs,
1025 .sv_fixlimit = NULL,
1027 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
1028 .sv_set_syscall_retval = cpu_set_syscall_retval,
1029 .sv_fetch_syscall_args = linux_fetch_syscall_args,
1030 .sv_syscallnames = NULL,
1031 .sv_shared_page_base = LINUX_SHAREDPAGE,
1032 .sv_shared_page_len = PAGE_SIZE,
1033 .sv_schedtail = linux_schedtail,
/* Registers the vector for initialisation during boot/module load. */
1035 INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
/*
 * Vendor name matched by linux_brandnote, and the value the first
 * descriptor word of that note must carry (checked in linux_trans_osrel()).
 */
1037 static char GNU_ABI_VENDOR[] = "GNU";
1038 static int GNULINUX_ABI_DESC = 0;
/*
 * trans_osrel callback of linux_brandnote: derive the emulated kernel
 * release from an ELF note.
 *
 * note  - note header; the name follows it, padded to Elf32_Addr size, and
 *         the descriptor words follow the name
 * osrel - receives desc[1] * 1000000 + desc[2] * 1000 + desc[3]
 *
 * The note is only accepted when desc[0] equals GNULINUX_ABI_DESC.
 */
1041 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1043 const Elf32_Word *desc;
/* Step over the note header and the padded name to reach the descriptor. */
1046 p = (uintptr_t)(note + 1);
1047 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1049 desc = (const Elf32_Word *)p;
1050 if (desc[0] != GNULINUX_ABI_DESC)
1054 * For linux we encode osrel as follows (see linux_mib.c):
1055 * VVVMMMIII (version, major, minor), see linux_mib.c.
1057 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
/*
 * Brand note description shared by both Linux ELF brands: a note whose
 * vendor is "GNU" with a descriptor of 16 bytes; BN_TRANSLATE_OSREL requests
 * that linux_trans_osrel() be used to extract the OS release.
 */
1062 static Elf_Brandnote linux_brandnote = {
1063 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1064 .hdr.n_descsz = 16, /* XXX at least 16 */
1066 .vendor = GNU_ABI_VENDOR,
1067 .flags = BN_TRANSLATE_OSREL,
1068 .trans_osrel = linux_trans_osrel
/*
 * ELF brand for Linux binaries that name /lib/ld-linux.so.1 as their
 * interpreter.  Paths are resolved under /compat/linux and the binaries run
 * with elf_linux_sysvec.
 */
1071 static Elf32_Brandinfo linux_brand = {
1072 .brand = ELFOSABI_LINUX,
1074 .compat_3_brand = "Linux",
1075 .emul_path = "/compat/linux",
1076 .interp_path = "/lib/ld-linux.so.1",
1077 .sysvec = &elf_linux_sysvec,
1078 .interp_newpath = NULL,
1079 .brand_note = &linux_brandnote,
1080 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * ELF brand for Linux binaries that name /lib/ld-linux.so.2 as their
 * interpreter; otherwise identical to linux_brand.
 */
1083 static Elf32_Brandinfo linux_glibc2brand = {
1084 .brand = ELFOSABI_LINUX,
1086 .compat_3_brand = "Linux",
1087 .emul_path = "/compat/linux",
1088 .interp_path = "/lib/ld-linux.so.2",
1089 .sysvec = &elf_linux_sysvec,
1090 .interp_newpath = NULL,
1091 .brand_note = &linux_brandnote,
1092 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * NULL-terminated list of brands that linux_elf_modevent() inserts and
 * removes (its loops stop at the first NULL entry).
 */
1095 Elf32_Brandinfo *linux_brandlist[] = {
/*
 * Module event handler for the Linux ELF emulation.
 *
 * Load path: inserts every brand from linux_brandlist, registers the ioctl
 * and device handlers from their linker sets, initialises the emulation and
 * futex locks, hooks process exit/exec events, records the platform string
 * and its padded size, registers the jail OSD hooks and picks the clock
 * tick rate exported to Linux (stathz if non-zero, else hz).
 *
 * Unload path: refuses while any brand is still in use, otherwise removes
 * the brands and tears down everything the load path set up.
 *
 * mod, data - unused by the visible code
 * type      - module event being delivered
 */
1102 linux_elf_modevent(module_t mod, int type, void *data)
1104 Elf32_Brandinfo **brandinfo;
1106 struct linux_ioctl_handler **lihp;
1107 struct linux_device_handler **ldhp;
/* Load: insert all brands; a negative return marks failure. */
1113 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1115 if (elf32_insert_brand_entry(*brandinfo) < 0)
1118 SET_FOREACH(lihp, linux_ioctl_handler_set)
1119 linux_ioctl_register_handler(*lihp);
1120 SET_FOREACH(ldhp, linux_device_handler_set)
1121 linux_device_register_handler(*ldhp);
1122 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1123 sx_init(&emul_shared_lock, "emuldata->shared lock");
1124 LIST_INIT(&futex_list);
1125 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1126 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1128 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
/* Cache the platform string and the space it needs on the user stack. */
1130 linux_get_machine(&linux_platform);
1131 linux_szplatform = roundup(strlen(linux_platform) + 1,
1133 linux_osd_jail_register();
1134 stclohz = (stathz ? stathz : hz);
1136 printf("Linux ELF exec handler installed\n");
1138 printf("cannot insert Linux ELF brand handler\n");
/* Unload: first make sure no brand is in use, then remove them all. */
1141 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1143 if (elf32_brand_inuse(*brandinfo))
1146 for (brandinfo = &linux_brandlist[0];
1147 *brandinfo != NULL; ++brandinfo)
1148 if (elf32_remove_brand_entry(*brandinfo) < 0)
1152 SET_FOREACH(lihp, linux_ioctl_handler_set)
1153 linux_ioctl_unregister_handler(*lihp);
1154 SET_FOREACH(ldhp, linux_device_handler_set)
1155 linux_device_unregister_handler(*ldhp);
1156 mtx_destroy(&emul_lock);
1157 sx_destroy(&emul_shared_lock);
1158 mtx_destroy(&futex_mtx);
1159 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1160 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1161 linux_osd_jail_deregister();
1163 printf("Linux ELF exec handler removed\n");
1165 printf("Could not deinstall ELF interpreter entry\n");
/*
 * Module glue: linux_elf_mod describes the module and DECLARE_MODULE_TIED
 * registers it under the name linuxelf at the SI_SUB_EXEC stage.
 * NOTE(review): the initialiser fields are not visible here; presumably
 * they name linux_elf_modevent() as the event handler -- confirm.
 */
1173 static moduledata_t linux_elf_mod = {
1179 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);