2 * Copyright (c) 1994-1996 Søren Schmidt
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
60 #include <machine/cpu.h>
61 #include <machine/cputypes.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 #include <compat/linux/linux_futex.h>
68 #include <compat/linux/linux_emul.h>
69 #include <compat/linux/linux_mib.h>
70 #include <compat/linux/linux_misc.h>
71 #include <compat/linux/linux_signal.h>
72 #include <compat/linux/linux_util.h>
/* Module version 1 for "linux" and the M_LINUX malloc type ("Linux mode structures"). */
74 MODULE_VERSION(linux, 1);
76 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
/*
 * SHELLMAGIC is the two-byte "#!" script marker expressed as a 16-bit
 * value, so its numeric value depends on the host byte order.  It is
 * compared against the start of the image header in
 * exec_linux_imgact_try().
 */
78 #if BYTE_ORDER == LITTLE_ENDIAN
79 #define SHELLMAGIC 0x2123 /* #! */
81 #define SHELLMAGIC 0x2321
85 * Allow the sendsig functions to use the ldebug() facility
86 * even though they are not syscalls themselves. Map them
87 * to syscall 0. This is slightly less bogus than using
90 #define LINUX_SYS_linux_rt_sendsig 0
91 #define LINUX_SYS_linux_sendsig 0
/* Signal trampoline code and its size; installed as sv_sigcode / sv_szsigcode below. */
93 extern char linux_sigcode[];
94 extern int linux_szsigcode;
/* Linux syscall table; installed as sv_table in both sysentvecs below. */
96 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
/* Handler sets walked with SET_FOREACH in linux_elf_modevent(). */
98 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
99 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
/* Forward declarations of the file-local functions referenced by the sysentvecs. */
101 static int linux_fixup(register_t **stack_base,
102 struct image_params *iparams);
103 static int elf_linux_fixup(register_t **stack_base,
104 struct image_params *iparams);
105 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
107 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
108 static void exec_linux_setregs(struct thread *td, u_long entry,
109 u_long stack, u_long ps_strings);
110 static register_t *linux_copyout_strings(struct image_params *imgp);
111 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
/*
 * Platform string and its rounded-up size; both are set in
 * linux_elf_modevent() and consumed by linux_copyout_strings() and
 * elf_linux_fixup().
 */
113 static int linux_szplatform;
114 const char *linux_platform;
/* Tags returned by EVENTHANDLER_REGISTER, kept so the handlers can be deregistered. */
116 static eventhandler_tag linux_exit_tag;
117 static eventhandler_tag linux_schedtail_tag;
118 static eventhandler_tag linux_exec_tag;
121 * Linux syscalls return negative errno values; we use positive ones and map them
123 * FreeBSD: src/sys/sys/errno.h
124 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
125 * linux-2.6.17.8/include/asm-generic/errno.h
/*
 * Errno translation table with ELAST + 1 entries, installed as
 * sv_errtbl in both sysentvecs.  The index is the FreeBSD errno and
 * each entry is already negated.
 * NOTE(review): the last initializer row and the closing brace are not
 * visible in this excerpt.
 */
127 static int bsd_to_linux_errno[ELAST + 1] = {
128 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
129 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
130 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
131 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
132 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
133 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
134 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
135 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
136 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
/*
 * FreeBSD-to-Linux signal number table, installed as sv_sigtbl and
 * indexed with _SIG_IDX(sig) by the sendsig functions below.
 * NOTE(review): the two 0 entries presumably mark FreeBSD signals with
 * no Linux counterpart -- confirm against the signal headers.
 */
140 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
141 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
142 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
143 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
144 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
145 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
146 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
147 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
148 0, LINUX_SIGUSR1, LINUX_SIGUSR2
/*
 * Linux-to-FreeBSD signal number table (the reverse direction of
 * bsd_to_linux_signal).  It has external linkage and is not referenced
 * in the visible part of this file.
 * NOTE(review): presumably indexed the same way (signal number minus
 * one) -- confirm at the call sites.
 */
151 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
152 SIGHUP, SIGINT, SIGQUIT, SIGILL,
153 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
154 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
155 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
156 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
157 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
158 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
159 SIGIO, SIGURG, SIGSYS
/*
 * Trap number translation table.  The index is the FreeBSD trap number
 * (named in the per-row comments) and the value is the number reported
 * to Linux programs in sc_trapno; LINUX_T_UNKNOWN (255) fills rows that
 * have no mapping.  Use the bsd_to_linux_trapcode() macro for lookups.
 * NOTE(review): rows 3, 18-22 and 28 and the closing brace are not
 * visible in this excerpt.
 */
162 #define LINUX_T_UNKNOWN 255
163 static int _bsd_to_linux_trapcode[] = {
164 LINUX_T_UNKNOWN, /* 0 */
165 6, /* 1 T_PRIVINFLT */
166 LINUX_T_UNKNOWN, /* 2 */
168 LINUX_T_UNKNOWN, /* 4 */
169 LINUX_T_UNKNOWN, /* 5 */
170 16, /* 6 T_ARITHTRAP */
171 254, /* 7 T_ASTFLT */
172 LINUX_T_UNKNOWN, /* 8 */
173 13, /* 9 T_PROTFLT */
174 1, /* 10 T_TRCTRAP */
175 LINUX_T_UNKNOWN, /* 11 */
176 14, /* 12 T_PAGEFLT */
177 LINUX_T_UNKNOWN, /* 13 */
178 17, /* 14 T_ALIGNFLT */
179 LINUX_T_UNKNOWN, /* 15 */
180 LINUX_T_UNKNOWN, /* 16 */
181 LINUX_T_UNKNOWN, /* 17 */
187 8, /* 23 T_DOUBLEFLT */
188 9, /* 24 T_FPOPFLT */
189 10, /* 25 T_TSSFLT */
190 11, /* 26 T_SEGNPFLT */
191 12, /* 27 T_STKFLT */
193 19, /* 29 T_XMMFLT */
194 15 /* 30 T_RESERVED */
/*
 * Bounds-checked lookup into _bsd_to_linux_trapcode: when code is below
 * the table's element count the matching entry is selected.  Note that
 * code is evaluated more than once.
 * NOTE(review): the out-of-range arm of the conditional is not visible
 * in this excerpt.
 */
196 #define bsd_to_linux_trapcode(code) \
197 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
198 _bsd_to_linux_trapcode[(code)]: \
202 * If FreeBSD & Linux have a difference of opinion about what a trap
203 * means, deal with it here.
/*
 * Trap translation hook, installed as sv_transtrap in both sysentvecs.
 * Takes the signal about to be delivered and the trap code.  Only the
 * initial test for signals other than SIGBUS is visible in this
 * excerpt; the return type and the rest of the body are missing.
 */
208 translate_traps(int signal, int trap_code)
210 if (signal != SIGBUS)
/*
 * Stack fixup for the a.out sysentvec (installed as sv_fixup in
 * linux_sysvec).  envp is computed as the slot argc + 1 entries above
 * *stack_base, and envp, argv and argc are then stored through
 * *stack_base in that order.
 * NOTE(review): the assignment to argv and any adjustment of
 * *stack_base between the three stores are not visible in this excerpt.
 */
224 linux_fixup(register_t **stack_base, struct image_params *imgp)
226 register_t *argv, *envp;
229 envp = *stack_base + (imgp->args->argc + 1);
231 **stack_base = (intptr_t)(void *)envp;
233 **stack_base = (intptr_t)(void *)argv;
235 **stack_base = imgp->args->argc;
/*
 * Stack fixup for the ELF sysentvec (installed as sv_fixup in
 * elf_linux_sysvec).  Emits the auxiliary vector entries listed below
 * starting at pos, which lies past the argv and envp pointer arrays and
 * their two terminators, then frees imgp->auxargs and stores argc at
 * the stack base.
 * NOTE(review): AUXARGS_ENTRY is defined elsewhere; it presumably
 * stores an id/value pair at pos and advances it -- confirm.
 */
240 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
244 Elf32_Addr *uplatform;
245 struct ps_strings *arginfo;
/* Must run in the context of the process being exec'd. */
248 KASSERT(curthread->td_proc == imgp->proc,
249 ("unsafe elf_linux_fixup(), should be curproc"));
/*
 * uplatform is the user address of the platform string, located below
 * ps_strings and the signal code (see linux_copyout_strings()).
 */
252 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
253 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
255 args = (Elf32_Auxargs *)imgp->auxargs;
256 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
258 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
261 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
262 * as it has appeared in the 2.4.0-rc7 first time.
263 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
264 * glibc falls back to the hard-coded CLK_TCK value when aux entry
266 * Also see linux_times() implementation.
268 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
269 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
270 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
271 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
272 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
273 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
274 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
275 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
276 AUXARGS_ENTRY(pos, AT_BASE, args->base);
277 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
/*
 * NOTE(review): AT_EUID / AT_EGID are filled from the saved ids
 * (cr_svuid / cr_svgid), not from the effective ids -- confirm that
 * this is intended.
 */
278 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
279 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
280 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
281 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
282 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
283 if (args->execfd != -1)
284 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
285 AUXARGS_ENTRY(pos, AT_NULL, 0);
/* The auxargs buffer is no longer needed once the vector is written. */
287 free(imgp->auxargs, M_TEMP);
288 imgp->auxargs = NULL;
291 **stack_base = (register_t)imgp->args->argc;
296 * Copied from kern/kern_exec.c
/*
 * sv_copyout_strings implementation for the ELF sysentvec.  Working
 * downward from ps_strings it copies out the signal code and then the
 * platform string, copies the argument/environment strings to destp,
 * records the argv/envp vector addresses and counts in ps_strings, and
 * fills the pointer vectors.  vectp, the start of the vector table, is
 * also used as the initial stack base.
 * NOTE(review): in the visible lines the results of copyout() and
 * suword() are not checked.
 */
299 linux_copyout_strings(struct image_params *imgp)
303 char *stringp, *destp;
304 register_t *stack_base;
305 struct ps_strings *arginfo;
309 * Calculate string base and vector table pointers.
310 * Also deal with signal trampoline code for this exec type.
313 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
314 destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
315 linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
/* Signal code goes immediately below ps_strings. */
321 copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
322 linux_szsigcode), linux_szsigcode);
325 * install LINUX_PLATFORM
327 copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
328 linux_szplatform), linux_szplatform);
331 * If we have a valid auxargs ptr, prepare some room
336 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
337 * lower compatibility.
/* Keep a caller-supplied auxarg_size; otherwise default to LINUX_AT_COUNT * 2. */
339 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
340 (LINUX_AT_COUNT * 2);
342 * The '+ 2' is for the null pointers at the end of each of
343 * the arg and env vector sets,and imgp->auxarg_size is room
344 * for argument of Runtime loader.
346 vectp = (char **)(destp - (imgp->args->argc +
347 imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
350 * The '+ 2' is for the null pointers at the end of each of
351 * the arg and env vector sets
353 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
358 * vectp also becomes our initial stack base
360 stack_base = (register_t *)vectp;
362 stringp = imgp->args->begin_argv;
363 argc = imgp->args->argc;
364 envc = imgp->args->envc;
367 * Copy out strings - arguments and environment.
369 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
372 * Fill in "ps_strings" struct for ps, w, etc.
374 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
375 suword(&arginfo->ps_nargvstr, argc);
378 * Fill in argument portion of vector table.
/*
 * Each slot receives the user address of the current string; stringp
 * walks the kernel copy to the next NUL.
 * NOTE(review): the loop body that advances destp is not visible here.
 */
380 for (; argc > 0; --argc) {
381 suword(vectp++, (long)(intptr_t)destp);
382 while (*stringp++ != 0)
387 /* a null vector table pointer separates the argp's from the envp's */
390 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
391 suword(&arginfo->ps_nenvstr, envc);
394 * Fill in environment portion of vector table.
396 for (; envc > 0; --envc) {
397 suword(vectp++, (long)(intptr_t)destp);
398 while (*stringp++ != 0)
403 /* end of vector table is a null pointer */
/*
 * Segment selectors loaded into the trapframe when entering a signal
 * handler, and the offset added to the start of the signal code to
 * form the entry point used by linux_rt_sendsig().
 */
411 extern int _ucodesel, _udatasel;
412 extern unsigned long linux_sznonrtsigcode;
/*
 * Deliver a signal using the RT frame layout (struct l_rt_sigframe):
 * siginfo plus a ucontext.  Called from linux_sendsig() for handlers
 * installed with SA_SIGINFO.  Builds the frame in kernel memory, copies
 * it to the user stack (or the alternate signal stack) and redirects
 * the trapframe to the signal code.
 *
 * Entered with the process lock and ps_mtx held (asserted below).
 * ps_mtx is dropped once the frame address is chosen and taken again
 * before returning.
 */
415 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
417 struct thread *td = curthread;
418 struct proc *p = td->td_proc;
420 struct trapframe *regs;
421 struct l_rt_sigframe *fp, frame;
425 sig = ksi->ksi_signo;
426 code = ksi->ksi_code;
427 PROC_LOCK_ASSERT(p, MA_OWNED);
429 mtx_assert(&psp->ps_mtx, MA_OWNED);
431 oonstack = sigonstack(regs->tf_esp);
434 if (ldebug(rt_sendsig))
435 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
436 catcher, sig, (void*)mask, code);
439 * Allocate space for the signal handler context.
/*
 * Use the top of the alternate stack when one is configured, we are
 * not already on it and the signal asked for it; otherwise place the
 * frame just below the current user stack pointer.
 */
441 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
442 SIGISMEMBER(psp->ps_sigonstack, sig)) {
443 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
444 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
446 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
447 mtx_unlock(&psp->ps_mtx);
450 * Build the argument list for the signal handler.
/* Translate the signal number through the ABI's table when it has one. */
452 if (p->p_sysent->sv_sigtbl)
453 if (sig <= p->p_sysent->sv_sigsize)
454 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
456 bzero(&frame, sizeof(frame));
458 frame.sf_handler = catcher;
/* These pointers are user addresses inside the frame being copied out. */
460 frame.sf_siginfo = &fp->sf_si;
461 frame.sf_ucontext = &fp->sf_sc;
463 /* Fill in POSIX parts */
464 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
467 * Build the signal context to be used by sigreturn.
469 frame.sf_sc.uc_flags = 0; /* XXX ??? */
470 frame.sf_sc.uc_link = NULL; /* XXX ??? */
472 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
473 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
474 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
475 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
478 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
/* Snapshot the interrupted register state into the Linux sigcontext. */
480 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
481 frame.sf_sc.uc_mcontext.sc_gs = rgs();
482 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
483 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
484 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
485 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
486 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
487 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
488 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
489 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
490 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
491 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
492 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
493 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
494 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
495 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
496 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
497 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
498 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr;
/*
 * NOTE(review): the trap number is derived from ksi_code here, whereas
 * linux_sendsig() uses ksi->ksi_trapno -- confirm which is intended.
 */
499 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
502 if (ldebug(rt_sendsig))
503 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
504 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
505 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
508 if (copyout(&frame, fp, sizeof(frame)) != 0) {
510 * Process has trashed its stack; give it an illegal
511 * instruction to halt it in its tracks.
514 if (ldebug(rt_sendsig))
515 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
523 * Build context to run handler in.
/*
 * Resume at offset linux_sznonrtsigcode into the signal code that sits
 * just below PS_STRINGS, with the frame as the new stack pointer and
 * the user code/data selectors loaded.
 */
525 regs->tf_esp = (int)fp;
526 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
527 linux_sznonrtsigcode;
528 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
529 regs->tf_cs = _ucodesel;
530 regs->tf_ds = _udatasel;
531 regs->tf_es = _udatasel;
532 regs->tf_fs = _udatasel;
533 regs->tf_ss = _udatasel;
535 mtx_lock(&psp->ps_mtx);
540 * Send an interrupt to process.
542 * Stack is set up to allow sigcode stored
543 * in u. to call routine, followed by kcall
544 * to sigreturn routine below. After sigreturn
545 * resets the signal mask, the stack, and the
546 * frame pointer, it returns to the user
/*
 * sv_sendsig implementation for both Linux sysentvecs.  Handlers
 * installed with SA_SIGINFO are handed to linux_rt_sendsig(); all
 * others get the classic frame (struct l_sigframe) built here, copied
 * to the user stack or the alternate signal stack, and the trapframe is
 * redirected to the start of the signal code below PS_STRINGS.
 *
 * Entered with the process lock and ps_mtx held (asserted below).
 * ps_mtx is dropped once the frame address is chosen and taken again
 * before returning.
 */
550 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
552 struct thread *td = curthread;
553 struct proc *p = td->td_proc;
555 struct trapframe *regs;
556 struct l_sigframe *fp, frame;
561 PROC_LOCK_ASSERT(p, MA_OWNED);
563 sig = ksi->ksi_signo;
564 code = ksi->ksi_code;
565 mtx_assert(&psp->ps_mtx, MA_OWNED);
566 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
567 /* Signal handler installed with SA_SIGINFO. */
568 linux_rt_sendsig(catcher, ksi, mask);
572 oonstack = sigonstack(regs->tf_esp);
576 printf(ARGS(sendsig, "%p, %d, %p, %u"),
577 catcher, sig, (void*)mask, code);
581 * Allocate space for the signal handler context.
/* Same frame placement rule as in linux_rt_sendsig(). */
583 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
584 SIGISMEMBER(psp->ps_sigonstack, sig)) {
585 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
586 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
588 fp = (struct l_sigframe *)regs->tf_esp - 1;
589 mtx_unlock(&psp->ps_mtx);
593 * Build the argument list for the signal handler.
/* Translate the signal number through the ABI's table when it has one. */
595 if (p->p_sysent->sv_sigtbl)
596 if (sig <= p->p_sysent->sv_sigsize)
597 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
599 bzero(&frame, sizeof(frame));
601 frame.sf_handler = catcher;
604 bsd_to_linux_sigset(mask, &lmask);
607 * Build the signal context to be used by sigreturn.
/* Word 0 of the mask lives in the sigcontext; the rest go in sf_extramask. */
609 frame.sf_sc.sc_mask = lmask.__bits[0];
610 frame.sf_sc.sc_gs = rgs();
611 frame.sf_sc.sc_fs = regs->tf_fs;
612 frame.sf_sc.sc_es = regs->tf_es;
613 frame.sf_sc.sc_ds = regs->tf_ds;
614 frame.sf_sc.sc_edi = regs->tf_edi;
615 frame.sf_sc.sc_esi = regs->tf_esi;
616 frame.sf_sc.sc_ebp = regs->tf_ebp;
617 frame.sf_sc.sc_ebx = regs->tf_ebx;
618 frame.sf_sc.sc_edx = regs->tf_edx;
619 frame.sf_sc.sc_ecx = regs->tf_ecx;
620 frame.sf_sc.sc_eax = regs->tf_eax;
621 frame.sf_sc.sc_eip = regs->tf_eip;
622 frame.sf_sc.sc_cs = regs->tf_cs;
623 frame.sf_sc.sc_eflags = regs->tf_eflags;
624 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
625 frame.sf_sc.sc_ss = regs->tf_ss;
626 frame.sf_sc.sc_err = regs->tf_err;
627 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr;
628 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
630 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
631 frame.sf_extramask[i] = lmask.__bits[i+1];
633 if (copyout(&frame, fp, sizeof(frame)) != 0) {
635 * Process has trashed its stack; give it an illegal
636 * instruction to halt it in its tracks.
643 * Build context to run handler in.
645 regs->tf_esp = (int)fp;
646 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
647 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
648 regs->tf_cs = _ucodesel;
649 regs->tf_ds = _udatasel;
650 regs->tf_es = _udatasel;
651 regs->tf_fs = _udatasel;
652 regs->tf_ss = _udatasel;
654 mtx_lock(&psp->ps_mtx);
658 * System call to cleanup state after a signal
659 * has been taken. Reset signal mask and
660 * stack state from context left by sendsig (above).
661 * Return to previous pc and psl as specified by
662 * context left by sendsig. Check carefully to
663 * make sure that the user has not modified the
664 * psl to gain improper privileges or to cause
/*
 * Return from a handler that was entered through the classic frame
 * built by linux_sendsig().  Copies struct l_sigframe in from
 * args->sfp, validates the saved eflags and %cs, restores the signal
 * mask (sc_mask plus sf_extramask) and reloads the trapframe from the
 * saved context.
 *
 * Returns EJUSTRETURN on success, which tells the syscall return path
 * to leave the restored registers alone.
 * NOTE(review): the error returns after the failed copyin and the
 * failed security checks are not visible in this excerpt.
 */
668 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
670 struct l_sigframe frame;
671 struct trapframe *regs;
680 if (ldebug(sigreturn))
681 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
684 * The trampoline code hands us the sigframe.
685 * It is unsafe to keep track of it ourselves, in the event that a
686 * program jumps out of a signal handler.
688 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
692 * Check for security violations.
/* True when ef and oef differ only in bits covered by PSL_USERCHANGE. */
694 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
695 eflags = frame.sf_sc.sc_eflags;
697 * XXX do allow users to change the privileged flag PSL_RF. The
698 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
699 * sometimes set it there too. tf_eflags is kept in the signal
700 * context during signal handling and there is no other place
701 * to remember it, so the PSL_RF bit may be corrupted by the
702 * signal handler without us knowing. Corruption of the PSL_RF
703 * bit at worst causes one more or one less debugger trap, so
704 * allowing it is fairly harmless.
706 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
710 * Don't allow users to load a valid privileged %cs. Let the
711 * hardware check for invalid selectors, excess privilege in
712 * other selectors, invalid %eip's and invalid %esp's.
/* The saved %cs must carry user privilege level; otherwise post SIGBUS. */
714 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
715 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
716 ksiginfo_init_trap(&ksi);
717 ksi.ksi_signo = SIGBUS;
718 ksi.ksi_code = BUS_OBJERR;
719 ksi.ksi_trapno = T_PROTFLT;
720 ksi.ksi_addr = (void *)regs->tf_eip;
721 trapsignal(td, &ksi);
/* Reassemble the Linux mask from the frame and install it. */
725 lmask.__bits[0] = frame.sf_sc.sc_mask;
726 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
727 lmask.__bits[i+1] = frame.sf_extramask[i];
728 linux_to_bsd_sigset(&lmask, &bmask);
729 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
732 * Restore signal context.
734 /* %gs was restored by the trampoline. */
735 regs->tf_fs = frame.sf_sc.sc_fs;
736 regs->tf_es = frame.sf_sc.sc_es;
737 regs->tf_ds = frame.sf_sc.sc_ds;
738 regs->tf_edi = frame.sf_sc.sc_edi;
739 regs->tf_esi = frame.sf_sc.sc_esi;
740 regs->tf_ebp = frame.sf_sc.sc_ebp;
741 regs->tf_ebx = frame.sf_sc.sc_ebx;
742 regs->tf_edx = frame.sf_sc.sc_edx;
743 regs->tf_ecx = frame.sf_sc.sc_ecx;
744 regs->tf_eax = frame.sf_sc.sc_eax;
745 regs->tf_eip = frame.sf_sc.sc_eip;
746 regs->tf_cs = frame.sf_sc.sc_cs;
747 regs->tf_eflags = eflags;
748 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
749 regs->tf_ss = frame.sf_sc.sc_ss;
751 return (EJUSTRETURN);
755 * System call to cleanup state after a signal
756 * has been taken. Reset signal mask and
757 * stack state from context left by rt_sendsig (above).
758 * Return to previous pc and psl as specified by
759 * context left by rt_sendsig. Check carefully to
760 * make sure that the user has not modified the
761 * psl to gain improper privileges or to cause
/*
 * Return from a handler that was entered through the RT frame built by
 * linux_rt_sendsig().  Copies struct l_ucontext in from args->ucp,
 * validates the saved eflags and %cs, restores the signal mask from
 * uc_sigmask, reloads the trapframe from uc_mcontext and finally
 * re-applies the saved alternate-stack settings, ignoring the result.
 *
 * Returns EJUSTRETURN on success, which tells the syscall return path
 * to leave the restored registers alone.
 * NOTE(review): the error returns after the failed copyin and the
 * failed security checks, and the assignment of lss, are not visible in
 * this excerpt.
 */
765 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
767 struct l_ucontext uc;
768 struct l_sigcontext *context;
772 struct trapframe *regs;
779 if (ldebug(rt_sigreturn))
780 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
783 * The trampoline code hands us the ucontext.
784 * It is unsafe to keep track of it ourselves, in the event that a
785 * program jumps out of a signal handler.
787 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
790 context = &uc.uc_mcontext;
793 * Check for security violations.
/* True when ef and oef differ only in bits covered by PSL_USERCHANGE. */
795 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
796 eflags = context->sc_eflags;
798 * XXX do allow users to change the privileged flag PSL_RF. The
799 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
800 * sometimes set it there too. tf_eflags is kept in the signal
801 * context during signal handling and there is no other place
802 * to remember it, so the PSL_RF bit may be corrupted by the
803 * signal handler without us knowing. Corruption of the PSL_RF
804 * bit at worst causes one more or one less debugger trap, so
805 * allowing it is fairly harmless.
807 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
811 * Don't allow users to load a valid privileged %cs. Let the
812 * hardware check for invalid selectors, excess privilege in
813 * other selectors, invalid %eip's and invalid %esp's.
/* The saved %cs must carry user privilege level; otherwise post SIGBUS. */
815 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
816 if (!CS_SECURE(context->sc_cs)) {
817 ksiginfo_init_trap(&ksi);
818 ksi.ksi_signo = SIGBUS;
819 ksi.ksi_code = BUS_OBJERR;
820 ksi.ksi_trapno = T_PROTFLT;
821 ksi.ksi_addr = (void *)regs->tf_eip;
822 trapsignal(td, &ksi);
826 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
827 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
830 * Restore signal context
832 /* %gs was restored by the trampoline. */
833 regs->tf_fs = context->sc_fs;
834 regs->tf_es = context->sc_es;
835 regs->tf_ds = context->sc_ds;
836 regs->tf_edi = context->sc_edi;
837 regs->tf_esi = context->sc_esi;
838 regs->tf_ebp = context->sc_ebp;
839 regs->tf_ebx = context->sc_ebx;
840 regs->tf_edx = context->sc_edx;
841 regs->tf_ecx = context->sc_ecx;
842 regs->tf_eax = context->sc_eax;
843 regs->tf_eip = context->sc_eip;
844 regs->tf_cs = context->sc_cs;
845 regs->tf_eflags = eflags;
846 regs->tf_esp = context->sc_esp_at_signal;
847 regs->tf_ss = context->sc_ss;
850 * call sigaltstack & ignore results..
/* Convert the saved Linux stack descriptor back to the native form. */
853 ss.ss_sp = lss->ss_sp;
854 ss.ss_size = lss->ss_size;
855 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
858 if (ldebug(rt_sigreturn))
859 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
860 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
862 (void)kern_sigaltstack(td, &ss, NULL);
864 return (EJUSTRETURN);
/*
 * sv_prepsyscall hook: gather the six system call arguments from the
 * trapframe registers, in the order ebx, ecx, edx, esi, edi, ebp.
 * *params is set to NULL because nothing needs to be copied in from
 * user memory.  code is not touched in the visible lines.
 */
871 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
873 args[0] = tf->tf_ebx;
874 args[1] = tf->tf_ecx;
875 args[2] = tf->tf_edx;
876 args[3] = tf->tf_esi;
877 args[4] = tf->tf_edi;
878 args[5] = tf->tf_ebp; /* Unconfirmed */
879 *params = NULL; /* no copyin */
883 * If a linux binary is exec'ing something, try this image activator
884 * first. We override standard shell script execution in order to
885 * be able to modify the interpreter path. We only do this if a linux
886 * binary is doing the exec, so we do not create an EXEC module for it.
/*
 * sv_imgact_try hook for both Linux sysentvecs.  When the image header
 * starts with the "#!" marker, the regular shell image activator runs
 * first; if it succeeds, the interpreter path is passed through
 * linux_emul_convpath() and the converted path replaces
 * imgp->interpreter_name when its length, including the terminating
 * NUL, is at most MAXSHELLCMDLEN.
 * NOTE(review): handling of rpath (NULL check, release) and the return
 * statements are not visible in this excerpt.
 */
888 static int exec_linux_imgact_try(struct image_params *iparams);
891 exec_linux_imgact_try(struct image_params *imgp)
893 const char *head = (const char *)imgp->image_header;
898 * The interpreter for shell scripts run from a linux binary needs
899 * to be located in /compat/linux if possible in order to recursively
900 * maintain linux path emulation.
902 if (((const short *)head)[0] == SHELLMAGIC) {
904 * Run our normal shell image activator. If it succeeds attempt
905 * to use the alternate path for the interpreter. If an alternate
906 * path is found, use our stringspace to store it.
908 if ((error = exec_shell_imgact(imgp)) == 0) {
909 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
910 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
/* len counts the terminating NUL so that memcpy copies it too. */
912 len = strlen(rpath) + 1;
914 if (len <= MAXSHELLCMDLEN) {
915 memcpy(imgp->interpreter_name, rpath, len);
925 * exec_setregs may initialize some registers differently than Linux
926 * does, thus potentially confusing Linux binaries. If necessary, we
927 * override the exec_setregs default(s) here.
/*
 * sv_setregs hook for both Linux sysentvecs: run the native
 * exec_setregs() and then apply the Linux-specific overrides, which in
 * the visible lines is the initial x87 control word
 * (pcb_initial_npxcw = __LINUX_NPXCW__).
 * NOTE(review): the statement that implements the %gs override
 * announced by the comment below is not visible in this excerpt.
 */
930 exec_linux_setregs(struct thread *td, u_long entry,
931 u_long stack, u_long ps_strings)
933 struct pcb *pcb = td->td_pcb;
935 exec_setregs(td, entry, stack, ps_strings);
937 /* Linux sets %gs to 0, we default to _udatasel */
941 pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
/*
 * Hands back a machine/platform name through *dst; called from
 * linux_elf_modevent() to initialise linux_platform.
 * NOTE(review): the body is not visible in this excerpt, so how the
 * name is chosen cannot be stated here.
 */
945 linux_get_machine(const char **dst)
/*
 * System entry vector for Linux a.out binaries (sv_name "Linux a.out",
 * SV_AOUT set in sv_flags).  It shares the syscall, signal and errno
 * tables with elf_linux_sysvec but uses linux_fixup() and the native
 * exec_copyout_strings().
 * NOTE(review): some initializers and the closing brace are not visible
 * in this excerpt.
 */
963 struct sysentvec linux_sysvec = {
964 .sv_size = LINUX_SYS_MAXSYSCALL,
965 .sv_table = linux_sysent,
967 .sv_sigsize = LINUX_SIGTBLSZ,
968 .sv_sigtbl = bsd_to_linux_signal,
969 .sv_errsize = ELAST + 1,
970 .sv_errtbl = bsd_to_linux_errno,
971 .sv_transtrap = translate_traps,
972 .sv_fixup = linux_fixup,
973 .sv_sendsig = linux_sendsig,
974 .sv_sigcode = linux_sigcode,
975 .sv_szsigcode = &linux_szsigcode,
976 .sv_prepsyscall = linux_prepsyscall,
977 .sv_name = "Linux a.out",
979 .sv_imgact_try = exec_linux_imgact_try,
980 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
981 .sv_pagesize = PAGE_SIZE,
982 .sv_minuser = VM_MIN_ADDRESS,
983 .sv_maxuser = VM_MAXUSER_ADDRESS,
984 .sv_usrstack = USRSTACK,
985 .sv_psstrings = PS_STRINGS,
986 .sv_stackprot = VM_PROT_ALL,
987 .sv_copyout_strings = exec_copyout_strings,
988 .sv_setregs = exec_linux_setregs,
991 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32
/*
 * System entry vector for Linux ELF binaries (sv_name "Linux ELF");
 * both ELF brands below point at it.  Compared with linux_sysvec it
 * uses elf_linux_fixup(), linux_copyout_strings() and elf32_coredump,
 * and does not set SV_AOUT.
 * NOTE(review): some initializers and the closing brace are not visible
 * in this excerpt.
 */
994 struct sysentvec elf_linux_sysvec = {
995 .sv_size = LINUX_SYS_MAXSYSCALL,
996 .sv_table = linux_sysent,
998 .sv_sigsize = LINUX_SIGTBLSZ,
999 .sv_sigtbl = bsd_to_linux_signal,
1000 .sv_errsize = ELAST + 1,
1001 .sv_errtbl = bsd_to_linux_errno,
1002 .sv_transtrap = translate_traps,
1003 .sv_fixup = elf_linux_fixup,
1004 .sv_sendsig = linux_sendsig,
1005 .sv_sigcode = linux_sigcode,
1006 .sv_szsigcode = &linux_szsigcode,
1007 .sv_prepsyscall = linux_prepsyscall,
1008 .sv_name = "Linux ELF",
1009 .sv_coredump = elf32_coredump,
1010 .sv_imgact_try = exec_linux_imgact_try,
1011 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1012 .sv_pagesize = PAGE_SIZE,
1013 .sv_minuser = VM_MIN_ADDRESS,
1014 .sv_maxuser = VM_MAXUSER_ADDRESS,
1015 .sv_usrstack = USRSTACK,
1016 .sv_psstrings = PS_STRINGS,
1017 .sv_stackprot = VM_PROT_ALL,
1018 .sv_copyout_strings = linux_copyout_strings,
1019 .sv_setregs = exec_linux_setregs,
1020 .sv_fixlimit = NULL,
1022 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32
/*
 * Vendor name used by linux_brandnote, and the value that the first
 * descriptor word of the note must carry for linux_trans_osrel() to
 * accept it.
 */
1025 static char GNU_ABI_VENDOR[] = "GNU";
1026 static int GNULINUX_ABI_DESC = 0;
/*
 * trans_osrel callback of linux_brandnote: derive the osrel value from
 * an ELF note.  The descriptor words start after the note header and
 * the name, whose size is rounded up to sizeof(Elf32_Addr).  desc[0]
 * is compared with GNULINUX_ABI_DESC; desc[1..3] are combined as
 * desc[1] * 1000000 + desc[2] * 1000 + desc[3], e.g. 2, 6, 16 gives
 * 2006016.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
1029 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1031 const Elf32_Word *desc;
1034 p = (uintptr_t)(note + 1);
1035 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1037 desc = (const Elf32_Word *)p;
1038 if (desc[0] != GNULINUX_ABI_DESC)
1042 * For linux we encode osrel as follows (see linux_mib.c):
1043 * VVVMMMIII (version, major, minor), see linux_mib.c.
1045 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
/*
 * Brand note shared by both Linux ELF brands below: vendor "GNU" (name
 * size includes the terminating NUL), a 16-byte descriptor, and
 * BN_TRANSLATE_OSREL with linux_trans_osrel() as the translator.
 * NOTE(review): at least one initializer and the closing brace are not
 * visible in this excerpt.
 */
1050 static Elf_Brandnote linux_brandnote = {
1051 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1052 .hdr.n_descsz = 16, /* XXX at least 16 */
1054 .vendor = GNU_ABI_VENDOR,
1055 .flags = BN_TRANSLATE_OSREL,
1056 .trans_osrel = linux_trans_osrel
/*
 * ELF brand for Linux binaries whose interpreter is
 * /lib/ld-linux.so.1.  Binaries run with elf_linux_sysvec and
 * /compat/linux as the emulation path.
 * NOTE(review): at least one initializer and the closing brace are not
 * visible in this excerpt.
 */
1059 static Elf32_Brandinfo linux_brand = {
1060 .brand = ELFOSABI_LINUX,
1062 .compat_3_brand = "Linux",
1063 .emul_path = "/compat/linux",
1064 .interp_path = "/lib/ld-linux.so.1",
1065 .sysvec = &elf_linux_sysvec,
1066 .interp_newpath = NULL,
1067 .brand_note = &linux_brandnote,
1068 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * ELF brand for Linux binaries whose interpreter is
 * /lib/ld-linux.so.2; otherwise identical to linux_brand in the
 * visible fields.
 * NOTE(review): at least one initializer and the closing brace are not
 * visible in this excerpt.
 */
1071 static Elf32_Brandinfo linux_glibc2brand = {
1072 .brand = ELFOSABI_LINUX,
1074 .compat_3_brand = "Linux",
1075 .emul_path = "/compat/linux",
1076 .interp_path = "/lib/ld-linux.so.2",
1077 .sysvec = &elf_linux_sysvec,
1078 .interp_newpath = NULL,
1079 .brand_note = &linux_brandnote,
1080 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * NULL-terminated list of brands that linux_elf_modevent() inserts and
 * removes (its loops stop at the first NULL entry).
 * NOTE(review): the initializer entries are not visible in this
 * excerpt.
 */
1083 Elf32_Brandinfo *linux_brandlist[] = {
/*
 * Module event handler named in linux_elf_mod.  The first group of
 * statements is the install path: insert every brand from
 * linux_brandlist, register the ioctl and device handlers, initialise
 * the emuldata and futex locks, hook the process exit / schedtail /
 * exec events, and compute linux_platform, linux_szplatform and
 * stclohz.  The second group is the removal path: it undoes the same
 * steps after checking that no brand is in use.
 * NOTE(review): the dispatch on type, the error bookkeeping and the
 * return statements are not visible in this excerpt.
 */
1090 linux_elf_modevent(module_t mod, int type, void *data)
1092 Elf32_Brandinfo **brandinfo;
1094 struct linux_ioctl_handler **lihp;
1095 struct linux_device_handler **ldhp;
1101 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1103 if (elf32_insert_brand_entry(*brandinfo) < 0)
1106 SET_FOREACH(lihp, linux_ioctl_handler_set)
1107 linux_ioctl_register_handler(*lihp);
1108 SET_FOREACH(ldhp, linux_device_handler_set)
1109 linux_device_register_handler(*ldhp);
1110 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1111 sx_init(&emul_shared_lock, "emuldata->shared lock");
1112 LIST_INIT(&futex_list);
1113 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
/* Keep the tags so the removal path can deregister the handlers. */
1114 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1116 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
1118 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
/* The platform string size is rounded up; the + 1 covers the NUL. */
1120 linux_get_machine(&linux_platform);
1121 linux_szplatform = roundup(strlen(linux_platform) + 1,
1123 linux_osd_jail_register();
/* Use stathz for the clock tick value when it is non-zero, else hz. */
1124 stclohz = (stathz ? stathz : hz);
1126 printf("Linux ELF exec handler installed\n");
1128 printf("cannot insert Linux ELF brand handler\n");
/* Removal path: first check whether any brand is still in use. */
1131 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1133 if (elf32_brand_inuse(*brandinfo))
1136 for (brandinfo = &linux_brandlist[0];
1137 *brandinfo != NULL; ++brandinfo)
1138 if (elf32_remove_brand_entry(*brandinfo) < 0)
1142 SET_FOREACH(lihp, linux_ioctl_handler_set)
1143 linux_ioctl_unregister_handler(*lihp);
1144 SET_FOREACH(ldhp, linux_device_handler_set)
1145 linux_device_unregister_handler(*ldhp);
1146 mtx_destroy(&emul_lock);
1147 sx_destroy(&emul_shared_lock);
1148 mtx_destroy(&futex_mtx);
1149 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1150 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1151 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1152 linux_osd_jail_deregister();
1154 printf("Linux ELF exec handler removed\n");
1156 printf("Could not deinstall ELF interpreter entry\n");
/*
 * Module descriptor and its registration as "linuxelf" at SI_SUB_EXEC
 * with SI_ORDER_ANY.
 * NOTE(review): the initializer of linux_elf_mod is not visible in this
 * excerpt; presumably it names linux_elf_modevent() as the event
 * handler -- confirm.
 */
1164 static moduledata_t linux_elf_mod = {
1170 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);