2 * Copyright (c) 1994-1996 Søren Schmidt
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
60 #include <machine/cpu.h>
61 #include <machine/cputypes.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 #include <compat/linux/linux_emul.h>
68 #include <compat/linux/linux_futex.h>
69 #include <compat/linux/linux_ioctl.h>
70 #include <compat/linux/linux_mib.h>
71 #include <compat/linux/linux_misc.h>
72 #include <compat/linux/linux_signal.h>
73 #include <compat/linux/linux_util.h>
75 MODULE_VERSION(linux, 1);
77 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
/*
 * SHELLMAGIC is the value exec_linux_imgact_try() compares against the
 * first short of the image header before running the shell image
 * activator.
 */
79 #if BYTE_ORDER == LITTLE_ENDIAN
80 #define SHELLMAGIC 0x2123 /* #! */
/*
 * NOTE(review): presumably the big-endian variant; the directive that
 * separates it from the definition above is not visible here -- confirm.
 */
82 #define SHELLMAGIC 0x2321
86 * Allow the sendsig functions to use the ldebug() facility
87 * even though they are not syscalls themselves. Map them
88 * to syscall 0. This is slightly less bogus than using
/* Both sendsig routines are mapped to syscall number 0, as described above. */
91 #define LINUX_SYS_linux_rt_sendsig 0
92 #define LINUX_SYS_linux_sendsig 0
/*
 * Address installed as sv_psstrings in elf_linux_sysvec: one
 * struct ps_strings below LINUX_USRSTACK.
 */
94 #define LINUX_PS_STRINGS (LINUX_USRSTACK - sizeof(struct ps_strings))
/*
 * Signal code and its size, defined elsewhere; installed as sv_sigcode
 * and sv_szsigcode in both sysentvecs below.
 */
96 extern char linux_sigcode[];
97 extern int linux_szsigcode;
/* Syscall table installed as sv_table in both sysentvecs. */
99 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
/* Handler sets walked with SET_FOREACH in linux_elf_modevent(). */
101 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
102 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
/*
 * Forward declarations for the routines referenced by the sysentvecs
 * and by the brand note further down.
 */
104 static int linux_fixup(register_t **stack_base,
105 struct image_params *iparams);
106 static int elf_linux_fixup(register_t **stack_base,
107 struct image_params *iparams);
108 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
109 static void exec_linux_setregs(struct thread *td,
110 struct image_params *imgp, u_long stack);
111 static register_t *linux_copyout_strings(struct image_params *imgp);
112 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
/*
 * Platform string and its rounded-up size.  Both are set in
 * linux_elf_modevent() and used by linux_copyout_strings() and
 * elf_linux_fixup() when the exec stack is built.
 */
114 static int linux_szplatform;
115 const char *linux_platform;
/*
 * Tags returned by EVENTHANDLER_REGISTER in linux_elf_modevent(), kept
 * so the same handlers can be passed to EVENTHANDLER_DEREGISTER.
 */
117 static eventhandler_tag linux_exit_tag;
118 static eventhandler_tag linux_exec_tag;
121 * Linux syscalls return negative errno's, we do positive and map them
123 * FreeBSD: src/sys/sys/errno.h
124 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
125 * linux-2.6.17.8/include/asm-generic/errno.h
/*
 * Errno translation table, installed as sv_errtbl (with sv_errsize set to
 * ELAST + 1) in both sysentvecs.  The index is the FreeBSD errno value and
 * each entry is the negated Linux errno, matching the convention described
 * in the comment above.
 * NOTE(review): the final row and the closing brace are not visible in
 * this listing.
 */
127 static int bsd_to_linux_errno[ELAST + 1] = {
128 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
129 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
130 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
131 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
132 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
133 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
134 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
135 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
136 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
/*
 * FreeBSD to Linux signal number table, installed as sv_sigtbl in both
 * sysentvecs.  The sendsig routines index it with _SIG_IDX(sig) after
 * checking sig against sv_sigsize.
 * NOTE(review): the two 0 entries presumably mark FreeBSD signals that
 * have no Linux counterpart -- confirm.
 */
140 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
141 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
142 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
143 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
144 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
145 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
146 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
147 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
148 0, LINUX_SIGUSR1, LINUX_SIGUSR2
/*
 * Linux to FreeBSD signal number table; the counterpart of
 * bsd_to_linux_signal.  It is not referenced by the code visible in this
 * file, so its users live elsewhere.
 * NOTE(review): presumably indexed by Linux signal number minus one, like
 * the table above -- confirm.  SIGBUS and SIGURG each appear twice, so
 * the mapping is not one-to-one.
 */
151 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
152 SIGHUP, SIGINT, SIGQUIT, SIGILL,
153 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
154 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
155 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
156 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
157 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
158 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
159 SIGIO, SIGURG, SIGSYS
/*
 * Trap number translation used through the bsd_to_linux_trapcode() macro
 * when the sendsig routines fill in sc_trapno.  The array index is the
 * FreeBSD trap number given in each inline comment; the entry is the
 * value handed to Linux.  LINUX_T_UNKNOWN fills the slots that have no
 * mapping.
 * NOTE(review): the rows for indices 3, 18 to 22 and 28 are absent from
 * this listing (the inline index comments skip them), so the visible
 * positions do not line up with those indices.
 */
162 #define LINUX_T_UNKNOWN 255
163 static int _bsd_to_linux_trapcode[] = {
164 LINUX_T_UNKNOWN, /* 0 */
165 6, /* 1 T_PRIVINFLT */
166 LINUX_T_UNKNOWN, /* 2 */
168 LINUX_T_UNKNOWN, /* 4 */
169 LINUX_T_UNKNOWN, /* 5 */
170 16, /* 6 T_ARITHTRAP */
171 254, /* 7 T_ASTFLT */
172 LINUX_T_UNKNOWN, /* 8 */
173 13, /* 9 T_PROTFLT */
174 1, /* 10 T_TRCTRAP */
175 LINUX_T_UNKNOWN, /* 11 */
176 14, /* 12 T_PAGEFLT */
177 LINUX_T_UNKNOWN, /* 13 */
178 17, /* 14 T_ALIGNFLT */
179 LINUX_T_UNKNOWN, /* 15 */
180 LINUX_T_UNKNOWN, /* 16 */
181 LINUX_T_UNKNOWN, /* 17 */
187 8, /* 23 T_DOUBLEFLT */
188 9, /* 24 T_FPOPFLT */
189 10, /* 25 T_TSSFLT */
190 11, /* 26 T_SEGNPFLT */
191 12, /* 27 T_STKFLT */
193 19, /* 29 T_XMMFLT */
194 15 /* 30 T_RESERVED */
/*
 * Bounds-checked lookup in _bsd_to_linux_trapcode: the table is indexed
 * only when code is below the element count.  The argument is evaluated
 * more than once.
 * NOTE(review): the fallback arm of the conditional is not visible here;
 * presumably it yields LINUX_T_UNKNOWN -- confirm.
 */
196 #define bsd_to_linux_trapcode(code) \
197 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
198 _bsd_to_linux_trapcode[(code)]: \
202 * If FreeBSD & Linux have a difference of opinion about what a trap
203 * means, deal with it here.
/*
 * Trap translation hook installed as sv_transtrap in both sysentvecs.
 * Takes a signal number and a trap code.  Only the guard on SIGBUS is
 * visible in this listing; what is returned for SIGBUS and for other
 * signals cannot be read from here.
 */
208 translate_traps(int signal, int trap_code)
210 if (signal != SIGBUS)
/*
 * Stack fixup for a.out images (sv_fixup of linux_sysvec).
 *
 * stack_base: in/out pointer to the base of the initial user stack.
 * imgp:       exec image parameters; only imgp->args->argc is read here.
 *
 * Stores envp, argv and argc into user memory through *stack_base with
 * suword().
 * NOTE(review): the assignment of argv and whatever moves *stack_base
 * between the three stores are not visible in this listing.  The results
 * of suword() are not checked in the visible lines.
 */
224 linux_fixup(register_t **stack_base, struct image_params *imgp)
226 register_t *argv, *envp;
/* envp starts after the argc argv slots plus one more slot. */
229 envp = *stack_base + (imgp->args->argc + 1);
231 suword(*stack_base, (intptr_t)(void *)envp);
233 suword(*stack_base, (intptr_t)(void *)argv);
235 suword(*stack_base, imgp->args->argc);
/*
 * Stack fixup for ELF images (sv_fixup of elf_linux_sysvec).
 *
 * stack_base: in/out pointer to the base of the initial user stack.
 * imgp:       exec image parameters; imgp->auxargs is consumed and freed.
 *
 * Emits the auxiliary vector entries with AUXARGS_ENTRY starting at pos,
 * releases imgp->auxargs, and stores argc at *stack_base.  Must be called
 * by a thread of the process being exec-ed (see the KASSERT).
 */
240 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
244 Elf32_Addr *uplatform;
245 struct ps_strings *arginfo;
248 KASSERT(curthread->td_proc == imgp->proc,
249 ("unsafe elf_linux_fixup(), should be curproc"));
252 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
/*
 * The platform string sits linux_szplatform bytes below ps_strings, which
 * is where linux_copyout_strings() copies it.
 */
253 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
254 args = (Elf32_Auxargs *)imgp->auxargs;
/* Skip the argv and envp slots plus two more (the same + 2 used when linux_copyout_strings() sizes the vector table). */
255 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
257 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
260 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
261 * as it has appeared in the 2.4.0-rc7 first time.
262 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
263 * glibc falls back to the hard-coded CLK_TCK value when aux entry
265 * Also see linux_times() implementation.
267 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
268 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
269 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
270 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
271 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
272 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
273 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
274 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
275 AUXARGS_ENTRY(pos, AT_BASE, args->base);
276 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
/*
 * NOTE(review): AT_EUID and AT_EGID below are filled from cr_svuid and
 * cr_svgid (presumably the saved ids) rather than from effective-id
 * fields -- confirm this is intended.
 */
277 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
278 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
279 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
280 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
281 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
/* AT_EXECFD is emitted only when an exec descriptor was recorded. */
282 if (args->execfd != -1)
283 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
284 AUXARGS_ENTRY(pos, AT_NULL, 0);
/* The aux args are no longer needed once written out. */
286 free(imgp->auxargs, M_TEMP);
287 imgp->auxargs = NULL;
290 suword(*stack_base, (register_t)imgp->args->argc);
295 * Copied from kern/kern_exec.c
/*
 * Build the initial user stack contents for a Linux ELF image
 * (sv_copyout_strings of elf_linux_sysvec).
 *
 * Copies the platform string to just below ps_strings, copies the argument
 * and environment strings out in one block at destp, fills the argv and
 * envp pointer vectors starting at vectp, and records their addresses and
 * counts in the user ps_strings structure.
 *
 * Declared to return register_t *.  stack_base is set to the start of the
 * vector table; presumably that is the value returned (the return
 * statement is not visible in this listing).
 * NOTE(review): the results of copyout() and suword() are not checked in
 * the visible lines.
 */
298 linux_copyout_strings(struct image_params *imgp)
302 char *stringp, *destp;
303 register_t *stack_base;
304 struct ps_strings *arginfo;
308 * Calculate string base and vector table pointers.
309 * Also deal with signal trampoline code for this exec type.
312 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
/*
 * destp: start of the string area, below ps_strings, the platform string,
 * SPARE_USRSPACE and the pointer-aligned size of the strings in use.
 */
313 destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
314 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
317 * install LINUX_PLATFORM
319 copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
323 * If we have a valid auxargs ptr, prepare some room
328 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
329 * lower compatibility.
331 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
332 (LINUX_AT_COUNT * 2);
334 * The '+ 2' is for the null pointers at the end of each of
335 * the arg and env vector sets,and imgp->auxarg_size is room
336 * for argument of Runtime loader.
338 vectp = (char **)(destp - (imgp->args->argc +
339 imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
342 * The '+ 2' is for the null pointers at the end of each of
343 * the arg and env vector sets
345 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
350 * vectp also becomes our initial stack base
352 stack_base = (register_t *)vectp;
354 stringp = imgp->args->begin_argv;
355 argc = imgp->args->argc;
356 envc = imgp->args->envc;
359 * Copy out strings - arguments and environment.
/* Only the used part of the ARG_MAX buffer is copied. */
361 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
364 * Fill in "ps_strings" struct for ps, w, etc.
366 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
367 suword(&arginfo->ps_nargvstr, argc);
370 * Fill in argument portion of vector table.
/*
 * Each pass stores the user address of the current string and then scans
 * the kernel copy up to its terminating NUL.
 */
372 for (; argc > 0; --argc) {
373 suword(vectp++, (long)(intptr_t)destp);
374 while (*stringp++ != 0)
379 /* a null vector table pointer separates the argp's from the envp's */
382 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
383 suword(&arginfo->ps_nenvstr, envc);
386 * Fill in environment portion of vector table.
388 for (; envc > 0; --envc) {
389 suword(vectp++, (long)(intptr_t)destp);
390 while (*stringp++ != 0)
395 /* end of vector table is a null pointer */
/*
 * Offset added to sv_sigcode_base to form the handler entry point for RT
 * frames; the non-RT path in linux_sendsig() uses sv_sigcode_base itself.
 */
403 extern unsigned long linux_sznonrtsigcode;
/*
 * Deliver a signal using an RT frame.  Called from linux_sendsig() when
 * the handler was installed with SA_SIGINFO.
 *
 * catcher: user handler address, stored in the frame.
 * ksi:     signal information (number, code, faulting address).
 * mask:    signal mask to save in the frame for sigreturn.
 *
 * Builds a struct l_rt_sigframe holding the handler, the siginfo and a
 * ucontext with the saved registers, copies it to the user stack or to
 * the alternate signal stack, and points the trap frame at the signal
 * code.
 *
 * Asserts that the proc lock and psp->ps_mtx are held on entry.  ps_mtx
 * is dropped before the frame is built and taken again on the last
 * visible line.
 */
406 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
408 struct thread *td = curthread;
409 struct proc *p = td->td_proc;
411 struct trapframe *regs;
412 struct l_rt_sigframe *fp, frame;
416 sig = ksi->ksi_signo;
417 code = ksi->ksi_code;
418 PROC_LOCK_ASSERT(p, MA_OWNED);
420 mtx_assert(&psp->ps_mtx, MA_OWNED);
/* Remember whether the thread is already running on the signal stack. */
422 oonstack = sigonstack(regs->tf_esp);
425 if (ldebug(rt_sendsig))
426 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
427 catcher, sig, (void*)mask, code);
430 * Allocate space for the signal handler context.
/*
 * Use the top of the alternate stack when one is configured, the thread
 * is not already on it and the signal asked for it.
 */
432 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
433 SIGISMEMBER(psp->ps_sigonstack, sig)) {
434 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
435 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
/* Otherwise the frame goes directly below the current user %esp. */
437 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
438 mtx_unlock(&psp->ps_mtx);
441 * Build the argument list for the signal handler.
/* Translate the signal number through sv_sigtbl when it is in range. */
443 if (p->p_sysent->sv_sigtbl)
444 if (sig <= p->p_sysent->sv_sigsize)
445 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
447 bzero(&frame, sizeof(frame));
449 frame.sf_handler = catcher;
/* These are user addresses inside the frame that will live at fp. */
451 frame.sf_siginfo = &fp->sf_si;
452 frame.sf_ucontext = &fp->sf_sc;
454 /* Fill in POSIX parts */
455 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
458 * Build the signal context to be used by sigreturn.
460 frame.sf_sc.uc_flags = 0; /* XXX ??? */
461 frame.sf_sc.uc_link = NULL; /* XXX ??? */
463 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
464 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
465 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
466 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
469 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
471 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
/* %gs is read with rgs(); the other registers come from the trap frame. */
472 frame.sf_sc.uc_mcontext.sc_gs = rgs();
473 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
474 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
475 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
476 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
477 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
478 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
479 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
480 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
481 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
482 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
483 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
484 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
485 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
486 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
487 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
488 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
489 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr;
/*
 * NOTE(review): this passes code (ksi->ksi_code) to the trap table,
 * while linux_sendsig() passes ksi->ksi_trapno for the same field.  The
 * table is indexed by trap number, so ksi_trapno looks like the intended
 * argument -- confirm.
 */
490 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
493 if (ldebug(rt_sendsig))
494 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
495 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
496 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
499 if (copyout(&frame, fp, sizeof(frame)) != 0) {
501 * Process has trashed its stack; give it an illegal
502 * instruction to halt it in its tracks.
505 if (ldebug(rt_sendsig))
506 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
514 * Build context to run handler in.
/* The handler starts on the new frame, at the RT entry of the signal code. */
516 regs->tf_esp = (int)fp;
517 regs->tf_eip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
518 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
519 regs->tf_cs = _ucodesel;
520 regs->tf_ds = _udatasel;
521 regs->tf_es = _udatasel;
522 regs->tf_fs = _udatasel;
523 regs->tf_ss = _udatasel;
/* Re-take the lock that was dropped after the frame address was chosen. */
525 mtx_lock(&psp->ps_mtx);
530 * Send an interrupt to process.
532 * Stack is set up to allow sigcode stored
533 * in u. to call routine, followed by kcall
534 * to sigreturn routine below. After sigreturn
535 * resets the signal mask, the stack, and the
536 * frame pointer, it returns to the user
/*
 * Signal delivery entry point (sv_sendsig of both sysentvecs).
 *
 * catcher: user handler address, stored in the frame.
 * ksi:     signal information (number, code, trap number, address).
 * mask:    signal mask to save in the frame for sigreturn.
 *
 * Handlers installed with SA_SIGINFO are handed to linux_rt_sendsig().
 * Otherwise a struct l_sigframe with the handler and the saved register
 * context is built, copied to the user stack or the alternate signal
 * stack, and the trap frame is pointed at sv_sigcode_base.
 *
 * Asserts that the proc lock and psp->ps_mtx are held on entry.  ps_mtx
 * is dropped before the frame is built and taken again on the last
 * visible line.
 */
540 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
542 struct thread *td = curthread;
543 struct proc *p = td->td_proc;
545 struct trapframe *regs;
546 struct l_sigframe *fp, frame;
551 PROC_LOCK_ASSERT(p, MA_OWNED);
553 sig = ksi->ksi_signo;
554 code = ksi->ksi_code;
555 mtx_assert(&psp->ps_mtx, MA_OWNED);
556 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
557 /* Signal handler installed with SA_SIGINFO. */
558 linux_rt_sendsig(catcher, ksi, mask);
/* Remember whether the thread is already running on the signal stack. */
562 oonstack = sigonstack(regs->tf_esp);
566 printf(ARGS(sendsig, "%p, %d, %p, %u"),
567 catcher, sig, (void*)mask, code);
571 * Allocate space for the signal handler context.
/*
 * Use the top of the alternate stack when one is configured, the thread
 * is not already on it and the signal asked for it.
 */
573 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
574 SIGISMEMBER(psp->ps_sigonstack, sig)) {
575 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
576 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
/* Otherwise the frame goes directly below the current user %esp. */
578 fp = (struct l_sigframe *)regs->tf_esp - 1;
579 mtx_unlock(&psp->ps_mtx);
583 * Build the argument list for the signal handler.
/* Translate the signal number through sv_sigtbl when it is in range. */
585 if (p->p_sysent->sv_sigtbl)
586 if (sig <= p->p_sysent->sv_sigsize)
587 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
589 bzero(&frame, sizeof(frame));
591 frame.sf_handler = catcher;
594 bsd_to_linux_sigset(mask, &lmask);
597 * Build the signal context to be used by sigreturn.
/* Word 0 of the mask goes in sc_mask; the rest goes in sf_extramask below. */
599 frame.sf_sc.sc_mask = lmask.__bits[0];
600 frame.sf_sc.sc_gs = rgs();
601 frame.sf_sc.sc_fs = regs->tf_fs;
602 frame.sf_sc.sc_es = regs->tf_es;
603 frame.sf_sc.sc_ds = regs->tf_ds;
604 frame.sf_sc.sc_edi = regs->tf_edi;
605 frame.sf_sc.sc_esi = regs->tf_esi;
606 frame.sf_sc.sc_ebp = regs->tf_ebp;
607 frame.sf_sc.sc_ebx = regs->tf_ebx;
608 frame.sf_sc.sc_edx = regs->tf_edx;
609 frame.sf_sc.sc_ecx = regs->tf_ecx;
610 frame.sf_sc.sc_eax = regs->tf_eax;
611 frame.sf_sc.sc_eip = regs->tf_eip;
612 frame.sf_sc.sc_cs = regs->tf_cs;
613 frame.sf_sc.sc_eflags = regs->tf_eflags;
614 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
615 frame.sf_sc.sc_ss = regs->tf_ss;
616 frame.sf_sc.sc_err = regs->tf_err;
617 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr;
618 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
620 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
621 frame.sf_extramask[i] = lmask.__bits[i+1];
623 if (copyout(&frame, fp, sizeof(frame)) != 0) {
625 * Process has trashed its stack; give it an illegal
626 * instruction to halt it in its tracks.
633 * Build context to run handler in.
/* The handler starts on the new frame, at the base of the signal code. */
635 regs->tf_esp = (int)fp;
636 regs->tf_eip = p->p_sysent->sv_sigcode_base;
637 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
638 regs->tf_cs = _ucodesel;
639 regs->tf_ds = _udatasel;
640 regs->tf_es = _udatasel;
641 regs->tf_fs = _udatasel;
642 regs->tf_ss = _udatasel;
/* Re-take the lock that was dropped after the frame address was chosen. */
644 mtx_lock(&psp->ps_mtx);
648 * System call to cleanup state after a signal
649 * has been taken. Reset signal mask and
650 * stack state from context left by sendsig (above).
651 * Return to previous pc and psl as specified by
652 * context left by sendsig. Check carefully to
653 * make sure that the user has not modified the
654 * psl to gain improper privileges or to cause
/*
 * sigreturn for frames built by linux_sendsig().
 *
 * td:   calling thread.
 * args: syscall arguments; args->sfp is the user address of the
 *       struct l_sigframe.
 *
 * Copies the frame in from user space and validates it before anything is
 * restored: the saved eflags may differ from the current ones only in the
 * PSL_USERCHANGE bits (EFLAGS_SECURE), and the saved %cs must carry user
 * privilege (CS_SECURE).  A bad %cs posts SIGBUS with BUS_OBJERR through
 * trapsignal().  The signal mask is then rebuilt from sc_mask plus
 * sf_extramask and installed with kern_sigprocmask(), and the trap frame
 * registers are reloaded from the saved context.
 *
 * Returns EJUSTRETURN on the success path.  The values returned on the
 * failure paths are not visible in this listing.
 */
658 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
660 struct l_sigframe frame;
661 struct trapframe *regs;
670 if (ldebug(sigreturn))
671 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
674 * The trampoline code hands us the sigframe.
675 * It is unsafe to keep track of it ourselves, in the event that a
676 * program jumps out of a signal handler.
678 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
682 * Check for security violations.
684 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
685 eflags = frame.sf_sc.sc_eflags;
686 if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
690 * Don't allow users to load a valid privileged %cs. Let the
691 * hardware check for invalid selectors, excess privilege in
692 * other selectors, invalid %eip's and invalid %esp's.
694 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
695 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
696 ksiginfo_init_trap(&ksi);
697 ksi.ksi_signo = SIGBUS;
698 ksi.ksi_code = BUS_OBJERR;
699 ksi.ksi_trapno = T_PROTFLT;
700 ksi.ksi_addr = (void *)regs->tf_eip;
701 trapsignal(td, &ksi);
705 lmask.__bits[0] = frame.sf_sc.sc_mask;
706 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
707 lmask.__bits[i+1] = frame.sf_extramask[i];
708 linux_to_bsd_sigset(&lmask, &bmask);
709 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
712 * Restore signal context.
714 /* %gs was restored by the trampoline. */
715 regs->tf_fs = frame.sf_sc.sc_fs;
716 regs->tf_es = frame.sf_sc.sc_es;
717 regs->tf_ds = frame.sf_sc.sc_ds;
718 regs->tf_edi = frame.sf_sc.sc_edi;
719 regs->tf_esi = frame.sf_sc.sc_esi;
720 regs->tf_ebp = frame.sf_sc.sc_ebp;
721 regs->tf_ebx = frame.sf_sc.sc_ebx;
722 regs->tf_edx = frame.sf_sc.sc_edx;
723 regs->tf_ecx = frame.sf_sc.sc_ecx;
724 regs->tf_eax = frame.sf_sc.sc_eax;
725 regs->tf_eip = frame.sf_sc.sc_eip;
726 regs->tf_cs = frame.sf_sc.sc_cs;
727 regs->tf_eflags = eflags;
728 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
729 regs->tf_ss = frame.sf_sc.sc_ss;
731 return (EJUSTRETURN);
735 * System call to cleanup state after a signal
736 * has been taken. Reset signal mask and
737 * stack state from context left by rt_sendsig (above).
738 * Return to previous pc and psl as specified by
739 * context left by sendsig. Check carefully to
740 * make sure that the user has not modified the
741 * psl to gain improper privileges or to cause
/*
 * sigreturn for frames built by linux_rt_sendsig().
 *
 * td:   calling thread.
 * args: syscall arguments; args->ucp is the user address of the
 *       struct l_ucontext.
 *
 * Copies the ucontext in from user space and applies the same checks as
 * linux_sigreturn(): EFLAGS_SECURE on the saved eflags and CS_SECURE on
 * the saved %cs, posting SIGBUS with BUS_OBJERR through trapsignal() on a
 * bad %cs.  The signal mask is restored from uc_sigmask, the trap frame
 * registers are reloaded from uc_mcontext, and the alternate signal stack
 * settings taken from lss are passed to kern_sigaltstack(), whose result
 * is deliberately ignored.
 *
 * Returns EJUSTRETURN on the success path.  The values returned on the
 * failure paths are not visible in this listing.
 * NOTE(review): the assignment of lss is not visible here; presumably it
 * points at uc.uc_stack -- confirm.  EFLAGS_SECURE and CS_SECURE are
 * defined a second time below with the same text as in linux_sigreturn().
 */
745 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
747 struct l_ucontext uc;
748 struct l_sigcontext *context;
752 struct trapframe *regs;
759 if (ldebug(rt_sigreturn))
760 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
763 * The trampoline code hands us the ucontext.
764 * It is unsafe to keep track of it ourselves, in the event that a
765 * program jumps out of a signal handler.
767 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
770 context = &uc.uc_mcontext;
773 * Check for security violations.
775 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
776 eflags = context->sc_eflags;
777 if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
781 * Don't allow users to load a valid privileged %cs. Let the
782 * hardware check for invalid selectors, excess privilege in
783 * other selectors, invalid %eip's and invalid %esp's.
785 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
786 if (!CS_SECURE(context->sc_cs)) {
787 ksiginfo_init_trap(&ksi);
788 ksi.ksi_signo = SIGBUS;
789 ksi.ksi_code = BUS_OBJERR;
790 ksi.ksi_trapno = T_PROTFLT;
791 ksi.ksi_addr = (void *)regs->tf_eip;
792 trapsignal(td, &ksi);
796 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
797 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
800 * Restore signal context
802 /* %gs was restored by the trampoline. */
803 regs->tf_fs = context->sc_fs;
804 regs->tf_es = context->sc_es;
805 regs->tf_ds = context->sc_ds;
806 regs->tf_edi = context->sc_edi;
807 regs->tf_esi = context->sc_esi;
808 regs->tf_ebp = context->sc_ebp;
809 regs->tf_ebx = context->sc_ebx;
810 regs->tf_edx = context->sc_edx;
811 regs->tf_ecx = context->sc_ecx;
812 regs->tf_eax = context->sc_eax;
813 regs->tf_eip = context->sc_eip;
814 regs->tf_cs = context->sc_cs;
815 regs->tf_eflags = eflags;
816 regs->tf_esp = context->sc_esp_at_signal;
817 regs->tf_ss = context->sc_ss;
820 * call sigaltstack & ignore results..
823 ss.ss_sp = lss->ss_sp;
824 ss.ss_size = lss->ss_size;
825 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
828 if (ldebug(rt_sigreturn))
829 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
830 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
832 (void)kern_sigaltstack(td, &ss, NULL);
834 return (EJUSTRETURN);
/*
 * Syscall argument fetcher (sv_fetch_syscall_args of both sysentvecs).
 *
 * td: calling thread; its trap frame supplies the register values.
 * sa: filled in with the syscall code, the six register arguments, the
 *     sysent entry and the argument count.
 *
 * The syscall number is read from %eax and the arguments from %ebx, %ecx,
 * %edx, %esi, %edi and %ebp, in that order.  The return statement is not
 * visible in this listing.
 */
838 linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
841 struct trapframe *frame;
844 frame = td->td_frame;
846 sa->code = frame->tf_eax;
847 sa->args[0] = frame->tf_ebx;
848 sa->args[1] = frame->tf_ecx;
849 sa->args[2] = frame->tf_edx;
850 sa->args[3] = frame->tf_esi;
851 sa->args[4] = frame->tf_edi;
852 sa->args[5] = frame->tf_ebp; /* Unconfirmed */
/* A syscall number beyond the table is routed to entry 0. */
854 if (sa->code >= p->p_sysent->sv_size)
855 sa->callp = &p->p_sysent->sv_table[0];
857 sa->callp = &p->p_sysent->sv_table[sa->code];
858 sa->narg = sa->callp->sy_narg;
/* Preset the return values: 0 and the current %edx. */
860 td->td_retval[0] = 0;
861 td->td_retval[1] = frame->tf_edx;
867 * If a linux binary is exec'ing something, try this image activator
868 * first. We override standard shell script execution in order to
869 * be able to modify the interpreter path. We only do this if a linux
870 * binary is doing the exec, so we do not create an EXEC module for it.
/*
 * Image activator hook (sv_imgact_try of both sysentvecs).
 *
 * imgp: exec image parameters; image_header is inspected and, for scripts,
 *       interpreter_name and args->fname_buf may be replaced.
 *
 * When the header starts with SHELLMAGIC, the regular shell activator is
 * run first.  If it succeeds, the interpreter path is passed through
 * linux_emul_convpath() and the resulting rpath is installed as the
 * interpreter name.
 * NOTE(review): the condition guarding the rpath assignment and the
 * return statement are not visible in this listing.
 */
872 static int exec_linux_imgact_try(struct image_params *iparams);
875 exec_linux_imgact_try(struct image_params *imgp)
877 const char *head = (const char *)imgp->image_header;
882 * The interpreter for shell scripts run from a linux binary needs
883 * to be located in /compat/linux if possible in order to recursively
884 * maintain linux path emulation.
886 if (((const short *)head)[0] == SHELLMAGIC) {
888 * Run our normal shell image activator. If it succeeds attempt
889 * to use the alternate path for the interpreter. If an alternate
890 * path is found, use our stringspace to store it.
892 if ((error = exec_shell_imgact(imgp)) == 0) {
893 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
894 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
/* Both the file name buffer and the interpreter name point at rpath. */
896 imgp->args->fname_buf =
897 imgp->interpreter_name = rpath;
904 * exec_setregs may initialize some registers differently than Linux
905 * does, thus potentially confusing Linux binaries. If necessary, we
906 * override the exec_setregs default(s) here.
/*
 * Register setup hook (sv_setregs of both sysentvecs).
 *
 * td:    thread being set up.
 * imgp:  exec image parameters, passed through to exec_setregs().
 * stack: initial user stack address, passed through to exec_setregs().
 *
 * Runs the generic exec_setregs() and then applies the Linux-specific
 * overrides; the visible one sets pcb_initial_npxcw to __LINUX_NPXCW__.
 * NOTE(review): the statement belonging to the %gs comment below is not
 * visible in this listing.
 */
909 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
911 struct pcb *pcb = td->td_pcb;
913 exec_setregs(td, imgp, stack);
915 /* Linux sets %gs to 0, we default to _udatasel */
919 pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
/*
 * Called from linux_elf_modevent() as linux_get_machine(&linux_platform),
 * so it is expected to store the platform string through dst.  The body
 * is not visible in this listing.
 */
923 linux_get_machine(const char **dst)
/*
 * System entry vector for Linux a.out binaries (sv_name "Linux a.out").
 * It shares the syscall table, the signal and errno tables, the sendsig
 * routine and the signal code with elf_linux_sysvec.  It differs in the
 * stack fixup (linux_fixup), the ps_strings address (PS_STRINGS), the
 * string copyout routine (exec_copyout_strings) and the SV_AOUT flag.
 * NOTE(review): some initializers are not visible in this listing.
 */
941 struct sysentvec linux_sysvec = {
942 .sv_size = LINUX_SYS_MAXSYSCALL,
943 .sv_table = linux_sysent,
945 .sv_sigsize = LINUX_SIGTBLSZ,
946 .sv_sigtbl = bsd_to_linux_signal,
947 .sv_errsize = ELAST + 1,
948 .sv_errtbl = bsd_to_linux_errno,
949 .sv_transtrap = translate_traps,
950 .sv_fixup = linux_fixup,
951 .sv_sendsig = linux_sendsig,
952 .sv_sigcode = linux_sigcode,
953 .sv_szsigcode = &linux_szsigcode,
954 .sv_prepsyscall = NULL,
955 .sv_name = "Linux a.out",
957 .sv_imgact_try = exec_linux_imgact_try,
958 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
959 .sv_pagesize = PAGE_SIZE,
960 .sv_minuser = VM_MIN_ADDRESS,
961 .sv_maxuser = VM_MAXUSER_ADDRESS,
962 .sv_usrstack = LINUX_USRSTACK,
963 .sv_psstrings = PS_STRINGS,
964 .sv_stackprot = VM_PROT_ALL,
965 .sv_copyout_strings = exec_copyout_strings,
966 .sv_setregs = exec_linux_setregs,
969 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
970 .sv_set_syscall_retval = cpu_set_syscall_retval,
971 .sv_fetch_syscall_args = linux_fetch_syscall_args,
972 .sv_syscallnames = NULL,
973 .sv_shared_page_base = LINUX_SHAREDPAGE,
974 .sv_shared_page_len = PAGE_SIZE,
975 .sv_schedtail = linux_schedtail,
/* Hand the vector to the sysentvec initialisation machinery. */
977 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
/*
 * System entry vector for Linux ELF binaries (sv_name "Linux ELF");
 * referenced by both brand entries below.  Compared with linux_sysvec it
 * uses elf_linux_fixup, LINUX_PS_STRINGS and linux_copyout_strings, adds
 * an ELF core dump routine, and sets SV_SHP instead of SV_AOUT.
 * NOTE(review): some initializers are not visible in this listing.
 */
979 struct sysentvec elf_linux_sysvec = {
980 .sv_size = LINUX_SYS_MAXSYSCALL,
981 .sv_table = linux_sysent,
983 .sv_sigsize = LINUX_SIGTBLSZ,
984 .sv_sigtbl = bsd_to_linux_signal,
985 .sv_errsize = ELAST + 1,
986 .sv_errtbl = bsd_to_linux_errno,
987 .sv_transtrap = translate_traps,
988 .sv_fixup = elf_linux_fixup,
989 .sv_sendsig = linux_sendsig,
990 .sv_sigcode = linux_sigcode,
991 .sv_szsigcode = &linux_szsigcode,
992 .sv_prepsyscall = NULL,
993 .sv_name = "Linux ELF",
994 .sv_coredump = elf32_coredump,
995 .sv_imgact_try = exec_linux_imgact_try,
996 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
997 .sv_pagesize = PAGE_SIZE,
998 .sv_minuser = VM_MIN_ADDRESS,
999 .sv_maxuser = VM_MAXUSER_ADDRESS,
1000 .sv_usrstack = LINUX_USRSTACK,
1001 .sv_psstrings = LINUX_PS_STRINGS,
1002 .sv_stackprot = VM_PROT_ALL,
1003 .sv_copyout_strings = linux_copyout_strings,
1004 .sv_setregs = exec_linux_setregs,
1005 .sv_fixlimit = NULL,
1007 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
1008 .sv_set_syscall_retval = cpu_set_syscall_retval,
1009 .sv_fetch_syscall_args = linux_fetch_syscall_args,
1010 .sv_syscallnames = NULL,
1011 .sv_shared_page_base = LINUX_SHAREDPAGE,
1012 .sv_shared_page_len = PAGE_SIZE,
1013 .sv_schedtail = linux_schedtail,
/* Hand the vector to the sysentvec initialisation machinery. */
1015 INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
/*
 * Vendor name for linux_brandnote, and the value that the first
 * descriptor word of the note must carry for linux_trans_osrel() to
 * accept it.
 */
1017 static char GNU_ABI_VENDOR[] = "GNU";
1018 static int GNULINUX_ABI_DESC = 0;
/*
 * Brand note callback (trans_osrel of linux_brandnote).
 *
 * note:  ELF note header; the name and the descriptor follow it in memory.
 * osrel: out parameter receiving the encoded OS release.
 *
 * Steps over the note header and the name, rounded up to
 * sizeof(Elf32_Addr), to reach the descriptor words.  The first word must
 * equal GNULINUX_ABI_DESC; the next three are combined into *osrel.
 * Declared to return boolean_t; the return statements are not visible in
 * this listing.
 */
1021 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1023 const Elf32_Word *desc;
1026 p = (uintptr_t)(note + 1);
1027 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1029 desc = (const Elf32_Word *)p;
1030 if (desc[0] != GNULINUX_ABI_DESC)
1034 * For linux we encode osrel as follows (see linux_mib.c):
1035 * VVVMMMIII (version, major, minor), see linux_mib.c.
1037 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
/*
 * Brand note description shared by both brand entries below: vendor
 * GNU_ABI_VENDOR, a 16-byte descriptor, and linux_trans_osrel() as the
 * callback enabled by BN_TRANSLATE_OSREL.
 */
1042 static Elf_Brandnote linux_brandnote = {
/* sizeof counts the terminating NUL of the vendor string. */
1043 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1044 .hdr.n_descsz = 16, /* XXX at least 16 */
1046 .vendor = GNU_ABI_VENDOR,
1047 .flags = BN_TRANSLATE_OSREL,
1048 .trans_osrel = linux_trans_osrel
/*
 * Brand entry for Linux ELF binaries whose interpreter path is
 * /lib/ld-linux.so.1.  Uses elf_linux_sysvec, the /compat/linux
 * emulation path and linux_brandnote.
 */
1051 static Elf32_Brandinfo linux_brand = {
1052 .brand = ELFOSABI_LINUX,
1054 .compat_3_brand = "Linux",
1055 .emul_path = "/compat/linux",
1056 .interp_path = "/lib/ld-linux.so.1",
1057 .sysvec = &elf_linux_sysvec,
1058 .interp_newpath = NULL,
1059 .brand_note = &linux_brandnote,
1060 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * Brand entry for Linux ELF binaries whose interpreter path is
 * /lib/ld-linux.so.2.  Apart from interp_path, the visible fields match
 * linux_brand.
 */
1063 static Elf32_Brandinfo linux_glibc2brand = {
1064 .brand = ELFOSABI_LINUX,
1066 .compat_3_brand = "Linux",
1067 .emul_path = "/compat/linux",
1068 .interp_path = "/lib/ld-linux.so.2",
1069 .sysvec = &elf_linux_sysvec,
1070 .interp_newpath = NULL,
1071 .brand_note = &linux_brandnote,
1072 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * List of brand entries walked by linux_elf_modevent(); its loops stop at
 * the first NULL pointer, so the list must be NULL-terminated.  The
 * initializer entries are not visible in this listing.
 */
1075 Elf32_Brandinfo *linux_brandlist[] = {
/*
 * Module event handler for the Linux ELF emulation.
 *
 * mod:  module handle.
 * type: module event type.
 * data: opaque event argument.
 *
 * The visible code has an install sequence and a removal sequence, each
 * ending in a pair of printf() messages that report success or failure.
 * NOTE(review): the switch on type, its case labels, the error handling
 * between the steps and the return statement are not visible in this
 * listing.
 */
1082 linux_elf_modevent(module_t mod, int type, void *data)
1084 Elf32_Brandinfo **brandinfo;
1086 struct linux_ioctl_handler **lihp;
1087 struct linux_device_handler **ldhp;
/*
 * Install sequence: insert every brand entry, register the ioctl and
 * device handlers, set up the emulation and futex locks, hook process
 * exit and exec, and compute the platform string size and stclohz.
 */
1093 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1095 if (elf32_insert_brand_entry(*brandinfo) < 0)
1098 SET_FOREACH(lihp, linux_ioctl_handler_set)
1099 linux_ioctl_register_handler(*lihp);
1100 SET_FOREACH(ldhp, linux_device_handler_set)
1101 linux_device_register_handler(*ldhp);
1102 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1103 sx_init(&emul_shared_lock, "emuldata->shared lock");
1104 LIST_INIT(&futex_list);
1105 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1106 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1108 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1110 linux_get_machine(&linux_platform);
/* Size of the platform string including its NUL, rounded up. */
1111 linux_szplatform = roundup(strlen(linux_platform) + 1,
1113 linux_osd_jail_register();
/* Use stathz when it is non-zero, otherwise fall back to hz. */
1114 stclohz = (stathz ? stathz : hz);
1116 printf("Linux ELF exec handler installed\n");
1118 printf("cannot insert Linux ELF brand handler\n");
/*
 * Removal sequence: check whether any brand is still in use, remove the
 * brand entries, then undo the registrations and destroy the locks in
 * the same order they were set up above.
 */
1121 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1123 if (elf32_brand_inuse(*brandinfo))
1126 for (brandinfo = &linux_brandlist[0];
1127 *brandinfo != NULL; ++brandinfo)
1128 if (elf32_remove_brand_entry(*brandinfo) < 0)
1132 SET_FOREACH(lihp, linux_ioctl_handler_set)
1133 linux_ioctl_unregister_handler(*lihp);
1134 SET_FOREACH(ldhp, linux_device_handler_set)
1135 linux_device_unregister_handler(*ldhp);
1136 mtx_destroy(&emul_lock);
1137 sx_destroy(&emul_shared_lock);
1138 mtx_destroy(&futex_mtx);
1139 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1140 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1141 linux_osd_jail_deregister();
1143 printf("Linux ELF exec handler removed\n");
1145 printf("Could not deinstall ELF interpreter entry\n");
/*
 * Module glue: linux_elf_mod is declared as module linuxelf at
 * SI_SUB_EXEC with SI_ORDER_ANY.
 * NOTE(review): the moduledata initializer is not visible in this
 * listing; presumably it names linux_elf_modevent as the event handler --
 * confirm.
 */
1153 static moduledata_t linux_elf_mod = {
1159 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);