2 * Copyright (c) 1994-1996 Søren Schmidt
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
60 #include <machine/cpu.h>
61 #include <machine/cputypes.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 #include <compat/linux/linux_emul.h>
68 #include <compat/linux/linux_futex.h>
69 #include <compat/linux/linux_ioctl.h>
70 #include <compat/linux/linux_mib.h>
71 #include <compat/linux/linux_misc.h>
72 #include <compat/linux/linux_signal.h>
73 #include <compat/linux/linux_util.h>
/*
 * Module-wide definitions: version 1 of the "linux" module, the M_LINUX
 * malloc type, and SHELLMAGIC.
 *
 * SHELLMAGIC is the value exec_linux_imgact_try() compares against the
 * first short of an image header.  The constant is selected by BYTE_ORDER,
 * and the inline note identifies it as the two characters "#!".
 * NOTE(review): the else/endif lines of this conditional are not visible
 * in this listing.
 */
75 MODULE_VERSION(linux, 1);
77 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
79 #if BYTE_ORDER == LITTLE_ENDIAN
80 #define SHELLMAGIC 0x2123 /* #! */
82 #define SHELLMAGIC 0x2321
86 * Allow the sendsig functions to use the ldebug() facility
87 * even though they are not syscalls themselves. Map them
88 * to syscall 0. This is slightly less bogus than using ldebug(sigreturn).
/*
 * ldebug() keys for the two sendsig routines; both are aliased to syscall
 * number 0.
 */
91 #define LINUX_SYS_linux_rt_sendsig 0
92 #define LINUX_SYS_linux_sendsig 0
/*
 * User address of struct ps_strings for Linux ELF images: it sits
 * immediately below LINUX_USRSTACK.  Used as sv_psstrings of
 * elf_linux_sysvec.
 */
94 #define LINUX_PS_STRINGS (LINUX_USRSTACK - sizeof(struct ps_strings))
/*
 * Signal trampoline code and its size (installed as sv_sigcode and
 * sv_szsigcode in both sysentvecs) and the Linux syscall table (sv_table).
 * All three are defined outside this file.
 */
96 extern char linux_sigcode[];
97 extern int linux_szsigcode;
99 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
/*
 * Linker sets of ioctl and device handlers; linux_elf_modevent() walks them
 * to register and unregister every member.
 */
101 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
102 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
/* Forward declarations for the sysentvec hooks defined later in this file. */
104 static int linux_fixup(register_t **stack_base,
105 struct image_params *iparams);
106 static int elf_linux_fixup(register_t **stack_base,
107 struct image_params *iparams);
108 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
109 static void exec_linux_setregs(struct thread *td,
110 struct image_params *imgp, u_long stack);
111 static register_t *linux_copyout_strings(struct image_params *imgp);
112 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
/*
 * Platform string and its rounded-up size.  Both are set by
 * linux_elf_modevent() and consumed by linux_copyout_strings() and
 * elf_linux_fixup().
 */
114 static int linux_szplatform;
115 const char *linux_platform;
/*
 * Tags returned by EVENTHANDLER_REGISTER() for the process_exit and
 * process_exec events; kept so the handlers can be deregistered later.
 */
117 static eventhandler_tag linux_exit_tag;
118 static eventhandler_tag linux_exec_tag;
121 * Linux syscalls return negative errno's, we do positive and map them
123 * FreeBSD: src/sys/sys/errno.h
124 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
125 * linux-2.6.17.8/include/asm-generic/errno.h
/*
 * FreeBSD errno to Linux errno translation table, installed as sv_errtbl
 * (with sv_errsize = ELAST + 1) in both sysentvecs below.
 *
 * The array is indexed by the FreeBSD errno value and every entry is already
 * negated.  Most low numbers map to themselves, but not all: FreeBSD errno
 * 11 becomes -35 and FreeBSD errno 35 becomes -11.
 * NOTE(review): the last initializer row(s) and the closing brace are not
 * visible in this listing.
 */
127 static int bsd_to_linux_errno[ELAST + 1] = {
128 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
129 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
130 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
131 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
132 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
133 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
134 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
135 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
136 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
/*
 * FreeBSD signal to Linux signal translation table, installed as sv_sigtbl
 * (sv_sigsize = LINUX_SIGTBLSZ) in both sysentvecs.  The sendsig routines
 * index it with _SIG_IDX(sig).
 *
 * Two slots hold 0.
 * NOTE(review): presumably 0 marks FreeBSD signals with no Linux
 * counterpart; confirm against the consumers of this table.
 */
140 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
141 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
142 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
143 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
144 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
145 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
146 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
147 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
148 0, LINUX_SIGUSR1, LINUX_SIGUSR2
/*
 * Linux signal to FreeBSD signal translation table, the reverse direction of
 * bsd_to_linux_signal and of the same size (LINUX_SIGTBLSZ).
 *
 * The mapping is not one-to-one: SIGBUS and SIGURG each appear in two slots.
 * NOTE(review): the indexing convention is presumably the same _SIG_IDX()
 * offset used for bsd_to_linux_signal; this table is not referenced in the
 * visible part of the file.
 */
151 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
152 SIGHUP, SIGINT, SIGQUIT, SIGILL,
153 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
154 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
155 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
156 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
157 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
158 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
159 SIGIO, SIGURG, SIGSYS
/*
 * FreeBSD trap number to Linux trap number translation.
 *
 * _bsd_to_linux_trapcode[] is indexed by the FreeBSD trap number (the
 * trailing comment on each row gives the index and, where one exists, the
 * FreeBSD T_* name).  The stored value is the number reported to Linux
 * programs in sc_trapno.  LINUX_T_UNKNOWN (255) fills the slots that have
 * no translation.
 * NOTE(review): rows 3, 18-22 and 28 and the closing brace are not visible
 * in this listing.
 */
162 #define LINUX_T_UNKNOWN 255
163 static int _bsd_to_linux_trapcode[] = {
164 LINUX_T_UNKNOWN, /* 0 */
165 6, /* 1 T_PRIVINFLT */
166 LINUX_T_UNKNOWN, /* 2 */
168 LINUX_T_UNKNOWN, /* 4 */
169 LINUX_T_UNKNOWN, /* 5 */
170 16, /* 6 T_ARITHTRAP */
171 254, /* 7 T_ASTFLT */
172 LINUX_T_UNKNOWN, /* 8 */
173 13, /* 9 T_PROTFLT */
174 1, /* 10 T_TRCTRAP */
175 LINUX_T_UNKNOWN, /* 11 */
176 14, /* 12 T_PAGEFLT */
177 LINUX_T_UNKNOWN, /* 13 */
178 17, /* 14 T_ALIGNFLT */
179 LINUX_T_UNKNOWN, /* 15 */
180 LINUX_T_UNKNOWN, /* 16 */
181 LINUX_T_UNKNOWN, /* 17 */
187 8, /* 23 T_DOUBLEFLT */
188 9, /* 24 T_FPOPFLT */
189 10, /* 25 T_TSSFLT */
190 11, /* 26 T_SEGNPFLT */
191 12, /* 27 T_STKFLT */
193 19, /* 29 T_XMMFLT */
194 15 /* 30 T_RESERVED */
/*
 * Bounds-checked lookup: the table entry is used only when `code` is below
 * the table's element count.  `code` is evaluated more than once, so callers
 * must pass an expression without side effects.
 * NOTE(review): the out-of-range arm of the conditional is not visible in
 * this listing.
 */
196 #define bsd_to_linux_trapcode(code) \
197 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
198 _bsd_to_linux_trapcode[(code)]: \
202 * If FreeBSD & Linux have a difference of opinion about what a trap
203 * means, deal with it here.
/*
 * sv_transtrap hook of both sysentvecs: receives a signal number and the
 * trap code that raised it.
 *
 * Only the first test is visible here: signals other than SIGBUS take the
 * early branch.
 * NOTE(review): the branch bodies and the handling of SIGBUS are not
 * visible in this listing.
 */
208 translate_traps(int signal, int trap_code)
210 if (signal != SIGBUS)
/*
 * sv_fixup hook of linux_sysvec (the a.out vector): stores the envp
 * pointer, the argv pointer and argc into the new user stack with suword().
 *
 * envp is computed as (argc + 1) words above *stack_base, i.e. past the
 * argument pointers plus one extra word.
 * NOTE(review): the statements that assign argv and move *stack_base
 * between the three stores are not visible in this listing, and none of the
 * visible suword() results is checked.
 */
224 linux_fixup(register_t **stack_base, struct image_params *imgp)
226 register_t *argv, *envp;
229 envp = *stack_base + (imgp->args->argc + 1);
231 suword(*stack_base, (intptr_t)(void *)envp);
233 suword(*stack_base, (intptr_t)(void *)argv);
235 suword(*stack_base, imgp->args->argc);
/*
 * sv_fixup hook of elf_linux_sysvec: emits the ELF auxiliary vector above
 * the argv/envp pointer arrays, releases imgp->auxargs, and stores argc at
 * the stack base.
 *
 * Must be called by the process being exec'd; the KASSERT checks that
 * curthread's process is imgp->proc.
 */
240 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
244 Elf32_Addr *uplatform;
245 struct ps_strings *arginfo;
249 KASSERT(curthread->td_proc == imgp->proc,
250 ("unsafe elf_linux_fixup(), should be curproc"));
/* issetugid is normalised to 0/1 and exported below as LINUX_AT_SECURE. */
253 issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0;
254 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
/*
 * User address of the platform string: linux_szplatform bytes below
 * ps_strings, the same address linux_copyout_strings() copies
 * linux_platform to.
 */
255 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
256 args = (Elf32_Auxargs *)imgp->auxargs;
/*
 * First aux slot: skip argc argument pointers, envc environment pointers
 * and two more words (the two NULL terminators described in
 * linux_copyout_strings()).
 */
257 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
259 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
262 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
263 * as it has appeared in the 2.4.0-rc7 first time.
264 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
265 * glibc falls back to the hard-coded CLK_TCK value when aux entry
267 * Also see linux_times() implementation.
269 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
270 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
271 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
272 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
273 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
274 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
275 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
276 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
277 AUXARGS_ENTRY(pos, AT_BASE, args->base);
278 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid);
/*
 * NOTE(review): AT_EUID and AT_EGID are filled from the saved ids
 * (cr_svuid / cr_svgid) rather than from effective-id fields; confirm this
 * is intended.
 */
279 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
280 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
281 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
282 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
283 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
/* AT_EXECFD is emitted only when a descriptor was recorded (not -1). */
284 if (args->execfd != -1)
285 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
286 AUXARGS_ENTRY(pos, AT_NULL, 0);
/* The aux arguments are no longer needed; free them and clear the pointer. */
288 free(imgp->auxargs, M_TEMP);
289 imgp->auxargs = NULL;
/*
 * Store argc at *stack_base.
 * NOTE(review): any adjustment of *stack_base before this store is not
 * visible in this listing.
 */
292 suword(*stack_base, (register_t)imgp->args->argc);
297 * Copied from kern/kern_exec.c
/*
 * sv_copyout_strings hook of elf_linux_sysvec: copies the platform string
 * and the argument/environment strings to the top of the new user stack,
 * builds the argv/envp pointer vectors below them, and fills in ps_strings.
 *
 * The address of the vector table doubles as the initial stack base
 * (stack_base = vectp).
 * NOTE(review): the results of copyout() and suword() are not checked in
 * the visible code.
 */
300 linux_copyout_strings(struct image_params *imgp)
304 char *stringp, *destp;
305 register_t *stack_base;
306 struct ps_strings *arginfo;
310 * Calculate string base and vector table pointers.
311 * Also deal with signal trampoline code for this exec type.
314 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
/*
 * destp is the start of the string area.  It lies below ps_strings by
 * SPARE_USRSPACE, the platform string, and the used string space
 * (ARG_MAX - stringspace) rounded up to pointer size.
 */
315 destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
316 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
319 * install LINUX_PLATFORM
321 copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
325 * If we have a valid auxargs ptr, prepare some room
330 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
331 * lower compatibility.
333 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
334 (LINUX_AT_COUNT * 2);
336 * The '+ 2' is for the null pointers at the end of each of
337 * the arg and env vector sets,and imgp->auxarg_size is room
338 * for argument of Runtime loader.
340 vectp = (char **)(destp - (imgp->args->argc +
341 imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
344 * The '+ 2' is for the null pointers at the end of each of
345 * the arg and env vector sets
347 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
352 * vectp also becomes our initial stack base
354 stack_base = (register_t *)vectp;
/* Kernel-side cursor over the packed strings, plus the two counters. */
356 stringp = imgp->args->begin_argv;
357 argc = imgp->args->argc;
358 envc = imgp->args->envc;
361 * Copy out strings - arguments and environment.
363 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
366 * Fill in "ps_strings" struct for ps, w, etc.
368 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
369 suword(&arginfo->ps_nargvstr, argc);
372 * Fill in argument portion of vector table.
/*
 * One vector slot per argument: store the string's user address (destp),
 * then walk stringp past the terminating NUL.
 * NOTE(review): the statement that advances destp is not visible in this
 * listing.
 */
374 for (; argc > 0; --argc) {
375 suword(vectp++, (long)(intptr_t)destp);
376 while (*stringp++ != 0)
381 /* a null vector table pointer separates the argp's from the envp's */
384 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
385 suword(&arginfo->ps_nenvstr, envc);
388 * Fill in environment portion of vector table.
390 for (; envc > 0; --envc) {
391 suword(vectp++, (long)(intptr_t)destp);
392 while (*stringp++ != 0)
397 /* end of vector table is a null pointer */
/*
 * Offset added to sv_sigcode_base to obtain the trampoline entry point used
 * for rt signal frames (see the tf_eip assignment in linux_rt_sendsig()).
 */
405 extern unsigned long linux_sznonrtsigcode;
/*
 * Deliver a signal to a handler installed with SA_SIGINFO; linux_sendsig()
 * calls this when the signal is a member of ps_siginfo.
 *
 * Builds a struct l_rt_sigframe in the kernel, copies it to the user stack
 * (or to the alternate signal stack), and rewrites the trapframe so the
 * thread resumes in the rt trampoline.
 *
 * Locking: the process lock and ps_mtx must be held on entry (asserted).
 * ps_mtx is dropped after the frame address is chosen and re-taken at the
 * end.
 * NOTE(review): the matching proc lock/unlock calls are not visible in this
 * listing.
 */
408 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
410 struct thread *td = curthread;
411 struct proc *p = td->td_proc;
413 struct trapframe *regs;
414 struct l_rt_sigframe *fp, frame;
418 sig = ksi->ksi_signo;
419 code = ksi->ksi_code;
420 PROC_LOCK_ASSERT(p, MA_OWNED);
422 mtx_assert(&psp->ps_mtx, MA_OWNED);
/* Remember whether the thread is already running on the alternate stack. */
424 oonstack = sigonstack(regs->tf_esp);
427 if (ldebug(rt_sendsig))
428 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
429 catcher, sig, (void*)mask, code);
432 * Allocate space for the signal handler context.
/*
 * The frame goes at the top of the alternate stack when one is configured,
 * the thread is not already on it, and this signal is in ps_sigonstack.
 * Otherwise it goes directly below the current user %esp.
 */
434 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
435 SIGISMEMBER(psp->ps_sigonstack, sig)) {
436 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
437 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
439 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
440 mtx_unlock(&psp->ps_mtx);
443 * Build the argument list for the signal handler.
/* Translate the FreeBSD signal number through sv_sigtbl when it is in range. */
445 if (p->p_sysent->sv_sigtbl)
446 if (sig <= p->p_sysent->sv_sigsize)
447 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
/* Zero the whole frame first so unset fields are copied out as zeros. */
449 bzero(&frame, sizeof(frame));
451 frame.sf_handler = catcher;
/* Both pointers refer to the user-space copy (fp), not the kernel frame. */
453 frame.sf_siginfo = &fp->sf_si;
454 frame.sf_ucontext = &fp->sf_sc;
456 /* Fill in POSIX parts */
457 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
460 * Build the signal context to be used by sigreturn.
462 frame.sf_sc.uc_flags = 0; /* XXX ??? */
463 frame.sf_sc.uc_link = NULL; /* XXX ??? */
465 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
466 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
467 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
468 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
471 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
/*
 * Save the interrupted register state.  %gs is read from the CPU (rgs());
 * everything else comes from the trapframe.
 */
473 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
474 frame.sf_sc.uc_mcontext.sc_gs = rgs();
475 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
476 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
477 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
478 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
479 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
480 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
481 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
482 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
483 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
484 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
485 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
486 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
487 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
488 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
489 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
490 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
491 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr;
/*
 * NOTE(review): sc_trapno is derived from ksi_code here, whereas
 * linux_sendsig() passes ksi->ksi_trapno to the same macro; confirm which
 * input is intended.
 */
492 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
495 if (ldebug(rt_sendsig))
496 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
497 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
498 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
/*
 * Copy the finished frame to user space.
 * NOTE(review): most of the failure branch is not visible in this listing.
 */
501 if (copyout(&frame, fp, sizeof(frame)) != 0) {
503 * Process has trashed its stack; give it an illegal
504 * instruction to halt it in its tracks.
507 if (ldebug(rt_sendsig))
508 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
516 * Build context to run handler in.
/*
 * Resume in the rt trampoline with %esp at the new frame, the trace, VM86
 * and direction flags cleared, and the user code/data selectors loaded.
 */
518 regs->tf_esp = (int)fp;
519 regs->tf_eip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
520 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
521 regs->tf_cs = _ucodesel;
522 regs->tf_ds = _udatasel;
523 regs->tf_es = _udatasel;
524 regs->tf_fs = _udatasel;
525 regs->tf_ss = _udatasel;
527 mtx_lock(&psp->ps_mtx);
532 * Send an interrupt to process.
534 * Stack is set up to allow sigcode stored
535 * in u. to call routine, followed by kcall
536 * to sigreturn routine below. After sigreturn
537 * resets the signal mask, the stack, and the
538 * frame pointer, it returns to the user specified pc, psl.
/*
 * sv_sendsig hook of both sysentvecs: deliver a signal to a Linux process.
 *
 * Handlers installed with SA_SIGINFO (signal present in ps_siginfo) are
 * handed to linux_rt_sendsig().  Otherwise a struct l_sigframe is built in
 * the kernel, copied to the user stack (or the alternate signal stack), and
 * the trapframe is rewritten so the thread resumes at sv_sigcode_base.
 *
 * Locking: the process lock and ps_mtx must be held on entry (asserted).
 * ps_mtx is dropped after the frame address is chosen and re-taken at the
 * end.
 */
542 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
544 struct thread *td = curthread;
545 struct proc *p = td->td_proc;
547 struct trapframe *regs;
548 struct l_sigframe *fp, frame;
553 PROC_LOCK_ASSERT(p, MA_OWNED);
555 sig = ksi->ksi_signo;
556 code = ksi->ksi_code;
557 mtx_assert(&psp->ps_mtx, MA_OWNED);
558 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
559 /* Signal handler installed with SA_SIGINFO. */
560 linux_rt_sendsig(catcher, ksi, mask);
/* Remember whether the thread is already running on the alternate stack. */
564 oonstack = sigonstack(regs->tf_esp);
568 printf(ARGS(sendsig, "%p, %d, %p, %u"),
569 catcher, sig, (void*)mask, code);
573 * Allocate space for the signal handler context.
/*
 * The frame goes at the top of the alternate stack when one is configured,
 * the thread is not already on it, and this signal is in ps_sigonstack.
 * Otherwise it goes directly below the current user %esp.
 */
575 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
576 SIGISMEMBER(psp->ps_sigonstack, sig)) {
577 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
578 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
580 fp = (struct l_sigframe *)regs->tf_esp - 1;
581 mtx_unlock(&psp->ps_mtx);
585 * Build the argument list for the signal handler.
/* Translate the FreeBSD signal number through sv_sigtbl when it is in range. */
587 if (p->p_sysent->sv_sigtbl)
588 if (sig <= p->p_sysent->sv_sigsize)
589 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
591 bzero(&frame, sizeof(frame));
593 frame.sf_handler = catcher;
/* Convert the blocked-signal mask to the Linux layout once. */
596 bsd_to_linux_sigset(mask, &lmask);
599 * Build the signal context to be used by sigreturn.
/*
 * Word 0 of the Linux mask lives in the sigcontext; the remaining words are
 * stored in sf_extramask[] further down.
 */
601 frame.sf_sc.sc_mask = lmask.__bits[0];
602 frame.sf_sc.sc_gs = rgs();
603 frame.sf_sc.sc_fs = regs->tf_fs;
604 frame.sf_sc.sc_es = regs->tf_es;
605 frame.sf_sc.sc_ds = regs->tf_ds;
606 frame.sf_sc.sc_edi = regs->tf_edi;
607 frame.sf_sc.sc_esi = regs->tf_esi;
608 frame.sf_sc.sc_ebp = regs->tf_ebp;
609 frame.sf_sc.sc_ebx = regs->tf_ebx;
610 frame.sf_sc.sc_edx = regs->tf_edx;
611 frame.sf_sc.sc_ecx = regs->tf_ecx;
612 frame.sf_sc.sc_eax = regs->tf_eax;
613 frame.sf_sc.sc_eip = regs->tf_eip;
614 frame.sf_sc.sc_cs = regs->tf_cs;
615 frame.sf_sc.sc_eflags = regs->tf_eflags;
616 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
617 frame.sf_sc.sc_ss = regs->tf_ss;
618 frame.sf_sc.sc_err = regs->tf_err;
619 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr;
620 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
622 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
623 frame.sf_extramask[i] = lmask.__bits[i+1];
/*
 * Copy the finished frame to user space.
 * NOTE(review): the body of the failure branch is not visible in this
 * listing.
 */
625 if (copyout(&frame, fp, sizeof(frame)) != 0) {
627 * Process has trashed its stack; give it an illegal
628 * instruction to halt it in its tracks.
635 * Build context to run handler in.
/*
 * Resume at the start of the signal trampoline with %esp at the new frame,
 * the trace, VM86 and direction flags cleared, and the user selectors
 * loaded.
 */
637 regs->tf_esp = (int)fp;
638 regs->tf_eip = p->p_sysent->sv_sigcode_base;
639 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
640 regs->tf_cs = _ucodesel;
641 regs->tf_ds = _udatasel;
642 regs->tf_es = _udatasel;
643 regs->tf_fs = _udatasel;
644 regs->tf_ss = _udatasel;
646 mtx_lock(&psp->ps_mtx);
650 * System call to cleanup state after a signal
651 * has been taken. Reset signal mask and
652 * stack state from context left by sendsig (above).
653 * Return to previous pc and psl as specified by
654 * context left by sendsig. Check carefully to
655 * make sure that the user has not modified the
656 * psl to gain improper privileges or to cause a machine fault.
/*
 * Linux sigreturn syscall: restore the thread state saved by
 * linux_sendsig().
 *
 * args->sfp is the user address of the struct l_sigframe.  The frame is
 * copied in and validated (eflags and %cs), the signal mask is restored, and
 * the saved registers are written back into the trapframe.
 *
 * Returns EJUSTRETURN on success.
 * NOTE(review): the return statements on the failure paths are not visible
 * in this listing.
 */
660 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
662 struct l_sigframe frame;
663 struct trapframe *regs;
672 if (ldebug(sigreturn))
673 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
676 * The trampoline code hands us the sigframe.
677 * It is unsafe to keep track of it ourselves, in the event that a
678 * program jumps out of a signal handler.
680 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
684 * Check for security violations.
/*
 * EFLAGS_SECURE holds when the new and current eflags differ only in bits
 * covered by PSL_USERCHANGE.
 */
686 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
687 eflags = frame.sf_sc.sc_eflags;
688 if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
692 * Don't allow users to load a valid privileged %cs. Let the
693 * hardware check for invalid selectors, excess privilege in
694 * other selectors, invalid %eip's and invalid %esp's.
696 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
697 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
698 ksiginfo_init_trap(&ksi);
699 ksi.ksi_signo = SIGBUS;
700 ksi.ksi_code = BUS_OBJERR;
701 ksi.ksi_trapno = T_PROTFLT;
702 ksi.ksi_addr = (void *)regs->tf_eip;
703 trapsignal(td, &ksi);
/*
 * Reassemble the Linux mask from sc_mask (word 0) and sf_extramask[], then
 * convert it and install it as the thread's signal mask.
 */
707 lmask.__bits[0] = frame.sf_sc.sc_mask;
708 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
709 lmask.__bits[i+1] = frame.sf_extramask[i];
710 linux_to_bsd_sigset(&lmask, &bmask);
711 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
714 * Restore signal context.
716 /* %gs was restored by the trampoline. */
717 regs->tf_fs = frame.sf_sc.sc_fs;
718 regs->tf_es = frame.sf_sc.sc_es;
719 regs->tf_ds = frame.sf_sc.sc_ds;
720 regs->tf_edi = frame.sf_sc.sc_edi;
721 regs->tf_esi = frame.sf_sc.sc_esi;
722 regs->tf_ebp = frame.sf_sc.sc_ebp;
723 regs->tf_ebx = frame.sf_sc.sc_ebx;
724 regs->tf_edx = frame.sf_sc.sc_edx;
725 regs->tf_ecx = frame.sf_sc.sc_ecx;
726 regs->tf_eax = frame.sf_sc.sc_eax;
727 regs->tf_eip = frame.sf_sc.sc_eip;
728 regs->tf_cs = frame.sf_sc.sc_cs;
729 regs->tf_eflags = eflags;
730 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
731 regs->tf_ss = frame.sf_sc.sc_ss;
733 return (EJUSTRETURN);
737 * System call to cleanup state after a signal
738 * has been taken. Reset signal mask and
739 * stack state from context left by rt_sendsig (above).
740 * Return to previous pc and psl as specified by
741 * context left by sendsig. Check carefully to
742 * make sure that the user has not modified the
743 * psl to gain improper privileges or to cause a machine fault.
/*
 * Linux rt_sigreturn syscall: restore the thread state saved by
 * linux_rt_sendsig().
 *
 * args->ucp is the user address of the struct l_ucontext.  The context is
 * copied in and validated (eflags and %cs), the signal mask is restored, and
 * the saved registers are written back into the trapframe.  Finally the
 * alternate signal stack settings are re-applied.
 *
 * Returns EJUSTRETURN on success.
 * NOTE(review): the return statements on the failure paths are not visible
 * in this listing.
 */
747 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
749 struct l_ucontext uc;
750 struct l_sigcontext *context;
754 struct trapframe *regs;
761 if (ldebug(rt_sigreturn))
762 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
765 * The trampoline code hands us the ucontext.
766 * It is unsafe to keep track of it ourselves, in the event that a
767 * program jumps out of a signal handler.
769 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
/* All register state is read from the kernel copy, never from user memory. */
772 context = &uc.uc_mcontext;
775 * Check for security violations.
/*
 * EFLAGS_SECURE holds when the new and current eflags differ only in bits
 * covered by PSL_USERCHANGE.
 */
777 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
778 eflags = context->sc_eflags;
779 if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
783 * Don't allow users to load a valid privileged %cs. Let the
784 * hardware check for invalid selectors, excess privilege in
785 * other selectors, invalid %eip's and invalid %esp's.
787 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
788 if (!CS_SECURE(context->sc_cs)) {
789 ksiginfo_init_trap(&ksi);
790 ksi.ksi_signo = SIGBUS;
791 ksi.ksi_code = BUS_OBJERR;
792 ksi.ksi_trapno = T_PROTFLT;
793 ksi.ksi_addr = (void *)regs->tf_eip;
794 trapsignal(td, &ksi);
/* Convert the saved Linux mask and install it as the thread's signal mask. */
798 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
799 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
802 * Restore signal context
804 /* %gs was restored by the trampoline. */
805 regs->tf_fs = context->sc_fs;
806 regs->tf_es = context->sc_es;
807 regs->tf_ds = context->sc_ds;
808 regs->tf_edi = context->sc_edi;
809 regs->tf_esi = context->sc_esi;
810 regs->tf_ebp = context->sc_ebp;
811 regs->tf_ebx = context->sc_ebx;
812 regs->tf_edx = context->sc_edx;
813 regs->tf_ecx = context->sc_ecx;
814 regs->tf_eax = context->sc_eax;
815 regs->tf_eip = context->sc_eip;
816 regs->tf_cs = context->sc_cs;
817 regs->tf_eflags = eflags;
818 regs->tf_esp = context->sc_esp_at_signal;
819 regs->tf_ss = context->sc_ss;
822 * call sigaltstack & ignore results..
/*
 * Rebuild a native stack descriptor from the Linux one (lss).
 * NOTE(review): the assignment of lss is not visible in this listing.
 */
825 ss.ss_sp = lss->ss_sp;
826 ss.ss_size = lss->ss_size;
827 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
830 if (ldebug(rt_sigreturn))
831 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
832 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
/* The result is discarded on purpose, as the (void) cast shows. */
834 (void)kern_sigaltstack(td, &ss, NULL);
836 return (EJUSTRETURN);
/*
 * sv_fetch_syscall_args hook of both sysentvecs: decode a Linux system call
 * from the thread's trapframe into *sa.
 *
 * The syscall number comes from %eax and the six arguments from %ebx, %ecx,
 * %edx, %esi, %edi and %ebp, in that order.  A number at or beyond sv_size
 * selects entry 0 of the syscall table instead.
 */
840 linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
843 struct trapframe *frame;
846 frame = td->td_frame;
848 sa->code = frame->tf_eax;
849 sa->args[0] = frame->tf_ebx;
850 sa->args[1] = frame->tf_ecx;
851 sa->args[2] = frame->tf_edx;
852 sa->args[3] = frame->tf_esi;
853 sa->args[4] = frame->tf_edi;
854 sa->args[5] = frame->tf_ebp; /* Unconfirmed */
/*
 * Out-of-range numbers fall back to table slot 0.
 * NOTE(review): the else line between these two assignments is not visible
 * in this listing.
 */
856 if (sa->code >= p->p_sysent->sv_size)
857 sa->callp = &p->p_sysent->sv_table[0];
859 sa->callp = &p->p_sysent->sv_table[sa->code];
860 sa->narg = sa->callp->sy_narg;
/* Preset the return values: retval[0] is 0, retval[1] is the current %edx. */
862 td->td_retval[0] = 0;
863 td->td_retval[1] = frame->tf_edx;
869 * If a linux binary is exec'ing something, try this image activator
870 * first. We override standard shell script execution in order to
871 * be able to modify the interpreter path. We only do this if a linux
872 * binary is doing the exec, so we do not create an EXEC module for it.
/*
 * sv_imgact_try hook of both sysentvecs.
 *
 * When the image header starts with SHELLMAGIC, the regular shell image
 * activator is run.  If that succeeds, linux_emul_convpath() is asked for an
 * alternate path to the interpreter, and the result (rpath) replaces both
 * imgp->interpreter_name and imgp->args->fname_buf.
 * NOTE(review): the condition guarding the rpath assignment and the return
 * statement are not visible in this listing.
 */
874 static int exec_linux_imgact_try(struct image_params *iparams);
877 exec_linux_imgact_try(struct image_params *imgp)
879 const char *head = (const char *)imgp->image_header;
884 * The interpreter for shell scripts run from a linux binary needs
885 * to be located in /compat/linux if possible in order to recursively
886 * maintain linux path emulation.
888 if (((const short *)head)[0] == SHELLMAGIC) {
890 * Run our normal shell image activator. If it succeeds attempt
891 * to use the alternate path for the interpreter. If an alternate
892 * path is found, use our stringspace to store it.
894 if ((error = exec_shell_imgact(imgp)) == 0) {
895 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
896 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
898 imgp->args->fname_buf =
899 imgp->interpreter_name = rpath;
906 * exec_setregs may initialize some registers differently than Linux
907 * does, thus potentially confusing Linux binaries. If necessary, we
908 * override the exec_setregs default(s) here.
/*
 * sv_setregs hook of both sysentvecs: run the native exec_setregs() first,
 * then apply the Linux-specific overrides.
 *
 * The visible override sets the pcb's initial x87 control word to
 * __LINUX_NPXCW__.
 * NOTE(review): the statement that implements the %gs override mentioned in
 * the inline comment is not visible in this listing.
 */
911 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
913 struct pcb *pcb = td->td_pcb;
915 exec_setregs(td, imgp, stack);
917 /* Linux sets %gs to 0, we default to _udatasel */
921 pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
/*
 * Store a platform string pointer through *dst.  linux_elf_modevent() calls
 * this with &linux_platform and then takes strlen() of the result.
 * NOTE(review): the function body is not visible in this listing, so how
 * the string is chosen cannot be stated here.
 */
925 linux_get_machine(const char **dst)
/*
 * System entry vector for Linux a.out binaries ("Linux a.out"), registered
 * with INIT_SYSENTVEC below.
 *
 * It shares the syscall table, the signal and errno translation tables, the
 * trampoline code and most hooks with elf_linux_sysvec.  The a.out-specific
 * choices visible here are: linux_fixup as sv_fixup, PS_STRINGS as
 * sv_psstrings, the generic exec_copyout_strings, and the SV_AOUT flag.
 * NOTE(review): some initializer lines and the closing brace are not
 * visible in this listing.
 */
943 struct sysentvec linux_sysvec = {
944 .sv_size = LINUX_SYS_MAXSYSCALL,
945 .sv_table = linux_sysent,
947 .sv_sigsize = LINUX_SIGTBLSZ,
948 .sv_sigtbl = bsd_to_linux_signal,
949 .sv_errsize = ELAST + 1,
950 .sv_errtbl = bsd_to_linux_errno,
951 .sv_transtrap = translate_traps,
952 .sv_fixup = linux_fixup,
953 .sv_sendsig = linux_sendsig,
954 .sv_sigcode = linux_sigcode,
955 .sv_szsigcode = &linux_szsigcode,
956 .sv_prepsyscall = NULL,
957 .sv_name = "Linux a.out",
959 .sv_imgact_try = exec_linux_imgact_try,
960 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
961 .sv_pagesize = PAGE_SIZE,
962 .sv_minuser = VM_MIN_ADDRESS,
963 .sv_maxuser = VM_MAXUSER_ADDRESS,
964 .sv_usrstack = LINUX_USRSTACK,
965 .sv_psstrings = PS_STRINGS,
966 .sv_stackprot = VM_PROT_ALL,
967 .sv_copyout_strings = exec_copyout_strings,
968 .sv_setregs = exec_linux_setregs,
971 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
972 .sv_set_syscall_retval = cpu_set_syscall_retval,
973 .sv_fetch_syscall_args = linux_fetch_syscall_args,
974 .sv_syscallnames = NULL,
975 .sv_shared_page_base = LINUX_SHAREDPAGE,
976 .sv_shared_page_len = PAGE_SIZE,
977 .sv_schedtail = linux_schedtail,
979 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
/*
 * System entry vector for Linux ELF binaries ("Linux ELF"), registered with
 * INIT_SYSENTVEC below and referenced by both ELF brand descriptors.
 *
 * Compared with linux_sysvec it uses elf_linux_fixup as sv_fixup,
 * elf32_coredump as sv_coredump, LINUX_PS_STRINGS as sv_psstrings,
 * linux_copyout_strings as sv_copyout_strings, and sets SV_SHP instead of
 * SV_AOUT in sv_flags.
 * NOTE(review): some initializer lines and the closing brace are not
 * visible in this listing.
 */
981 struct sysentvec elf_linux_sysvec = {
982 .sv_size = LINUX_SYS_MAXSYSCALL,
983 .sv_table = linux_sysent,
985 .sv_sigsize = LINUX_SIGTBLSZ,
986 .sv_sigtbl = bsd_to_linux_signal,
987 .sv_errsize = ELAST + 1,
988 .sv_errtbl = bsd_to_linux_errno,
989 .sv_transtrap = translate_traps,
990 .sv_fixup = elf_linux_fixup,
991 .sv_sendsig = linux_sendsig,
992 .sv_sigcode = linux_sigcode,
993 .sv_szsigcode = &linux_szsigcode,
994 .sv_prepsyscall = NULL,
995 .sv_name = "Linux ELF",
996 .sv_coredump = elf32_coredump,
997 .sv_imgact_try = exec_linux_imgact_try,
998 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
999 .sv_pagesize = PAGE_SIZE,
1000 .sv_minuser = VM_MIN_ADDRESS,
1001 .sv_maxuser = VM_MAXUSER_ADDRESS,
1002 .sv_usrstack = LINUX_USRSTACK,
1003 .sv_psstrings = LINUX_PS_STRINGS,
1004 .sv_stackprot = VM_PROT_ALL,
1005 .sv_copyout_strings = linux_copyout_strings,
1006 .sv_setregs = exec_linux_setregs,
1007 .sv_fixlimit = NULL,
1009 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
1010 .sv_set_syscall_retval = cpu_set_syscall_retval,
1011 .sv_fetch_syscall_args = linux_fetch_syscall_args,
1012 .sv_syscallnames = NULL,
1013 .sv_shared_page_base = LINUX_SHAREDPAGE,
1014 .sv_shared_page_len = PAGE_SIZE,
1015 .sv_schedtail = linux_schedtail,
1017 INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
/*
 * Vendor name used by linux_brandnote, and the value linux_trans_osrel()
 * requires in the first descriptor word of the note.
 */
1019 static char GNU_ABI_VENDOR[] = "GNU";
1020 static int GNULINUX_ABI_DESC = 0;
/*
 * trans_osrel callback of linux_brandnote: derive an OS release number from
 * an ELF note.
 *
 * The descriptor is found by skipping the note header and the name, whose
 * size is rounded up to sizeof(Elf32_Addr).  desc[0] must equal
 * GNULINUX_ABI_DESC.  The release is then encoded as
 * desc[1] * 1000000 + desc[2] * 1000 + desc[3].
 * NOTE(review): the return statements are not visible in this listing.
 */
1023 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1025 const Elf32_Word *desc;
1028 p = (uintptr_t)(note + 1);
1029 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1031 desc = (const Elf32_Word *)p;
1032 if (desc[0] != GNULINUX_ABI_DESC)
1036 * For linux we encode osrel as follows (see linux_mib.c):
1037 * VVVMMMIII (version, major, minor), see linux_mib.c.
1039 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
/*
 * Brand note shared by both Linux ELF brand descriptors.
 *
 * n_namesz is sizeof(GNU_ABI_VENDOR), which includes the terminating NUL.
 * n_descsz is 16 bytes; linux_trans_osrel() reads desc[0] through desc[3].
 * BN_TRANSLATE_OSREL is set together with the trans_osrel callback.
 * NOTE(review): one initializer line and the closing brace are not visible
 * in this listing.
 */
1044 static Elf_Brandnote linux_brandnote = {
1045 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1046 .hdr.n_descsz = 16, /* XXX at least 16 */
1048 .vendor = GNU_ABI_VENDOR,
1049 .flags = BN_TRANSLATE_OSREL,
1050 .trans_osrel = linux_trans_osrel
/*
 * ELF brand descriptor for Linux binaries whose interpreter is
 * /lib/ld-linux.so.1.  It uses elf_linux_sysvec, the /compat/linux emulation
 * path and the shared linux_brandnote.
 * NOTE(review): one initializer line and the closing brace are not visible
 * in this listing.
 */
1053 static Elf32_Brandinfo linux_brand = {
1054 .brand = ELFOSABI_LINUX,
1056 .compat_3_brand = "Linux",
1057 .emul_path = "/compat/linux",
1058 .interp_path = "/lib/ld-linux.so.1",
1059 .sysvec = &elf_linux_sysvec,
1060 .interp_newpath = NULL,
1061 .brand_note = &linux_brandnote,
1062 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * ELF brand descriptor for Linux binaries whose interpreter is
 * /lib/ld-linux.so.2.  In the visible fields it differs from linux_brand
 * only in interp_path.
 * NOTE(review): one initializer line and the closing brace are not visible
 * in this listing.
 */
1065 static Elf32_Brandinfo linux_glibc2brand = {
1066 .brand = ELFOSABI_LINUX,
1068 .compat_3_brand = "Linux",
1069 .emul_path = "/compat/linux",
1070 .interp_path = "/lib/ld-linux.so.2",
1071 .sysvec = &elf_linux_sysvec,
1072 .interp_newpath = NULL,
1073 .brand_note = &linux_brandnote,
1074 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * List of brand descriptors handled by this module.  linux_elf_modevent()
 * walks it until it reaches a NULL entry.
 * NOTE(review): the initializer entries are not visible in this listing.
 */
1077 Elf32_Brandinfo *linux_brandlist[] = {
/*
 * Module event handler for the Linux ELF emulation module.
 *
 * The first group of statements installs everything; the second group
 * removes it again.
 * NOTE(review): the switch on `type`, the error bookkeeping and the return
 * statements are not visible in this listing; the two groups are presumably
 * the load and unload cases.
 */
1084 linux_elf_modevent(module_t mod, int type, void *data)
1086 Elf32_Brandinfo **brandinfo;
1088 struct linux_ioctl_handler **lihp;
1089 struct linux_device_handler **ldhp;
/*
 * Install path: insert every brand, then register all ioctl and device
 * handlers found in the linker sets.
 */
1095 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1097 if (elf32_insert_brand_entry(*brandinfo) < 0)
1100 SET_FOREACH(lihp, linux_ioctl_handler_set)
1101 linux_ioctl_register_handler(*lihp);
1102 SET_FOREACH(ldhp, linux_device_handler_set)
1103 linux_device_register_handler(*ldhp);
/*
 * Initialise the emulation-data locks, the futex list and its mutex, then
 * hook the process exit and exec events.
 */
1104 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1105 sx_init(&emul_shared_lock, "emuldata->shared lock");
1106 LIST_INIT(&futex_list);
1107 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1108 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1110 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1112 linux_get_machine(&linux_platform);
1113 linux_szplatform = roundup(strlen(linux_platform) + 1,
1115 linux_osd_jail_register();
/* stclohz is stathz when that is non-zero, otherwise hz. */
1116 stclohz = (stathz ? stathz : hz);
1118 printf("Linux ELF exec handler installed\n");
1120 printf("cannot insert Linux ELF brand handler\n");
/*
 * Removal path: first check whether any brand is still in use, then remove
 * every brand entry and undo each registration made above.
 */
1123 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1125 if (elf32_brand_inuse(*brandinfo))
1128 for (brandinfo = &linux_brandlist[0];
1129 *brandinfo != NULL; ++brandinfo)
1130 if (elf32_remove_brand_entry(*brandinfo) < 0)
1134 SET_FOREACH(lihp, linux_ioctl_handler_set)
1135 linux_ioctl_unregister_handler(*lihp);
1136 SET_FOREACH(ldhp, linux_device_handler_set)
1137 linux_device_unregister_handler(*ldhp);
1138 mtx_destroy(&emul_lock);
1139 sx_destroy(&emul_shared_lock);
1140 mtx_destroy(&futex_mtx);
1141 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1142 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1143 linux_osd_jail_deregister();
1145 printf("Linux ELF exec handler removed\n");
1147 printf("Could not deinstall ELF interpreter entry\n");
/*
 * Module descriptor and its registration: the "linuxelf" module is declared
 * at SI_SUB_EXEC with SI_ORDER_ANY.
 * NOTE(review): the initializer fields of linux_elf_mod are not visible in
 * this listing.
 */
1155 static moduledata_t linux_elf_mod = {
1161 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);