2 * Copyright (c) 1994-1996 Søren Schmidt
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
60 #include <machine/cpu.h>
61 #include <machine/cputypes.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 #include <compat/linux/linux_emul.h>
68 #include <compat/linux/linux_futex.h>
69 #include <compat/linux/linux_mib.h>
70 #include <compat/linux/linux_misc.h>
71 #include <compat/linux/linux_signal.h>
72 #include <compat/linux/linux_util.h>
/*
 * Module identity and small constants.  SHELLMAGIC is the two-byte
 * script marker (0x23 hash, 0x21 exclamation mark) as it reads when the
 * first two bytes of an image header are loaded as one 16-bit value, so
 * the constant differs by byte order.
 * NOTE(review): only the opening #if and both #define lines are visible
 * here; the #else/#endif lines are presumably missing from this listing.
 */
74 MODULE_VERSION(linux, 1);
76 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
78 #if BYTE_ORDER == LITTLE_ENDIAN
79 #define SHELLMAGIC 0x2123 /* #! */
81 #define SHELLMAGIC 0x2321
85 * Allow the sendsig functions to use the ldebug() facility
86 * even though they are not syscalls themselves. Map them
87 * to syscall 0. This is slightly less bogus than using
/*
 * Pseudo syscall numbers for the two signal delivery routines.  Both are
 * defined as 0 so that ldebug(rt_sendsig) and ldebug(sendsig) can be
 * used inside routines that are not system calls themselves.
 */
90 #define LINUX_SYS_linux_rt_sendsig 0
91 #define LINUX_SYS_linux_sendsig 0
/*
 * File-scope declarations: symbols provided elsewhere (the signal
 * trampoline and its size, the syscall table), the linker sets walked at
 * module load and unload, forward declarations for the hooks installed
 * in the sysentvec initializers, and module state.
 */
93 extern char linux_sigcode[];
94 extern int linux_szsigcode;
96 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
98 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
99 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
101 static int linux_fixup(register_t **stack_base,
102 struct image_params *iparams);
103 static int elf_linux_fixup(register_t **stack_base,
104 struct image_params *iparams);
105 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
106 static void exec_linux_setregs(struct thread *td, u_long entry,
107 u_long stack, u_long ps_strings);
108 static register_t *linux_copyout_strings(struct image_params *imgp);
109 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
/* Platform string and its rounded-up length; both are set at module load. */
111 static int linux_szplatform;
112 const char *linux_platform;
/* Tags returned by EVENTHANDLER_REGISTER, kept for deregistration. */
114 static eventhandler_tag linux_exit_tag;
115 static eventhandler_tag linux_exec_tag;
118 * Linux syscalls return negative errno's, we do positive and map them
120 * FreeBSD: src/sys/sys/errno.h
121 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
122 * linux-2.6.17.8/include/asm-generic/errno.h
/*
 * Errno translation table with ELAST + 1 slots, installed as .sv_errtbl
 * (with .sv_errsize = ELAST + 1) in both sysentvec initializers in this
 * file.  Every visible entry is zero or negative.
 * Presumably indexed by the FreeBSD errno value and yielding the negated
 * Linux errno -- confirm against the consumer of sv_errtbl.
 * NOTE(review): the last initializer row(s) and the closing brace are
 * not visible in this listing.
 */
124 static int bsd_to_linux_errno[ELAST + 1] = {
125 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
126 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
127 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
128 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
129 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
130 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
131 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
132 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
133 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
/*
 * FreeBSD-to-Linux signal number table.  Installed as .sv_sigtbl in both
 * sysentvec initializers and read by the sendsig routines in this file
 * as sv_sigtbl[_SIG_IDX(sig)].
 * Two slots hold 0; presumably these are FreeBSD signals with no Linux
 * counterpart -- confirm.
 */
137 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
138 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
139 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
140 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
141 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
142 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
143 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
144 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
145 0, LINUX_SIGUSR1, LINUX_SIGUSR2
/*
 * Linux-to-FreeBSD signal number table, the reverse direction of
 * bsd_to_linux_signal.  It is not referenced by the code visible in this
 * listing; it has external linkage, so presumably other files use it.
 * SIGBUS and SIGURG each appear twice, i.e. more than one Linux slot
 * maps onto the same FreeBSD signal.
 */
148 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
149 SIGHUP, SIGINT, SIGQUIT, SIGILL,
150 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
151 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
152 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
153 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
154 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
155 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
156 SIGIO, SIGURG, SIGSYS
/*
 * Trap number translation table.  The trailing comment on each entry
 * gives the table position and, where one exists, the FreeBSD T_* name
 * for that position; the value is what the sendsig routines store in
 * sc_trapno of the Linux signal context.  LINUX_T_UNKNOWN (255) fills
 * positions that have no mapping.
 * NOTE(review): positions 3, 18-22 and 28 and the closing brace are not
 * visible in this listing.
 */
159 #define LINUX_T_UNKNOWN 255
160 static int _bsd_to_linux_trapcode[] = {
161 LINUX_T_UNKNOWN, /* 0 */
162 6, /* 1 T_PRIVINFLT */
163 LINUX_T_UNKNOWN, /* 2 */
165 LINUX_T_UNKNOWN, /* 4 */
166 LINUX_T_UNKNOWN, /* 5 */
167 16, /* 6 T_ARITHTRAP */
168 254, /* 7 T_ASTFLT */
169 LINUX_T_UNKNOWN, /* 8 */
170 13, /* 9 T_PROTFLT */
171 1, /* 10 T_TRCTRAP */
172 LINUX_T_UNKNOWN, /* 11 */
173 14, /* 12 T_PAGEFLT */
174 LINUX_T_UNKNOWN, /* 13 */
175 17, /* 14 T_ALIGNFLT */
176 LINUX_T_UNKNOWN, /* 15 */
177 LINUX_T_UNKNOWN, /* 16 */
178 LINUX_T_UNKNOWN, /* 17 */
184 8, /* 23 T_DOUBLEFLT */
185 9, /* 24 T_FPOPFLT */
186 10, /* 25 T_TSSFLT */
187 11, /* 26 T_SEGNPFLT */
188 12, /* 27 T_STKFLT */
190 19, /* 29 T_XMMFLT */
191 15 /* 30 T_RESERVED */
/*
 * Bounds-checked lookup in _bsd_to_linux_trapcode: when code is below
 * the element count of the table, the table entry is used.
 * The argument is expanded twice, so it must be free of side effects.
 * NOTE(review): the fallback arm of the conditional expression is not
 * visible in this listing.
 */
193 #define bsd_to_linux_trapcode(code) \
194 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
195 _bsd_to_linux_trapcode[(code)]: \
199 * If FreeBSD & Linux have a difference of opinion about what a trap
200 * means, deal with it here.
/*
 * Trap-to-signal translation hook, installed as .sv_transtrap in both
 * sysentvec initializers in this file.  Takes the signal chosen for a
 * trap and the trap code.  The only visible statement separates signals
 * other than SIGBUS from SIGBUS.
 * NOTE(review): the return type and the rest of the body are not
 * visible in this listing.
 */
205 translate_traps(int signal, int trap_code)
207 if (signal != SIGBUS)
/*
 * Stack fixup hook for a.out images (.sv_fixup of linux_sysvec).
 * Computes envp as *stack_base + argc + 1 and writes envp, argv and argc
 * through *stack_base with suword().
 * NOTE(review): the assignment to argv, the statements that move
 * *stack_base between the stores, and the return are not visible in
 * this listing.  The suword() results are not checked in the visible
 * lines.
 */
221 linux_fixup(register_t **stack_base, struct image_params *imgp)
223 register_t *argv, *envp;
226 envp = *stack_base + (imgp->args->argc + 1);
228 suword(*stack_base, (intptr_t)(void *)envp);
230 suword(*stack_base, (intptr_t)(void *)argv);
232 suword(*stack_base, imgp->args->argc);
/*
 * Stack fixup hook for ELF images (.sv_fixup of elf_linux_sysvec).
 * Asserts that the current thread belongs to imgp->proc, then writes the
 * ELF auxiliary vector with AUXARGS_ENTRY starting at
 * *stack_base + argc + envc + 2, taking the values from imgp->auxargs.
 * Afterwards imgp->auxargs is freed (M_TEMP) and cleared, and argc is
 * stored at *stack_base with suword().
 *
 * uplatform is computed from the ps_strings address minus
 * linux_szsigcode (a further term of that expression is not visible) and
 * is exported as LINUX_AT_PLATFORM.
 * LINUX_AT_CLKTCK is emitted only when the emulated kernel version is at
 * least LINUX_KERNVER_2004000; AT_EXECFD only when execfd is not -1.
 * NOTE(review): several declarations and statements (p, pos, args, the
 * stack_base adjustment, the return) are not visible in this listing.
 */
237 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
241 Elf32_Addr *uplatform;
242 struct ps_strings *arginfo;
245 KASSERT(curthread->td_proc == imgp->proc,
246 ("unsafe elf_linux_fixup(), should be curproc"));
249 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
250 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
252 args = (Elf32_Auxargs *)imgp->auxargs;
253 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
255 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
258 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
259 * as it has appeared in the 2.4.0-rc7 first time.
260 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
261 * glibc falls back to the hard-coded CLK_TCK value when aux entry
263 * Also see linux_times() implementation.
265 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
266 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
267 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
268 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
269 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
270 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
271 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
272 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
273 AUXARGS_ENTRY(pos, AT_BASE, args->base);
274 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
/*
 * NOTE(review): AT_EUID and AT_EGID below are filled from the saved ids
 * (cr_svuid, cr_svgid) rather than from effective-id fields -- confirm
 * that this is intended.
 */
275 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
276 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
277 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
278 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
279 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
280 if (args->execfd != -1)
281 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
282 AUXARGS_ENTRY(pos, AT_NULL, 0);
284 free(imgp->auxargs, M_TEMP);
285 imgp->auxargs = NULL;
288 suword(*stack_base, (register_t)imgp->args->argc);
293 * Copied from kern/kern_exec.c
/*
 * String copy-out hook for ELF images (.sv_copyout_strings of
 * elf_linux_sysvec).  Working downward from the ps_strings address
 * (arginfo) it:
 *   - copies the signal trampoline to arginfo - linux_szsigcode;
 *   - copies the platform string directly below the trampoline;
 *   - copies the argument and environment strings to destp;
 *   - reserves the pointer vectors at vectp, with extra room for
 *     imgp->auxarg_size entries in the first of the two vectp
 *     computations, and fills them with the user addresses of the
 *     strings;
 *   - records vector addresses and counts in the ps_strings structure.
 * stack_base is set to vectp; presumably it is the value returned, the
 * return statement is not visible in this listing.
 * NOTE(review): the results of copyout() and suword() are not checked
 * in the visible lines.  Declarations, the conditional that selects
 * between the two vectp computations, the inner loop bodies and the
 * NULL terminator stores are not visible in this listing.
 */
296 linux_copyout_strings(struct image_params *imgp)
300 char *stringp, *destp;
301 register_t *stack_base;
302 struct ps_strings *arginfo;
306 * Calculate string base and vector table pointers.
307 * Also deal with signal trampoline code for this exec type.
310 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
311 destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
312 linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
318 copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
319 linux_szsigcode), linux_szsigcode);
322 * install LINUX_PLATFORM
324 copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
325 linux_szplatform), linux_szplatform);
328 * If we have a valid auxargs ptr, prepare some room
333 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
334 * lower compatibility.
336 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
337 (LINUX_AT_COUNT * 2);
339 * The '+ 2' is for the null pointers at the end of each of
340 * the arg and env vector sets,and imgp->auxarg_size is room
341 * for argument of Runtime loader.
343 vectp = (char **)(destp - (imgp->args->argc +
344 imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
347 * The '+ 2' is for the null pointers at the end of each of
348 * the arg and env vector sets
350 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
355 * vectp also becomes our initial stack base
357 stack_base = (register_t *)vectp;
359 stringp = imgp->args->begin_argv;
360 argc = imgp->args->argc;
361 envc = imgp->args->envc;
364 * Copy out strings - arguments and environment.
366 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
369 * Fill in "ps_strings" struct for ps, w, etc.
371 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
372 suword(&arginfo->ps_nargvstr, argc);
375 * Fill in argument portion of vector table.
377 for (; argc > 0; --argc) {
378 suword(vectp++, (long)(intptr_t)destp);
379 while (*stringp++ != 0)
384 /* a null vector table pointer separates the argp's from the envp's */
387 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
388 suword(&arginfo->ps_nenvstr, envc);
391 * Fill in environment portion of vector table.
393 for (; envc > 0; --envc) {
394 suword(vectp++, (long)(intptr_t)destp);
395 while (*stringp++ != 0)
400 /* end of vector table is a null pointer */
/*
 * Symbols used by the signal delivery routines in this file: the
 * selectors loaded into the trapframe segment fields before entering a
 * handler, and the offset added to the trampoline start address when
 * delivering through linux_rt_sendsig.
 */
408 extern int _ucodesel, _udatasel;
409 extern unsigned long linux_sznonrtsigcode;
/*
 * Deliver a signal using the rt frame layout (struct l_rt_sigframe).
 * Called from linux_sendsig when the signal is a member of ps_siginfo.
 *
 * catcher: user handler address stored in the frame.
 * ksi:     signal information; supplies signo, code and fault address.
 * mask:    signal mask saved in the frame for the later sigreturn.
 *
 * Locking as visible here: the process lock and ps_mtx are asserted on
 * entry; ps_mtx is released before the frame is built and copied out,
 * and taken again at the end.
 *
 * The frame goes on the alternate signal stack when TDP_ALTSTACK is set,
 * the thread is not already on that stack and the signal is a member of
 * ps_sigonstack; otherwise it goes immediately below the current
 * tf_esp.  The signal number is translated through sv_sigtbl before it
 * is handed to ksiginfo_to_lsiginfo().  After a successful copyout the
 * trapframe is pointed at the trampoline
 * (PS_STRINGS - szsigcode + linux_sznonrtsigcode) with user selectors
 * and with PSL_T, PSL_VM and PSL_D cleared.
 * NOTE(review): declarations, the assignments of regs and psp, the
 * process lock handling and the body of the copyout failure path are
 * not visible in this listing.
 */
412 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
414 struct thread *td = curthread;
415 struct proc *p = td->td_proc;
417 struct trapframe *regs;
418 struct l_rt_sigframe *fp, frame;
422 sig = ksi->ksi_signo;
423 code = ksi->ksi_code;
424 PROC_LOCK_ASSERT(p, MA_OWNED);
426 mtx_assert(&psp->ps_mtx, MA_OWNED);
428 oonstack = sigonstack(regs->tf_esp);
431 if (ldebug(rt_sendsig))
432 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
433 catcher, sig, (void*)mask, code);
436 * Allocate space for the signal handler context.
438 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
439 SIGISMEMBER(psp->ps_sigonstack, sig)) {
440 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
441 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
443 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
444 mtx_unlock(&psp->ps_mtx);
447 * Build the argument list for the signal handler.
449 if (p->p_sysent->sv_sigtbl)
450 if (sig <= p->p_sysent->sv_sigsize)
451 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
453 bzero(&frame, sizeof(frame));
455 frame.sf_handler = catcher;
457 frame.sf_siginfo = &fp->sf_si;
458 frame.sf_ucontext = &fp->sf_sc;
460 /* Fill in POSIX parts */
461 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
464 * Build the signal context to be used by sigreturn.
466 frame.sf_sc.uc_flags = 0; /* XXX ??? */
467 frame.sf_sc.uc_link = NULL; /* XXX ??? */
469 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
470 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
471 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
472 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
475 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
477 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
478 frame.sf_sc.uc_mcontext.sc_gs = rgs();
479 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
480 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
481 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
482 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
483 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
484 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
485 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
486 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
487 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
488 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
489 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
490 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
491 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
492 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
493 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
494 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
495 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr;
/*
 * NOTE(review): the trap number below is derived from code, which holds
 * ksi->ksi_code, whereas linux_sendsig passes ksi->ksi_trapno to the
 * same macro.  One of the two is probably wrong -- confirm.
 */
496 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
499 if (ldebug(rt_sendsig))
500 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
501 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
502 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
505 if (copyout(&frame, fp, sizeof(frame)) != 0) {
507 * Process has trashed its stack; give it an illegal
508 * instruction to halt it in its tracks.
511 if (ldebug(rt_sendsig))
512 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
520 * Build context to run handler in.
522 regs->tf_esp = (int)fp;
523 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
524 linux_sznonrtsigcode;
525 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
526 regs->tf_cs = _ucodesel;
527 regs->tf_ds = _udatasel;
528 regs->tf_es = _udatasel;
529 regs->tf_fs = _udatasel;
530 regs->tf_ss = _udatasel;
532 mtx_lock(&psp->ps_mtx);
537 * Send an interrupt to process.
539 * Stack is set up to allow sigcode stored
540 * in u. to call routine, followed by kcall
541 * to sigreturn routine below. After sigreturn
542 * resets the signal mask, the stack, and the
543 * frame pointer, it returns to the user
/*
 * Signal delivery hook (.sv_sendsig of both sysentvec initializers in
 * this file).  When the signal is a member of ps_siginfo the work is
 * handed to linux_rt_sendsig; otherwise a non-rt frame
 * (struct l_sigframe) is built here.
 *
 * catcher: user handler address stored in the frame.
 * ksi:     signal information; supplies signo, code, trap number and
 *          fault address.
 * mask:    signal mask saved in the frame; word 0 of the converted mask
 *          goes to sc_mask, the remaining words to sf_extramask.
 *
 * Locking as visible here: the process lock and ps_mtx are asserted on
 * entry; ps_mtx is released before the frame is built and copied out,
 * and taken again at the end.
 *
 * Frame placement follows the same rule as linux_rt_sendsig (alternate
 * stack when enabled, not already in use and requested for this signal;
 * otherwise just below tf_esp).  After a successful copyout the
 * trapframe is pointed at the trampoline start (PS_STRINGS - szsigcode)
 * with user selectors and with PSL_T, PSL_VM and PSL_D cleared.
 * NOTE(review): declarations, the assignments of regs and psp, the
 * return after the rt hand-off and the copyout failure path are not
 * visible in this listing.
 */
547 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
549 struct thread *td = curthread;
550 struct proc *p = td->td_proc;
552 struct trapframe *regs;
553 struct l_sigframe *fp, frame;
558 PROC_LOCK_ASSERT(p, MA_OWNED);
560 sig = ksi->ksi_signo;
561 code = ksi->ksi_code;
562 mtx_assert(&psp->ps_mtx, MA_OWNED);
563 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
564 /* Signal handler installed with SA_SIGINFO. */
565 linux_rt_sendsig(catcher, ksi, mask);
569 oonstack = sigonstack(regs->tf_esp);
573 printf(ARGS(sendsig, "%p, %d, %p, %u"),
574 catcher, sig, (void*)mask, code);
578 * Allocate space for the signal handler context.
580 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
581 SIGISMEMBER(psp->ps_sigonstack, sig)) {
582 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
583 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
585 fp = (struct l_sigframe *)regs->tf_esp - 1;
586 mtx_unlock(&psp->ps_mtx);
590 * Build the argument list for the signal handler.
592 if (p->p_sysent->sv_sigtbl)
593 if (sig <= p->p_sysent->sv_sigsize)
594 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
596 bzero(&frame, sizeof(frame));
598 frame.sf_handler = catcher;
601 bsd_to_linux_sigset(mask, &lmask);
604 * Build the signal context to be used by sigreturn.
606 frame.sf_sc.sc_mask = lmask.__bits[0];
607 frame.sf_sc.sc_gs = rgs();
608 frame.sf_sc.sc_fs = regs->tf_fs;
609 frame.sf_sc.sc_es = regs->tf_es;
610 frame.sf_sc.sc_ds = regs->tf_ds;
611 frame.sf_sc.sc_edi = regs->tf_edi;
612 frame.sf_sc.sc_esi = regs->tf_esi;
613 frame.sf_sc.sc_ebp = regs->tf_ebp;
614 frame.sf_sc.sc_ebx = regs->tf_ebx;
615 frame.sf_sc.sc_edx = regs->tf_edx;
616 frame.sf_sc.sc_ecx = regs->tf_ecx;
617 frame.sf_sc.sc_eax = regs->tf_eax;
618 frame.sf_sc.sc_eip = regs->tf_eip;
619 frame.sf_sc.sc_cs = regs->tf_cs;
620 frame.sf_sc.sc_eflags = regs->tf_eflags;
621 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
622 frame.sf_sc.sc_ss = regs->tf_ss;
623 frame.sf_sc.sc_err = regs->tf_err;
624 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr;
/*
 * NOTE(review): this routine translates ksi->ksi_trapno, whereas
 * linux_rt_sendsig passes ksi->ksi_code to the same macro -- confirm
 * which is intended.
 */
625 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
627 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
628 frame.sf_extramask[i] = lmask.__bits[i+1];
630 if (copyout(&frame, fp, sizeof(frame)) != 0) {
632 * Process has trashed its stack; give it an illegal
633 * instruction to halt it in its tracks.
640 * Build context to run handler in.
642 regs->tf_esp = (int)fp;
643 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
644 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
645 regs->tf_cs = _ucodesel;
646 regs->tf_ds = _udatasel;
647 regs->tf_es = _udatasel;
648 regs->tf_fs = _udatasel;
649 regs->tf_ss = _udatasel;
651 mtx_lock(&psp->ps_mtx);
655 * System call to cleanup state after a signal
656 * has been taken. Reset signal mask and
657 * stack state from context left by sendsig (above).
658 * Return to previous pc and psl as specified by
659 * context left by sendsig. Check carefully to
660 * make sure that the user has not modified the
661 * psl to gain improper privileges or to cause
/*
 * Return from a handler entered through the non-rt frame.
 *
 * td:   calling thread; its trapframe is rewritten.
 * args: args->sfp is the user address of the struct l_sigframe.
 *
 * Steps visible here: copy the frame in; check that the saved eflags
 * differ from the live ones only in PSL_USERCHANGE bits (PSL_RF is
 * ignored on both sides); check that the saved %cs has user privilege,
 * otherwise post SIGBUS / BUS_OBJERR with trap number T_PROTFLT via
 * trapsignal(); rebuild the signal mask from sc_mask plus sf_extramask
 * and install it with kern_sigprocmask(SIG_SETMASK); reload the
 * trapframe registers from the saved context.
 * Returns EJUSTRETURN on the success path.
 * NOTE(review): declarations, the assignment of regs and the return
 * values of the three failure paths are not visible in this listing.
 */
665 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
667 struct l_sigframe frame;
668 struct trapframe *regs;
677 if (ldebug(sigreturn))
678 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
681 * The trampoline code hands us the sigframe.
682 * It is unsafe to keep track of it ourselves, in the event that a
683 * program jumps out of a signal handler.
685 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
689 * Check for security violations.
691 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
692 eflags = frame.sf_sc.sc_eflags;
694 * XXX do allow users to change the privileged flag PSL_RF. The
695 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
696 * sometimes set it there too. tf_eflags is kept in the signal
697 * context during signal handling and there is no other place
698 * to remember it, so the PSL_RF bit may be corrupted by the
699 * signal handler without us knowing. Corruption of the PSL_RF
700 * bit at worst causes one more or one less debugger trap, so
701 * allowing it is fairly harmless.
703 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
707 * Don't allow users to load a valid privileged %cs. Let the
708 * hardware check for invalid selectors, excess privilege in
709 * other selectors, invalid %eip's and invalid %esp's.
711 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
712 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
713 ksiginfo_init_trap(&ksi);
714 ksi.ksi_signo = SIGBUS;
715 ksi.ksi_code = BUS_OBJERR;
716 ksi.ksi_trapno = T_PROTFLT;
717 ksi.ksi_addr = (void *)regs->tf_eip;
718 trapsignal(td, &ksi);
722 lmask.__bits[0] = frame.sf_sc.sc_mask;
723 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
724 lmask.__bits[i+1] = frame.sf_extramask[i];
725 linux_to_bsd_sigset(&lmask, &bmask);
726 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
729 * Restore signal context.
731 /* %gs was restored by the trampoline. */
732 regs->tf_fs = frame.sf_sc.sc_fs;
733 regs->tf_es = frame.sf_sc.sc_es;
734 regs->tf_ds = frame.sf_sc.sc_ds;
735 regs->tf_edi = frame.sf_sc.sc_edi;
736 regs->tf_esi = frame.sf_sc.sc_esi;
737 regs->tf_ebp = frame.sf_sc.sc_ebp;
738 regs->tf_ebx = frame.sf_sc.sc_ebx;
739 regs->tf_edx = frame.sf_sc.sc_edx;
740 regs->tf_ecx = frame.sf_sc.sc_ecx;
741 regs->tf_eax = frame.sf_sc.sc_eax;
742 regs->tf_eip = frame.sf_sc.sc_eip;
743 regs->tf_cs = frame.sf_sc.sc_cs;
744 regs->tf_eflags = eflags;
745 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
746 regs->tf_ss = frame.sf_sc.sc_ss;
748 return (EJUSTRETURN);
752 * System call to cleanup state after a signal
753 * has been taken. Reset signal mask and
754 * stack state from context left by rt_sendsig (above).
755 * Return to previous pc and psl as specified by
756 * context left by sendsig. Check carefully to
757 * make sure that the user has not modified the
758 * psl to gain improper privileges or to cause
/*
 * Return from a handler entered through the rt frame.
 *
 * td:   calling thread; its trapframe is rewritten.
 * args: args->ucp is the user address of the struct l_ucontext.
 *
 * Steps visible here: copy the ucontext in; apply the same eflags and
 * %cs checks as linux_sigreturn (a bad %cs posts SIGBUS / BUS_OBJERR
 * via trapsignal()); convert uc_sigmask and install it with
 * kern_sigprocmask(SIG_SETMASK); reload the trapframe registers from
 * uc_mcontext; finally rebuild a stack_t from lss and pass it to
 * kern_sigaltstack(), deliberately discarding the result.
 * Returns EJUSTRETURN on the success path.
 * NOTE(review): declarations, the assignments of regs and lss and the
 * return values of the failure paths are not visible in this listing.
 */
762 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
764 struct l_ucontext uc;
765 struct l_sigcontext *context;
769 struct trapframe *regs;
776 if (ldebug(rt_sigreturn))
777 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
780 * The trampoline code hands us the ucontext.
781 * It is unsafe to keep track of it ourselves, in the event that a
782 * program jumps out of a signal handler.
784 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
787 context = &uc.uc_mcontext;
790 * Check for security violations.
792 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
793 eflags = context->sc_eflags;
795 * XXX do allow users to change the privileged flag PSL_RF. The
796 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
797 * sometimes set it there too. tf_eflags is kept in the signal
798 * context during signal handling and there is no other place
799 * to remember it, so the PSL_RF bit may be corrupted by the
800 * signal handler without us knowing. Corruption of the PSL_RF
801 * bit at worst causes one more or one less debugger trap, so
802 * allowing it is fairly harmless.
804 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
808 * Don't allow users to load a valid privileged %cs. Let the
809 * hardware check for invalid selectors, excess privilege in
810 * other selectors, invalid %eip's and invalid %esp's.
812 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
813 if (!CS_SECURE(context->sc_cs)) {
814 ksiginfo_init_trap(&ksi);
815 ksi.ksi_signo = SIGBUS;
816 ksi.ksi_code = BUS_OBJERR;
817 ksi.ksi_trapno = T_PROTFLT;
818 ksi.ksi_addr = (void *)regs->tf_eip;
819 trapsignal(td, &ksi);
823 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
824 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
827 * Restore signal context
829 /* %gs was restored by the trampoline. */
830 regs->tf_fs = context->sc_fs;
831 regs->tf_es = context->sc_es;
832 regs->tf_ds = context->sc_ds;
833 regs->tf_edi = context->sc_edi;
834 regs->tf_esi = context->sc_esi;
835 regs->tf_ebp = context->sc_ebp;
836 regs->tf_ebx = context->sc_ebx;
837 regs->tf_edx = context->sc_edx;
838 regs->tf_ecx = context->sc_ecx;
839 regs->tf_eax = context->sc_eax;
840 regs->tf_eip = context->sc_eip;
841 regs->tf_cs = context->sc_cs;
842 regs->tf_eflags = eflags;
843 regs->tf_esp = context->sc_esp_at_signal;
844 regs->tf_ss = context->sc_ss;
847 * call sigaltstack & ignore results..
850 ss.ss_sp = lss->ss_sp;
851 ss.ss_size = lss->ss_size;
852 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
855 if (ldebug(rt_sigreturn))
856 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
857 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
859 (void)kern_sigaltstack(td, &ss, NULL);
861 return (EJUSTRETURN);
/*
 * Syscall argument fetch hook (.sv_fetch_syscall_args of both
 * sysentvec initializers in this file).
 *
 * td: calling thread; its trapframe is read and td_retval is preset.
 * sa: filled in with the syscall number, six arguments, the sysent
 *     entry and the argument count.
 *
 * The syscall number comes from eax and the arguments from ebx, ecx,
 * edx, esi, edi and ebp, in that order.  A number at or above sv_size
 * selects table entry 0 instead of indexing out of range.
 * td_retval[0] is preset to 0 and td_retval[1] to the current edx.
 * NOTE(review): the declaration and assignment of p, the else keyword
 * and the return are not visible in this listing.
 */
865 linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
868 struct trapframe *frame;
871 frame = td->td_frame;
873 sa->code = frame->tf_eax;
874 sa->args[0] = frame->tf_ebx;
875 sa->args[1] = frame->tf_ecx;
876 sa->args[2] = frame->tf_edx;
877 sa->args[3] = frame->tf_esi;
878 sa->args[4] = frame->tf_edi;
879 sa->args[5] = frame->tf_ebp; /* Unconfirmed */
881 if (sa->code >= p->p_sysent->sv_size)
882 sa->callp = &p->p_sysent->sv_table[0];
884 sa->callp = &p->p_sysent->sv_table[sa->code];
885 sa->narg = sa->callp->sy_narg;
887 td->td_retval[0] = 0;
888 td->td_retval[1] = frame->tf_edx;
894 * If a linux binary is exec'ing something, try this image activator
895 * first. We override standard shell script execution in order to
896 * be able to modify the interpreter path. We only do this if a linux
897 * binary is doing the exec, so we do not create an EXEC module for it.
/*
 * Image activator pre-hook (.sv_imgact_try of both sysentvec
 * initializers in this file).
 *
 * When the first 16-bit word of the image header equals SHELLMAGIC, the
 * regular shell activator exec_shell_imgact() is run.  If that succeeds,
 * linux_emul_convpath() is asked for an alternate path for
 * imgp->interpreter_name; a result whose length including the
 * terminating NUL is at most MAXSHELLCMDLEN is copied over
 * imgp->interpreter_name.
 * NOTE(review): the declarations, the handling of rpath (NULL check,
 * release) and the return are not visible in this listing.
 */
899 static int exec_linux_imgact_try(struct image_params *iparams);
902 exec_linux_imgact_try(struct image_params *imgp)
904 const char *head = (const char *)imgp->image_header;
909 * The interpreter for shell scripts run from a linux binary needs
910 * to be located in /compat/linux if possible in order to recursively
911 * maintain linux path emulation.
913 if (((const short *)head)[0] == SHELLMAGIC) {
915 * Run our normal shell image activator. If it succeeds attempt
916 * to use the alternate path for the interpreter. If an alternate
917 * path is found, use our stringspace to store it.
919 if ((error = exec_shell_imgact(imgp)) == 0) {
920 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
921 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
923 len = strlen(rpath) + 1;
925 if (len <= MAXSHELLCMDLEN) {
926 memcpy(imgp->interpreter_name, rpath, len);
936 * exec_setregs may initialize some registers differently than Linux
937 * does, thus potentially confusing Linux binaries. If necessary, we
938 * override the exec_setregs default(s) here.
/*
 * Register setup hook (.sv_setregs of both sysentvec initializers in
 * this file).  Runs the generic exec_setregs() with the same arguments
 * and then applies Linux-specific overrides: the existing comment names
 * %gs, and the visible assignment sets the initial FPU control word in
 * the pcb to __LINUX_NPXCW__.
 * NOTE(review): the statement that changes %gs is not visible in this
 * listing.
 */
941 exec_linux_setregs(struct thread *td, u_long entry,
942 u_long stack, u_long ps_strings)
944 struct pcb *pcb = td->td_pcb;
946 exec_setregs(td, entry, stack, ps_strings);
948 /* Linux sets %gs to 0, we default to _udatasel */
952 pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
/*
 * Stores a machine/platform name string through dst.  The module event
 * handler in this file calls it with the address of linux_platform and
 * then takes strlen() of the result.
 * NOTE(review): the return type and the whole body are not visible in
 * this listing.
 */
956 linux_get_machine(const char **dst)
/*
 * System entry vector for Linux a.out binaries (flags include SV_AOUT).
 * It shares the syscall table, signal and errno tables, sendsig,
 * setregs and syscall-argument hooks with elf_linux_sysvec; it differs
 * in using linux_fixup for .sv_fixup and the generic
 * exec_copyout_strings for .sv_copyout_strings.
 * NOTE(review): a few initializer lines and the closing brace are not
 * visible in this listing.
 */
974 struct sysentvec linux_sysvec = {
975 .sv_size = LINUX_SYS_MAXSYSCALL,
976 .sv_table = linux_sysent,
978 .sv_sigsize = LINUX_SIGTBLSZ,
979 .sv_sigtbl = bsd_to_linux_signal,
980 .sv_errsize = ELAST + 1,
981 .sv_errtbl = bsd_to_linux_errno,
982 .sv_transtrap = translate_traps,
983 .sv_fixup = linux_fixup,
984 .sv_sendsig = linux_sendsig,
985 .sv_sigcode = linux_sigcode,
986 .sv_szsigcode = &linux_szsigcode,
987 .sv_prepsyscall = NULL,
988 .sv_name = "Linux a.out",
990 .sv_imgact_try = exec_linux_imgact_try,
991 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
992 .sv_pagesize = PAGE_SIZE,
993 .sv_minuser = VM_MIN_ADDRESS,
994 .sv_maxuser = VM_MAXUSER_ADDRESS,
995 .sv_usrstack = USRSTACK,
996 .sv_psstrings = PS_STRINGS,
997 .sv_stackprot = VM_PROT_ALL,
998 .sv_copyout_strings = exec_copyout_strings,
999 .sv_setregs = exec_linux_setregs,
1000 .sv_fixlimit = NULL,
1002 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
1003 .sv_set_syscall_retval = cpu_set_syscall_retval,
1004 .sv_fetch_syscall_args = linux_fetch_syscall_args,
1005 .sv_syscallnames = NULL,
1006 .sv_schedtail = linux_schedtail,
/*
 * System entry vector for Linux ELF binaries; referenced by both brand
 * descriptors in this file.  Compared with linux_sysvec it uses
 * elf_linux_fixup for .sv_fixup, linux_copyout_strings for
 * .sv_copyout_strings, sets .sv_coredump to elf32_coredump and omits
 * SV_AOUT from the flags.
 * NOTE(review): a few initializer lines and the closing brace are not
 * visible in this listing.
 */
1009 struct sysentvec elf_linux_sysvec = {
1010 .sv_size = LINUX_SYS_MAXSYSCALL,
1011 .sv_table = linux_sysent,
1013 .sv_sigsize = LINUX_SIGTBLSZ,
1014 .sv_sigtbl = bsd_to_linux_signal,
1015 .sv_errsize = ELAST + 1,
1016 .sv_errtbl = bsd_to_linux_errno,
1017 .sv_transtrap = translate_traps,
1018 .sv_fixup = elf_linux_fixup,
1019 .sv_sendsig = linux_sendsig,
1020 .sv_sigcode = linux_sigcode,
1021 .sv_szsigcode = &linux_szsigcode,
1022 .sv_prepsyscall = NULL,
1023 .sv_name = "Linux ELF",
1024 .sv_coredump = elf32_coredump,
1025 .sv_imgact_try = exec_linux_imgact_try,
1026 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1027 .sv_pagesize = PAGE_SIZE,
1028 .sv_minuser = VM_MIN_ADDRESS,
1029 .sv_maxuser = VM_MAXUSER_ADDRESS,
1030 .sv_usrstack = USRSTACK,
1031 .sv_psstrings = PS_STRINGS,
1032 .sv_stackprot = VM_PROT_ALL,
1033 .sv_copyout_strings = linux_copyout_strings,
1034 .sv_setregs = exec_linux_setregs,
1035 .sv_fixlimit = NULL,
1037 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32,
1038 .sv_set_syscall_retval = cpu_set_syscall_retval,
1039 .sv_fetch_syscall_args = linux_fetch_syscall_args,
1040 .sv_syscallnames = NULL,
1041 .sv_schedtail = linux_schedtail,
/*
 * Values used for ELF brand notes: the vendor name used by
 * linux_brandnote (its size, including the NUL, is the expected note
 * name size) and the value that word 0 of the note descriptor must hold
 * for linux_trans_osrel to accept the note.
 */
1044 static char GNU_ABI_VENDOR[] = "GNU";
1045 static int GNULINUX_ABI_DESC = 0;
/*
 * Brand-note callback (.trans_osrel of linux_brandnote) that derives an
 * osrel number from an ELF note.
 *
 * note:  note header; the descriptor is located after the header and
 *        the name, whose size is rounded up to sizeof(Elf32_Addr).
 * osrel: receives desc[1] * 1000000 + desc[2] * 1000 + desc[3].
 *
 * Descriptor word 0 is compared with GNULINUX_ABI_DESC first.  Four
 * descriptor words are read in total.
 * NOTE(review): the declaration of p, the result returned on mismatch
 * and the final return are not visible in this listing.
 */
1048 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1050 const Elf32_Word *desc;
1053 p = (uintptr_t)(note + 1);
1054 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1056 desc = (const Elf32_Word *)p;
1057 if (desc[0] != GNULINUX_ABI_DESC)
1061 * For linux we encode osrel as follows (see linux_mib.c):
1062 * VVVMMMIII (version, major, minor), see linux_mib.c.
1064 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
/*
 * Brand note descriptor shared by both Linux ELF brands in this file.
 * The note name must be GNU_ABI_VENDOR; the descriptor size is given as
 * 16 bytes, which matches the four 32-bit words read by
 * linux_trans_osrel.  BN_TRANSLATE_OSREL together with .trans_osrel
 * routes osrel extraction through that callback.
 * NOTE(review): one initializer line and the closing brace are not
 * visible in this listing.
 */
1069 static Elf_Brandnote linux_brandnote = {
1070 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1071 .hdr.n_descsz = 16, /* XXX at least 16 */
1073 .vendor = GNU_ABI_VENDOR,
1074 .flags = BN_TRANSLATE_OSREL,
1075 .trans_osrel = linux_trans_osrel
/*
 * ELF brand descriptor for Linux binaries whose interpreter path is
 * /lib/ld-linux.so.1.  Uses elf_linux_sysvec, the /compat/linux
 * emulation path and the shared linux_brandnote.
 * NOTE(review): one initializer line and the closing brace are not
 * visible in this listing.
 */
1078 static Elf32_Brandinfo linux_brand = {
1079 .brand = ELFOSABI_LINUX,
1081 .compat_3_brand = "Linux",
1082 .emul_path = "/compat/linux",
1083 .interp_path = "/lib/ld-linux.so.1",
1084 .sysvec = &elf_linux_sysvec,
1085 .interp_newpath = NULL,
1086 .brand_note = &linux_brandnote,
1087 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * ELF brand descriptor identical to linux_brand in every visible field
 * except the interpreter path, which is /lib/ld-linux.so.2.
 * NOTE(review): one initializer line and the closing brace are not
 * visible in this listing.
 */
1090 static Elf32_Brandinfo linux_glibc2brand = {
1091 .brand = ELFOSABI_LINUX,
1093 .compat_3_brand = "Linux",
1094 .emul_path = "/compat/linux",
1095 .interp_path = "/lib/ld-linux.so.2",
1096 .sysvec = &elf_linux_sysvec,
1097 .interp_newpath = NULL,
1098 .brand_note = &linux_brandnote,
1099 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * List of brand descriptors walked by linux_elf_modevent, which stops
 * at the first NULL entry.
 * NOTE(review): the initializer entries are not visible in this
 * listing; presumably they are the two brands defined above followed by
 * NULL -- confirm.
 */
1102 Elf32_Brandinfo *linux_brandlist[] = {
/*
 * Module event handler for the Linux ELF emulation module.
 *
 * mod:  module handle.
 * type: event type.
 * data: event argument.
 *
 * Install path, as visible here: insert every brand from
 * linux_brandlist; then register all ioctl and device handlers from the
 * linker sets, initialize emul_lock, emul_shared_lock, futex_list and
 * futex_mtx, register the process_exit and process_exec event handlers,
 * fetch the platform string and compute its rounded-up size, register
 * the jail OSD hooks and set stclohz to stathz when that is non-zero,
 * else to hz.
 *
 * Removal path, as visible here: check every brand with
 * elf32_brand_inuse(), remove the brand entries, then undo the
 * registrations and destroy the locks.
 * NOTE(review): the switch/case structure, the error bookkeeping
 * between the steps, the last arguments of the EVENTHANDLER_REGISTER
 * calls and the return are not visible in this listing.
 */
1109 linux_elf_modevent(module_t mod, int type, void *data)
1111 Elf32_Brandinfo **brandinfo;
1113 struct linux_ioctl_handler **lihp;
1114 struct linux_device_handler **ldhp;
1120 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1122 if (elf32_insert_brand_entry(*brandinfo) < 0)
1125 SET_FOREACH(lihp, linux_ioctl_handler_set)
1126 linux_ioctl_register_handler(*lihp);
1127 SET_FOREACH(ldhp, linux_device_handler_set)
1128 linux_device_register_handler(*ldhp);
1129 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1130 sx_init(&emul_shared_lock, "emuldata->shared lock");
1131 LIST_INIT(&futex_list);
1132 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1133 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1135 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1137 linux_get_machine(&linux_platform);
1138 linux_szplatform = roundup(strlen(linux_platform) + 1,
1140 linux_osd_jail_register();
1141 stclohz = (stathz ? stathz : hz);
1143 printf("Linux ELF exec handler installed\n");
1145 printf("cannot insert Linux ELF brand handler\n");
1148 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1150 if (elf32_brand_inuse(*brandinfo))
1153 for (brandinfo = &linux_brandlist[0];
1154 *brandinfo != NULL; ++brandinfo)
1155 if (elf32_remove_brand_entry(*brandinfo) < 0)
1159 SET_FOREACH(lihp, linux_ioctl_handler_set)
1160 linux_ioctl_unregister_handler(*lihp);
1161 SET_FOREACH(ldhp, linux_device_handler_set)
1162 linux_device_unregister_handler(*ldhp);
1163 mtx_destroy(&emul_lock);
1164 sx_destroy(&emul_shared_lock);
1165 mtx_destroy(&futex_mtx);
1166 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1167 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1168 linux_osd_jail_deregister();
1170 printf("Linux ELF exec handler removed\n");
1172 printf("Could not deinstall ELF interpreter entry\n");
/*
 * Module descriptor and its registration at SI_SUB_EXEC / SI_ORDER_ANY.
 * NOTE(review): the initializer fields of linux_elf_mod are not visible
 * in this listing; presumably they name the module and point at
 * linux_elf_modevent -- confirm.
 */
1180 static moduledata_t linux_elf_mod = {
1186 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);