2 * Copyright (c) 1994-1996 Søren Schmidt
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
60 #include <machine/cpu.h>
61 #include <machine/cputypes.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 #include <compat/linux/linux_emul.h>
68 #include <compat/linux/linux_futex.h>
69 #include <compat/linux/linux_ioctl.h>
70 #include <compat/linux/linux_mib.h>
71 #include <compat/linux/linux_misc.h>
72 #include <compat/linux/linux_signal.h>
73 #include <compat/linux/linux_util.h>
75 MODULE_VERSION(linux, 1);
77 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
/*
 * SHELLMAGIC is the 16-bit value that exec_linux_imgact_try() compares
 * against the first short of the image header to recognise a script that
 * starts with the two characters # and !.  Two byte-order dependent values
 * are given; only the little-endian one is selected by a visible condition.
 */
79 #if BYTE_ORDER == LITTLE_ENDIAN
80 #define SHELLMAGIC 0x2123 /* #! */
82 #define SHELLMAGIC 0x2321
86 * Allow the sendsig functions to use the ldebug() facility
87 * even though they are not syscalls themselves. Map them
88 * to syscall 0. This is slightly less bogus than using
91 #define LINUX_SYS_linux_rt_sendsig 0
92 #define LINUX_SYS_linux_sendsig 0
94 #define LINUX_PS_STRINGS (LINUX_USRSTACK - sizeof(struct ps_strings))
96 extern char linux_sigcode[];
97 extern int linux_szsigcode;
99 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
101 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
102 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
104 static int linux_fixup(register_t **stack_base,
105 struct image_params *iparams);
106 static int elf_linux_fixup(register_t **stack_base,
107 struct image_params *iparams);
108 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
109 static void exec_linux_setregs(struct thread *td,
110 struct image_params *imgp, u_long stack);
111 static register_t *linux_copyout_strings(struct image_params *imgp);
112 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
114 static int linux_szplatform;
115 const char *linux_platform;
117 static eventhandler_tag linux_exit_tag;
118 static eventhandler_tag linux_exec_tag;
119 static eventhandler_tag linux_thread_dtor_tag;
122 * Linux syscalls return negative errno values; FreeBSD uses positive ones, so map them here.
124 * FreeBSD: src/sys/sys/errno.h
125 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
126 * linux-2.6.17.8/include/asm-generic/errno.h
/*
 * Translation table from FreeBSD errno values to Linux ones.  The array is
 * indexed by the (positive) FreeBSD errno and holds the negated Linux errno,
 * so most entries are simply -index.  Entries that differ (for example
 * index 11 yields -35 and index 35 yields -11) mark codes that the two
 * systems number differently.  The table has ELAST + 1 entries and is
 * installed as .sv_errtbl in both Linux sysentvecs in this file.
 */
128 static int bsd_to_linux_errno[ELAST + 1] = {
129 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
130 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
131 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
132 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
133 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
134 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
135 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
136 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
137 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
/*
 * FreeBSD -> Linux signal number map with LINUX_SIGTBLSZ entries, installed
 * as .sv_sigtbl in both Linux sysentvecs.  The sendsig routines in this file
 * index it with _SIG_IDX(sig) to translate the signal number handed to the
 * handler.  Positions 6 and 28 hold 0; presumably those FreeBSD signals
 * have no Linux equivalent (TODO confirm against the signal headers).
 */
141 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
142 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
143 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
144 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
145 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
146 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
147 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
148 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
149 0, LINUX_SIGUSR1, LINUX_SIGUSR2
/*
 * Linux -> FreeBSD signal number map, the opposite direction of
 * bsd_to_linux_signal, also sized LINUX_SIGTBLSZ.  The mapping is not
 * one-to-one: SIGBUS appears at positions 6 and 15 and SIGURG at
 * positions 22 and 29.  No user of this table is visible in this excerpt.
 */
152 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
153 SIGHUP, SIGINT, SIGQUIT, SIGILL,
154 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
155 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
156 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
157 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
158 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
159 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
160 SIGIO, SIGURG, SIGSYS
/*
 * Map from FreeBSD trap type to the Linux trap number that the sendsig
 * routines store in the sc_trapno field of the signal context.  The array
 * index is the FreeBSD trap type (named in the per-entry comments); traps
 * without a listed Linux value use LINUX_T_UNKNOWN (255).  Access goes
 * through the bsd_to_linux_trapcode() macro.
 * NOTE(review): several entries are missing from this excerpt, so the
 * position of an entry here does not necessarily equal its array index.
 */
163 #define LINUX_T_UNKNOWN 255
164 static int _bsd_to_linux_trapcode[] = {
165 LINUX_T_UNKNOWN, /* 0 */
166 6, /* 1 T_PRIVINFLT */
167 LINUX_T_UNKNOWN, /* 2 */
169 LINUX_T_UNKNOWN, /* 4 */
170 LINUX_T_UNKNOWN, /* 5 */
171 16, /* 6 T_ARITHTRAP */
172 254, /* 7 T_ASTFLT */
173 LINUX_T_UNKNOWN, /* 8 */
174 13, /* 9 T_PROTFLT */
175 1, /* 10 T_TRCTRAP */
176 LINUX_T_UNKNOWN, /* 11 */
177 14, /* 12 T_PAGEFLT */
178 LINUX_T_UNKNOWN, /* 13 */
179 17, /* 14 T_ALIGNFLT */
180 LINUX_T_UNKNOWN, /* 15 */
181 LINUX_T_UNKNOWN, /* 16 */
182 LINUX_T_UNKNOWN, /* 17 */
188 8, /* 23 T_DOUBLEFLT */
189 9, /* 24 T_FPOPFLT */
190 10, /* 25 T_TSSFLT */
191 11, /* 26 T_SEGNPFLT */
192 12, /* 27 T_STKFLT */
194 19, /* 29 T_XMMFLT */
195 15 /* 30 T_RESERVED */
/*
 * Bounds-checked lookup in _bsd_to_linux_trapcode: when code is below the
 * number of table elements the table entry is the result.
 * NOTE(review): the fallback arm of the conditional is not visible in this
 * excerpt; presumably it yields LINUX_T_UNKNOWN - confirm.
 */
197 #define bsd_to_linux_trapcode(code) \
198 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
199 _bsd_to_linux_trapcode[(code)]: \
203 * If FreeBSD & Linux have a difference of opinion about what a trap
204 * means, deal with it here.
/*
 * Trap translation hook, installed as .sv_transtrap in both Linux
 * sysentvecs.  Takes the signal number and the trap code.  The only logic
 * visible in this excerpt is a test of whether the signal is SIGBUS.
 * NOTE(review): the remainder of the body is outside this view; confirm
 * what is returned in each case before relying on it.
 */
209 translate_traps(int signal, int trap_code)
211 if (signal != SIGBUS)
/*
 * Stack fixup for Linux a.out images (.sv_fixup of linux_sysvec).
 *
 * stack_base points at the initial user stack pointer; imgp supplies the
 * argument count.  envp is computed as argc + 1 words above the current
 * stack base, and then envp, argv and argc are written to user memory at
 * *stack_base with suword().
 * NOTE(review): the statements that set argv and that adjust *stack_base
 * between the three stores are not visible here; presumably the base is
 * moved down before each store so the values are pushed - confirm.
 * The suword() results are not checked in the visible lines.
 */
225 linux_fixup(register_t **stack_base, struct image_params *imgp)
227 register_t *argv, *envp;
230 envp = *stack_base + (imgp->args->argc + 1);
232 suword(*stack_base, (intptr_t)(void *)envp);
234 suword(*stack_base, (intptr_t)(void *)argv);
236 suword(*stack_base, imgp->args->argc);
/*
 * Stack fixup for Linux ELF images (.sv_fixup of elf_linux_sysvec).
 *
 * Emits the ELF auxiliary vector with AUXARGS_ENTRY(), starting
 * argc + envc + 2 words above *stack_base (that is, past the argv and envp
 * pointer arrays and their two terminators), then frees imgp->auxargs and
 * stores argc at *stack_base.
 *
 * The KASSERT requires that the current thread belongs to imgp->proc.
 * The address exported as LINUX_AT_PLATFORM lies linux_szplatform bytes
 * below the ps_strings structure, which is the same address that
 * linux_copyout_strings() copies linux_platform to.
 *
 * NOTE(review): AT_EUID and AT_EGID are filled from cr_svuid and cr_svgid
 * (the saved ids); confirm that these rather than the effective ids are
 * intended.
 */
241 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
245 Elf32_Addr *uplatform;
246 struct ps_strings *arginfo;
249 KASSERT(curthread->td_proc == imgp->proc,
250 ("unsafe elf_linux_fixup(), should be curproc"));
253 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
254 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
255 args = (Elf32_Auxargs *)imgp->auxargs;
256 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
258 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
261 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
262 * as it has appeared in the 2.4.0-rc7 first time.
263 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
264 * glibc falls back to the hard-coded CLK_TCK value when aux entry
266 * Also see linux_times() implementation.
268 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
269 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
270 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
271 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
272 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
273 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
274 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
275 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
276 AUXARGS_ENTRY(pos, AT_BASE, args->base);
277 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
278 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
279 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
280 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
281 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
282 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
283 if (args->execfd != -1)
284 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
285 AUXARGS_ENTRY(pos, AT_NULL, 0);
287 free(imgp->auxargs, M_TEMP);
288 imgp->auxargs = NULL;
291 suword(*stack_base, (register_t)imgp->args->argc);
296 * Copied from kern/kern_exec.c
/*
 * Linux ELF replacement for the generic string copy-out routine
 * (.sv_copyout_strings of elf_linux_sysvec).  Lays out the top of the new
 * user stack for the image described by imgp.
 *
 * Layout, from high to low addresses, as computed below:
 *   - the ps_strings structure at sv_psstrings;
 *   - the platform string, linux_szplatform bytes, directly below it;
 *   - SPARE_USRSPACE bytes;
 *   - the argument and environment strings, copied out in one block;
 *   - the pointer vector: argv pointers, a terminator, envp pointers, a
 *     terminator, plus auxarg_size extra slots in the variant that
 *     reserves room for the auxiliary vector.
 *
 * The start of the pointer vector becomes stack_base.  The argument and
 * environment counts and vector addresses are also recorded in ps_strings.
 *
 * NOTE(review): the condition choosing between the two vectp computations
 * and the final return are not visible in this excerpt.  The results of
 * the visible copyout() and suword() calls are not checked.
 */
299 linux_copyout_strings(struct image_params *imgp)
303 char *stringp, *destp;
304 register_t *stack_base;
305 struct ps_strings *arginfo;
309 * Calculate string base and vector table pointers.
310 * Also deal with signal trampoline code for this exec type.
313 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
314 destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
315 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
318 * install LINUX_PLATFORM
320 copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
324 * If we have a valid auxargs ptr, prepare some room
329 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
330 * lower compatibility.
332 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
333 (LINUX_AT_COUNT * 2);
335 * The '+ 2' is for the null pointers at the end of each of
336 * the arg and env vector sets,and imgp->auxarg_size is room
337 * for argument of Runtime loader.
339 vectp = (char **)(destp - (imgp->args->argc +
340 imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
343 * The '+ 2' is for the null pointers at the end of each of
344 * the arg and env vector sets
346 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
351 * vectp also becomes our initial stack base
353 stack_base = (register_t *)vectp;
355 stringp = imgp->args->begin_argv;
356 argc = imgp->args->argc;
357 envc = imgp->args->envc;
360 * Copy out strings - arguments and environment.
362 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
365 * Fill in "ps_strings" struct for ps, w, etc.
367 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
368 suword(&arginfo->ps_nargvstr, argc);
371 * Fill in argument portion of vector table.
373 for (; argc > 0; --argc) {
374 suword(vectp++, (long)(intptr_t)destp);
375 while (*stringp++ != 0)
380 /* a null vector table pointer separates the argp's from the envp's */
383 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
384 suword(&arginfo->ps_nenvstr, envc);
387 * Fill in environment portion of vector table.
389 for (; envc > 0; --envc) {
390 suword(vectp++, (long)(intptr_t)destp);
391 while (*stringp++ != 0)
396 /* end of vector table is a null pointer */
404 extern unsigned long linux_sznonrtsigcode;
/*
 * Deliver a signal using the Linux rt (SA_SIGINFO) frame format.  Called
 * by linux_sendsig() when the signal is a member of ps_siginfo.
 *
 * catcher is the user handler, ksi describes the signal, and mask is the
 * signal mask to be saved in the frame.
 *
 * A struct l_rt_sigframe is assembled in a kernel-side copy (frame) and
 * copied out to fp, which is placed either at the top of the alternate
 * signal stack (when TDP_ALTSTACK is set, the thread is not already on
 * that stack and the signal is in ps_sigonstack) or just below the current
 * user %esp.  The frame carries the handler, the siginfo converted by
 * ksiginfo_to_lsiginfo(), user-space pointers to the siginfo and ucontext
 * inside the frame, the alternate stack description, the Linux form of
 * the signal mask and the saved register state.  Afterwards the trapframe
 * is redirected so that the thread resumes at
 * sv_sigcode_base + linux_sznonrtsigcode with %esp pointing at the frame
 * and the user code and data selectors loaded.
 *
 * Locking: the proc lock and ps_mtx must be held on entry (asserted).
 * ps_mtx is dropped while the frame is built and copied out and is taken
 * again before returning.
 *
 * NOTE(review): the handling after a failed copyout() is only partly
 * visible here; per the comment the process is stopped with an illegal
 * instruction signal.
 */
407 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
409 struct thread *td = curthread;
410 struct proc *p = td->td_proc;
412 struct trapframe *regs;
413 struct l_rt_sigframe *fp, frame;
417 sig = ksi->ksi_signo;
418 code = ksi->ksi_code;
419 PROC_LOCK_ASSERT(p, MA_OWNED);
421 mtx_assert(&psp->ps_mtx, MA_OWNED);
423 oonstack = sigonstack(regs->tf_esp);
426 if (ldebug(rt_sendsig))
427 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
428 catcher, sig, (void*)mask, code);
431 * Allocate space for the signal handler context.
433 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
434 SIGISMEMBER(psp->ps_sigonstack, sig)) {
435 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
436 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
438 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
439 mtx_unlock(&psp->ps_mtx);
442 * Build the argument list for the signal handler.
444 if (p->p_sysent->sv_sigtbl)
445 if (sig <= p->p_sysent->sv_sigsize)
446 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
448 bzero(&frame, sizeof(frame));
450 frame.sf_handler = catcher;
452 frame.sf_siginfo = &fp->sf_si;
453 frame.sf_ucontext = &fp->sf_sc;
455 /* Fill in POSIX parts */
456 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
459 * Build the signal context to be used by sigreturn.
461 frame.sf_sc.uc_flags = 0; /* XXX ??? */
462 frame.sf_sc.uc_link = NULL; /* XXX ??? */
464 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
465 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
466 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
467 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
470 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
472 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
473 frame.sf_sc.uc_mcontext.sc_gs = rgs();
474 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
475 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
476 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
477 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
478 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
479 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
480 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
481 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
482 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
483 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
484 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
485 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
486 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
487 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
488 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
489 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
490 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr;
/*
 * NOTE(review): the argument here is code (ksi->ksi_code), whereas
 * linux_sendsig() passes ksi->ksi_trapno to the same macro; confirm
 * which value is intended.
 */
491 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
494 if (ldebug(rt_sendsig))
495 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
496 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
497 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
500 if (copyout(&frame, fp, sizeof(frame)) != 0) {
502 * Process has trashed its stack; give it an illegal
503 * instruction to halt it in its tracks.
506 if (ldebug(rt_sendsig))
507 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
515 * Build context to run handler in.
517 regs->tf_esp = (int)fp;
518 regs->tf_eip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
519 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
520 regs->tf_cs = _ucodesel;
521 regs->tf_ds = _udatasel;
522 regs->tf_es = _udatasel;
523 regs->tf_fs = _udatasel;
524 regs->tf_ss = _udatasel;
526 mtx_lock(&psp->ps_mtx);
531 * Send an interrupt to process.
533 * Stack is set up to allow sigcode stored
534 * in u. to call routine, followed by kcall
535 * to sigreturn routine below. After sigreturn
536 * resets the signal mask, the stack, and the
537 * frame pointer, it returns to the user
/*
 * Signal delivery entry point, installed as .sv_sendsig in both Linux
 * sysentvecs.
 *
 * catcher is the user handler, ksi describes the signal, and mask is the
 * signal mask to be saved in the frame.
 *
 * If the signal is a member of ps_siginfo (handler installed with
 * SA_SIGINFO) the work is handed to linux_rt_sendsig().  Otherwise a
 * classic struct l_sigframe is assembled in a kernel-side copy and copied
 * out to fp, placed either at the top of the alternate signal stack or
 * just below the current user %esp, using the same conditions as the rt
 * variant.  The first word of the Linux signal mask is stored in sc_mask
 * and the remaining LINUX_NSIG_WORDS - 1 words in sf_extramask.  Finally
 * the trapframe is redirected to sv_sigcode_base with %esp pointing at
 * the frame and the user code and data selectors loaded.
 *
 * Locking: the proc lock and ps_mtx must be held on entry (asserted).
 * ps_mtx is dropped while the frame is built and copied out and is taken
 * again before returning.
 *
 * NOTE(review): the statements after the delegation to linux_rt_sendsig()
 * and after a failed copyout() are not visible in this excerpt.
 */
541 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
543 struct thread *td = curthread;
544 struct proc *p = td->td_proc;
546 struct trapframe *regs;
547 struct l_sigframe *fp, frame;
552 PROC_LOCK_ASSERT(p, MA_OWNED);
554 sig = ksi->ksi_signo;
555 code = ksi->ksi_code;
556 mtx_assert(&psp->ps_mtx, MA_OWNED);
557 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
558 /* Signal handler installed with SA_SIGINFO. */
559 linux_rt_sendsig(catcher, ksi, mask);
563 oonstack = sigonstack(regs->tf_esp);
567 printf(ARGS(sendsig, "%p, %d, %p, %u"),
568 catcher, sig, (void*)mask, code);
572 * Allocate space for the signal handler context.
574 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
575 SIGISMEMBER(psp->ps_sigonstack, sig)) {
576 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
577 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
579 fp = (struct l_sigframe *)regs->tf_esp - 1;
580 mtx_unlock(&psp->ps_mtx);
584 * Build the argument list for the signal handler.
586 if (p->p_sysent->sv_sigtbl)
587 if (sig <= p->p_sysent->sv_sigsize)
588 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
590 bzero(&frame, sizeof(frame));
592 frame.sf_handler = catcher;
595 bsd_to_linux_sigset(mask, &lmask);
598 * Build the signal context to be used by sigreturn.
600 frame.sf_sc.sc_mask = lmask.__bits[0];
601 frame.sf_sc.sc_gs = rgs();
602 frame.sf_sc.sc_fs = regs->tf_fs;
603 frame.sf_sc.sc_es = regs->tf_es;
604 frame.sf_sc.sc_ds = regs->tf_ds;
605 frame.sf_sc.sc_edi = regs->tf_edi;
606 frame.sf_sc.sc_esi = regs->tf_esi;
607 frame.sf_sc.sc_ebp = regs->tf_ebp;
608 frame.sf_sc.sc_ebx = regs->tf_ebx;
609 frame.sf_sc.sc_edx = regs->tf_edx;
610 frame.sf_sc.sc_ecx = regs->tf_ecx;
611 frame.sf_sc.sc_eax = regs->tf_eax;
612 frame.sf_sc.sc_eip = regs->tf_eip;
613 frame.sf_sc.sc_cs = regs->tf_cs;
614 frame.sf_sc.sc_eflags = regs->tf_eflags;
615 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
616 frame.sf_sc.sc_ss = regs->tf_ss;
617 frame.sf_sc.sc_err = regs->tf_err;
618 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr;
619 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
621 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
622 frame.sf_extramask[i] = lmask.__bits[i+1];
624 if (copyout(&frame, fp, sizeof(frame)) != 0) {
626 * Process has trashed its stack; give it an illegal
627 * instruction to halt it in its tracks.
634 * Build context to run handler in.
636 regs->tf_esp = (int)fp;
637 regs->tf_eip = p->p_sysent->sv_sigcode_base;
638 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
639 regs->tf_cs = _ucodesel;
640 regs->tf_ds = _udatasel;
641 regs->tf_es = _udatasel;
642 regs->tf_fs = _udatasel;
643 regs->tf_ss = _udatasel;
645 mtx_lock(&psp->ps_mtx);
649 * System call to cleanup state after a signal
650 * has been taken. Reset signal mask and
651 * stack state from context left by sendsig (above).
652 * Return to previous pc and psl as specified by
653 * context left by sendsig. Check carefully to
654 * make sure that the user has not modified the
655 * psl to gain improper privileges or to cause
/*
 * Linux sigreturn system call for frames built by linux_sendsig().
 *
 * Copies the struct l_sigframe in from the user address args->sfp and then:
 *   1. rejects the frame unless the saved eflags differ from the current
 *      ones only in PSL_USERCHANGE bits (EFLAGS_SECURE);
 *   2. rejects the frame unless the saved %cs selects user privilege
 *      (CS_SECURE); on this failure a SIGBUS with code BUS_OBJERR and trap
 *      number T_PROTFLT is posted through trapsignal();
 *   3. rebuilds the Linux signal mask from sc_mask plus sf_extramask,
 *      converts it and installs it with kern_sigprocmask(SIG_SETMASK);
 *   4. restores the segment, general purpose, %eip, %eflags and %esp
 *      registers into the trapframe.  %gs is not written here; per the
 *      comment below it was restored by the trampoline.
 *
 * Returns EJUSTRETURN on the success path.
 * NOTE(review): the values returned when copyin() or one of the two
 * checks fails are not visible in this excerpt.
 */
659 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
661 struct l_sigframe frame;
662 struct trapframe *regs;
671 if (ldebug(sigreturn))
672 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
675 * The trampoline code hands us the sigframe.
676 * It is unsafe to keep track of it ourselves, in the event that a
677 * program jumps out of a signal handler.
679 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
683 * Check for security violations.
685 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
686 eflags = frame.sf_sc.sc_eflags;
687 if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
691 * Don't allow users to load a valid privileged %cs. Let the
692 * hardware check for invalid selectors, excess privilege in
693 * other selectors, invalid %eip's and invalid %esp's.
695 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
696 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
697 ksiginfo_init_trap(&ksi);
698 ksi.ksi_signo = SIGBUS;
699 ksi.ksi_code = BUS_OBJERR;
700 ksi.ksi_trapno = T_PROTFLT;
701 ksi.ksi_addr = (void *)regs->tf_eip;
702 trapsignal(td, &ksi);
706 lmask.__bits[0] = frame.sf_sc.sc_mask;
707 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
708 lmask.__bits[i+1] = frame.sf_extramask[i];
709 linux_to_bsd_sigset(&lmask, &bmask);
710 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
713 * Restore signal context.
715 /* %gs was restored by the trampoline. */
716 regs->tf_fs = frame.sf_sc.sc_fs;
717 regs->tf_es = frame.sf_sc.sc_es;
718 regs->tf_ds = frame.sf_sc.sc_ds;
719 regs->tf_edi = frame.sf_sc.sc_edi;
720 regs->tf_esi = frame.sf_sc.sc_esi;
721 regs->tf_ebp = frame.sf_sc.sc_ebp;
722 regs->tf_ebx = frame.sf_sc.sc_ebx;
723 regs->tf_edx = frame.sf_sc.sc_edx;
724 regs->tf_ecx = frame.sf_sc.sc_ecx;
725 regs->tf_eax = frame.sf_sc.sc_eax;
726 regs->tf_eip = frame.sf_sc.sc_eip;
727 regs->tf_cs = frame.sf_sc.sc_cs;
728 regs->tf_eflags = eflags;
729 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
730 regs->tf_ss = frame.sf_sc.sc_ss;
732 return (EJUSTRETURN);
736 * System call to cleanup state after a signal
737 * has been taken. Reset signal mask and
738 * stack state from context left by rt_sendsig (above).
739 * Return to previous pc and psl as specified by
740 * context left by sendsig. Check carefully to
741 * make sure that the user has not modified the
742 * psl to gain improper privileges or to cause
/*
 * Linux rt_sigreturn system call for frames built by linux_rt_sendsig().
 *
 * Copies the struct l_ucontext in from the user address args->ucp and
 * applies the same two validations as linux_sigreturn(): the saved eflags
 * may differ from the current ones only in PSL_USERCHANGE bits, and the
 * saved %cs must select user privilege (otherwise SIGBUS / BUS_OBJERR /
 * T_PROTFLT is posted through trapsignal()).  It then installs the
 * converted uc_sigmask with kern_sigprocmask(SIG_SETMASK), restores the
 * register state from uc_mcontext into the trapframe, and re-applies the
 * alternate signal stack settings through kern_sigaltstack(), whose result
 * is deliberately ignored.
 *
 * Returns EJUSTRETURN on the success path.
 * NOTE(review): the values returned when copyin() or one of the checks
 * fails, and the assignment of lss, are not visible in this excerpt.
 */
746 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
748 struct l_ucontext uc;
749 struct l_sigcontext *context;
753 struct trapframe *regs;
760 if (ldebug(rt_sigreturn))
761 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
764 * The trampoline code hands us the ucontext.
765 * It is unsafe to keep track of it ourselves, in the event that a
766 * program jumps out of a signal handler.
768 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
771 context = &uc.uc_mcontext;
774 * Check for security violations.
776 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
777 eflags = context->sc_eflags;
778 if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
782 * Don't allow users to load a valid privileged %cs. Let the
783 * hardware check for invalid selectors, excess privilege in
784 * other selectors, invalid %eip's and invalid %esp's.
786 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
787 if (!CS_SECURE(context->sc_cs)) {
788 ksiginfo_init_trap(&ksi);
789 ksi.ksi_signo = SIGBUS;
790 ksi.ksi_code = BUS_OBJERR;
791 ksi.ksi_trapno = T_PROTFLT;
792 ksi.ksi_addr = (void *)regs->tf_eip;
793 trapsignal(td, &ksi);
797 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
798 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
801 * Restore signal context
803 /* %gs was restored by the trampoline. */
804 regs->tf_fs = context->sc_fs;
805 regs->tf_es = context->sc_es;
806 regs->tf_ds = context->sc_ds;
807 regs->tf_edi = context->sc_edi;
808 regs->tf_esi = context->sc_esi;
809 regs->tf_ebp = context->sc_ebp;
810 regs->tf_ebx = context->sc_ebx;
811 regs->tf_edx = context->sc_edx;
812 regs->tf_ecx = context->sc_ecx;
813 regs->tf_eax = context->sc_eax;
814 regs->tf_eip = context->sc_eip;
815 regs->tf_cs = context->sc_cs;
816 regs->tf_eflags = eflags;
817 regs->tf_esp = context->sc_esp_at_signal;
818 regs->tf_ss = context->sc_ss;
821 * call sigaltstack & ignore results..
824 ss.ss_sp = lss->ss_sp;
825 ss.ss_size = lss->ss_size;
826 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
829 if (ldebug(rt_sigreturn))
830 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
831 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
833 (void)kern_sigaltstack(td, &ss, NULL);
835 return (EJUSTRETURN);
/*
 * Syscall argument fetch hook (.sv_fetch_syscall_args of both Linux
 * sysentvecs).
 *
 * Reads the system call number from %eax and up to six arguments from
 * %ebx, %ecx, %edx, %esi, %edi and %ebp of the saved trapframe into sa.
 * A number at or above sv_size is routed to entry 0 of the syscall table;
 * otherwise the matching table entry is used.  sa->narg is taken from the
 * selected entry.  td_retval[0] is preset to 0 and td_retval[1] to the
 * current %edx.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
839 linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
842 struct trapframe *frame;
845 frame = td->td_frame;
847 sa->code = frame->tf_eax;
848 sa->args[0] = frame->tf_ebx;
849 sa->args[1] = frame->tf_ecx;
850 sa->args[2] = frame->tf_edx;
851 sa->args[3] = frame->tf_esi;
852 sa->args[4] = frame->tf_edi;
853 sa->args[5] = frame->tf_ebp; /* Unconfirmed */
855 if (sa->code >= p->p_sysent->sv_size)
856 sa->callp = &p->p_sysent->sv_table[0];
858 sa->callp = &p->p_sysent->sv_table[sa->code];
859 sa->narg = sa->callp->sy_narg;
861 td->td_retval[0] = 0;
862 td->td_retval[1] = frame->tf_edx;
868 * If a linux binary is exec'ing something, try this image activator
869 * first. We override standard shell script execution in order to
870 * be able to modify the interpreter path. We only do this if a linux
871 * binary is doing the exec, so we do not create an EXEC module for it.
873 static int exec_linux_imgact_try(struct image_params *iparams);
/*
 * Image activator hook (.sv_imgact_try of both Linux sysentvecs).
 *
 * When the first short of the image header equals SHELLMAGIC, the regular
 * shell image activator exec_shell_imgact() is run.  If that succeeds,
 * linux_emul_convpath() is asked to translate imgp->interpreter_name
 * (a kernel-space string, looked up relative to AT_FDCWD) and the result
 * rpath replaces both imgp->interpreter_name and imgp->args->fname_buf.
 * NOTE(review): the test on rpath before the assignment, the handling of
 * non-script images and the return statements are not visible in this
 * excerpt.
 */
876 exec_linux_imgact_try(struct image_params *imgp)
878 const char *head = (const char *)imgp->image_header;
883 * The interpreter for shell scripts run from a linux binary needs
884 * to be located in /compat/linux if possible in order to recursively
885 * maintain linux path emulation.
887 if (((const short *)head)[0] == SHELLMAGIC) {
889 * Run our normal shell image activator. If it succeeds attempt
890 * to use the alternate path for the interpreter. If an alternate
891 * path is found, use our stringspace to store it.
893 if ((error = exec_shell_imgact(imgp)) == 0) {
894 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
895 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
897 imgp->args->fname_buf =
898 imgp->interpreter_name = rpath;
905 * exec_setregs may initialize some registers differently than Linux
906 * does, thus potentially confusing Linux binaries. If necessary, we
907 * override the exec_setregs default(s) here.
/*
 * Register setup hook (.sv_setregs of both Linux sysentvecs).  Runs the
 * generic exec_setregs() first and then applies Linux-specific overrides:
 * the initial x87 control word kept in the PCB is set to __LINUX_NPXCW__.
 * NOTE(review): per the comment below %gs is also reset to 0, but that
 * statement is not visible in this excerpt.
 */
910 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
912 struct pcb *pcb = td->td_pcb;
914 exec_setregs(td, imgp, stack);
916 /* Linux sets %gs to 0, we default to _udatasel */
920 pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
/*
 * Stores a machine/platform name string through dst.  It is called from
 * linux_elf_modevent() to initialise linux_platform.
 * NOTE(review): the body is not visible in this excerpt, so how the
 * string is chosen cannot be documented from here.
 */
924 linux_get_machine(const char **dst)
/*
 * System entry vector for Linux a.out binaries ("Linux a.out").
 *
 * It shares the syscall table, signal and errno translation tables, trap
 * translation, sendsig routine, signal trampoline, image activator hook,
 * setregs hook and syscall argument fetch routine with elf_linux_sysvec.
 * It differs from the ELF vector in the visible fields as follows:
 * linux_fixup is the stack fixup routine, the generic exec_copyout_strings
 * is used, ps_strings lives at PS_STRINGS, and the flags include SV_AOUT.
 * The vector is registered with INIT_SYSENTVEC under the name aout_sysvec.
 */
942 struct sysentvec linux_sysvec = {
943 .sv_size = LINUX_SYS_MAXSYSCALL,
944 .sv_table = linux_sysent,
946 .sv_sigsize = LINUX_SIGTBLSZ,
947 .sv_sigtbl = bsd_to_linux_signal,
948 .sv_errsize = ELAST + 1,
949 .sv_errtbl = bsd_to_linux_errno,
950 .sv_transtrap = translate_traps,
951 .sv_fixup = linux_fixup,
952 .sv_sendsig = linux_sendsig,
953 .sv_sigcode = linux_sigcode,
954 .sv_szsigcode = &linux_szsigcode,
955 .sv_prepsyscall = NULL,
956 .sv_name = "Linux a.out",
958 .sv_imgact_try = exec_linux_imgact_try,
959 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
960 .sv_pagesize = PAGE_SIZE,
961 .sv_minuser = VM_MIN_ADDRESS,
962 .sv_maxuser = VM_MAXUSER_ADDRESS,
963 .sv_usrstack = LINUX_USRSTACK,
964 .sv_psstrings = PS_STRINGS,
965 .sv_stackprot = VM_PROT_ALL,
966 .sv_copyout_strings = exec_copyout_strings,
967 .sv_setregs = exec_linux_setregs,
970 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
971 .sv_set_syscall_retval = cpu_set_syscall_retval,
972 .sv_fetch_syscall_args = linux_fetch_syscall_args,
973 .sv_syscallnames = NULL,
974 .sv_shared_page_base = LINUX_SHAREDPAGE,
975 .sv_shared_page_len = PAGE_SIZE,
976 .sv_schedtail = linux_schedtail,
977 .sv_thread_detach = linux_thread_detach,
979 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
/*
 * System entry vector for Linux ELF binaries ("Linux ELF"); both ELF
 * brands defined in this file point their .sysvec at it.
 *
 * Compared with linux_sysvec, the visible fields differ as follows:
 * elf_linux_fixup writes the auxiliary vector, elf32_coredump is the core
 * dump routine, linux_copyout_strings lays out the stack strings,
 * ps_strings lives at LINUX_PS_STRINGS (LINUX_USRSTACK minus the size of
 * struct ps_strings), and the flags carry SV_SHP instead of SV_AOUT.
 * The vector is registered with INIT_SYSENTVEC under the name elf_sysvec.
 */
981 struct sysentvec elf_linux_sysvec = {
982 .sv_size = LINUX_SYS_MAXSYSCALL,
983 .sv_table = linux_sysent,
985 .sv_sigsize = LINUX_SIGTBLSZ,
986 .sv_sigtbl = bsd_to_linux_signal,
987 .sv_errsize = ELAST + 1,
988 .sv_errtbl = bsd_to_linux_errno,
989 .sv_transtrap = translate_traps,
990 .sv_fixup = elf_linux_fixup,
991 .sv_sendsig = linux_sendsig,
992 .sv_sigcode = linux_sigcode,
993 .sv_szsigcode = &linux_szsigcode,
994 .sv_prepsyscall = NULL,
995 .sv_name = "Linux ELF",
996 .sv_coredump = elf32_coredump,
997 .sv_imgact_try = exec_linux_imgact_try,
998 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
999 .sv_pagesize = PAGE_SIZE,
1000 .sv_minuser = VM_MIN_ADDRESS,
1001 .sv_maxuser = VM_MAXUSER_ADDRESS,
1002 .sv_usrstack = LINUX_USRSTACK,
1003 .sv_psstrings = LINUX_PS_STRINGS,
1004 .sv_stackprot = VM_PROT_ALL,
1005 .sv_copyout_strings = linux_copyout_strings,
1006 .sv_setregs = exec_linux_setregs,
1007 .sv_fixlimit = NULL,
1009 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
1010 .sv_set_syscall_retval = cpu_set_syscall_retval,
1011 .sv_fetch_syscall_args = linux_fetch_syscall_args,
1012 .sv_syscallnames = NULL,
1013 .sv_shared_page_base = LINUX_SHAREDPAGE,
1014 .sv_shared_page_len = PAGE_SIZE,
1015 .sv_schedtail = linux_schedtail,
1016 .sv_thread_detach = linux_thread_detach,
1018 INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
/*
 * Identification of the ABI note recognised as a Linux brand note.
 * GNU_ABI_VENDOR supplies the vendor name and name size of linux_brandnote;
 * GNULINUX_ABI_DESC is the value linux_trans_osrel() requires in the first
 * descriptor word.
 */
1020 static char GNU_ABI_VENDOR[] = "GNU";
1021 static int GNULINUX_ABI_DESC = 0;
/*
 * Brand note callback (.trans_osrel of linux_brandnote) that derives the
 * osrel value from an ELF note.
 *
 * The descriptor is located directly after the note header and the name,
 * with the name size rounded up to a multiple of sizeof(Elf32_Addr).
 * The first descriptor word is compared with GNULINUX_ABI_DESC; the next
 * three words are combined as
 *     desc[1] * 1000000 + desc[2] * 1000 + desc[3]
 * and stored through osrel.
 * NOTE(review): the return statements are not visible in this excerpt,
 * and no check that n_descsz covers four words is visible either
 * (linux_brandnote declares a descriptor size of 16).
 */
1024 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1026 const Elf32_Word *desc;
1029 p = (uintptr_t)(note + 1);
1030 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1032 desc = (const Elf32_Word *)p;
1033 if (desc[0] != GNULINUX_ABI_DESC)
1037 * For linux we encode osrel as follows (see linux_mib.c):
1038 * VVVMMMIII (version, major, minor), see linux_mib.c.
1040 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
/*
 * Description of the ELF note that marks a binary as Linux: vendor name
 * GNU_ABI_VENDOR (its size including the terminator is the name size), a
 * 16-byte descriptor, and BN_TRANSLATE_OSREL so that linux_trans_osrel()
 * is used to compute the osrel value.  Both Linux brands reference it.
 */
1045 static Elf_Brandnote linux_brandnote = {
1046 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1047 .hdr.n_descsz = 16, /* XXX at least 16 */
1049 .vendor = GNU_ABI_VENDOR,
1050 .flags = BN_TRANSLATE_OSREL,
1051 .trans_osrel = linux_trans_osrel
/*
 * ELF brand for Linux binaries whose interpreter is /lib/ld-linux.so.1.
 * Uses elf_linux_sysvec, the /compat/linux emulation path and the shared
 * linux_brandnote.  The flags are BI_CAN_EXEC_DYN and BI_BRAND_NOTE.
 */
1054 static Elf32_Brandinfo linux_brand = {
1055 .brand = ELFOSABI_LINUX,
1057 .compat_3_brand = "Linux",
1058 .emul_path = "/compat/linux",
1059 .interp_path = "/lib/ld-linux.so.1",
1060 .sysvec = &elf_linux_sysvec,
1061 .interp_newpath = NULL,
1062 .brand_note = &linux_brandnote,
1063 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * ELF brand for Linux binaries whose interpreter is /lib/ld-linux.so.2.
 * The visible fields match linux_brand except for the interpreter path.
 */
1066 static Elf32_Brandinfo linux_glibc2brand = {
1067 .brand = ELFOSABI_LINUX,
1069 .compat_3_brand = "Linux",
1070 .emul_path = "/compat/linux",
1071 .interp_path = "/lib/ld-linux.so.2",
1072 .sysvec = &elf_linux_sysvec,
1073 .interp_newpath = NULL,
1074 .brand_note = &linux_brandnote,
1075 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * List of brands handled by this module.  linux_elf_modevent() walks it
 * until it reaches a NULL entry, so the list must be NULL-terminated.
 * NOTE(review): the initialiser entries are not visible in this excerpt.
 */
1078 Elf32_Brandinfo *linux_brandlist[] = {
/*
 * Module event handler for the Linux ELF emulation module.
 *
 * The switch and case labels are not visible in this excerpt; the two
 * groups below are inferred from the messages each group prints.
 *
 * Install path: inserts every brand from linux_brandlist.  The statements
 * that follow register the ioctl and device handlers collected in the
 * linker sets, initialise the futex list and mutex, register the
 * process_exit, process_exec and thread_dtor event handlers, obtain
 * linux_platform and compute its rounded size linux_szplatform, register
 * the jail OSD hooks, and set stclohz to stathz, or to hz when stathz is
 * zero.  Either the "installed" or the "cannot insert" message is printed.
 *
 * Removal path: checks every brand with elf32_brand_inuse(), removes the
 * brands, then unregisters the handlers, destroys the futex mutex,
 * deregisters the event handlers and the jail OSD hooks.  Either the
 * "removed" or the "Could not deinstall" message is printed.
 *
 * EOPNOTSUPP is returned on a path whose selecting condition is not
 * visible here; presumably it covers unsupported event types.
 * NOTE(review): the error bookkeeping between the loops and the other
 * return statements are not visible in this excerpt.
 */
1085 linux_elf_modevent(module_t mod, int type, void *data)
1087 Elf32_Brandinfo **brandinfo;
1089 struct linux_ioctl_handler **lihp;
1090 struct linux_device_handler **ldhp;
1096 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1098 if (elf32_insert_brand_entry(*brandinfo) < 0)
1101 SET_FOREACH(lihp, linux_ioctl_handler_set)
1102 linux_ioctl_register_handler(*lihp);
1103 SET_FOREACH(ldhp, linux_device_handler_set)
1104 linux_device_register_handler(*ldhp);
1105 LIST_INIT(&futex_list);
1106 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1107 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1109 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1111 linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
1112 linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
1113 linux_get_machine(&linux_platform);
1114 linux_szplatform = roundup(strlen(linux_platform) + 1,
1116 linux_osd_jail_register();
1117 stclohz = (stathz ? stathz : hz);
1119 printf("Linux ELF exec handler installed\n");
1121 printf("cannot insert Linux ELF brand handler\n");
1124 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1126 if (elf32_brand_inuse(*brandinfo))
1129 for (brandinfo = &linux_brandlist[0];
1130 *brandinfo != NULL; ++brandinfo)
1131 if (elf32_remove_brand_entry(*brandinfo) < 0)
1135 SET_FOREACH(lihp, linux_ioctl_handler_set)
1136 linux_ioctl_unregister_handler(*lihp);
1137 SET_FOREACH(ldhp, linux_device_handler_set)
1138 linux_device_unregister_handler(*ldhp);
1139 mtx_destroy(&futex_mtx);
1140 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1141 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1142 EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
1143 linux_osd_jail_deregister();
1145 printf("Linux ELF exec handler removed\n");
1147 printf("Could not deinstall ELF interpreter entry\n");
1150 return (EOPNOTSUPP);
/*
 * Module glue: linux_elf_mod is the module data record that is declared to
 * the kernel under the name linuxelf, at SI_SUB_EXEC with SI_ORDER_ANY.
 * NOTE(review): the initialiser of linux_elf_mod is not visible in this
 * excerpt; presumably it names linux_elf_modevent as the event handler.
 */
1155 static moduledata_t linux_elf_mod = {
1161 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);