/*-
 * Copyright (c) 1994-1996 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/exec.h>
#include <sys/fcntl.h>
#include <sys/imgact.h>
#include <sys/imgact_aout.h>
#include <sys/imgact_elf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/vnode.h>
#include <sys/eventhandler.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/pcb.h>

#include <i386/linux/linux.h>
#include <i386/linux/linux_proto.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_futex.h>
#include <compat/linux/linux_ioctl.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_misc.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_vdso.h>

76 MODULE_VERSION(linux, 1);
78 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
/*
 * The two bytes "#!" that start an interpreter script, expressed as the
 * 16-bit value they form when read in host byte order.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define	SHELLMAGIC	0x2123 /* #! */
#else
#define	SHELLMAGIC	0x2321
#endif

/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

/* User address of ps_strings: it sits directly below the Linux stack top. */
#define	LINUX_PS_STRINGS	(LINUX_USRSTACK - sizeof(struct ps_strings))

97 static int linux_szsigcode;
98 static vm_object_t linux_shared_page_obj;
99 static char *linux_shared_page_mapping;
100 extern char _binary_linux_locore_o_start;
101 extern char _binary_linux_locore_o_end;
103 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
105 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
106 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
108 static int linux_fixup(register_t **stack_base,
109 struct image_params *iparams);
110 static int elf_linux_fixup(register_t **stack_base,
111 struct image_params *iparams);
112 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
113 static void exec_linux_setregs(struct thread *td,
114 struct image_params *imgp, u_long stack);
115 static register_t *linux_copyout_strings(struct image_params *imgp);
116 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
117 static void linux_vdso_install(void *param);
118 static void linux_vdso_deinstall(void *param);
120 static int linux_szplatform;
121 const char *linux_kplatform;
123 static eventhandler_tag linux_exit_tag;
124 static eventhandler_tag linux_exec_tag;
125 static eventhandler_tag linux_thread_dtor_tag;
128 * Linux syscalls return negative errno's, we do positive and map them
130 * FreeBSD: src/sys/sys/errno.h
131 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
132 * linux-2.6.17.8/include/asm-generic/errno.h
134 static int bsd_to_linux_errno[ELAST + 1] = {
135 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
136 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
137 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
138 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
139 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
140 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
141 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
142 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
143 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
147 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
148 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
149 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
150 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
151 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
152 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
153 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
154 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
155 0, LINUX_SIGUSR1, LINUX_SIGUSR2
158 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
159 SIGHUP, SIGINT, SIGQUIT, SIGILL,
160 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
161 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
162 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
163 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
164 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
165 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
166 SIGIO, SIGURG, SIGSYS
/* Value reported when a FreeBSD trap type has no Linux equivalent. */
#define LINUX_T_UNKNOWN  255

/*
 * FreeBSD trap type -> Linux (x86 exception vector) trap number.
 * The array index is the FreeBSD T_* value, so every slot from 0 to 30
 * must be present; a missing row would shift all later translations.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a FreeBSD trap type; anything outside the table (including
 * negative values, which compare as huge unsigned numbers) yields
 * LINUX_T_UNKNOWN.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)

/*
 * Shared-page entry points used below as user-visible addresses:
 * the two signal trampolines and the vsyscall entry.
 */
LINUX_VDSO_SYM_INTPTR(linux_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_vsyscall);

213 * If FreeBSD & Linux have a difference of opinion about what a trap
214 * means, deal with it here.
219 translate_traps(int signal, int trap_code)
221 if (signal != SIGBUS)
235 linux_fixup(register_t **stack_base, struct image_params *imgp)
237 register_t *argv, *envp;
240 envp = *stack_base + (imgp->args->argc + 1);
242 suword(*stack_base, (intptr_t)(void *)envp);
244 suword(*stack_base, (intptr_t)(void *)argv);
246 suword(*stack_base, imgp->args->argc);
251 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
255 Elf32_Addr *uplatform;
256 struct ps_strings *arginfo;
259 KASSERT(curthread->td_proc == imgp->proc,
260 ("unsafe elf_linux_fixup(), should be curproc"));
263 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
264 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
265 args = (Elf32_Auxargs *)imgp->auxargs;
266 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
268 AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
269 imgp->proc->p_sysent->sv_shared_page_base);
270 AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall);
271 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
274 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
275 * as it has appeared in the 2.4.0-rc7 first time.
276 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
277 * glibc falls back to the hard-coded CLK_TCK value when aux entry
279 * Also see linux_times() implementation.
281 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
282 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
283 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
284 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
285 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
286 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
287 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
288 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
289 AUXARGS_ENTRY(pos, AT_BASE, args->base);
290 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
291 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
292 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
293 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
294 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
295 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
296 if (args->execfd != -1)
297 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
298 AUXARGS_ENTRY(pos, AT_NULL, 0);
300 free(imgp->auxargs, M_TEMP);
301 imgp->auxargs = NULL;
304 suword(*stack_base, (register_t)imgp->args->argc);
309 * Copied from kern/kern_exec.c
312 linux_copyout_strings(struct image_params *imgp)
316 char *stringp, *destp;
317 register_t *stack_base;
318 struct ps_strings *arginfo;
322 * Calculate string base and vector table pointers.
325 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
326 destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
327 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
330 * install LINUX_PLATFORM
332 copyout(linux_kplatform, ((caddr_t)arginfo - linux_szplatform),
336 * If we have a valid auxargs ptr, prepare some room
341 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
342 * lower compatibility.
344 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
345 (LINUX_AT_COUNT * 2);
347 * The '+ 2' is for the null pointers at the end of each of
348 * the arg and env vector sets,and imgp->auxarg_size is room
349 * for argument of Runtime loader.
351 vectp = (char **)(destp - (imgp->args->argc +
352 imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
355 * The '+ 2' is for the null pointers at the end of each of
356 * the arg and env vector sets
358 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
363 * vectp also becomes our initial stack base
365 stack_base = (register_t *)vectp;
367 stringp = imgp->args->begin_argv;
368 argc = imgp->args->argc;
369 envc = imgp->args->envc;
372 * Copy out strings - arguments and environment.
374 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
377 * Fill in "ps_strings" struct for ps, w, etc.
379 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
380 suword(&arginfo->ps_nargvstr, argc);
383 * Fill in argument portion of vector table.
385 for (; argc > 0; --argc) {
386 suword(vectp++, (long)(intptr_t)destp);
387 while (*stringp++ != 0)
392 /* a null vector table pointer separates the argp's from the envp's */
395 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
396 suword(&arginfo->ps_nenvstr, envc);
399 * Fill in environment portion of vector table.
401 for (; envc > 0; --envc) {
402 suword(vectp++, (long)(intptr_t)destp);
403 while (*stringp++ != 0)
408 /* end of vector table is a null pointer */
415 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
417 struct thread *td = curthread;
418 struct proc *p = td->td_proc;
420 struct trapframe *regs;
421 struct l_rt_sigframe *fp, frame;
425 sig = ksi->ksi_signo;
426 code = ksi->ksi_code;
427 PROC_LOCK_ASSERT(p, MA_OWNED);
429 mtx_assert(&psp->ps_mtx, MA_OWNED);
431 oonstack = sigonstack(regs->tf_esp);
434 if (ldebug(rt_sendsig))
435 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
436 catcher, sig, (void*)mask, code);
439 * Allocate space for the signal handler context.
441 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
442 SIGISMEMBER(psp->ps_sigonstack, sig)) {
443 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
444 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
446 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
447 mtx_unlock(&psp->ps_mtx);
450 * Build the argument list for the signal handler.
452 if (p->p_sysent->sv_sigtbl)
453 if (sig <= p->p_sysent->sv_sigsize)
454 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
456 bzero(&frame, sizeof(frame));
458 frame.sf_handler = catcher;
460 frame.sf_siginfo = &fp->sf_si;
461 frame.sf_ucontext = &fp->sf_sc;
463 /* Fill in POSIX parts */
464 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
467 * Build the signal context to be used by sigreturn.
469 frame.sf_sc.uc_flags = 0; /* XXX ??? */
470 frame.sf_sc.uc_link = NULL; /* XXX ??? */
472 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
473 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
474 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
475 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
478 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
480 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
481 frame.sf_sc.uc_mcontext.sc_gs = rgs();
482 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
483 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
484 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
485 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
486 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
487 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
488 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
489 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_esp;
490 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
491 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
492 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
493 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
494 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
495 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
496 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
497 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
498 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
499 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr;
500 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
503 if (ldebug(rt_sendsig))
504 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
505 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
506 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
509 if (copyout(&frame, fp, sizeof(frame)) != 0) {
511 * Process has trashed its stack; give it an illegal
512 * instruction to halt it in its tracks.
515 if (ldebug(rt_sendsig))
516 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
524 * Build context to run handler in.
526 regs->tf_esp = (int)fp;
527 regs->tf_eip = linux_rt_sigcode;
528 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
529 regs->tf_cs = _ucodesel;
530 regs->tf_ds = _udatasel;
531 regs->tf_es = _udatasel;
532 regs->tf_fs = _udatasel;
533 regs->tf_ss = _udatasel;
535 mtx_lock(&psp->ps_mtx);
540 * Send an interrupt to process.
542 * Stack is set up to allow sigcode stored
543 * in u. to call routine, followed by kcall
544 * to sigreturn routine below. After sigreturn
545 * resets the signal mask, the stack, and the
546 * frame pointer, it returns to the user
550 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
552 struct thread *td = curthread;
553 struct proc *p = td->td_proc;
555 struct trapframe *regs;
556 struct l_sigframe *fp, frame;
561 PROC_LOCK_ASSERT(p, MA_OWNED);
563 sig = ksi->ksi_signo;
564 code = ksi->ksi_code;
565 mtx_assert(&psp->ps_mtx, MA_OWNED);
566 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
567 /* Signal handler installed with SA_SIGINFO. */
568 linux_rt_sendsig(catcher, ksi, mask);
572 oonstack = sigonstack(regs->tf_esp);
576 printf(ARGS(sendsig, "%p, %d, %p, %u"),
577 catcher, sig, (void*)mask, code);
581 * Allocate space for the signal handler context.
583 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
584 SIGISMEMBER(psp->ps_sigonstack, sig)) {
585 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
586 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
588 fp = (struct l_sigframe *)regs->tf_esp - 1;
589 mtx_unlock(&psp->ps_mtx);
593 * Build the argument list for the signal handler.
595 if (p->p_sysent->sv_sigtbl)
596 if (sig <= p->p_sysent->sv_sigsize)
597 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
599 bzero(&frame, sizeof(frame));
601 frame.sf_handler = catcher;
604 bsd_to_linux_sigset(mask, &lmask);
607 * Build the signal context to be used by sigreturn.
609 frame.sf_sc.sc_mask = lmask.__bits[0];
610 frame.sf_sc.sc_gs = rgs();
611 frame.sf_sc.sc_fs = regs->tf_fs;
612 frame.sf_sc.sc_es = regs->tf_es;
613 frame.sf_sc.sc_ds = regs->tf_ds;
614 frame.sf_sc.sc_edi = regs->tf_edi;
615 frame.sf_sc.sc_esi = regs->tf_esi;
616 frame.sf_sc.sc_ebp = regs->tf_ebp;
617 frame.sf_sc.sc_ebx = regs->tf_ebx;
618 frame.sf_sc.sc_esp = regs->tf_esp;
619 frame.sf_sc.sc_edx = regs->tf_edx;
620 frame.sf_sc.sc_ecx = regs->tf_ecx;
621 frame.sf_sc.sc_eax = regs->tf_eax;
622 frame.sf_sc.sc_eip = regs->tf_eip;
623 frame.sf_sc.sc_cs = regs->tf_cs;
624 frame.sf_sc.sc_eflags = regs->tf_eflags;
625 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
626 frame.sf_sc.sc_ss = regs->tf_ss;
627 frame.sf_sc.sc_err = regs->tf_err;
628 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr;
629 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
631 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
632 frame.sf_extramask[i] = lmask.__bits[i+1];
634 if (copyout(&frame, fp, sizeof(frame)) != 0) {
636 * Process has trashed its stack; give it an illegal
637 * instruction to halt it in its tracks.
644 * Build context to run handler in.
646 regs->tf_esp = (int)fp;
647 regs->tf_eip = linux_sigcode;
648 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
649 regs->tf_cs = _ucodesel;
650 regs->tf_ds = _udatasel;
651 regs->tf_es = _udatasel;
652 regs->tf_fs = _udatasel;
653 regs->tf_ss = _udatasel;
655 mtx_lock(&psp->ps_mtx);
659 * System call to cleanup state after a signal
660 * has been taken. Reset signal mask and
661 * stack state from context left by sendsig (above).
662 * Return to previous pc and psl as specified by
663 * context left by sendsig. Check carefully to
664 * make sure that the user has not modified the
665 * psl to gain improper privileges or to cause
669 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
671 struct l_sigframe frame;
672 struct trapframe *regs;
681 if (ldebug(sigreturn))
682 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
685 * The trampoline code hands us the sigframe.
686 * It is unsafe to keep track of it ourselves, in the event that a
687 * program jumps out of a signal handler.
689 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
693 * Check for security violations.
695 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
696 eflags = frame.sf_sc.sc_eflags;
697 if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
701 * Don't allow users to load a valid privileged %cs. Let the
702 * hardware check for invalid selectors, excess privilege in
703 * other selectors, invalid %eip's and invalid %esp's.
705 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
706 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
707 ksiginfo_init_trap(&ksi);
708 ksi.ksi_signo = SIGBUS;
709 ksi.ksi_code = BUS_OBJERR;
710 ksi.ksi_trapno = T_PROTFLT;
711 ksi.ksi_addr = (void *)regs->tf_eip;
712 trapsignal(td, &ksi);
716 lmask.__bits[0] = frame.sf_sc.sc_mask;
717 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
718 lmask.__bits[i+1] = frame.sf_extramask[i];
719 linux_to_bsd_sigset(&lmask, &bmask);
720 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
723 * Restore signal context.
725 /* %gs was restored by the trampoline. */
726 regs->tf_fs = frame.sf_sc.sc_fs;
727 regs->tf_es = frame.sf_sc.sc_es;
728 regs->tf_ds = frame.sf_sc.sc_ds;
729 regs->tf_edi = frame.sf_sc.sc_edi;
730 regs->tf_esi = frame.sf_sc.sc_esi;
731 regs->tf_ebp = frame.sf_sc.sc_ebp;
732 regs->tf_ebx = frame.sf_sc.sc_ebx;
733 regs->tf_edx = frame.sf_sc.sc_edx;
734 regs->tf_ecx = frame.sf_sc.sc_ecx;
735 regs->tf_eax = frame.sf_sc.sc_eax;
736 regs->tf_eip = frame.sf_sc.sc_eip;
737 regs->tf_cs = frame.sf_sc.sc_cs;
738 regs->tf_eflags = eflags;
739 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
740 regs->tf_ss = frame.sf_sc.sc_ss;
742 return (EJUSTRETURN);
746 * System call to cleanup state after a signal
747 * has been taken. Reset signal mask and
748 * stack state from context left by rt_sendsig (above).
749 * Return to previous pc and psl as specified by
750 * context left by sendsig. Check carefully to
751 * make sure that the user has not modified the
752 * psl to gain improper privileges or to cause
756 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
758 struct l_ucontext uc;
759 struct l_sigcontext *context;
763 struct trapframe *regs;
770 if (ldebug(rt_sigreturn))
771 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
774 * The trampoline code hands us the ucontext.
775 * It is unsafe to keep track of it ourselves, in the event that a
776 * program jumps out of a signal handler.
778 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
781 context = &uc.uc_mcontext;
784 * Check for security violations.
786 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
787 eflags = context->sc_eflags;
788 if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
792 * Don't allow users to load a valid privileged %cs. Let the
793 * hardware check for invalid selectors, excess privilege in
794 * other selectors, invalid %eip's and invalid %esp's.
796 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
797 if (!CS_SECURE(context->sc_cs)) {
798 ksiginfo_init_trap(&ksi);
799 ksi.ksi_signo = SIGBUS;
800 ksi.ksi_code = BUS_OBJERR;
801 ksi.ksi_trapno = T_PROTFLT;
802 ksi.ksi_addr = (void *)regs->tf_eip;
803 trapsignal(td, &ksi);
807 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
808 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
811 * Restore signal context
813 /* %gs was restored by the trampoline. */
814 regs->tf_fs = context->sc_fs;
815 regs->tf_es = context->sc_es;
816 regs->tf_ds = context->sc_ds;
817 regs->tf_edi = context->sc_edi;
818 regs->tf_esi = context->sc_esi;
819 regs->tf_ebp = context->sc_ebp;
820 regs->tf_ebx = context->sc_ebx;
821 regs->tf_edx = context->sc_edx;
822 regs->tf_ecx = context->sc_ecx;
823 regs->tf_eax = context->sc_eax;
824 regs->tf_eip = context->sc_eip;
825 regs->tf_cs = context->sc_cs;
826 regs->tf_eflags = eflags;
827 regs->tf_esp = context->sc_esp_at_signal;
828 regs->tf_ss = context->sc_ss;
831 * call sigaltstack & ignore results..
834 ss.ss_sp = lss->ss_sp;
835 ss.ss_size = lss->ss_size;
836 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
839 if (ldebug(rt_sigreturn))
840 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
841 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
843 (void)kern_sigaltstack(td, &ss, NULL);
845 return (EJUSTRETURN);
849 linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
852 struct trapframe *frame;
855 frame = td->td_frame;
857 sa->code = frame->tf_eax;
858 sa->args[0] = frame->tf_ebx;
859 sa->args[1] = frame->tf_ecx;
860 sa->args[2] = frame->tf_edx;
861 sa->args[3] = frame->tf_esi;
862 sa->args[4] = frame->tf_edi;
863 sa->args[5] = frame->tf_ebp; /* Unconfirmed */
865 if (sa->code >= p->p_sysent->sv_size)
866 sa->callp = &p->p_sysent->sv_table[0];
868 sa->callp = &p->p_sysent->sv_table[sa->code];
869 sa->narg = sa->callp->sy_narg;
871 td->td_retval[0] = 0;
872 td->td_retval[1] = frame->tf_edx;
878 * If a linux binary is exec'ing something, try this image activator
879 * first. We override standard shell script execution in order to
880 * be able to modify the interpreter path. We only do this if a linux
881 * binary is doing the exec, so we do not create an EXEC module for it.
883 static int exec_linux_imgact_try(struct image_params *iparams);
886 exec_linux_imgact_try(struct image_params *imgp)
888 const char *head = (const char *)imgp->image_header;
893 * The interpreter for shell scripts run from a linux binary needs
894 * to be located in /compat/linux if possible in order to recursively
895 * maintain linux path emulation.
897 if (((const short *)head)[0] == SHELLMAGIC) {
899 * Run our normal shell image activator. If it succeeds attempt
900 * to use the alternate path for the interpreter. If an alternate
901 * path is found, use our stringspace to store it.
903 if ((error = exec_shell_imgact(imgp)) == 0) {
904 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
905 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
907 imgp->args->fname_buf =
908 imgp->interpreter_name = rpath;
915 * exec_setregs may initialize some registers differently than Linux
916 * does, thus potentially confusing Linux binaries. If necessary, we
917 * override the exec_setregs default(s) here.
920 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
922 struct pcb *pcb = td->td_pcb;
924 exec_setregs(td, imgp, stack);
926 /* Linux sets %gs to 0, we default to _udatasel */
930 pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
934 linux_get_machine(const char **dst)
952 struct sysentvec linux_sysvec = {
953 .sv_size = LINUX_SYS_MAXSYSCALL,
954 .sv_table = linux_sysent,
956 .sv_sigsize = LINUX_SIGTBLSZ,
957 .sv_sigtbl = bsd_to_linux_signal,
958 .sv_errsize = ELAST + 1,
959 .sv_errtbl = bsd_to_linux_errno,
960 .sv_transtrap = translate_traps,
961 .sv_fixup = linux_fixup,
962 .sv_sendsig = linux_sendsig,
963 .sv_sigcode = &_binary_linux_locore_o_start,
964 .sv_szsigcode = &linux_szsigcode,
965 .sv_prepsyscall = NULL,
966 .sv_name = "Linux a.out",
968 .sv_imgact_try = exec_linux_imgact_try,
969 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
970 .sv_pagesize = PAGE_SIZE,
971 .sv_minuser = VM_MIN_ADDRESS,
972 .sv_maxuser = VM_MAXUSER_ADDRESS,
973 .sv_usrstack = LINUX_USRSTACK,
974 .sv_psstrings = PS_STRINGS,
975 .sv_stackprot = VM_PROT_ALL,
976 .sv_copyout_strings = exec_copyout_strings,
977 .sv_setregs = exec_linux_setregs,
980 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
981 .sv_set_syscall_retval = cpu_set_syscall_retval,
982 .sv_fetch_syscall_args = linux_fetch_syscall_args,
983 .sv_syscallnames = NULL,
984 .sv_shared_page_base = LINUX_SHAREDPAGE,
985 .sv_shared_page_len = PAGE_SIZE,
986 .sv_schedtail = linux_schedtail,
987 .sv_thread_detach = linux_thread_detach,
989 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
991 struct sysentvec elf_linux_sysvec = {
992 .sv_size = LINUX_SYS_MAXSYSCALL,
993 .sv_table = linux_sysent,
995 .sv_sigsize = LINUX_SIGTBLSZ,
996 .sv_sigtbl = bsd_to_linux_signal,
997 .sv_errsize = ELAST + 1,
998 .sv_errtbl = bsd_to_linux_errno,
999 .sv_transtrap = translate_traps,
1000 .sv_fixup = elf_linux_fixup,
1001 .sv_sendsig = linux_sendsig,
1002 .sv_sigcode = &_binary_linux_locore_o_start,
1003 .sv_szsigcode = &linux_szsigcode,
1004 .sv_prepsyscall = NULL,
1005 .sv_name = "Linux ELF",
1006 .sv_coredump = elf32_coredump,
1007 .sv_imgact_try = exec_linux_imgact_try,
1008 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1009 .sv_pagesize = PAGE_SIZE,
1010 .sv_minuser = VM_MIN_ADDRESS,
1011 .sv_maxuser = VM_MAXUSER_ADDRESS,
1012 .sv_usrstack = LINUX_USRSTACK,
1013 .sv_psstrings = LINUX_PS_STRINGS,
1014 .sv_stackprot = VM_PROT_ALL,
1015 .sv_copyout_strings = linux_copyout_strings,
1016 .sv_setregs = exec_linux_setregs,
1017 .sv_fixlimit = NULL,
1019 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
1020 .sv_set_syscall_retval = cpu_set_syscall_retval,
1021 .sv_fetch_syscall_args = linux_fetch_syscall_args,
1022 .sv_syscallnames = NULL,
1023 .sv_shared_page_base = LINUX_SHAREDPAGE,
1024 .sv_shared_page_len = PAGE_SIZE,
1025 .sv_schedtail = linux_schedtail,
1026 .sv_thread_detach = linux_thread_detach,
1030 linux_vdso_install(void *param)
1033 linux_szsigcode = (&_binary_linux_locore_o_end -
1034 &_binary_linux_locore_o_start);
1036 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
1037 panic("Linux invalid vdso size\n");
1039 __elfN(linux_vdso_fixup)(&elf_linux_sysvec);
1041 linux_shared_page_obj = __elfN(linux_shared_page_init)
1042 (&linux_shared_page_mapping);
1044 __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX_SHAREDPAGE);
1046 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
1048 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
1050 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
1051 (sysinit_cfunc_t)linux_vdso_install, NULL);
1054 linux_vdso_deinstall(void *param)
1057 __elfN(linux_shared_page_fini)(linux_shared_page_obj);
1059 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
1060 (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
1062 static char GNU_ABI_VENDOR[] = "GNU";
1063 static int GNULINUX_ABI_DESC = 0;
1066 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1068 const Elf32_Word *desc;
1071 p = (uintptr_t)(note + 1);
1072 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1074 desc = (const Elf32_Word *)p;
1075 if (desc[0] != GNULINUX_ABI_DESC)
1079 * For linux we encode osrel as follows (see linux_mib.c):
1080 * VVVMMMIII (version, major, minor), see linux_mib.c.
1082 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1087 static Elf_Brandnote linux_brandnote = {
1088 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1089 .hdr.n_descsz = 16, /* XXX at least 16 */
1091 .vendor = GNU_ABI_VENDOR,
1092 .flags = BN_TRANSLATE_OSREL,
1093 .trans_osrel = linux_trans_osrel
1096 static Elf32_Brandinfo linux_brand = {
1097 .brand = ELFOSABI_LINUX,
1099 .compat_3_brand = "Linux",
1100 .emul_path = "/compat/linux",
1101 .interp_path = "/lib/ld-linux.so.1",
1102 .sysvec = &elf_linux_sysvec,
1103 .interp_newpath = NULL,
1104 .brand_note = &linux_brandnote,
1105 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1108 static Elf32_Brandinfo linux_glibc2brand = {
1109 .brand = ELFOSABI_LINUX,
1111 .compat_3_brand = "Linux",
1112 .emul_path = "/compat/linux",
1113 .interp_path = "/lib/ld-linux.so.2",
1114 .sysvec = &elf_linux_sysvec,
1115 .interp_newpath = NULL,
1116 .brand_note = &linux_brandnote,
1117 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1120 Elf32_Brandinfo *linux_brandlist[] = {
1127 linux_elf_modevent(module_t mod, int type, void *data)
1129 Elf32_Brandinfo **brandinfo;
1131 struct linux_ioctl_handler **lihp;
1132 struct linux_device_handler **ldhp;
1138 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1140 if (elf32_insert_brand_entry(*brandinfo) < 0)
1143 SET_FOREACH(lihp, linux_ioctl_handler_set)
1144 linux_ioctl_register_handler(*lihp);
1145 SET_FOREACH(ldhp, linux_device_handler_set)
1146 linux_device_register_handler(*ldhp);
1147 LIST_INIT(&futex_list);
1148 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1149 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1151 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1153 linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
1154 linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
1155 linux_get_machine(&linux_kplatform);
1156 linux_szplatform = roundup(strlen(linux_kplatform) + 1,
1158 linux_osd_jail_register();
1159 stclohz = (stathz ? stathz : hz);
1161 printf("Linux ELF exec handler installed\n");
1163 printf("cannot insert Linux ELF brand handler\n");
1166 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1168 if (elf32_brand_inuse(*brandinfo))
1171 for (brandinfo = &linux_brandlist[0];
1172 *brandinfo != NULL; ++brandinfo)
1173 if (elf32_remove_brand_entry(*brandinfo) < 0)
1177 SET_FOREACH(lihp, linux_ioctl_handler_set)
1178 linux_ioctl_unregister_handler(*lihp);
1179 SET_FOREACH(ldhp, linux_device_handler_set)
1180 linux_device_unregister_handler(*ldhp);
1181 mtx_destroy(&futex_mtx);
1182 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1183 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1184 EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
1185 linux_osd_jail_deregister();
1187 printf("Linux ELF exec handler removed\n");
1189 printf("Could not deinstall ELF interpreter entry\n");
1192 return (EOPNOTSUPP);
1197 static moduledata_t linux_elf_mod = {
1203 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);