2 * Copyright (c) 1994-1996 Søren Schmidt
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
60 #include <machine/cpu.h>
61 #include <machine/cputypes.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 #include <compat/linux/linux_emul.h>
68 #include <compat/linux/linux_futex.h>
69 #include <compat/linux/linux_ioctl.h>
70 #include <compat/linux/linux_mib.h>
71 #include <compat/linux/linux_misc.h>
72 #include <compat/linux/linux_signal.h>
73 #include <compat/linux/linux_util.h>
74 #include <compat/linux/linux_vdso.h>
/* Declare version 1 of the linux module. */
76 MODULE_VERSION(linux, 1);
/* Malloc type M_LINUX, used for Linux mode structures. */
78 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
/*
 * SHELLMAGIC is the two-byte script marker (hash, bang) expressed as a
 * 16-bit value in host byte order; exec_linux_imgact_try() compares the
 * first short of the image header against it.
 */
80 #if BYTE_ORDER == LITTLE_ENDIAN
81 #define SHELLMAGIC 0x2123 /* #! */
83 #define SHELLMAGIC 0x2321
87 * Allow the sendsig functions to use the ldebug() facility
88 * even though they are not syscalls themselves. Map them
89 * to syscall 0. This is slightly less bogus than using
/* Both sendsig routines are given syscall number 0 for ldebug() purposes. */
92 #define LINUX_SYS_linux_rt_sendsig 0
93 #define LINUX_SYS_linux_sendsig 0
/* Address of ps_strings for Linux ELF processes: directly below LINUX_USRSTACK. */
95 #define LINUX_PS_STRINGS (LINUX_USRSTACK - sizeof(struct ps_strings))
/* Byte size of the signal trampoline image; computed in linux_vdso_install(). */
97 static int linux_szsigcode;
/* Shared page object and its mapping; both are set in linux_vdso_install(). */
98 static vm_object_t linux_shared_page_obj;
99 static char *linux_shared_page_mapping;
/* Start and end markers of the linux_locore.o image (used as sv_sigcode). */
100 extern char _binary_linux_locore_o_start;
101 extern char _binary_linux_locore_o_end;
/* Linux system call table, defined elsewhere. */
103 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
/* Linker sets walked by linux_elf_modevent() to (un)register handlers. */
105 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
106 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
/* Forward declarations of routines defined later in this file. */
108 static int linux_fixup(register_t **stack_base,
109 struct image_params *iparams);
110 static int elf_linux_fixup(register_t **stack_base,
111 struct image_params *iparams);
112 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
113 static void exec_linux_setregs(struct thread *td,
114 struct image_params *imgp, u_long stack);
115 static register_t *linux_copyout_strings(struct image_params *imgp);
116 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
117 static void linux_vdso_install(void *param);
118 static void linux_vdso_deinstall(void *param);
/*
 * Platform string and its rounded-up size; both are assigned in
 * linux_elf_modevent() and consumed by linux_copyout_strings() and
 * elf_linux_fixup().
 */
120 static int linux_szplatform;
121 const char *linux_platform;
/* Event handler tags saved at registration so they can be deregistered. */
123 static eventhandler_tag linux_exit_tag;
124 static eventhandler_tag linux_exec_tag;
125 static eventhandler_tag linux_thread_dtor_tag;
128 * Linux syscalls return negative errnos; we use positive ones and map them
130 * FreeBSD: src/sys/sys/errno.h
131 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
132 * linux-2.6.17.8/include/asm-generic/errno.h
/*
 * Errno translation table with ELAST + 1 entries, indexed by FreeBSD errno.
 * Each entry is the negated Linux errno value. Installed as sv_errtbl in
 * both sysentvecs in this file.
 */
134 static int bsd_to_linux_errno[ELAST + 1] = {
135 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
136 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
137 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
138 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
139 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
140 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
141 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
142 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
143 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
/*
 * FreeBSD-to-Linux signal number table. It is installed as sv_sigtbl and
 * read by the sendsig routines with index _SIG_IDX(sig).
 * NOTE(review): the two 0 entries presumably mark FreeBSD signals without
 * a Linux counterpart; confirm.
 */
147 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
148 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
149 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
150 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
151 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
152 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
153 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
154 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
155 0, LINUX_SIGUSR1, LINUX_SIGUSR2
/*
 * Linux-to-FreeBSD signal number table, the reverse direction of
 * bsd_to_linux_signal. Several Linux slots map to the same FreeBSD signal
 * (SIGBUS and SIGURG each appear twice).
 * NOTE(review): indexing is presumably signal number minus one, as for
 * the forward table; confirm against the users of this table.
 */
158 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
159 SIGHUP, SIGINT, SIGQUIT, SIGILL,
160 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
161 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
162 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
163 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
164 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
165 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
166 SIGIO, SIGURG, SIGSYS
/* Value stored for FreeBSD trap types that have no Linux trap number here. */
169 #define LINUX_T_UNKNOWN 255
/*
 * Trap number translation table. The index is the FreeBSD trap type (given
 * in each trailing comment) and the value is the Linux trap number reported
 * in sc_trapno. Read only through the bsd_to_linux_trapcode() macro.
 */
170 static int _bsd_to_linux_trapcode[] = {
171 LINUX_T_UNKNOWN, /* 0 */
172 6, /* 1 T_PRIVINFLT */
173 LINUX_T_UNKNOWN, /* 2 */
175 LINUX_T_UNKNOWN, /* 4 */
176 LINUX_T_UNKNOWN, /* 5 */
177 16, /* 6 T_ARITHTRAP */
178 254, /* 7 T_ASTFLT */
179 LINUX_T_UNKNOWN, /* 8 */
180 13, /* 9 T_PROTFLT */
181 1, /* 10 T_TRCTRAP */
182 LINUX_T_UNKNOWN, /* 11 */
183 14, /* 12 T_PAGEFLT */
184 LINUX_T_UNKNOWN, /* 13 */
185 17, /* 14 T_ALIGNFLT */
186 LINUX_T_UNKNOWN, /* 15 */
187 LINUX_T_UNKNOWN, /* 16 */
188 LINUX_T_UNKNOWN, /* 17 */
194 8, /* 23 T_DOUBLEFLT */
195 9, /* 24 T_FPOPFLT */
196 10, /* 25 T_TSSFLT */
197 11, /* 26 T_SEGNPFLT */
198 12, /* 27 T_STKFLT */
200 19, /* 29 T_XMMFLT */
201 15 /* 30 T_RESERVED */
/*
 * Look up code in _bsd_to_linux_trapcode, but only after checking it
 * against the element count of the table. The argument is evaluated
 * more than once.
 */
203 #define bsd_to_linux_trapcode(code) \
204 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
205 _bsd_to_linux_trapcode[(code)]: \
/*
 * VDSO symbols used in this file: linux_sigcode and linux_rt_sigcode are
 * loaded into tf_eip by the sendsig routines, and linux_vsyscall is
 * exported as the LINUX_AT_SYSINFO aux entry.
 */
208 LINUX_VDSO_SYM_INTPTR(linux_sigcode);
209 LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
210 LINUX_VDSO_SYM_INTPTR(linux_vsyscall);
213 * If FreeBSD & Linux have a difference of opinion about what a trap
214 * means, deal with it here.
/*
 * Installed as sv_transtrap in both sysentvecs. Takes a signal number and
 * a trap code.
 * NOTE(review): only the first test is visible in this listing; what is
 * returned for SIGBUS and for other signals must be confirmed in the
 * full source.
 */
219 translate_traps(int signal, int trap_code)
/* Separate every signal other than SIGBUS from the SIGBUS case. */
221 if (signal != SIGBUS)
/*
 * Stack fixup for the a.out sysentvec (installed as linux_sysvec.sv_fixup).
 * Stores the envp pointer, the argv pointer and argc as words on the user
 * stack through *stack_base, using suword().
 */
235 linux_fixup(register_t **stack_base, struct image_params *imgp)
237 register_t *argv, *envp;
/* envp starts argc + 1 slots above the initial *stack_base. */
240 envp = *stack_base + (imgp->args->argc + 1);
242 suword(*stack_base, (intptr_t)(void *)envp);
244 suword(*stack_base, (intptr_t)(void *)argv);
246 suword(*stack_base, imgp->args->argc);
/*
 * Stack fixup for Linux ELF images (installed as elf_linux_sysvec.sv_fixup).
 * Writes the ELF auxiliary vector after the argv and envp vectors, releases
 * imgp->auxargs, and stores argc at *stack_base.
 * Must run in the context of the process being exec-ed (asserted below).
 */
251 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
255 Elf32_Addr *uplatform;
256 struct ps_strings *arginfo;
259 KASSERT(curthread->td_proc == imgp->proc,
260 ("unsafe elf_linux_fixup(), should be curproc"));
/*
 * The platform string lives linux_szplatform bytes below ps_strings,
 * which is where linux_copyout_strings() copies it.
 */
263 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
264 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
265 args = (Elf32_Auxargs *)imgp->auxargs;
/* Skip argc + envc pointers plus the two NULL terminators. */
266 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
268 AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
269 imgp->proc->p_sysent->sv_shared_page_base);
270 AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall);
271 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
274 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
275 * as it has appeared in the 2.4.0-rc7 first time.
276 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
277 * glibc falls back to the hard-coded CLK_TCK value when aux entry
279 * Also see linux_times() implementation.
281 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
282 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
283 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
284 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
285 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
286 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
287 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
288 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
289 AUXARGS_ENTRY(pos, AT_BASE, args->base);
290 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
/*
 * NOTE(review): AT_EUID and AT_EGID are filled from the saved ids
 * (cr_svuid, cr_svgid), not from the effective ids; confirm this is
 * intended.
 */
291 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
292 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
293 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
294 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
295 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
/* AT_EXECFD is emitted only when an exec descriptor was recorded. */
296 if (args->execfd != -1)
297 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
298 AUXARGS_ENTRY(pos, AT_NULL, 0);
/* The aux arguments are no longer needed once written out. */
300 free(imgp->auxargs, M_TEMP);
301 imgp->auxargs = NULL;
304 suword(*stack_base, (register_t)imgp->args->argc);
309 * Copied from kern/kern_exec.c
/*
 * String copy-out routine for Linux ELF images (installed as
 * elf_linux_sysvec.sv_copyout_strings). Copies the platform string and
 * the argument and environment strings to the user stack below
 * ps_strings, builds the argv and envp pointer vectors, and fills in
 * the ps_strings fields. stack_base is set to the start of the vector
 * table.
 * NOTE(review): in the lines shown, the results of copyout() and suword()
 * are not checked.
 */
312 linux_copyout_strings(struct image_params *imgp)
316 char *stringp, *destp;
317 register_t *stack_base;
318 struct ps_strings *arginfo;
322 * Calculate string base and vector table pointers.
323 * Also deal with signal trampoline code for this exec type.
/*
 * destp: below ps_strings, leaving room for SPARE_USRSPACE, the platform
 * string, and the used part of the argument buffer rounded up to pointer
 * size.
 */
326 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
327 destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
328 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
331 * install LINUX_PLATFORM
333 copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
337 * If we have a valid auxargs ptr, prepare some room
342 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
343 * lower compatibility.
/* Keep a preset auxarg_size; otherwise default to LINUX_AT_COUNT * 2. */
345 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
346 (LINUX_AT_COUNT * 2);
348 * The '+ 2' is for the null pointers at the end of each of
349 * the arg and env vector sets,and imgp->auxarg_size is room
350 * for argument of Runtime loader.
352 vectp = (char **)(destp - (imgp->args->argc +
353 imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
356 * The '+ 2' is for the null pointers at the end of each of
357 * the arg and env vector sets
359 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
364 * vectp also becomes our initial stack base
366 stack_base = (register_t *)vectp;
368 stringp = imgp->args->begin_argv;
369 argc = imgp->args->argc;
370 envc = imgp->args->envc;
373 * Copy out strings - arguments and environment.
/* One copyout moves the whole used part of the argument buffer. */
375 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
378 * Fill in "ps_strings" struct for ps, w, etc.
380 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
381 suword(&arginfo->ps_nargvstr, argc);
384 * Fill in argument portion of vector table.
/* Store one user pointer per argument, then scan to the end of that string. */
386 for (; argc > 0; --argc) {
387 suword(vectp++, (long)(intptr_t)destp);
388 while (*stringp++ != 0)
393 /* a null vector table pointer separates the argp's from the envp's */
396 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
397 suword(&arginfo->ps_nenvstr, envc);
400 * Fill in environment portion of vector table.
/* Same walk as for the arguments, now over the environment strings. */
402 for (; envc > 0; --envc) {
403 suword(vectp++, (long)(intptr_t)destp);
404 while (*stringp++ != 0)
409 /* end of vector table is a null pointer */
/*
 * Deliver a signal with a Linux rt signal frame (struct l_rt_sigframe).
 * Called from linux_sendsig() for handlers installed with SA_SIGINFO.
 * Builds the frame in kernel memory, copies it out to the user stack or
 * the alternate signal stack, and points the trapframe at linux_rt_sigcode.
 *
 * catcher: user handler address, stored in the frame.
 * ksi: signal information (signo, code, faulting address).
 * mask: signal mask to record in the frame for sigreturn.
 *
 * Locking: the proc lock and ps_mtx are asserted held on entry; ps_mtx is
 * dropped after the frame address is chosen and taken again at the end.
 */
416 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
418 struct thread *td = curthread;
419 struct proc *p = td->td_proc;
421 struct trapframe *regs;
422 struct l_rt_sigframe *fp, frame;
426 sig = ksi->ksi_signo;
427 code = ksi->ksi_code;
428 PROC_LOCK_ASSERT(p, MA_OWNED);
430 mtx_assert(&psp->ps_mtx, MA_OWNED);
/* Remember whether the thread is already running on the signal stack. */
432 oonstack = sigonstack(regs->tf_esp);
435 if (ldebug(rt_sendsig))
436 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
437 catcher, sig, (void*)mask, code);
440 * Allocate space for the signal handler context.
/*
 * Use the top of the alternate stack when one is enabled, the thread is
 * not already on it, and this signal asked for it; otherwise place the
 * frame directly below the current user stack pointer.
 */
442 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
443 SIGISMEMBER(psp->ps_sigonstack, sig)) {
444 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
445 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
447 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
448 mtx_unlock(&psp->ps_mtx);
451 * Build the argument list for the signal handler.
/* Translate the FreeBSD signal number through the sysentvec signal table. */
453 if (p->p_sysent->sv_sigtbl)
454 if (sig <= p->p_sysent->sv_sigsize)
455 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
457 bzero(&frame, sizeof(frame));
459 frame.sf_handler = catcher;
/* These pointers refer to the user-space copy of the frame (fp). */
461 frame.sf_siginfo = &fp->sf_si;
462 frame.sf_ucontext = &fp->sf_sc;
464 /* Fill in POSIX parts */
465 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
468 * Build the signal context to be used by sigreturn.
470 frame.sf_sc.uc_flags = 0; /* XXX ??? */
471 frame.sf_sc.uc_link = NULL; /* XXX ??? */
473 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
474 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
475 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
476 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
479 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
/* Snapshot the interrupted register state from the trapframe. */
481 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
482 frame.sf_sc.uc_mcontext.sc_gs = rgs();
483 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
484 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
485 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
486 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
487 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
488 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
489 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
490 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_esp;
491 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
492 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
493 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
494 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
495 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
496 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
497 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
498 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
499 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
500 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr;
/*
 * NOTE(review): code holds ksi->ksi_code here, while linux_sendsig()
 * passes ksi->ksi_trapno to the same macro; confirm which is intended.
 */
501 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
504 if (ldebug(rt_sendsig))
505 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
506 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
507 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
510 if (copyout(&frame, fp, sizeof(frame)) != 0) {
512 * Process has trashed its stack; give it an illegal
513 * instruction to halt it in its tracks.
516 if (ldebug(rt_sendsig))
517 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
525 * Build context to run handler in.
/*
 * Resume in the rt trampoline with the stack pointer at the new frame,
 * the trace, VM86 and direction flags cleared, and user selectors loaded.
 */
527 regs->tf_esp = (int)fp;
528 regs->tf_eip = linux_rt_sigcode;
529 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
530 regs->tf_cs = _ucodesel;
531 regs->tf_ds = _udatasel;
532 regs->tf_es = _udatasel;
533 regs->tf_fs = _udatasel;
534 regs->tf_ss = _udatasel;
536 mtx_lock(&psp->ps_mtx);
541 * Send an interrupt to process.
543 * Stack is set up to allow sigcode stored
544 * in u. to call routine, followed by kcall
545 * to sigreturn routine below. After sigreturn
546 * resets the signal mask, the stack, and the
547 * frame pointer, it returns to the user
/*
 * Signal delivery entry point for both Linux sysentvecs (sv_sendsig).
 * Handlers installed with SA_SIGINFO are handed to linux_rt_sendsig();
 * all others get a classic struct l_sigframe, which is built in kernel
 * memory, copied out, and entered through linux_sigcode.
 *
 * catcher: user handler address, stored in the frame.
 * ksi: signal information (signo, code, trap number, faulting address).
 * mask: signal mask to record in the frame for sigreturn.
 *
 * Locking: the proc lock and ps_mtx are asserted held on entry; ps_mtx is
 * dropped after the frame address is chosen and taken again at the end.
 */
551 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
553 struct thread *td = curthread;
554 struct proc *p = td->td_proc;
556 struct trapframe *regs;
557 struct l_sigframe *fp, frame;
562 PROC_LOCK_ASSERT(p, MA_OWNED);
564 sig = ksi->ksi_signo;
565 code = ksi->ksi_code;
566 mtx_assert(&psp->ps_mtx, MA_OWNED);
567 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
568 /* Signal handler installed with SA_SIGINFO. */
569 linux_rt_sendsig(catcher, ksi, mask);
/* Remember whether the thread is already running on the signal stack. */
573 oonstack = sigonstack(regs->tf_esp);
577 printf(ARGS(sendsig, "%p, %d, %p, %u"),
578 catcher, sig, (void*)mask, code);
582 * Allocate space for the signal handler context.
/* Same frame placement rule as in linux_rt_sendsig(). */
584 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
585 SIGISMEMBER(psp->ps_sigonstack, sig)) {
586 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
587 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
589 fp = (struct l_sigframe *)regs->tf_esp - 1;
590 mtx_unlock(&psp->ps_mtx);
594 * Build the argument list for the signal handler.
/* Translate the FreeBSD signal number through the sysentvec signal table. */
596 if (p->p_sysent->sv_sigtbl)
597 if (sig <= p->p_sysent->sv_sigsize)
598 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
600 bzero(&frame, sizeof(frame));
602 frame.sf_handler = catcher;
605 bsd_to_linux_sigset(mask, &lmask);
608 * Build the signal context to be used by sigreturn.
/* Word 0 of the mask goes in sc_mask; the rest goes in sf_extramask below. */
610 frame.sf_sc.sc_mask = lmask.__bits[0];
611 frame.sf_sc.sc_gs = rgs();
612 frame.sf_sc.sc_fs = regs->tf_fs;
613 frame.sf_sc.sc_es = regs->tf_es;
614 frame.sf_sc.sc_ds = regs->tf_ds;
615 frame.sf_sc.sc_edi = regs->tf_edi;
616 frame.sf_sc.sc_esi = regs->tf_esi;
617 frame.sf_sc.sc_ebp = regs->tf_ebp;
618 frame.sf_sc.sc_ebx = regs->tf_ebx;
619 frame.sf_sc.sc_esp = regs->tf_esp;
620 frame.sf_sc.sc_edx = regs->tf_edx;
621 frame.sf_sc.sc_ecx = regs->tf_ecx;
622 frame.sf_sc.sc_eax = regs->tf_eax;
623 frame.sf_sc.sc_eip = regs->tf_eip;
624 frame.sf_sc.sc_cs = regs->tf_cs;
625 frame.sf_sc.sc_eflags = regs->tf_eflags;
626 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
627 frame.sf_sc.sc_ss = regs->tf_ss;
628 frame.sf_sc.sc_err = regs->tf_err;
629 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr;
630 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
632 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
633 frame.sf_extramask[i] = lmask.__bits[i+1];
635 if (copyout(&frame, fp, sizeof(frame)) != 0) {
637 * Process has trashed its stack; give it an illegal
638 * instruction to halt it in its tracks.
645 * Build context to run handler in.
/*
 * Resume in the trampoline with the stack pointer at the new frame,
 * the trace, VM86 and direction flags cleared, and user selectors loaded.
 */
647 regs->tf_esp = (int)fp;
648 regs->tf_eip = linux_sigcode;
649 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
650 regs->tf_cs = _ucodesel;
651 regs->tf_ds = _udatasel;
652 regs->tf_es = _udatasel;
653 regs->tf_fs = _udatasel;
654 regs->tf_ss = _udatasel;
656 mtx_lock(&psp->ps_mtx);
660 * System call to cleanup state after a signal
661 * has been taken. Reset signal mask and
662 * stack state from context left by sendsig (above).
663 * Return to previous pc and psl as specified by
664 * context left by sendsig. Check carefully to
665 * make sure that the user has not modified the
666 * psl to gain improper privileges or to cause
/*
 * Linux sigreturn system call for classic signal frames. Copies the
 * l_sigframe in from args->sfp, validates the saved eflags and %cs,
 * restores the signal mask and the register state into the trapframe,
 * and returns EJUSTRETURN on the success path.
 */
670 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
672 struct l_sigframe frame;
673 struct trapframe *regs;
682 if (ldebug(sigreturn))
683 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
686 * The trampoline code hands us the sigframe.
687 * It is unsafe to keep track of it ourselves, in the event that a
688 * program jumps out of a signal handler.
690 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
694 * Check for security violations.
/* The saved eflags may differ from the live ones only in PSL_USERCHANGE bits. */
696 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
697 eflags = frame.sf_sc.sc_eflags;
698 if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
702 * Don't allow users to load a valid privileged %cs. Let the
703 * hardware check for invalid selectors, excess privilege in
704 * other selectors, invalid %eip's and invalid %esp's.
/* A bad %cs raises SIGBUS (BUS_OBJERR, T_PROTFLT) at the current eip. */
706 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
707 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
708 ksiginfo_init_trap(&ksi);
709 ksi.ksi_signo = SIGBUS;
710 ksi.ksi_code = BUS_OBJERR;
711 ksi.ksi_trapno = T_PROTFLT;
712 ksi.ksi_addr = (void *)regs->tf_eip;
713 trapsignal(td, &ksi);
/* Reassemble the Linux mask from sc_mask and sf_extramask, then install it. */
717 lmask.__bits[0] = frame.sf_sc.sc_mask;
718 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
719 lmask.__bits[i+1] = frame.sf_extramask[i];
720 linux_to_bsd_sigset(&lmask, &bmask);
721 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
724 * Restore signal context.
726 /* %gs was restored by the trampoline. */
727 regs->tf_fs = frame.sf_sc.sc_fs;
728 regs->tf_es = frame.sf_sc.sc_es;
729 regs->tf_ds = frame.sf_sc.sc_ds;
730 regs->tf_edi = frame.sf_sc.sc_edi;
731 regs->tf_esi = frame.sf_sc.sc_esi;
732 regs->tf_ebp = frame.sf_sc.sc_ebp;
733 regs->tf_ebx = frame.sf_sc.sc_ebx;
734 regs->tf_edx = frame.sf_sc.sc_edx;
735 regs->tf_ecx = frame.sf_sc.sc_ecx;
736 regs->tf_eax = frame.sf_sc.sc_eax;
737 regs->tf_eip = frame.sf_sc.sc_eip;
738 regs->tf_cs = frame.sf_sc.sc_cs;
739 regs->tf_eflags = eflags;
/* The user stack pointer comes from sc_esp_at_signal, not sc_esp. */
740 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
741 regs->tf_ss = frame.sf_sc.sc_ss;
743 return (EJUSTRETURN);
747 * System call to cleanup state after a signal
748 * has been taken. Reset signal mask and
749 * stack state from context left by rt_sendsig (above).
750 * Return to previous pc and psl as specified by
751 * context left by rt_sendsig. Check carefully to
752 * make sure that the user has not modified the
753 * psl to gain improper privileges or to cause
/*
 * Linux rt_sigreturn system call. Copies the l_ucontext in from args->ucp,
 * validates the saved eflags and %cs, restores the signal mask and the
 * register state into the trapframe, re-applies the saved alternate stack
 * settings, and returns EJUSTRETURN on the success path.
 */
757 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
759 struct l_ucontext uc;
760 struct l_sigcontext *context;
764 struct trapframe *regs;
771 if (ldebug(rt_sigreturn))
772 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
775 * The trampoline code hands us the ucontext.
776 * It is unsafe to keep track of it ourselves, in the event that a
777 * program jumps out of a signal handler.
779 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
782 context = &uc.uc_mcontext;
785 * Check for security violations.
/* The saved eflags may differ from the live ones only in PSL_USERCHANGE bits. */
787 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
788 eflags = context->sc_eflags;
789 if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
793 * Don't allow users to load a valid privileged %cs. Let the
794 * hardware check for invalid selectors, excess privilege in
795 * other selectors, invalid %eip's and invalid %esp's.
/* A bad %cs raises SIGBUS (BUS_OBJERR, T_PROTFLT) at the current eip. */
797 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
798 if (!CS_SECURE(context->sc_cs)) {
799 ksiginfo_init_trap(&ksi);
800 ksi.ksi_signo = SIGBUS;
801 ksi.ksi_code = BUS_OBJERR;
802 ksi.ksi_trapno = T_PROTFLT;
803 ksi.ksi_addr = (void *)regs->tf_eip;
804 trapsignal(td, &ksi);
/* Install the signal mask saved in the ucontext. */
808 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
809 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
812 * Restore signal context
814 /* %gs was restored by the trampoline. */
815 regs->tf_fs = context->sc_fs;
816 regs->tf_es = context->sc_es;
817 regs->tf_ds = context->sc_ds;
818 regs->tf_edi = context->sc_edi;
819 regs->tf_esi = context->sc_esi;
820 regs->tf_ebp = context->sc_ebp;
821 regs->tf_ebx = context->sc_ebx;
822 regs->tf_edx = context->sc_edx;
823 regs->tf_ecx = context->sc_ecx;
824 regs->tf_eax = context->sc_eax;
825 regs->tf_eip = context->sc_eip;
826 regs->tf_cs = context->sc_cs;
827 regs->tf_eflags = eflags;
/* The user stack pointer comes from sc_esp_at_signal, not sc_esp. */
828 regs->tf_esp = context->sc_esp_at_signal;
829 regs->tf_ss = context->sc_ss;
832 * call sigaltstack & ignore results..
/* Convert the Linux stack description (lss) to the native stack_t form. */
835 ss.ss_sp = lss->ss_sp;
836 ss.ss_size = lss->ss_size;
837 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
840 if (ldebug(rt_sigreturn))
841 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
842 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
/* The return value is deliberately discarded. */
844 (void)kern_sigaltstack(td, &ss, NULL);
846 return (EJUSTRETURN);
/*
 * Fetch the system call number and arguments for a Linux process from
 * the trapframe (installed as sv_fetch_syscall_args in both sysentvecs).
 * The number is taken from eax and the six arguments from ebx, ecx, edx,
 * esi, edi and ebp. Fills in sa->code, sa->args, sa->callp and sa->narg,
 * and presets td_retval.
 */
850 linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
853 struct trapframe *frame;
856 frame = td->td_frame;
858 sa->code = frame->tf_eax;
859 sa->args[0] = frame->tf_ebx;
860 sa->args[1] = frame->tf_ecx;
861 sa->args[2] = frame->tf_edx;
862 sa->args[3] = frame->tf_esi;
863 sa->args[4] = frame->tf_edi;
864 sa->args[5] = frame->tf_ebp; /* Unconfirmed */
/* An out-of-range number is routed to entry 0 of the syscall table. */
866 if (sa->code >= p->p_sysent->sv_size)
867 sa->callp = &p->p_sysent->sv_table[0];
869 sa->callp = &p->p_sysent->sv_table[sa->code];
870 sa->narg = sa->callp->sy_narg;
/* Default return values: 0 and the current edx. */
872 td->td_retval[0] = 0;
873 td->td_retval[1] = frame->tf_edx;
879 * If a linux binary is exec'ing something, try this image activator
880 * first. We override standard shell script execution in order to
881 * be able to modify the interpreter path. We only do this if a linux
882 * binary is doing the exec, so we do not create an EXEC module for it.
/*
 * Image activator hook (sv_imgact_try in both sysentvecs). When the
 * image header starts with SHELLMAGIC, runs the normal shell image
 * activator and then converts the interpreter path with
 * linux_emul_convpath().
 */
884 static int exec_linux_imgact_try(struct image_params *iparams);
887 exec_linux_imgact_try(struct image_params *imgp)
889 const char *head = (const char *)imgp->image_header;
894 * The interpreter for shell scripts run from a linux binary needs
895 * to be located in /compat/linux if possible in order to recursively
896 * maintain linux path emulation.
/* Compare the first 16-bit word of the header with the script marker. */
898 if (((const short *)head)[0] == SHELLMAGIC) {
900 * Run our normal shell image activator. If it succeeds attempt
901 * to use the alternate path for the interpreter. If an alternate
902 * path is found, use our stringspace to store it.
904 if ((error = exec_shell_imgact(imgp)) == 0) {
905 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
906 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
/* Record rpath as both args->fname_buf and the interpreter name. */
908 imgp->args->fname_buf =
909 imgp->interpreter_name = rpath;
916 * exec_setregs may initialize some registers differently than Linux
917 * does, thus potentially confusing Linux binaries. If necessary, we
918 * override the exec_setregs default(s) here.
/*
 * Register setup at exec time for Linux images (sv_setregs in both
 * sysentvecs). Calls the native exec_setregs() first and then overrides
 * the defaults that differ for Linux.
 */
921 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
923 struct pcb *pcb = td->td_pcb;
925 exec_setregs(td, imgp, stack);
927 /* Linux sets %gs to 0, we default to _udatasel */
/* Use the Linux value for the initial npx control word. */
931 pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
/*
 * Called from linux_elf_modevent() with the address of linux_platform.
 * NOTE(review): the body is not visible in this listing; presumably it
 * stores a machine or platform name string through dst. Confirm.
 */
935 linux_get_machine(const char **dst)
/*
 * System entry vector for Linux a.out binaries. It shares the syscall,
 * signal and errno tables, the sendsig routine and the syscall argument
 * fetcher with elf_linux_sysvec. It differs in using linux_fixup, the
 * native PS_STRINGS and exec_copyout_strings, and in carrying SV_AOUT in
 * its flags.
 */
953 struct sysentvec linux_sysvec = {
954 .sv_size = LINUX_SYS_MAXSYSCALL,
955 .sv_table = linux_sysent,
957 .sv_sigsize = LINUX_SIGTBLSZ,
958 .sv_sigtbl = bsd_to_linux_signal,
959 .sv_errsize = ELAST + 1,
960 .sv_errtbl = bsd_to_linux_errno,
961 .sv_transtrap = translate_traps,
962 .sv_fixup = linux_fixup,
963 .sv_sendsig = linux_sendsig,
964 .sv_sigcode = &_binary_linux_locore_o_start,
965 .sv_szsigcode = &linux_szsigcode,
966 .sv_prepsyscall = NULL,
967 .sv_name = "Linux a.out",
969 .sv_imgact_try = exec_linux_imgact_try,
970 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
971 .sv_pagesize = PAGE_SIZE,
972 .sv_minuser = VM_MIN_ADDRESS,
973 .sv_maxuser = VM_MAXUSER_ADDRESS,
974 .sv_usrstack = LINUX_USRSTACK,
975 .sv_psstrings = PS_STRINGS,
976 .sv_stackprot = VM_PROT_ALL,
977 .sv_copyout_strings = exec_copyout_strings,
978 .sv_setregs = exec_linux_setregs,
981 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
982 .sv_set_syscall_retval = cpu_set_syscall_retval,
983 .sv_fetch_syscall_args = linux_fetch_syscall_args,
984 .sv_syscallnames = NULL,
985 .sv_shared_page_base = LINUX_SHAREDPAGE,
986 .sv_shared_page_len = PAGE_SIZE,
987 .sv_schedtail = linux_schedtail,
988 .sv_thread_detach = linux_thread_detach,
/* Initialise the a.out sysentvec through the sysinit hook named aout_sysvec. */
990 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
/*
 * System entry vector for Linux ELF binaries. Compared with linux_sysvec
 * it uses elf_linux_fixup, LINUX_PS_STRINGS and linux_copyout_strings,
 * sets elf32_coredump as the core dump routine, and carries SV_SHP
 * instead of SV_AOUT in its flags. linux_vdso_install() fills in its
 * shared page object.
 */
992 struct sysentvec elf_linux_sysvec = {
993 .sv_size = LINUX_SYS_MAXSYSCALL,
994 .sv_table = linux_sysent,
996 .sv_sigsize = LINUX_SIGTBLSZ,
997 .sv_sigtbl = bsd_to_linux_signal,
998 .sv_errsize = ELAST + 1,
999 .sv_errtbl = bsd_to_linux_errno,
1000 .sv_transtrap = translate_traps,
1001 .sv_fixup = elf_linux_fixup,
1002 .sv_sendsig = linux_sendsig,
1003 .sv_sigcode = &_binary_linux_locore_o_start,
1004 .sv_szsigcode = &linux_szsigcode,
1005 .sv_prepsyscall = NULL,
1006 .sv_name = "Linux ELF",
1007 .sv_coredump = elf32_coredump,
1008 .sv_imgact_try = exec_linux_imgact_try,
1009 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1010 .sv_pagesize = PAGE_SIZE,
1011 .sv_minuser = VM_MIN_ADDRESS,
1012 .sv_maxuser = VM_MAXUSER_ADDRESS,
1013 .sv_usrstack = LINUX_USRSTACK,
1014 .sv_psstrings = LINUX_PS_STRINGS,
1015 .sv_stackprot = VM_PROT_ALL,
1016 .sv_copyout_strings = linux_copyout_strings,
1017 .sv_setregs = exec_linux_setregs,
1018 .sv_fixlimit = NULL,
1020 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
1021 .sv_set_syscall_retval = cpu_set_syscall_retval,
1022 .sv_fetch_syscall_args = linux_fetch_syscall_args,
1023 .sv_syscallnames = NULL,
1024 .sv_shared_page_base = LINUX_SHAREDPAGE,
1025 .sv_shared_page_len = PAGE_SIZE,
1026 .sv_schedtail = linux_schedtail,
1027 .sv_thread_detach = linux_thread_detach,
/*
 * Set up the Linux shared page (VDSO) for elf_linux_sysvec. Registered
 * below as a SYSINIT at SI_SUB_EXEC with order SI_ORDER_ANY.
 * param is passed as NULL by that registration.
 */
1031 linux_vdso_install(void *param)
/* The image size is the distance between the linker-provided markers. */
1034 linux_szsigcode = (&_binary_linux_locore_o_end -
1035 &_binary_linux_locore_o_start);
/* The image must fit in the shared page. */
1037 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
1038 panic("Linux invalid vdso size\n");
1040 __elfN(linux_vdso_fixup)(&elf_linux_sysvec);
1042 linux_shared_page_obj = __elfN(linux_shared_page_init)
1043 (&linux_shared_page_mapping);
/* Relocate the image for its address in user space, LINUX_SHAREDPAGE. */
1045 __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX_SHAREDPAGE);
/* Copy the image into the shared page mapping and publish the object. */
1047 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
1049 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
1051 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
1052 (sysinit_cfunc_t)linux_vdso_install, NULL);
/*
 * Tear down the shared page created by linux_vdso_install(). Registered
 * below as a SYSUNINIT at SI_SUB_EXEC with order SI_ORDER_FIRST.
 * param is passed as NULL by that registration.
 */
1055 linux_vdso_deinstall(void *param)
1058 __elfN(linux_shared_page_fini)(linux_shared_page_obj);
1060 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
1061 (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
/* Vendor name used by linux_brandnote. */
1063 static char GNU_ABI_VENDOR[] = "GNU";
/* Value that linux_trans_osrel() requires in the first descriptor word. */
1064 static int GNULINUX_ABI_DESC = 0;
/*
 * Brand note callback (linux_brandnote.trans_osrel). Reads the note
 * descriptor and converts the Linux version stored in it into a single
 * integer written to *osrel.
 *
 * note: ELF note header; the name and the descriptor follow it in memory.
 * osrel: output, set to desc[1] * 1000000 + desc[2] * 1000 + desc[3].
 */
1067 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1069 const Elf32_Word *desc;
/* The descriptor starts after the header and the padded name. */
1072 p = (uintptr_t)(note + 1);
1073 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1075 desc = (const Elf32_Word *)p;
/* The first descriptor word must match GNULINUX_ABI_DESC. */
1076 if (desc[0] != GNULINUX_ABI_DESC)
1080 * For linux we encode osrel as follows (see linux_mib.c):
1081 * VVVMMMIII (version, major, minor), see linux_mib.c.
1083 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
/*
 * Brand note description shared by both Linux brands: vendor
 * GNU_ABI_VENDOR, a 16-byte descriptor, and linux_trans_osrel() as the
 * osrel translation callback (enabled by BN_TRANSLATE_OSREL).
 */
1088 static Elf_Brandnote linux_brandnote = {
/* sizeof counts the terminating NUL of the vendor string. */
1089 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1090 .hdr.n_descsz = 16, /* XXX at least 16 */
1092 .vendor = GNU_ABI_VENDOR,
1093 .flags = BN_TRANSLATE_OSREL,
1094 .trans_osrel = linux_trans_osrel
/*
 * ELF brand for Linux binaries whose interpreter is /lib/ld-linux.so.1.
 * Uses elf_linux_sysvec, the /compat/linux emulation path and
 * linux_brandnote.
 */
1097 static Elf32_Brandinfo linux_brand = {
1098 .brand = ELFOSABI_LINUX,
1100 .compat_3_brand = "Linux",
1101 .emul_path = "/compat/linux",
1102 .interp_path = "/lib/ld-linux.so.1",
1103 .sysvec = &elf_linux_sysvec,
1104 .interp_newpath = NULL,
1105 .brand_note = &linux_brandnote,
1106 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * ELF brand for Linux binaries whose interpreter is /lib/ld-linux.so.2.
 * Otherwise identical to linux_brand in the fields shown.
 */
1109 static Elf32_Brandinfo linux_glibc2brand = {
1110 .brand = ELFOSABI_LINUX,
1112 .compat_3_brand = "Linux",
1113 .emul_path = "/compat/linux",
1114 .interp_path = "/lib/ld-linux.so.2",
1115 .sysvec = &elf_linux_sysvec,
1116 .interp_newpath = NULL,
1117 .brand_note = &linux_brandnote,
1118 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * List of brands walked by linux_elf_modevent(); the loops there stop at
 * the first NULL entry.
 */
1121 Elf32_Brandinfo *linux_brandlist[] = {
/*
 * Module event handler for the Linux ELF module.
 * NOTE(review): the case labels are not visible in this listing. The
 * first group of statements registers everything and the second group
 * undoes it, presumably for the load and unload events respectively;
 * EOPNOTSUPP is presumably returned for any other event type.
 */
1128 linux_elf_modevent(module_t mod, int type, void *data)
1130 Elf32_Brandinfo **brandinfo;
1132 struct linux_ioctl_handler **lihp;
1133 struct linux_device_handler **ldhp;
/* Insert every brand in linux_brandlist. */
1139 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1141 if (elf32_insert_brand_entry(*brandinfo) < 0)
/* Register the ioctl and device handlers collected in the linker sets. */
1144 SET_FOREACH(lihp, linux_ioctl_handler_set)
1145 linux_ioctl_register_handler(*lihp);
1146 SET_FOREACH(ldhp, linux_device_handler_set)
1147 linux_device_register_handler(*ldhp);
/* Initialise futex state and hook process exit, exec and thread destruction. */
1148 LIST_INIT(&futex_list);
1149 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1150 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1152 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1154 linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
1155 linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
/* Cache the platform string and its padded length, including the NUL. */
1156 linux_get_machine(&linux_platform);
1157 linux_szplatform = roundup(strlen(linux_platform) + 1,
1159 linux_osd_jail_register();
/* Clock tick rate exported as AT_CLKTCK: stathz when set, else hz. */
1160 stclohz = (stathz ? stathz : hz);
1162 printf("Linux ELF exec handler installed\n");
1164 printf("cannot insert Linux ELF brand handler\n");
/* Check whether any brand is still in use before removing the brands. */
1167 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1169 if (elf32_brand_inuse(*brandinfo))
1172 for (brandinfo = &linux_brandlist[0];
1173 *brandinfo != NULL; ++brandinfo)
1174 if (elf32_remove_brand_entry(*brandinfo) < 0)
/* Undo the handler, mutex, event handler and jail OSD registrations. */
1178 SET_FOREACH(lihp, linux_ioctl_handler_set)
1179 linux_ioctl_unregister_handler(*lihp);
1180 SET_FOREACH(ldhp, linux_device_handler_set)
1181 linux_device_unregister_handler(*ldhp);
1182 mtx_destroy(&futex_mtx);
1183 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1184 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1185 EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
1186 linux_osd_jail_deregister();
1188 printf("Linux ELF exec handler removed\n");
1190 printf("Could not deinstall ELF interpreter entry\n");
1193 return (EOPNOTSUPP);
/*
 * Module description for the Linux ELF module; its initialiser fields
 * are not visible in this listing.
 */
1198 static moduledata_t linux_elf_mod = {
/* Declare the module under the name linuxelf at SI_SUB_EXEC, order SI_ORDER_ANY. */
1204 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);