2 * Copyright (c) 1994-1996 Søren Schmidt
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
60 #include <machine/cpu.h>
61 #include <machine/cputypes.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 #include <compat/linux/linux_futex.h>
68 #include <compat/linux/linux_emul.h>
69 #include <compat/linux/linux_mib.h>
70 #include <compat/linux/linux_misc.h>
71 #include <compat/linux/linux_signal.h>
72 #include <compat/linux/linux_util.h>
74 MODULE_VERSION(linux, 1);
76 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
78 #if BYTE_ORDER == LITTLE_ENDIAN
79 #define SHELLMAGIC 0x2123 /* #! */
81 #define SHELLMAGIC 0x2321
85 * Allow the sendsig functions to use the ldebug() facility
86 * even though they are not syscalls themselves. Map them
87 * to syscall 0. This is slightly less bogus than using
90 #define LINUX_SYS_linux_rt_sendsig 0
91 #define LINUX_SYS_linux_sendsig 0
93 extern char linux_sigcode[];
94 extern int linux_szsigcode;
96 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
98 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
99 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
101 static int linux_fixup(register_t **stack_base,
102 struct image_params *iparams);
103 static int elf_linux_fixup(register_t **stack_base,
104 struct image_params *iparams);
105 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
107 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
108 static void exec_linux_setregs(struct thread *td, u_long entry,
109 u_long stack, u_long ps_strings);
110 static register_t *linux_copyout_strings(struct image_params *imgp);
112 static int linux_szplatform;
113 const char *linux_platform;
115 static eventhandler_tag linux_exit_tag;
116 static eventhandler_tag linux_schedtail_tag;
117 static eventhandler_tag linux_exec_tag;
120 * Linux syscalls return negative errno's, we do positive and map them
122 * FreeBSD: src/sys/sys/errno.h
123 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
124 * linux-2.6.17.8/include/asm-generic/errno.h
126 static int bsd_to_linux_errno[ELAST + 1] = {
127 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
128 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
129 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
130 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
131 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
132 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
133 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
134 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
135 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
139 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
140 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
141 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
142 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
143 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
144 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
145 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
146 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
147 0, LINUX_SIGUSR1, LINUX_SIGUSR2
150 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
151 SIGHUP, SIGINT, SIGQUIT, SIGILL,
152 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
153 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
154 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
155 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
156 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
157 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
158 SIGIO, SIGURG, SIGSYS
161 #define LINUX_T_UNKNOWN 255
162 static int _bsd_to_linux_trapcode[] = {
163 LINUX_T_UNKNOWN, /* 0 */
164 6, /* 1 T_PRIVINFLT */
165 LINUX_T_UNKNOWN, /* 2 */
167 LINUX_T_UNKNOWN, /* 4 */
168 LINUX_T_UNKNOWN, /* 5 */
169 16, /* 6 T_ARITHTRAP */
170 254, /* 7 T_ASTFLT */
171 LINUX_T_UNKNOWN, /* 8 */
172 13, /* 9 T_PROTFLT */
173 1, /* 10 T_TRCTRAP */
174 LINUX_T_UNKNOWN, /* 11 */
175 14, /* 12 T_PAGEFLT */
176 LINUX_T_UNKNOWN, /* 13 */
177 17, /* 14 T_ALIGNFLT */
178 LINUX_T_UNKNOWN, /* 15 */
179 LINUX_T_UNKNOWN, /* 16 */
180 LINUX_T_UNKNOWN, /* 17 */
186 8, /* 23 T_DOUBLEFLT */
187 9, /* 24 T_FPOPFLT */
188 10, /* 25 T_TSSFLT */
189 11, /* 26 T_SEGNPFLT */
190 12, /* 27 T_STKFLT */
192 19, /* 29 T_XMMFLT */
193 15 /* 30 T_RESERVED */
195 #define bsd_to_linux_trapcode(code) \
196 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
197 _bsd_to_linux_trapcode[(code)]: \
201 * If FreeBSD & Linux have a difference of opinion about what a trap
202 * means, deal with it here.
207 translate_traps(int signal, int trap_code)
209 if (signal != SIGBUS)
/*
 * Stack fixup for Linux a.out images (installed as .sv_fixup of
 * linux_sysvec).
 *
 * stack_base: in/out pointer to the base of the new image's argument area.
 * imgp:       exec parameters; only imgp->args->argc is read here.
 *
 * Stores, in this order, the envp pointer, the argv pointer and argc
 * through *stack_base.
 * NOTE(review): the statements that initialise argv and move *stack_base
 * between the three stores are not visible here -- confirm against the
 * complete function.
 */
223 linux_fixup(register_t **stack_base, struct image_params *imgp)
225 register_t *argv, *envp;
/*
 * envp begins argc + 1 slots above the stack base: the argv entries plus
 * one extra slot (presumably argv's NULL terminator).
 */
228 envp = *stack_base + (imgp->args->argc + 1);
230 **stack_base = (intptr_t)(void *)envp;
232 **stack_base = (intptr_t)(void *)argv;
234 **stack_base = imgp->args->argc;
/*
 * Stack fixup for Linux ELF images (installed as .sv_fixup of
 * elf_linux_sysvec).
 *
 * stack_base: in/out pointer to the base of the new image's argument area.
 * imgp:       exec parameters; imgp->auxargs is consumed and freed here.
 *
 * Writes the ELF auxiliary vector starting argc + envc + 2 slots above
 * *stack_base, releases imgp->auxargs and finally stores argc through
 * *stack_base.  The KASSERT below requires that the calling thread
 * belongs to the process being exec'd.
 */
239 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
243 Elf32_Addr *uplatform;
244 struct ps_strings *arginfo;
247 KASSERT(curthread->td_proc == imgp->proc,
248 ("unsafe elf_linux_fixup(), should be curproc"));
251 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
252 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
254 args = (Elf32_Auxargs *)imgp->auxargs;
/* The aux vector goes right after the argv and envp tables (+ 2 slots). */
255 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
257 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
260 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
261 * as it has appeared in the 2.4.0-rc7 first time.
262 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
263 * glibc falls back to the hard-coded CLK_TCK value when aux entry
265 * Also see linux_times() implementation.
267 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
268 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
269 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
270 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
271 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
272 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
273 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
274 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
275 AUXARGS_ENTRY(pos, AT_BASE, args->base);
276 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
/*
 * NOTE(review): AT_EUID and AT_EGID below are filled from cr_svuid and
 * cr_svgid (the saved ids in the credential) rather than from the
 * effective ids -- confirm that this is intended.
 */
277 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
278 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
279 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
280 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
281 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
282 if (args->execfd != -1)
283 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
284 AUXARGS_ENTRY(pos, AT_NULL, 0);
/* The aux arguments have been written out; free them and clear the pointer. */
286 free(imgp->auxargs, M_TEMP);
287 imgp->auxargs = NULL;
290 **stack_base = (register_t)imgp->args->argc;
295 * Copied from kern/kern_exec.c
298 linux_copyout_strings(struct image_params *imgp)
302 char *stringp, *destp;
303 register_t *stack_base;
304 struct ps_strings *arginfo;
308 * Calculate string base and vector table pointers.
309 * Also deal with signal trampoline code for this exec type.
312 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
313 destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
314 linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
320 copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
321 linux_szsigcode), linux_szsigcode);
324 * install LINUX_PLATFORM
326 copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
327 linux_szplatform), linux_szplatform);
330 * If we have a valid auxargs ptr, prepare some room
335 * 'LINUX_AT_COUNT * 2' is the size of the ELF Auxargs data. This is for
336 * backward compatibility.
338 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
339 (LINUX_AT_COUNT * 2);
341 * The '+ 2' is for the null pointers at the end of each of
342 * the arg and env vector sets, and imgp->auxarg_size is room
343 * for the arguments of the runtime loader.
345 vectp = (char **)(destp - (imgp->args->argc +
346 imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
349 * The '+ 2' is for the null pointers at the end of each of
350 * the arg and env vector sets
352 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
357 * vectp also becomes our initial stack base
359 stack_base = (register_t *)vectp;
361 stringp = imgp->args->begin_argv;
362 argc = imgp->args->argc;
363 envc = imgp->args->envc;
366 * Copy out strings - arguments and environment.
368 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
371 * Fill in "ps_strings" struct for ps, w, etc.
373 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
374 suword(&arginfo->ps_nargvstr, argc);
377 * Fill in argument portion of vector table.
379 for (; argc > 0; --argc) {
380 suword(vectp++, (long)(intptr_t)destp);
381 while (*stringp++ != 0)
386 /* a null vector table pointer separates the argp's from the envp's */
389 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
390 suword(&arginfo->ps_nenvstr, envc);
393 * Fill in environment portion of vector table.
395 for (; envc > 0; --envc) {
396 suword(vectp++, (long)(intptr_t)destp);
397 while (*stringp++ != 0)
402 /* end of vector table is a null pointer */
410 extern int _ucodesel, _udatasel;
411 extern unsigned long linux_sznonrtsigcode;
/*
 * Deliver a signal to a Linux process using the rt signal frame layout
 * (struct l_rt_sigframe).  linux_sendsig() hands signals over to this
 * function when the handler was installed with SA_SIGINFO.
 *
 * catcher: user handler, stored in the frame as sf_handler.
 * ksi:     signal information; ksi_signo, ksi_code and ksi_addr are read.
 * mask:    signal mask, converted into the frame's uc_sigmask.
 *
 * The frame is assembled in a local copy and copied out either below the
 * current user stack pointer or at the top of the alternate signal stack
 * (when one is enabled, not already in use and selected for this signal).
 * The trapframe is then redirected to the signal trampoline.  The sigacts
 * mutex is asserted held on entry, dropped while the frame is built and
 * taken again at the end.
 */
414 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
416 struct thread *td = curthread;
417 struct proc *p = td->td_proc;
419 struct trapframe *regs;
420 struct l_rt_sigframe *fp, frame;
424 sig = ksi->ksi_signo;
425 code = ksi->ksi_code;
426 PROC_LOCK_ASSERT(p, MA_OWNED);
428 mtx_assert(&psp->ps_mtx, MA_OWNED);
430 oonstack = sigonstack(regs->tf_esp);
433 if (ldebug(rt_sendsig))
434 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
435 catcher, sig, (void*)mask, code);
438 * Allocate space for the signal handler context.
440 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
441 SIGISMEMBER(psp->ps_sigonstack, sig)) {
442 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
443 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
445 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
/* The sigacts mutex is not held while the frame is built and copied out. */
446 mtx_unlock(&psp->ps_mtx);
449 * Build the argument list for the signal handler.
451 if (p->p_sysent->sv_sigtbl)
452 if (sig <= p->p_sysent->sv_sigsize)
/* Translate the signal number through the ABI's signal table. */
453 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
455 bzero(&frame, sizeof(frame));
457 frame.sf_handler = catcher;
459 frame.sf_siginfo = &fp->sf_si;
460 frame.sf_ucontext = &fp->sf_sc;
462 /* Fill in POSIX parts */
463 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
466 * Build the signal context to be used by sigreturn.
468 frame.sf_sc.uc_flags = 0; /* XXX ??? */
469 frame.sf_sc.uc_link = NULL; /* XXX ??? */
471 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
472 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
473 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
474 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
477 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
479 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
480 frame.sf_sc.uc_mcontext.sc_gs = rgs();
481 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
482 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
483 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
484 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
485 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
486 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
487 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
488 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
489 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
490 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
491 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
492 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
493 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
494 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
495 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
496 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
497 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr;
/*
 * NOTE(review): 'code' holds ksi->ksi_code here, whereas linux_sendsig()
 * passes ksi->ksi_trapno to bsd_to_linux_trapcode() for the same field --
 * confirm which value sc_trapno is meant to carry.
 */
498 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
501 if (ldebug(rt_sendsig))
502 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
503 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
504 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
507 if (copyout(&frame, fp, sizeof(frame)) != 0) {
509 * Process has trashed its stack; give it an illegal
510 * instruction to halt it in its tracks.
513 if (ldebug(rt_sendsig))
514 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
522 * Build context to run handler in.
524 regs->tf_esp = (int)fp;
/*
 * Resume linux_sznonrtsigcode bytes past the start of the signal code that
 * sits just below PS_STRINGS (presumably the rt trampoline entry -- confirm).
 */
525 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
526 linux_sznonrtsigcode;
527 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
528 regs->tf_cs = _ucodesel;
529 regs->tf_ds = _udatasel;
530 regs->tf_es = _udatasel;
531 regs->tf_fs = _udatasel;
532 regs->tf_ss = _udatasel;
534 mtx_lock(&psp->ps_mtx);
539 * Send an interrupt to process.
541 * Stack is set up to allow sigcode stored
542 * in u. to call routine, followed by kcall
543 * to sigreturn routine below. After sigreturn
544 * resets the signal mask, the stack, and the
545 * frame pointer, it returns to the user
549 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
551 struct thread *td = curthread;
552 struct proc *p = td->td_proc;
554 struct trapframe *regs;
555 struct l_sigframe *fp, frame;
560 PROC_LOCK_ASSERT(p, MA_OWNED);
562 sig = ksi->ksi_signo;
563 code = ksi->ksi_code;
564 mtx_assert(&psp->ps_mtx, MA_OWNED);
565 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
566 /* Signal handler installed with SA_SIGINFO. */
567 linux_rt_sendsig(catcher, ksi, mask);
571 oonstack = sigonstack(regs->tf_esp);
575 printf(ARGS(sendsig, "%p, %d, %p, %u"),
576 catcher, sig, (void*)mask, code);
580 * Allocate space for the signal handler context.
582 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
583 SIGISMEMBER(psp->ps_sigonstack, sig)) {
584 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
585 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
587 fp = (struct l_sigframe *)regs->tf_esp - 1;
588 mtx_unlock(&psp->ps_mtx);
592 * Build the argument list for the signal handler.
594 if (p->p_sysent->sv_sigtbl)
595 if (sig <= p->p_sysent->sv_sigsize)
596 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
598 bzero(&frame, sizeof(frame));
600 frame.sf_handler = catcher;
603 bsd_to_linux_sigset(mask, &lmask);
606 * Build the signal context to be used by sigreturn.
608 frame.sf_sc.sc_mask = lmask.__bits[0];
609 frame.sf_sc.sc_gs = rgs();
610 frame.sf_sc.sc_fs = regs->tf_fs;
611 frame.sf_sc.sc_es = regs->tf_es;
612 frame.sf_sc.sc_ds = regs->tf_ds;
613 frame.sf_sc.sc_edi = regs->tf_edi;
614 frame.sf_sc.sc_esi = regs->tf_esi;
615 frame.sf_sc.sc_ebp = regs->tf_ebp;
616 frame.sf_sc.sc_ebx = regs->tf_ebx;
617 frame.sf_sc.sc_edx = regs->tf_edx;
618 frame.sf_sc.sc_ecx = regs->tf_ecx;
619 frame.sf_sc.sc_eax = regs->tf_eax;
620 frame.sf_sc.sc_eip = regs->tf_eip;
621 frame.sf_sc.sc_cs = regs->tf_cs;
622 frame.sf_sc.sc_eflags = regs->tf_eflags;
623 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
624 frame.sf_sc.sc_ss = regs->tf_ss;
625 frame.sf_sc.sc_err = regs->tf_err;
626 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr;
627 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
629 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
630 frame.sf_extramask[i] = lmask.__bits[i+1];
632 if (copyout(&frame, fp, sizeof(frame)) != 0) {
634 * Process has trashed its stack; give it an illegal
635 * instruction to halt it in its tracks.
642 * Build context to run handler in.
644 regs->tf_esp = (int)fp;
645 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
646 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
647 regs->tf_cs = _ucodesel;
648 regs->tf_ds = _udatasel;
649 regs->tf_es = _udatasel;
650 regs->tf_fs = _udatasel;
651 regs->tf_ss = _udatasel;
653 mtx_lock(&psp->ps_mtx);
657 * System call to cleanup state after a signal
658 * has been taken. Reset signal mask and
659 * stack state from context left by sendsig (above).
660 * Return to previous pc and psl as specified by
661 * context left by sendsig. Check carefully to
662 * make sure that the user has not modified the
663 * psl to gain improper privileges or to cause
667 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
669 struct proc *p = td->td_proc;
670 struct l_sigframe frame;
671 struct trapframe *regs;
679 if (ldebug(sigreturn))
680 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
683 * The trampoline code hands us the sigframe.
684 * It is unsafe to keep track of it ourselves, in the event that a
685 * program jumps out of a signal handler.
687 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
691 * Check for security violations.
693 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
694 eflags = frame.sf_sc.sc_eflags;
696 * XXX do allow users to change the privileged flag PSL_RF. The
697 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
698 * sometimes set it there too. tf_eflags is kept in the signal
699 * context during signal handling and there is no other place
700 * to remember it, so the PSL_RF bit may be corrupted by the
701 * signal handler without us knowing. Corruption of the PSL_RF
702 * bit at worst causes one more or one less debugger trap, so
703 * allowing it is fairly harmless.
705 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
709 * Don't allow users to load a valid privileged %cs. Let the
710 * hardware check for invalid selectors, excess privilege in
711 * other selectors, invalid %eip's and invalid %esp's.
713 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
714 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
715 ksiginfo_init_trap(&ksi);
716 ksi.ksi_signo = SIGBUS;
717 ksi.ksi_code = BUS_OBJERR;
718 ksi.ksi_trapno = T_PROTFLT;
719 ksi.ksi_addr = (void *)regs->tf_eip;
720 trapsignal(td, &ksi);
724 lmask.__bits[0] = frame.sf_sc.sc_mask;
725 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
726 lmask.__bits[i+1] = frame.sf_extramask[i];
728 linux_to_bsd_sigset(&lmask, &td->td_sigmask);
729 SIG_CANTMASK(td->td_sigmask);
734 * Restore signal context.
736 /* %gs was restored by the trampoline. */
737 regs->tf_fs = frame.sf_sc.sc_fs;
738 regs->tf_es = frame.sf_sc.sc_es;
739 regs->tf_ds = frame.sf_sc.sc_ds;
740 regs->tf_edi = frame.sf_sc.sc_edi;
741 regs->tf_esi = frame.sf_sc.sc_esi;
742 regs->tf_ebp = frame.sf_sc.sc_ebp;
743 regs->tf_ebx = frame.sf_sc.sc_ebx;
744 regs->tf_edx = frame.sf_sc.sc_edx;
745 regs->tf_ecx = frame.sf_sc.sc_ecx;
746 regs->tf_eax = frame.sf_sc.sc_eax;
747 regs->tf_eip = frame.sf_sc.sc_eip;
748 regs->tf_cs = frame.sf_sc.sc_cs;
749 regs->tf_eflags = eflags;
750 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
751 regs->tf_ss = frame.sf_sc.sc_ss;
753 return (EJUSTRETURN);
757 * System call to cleanup state after a signal
758 * has been taken. Reset signal mask and
759 * stack state from context left by rt_sendsig (above).
760 * Return to previous pc and psl as specified by
761 * context left by rt_sendsig. Check carefully to
762 * make sure that the user has not modified the
763 * psl to gain improper privileges or to cause
767 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
769 struct proc *p = td->td_proc;
770 struct l_ucontext uc;
771 struct l_sigcontext *context;
774 struct trapframe *regs;
781 if (ldebug(rt_sigreturn))
782 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
785 * The trampoline code hands us the ucontext.
786 * It is unsafe to keep track of it ourselves, in the event that a
787 * program jumps out of a signal handler.
789 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
792 context = &uc.uc_mcontext;
795 * Check for security violations.
797 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
798 eflags = context->sc_eflags;
800 * XXX do allow users to change the privileged flag PSL_RF. The
801 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
802 * sometimes set it there too. tf_eflags is kept in the signal
803 * context during signal handling and there is no other place
804 * to remember it, so the PSL_RF bit may be corrupted by the
805 * signal handler without us knowing. Corruption of the PSL_RF
806 * bit at worst causes one more or one less debugger trap, so
807 * allowing it is fairly harmless.
809 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
813 * Don't allow users to load a valid privileged %cs. Let the
814 * hardware check for invalid selectors, excess privilege in
815 * other selectors, invalid %eip's and invalid %esp's.
817 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
818 if (!CS_SECURE(context->sc_cs)) {
819 ksiginfo_init_trap(&ksi);
820 ksi.ksi_signo = SIGBUS;
821 ksi.ksi_code = BUS_OBJERR;
822 ksi.ksi_trapno = T_PROTFLT;
823 ksi.ksi_addr = (void *)regs->tf_eip;
824 trapsignal(td, &ksi);
829 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
830 SIG_CANTMASK(td->td_sigmask);
835 * Restore signal context
837 /* %gs was restored by the trampoline. */
838 regs->tf_fs = context->sc_fs;
839 regs->tf_es = context->sc_es;
840 regs->tf_ds = context->sc_ds;
841 regs->tf_edi = context->sc_edi;
842 regs->tf_esi = context->sc_esi;
843 regs->tf_ebp = context->sc_ebp;
844 regs->tf_ebx = context->sc_ebx;
845 regs->tf_edx = context->sc_edx;
846 regs->tf_ecx = context->sc_ecx;
847 regs->tf_eax = context->sc_eax;
848 regs->tf_eip = context->sc_eip;
849 regs->tf_cs = context->sc_cs;
850 regs->tf_eflags = eflags;
851 regs->tf_esp = context->sc_esp_at_signal;
852 regs->tf_ss = context->sc_ss;
855 * Call sigaltstack and ignore the result.
858 ss.ss_sp = lss->ss_sp;
859 ss.ss_size = lss->ss_size;
860 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
863 if (ldebug(rt_sigreturn))
864 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
865 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
867 (void)kern_sigaltstack(td, &ss, NULL);
869 return (EJUSTRETURN);
/*
 * Collect the arguments of a Linux system call from the trap frame
 * (installed as .sv_prepsyscall of both Linux sysentvecs).
 *
 * tf:     trap frame of the calling thread; only read.
 * args:   output array; slots 0..5 receive ebx, ecx, edx, esi, edi, ebp.
 * code:   not used by the visible statements.
 * params: set to NULL so that no argument copyin is performed.
 */
876 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
878 args[0] = tf->tf_ebx;
879 args[1] = tf->tf_ecx;
880 args[2] = tf->tf_edx;
881 args[3] = tf->tf_esi;
882 args[4] = tf->tf_edi;
883 args[5] = tf->tf_ebp; /* Unconfirmed */
884 *params = NULL; /* no copyin */
888 * If a linux binary is exec'ing something, try this image activator
889 * first. We override standard shell script execution in order to
890 * be able to modify the interpreter path. We only do this if a linux
891 * binary is doing the exec, so we do not create an EXEC module for it.
893 static int exec_linux_imgact_try(struct image_params *iparams);
/*
 * Image activator hook (installed as .sv_imgact_try of both Linux
 * sysentvecs).
 *
 * imgp: exec parameters; image_header is inspected and interpreter_name
 *       may be overwritten.
 *
 * If the image header starts with SHELLMAGIC ("#!"), exec_shell_imgact()
 * is run and, on success, linux_emul_convpath() is asked for an alternate
 * path for the interpreter, which then replaces imgp->interpreter_name.
 */
896 exec_linux_imgact_try(struct image_params *imgp)
898 const char *head = (const char *)imgp->image_header;
903 * The interpreter for shell scripts run from a linux binary needs
904 * to be located in /compat/linux if possible in order to recursively
905 * maintain linux path emulation.
907 if (((const short *)head)[0] == SHELLMAGIC) {
909 * Run our normal shell image activator. If it succeeds attempt
910 * to use the alternate path for the interpreter. If an alternate
911 * path is found, use our stringspace to store it.
913 if ((error = exec_shell_imgact(imgp)) == 0) {
/* rpath receives the converted interpreter path, if any. */
914 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
915 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
917 len = strlen(rpath) + 1;
919 if (len <= MAXSHELLCMDLEN) {
/* len counts the terminating NUL, so the copy stays NUL-terminated. */
920 memcpy(imgp->interpreter_name, rpath, len);
930 * exec_setregs may initialize some registers differently than Linux
931 * does, thus potentially confusing Linux binaries. If necessary, we
932 * override the exec_setregs default(s) here.
/*
 * Register setup hook (installed as .sv_setregs of both Linux sysentvecs).
 *
 * td:         thread whose registers are initialised.
 * entry:      passed through unchanged to exec_setregs().
 * stack:      passed through unchanged to exec_setregs().
 * ps_strings: passed through unchanged to exec_setregs().
 *
 * Calls the default exec_setregs() first and then applies the
 * Linux-specific overrides below.
 */
935 exec_linux_setregs(struct thread *td, u_long entry,
936 u_long stack, u_long ps_strings)
938 struct pcb *pcb = td->td_pcb;
940 exec_setregs(td, entry, stack, ps_strings);
942 /* Linux sets %gs to 0, we default to _udatasel */
/*
 * Record __LINUX_NPXCW__ as the initial npx control word in the pcb
 * (presumably the x87 FPU control word Linux binaries expect -- confirm).
 */
946 pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
950 linux_get_machine(const char **dst)
968 struct sysentvec linux_sysvec = {
969 .sv_size = LINUX_SYS_MAXSYSCALL,
970 .sv_table = linux_sysent,
972 .sv_sigsize = LINUX_SIGTBLSZ,
973 .sv_sigtbl = bsd_to_linux_signal,
974 .sv_errsize = ELAST + 1,
975 .sv_errtbl = bsd_to_linux_errno,
976 .sv_transtrap = translate_traps,
977 .sv_fixup = linux_fixup,
978 .sv_sendsig = linux_sendsig,
979 .sv_sigcode = linux_sigcode,
980 .sv_szsigcode = &linux_szsigcode,
981 .sv_prepsyscall = linux_prepsyscall,
982 .sv_name = "Linux a.out",
984 .sv_imgact_try = exec_linux_imgact_try,
985 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
986 .sv_pagesize = PAGE_SIZE,
987 .sv_minuser = VM_MIN_ADDRESS,
988 .sv_maxuser = VM_MAXUSER_ADDRESS,
989 .sv_usrstack = USRSTACK,
990 .sv_psstrings = PS_STRINGS,
991 .sv_stackprot = VM_PROT_ALL,
992 .sv_copyout_strings = exec_copyout_strings,
993 .sv_setregs = exec_linux_setregs,
996 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32
999 struct sysentvec elf_linux_sysvec = {
1000 .sv_size = LINUX_SYS_MAXSYSCALL,
1001 .sv_table = linux_sysent,
1003 .sv_sigsize = LINUX_SIGTBLSZ,
1004 .sv_sigtbl = bsd_to_linux_signal,
1005 .sv_errsize = ELAST + 1,
1006 .sv_errtbl = bsd_to_linux_errno,
1007 .sv_transtrap = translate_traps,
1008 .sv_fixup = elf_linux_fixup,
1009 .sv_sendsig = linux_sendsig,
1010 .sv_sigcode = linux_sigcode,
1011 .sv_szsigcode = &linux_szsigcode,
1012 .sv_prepsyscall = linux_prepsyscall,
1013 .sv_name = "Linux ELF",
1014 .sv_coredump = elf32_coredump,
1015 .sv_imgact_try = exec_linux_imgact_try,
1016 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1017 .sv_pagesize = PAGE_SIZE,
1018 .sv_minuser = VM_MIN_ADDRESS,
1019 .sv_maxuser = VM_MAXUSER_ADDRESS,
1020 .sv_usrstack = USRSTACK,
1021 .sv_psstrings = PS_STRINGS,
1022 .sv_stackprot = VM_PROT_ALL,
1023 .sv_copyout_strings = linux_copyout_strings,
1024 .sv_setregs = exec_linux_setregs,
1025 .sv_fixlimit = NULL,
1027 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32
1030 static char GNULINUX_ABI_VENDOR[] = "GNU";
1032 static Elf_Brandnote linux_brandnote = {
1033 .hdr.n_namesz = sizeof(GNULINUX_ABI_VENDOR),
1036 .vendor = GNULINUX_ABI_VENDOR,
1040 static Elf32_Brandinfo linux_brand = {
1041 .brand = ELFOSABI_LINUX,
1043 .compat_3_brand = "Linux",
1044 .emul_path = "/compat/linux",
1045 .interp_path = "/lib/ld-linux.so.1",
1046 .sysvec = &elf_linux_sysvec,
1047 .interp_newpath = NULL,
1048 .brand_note = &linux_brandnote,
1049 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1052 static Elf32_Brandinfo linux_glibc2brand = {
1053 .brand = ELFOSABI_LINUX,
1055 .compat_3_brand = "Linux",
1056 .emul_path = "/compat/linux",
1057 .interp_path = "/lib/ld-linux.so.2",
1058 .sysvec = &elf_linux_sysvec,
1059 .interp_newpath = NULL,
1060 .brand_note = &linux_brandnote,
1061 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1064 Elf32_Brandinfo *linux_brandlist[] = {
1071 linux_elf_modevent(module_t mod, int type, void *data)
1073 Elf32_Brandinfo **brandinfo;
1075 struct linux_ioctl_handler **lihp;
1076 struct linux_device_handler **ldhp;
1082 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1084 if (elf32_insert_brand_entry(*brandinfo) < 0)
1087 SET_FOREACH(lihp, linux_ioctl_handler_set)
1088 linux_ioctl_register_handler(*lihp);
1089 SET_FOREACH(ldhp, linux_device_handler_set)
1090 linux_device_register_handler(*ldhp);
1091 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1092 sx_init(&emul_shared_lock, "emuldata->shared lock");
1093 LIST_INIT(&futex_list);
1094 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1095 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1097 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
1099 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1101 linux_get_machine(&linux_platform);
1102 linux_szplatform = roundup(strlen(linux_platform) + 1,
1104 linux_osd_jail_register();
1105 stclohz = (stathz ? stathz : hz);
1107 printf("Linux ELF exec handler installed\n");
1109 printf("cannot insert Linux ELF brand handler\n");
1112 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1114 if (elf32_brand_inuse(*brandinfo))
1117 for (brandinfo = &linux_brandlist[0];
1118 *brandinfo != NULL; ++brandinfo)
1119 if (elf32_remove_brand_entry(*brandinfo) < 0)
1123 SET_FOREACH(lihp, linux_ioctl_handler_set)
1124 linux_ioctl_unregister_handler(*lihp);
1125 SET_FOREACH(ldhp, linux_device_handler_set)
1126 linux_device_unregister_handler(*ldhp);
1127 mtx_destroy(&emul_lock);
1128 sx_destroy(&emul_shared_lock);
1129 mtx_destroy(&futex_mtx);
1130 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1131 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1132 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1133 linux_osd_jail_deregister();
1135 printf("Linux ELF exec handler removed\n");
1137 printf("Could not deinstall ELF interpreter entry\n");
1145 static moduledata_t linux_elf_mod = {
1151 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);