2 * Copyright (c) 1994-1996 Søren Schmidt
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
35 #include <sys/imgact.h>
36 #include <sys/imgact_aout.h>
37 #include <sys/imgact_elf.h>
38 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/module.h>
42 #include <sys/mutex.h>
44 #include <sys/signalvar.h>
45 #include <sys/syscallsubr.h>
46 #include <sys/sysent.h>
47 #include <sys/sysproto.h>
48 #include <sys/vnode.h>
49 #include <sys/eventhandler.h>
53 #include <vm/vm_extern.h>
54 #include <vm/vm_map.h>
55 #include <vm/vm_object.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_param.h>
59 #include <machine/cpu.h>
60 #include <machine/cputypes.h>
61 #include <machine/md_var.h>
62 #include <machine/pcb.h>
64 #include <i386/linux/linux.h>
65 #include <i386/linux/linux_proto.h>
66 #include <compat/linux/linux_emul.h>
67 #include <compat/linux/linux_mib.h>
68 #include <compat/linux/linux_misc.h>
69 #include <compat/linux/linux_signal.h>
70 #include <compat/linux/linux_util.h>
72 MODULE_VERSION(linux, 1);
74 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
/*
 * SHELLMAGIC is the two-byte "#!" script marker expressed as a 16-bit
 * value; its numeric form depends on BYTE_ORDER.  It is compared against
 * the first short of the image header in exec_linux_imgact_try().
 */
76 #if BYTE_ORDER == LITTLE_ENDIAN
77 #define SHELLMAGIC 0x2123 /* #! */
79 #define SHELLMAGIC 0x2321
83 * Allow the sendsig functions to use the ldebug() facility
84 * even though they are not syscalls themselves. Map them
85 * to syscall 0. This is slightly less bogus than using
88 #define LINUX_SYS_linux_rt_sendsig 0
89 #define LINUX_SYS_linux_sendsig 0
/* Signal trampoline code and its size; used as sv_sigcode / sv_szsigcode in the sysentvec initializers. */
91 extern char linux_sigcode[];
92 extern int linux_szsigcode;
/* Linux system call table, LINUX_SYS_MAXSYSCALL entries. */
94 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
/* Linker sets of ioctl/device handlers; walked with SET_FOREACH in linux_elf_modevent(). */
96 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
97 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
/* Forward declarations of the sysentvec callbacks defined in this file. */
99 static int linux_fixup(register_t **stack_base,
100 struct image_params *iparams);
101 static int elf_linux_fixup(register_t **stack_base,
102 struct image_params *iparams);
103 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
105 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
106 static void exec_linux_setregs(struct thread *td, u_long entry,
107 u_long stack, u_long ps_strings);
108 static register_t *linux_copyout_strings(struct image_params *imgp);
/*
 * Platform string and its rounded-up size; both are filled in by
 * linux_elf_modevent() and consumed by linux_copyout_strings().
 */
110 static int linux_szplatform;
111 const char *linux_platform;
/* Futex list and its sx lock are defined elsewhere; they are initialized in linux_elf_modevent(). */
113 extern LIST_HEAD(futex_list, futex) futex_list;
114 extern struct sx futex_sx;
/* Tags returned by EVENTHANDLER_REGISTER, kept so the handlers can be deregistered later. */
116 static eventhandler_tag linux_exit_tag;
117 static eventhandler_tag linux_schedtail_tag;
118 static eventhandler_tag linux_exec_tag;
121 * Linux syscalls return negative errnos; we use positive ones and map them
123 * FreeBSD: src/sys/sys/errno.h
124 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
125 * linux-2.6.17.8/include/asm-generic/errno.h
/*
 * Errno translation table, ELAST + 1 entries.  Installed as sv_errtbl in
 * both sysentvecs in this file; each entry is a negative value, matching
 * the "Linux syscalls return negative errno" convention noted above.
 * NOTE(review): presumably indexed by the FreeBSD errno value -- confirm
 * against the consumer of sv_errtbl.
 */
127 static int bsd_to_linux_errno[ELAST + 1] = {
128 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
129 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
130 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
131 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
132 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
133 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
134 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
135 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
136 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
/*
 * FreeBSD-to-Linux signal number table, LINUX_SIGTBLSZ entries.  Installed
 * as sv_sigtbl and indexed with _SIG_IDX(sig) by the sendsig routines.
 * NOTE(review): the 0 entries presumably mark FreeBSD signals without a
 * Linux equivalent -- confirm.
 */
140 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
141 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
142 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
143 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
144 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
145 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
146 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
147 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
148 0, LINUX_SIGUSR1, LINUX_SIGUSR2
/*
 * Linux-to-FreeBSD signal number table, LINUX_SIGTBLSZ entries.  It is not
 * referenced elsewhere in this file.
 * NOTE(review): presumably indexed by Linux signal number minus one, like
 * the reverse table -- confirm against the users in the signal code.
 */
151 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
152 SIGHUP, SIGINT, SIGQUIT, SIGILL,
153 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
154 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
155 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
156 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
157 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
158 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
159 SIGIO, SIGURG, SIGSYS
/*
 * Trap number translation.  The table is indexed by the FreeBSD trap type
 * (the T_* name in each entry's comment) and yields the trap number stored
 * in the Linux signal context (sc_trapno).  LINUX_T_UNKNOWN fills slots
 * with no listed mapping.
 */
162 #define LINUX_T_UNKNOWN 255
163 static int _bsd_to_linux_trapcode[] = {
164 LINUX_T_UNKNOWN, /* 0 */
165 6, /* 1 T_PRIVINFLT */
166 LINUX_T_UNKNOWN, /* 2 */
168 LINUX_T_UNKNOWN, /* 4 */
169 LINUX_T_UNKNOWN, /* 5 */
170 16, /* 6 T_ARITHTRAP */
171 254, /* 7 T_ASTFLT */
172 LINUX_T_UNKNOWN, /* 8 */
173 13, /* 9 T_PROTFLT */
174 1, /* 10 T_TRCTRAP */
175 LINUX_T_UNKNOWN, /* 11 */
176 14, /* 12 T_PAGEFLT */
177 LINUX_T_UNKNOWN, /* 13 */
178 17, /* 14 T_ALIGNFLT */
179 LINUX_T_UNKNOWN, /* 15 */
180 LINUX_T_UNKNOWN, /* 16 */
181 LINUX_T_UNKNOWN, /* 17 */
187 8, /* 23 T_DOUBLEFLT */
188 9, /* 24 T_FPOPFLT */
189 10, /* 25 T_TSSFLT */
190 11, /* 26 T_SEGNPFLT */
191 12, /* 27 T_STKFLT */
193 19, /* 29 T_XMMFLT */
194 15 /* 30 T_RESERVED */
/*
 * Bounds-checked lookup: indexes the table only when code is below the
 * element count.  NOTE(review): the fallback arm of the conditional is not
 * visible in this listing; presumably LINUX_T_UNKNOWN -- confirm.
 */
196 #define bsd_to_linux_trapcode(code) \
197 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
198 _bsd_to_linux_trapcode[(code)]: \
202 * If FreeBSD & Linux have a difference of opinion about what a trap
203 * means, deal with it here.
/*
 * Trap-to-signal adjustment hook, installed as sv_transtrap in both
 * sysentvecs.  NOTE(review): only the SIGBUS test is visible in this
 * listing; the return statements are missing, so the exact mapping cannot
 * be documented from here.
 */
208 translate_traps(int signal, int trap_code)
210 if (signal != SIGBUS)
/*
 * Stack fixup for Linux a.out images (sv_fixup of linux_sysvec).
 * Stores the envp pointer, the argv pointer and argc through *stack_base.
 * envp is computed as argc + 1 slots past the initial *stack_base.
 * NOTE(review): the assignment to argv and the *stack_base adjustments
 * between the stores are not visible in this listing.
 */
224 linux_fixup(register_t **stack_base, struct image_params *imgp)
226 register_t *argv, *envp;
229 envp = *stack_base + (imgp->args->argc + 1);
231 **stack_base = (intptr_t)(void *)envp;
233 **stack_base = (intptr_t)(void *)argv;
235 **stack_base = imgp->args->argc;
/*
 * Stack fixup for Linux ELF images (sv_fixup of elf_linux_sysvec).
 * Writes the ELF auxiliary vector after the argv/envp pointer arrays,
 * frees imgp->auxargs, and stores argc through *stack_base.
 */
240 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
244 Elf32_Addr *uplatform;
245 struct ps_strings *arginfo;
248 KASSERT(curthread->td_proc == imgp->proc &&
249 (curthread->td_proc->p_flag & P_SA) == 0,
250 ("unsafe elf_linux_fixup(), should be curproc"));
253 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
/*
 * User address of the platform string, located below ps_strings and the
 * signal trampoline.  NOTE(review): the last subtrahend is not visible
 * here; linux_copyout_strings() writes the string at
 * arginfo - linux_szsigcode - linux_szplatform.
 */
254 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
256 args = (Elf32_Auxargs *)imgp->auxargs;
/* Skip argc argv slots, envc envp slots and the two NULL terminators. */
257 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
259 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
260 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, hz);
261 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
262 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
263 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
264 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
265 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
266 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
267 AUXARGS_ENTRY(pos, AT_BASE, args->base);
268 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
269 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
270 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
271 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
272 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
273 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
/* AT_EXECFD is emitted only when an exec descriptor was recorded. */
274 if (args->execfd != -1)
275 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
276 AUXARGS_ENTRY(pos, AT_NULL, 0);
/* The aux vector has been written out; release the kernel copy. */
278 free(imgp->auxargs, M_TEMP);
279 imgp->auxargs = NULL;
282 **stack_base = (register_t)imgp->args->argc;
287 * Copied from kern/kern_exec.c
/*
 * String copy-out for Linux ELF images (sv_copyout_strings of
 * elf_linux_sysvec).  Copies the signal trampoline, the platform string
 * and the argument/environment strings to user space below ps_strings,
 * builds the argv/envp pointer vectors, and records them in ps_strings.
 * NOTE(review): the return statement is not visible in this listing;
 * stack_base (the start of the vector table) is presumably the result.
 */
290 linux_copyout_strings(struct image_params *imgp)
294 char *stringp, *destp;
295 register_t *stack_base;
296 struct ps_strings *arginfo;
300 * Calculate string base and vector table pointers.
301 * Also deal with signal trampoline code for this exec type.
304 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
305 destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
306 linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
312 copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
313 linux_szsigcode), linux_szsigcode);
316 * install LINUX_PLATFORM
318 copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
319 linux_szplatform), linux_szplatform);
322 * If we have a valid auxargs ptr, prepare some room
327 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
328 * lower compatibility.
330 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
331 (LINUX_AT_COUNT * 2);
333 * The '+ 2' is for the null pointers at the end of each of
334 * the arg and env vector sets,and imgp->auxarg_size is room
335 * for argument of Runtime loader.
337 vectp = (char **)(destp - (imgp->args->argc +
338 imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
341 * The '+ 2' is for the null pointers at the end of each of
342 * the arg and env vector sets
344 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
349 * vectp also becomes our initial stack base
351 stack_base = (register_t *)vectp;
353 stringp = imgp->args->begin_argv;
354 argc = imgp->args->argc;
355 envc = imgp->args->envc;
358 * Copy out strings - arguments and environment.
/* The used part of the string area is ARG_MAX minus the remaining stringspace. */
360 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
363 * Fill in "ps_strings" struct for ps, w, etc.
365 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
366 suword(&arginfo->ps_nargvstr, argc);
369 * Fill in argument portion of vector table.
/*
 * One vector slot per string; stringp walks the kernel copy to find each
 * string's end.  NOTE(review): the matching advance of destp is not
 * visible in this listing.
 */
371 for (; argc > 0; --argc) {
372 suword(vectp++, (long)(intptr_t)destp);
373 while (*stringp++ != 0)
378 /* a null vector table pointer separates the argp's from the envp's */
381 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
382 suword(&arginfo->ps_nenvstr, envc);
385 * Fill in environment portion of vector table.
387 for (; envc > 0; --envc) {
388 suword(vectp++, (long)(intptr_t)destp);
389 while (*stringp++ != 0)
394 /* end of vector table is a null pointer */
/* User code/data segment selectors and the size of the non-rt part of the signal trampoline. */
400 extern int _ucodesel, _udatasel;
401 extern unsigned long linux_sznonrtsigcode;
/*
 * Deliver a signal with a Linux rt (siginfo-style) frame.  Called from
 * linux_sendsig() when the handler was installed with SA_SIGINFO.
 * Builds a struct l_rt_sigframe in a local copy, copies it to the user
 * stack (or the alternate signal stack), and points the trapframe at the
 * rt portion of the signal trampoline.
 *
 * catcher: user handler address stored in the frame.
 * ksi:     supplies the signal number, code and fault address.
 * mask:    signal mask saved in the frame for sigreturn.
 *
 * Expects the proc lock and ps_mtx to be held on entry (asserted below).
 */
404 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
406 struct thread *td = curthread;
407 struct proc *p = td->td_proc;
409 struct trapframe *regs;
410 struct l_rt_sigframe *fp, frame;
414 sig = ksi->ksi_signo;
415 code = ksi->ksi_code;
416 PROC_LOCK_ASSERT(p, MA_OWNED);
418 mtx_assert(&psp->ps_mtx, MA_OWNED);
420 oonstack = sigonstack(regs->tf_esp);
423 if (ldebug(rt_sendsig))
424 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
425 catcher, sig, (void*)mask, code);
428 * Allocate space for the signal handler context.
/*
 * Use the top of the alternate stack when one is configured, we are not
 * already on it, and this signal asked for it; otherwise place the frame
 * just below the current user stack pointer.
 */
430 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
431 SIGISMEMBER(psp->ps_sigonstack, sig)) {
432 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
433 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
435 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
/* ps_mtx is dropped here and re-taken at the end of the function. */
436 mtx_unlock(&psp->ps_mtx);
439 * Build the argument list for the signal handler.
/* Translate the FreeBSD signal number through the ABI's signal table, if any. */
441 if (p->p_sysent->sv_sigtbl)
442 if (sig <= p->p_sysent->sv_sigsize)
443 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
445 bzero(&frame, sizeof(frame));
447 frame.sf_handler = catcher;
/* These are user-space addresses inside the frame being built at fp. */
449 frame.sf_siginfo = &fp->sf_si;
450 frame.sf_ucontext = &fp->sf_sc;
452 /* Fill in POSIX parts */
453 frame.sf_si.lsi_signo = sig;
454 frame.sf_si.lsi_code = code;
455 frame.sf_si.lsi_addr = ksi->ksi_addr;
458 * Build the signal context to be used by sigreturn.
460 frame.sf_sc.uc_flags = 0; /* XXX ??? */
461 frame.sf_sc.uc_link = NULL; /* XXX ??? */
463 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
464 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
465 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
466 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
469 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
/* Snapshot the interrupted register state from the trapframe. */
471 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
472 frame.sf_sc.uc_mcontext.sc_gs = rgs();
473 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
474 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
475 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
476 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
477 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
478 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
479 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
480 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
481 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
482 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
483 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
484 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
485 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
486 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
487 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
488 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
489 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr;
/*
 * NOTE(review): the trap number is derived from ksi_code here, whereas
 * linux_sendsig() passes ksi->ksi_trapno to the same macro -- confirm
 * which of the two is intended.
 */
490 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
493 if (ldebug(rt_sendsig))
494 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
495 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
496 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
499 if (copyout(&frame, fp, sizeof(frame)) != 0) {
501 * Process has trashed its stack; give it an illegal
502 * instruction to halt it in its tracks.
505 if (ldebug(rt_sendsig))
506 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
514 * Build context to run handler in.
516 regs->tf_esp = (int)fp;
/*
 * Entry point: start of the sigcode (PS_STRINGS minus its size) plus
 * linux_sznonrtsigcode, i.e. past the non-rt part of the trampoline.
 */
517 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
518 linux_sznonrtsigcode;
519 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
520 regs->tf_cs = _ucodesel;
521 regs->tf_ds = _udatasel;
522 regs->tf_es = _udatasel;
523 regs->tf_fs = _udatasel;
524 regs->tf_ss = _udatasel;
526 mtx_lock(&psp->ps_mtx);
531 * Send an interrupt to process.
533 * Stack is set up to allow sigcode stored
534 * in u. to call routine, followed by kcall
535 * to sigreturn routine below. After sigreturn
536 * resets the signal mask, the stack, and the
537 * frame pointer, it returns to the user
/*
 * Signal delivery entry point (sv_sendsig of both sysentvecs).
 * Handlers installed with SA_SIGINFO are handed to linux_rt_sendsig();
 * otherwise a struct l_sigframe is built, copied to the user stack (or the
 * alternate signal stack), and the trapframe is pointed at the start of
 * the signal trampoline.
 *
 * catcher: user handler address stored in the frame.
 * ksi:     supplies the signal number, code, trap number and fault address.
 * mask:    signal mask saved in the frame for sigreturn.
 *
 * Expects the proc lock and ps_mtx to be held on entry (asserted below).
 */
541 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
543 struct thread *td = curthread;
544 struct proc *p = td->td_proc;
546 struct trapframe *regs;
547 struct l_sigframe *fp, frame;
552 PROC_LOCK_ASSERT(p, MA_OWNED);
554 sig = ksi->ksi_signo;
555 code = ksi->ksi_code;
556 mtx_assert(&psp->ps_mtx, MA_OWNED);
557 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
558 /* Signal handler installed with SA_SIGINFO. */
559 linux_rt_sendsig(catcher, ksi, mask);
563 oonstack = sigonstack(regs->tf_esp);
567 printf(ARGS(sendsig, "%p, %d, %p, %u"),
568 catcher, sig, (void*)mask, code);
572 * Allocate space for the signal handler context.
/*
 * Use the top of the alternate stack when one is configured, we are not
 * already on it, and this signal asked for it; otherwise place the frame
 * just below the current user stack pointer.
 */
574 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
575 SIGISMEMBER(psp->ps_sigonstack, sig)) {
576 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
577 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
579 fp = (struct l_sigframe *)regs->tf_esp - 1;
/* ps_mtx is dropped here and re-taken at the end of the function. */
580 mtx_unlock(&psp->ps_mtx);
584 * Build the argument list for the signal handler.
/* Translate the FreeBSD signal number through the ABI's signal table, if any. */
586 if (p->p_sysent->sv_sigtbl)
587 if (sig <= p->p_sysent->sv_sigsize)
588 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
590 bzero(&frame, sizeof(frame));
592 frame.sf_handler = catcher;
595 bsd_to_linux_sigset(mask, &lmask);
598 * Build the signal context to be used by sigreturn.
/* Word 0 of the Linux mask goes in sc_mask; the remaining words go in sf_extramask below. */
600 frame.sf_sc.sc_mask = lmask.__bits[0];
601 frame.sf_sc.sc_gs = rgs();
602 frame.sf_sc.sc_fs = regs->tf_fs;
603 frame.sf_sc.sc_es = regs->tf_es;
604 frame.sf_sc.sc_ds = regs->tf_ds;
605 frame.sf_sc.sc_edi = regs->tf_edi;
606 frame.sf_sc.sc_esi = regs->tf_esi;
607 frame.sf_sc.sc_ebp = regs->tf_ebp;
608 frame.sf_sc.sc_ebx = regs->tf_ebx;
609 frame.sf_sc.sc_edx = regs->tf_edx;
610 frame.sf_sc.sc_ecx = regs->tf_ecx;
611 frame.sf_sc.sc_eax = regs->tf_eax;
612 frame.sf_sc.sc_eip = regs->tf_eip;
613 frame.sf_sc.sc_cs = regs->tf_cs;
614 frame.sf_sc.sc_eflags = regs->tf_eflags;
615 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
616 frame.sf_sc.sc_ss = regs->tf_ss;
617 frame.sf_sc.sc_err = regs->tf_err;
618 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr;
619 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
621 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
622 frame.sf_extramask[i] = lmask.__bits[i+1];
624 if (copyout(&frame, fp, sizeof(frame)) != 0) {
626 * Process has trashed its stack; give it an illegal
627 * instruction to halt it in its tracks.
634 * Build context to run handler in.
636 regs->tf_esp = (int)fp;
/* Entry point: start of the sigcode, which sits immediately below PS_STRINGS. */
637 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
638 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
639 regs->tf_cs = _ucodesel;
640 regs->tf_ds = _udatasel;
641 regs->tf_es = _udatasel;
642 regs->tf_fs = _udatasel;
643 regs->tf_ss = _udatasel;
645 mtx_lock(&psp->ps_mtx);
649 * System call to cleanup state after a signal
650 * has been taken. Reset signal mask and
651 * stack state from context left by sendsig (above).
652 * Return to previous pc and psl as specified by
653 * context left by sendsig. Check carefully to
654 * make sure that the user has not modified the
655 * psl to gain improper privileges or to cause
/*
 * sigreturn for frames built by linux_sendsig().
 * Copies the struct l_sigframe in from args->sfp, validates the saved
 * eflags and %cs, restores the thread's signal mask and the trapframe
 * registers, and returns EJUSTRETURN.
 * NOTE(review): the error returns taken when copyin or the eflags check
 * fails are not visible in this listing.
 */
659 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
661 struct proc *p = td->td_proc;
662 struct l_sigframe frame;
663 struct trapframe *regs;
671 if (ldebug(sigreturn))
672 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
675 * The trampoline code hands us the sigframe.
676 * It is unsafe to keep track of it ourselves, in the event that a
677 * program jumps out of a signal handler.
679 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
683 * Check for security violations.
/* True when ef and oef differ only in bits covered by PSL_USERCHANGE. */
685 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
686 eflags = frame.sf_sc.sc_eflags;
688 * XXX do allow users to change the privileged flag PSL_RF. The
689 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
690 * sometimes set it there too. tf_eflags is kept in the signal
691 * context during signal handling and there is no other place
692 * to remember it, so the PSL_RF bit may be corrupted by the
693 * signal handler without us knowing. Corruption of the PSL_RF
694 * bit at worst causes one more or one less debugger trap, so
695 * allowing it is fairly harmless.
697 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
701 * Don't allow users to load a valid privileged %cs. Let the
702 * hardware check for invalid selectors, excess privilege in
703 * other selectors, invalid %eip's and invalid %esp's.
705 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
/* A non-user-privilege %cs raises SIGBUS (BUS_OBJERR, T_PROTFLT) at the current eip. */
706 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
707 ksiginfo_init_trap(&ksi);
708 ksi.ksi_signo = SIGBUS;
709 ksi.ksi_code = BUS_OBJERR;
710 ksi.ksi_trapno = T_PROTFLT;
711 ksi.ksi_addr = (void *)regs->tf_eip;
712 trapsignal(td, &ksi);
/* Reassemble the Linux mask from sc_mask plus sf_extramask, mirroring linux_sendsig(). */
716 lmask.__bits[0] = frame.sf_sc.sc_mask;
717 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
718 lmask.__bits[i+1] = frame.sf_extramask[i];
720 linux_to_bsd_sigset(&lmask, &td->td_sigmask);
721 SIG_CANTMASK(td->td_sigmask);
726 * Restore signal context.
728 /* %gs was restored by the trampoline. */
729 regs->tf_fs = frame.sf_sc.sc_fs;
730 regs->tf_es = frame.sf_sc.sc_es;
731 regs->tf_ds = frame.sf_sc.sc_ds;
732 regs->tf_edi = frame.sf_sc.sc_edi;
733 regs->tf_esi = frame.sf_sc.sc_esi;
734 regs->tf_ebp = frame.sf_sc.sc_ebp;
735 regs->tf_ebx = frame.sf_sc.sc_ebx;
736 regs->tf_edx = frame.sf_sc.sc_edx;
737 regs->tf_ecx = frame.sf_sc.sc_ecx;
738 regs->tf_eax = frame.sf_sc.sc_eax;
739 regs->tf_eip = frame.sf_sc.sc_eip;
740 regs->tf_cs = frame.sf_sc.sc_cs;
741 regs->tf_eflags = eflags;
742 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
743 regs->tf_ss = frame.sf_sc.sc_ss;
745 return (EJUSTRETURN);
749 * System call to cleanup state after a signal
750 * has been taken. Reset signal mask and
751 * stack state from context left by rt_sendsig (above).
752 * Return to previous pc and psl as specified by
753 * context left by sendsig. Check carefully to
754 * make sure that the user has not modified the
755 * psl to gain improper privileges or to cause
/*
 * sigreturn for frames built by linux_rt_sendsig().
 * Copies the struct l_ucontext in from args->ucp, validates the saved
 * eflags and %cs, restores the thread's signal mask and the trapframe
 * registers, re-applies the saved alternate-stack settings through
 * kern_sigaltstack() (result ignored), and returns EJUSTRETURN.
 * NOTE(review): the error returns taken when copyin or the eflags check
 * fails, and the assignment of lss, are not visible in this listing.
 */
759 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
761 struct proc *p = td->td_proc;
762 struct l_ucontext uc;
763 struct l_sigcontext *context;
766 struct trapframe *regs;
773 if (ldebug(rt_sigreturn))
774 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
777 * The trampoline code hands us the ucontext.
778 * It is unsafe to keep track of it ourselves, in the event that a
779 * program jumps out of a signal handler.
781 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
784 context = &uc.uc_mcontext;
787 * Check for security violations.
/* True when ef and oef differ only in bits covered by PSL_USERCHANGE. */
789 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
790 eflags = context->sc_eflags;
792 * XXX do allow users to change the privileged flag PSL_RF. The
793 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
794 * sometimes set it there too. tf_eflags is kept in the signal
795 * context during signal handling and there is no other place
796 * to remember it, so the PSL_RF bit may be corrupted by the
797 * signal handler without us knowing. Corruption of the PSL_RF
798 * bit at worst causes one more or one less debugger trap, so
799 * allowing it is fairly harmless.
801 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
805 * Don't allow users to load a valid privileged %cs. Let the
806 * hardware check for invalid selectors, excess privilege in
807 * other selectors, invalid %eip's and invalid %esp's.
809 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
/* A non-user-privilege %cs raises SIGBUS (BUS_OBJERR, T_PROTFLT) at the current eip. */
810 if (!CS_SECURE(context->sc_cs)) {
811 ksiginfo_init_trap(&ksi);
812 ksi.ksi_signo = SIGBUS;
813 ksi.ksi_code = BUS_OBJERR;
814 ksi.ksi_trapno = T_PROTFLT;
815 ksi.ksi_addr = (void *)regs->tf_eip;
816 trapsignal(td, &ksi);
821 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
822 SIG_CANTMASK(td->td_sigmask);
827 * Restore signal context
829 /* %gs was restored by the trampoline. */
830 regs->tf_fs = context->sc_fs;
831 regs->tf_es = context->sc_es;
832 regs->tf_ds = context->sc_ds;
833 regs->tf_edi = context->sc_edi;
834 regs->tf_esi = context->sc_esi;
835 regs->tf_ebp = context->sc_ebp;
836 regs->tf_ebx = context->sc_ebx;
837 regs->tf_edx = context->sc_edx;
838 regs->tf_ecx = context->sc_ecx;
839 regs->tf_eax = context->sc_eax;
840 regs->tf_eip = context->sc_eip;
841 regs->tf_cs = context->sc_cs;
842 regs->tf_eflags = eflags;
843 regs->tf_esp = context->sc_esp_at_signal;
844 regs->tf_ss = context->sc_ss;
847 * call sigaltstack & ignore results..
/* Convert the saved Linux stack descriptor (lss) to the native form. */
850 ss.ss_sp = lss->ss_sp;
851 ss.ss_size = lss->ss_size;
852 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
855 if (ldebug(rt_sigreturn))
856 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
857 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
859 (void)kern_sigaltstack(td, &ss, NULL);
861 return (EJUSTRETURN);
/*
 * Syscall argument fetch hook (sv_prepsyscall of both sysentvecs).
 * Loads args[0..5] from the trapframe registers ebx, ecx, edx, esi, edi
 * and ebp, then sets *params to NULL, which the existing comment marks as
 * "no copyin".  The code parameter is not touched by the visible lines.
 */
868 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
870 args[0] = tf->tf_ebx;
871 args[1] = tf->tf_ecx;
872 args[2] = tf->tf_edx;
873 args[3] = tf->tf_esi;
874 args[4] = tf->tf_edi;
875 args[5] = tf->tf_ebp; /* Unconfirmed */
876 *params = NULL; /* no copyin */
880 * If a linux binary is exec'ing something, try this image activator
881 * first. We override standard shell script execution in order to
882 * be able to modify the interpreter path. We only do this if a linux
883 * binary is doing the exec, so we do not create an EXEC module for it.
/*
 * Image activator hook (sv_imgact_try of both sysentvecs).
 * For "#!" scripts it runs exec_shell_imgact() and, on success, asks
 * linux_emul_convpath() for an alternate interpreter path; a path that
 * fits is copied over imgp->interpreter_name.
 * NOTE(review): the NULL check and release of rpath, and the return
 * value, are not visible in this listing.
 */
885 static int exec_linux_imgact_try(struct image_params *iparams);
888 exec_linux_imgact_try(struct image_params *imgp)
890 const char *head = (const char *)imgp->image_header;
895 * The interpreter for shell scripts run from a linux binary needs
896 * to be located in /compat/linux if possible in order to recursively
897 * maintain linux path emulation.
899 if (((const short *)head)[0] == SHELLMAGIC) {
901 * Run our normal shell image activator. If it succeeds attempt
902 * to use the alternate path for the interpreter. If an alternate
903 * path is found, use our stringspace to store it.
905 if ((error = exec_shell_imgact(imgp)) == 0) {
906 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
907 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
/* len includes the terminating NUL. */
909 len = strlen(rpath) + 1;
/* Replace the interpreter name only when the new path fits in MAXSHELLCMDLEN bytes. */
911 if (len <= MAXSHELLCMDLEN) {
912 memcpy(imgp->interpreter_name, rpath, len);
922 * exec_setregs may initialize some registers differently than Linux
923 * does, thus potentially confusing Linux binaries. If necessary, we
924 * override the exec_setregs default(s) here.
/*
 * Register setup at exec (sv_setregs of both sysentvecs).
 * Calls the native exec_setregs() and then applies the Linux-specific
 * overrides: the initial FPU control word becomes __LINUX_NPXCW__.
 * NOTE(review): the statement that goes with the %gs comment is not
 * visible in this listing.
 */
927 exec_linux_setregs(struct thread *td, u_long entry,
928 u_long stack, u_long ps_strings)
930 struct pcb *pcb = td->td_pcb;
932 exec_setregs(td, entry, stack, ps_strings);
934 /* Linux sets %gs to 0, we default to _udatasel */
938 pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
/*
 * Called from linux_elf_modevent() with &linux_platform, so it presumably
 * stores the platform string pointer through dst.
 * NOTE(review): the body is not visible in this listing -- confirm.
 */
942 linux_get_machine(const char **dst)
/*
 * System vector for Linux a.out binaries (sv_name "Linux a.out").
 * It shares the syscall, signal and errno tables and the signal code with
 * elf_linux_sysvec, but uses linux_fixup() and the stock
 * exec_copyout_strings().
 */
960 struct sysentvec linux_sysvec = {
961 .sv_size = LINUX_SYS_MAXSYSCALL,
962 .sv_table = linux_sysent,
964 .sv_sigsize = LINUX_SIGTBLSZ,
965 .sv_sigtbl = bsd_to_linux_signal,
966 .sv_errsize = ELAST + 1,
967 .sv_errtbl = bsd_to_linux_errno,
968 .sv_transtrap = translate_traps,
969 .sv_fixup = linux_fixup,
970 .sv_sendsig = linux_sendsig,
971 .sv_sigcode = linux_sigcode,
972 .sv_szsigcode = &linux_szsigcode,
973 .sv_prepsyscall = linux_prepsyscall,
974 .sv_name = "Linux a.out",
976 .sv_imgact_try = exec_linux_imgact_try,
977 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
978 .sv_pagesize = PAGE_SIZE,
979 .sv_minuser = VM_MIN_ADDRESS,
980 .sv_maxuser = VM_MAXUSER_ADDRESS,
981 .sv_usrstack = USRSTACK,
982 .sv_psstrings = PS_STRINGS,
983 .sv_stackprot = VM_PROT_ALL,
984 .sv_copyout_strings = exec_copyout_strings,
985 .sv_setregs = exec_linux_setregs,
/*
 * System vector for Linux ELF binaries (sv_name "Linux ELF"); referenced
 * by both brand entries in this file.  Unlike linux_sysvec it uses
 * elf_linux_fixup(), linux_copyout_strings() and elf32_coredump, and sets
 * no sv_fixlimit hook.
 */
990 struct sysentvec elf_linux_sysvec = {
991 .sv_size = LINUX_SYS_MAXSYSCALL,
992 .sv_table = linux_sysent,
994 .sv_sigsize = LINUX_SIGTBLSZ,
995 .sv_sigtbl = bsd_to_linux_signal,
996 .sv_errsize = ELAST + 1,
997 .sv_errtbl = bsd_to_linux_errno,
998 .sv_transtrap = translate_traps,
999 .sv_fixup = elf_linux_fixup,
1000 .sv_sendsig = linux_sendsig,
1001 .sv_sigcode = linux_sigcode,
1002 .sv_szsigcode = &linux_szsigcode,
1003 .sv_prepsyscall = linux_prepsyscall,
1004 .sv_name = "Linux ELF",
1005 .sv_coredump = elf32_coredump,
1006 .sv_imgact_try = exec_linux_imgact_try,
1007 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1008 .sv_pagesize = PAGE_SIZE,
1009 .sv_minuser = VM_MIN_ADDRESS,
1010 .sv_maxuser = VM_MAXUSER_ADDRESS,
1011 .sv_usrstack = USRSTACK,
1012 .sv_psstrings = PS_STRINGS,
1013 .sv_stackprot = VM_PROT_ALL,
1014 .sv_copyout_strings = linux_copyout_strings,
1015 .sv_setregs = exec_linux_setregs,
1016 .sv_fixlimit = NULL,
/*
 * ELF brand note descriptor with vendor string "GNU"; n_namesz counts the
 * terminating NUL because it is taken with sizeof.  Attached to both Linux
 * brands through .brand_note.
 * NOTE(review): the remaining header fields are not visible in this listing.
 */
1020 static char GNULINUX_ABI_VENDOR[] = "GNU";
1022 static Elf_Brandnote linux_brandnote = {
1023 .hdr.n_namesz = sizeof(GNULINUX_ABI_VENDOR),
1026 .vendor = GNULINUX_ABI_VENDOR,
/*
 * Brand entry for Linux ELF binaries whose interpreter is
 * /lib/ld-linux.so.1; emulation root is /compat/linux and the system
 * vector is elf_linux_sysvec.
 */
1030 static Elf32_Brandinfo linux_brand = {
1031 .brand = ELFOSABI_LINUX,
1033 .compat_3_brand = "Linux",
1034 .emul_path = "/compat/linux",
1035 .interp_path = "/lib/ld-linux.so.1",
1036 .sysvec = &elf_linux_sysvec,
1037 .interp_newpath = NULL,
1038 .brand_note = &linux_brandnote,
1039 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * Brand entry for Linux ELF binaries whose interpreter is
 * /lib/ld-linux.so.2; the visible fields otherwise match linux_brand.
 */
1042 static Elf32_Brandinfo linux_glibc2brand = {
1043 .brand = ELFOSABI_LINUX,
1045 .compat_3_brand = "Linux",
1046 .emul_path = "/compat/linux",
1047 .interp_path = "/lib/ld-linux.so.2",
1048 .sysvec = &elf_linux_sysvec,
1049 .interp_newpath = NULL,
1050 .brand_note = &linux_brandnote,
1051 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * Brand table walked by linux_elf_modevent() until a NULL entry.
 * NOTE(review): the initializer entries are not visible in this listing.
 */
1054 Elf32_Brandinfo *linux_brandlist[] = {
/*
 * Module event handler for the Linux ELF emulation.
 * The first half inserts every brand in linux_brandlist, registers the
 * ioctl/device handler sets, initializes the emuldata and futex locks,
 * registers the process exit / schedtail / exec event handlers and
 * computes the platform string and its rounded size.  The second half
 * undoes this, provided no brand is still in use.
 * NOTE(review): the switch on type, the case labels and the error/return
 * statements are not visible in this listing; the two halves are
 * identified by their printf messages.
 */
1061 linux_elf_modevent(module_t mod, int type, void *data)
1063 Elf32_Brandinfo **brandinfo;
1065 struct linux_ioctl_handler **lihp;
1066 struct linux_device_handler **ldhp;
1072 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1074 if (elf32_insert_brand_entry(*brandinfo) < 0)
1077 SET_FOREACH(lihp, linux_ioctl_handler_set)
1078 linux_ioctl_register_handler(*lihp);
1079 SET_FOREACH(ldhp, linux_device_handler_set)
1080 linux_device_register_handler(*ldhp);
1081 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1082 sx_init(&emul_shared_lock, "emuldata->shared lock");
1083 LIST_INIT(&futex_list);
1084 sx_init(&futex_sx, "futex protection lock");
1085 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1087 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
1089 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
/* Cache the platform string and its rounded-up size for linux_copyout_strings(). */
1091 linux_get_machine(&linux_platform);
1092 linux_szplatform = roundup(strlen(linux_platform) + 1,
1095 printf("Linux ELF exec handler installed\n");
1097 printf("cannot insert Linux ELF brand handler\n");
/* Teardown: first check whether any brand is still in use, then remove them all. */
1100 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1102 if (elf32_brand_inuse(*brandinfo))
1105 for (brandinfo = &linux_brandlist[0];
1106 *brandinfo != NULL; ++brandinfo)
1107 if (elf32_remove_brand_entry(*brandinfo) < 0)
1111 SET_FOREACH(lihp, linux_ioctl_handler_set)
1112 linux_ioctl_unregister_handler(*lihp);
1113 SET_FOREACH(ldhp, linux_device_handler_set)
1114 linux_device_unregister_handler(*ldhp);
1115 mtx_destroy(&emul_lock);
1116 sx_destroy(&emul_shared_lock);
1117 sx_destroy(&futex_sx);
1118 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1119 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1120 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1122 printf("Linux ELF exec handler removed\n");
1124 printf("Could not deinstall ELF interpreter entry\n");
/*
 * Module glue: declares the "linuxelf" module at SI_SUB_EXEC /
 * SI_ORDER_ANY.  NOTE(review): the moduledata_t initializer fields are not
 * visible in this listing; presumably they name linux_elf_modevent as the
 * event handler -- confirm.
 */
1132 static moduledata_t linux_elf_mod = {
1138 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);