/*-
 * Copyright (c) 2004 Tim J. Robbins
 * Copyright (c) 2003 Peter Wemm
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 1998-1999 Andrew Gallatin
 * Copyright (c) 1994-1996 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_compat.h"

#ifndef COMPAT_IA32
#error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
#endif

#define	__ELF_WORD_SIZE	32

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/exec.h>
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/vnode.h>
#include <sys/eventhandler.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>

#include <amd64/linux32/linux.h>
#include <amd64/linux32/linux32_proto.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_misc.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>

84 MODULE_VERSION(linux, 1);
86 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
/*
 * Store one 32-bit ELF auxiliary vector entry (id, value) at the user
 * address `pos' and advance `pos' past both words.
 *
 * `pos' is expanded twice with a post-increment, so it must be a plain
 * lvalue.  The do { } while (0) wrapper makes the expansion a single
 * statement, which matters where the macro is the body of an unbraced if.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)	\
	do {				\
		suword32(pos++, id);	\
		suword32(pos++, val);	\
	} while (0)

/*
 * The two bytes "#!" as they appear when the start of an image header is
 * read as a 16-bit integer in host byte order.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif

/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

109 const char *linux_platform = "i686";
110 static int linux_szplatform;
111 extern char linux_sigcode[];
112 extern int linux_szsigcode;
114 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
116 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
117 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
119 static int elf_linux_fixup(register_t **stack_base,
120 struct image_params *iparams);
121 static register_t *linux_copyout_strings(struct image_params *imgp);
122 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
124 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
125 static void exec_linux_setregs(struct thread *td, u_long entry,
126 u_long stack, u_long ps_strings);
127 static void linux32_fixlimit(struct rlimit *rl, int which);
129 extern LIST_HEAD(futex_list, futex) futex_list;
130 extern struct sx futex_sx;
132 static eventhandler_tag linux_exit_tag;
133 static eventhandler_tag linux_schedtail_tag;
134 static eventhandler_tag linux_exec_tag;
137 * Linux syscalls return negative errno's, we do positive and map them
139 * FreeBSD: src/sys/sys/errno.h
140 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
141 * linux-2.6.17.8/include/asm-generic/errno.h
143 static int bsd_to_linux_errno[ELAST + 1] = {
144 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
145 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
146 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
147 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
148 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
149 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
150 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
151 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
152 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
156 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
157 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
158 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
159 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
160 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
161 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
162 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
163 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
164 0, LINUX_SIGUSR1, LINUX_SIGUSR2
167 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
168 SIGHUP, SIGINT, SIGQUIT, SIGILL,
169 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
170 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
171 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
172 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
173 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
174 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
175 SIGIO, SIGURG, SIGSYS
/*
 * FreeBSD trap type -> Linux trap number.  The array is indexed by the
 * FreeBSD T_* value given in each entry's comment, so it must have one
 * entry for every index from 0 to 30 with none missing.  Trap types
 * without a Linux counterpart map to LINUX_T_UNKNOWN.
 */
#define LINUX_T_UNKNOWN  255
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};
/*
 * Bounds-checked lookup.  The comparison against sizeof is unsigned, so a
 * negative code also yields LINUX_T_UNKNOWN.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)

217 struct linux32_ps_strings {
218 u_int32_t ps_argvstr; /* first of 0 or more argument strings */
219 u_int ps_nargvstr; /* the number of argument strings */
220 u_int32_t ps_envstr; /* first of 0 or more environment strings */
221 u_int ps_nenvstr; /* the number of environment strings */
225 * If FreeBSD & Linux have a difference of opinion about what a trap
226 * means, deal with it here.
231 translate_traps(int signal, int trap_code)
233 if (signal != SIGBUS)
247 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
251 Elf32_Addr *pos, *uplatform;
252 struct linux32_ps_strings *arginfo;
254 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
255 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
258 KASSERT(curthread->td_proc == imgp->proc &&
259 (curthread->td_proc->p_flag & P_SA) == 0,
260 ("unsafe elf_linux_fixup(), should be curproc"));
261 base = (Elf32_Addr *)*stack_base;
262 args = (Elf32_Auxargs *)imgp->auxargs;
263 pos = base + (imgp->args->argc + imgp->args->envc + 2);
265 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
266 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, hz);
267 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
268 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
269 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
270 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
271 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
272 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
273 AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
274 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
275 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
276 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
277 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
278 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
279 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
280 if (args->execfd != -1)
281 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
282 AUXARGS_ENTRY_32(pos, AT_NULL, 0);
284 free(imgp->auxargs, M_TEMP);
285 imgp->auxargs = NULL;
288 suword32(base, (uint32_t)imgp->args->argc);
289 *stack_base = (register_t *)base;
/* User segment selectors loaded into the trapframe and pcb below. */
extern int _ucodesel, _ucode32sel, _udatasel;
/* Offset added to the sigcode base to reach the rt signal entry point. */
extern unsigned long linux_sznonrtsigcode;

297 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
299 struct thread *td = curthread;
300 struct proc *p = td->td_proc;
302 struct trapframe *regs;
303 struct l_rt_sigframe *fp, frame;
308 sig = ksi->ksi_signo;
309 code = ksi->ksi_code;
310 PROC_LOCK_ASSERT(p, MA_OWNED);
312 mtx_assert(&psp->ps_mtx, MA_OWNED);
314 oonstack = sigonstack(regs->tf_rsp);
317 if (ldebug(rt_sendsig))
318 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
319 catcher, sig, (void*)mask, code);
322 * Allocate space for the signal handler context.
324 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
325 SIGISMEMBER(psp->ps_sigonstack, sig)) {
326 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
327 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
329 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
330 mtx_unlock(&psp->ps_mtx);
333 * Build the argument list for the signal handler.
335 if (p->p_sysent->sv_sigtbl)
336 if (sig <= p->p_sysent->sv_sigsize)
337 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
339 bzero(&frame, sizeof(frame));
341 frame.sf_handler = PTROUT(catcher);
343 frame.sf_siginfo = PTROUT(&fp->sf_si);
344 frame.sf_ucontext = PTROUT(&fp->sf_sc);
346 /* Fill in POSIX parts */
347 frame.sf_si.lsi_signo = sig;
348 frame.sf_si.lsi_code = code;
349 frame.sf_si.lsi_addr = PTROUT(ksi->ksi_addr);
352 * Build the signal context to be used by sigreturn.
354 frame.sf_sc.uc_flags = 0; /* XXX ??? */
355 frame.sf_sc.uc_link = 0; /* XXX ??? */
357 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
358 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
359 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
360 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
363 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
365 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
366 frame.sf_sc.uc_mcontext.sc_gs = rgs();
367 frame.sf_sc.uc_mcontext.sc_fs = rfs();
368 __asm __volatile("movl %%es,%0" :
369 "=rm" (frame.sf_sc.uc_mcontext.sc_es));
370 __asm __volatile("movl %%ds,%0" :
371 "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
372 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi;
373 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi;
374 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp;
375 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx;
376 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx;
377 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx;
378 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax;
379 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip;
380 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
381 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
382 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
383 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
384 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
385 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
386 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
389 if (ldebug(rt_sendsig))
390 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
391 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
392 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
395 if (copyout(&frame, fp, sizeof(frame)) != 0) {
397 * Process has trashed its stack; give it an illegal
398 * instruction to halt it in its tracks.
401 if (ldebug(rt_sendsig))
402 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
410 * Build context to run handler in.
412 regs->tf_rsp = PTROUT(fp);
413 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
414 linux_sznonrtsigcode;
415 regs->tf_rflags &= ~(PSL_T | PSL_D);
416 regs->tf_cs = _ucode32sel;
417 regs->tf_ss = _udatasel;
419 td->td_pcb->pcb_ds = _udatasel;
421 td->td_pcb->pcb_es = _udatasel;
422 /* leave user %fs and %gs untouched */
424 mtx_lock(&psp->ps_mtx);
429 * Send an interrupt to process.
431 * Stack is set up to allow sigcode stored
432 * in u. to call routine, followed by kcall
433 * to sigreturn routine below. After sigreturn
434 * resets the signal mask, the stack, and the
435 * frame pointer, it returns to the user
439 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
441 struct thread *td = curthread;
442 struct proc *p = td->td_proc;
444 struct trapframe *regs;
445 struct l_sigframe *fp, frame;
450 sig = ksi->ksi_signo;
451 code = ksi->ksi_code;
452 PROC_LOCK_ASSERT(p, MA_OWNED);
454 mtx_assert(&psp->ps_mtx, MA_OWNED);
455 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
456 /* Signal handler installed with SA_SIGINFO. */
457 linux_rt_sendsig(catcher, ksi, mask);
462 oonstack = sigonstack(regs->tf_rsp);
466 printf(ARGS(sendsig, "%p, %d, %p, %u"),
467 catcher, sig, (void*)mask, code);
471 * Allocate space for the signal handler context.
473 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
474 SIGISMEMBER(psp->ps_sigonstack, sig)) {
475 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
476 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
478 fp = (struct l_sigframe *)regs->tf_rsp - 1;
479 mtx_unlock(&psp->ps_mtx);
483 * Build the argument list for the signal handler.
485 if (p->p_sysent->sv_sigtbl)
486 if (sig <= p->p_sysent->sv_sigsize)
487 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
489 bzero(&frame, sizeof(frame));
491 frame.sf_handler = PTROUT(catcher);
494 bsd_to_linux_sigset(mask, &lmask);
497 * Build the signal context to be used by sigreturn.
499 frame.sf_sc.sc_mask = lmask.__bits[0];
500 frame.sf_sc.sc_gs = rgs();
501 frame.sf_sc.sc_fs = rfs();
502 __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es));
503 __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
504 frame.sf_sc.sc_edi = regs->tf_rdi;
505 frame.sf_sc.sc_esi = regs->tf_rsi;
506 frame.sf_sc.sc_ebp = regs->tf_rbp;
507 frame.sf_sc.sc_ebx = regs->tf_rbx;
508 frame.sf_sc.sc_edx = regs->tf_rdx;
509 frame.sf_sc.sc_ecx = regs->tf_rcx;
510 frame.sf_sc.sc_eax = regs->tf_rax;
511 frame.sf_sc.sc_eip = regs->tf_rip;
512 frame.sf_sc.sc_cs = regs->tf_cs;
513 frame.sf_sc.sc_eflags = regs->tf_rflags;
514 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
515 frame.sf_sc.sc_ss = regs->tf_ss;
516 frame.sf_sc.sc_err = regs->tf_err;
517 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
518 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
520 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
521 frame.sf_extramask[i] = lmask.__bits[i+1];
523 if (copyout(&frame, fp, sizeof(frame)) != 0) {
525 * Process has trashed its stack; give it an illegal
526 * instruction to halt it in its tracks.
533 * Build context to run handler in.
535 regs->tf_rsp = PTROUT(fp);
536 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
537 regs->tf_rflags &= ~(PSL_T | PSL_D);
538 regs->tf_cs = _ucode32sel;
539 regs->tf_ss = _udatasel;
541 td->td_pcb->pcb_ds = _udatasel;
543 td->td_pcb->pcb_es = _udatasel;
544 /* leave user %fs and %gs untouched */
546 mtx_lock(&psp->ps_mtx);
550 * System call to cleanup state after a signal
551 * has been taken. Reset signal mask and
552 * stack state from context left by sendsig (above).
553 * Return to previous pc and psl as specified by
554 * context left by sendsig. Check carefully to
555 * make sure that the user has not modified the
556 * psl to gain improper privileges or to cause
560 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
562 struct proc *p = td->td_proc;
563 struct l_sigframe frame;
564 struct trapframe *regs;
572 if (ldebug(sigreturn))
573 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
576 * The trampoline code hands us the sigframe.
577 * It is unsafe to keep track of it ourselves, in the event that a
578 * program jumps out of a signal handler.
580 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
584 * Check for security violations.
586 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
587 eflags = frame.sf_sc.sc_eflags;
589 * XXX do allow users to change the privileged flag PSL_RF. The
590 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
591 * sometimes set it there too. tf_eflags is kept in the signal
592 * context during signal handling and there is no other place
593 * to remember it, so the PSL_RF bit may be corrupted by the
594 * signal handler without us knowing. Corruption of the PSL_RF
595 * bit at worst causes one more or one less debugger trap, so
596 * allowing it is fairly harmless.
598 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
602 * Don't allow users to load a valid privileged %cs. Let the
603 * hardware check for invalid selectors, excess privilege in
604 * other selectors, invalid %eip's and invalid %esp's.
606 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
607 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
608 ksiginfo_init_trap(&ksi);
609 ksi.ksi_signo = SIGBUS;
610 ksi.ksi_code = BUS_OBJERR;
611 ksi.ksi_trapno = T_PROTFLT;
612 ksi.ksi_addr = (void *)regs->tf_rip;
613 trapsignal(td, &ksi);
617 lmask.__bits[0] = frame.sf_sc.sc_mask;
618 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
619 lmask.__bits[i+1] = frame.sf_extramask[i];
621 linux_to_bsd_sigset(&lmask, &td->td_sigmask);
622 SIG_CANTMASK(td->td_sigmask);
627 * Restore signal context.
629 /* Selectors were restored by the trampoline. */
630 regs->tf_rdi = frame.sf_sc.sc_edi;
631 regs->tf_rsi = frame.sf_sc.sc_esi;
632 regs->tf_rbp = frame.sf_sc.sc_ebp;
633 regs->tf_rbx = frame.sf_sc.sc_ebx;
634 regs->tf_rdx = frame.sf_sc.sc_edx;
635 regs->tf_rcx = frame.sf_sc.sc_ecx;
636 regs->tf_rax = frame.sf_sc.sc_eax;
637 regs->tf_rip = frame.sf_sc.sc_eip;
638 regs->tf_cs = frame.sf_sc.sc_cs;
639 regs->tf_rflags = eflags;
640 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
641 regs->tf_ss = frame.sf_sc.sc_ss;
643 return (EJUSTRETURN);
647 * System call to cleanup state after a signal
648 * has been taken. Reset signal mask and
649 * stack state from context left by rt_sendsig (above).
650 * Return to previous pc and psl as specified by
651 * context left by sendsig. Check carefully to
652 * make sure that the user has not modified the
653 * psl to gain improper privileges or to cause
657 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
659 struct proc *p = td->td_proc;
660 struct l_ucontext uc;
661 struct l_sigcontext *context;
664 struct trapframe *regs;
671 if (ldebug(rt_sigreturn))
672 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
675 * The trampoline code hands us the ucontext.
676 * It is unsafe to keep track of it ourselves, in the event that a
677 * program jumps out of a signal handler.
679 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
682 context = &uc.uc_mcontext;
685 * Check for security violations.
687 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
688 eflags = context->sc_eflags;
690 * XXX do allow users to change the privileged flag PSL_RF. The
691 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
692 * sometimes set it there too. tf_eflags is kept in the signal
693 * context during signal handling and there is no other place
694 * to remember it, so the PSL_RF bit may be corrupted by the
695 * signal handler without us knowing. Corruption of the PSL_RF
696 * bit at worst causes one more or one less debugger trap, so
697 * allowing it is fairly harmless.
699 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
703 * Don't allow users to load a valid privileged %cs. Let the
704 * hardware check for invalid selectors, excess privilege in
705 * other selectors, invalid %eip's and invalid %esp's.
707 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
708 if (!CS_SECURE(context->sc_cs)) {
709 ksiginfo_init_trap(&ksi);
710 ksi.ksi_signo = SIGBUS;
711 ksi.ksi_code = BUS_OBJERR;
712 ksi.ksi_trapno = T_PROTFLT;
713 ksi.ksi_addr = (void *)regs->tf_rip;
714 trapsignal(td, &ksi);
719 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
720 SIG_CANTMASK(td->td_sigmask);
725 * Restore signal context
727 /* Selectors were restored by the trampoline. */
728 regs->tf_rdi = context->sc_edi;
729 regs->tf_rsi = context->sc_esi;
730 regs->tf_rbp = context->sc_ebp;
731 regs->tf_rbx = context->sc_ebx;
732 regs->tf_rdx = context->sc_edx;
733 regs->tf_rcx = context->sc_ecx;
734 regs->tf_rax = context->sc_eax;
735 regs->tf_rip = context->sc_eip;
736 regs->tf_cs = context->sc_cs;
737 regs->tf_rflags = eflags;
738 regs->tf_rsp = context->sc_esp_at_signal;
739 regs->tf_ss = context->sc_ss;
742 * call sigaltstack & ignore results..
745 ss.ss_sp = PTRIN(lss->ss_sp);
746 ss.ss_size = lss->ss_size;
747 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
750 if (ldebug(rt_sigreturn))
751 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
752 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
754 (void)kern_sigaltstack(td, &ss, NULL);
756 return (EJUSTRETURN);
763 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
765 args[0] = tf->tf_rbx;
766 args[1] = tf->tf_rcx;
767 args[2] = tf->tf_rdx;
768 args[3] = tf->tf_rsi;
769 args[4] = tf->tf_rdi;
770 args[5] = tf->tf_rbp; /* Unconfirmed */
771 *params = NULL; /* no copyin */
775 * If a linux binary is exec'ing something, try this image activator
776 * first. We override standard shell script execution in order to
777 * be able to modify the interpreter path. We only do this if a linux
778 * binary is doing the exec, so we do not create an EXEC module for it.
780 static int exec_linux_imgact_try(struct image_params *iparams);
783 exec_linux_imgact_try(struct image_params *imgp)
785 const char *head = (const char *)imgp->image_header;
790 * The interpreter for shell scripts run from a linux binary needs
791 * to be located in /compat/linux if possible in order to recursively
792 * maintain linux path emulation.
794 if (((const short *)head)[0] == SHELLMAGIC) {
796 * Run our normal shell image activator. If it succeeds
797 * attempt to use the alternate path for the interpreter. If
798 * an alternate path is found, use our stringspace to store it.
800 if ((error = exec_shell_imgact(imgp)) == 0) {
801 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
802 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
804 len = strlen(rpath) + 1;
806 if (len <= MAXSHELLCMDLEN) {
807 memcpy(imgp->interpreter_name, rpath,
818 * Clear registers on exec
819 * XXX copied from ia32_signal.c.
822 exec_linux_setregs(td, entry, stack, ps_strings)
828 struct trapframe *regs = td->td_frame;
829 struct pcb *pcb = td->td_pcb;
832 wrmsr(MSR_FSBASE, 0);
833 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
841 pcb->pcb_ds = _udatasel;
842 pcb->pcb_es = _udatasel;
843 pcb->pcb_fs = _udatasel;
844 pcb->pcb_gs = _udatasel;
845 pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
847 bzero((char *)regs, sizeof(struct trapframe));
848 regs->tf_rip = entry;
849 regs->tf_rsp = stack;
850 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
851 regs->tf_ss = _udatasel;
852 regs->tf_cs = _ucode32sel;
853 regs->tf_rbx = ps_strings;
854 load_cr0(rcr0() | CR0_MP | CR0_TS);
857 /* Return via doreti so that we can change to a different %cs */
858 pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
859 pcb->pcb_flags &= ~PCB_GS32BIT;
860 td->td_retval[1] = 0;
864 * XXX copied from ia32_sysvec.c.
867 linux_copyout_strings(struct image_params *imgp)
871 char *stringp, *destp;
872 u_int32_t *stack_base;
873 struct linux32_ps_strings *arginfo;
876 * Calculate string base and vector table pointers.
877 * Also deal with signal trampoline code for this exec type.
879 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
880 destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
881 linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
887 copyout(imgp->proc->p_sysent->sv_sigcode,
888 ((caddr_t)arginfo - linux_szsigcode), linux_szsigcode);
891 * Install LINUX_PLATFORM
893 copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
894 linux_szplatform), linux_szplatform);
897 * If we have a valid auxargs ptr, prepare some room
902 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
903 * lower compatibility.
905 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
906 (LINUX_AT_COUNT * 2);
908 * The '+ 2' is for the null pointers at the end of each of
909 * the arg and env vector sets,and imgp->auxarg_size is room
910 * for argument of Runtime loader.
912 vectp = (u_int32_t *)(destp - (imgp->args->argc +
913 imgp->args->envc + 2 + imgp->auxarg_size) *
918 * The '+ 2' is for the null pointers at the end of each of
919 * the arg and env vector sets
921 vectp = (u_int32_t *)(destp - (imgp->args->argc +
922 imgp->args->envc + 2) * sizeof(u_int32_t));
925 * vectp also becomes our initial stack base
929 stringp = imgp->args->begin_argv;
930 argc = imgp->args->argc;
931 envc = imgp->args->envc;
933 * Copy out strings - arguments and environment.
935 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
938 * Fill in "ps_strings" struct for ps, w, etc.
940 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
941 suword32(&arginfo->ps_nargvstr, argc);
944 * Fill in argument portion of vector table.
946 for (; argc > 0; --argc) {
947 suword32(vectp++, (uint32_t)(intptr_t)destp);
948 while (*stringp++ != 0)
953 /* a null vector table pointer separates the argp's from the envp's */
954 suword32(vectp++, 0);
956 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
957 suword32(&arginfo->ps_nenvstr, envc);
960 * Fill in environment portion of vector table.
962 for (; envc > 0; --envc) {
963 suword32(vectp++, (uint32_t)(intptr_t)destp);
964 while (*stringp++ != 0)
969 /* end of vector table is a null pointer */
972 return ((register_t *)stack_base);
975 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
976 "32-bit Linux emulation");
978 static u_long linux32_maxdsiz = LINUX32_MAXDSIZ;
979 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
980 &linux32_maxdsiz, 0, "");
981 static u_long linux32_maxssiz = LINUX32_MAXSSIZ;
982 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
983 &linux32_maxssiz, 0, "");
984 static u_long linux32_maxvmem = LINUX32_MAXVMEM;
985 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
986 &linux32_maxvmem, 0, "");
989 linux32_fixlimit(struct rlimit *rl, int which)
994 if (linux32_maxdsiz != 0) {
995 if (rl->rlim_cur > linux32_maxdsiz)
996 rl->rlim_cur = linux32_maxdsiz;
997 if (rl->rlim_max > linux32_maxdsiz)
998 rl->rlim_max = linux32_maxdsiz;
1002 if (linux32_maxssiz != 0) {
1003 if (rl->rlim_cur > linux32_maxssiz)
1004 rl->rlim_cur = linux32_maxssiz;
1005 if (rl->rlim_max > linux32_maxssiz)
1006 rl->rlim_max = linux32_maxssiz;
1010 if (linux32_maxvmem != 0) {
1011 if (rl->rlim_cur > linux32_maxvmem)
1012 rl->rlim_cur = linux32_maxvmem;
1013 if (rl->rlim_max > linux32_maxvmem)
1014 rl->rlim_max = linux32_maxvmem;
1020 struct sysentvec elf_linux_sysvec = {
1021 .sv_size = LINUX_SYS_MAXSYSCALL,
1022 .sv_table = linux_sysent,
1024 .sv_sigsize = LINUX_SIGTBLSZ,
1025 .sv_sigtbl = bsd_to_linux_signal,
1026 .sv_errsize = ELAST + 1,
1027 .sv_errtbl = bsd_to_linux_errno,
1028 .sv_transtrap = translate_traps,
1029 .sv_fixup = elf_linux_fixup,
1030 .sv_sendsig = linux_sendsig,
1031 .sv_sigcode = linux_sigcode,
1032 .sv_szsigcode = &linux_szsigcode,
1033 .sv_prepsyscall = linux_prepsyscall,
1034 .sv_name = "Linux ELF32",
1035 .sv_coredump = elf32_coredump,
1036 .sv_imgact_try = exec_linux_imgact_try,
1037 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1038 .sv_pagesize = PAGE_SIZE,
1039 .sv_minuser = VM_MIN_ADDRESS,
1040 .sv_maxuser = LINUX32_USRSTACK,
1041 .sv_usrstack = LINUX32_USRSTACK,
1042 .sv_psstrings = LINUX32_PS_STRINGS,
1043 .sv_stackprot = VM_PROT_ALL,
1044 .sv_copyout_strings = linux_copyout_strings,
1045 .sv_setregs = exec_linux_setregs,
1046 .sv_fixlimit = linux32_fixlimit,
1047 .sv_maxssiz = &linux32_maxssiz,
1050 static char GNULINUX_ABI_VENDOR[] = "GNU";
1052 static Elf_Brandnote linux32_brandnote = {
1053 .hdr.n_namesz = sizeof(GNULINUX_ABI_VENDOR),
1056 .vendor = GNULINUX_ABI_VENDOR,
1060 static Elf32_Brandinfo linux_brand = {
1061 .brand = ELFOSABI_LINUX,
1063 .compat_3_brand = "Linux",
1064 .emul_path = "/compat/linux",
1065 .interp_path = "/lib/ld-linux.so.1",
1066 .sysvec = &elf_linux_sysvec,
1067 .interp_newpath = NULL,
1068 .brand_note = &linux32_brandnote,
1069 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1072 static Elf32_Brandinfo linux_glibc2brand = {
1073 .brand = ELFOSABI_LINUX,
1075 .compat_3_brand = "Linux",
1076 .emul_path = "/compat/linux",
1077 .interp_path = "/lib/ld-linux.so.2",
1078 .sysvec = &elf_linux_sysvec,
1079 .interp_newpath = NULL,
1080 .brand_note = &linux32_brandnote,
1081 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1084 Elf32_Brandinfo *linux_brandlist[] = {
1091 linux_elf_modevent(module_t mod, int type, void *data)
1093 Elf32_Brandinfo **brandinfo;
1095 struct linux_ioctl_handler **lihp;
1096 struct linux_device_handler **ldhp;
1102 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1104 if (elf32_insert_brand_entry(*brandinfo) < 0)
1107 SET_FOREACH(lihp, linux_ioctl_handler_set)
1108 linux_ioctl_register_handler(*lihp);
1109 SET_FOREACH(ldhp, linux_device_handler_set)
1110 linux_device_register_handler(*ldhp);
1111 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1112 sx_init(&emul_shared_lock, "emuldata->shared lock");
1113 LIST_INIT(&futex_list);
1114 sx_init(&futex_sx, "futex protection lock");
1115 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1116 linux_proc_exit, NULL, 1000);
1117 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail,
1118 linux_schedtail, NULL, 1000);
1119 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1120 linux_proc_exec, NULL, 1000);
1121 linux_szplatform = roundup(strlen(linux_platform) + 1,
1124 printf("Linux ELF exec handler installed\n");
1126 printf("cannot insert Linux ELF brand handler\n");
1129 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1131 if (elf32_brand_inuse(*brandinfo))
1134 for (brandinfo = &linux_brandlist[0];
1135 *brandinfo != NULL; ++brandinfo)
1136 if (elf32_remove_brand_entry(*brandinfo) < 0)
1140 SET_FOREACH(lihp, linux_ioctl_handler_set)
1141 linux_ioctl_unregister_handler(*lihp);
1142 SET_FOREACH(ldhp, linux_device_handler_set)
1143 linux_device_unregister_handler(*ldhp);
1144 mtx_destroy(&emul_lock);
1145 sx_destroy(&emul_shared_lock);
1146 sx_destroy(&futex_sx);
1147 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1148 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1149 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1151 printf("Linux ELF exec handler removed\n");
1153 printf("Could not deinstall ELF interpreter entry\n");
1161 static moduledata_t linux_elf_mod = {
1167 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);