2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer
14 * in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 #include "opt_compat.h"
37 #ifndef COMPAT_FREEBSD32
38 #error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!"
41 #define __ELF_WORD_SIZE 32
43 #include <sys/param.h>
44 #include <sys/systm.h>
46 #include <sys/fcntl.h>
47 #include <sys/imgact.h>
48 #include <sys/imgact_elf.h>
49 #include <sys/kernel.h>
51 #include <sys/malloc.h>
52 #include <sys/module.h>
53 #include <sys/mutex.h>
55 #include <sys/resourcevar.h>
56 #include <sys/signalvar.h>
57 #include <sys/sysctl.h>
58 #include <sys/syscallsubr.h>
59 #include <sys/sysent.h>
60 #include <sys/sysproto.h>
61 #include <sys/vnode.h>
62 #include <sys/eventhandler.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_map.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_param.h>
72 #include <machine/cpu.h>
73 #include <machine/md_var.h>
74 #include <machine/pcb.h>
75 #include <machine/specialreg.h>
77 #include <amd64/linux32/linux.h>
78 #include <amd64/linux32/linux32_proto.h>
79 #include <compat/linux/linux_emul.h>
80 #include <compat/linux/linux_futex.h>
81 #include <compat/linux/linux_mib.h>
82 #include <compat/linux/linux_misc.h>
83 #include <compat/linux/linux_signal.h>
84 #include <compat/linux/linux_util.h>
86 MODULE_VERSION(linux, 1);
88 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
/*
 * Emit one aux-vector pair: suword32() stores `id`, then `val`, through
 * `pos`, which is post-incremented after each store.  `pos` is expanded
 * twice, so callers must pass a side-effect-free lvalue.
 */
90 #define AUXARGS_ENTRY_32(pos, id, val) \
92 suword32(pos++, id); \
93 suword32(pos++, val); \
/*
 * SHELLMAGIC is the two-byte "#!" script prefix as it appears when the
 * image header is read as a single short (see exec_linux_imgact_try());
 * the value is selected by BYTE_ORDER.
 */
96 #if BYTE_ORDER == LITTLE_ENDIAN
97 #define SHELLMAGIC 0x2123 /* #! */
99 #define SHELLMAGIC 0x2321
103 * Allow the sendsig functions to use the ldebug() facility
104 * even though they are not syscalls themselves. Map them
105 * to syscall 0. This is slightly less bogus than using
/*
 * Pseudo syscall numbers so that ldebug(rt_sendsig) and ldebug(sendsig)
 * can be used in the signal delivery paths; both map to syscall 0.
 */
108 #define LINUX_SYS_linux_rt_sendsig 0
109 #define LINUX_SYS_linux_sendsig 0
/*
 * Module-wide data.
 *  - linux_platform: string copied to user space by linux_copyout_strings()
 *    and advertised through the LINUX_AT_PLATFORM aux entry.
 *  - linux_szplatform: rounded-up size of that string, computed in
 *    linux_elf_modevent().
 *  - linux_sigcode / linux_szsigcode and linux_sysent: defined elsewhere,
 *    wired into elf_linux_sysvec below.
 *  - The two linker sets are walked with SET_FOREACH in
 *    linux_elf_modevent() to (un)register ioctl and device handlers.
 */
111 const char *linux_platform = "i686";
112 static int linux_szplatform;
113 extern char linux_sigcode[];
114 extern int linux_szsigcode;
116 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
118 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
119 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
/*
 * Forward declarations of the file-local callbacks that are installed in
 * elf_linux_sysvec (sv_fixup, sv_copyout_strings, sv_sendsig, sv_setregs,
 * sv_fixlimit) and in linux32_brandnote (trans_osrel).
 */
121 static int elf_linux_fixup(register_t **stack_base,
122 struct image_params *iparams);
123 static register_t *linux_copyout_strings(struct image_params *imgp);
124 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
125 static void exec_linux_setregs(struct thread *td,
126 struct image_params *imgp, u_long stack);
127 static void linux32_fixlimit(struct rlimit *rl, int which);
128 static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
/*
 * Tags returned by EVENTHANDLER_REGISTER for the process_exit and
 * process_exec hooks; kept so linux_elf_modevent() can deregister them.
 */
130 static eventhandler_tag linux_exit_tag;
131 static eventhandler_tag linux_exec_tag;
134 * Linux syscalls return negative errnos; FreeBSD uses positive ones, so map them.
136 * FreeBSD: src/sys/sys/errno.h
137 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
138 * linux-2.6.17.8/include/asm-generic/errno.h
/*
 * errno translation table installed as sv_errtbl.  The index is the FreeBSD
 * errno (0..ELAST); the value is the corresponding Linux errno, already
 * negated.  Most low numbers map to themselves, but not all: index 11 maps
 * to -35 and index 35 maps to -11.  Any rows after the last one shown here
 * are not visible in this view.
 */
140 static int bsd_to_linux_errno[ELAST + 1] = {
141 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
142 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
143 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
144 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
145 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
146 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
147 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
148 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
149 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
/*
 * FreeBSD -> Linux signal number table, installed as sv_sigtbl.  The
 * sendsig paths index it with _SIG_IDX(sig) where sig is the FreeBSD
 * signal number.  Two slots hold 0; presumably those FreeBSD signals have
 * no Linux counterpart (TODO confirm).
 */
153 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
154 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
155 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
156 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
157 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
158 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
159 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
160 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
161 0, LINUX_SIGUSR1, LINUX_SIGUSR2
/*
 * Linux -> FreeBSD signal number table, the reverse direction of
 * bsd_to_linux_signal.  It is not referenced in the visible part of this
 * file; presumably it is indexed by the Linux signal number minus one,
 * mirroring the other table (TODO confirm against its users).  Note that
 * SIGBUS and SIGURG each appear twice, so the mapping is not one-to-one.
 */
164 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
165 SIGHUP, SIGINT, SIGQUIT, SIGILL,
166 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
167 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
168 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
169 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
170 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
171 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
172 SIGIO, SIGURG, SIGSYS
/*
 * Trap code translation table.  The index is the FreeBSD trap type (named
 * in each row comment); the value is the number reported to Linux programs
 * in sc_trapno by the sendsig paths.  LINUX_T_UNKNOWN marks trap types
 * without a mapping.  Some rows of the table are not visible in this view.
 */
175 #define LINUX_T_UNKNOWN 255
176 static int _bsd_to_linux_trapcode[] = {
177 LINUX_T_UNKNOWN, /* 0 */
178 6, /* 1 T_PRIVINFLT */
179 LINUX_T_UNKNOWN, /* 2 */
181 LINUX_T_UNKNOWN, /* 4 */
182 LINUX_T_UNKNOWN, /* 5 */
183 16, /* 6 T_ARITHTRAP */
184 254, /* 7 T_ASTFLT */
185 LINUX_T_UNKNOWN, /* 8 */
186 13, /* 9 T_PROTFLT */
187 1, /* 10 T_TRCTRAP */
188 LINUX_T_UNKNOWN, /* 11 */
189 14, /* 12 T_PAGEFLT */
190 LINUX_T_UNKNOWN, /* 13 */
191 17, /* 14 T_ALIGNFLT */
192 LINUX_T_UNKNOWN, /* 15 */
193 LINUX_T_UNKNOWN, /* 16 */
194 LINUX_T_UNKNOWN, /* 17 */
200 8, /* 23 T_DOUBLEFLT */
201 9, /* 24 T_FPOPFLT */
202 10, /* 25 T_TSSFLT */
203 11, /* 26 T_SEGNPFLT */
204 12, /* 27 T_STKFLT */
206 19, /* 29 T_XMMFLT */
207 15 /* 30 T_RESERVED */
/*
 * Bounds-checked lookup in _bsd_to_linux_trapcode: `code` is compared with
 * the element count before indexing.  The out-of-range arm of the
 * conditional is not visible in this view.  `code` is expanded twice.
 */
209 #define bsd_to_linux_trapcode(code) \
210 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
211 _bsd_to_linux_trapcode[(code)]: \
/*
 * ps_strings layout used for the 32-bit image.  The two pointer members are
 * u_int32_t because they hold user-space addresses written with suword32()
 * in linux_copyout_strings().
 */
214 struct linux32_ps_strings {
215 u_int32_t ps_argvstr; /* first of 0 or more argument strings */
216 u_int ps_nargvstr; /* the number of argument strings */
217 u_int32_t ps_envstr; /* first of 0 or more environment strings */
218 u_int ps_nenvstr; /* the number of environment strings */
222 * If FreeBSD & Linux have a difference of opinion about what a trap
223 * means, deal with it here.
/*
 * Trap translation hook installed as sv_transtrap.  Only the initial guard
 * is visible here: signals other than SIGBUS take the first branch.  The
 * rest of the body, including the return values, is not shown in this view.
 */
228 translate_traps(int signal, int trap_code)
230 if (signal != SIGBUS)
/*
 * sv_fixup hook: write the ELF auxiliary vector onto the new user stack.
 *
 * stack_base: in/out pointer to the base of the argv/envp vector area;
 *             it is rewritten from `base` at the end.
 * imgp:       exec image parameters; imgp->auxargs supplies the ELF values
 *             and is freed (and cleared) once the vector has been written.
 *
 * `pos` starts just past argv[], envp[] and their two NULL terminators
 * (base + argc + envc + 2) and every AUXARGS_ENTRY_32 stores an (id, value)
 * pair there with suword32().  AT_CLKTCK is emitted only for emulated
 * kernels >= 2.4.0 and AT_EXECFD only when args->execfd != -1.  argc is
 * stored at `base` before stack_base is updated.  The declarations of
 * `base` and `args`, any adjustment of `base`, and the return statement
 * are not visible in this view.
 *
 * NOTE(review): AT_EUID and AT_EGID are filled from cr_svuid / cr_svgid
 * (the saved ids), not from the effective ids -- confirm this is intended.
 */
244 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
248 Elf32_Addr *pos, *uplatform;
249 struct linux32_ps_strings *arginfo;
251 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
/* Same user address that linux_copyout_strings() copies linux_platform to. */
252 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
254 KASSERT(curthread->td_proc == imgp->proc,
255 ("unsafe elf_linux_fixup(), should be curproc"));
256 base = (Elf32_Addr *)*stack_base;
257 args = (Elf32_Auxargs *)imgp->auxargs;
258 pos = base + (imgp->args->argc + imgp->args->envc + 2);
260 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
263 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
264 * as it has appeared in the 2.4.0-rc7 first time.
265 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
266 * glibc falls back to the hard-coded CLK_TCK value when aux entry
268 * Also see linux_times() implementation.
270 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
271 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
272 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
273 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
274 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
275 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
276 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
277 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
278 AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
279 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
280 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
281 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
282 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
283 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
284 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
285 if (args->execfd != -1)
286 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
287 AUXARGS_ENTRY_32(pos, AT_NULL, 0);
289 free(imgp->auxargs, M_TEMP);
290 imgp->auxargs = NULL;
293 suword32(base, (uint32_t)imgp->args->argc);
294 *stack_base = (register_t *)base;
/*
 * Defined elsewhere.  linux_rt_sendsig() adds it to sv_sigcode_base to find
 * the entry point of the rt signal trampoline.
 */
298 extern unsigned long linux_sznonrtsigcode;
/*
 * Deliver a signal using the Linux "rt" signal frame (l_rt_sigframe).
 *
 * catcher: user handler address, stored in sf_handler.
 * ksi:     signal information; ksi_signo and ksi_code are read, and
 *          ksi_addr is reported as sc_cr2.
 * mask:    signal mask to save in the frame (converted to a Linux sigset).
 *
 * Visible steps: choose the frame address (top of the alternate stack when
 * TDP_ALTSTACK is set, the thread is not already on it and the signal is
 * in ps_sigonstack; the other visible assignment places the frame just
 * below the current %rsp), translate the signal number through sv_sigtbl,
 * fill a local l_rt_sigframe (handler, siginfo, ucontext holding the saved
 * trapframe registers), copy it out to `fp`, and finally point the
 * trapframe at sv_sigcode_base + linux_sznonrtsigcode with the 32-bit user
 * selectors and PCB_FULL_IRET set.
 *
 * Locking: the proc lock and ps_mtx are asserted to be held; ps_mtx is
 * released before the frame is built and taken again at the end.
 *
 * Local declarations, the initialisation of `psp` and `regs`, and most of
 * the copyout failure path are not visible in this view.
 */
301 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
303 struct thread *td = curthread;
304 struct proc *p = td->td_proc;
306 struct trapframe *regs;
307 struct l_rt_sigframe *fp, frame;
312 sig = ksi->ksi_signo;
313 code = ksi->ksi_code;
314 PROC_LOCK_ASSERT(p, MA_OWNED);
316 mtx_assert(&psp->ps_mtx, MA_OWNED);
318 oonstack = sigonstack(regs->tf_rsp);
321 if (ldebug(rt_sendsig))
322 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
323 catcher, sig, (void*)mask, code);
326 * Allocate space for the signal handler context.
328 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
329 SIGISMEMBER(psp->ps_sigonstack, sig)) {
330 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
331 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
333 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
334 mtx_unlock(&psp->ps_mtx);
337 * Build the argument list for the signal handler.
339 if (p->p_sysent->sv_sigtbl)
340 if (sig <= p->p_sysent->sv_sigsize)
341 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
343 bzero(&frame, sizeof(frame));
345 frame.sf_handler = PTROUT(catcher);
347 frame.sf_siginfo = PTROUT(&fp->sf_si);
348 frame.sf_ucontext = PTROUT(&fp->sf_sc);
350 /* Fill in POSIX parts */
351 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
354 * Build the signal context to be used by sigreturn.
356 frame.sf_sc.uc_flags = 0; /* XXX ??? */
357 frame.sf_sc.uc_link = 0; /* XXX ??? */
359 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
360 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
361 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
362 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
365 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
367 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
368 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi;
369 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi;
370 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp;
371 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx;
372 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx;
373 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx;
374 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax;
375 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip;
376 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
377 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs;
378 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
379 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
380 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
381 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
382 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
383 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
384 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
385 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
386 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
389 if (ldebug(rt_sendsig))
390 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
391 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
392 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
395 if (copyout(&frame, fp, sizeof(frame)) != 0) {
397 * Process has trashed its stack; give it an illegal
398 * instruction to halt it in its tracks.
401 if (ldebug(rt_sendsig))
402 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
410 * Build context to run handler in.
412 regs->tf_rsp = PTROUT(fp);
413 regs->tf_rip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
414 regs->tf_rflags &= ~(PSL_T | PSL_D);
415 regs->tf_cs = _ucode32sel;
416 regs->tf_ss = _udatasel;
417 regs->tf_ds = _udatasel;
418 regs->tf_es = _udatasel;
419 regs->tf_fs = _ufssel;
420 regs->tf_gs = _ugssel;
421 regs->tf_flags = TF_HASSEGS;
422 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
424 mtx_lock(&psp->ps_mtx);
429 * Send an interrupt to process.
431 * Stack is set up to allow sigcode stored
432 * in u. to call routine, followed by kcall
433 * to sigreturn routine below. After sigreturn
434 * resets the signal mask, the stack, and the
435 * frame pointer, it returns to the user
/*
 * sv_sendsig hook: deliver a signal using the classic Linux l_sigframe.
 *
 * catcher: user handler address, stored in sf_handler.
 * ksi:     signal information; ksi_signo and ksi_code are read, and
 *          ksi_addr is reported as sc_cr2.
 * mask:    signal mask to save; word 0 goes to sc_mask and the remaining
 *          words to sf_extramask.
 *
 * When the signal is a member of ps_siginfo (handler installed with
 * SA_SIGINFO) the work is handed to linux_rt_sendsig().  Otherwise the
 * visible steps mirror that function: choose the frame address (alternate
 * stack or just below the current %rsp), translate the signal number
 * through sv_sigtbl, fill a local l_sigframe from the trapframe, copy it
 * out, and point the trapframe at sv_sigcode_base with the 32-bit user
 * selectors and PCB_FULL_IRET set.
 *
 * Locking: the proc lock and ps_mtx are asserted to be held; ps_mtx is
 * released before the frame is built and taken again at the end.
 *
 * Local declarations, the initialisation of `psp` and `regs`, and the body
 * of the copyout failure path are not visible in this view.
 */
439 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
441 struct thread *td = curthread;
442 struct proc *p = td->td_proc;
444 struct trapframe *regs;
445 struct l_sigframe *fp, frame;
450 sig = ksi->ksi_signo;
451 code = ksi->ksi_code;
452 PROC_LOCK_ASSERT(p, MA_OWNED);
454 mtx_assert(&psp->ps_mtx, MA_OWNED);
455 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
456 /* Signal handler installed with SA_SIGINFO. */
457 linux_rt_sendsig(catcher, ksi, mask);
462 oonstack = sigonstack(regs->tf_rsp);
466 printf(ARGS(sendsig, "%p, %d, %p, %u"),
467 catcher, sig, (void*)mask, code);
471 * Allocate space for the signal handler context.
473 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
474 SIGISMEMBER(psp->ps_sigonstack, sig)) {
475 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
476 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
478 fp = (struct l_sigframe *)regs->tf_rsp - 1;
479 mtx_unlock(&psp->ps_mtx);
483 * Build the argument list for the signal handler.
485 if (p->p_sysent->sv_sigtbl)
486 if (sig <= p->p_sysent->sv_sigsize)
487 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
489 bzero(&frame, sizeof(frame));
491 frame.sf_handler = PTROUT(catcher);
494 bsd_to_linux_sigset(mask, &lmask);
497 * Build the signal context to be used by sigreturn.
499 frame.sf_sc.sc_mask = lmask.__bits[0];
500 frame.sf_sc.sc_gs = regs->tf_gs;
501 frame.sf_sc.sc_fs = regs->tf_fs;
502 frame.sf_sc.sc_es = regs->tf_es;
503 frame.sf_sc.sc_ds = regs->tf_ds;
504 frame.sf_sc.sc_edi = regs->tf_rdi;
505 frame.sf_sc.sc_esi = regs->tf_rsi;
506 frame.sf_sc.sc_ebp = regs->tf_rbp;
507 frame.sf_sc.sc_ebx = regs->tf_rbx;
508 frame.sf_sc.sc_edx = regs->tf_rdx;
509 frame.sf_sc.sc_ecx = regs->tf_rcx;
510 frame.sf_sc.sc_eax = regs->tf_rax;
511 frame.sf_sc.sc_eip = regs->tf_rip;
512 frame.sf_sc.sc_cs = regs->tf_cs;
513 frame.sf_sc.sc_eflags = regs->tf_rflags;
514 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
515 frame.sf_sc.sc_ss = regs->tf_ss;
516 frame.sf_sc.sc_err = regs->tf_err;
517 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
518 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
520 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
521 frame.sf_extramask[i] = lmask.__bits[i+1];
523 if (copyout(&frame, fp, sizeof(frame)) != 0) {
525 * Process has trashed its stack; give it an illegal
526 * instruction to halt it in its tracks.
533 * Build context to run handler in.
535 regs->tf_rsp = PTROUT(fp);
536 regs->tf_rip = p->p_sysent->sv_sigcode_base;
537 regs->tf_rflags &= ~(PSL_T | PSL_D);
538 regs->tf_cs = _ucode32sel;
539 regs->tf_ss = _udatasel;
540 regs->tf_ds = _udatasel;
541 regs->tf_es = _udatasel;
542 regs->tf_fs = _ufssel;
543 regs->tf_gs = _ugssel;
544 regs->tf_flags = TF_HASSEGS;
545 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
547 mtx_lock(&psp->ps_mtx);
551 * System call to cleanup state after a signal
552 * has been taken. Reset signal mask and
553 * stack state from context left by sendsig (above).
554 * Return to previous pc and psl as specified by
555 * context left by sendsig. Check carefully to
556 * make sure that the user has not modified the
557 * psl to gain improper privileges or to cause
/*
 * Linux sigreturn(2) for frames built by linux_sendsig().
 *
 * td:   calling thread; its trapframe is overwritten from the frame.
 * args: args->sfp is the user address of the l_sigframe to restore.
 *
 * The frame is copied in from user space and validated before use: the
 * saved eflags may differ from the current rflags only in PSL_USERCHANGE
 * bits, and the saved %cs must carry user privilege (SEL_UPL); a bad %cs
 * raises SIGBUS/BUS_OBJERR through trapsignal().  The signal mask is then
 * rebuilt from sc_mask plus sf_extramask and installed with
 * kern_sigprocmask(), the general and segment registers are restored, and
 * PCB_FULL_IRET is set.  Returns EJUSTRETURN on success so the restored
 * registers are not clobbered by a syscall return value.
 *
 * The values returned on the failure paths and the initialisation of
 * `regs` are not visible in this view.
 */
561 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
563 struct l_sigframe frame;
564 struct trapframe *regs;
573 if (ldebug(sigreturn))
574 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
577 * The trampoline code hands us the sigframe.
578 * It is unsafe to keep track of it ourselves, in the event that a
579 * program jumps out of a signal handler.
581 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
585 * Check for security violations.
587 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
588 eflags = frame.sf_sc.sc_eflags;
589 if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
593 * Don't allow users to load a valid privileged %cs. Let the
594 * hardware check for invalid selectors, excess privilege in
595 * other selectors, invalid %eip's and invalid %esp's.
597 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
598 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
599 ksiginfo_init_trap(&ksi);
600 ksi.ksi_signo = SIGBUS;
601 ksi.ksi_code = BUS_OBJERR;
602 ksi.ksi_trapno = T_PROTFLT;
603 ksi.ksi_addr = (void *)regs->tf_rip;
604 trapsignal(td, &ksi);
608 lmask.__bits[0] = frame.sf_sc.sc_mask;
609 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
610 lmask.__bits[i+1] = frame.sf_extramask[i];
611 linux_to_bsd_sigset(&lmask, &bmask);
612 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
615 * Restore signal context.
617 regs->tf_rdi = frame.sf_sc.sc_edi;
618 regs->tf_rsi = frame.sf_sc.sc_esi;
619 regs->tf_rbp = frame.sf_sc.sc_ebp;
620 regs->tf_rbx = frame.sf_sc.sc_ebx;
621 regs->tf_rdx = frame.sf_sc.sc_edx;
622 regs->tf_rcx = frame.sf_sc.sc_ecx;
623 regs->tf_rax = frame.sf_sc.sc_eax;
624 regs->tf_rip = frame.sf_sc.sc_eip;
625 regs->tf_cs = frame.sf_sc.sc_cs;
626 regs->tf_ds = frame.sf_sc.sc_ds;
627 regs->tf_es = frame.sf_sc.sc_es;
628 regs->tf_fs = frame.sf_sc.sc_fs;
629 regs->tf_gs = frame.sf_sc.sc_gs;
630 regs->tf_rflags = eflags;
631 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
632 regs->tf_ss = frame.sf_sc.sc_ss;
633 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
635 return (EJUSTRETURN);
639 * System call to cleanup state after a signal
640 * has been taken. Reset signal mask and
641 * stack state from context left by rt_sendsig (above).
642 * Return to previous pc and psl as specified by
643 * context left by sendsig. Check carefully to
644 * make sure that the user has not modified the
645 * psl to gain improper privileges or to cause
/*
 * Linux rt_sigreturn(2) for frames built by linux_rt_sendsig().
 *
 * td:   calling thread; its trapframe is overwritten from the context.
 * args: args->ucp is the user address of the l_ucontext to restore.
 *
 * The ucontext is copied in and its machine context is validated the same
 * way as in linux_sigreturn(): eflags may differ from the current rflags
 * only in PSL_USERCHANGE bits, and %cs must carry user privilege, with a
 * bad %cs raising SIGBUS/BUS_OBJERR through trapsignal().  The signal mask
 * is taken from uc_sigmask, the registers are restored, PCB_FULL_IRET is
 * set, and finally the alternate signal stack is re-installed with
 * kern_sigaltstack(), whose result is deliberately ignored.  Returns
 * EJUSTRETURN on success.
 *
 * The assignment of `lss` is not visible in this view; presumably it
 * points at uc.uc_stack (TODO confirm).  The values returned on the
 * failure paths and the initialisation of `regs` are not visible either.
 */
649 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
651 struct l_ucontext uc;
652 struct l_sigcontext *context;
656 struct trapframe *regs;
663 if (ldebug(rt_sigreturn))
664 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
667 * The trampoline code hands us the ucontext.
668 * It is unsafe to keep track of it ourselves, in the event that a
669 * program jumps out of a signal handler.
671 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
674 context = &uc.uc_mcontext;
677 * Check for security violations.
679 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
680 eflags = context->sc_eflags;
681 if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
685 * Don't allow users to load a valid privileged %cs. Let the
686 * hardware check for invalid selectors, excess privilege in
687 * other selectors, invalid %eip's and invalid %esp's.
689 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
690 if (!CS_SECURE(context->sc_cs)) {
691 ksiginfo_init_trap(&ksi);
692 ksi.ksi_signo = SIGBUS;
693 ksi.ksi_code = BUS_OBJERR;
694 ksi.ksi_trapno = T_PROTFLT;
695 ksi.ksi_addr = (void *)regs->tf_rip;
696 trapsignal(td, &ksi);
700 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
701 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
704 * Restore signal context
706 regs->tf_gs = context->sc_gs;
707 regs->tf_fs = context->sc_fs;
708 regs->tf_es = context->sc_es;
709 regs->tf_ds = context->sc_ds;
710 regs->tf_rdi = context->sc_edi;
711 regs->tf_rsi = context->sc_esi;
712 regs->tf_rbp = context->sc_ebp;
713 regs->tf_rbx = context->sc_ebx;
714 regs->tf_rdx = context->sc_edx;
715 regs->tf_rcx = context->sc_ecx;
716 regs->tf_rax = context->sc_eax;
717 regs->tf_rip = context->sc_eip;
718 regs->tf_cs = context->sc_cs;
719 regs->tf_rflags = eflags;
720 regs->tf_rsp = context->sc_esp_at_signal;
721 regs->tf_ss = context->sc_ss;
722 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
725 * call sigaltstack & ignore results..
728 ss.ss_sp = PTRIN(lss->ss_sp);
729 ss.ss_size = lss->ss_size;
730 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
733 if (ldebug(rt_sigreturn))
734 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
735 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
737 (void)kern_sigaltstack(td, &ss, NULL);
739 return (EJUSTRETURN);
/*
 * sv_fetch_syscall_args hook: decode a Linux 32-bit system call from the
 * trapframe.
 *
 * td: calling thread; td_frame is read and td_retval[] is preset.
 * sa: filled in with the syscall number (from %rax), up to six arguments
 *     (from %rbx, %rcx, %rdx, %rsi, %rdi, %rbp in that order), the sysent
 *     entry and its argument count.
 *
 * A syscall number at or beyond sv_size selects table entry 0 instead of
 * indexing out of range.  td_retval[1] is preset to the current %rdx.
 * The initialisation of `p` and the return statement are not visible in
 * this view.
 */
743 linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
746 struct trapframe *frame;
749 frame = td->td_frame;
751 sa->args[0] = frame->tf_rbx;
752 sa->args[1] = frame->tf_rcx;
753 sa->args[2] = frame->tf_rdx;
754 sa->args[3] = frame->tf_rsi;
755 sa->args[4] = frame->tf_rdi;
756 sa->args[5] = frame->tf_rbp; /* Unconfirmed */
757 sa->code = frame->tf_rax;
759 if (sa->code >= p->p_sysent->sv_size)
760 sa->callp = &p->p_sysent->sv_table[0];
762 sa->callp = &p->p_sysent->sv_table[sa->code];
763 sa->narg = sa->callp->sy_narg;
765 td->td_retval[0] = 0;
766 td->td_retval[1] = frame->tf_rdx;
772 * If a linux binary is exec'ing something, try this image activator
773 * first. We override standard shell script execution in order to
774 * be able to modify the interpreter path. We only do this if a linux
775 * binary is doing the exec, so we do not create an EXEC module for it.
777 static int exec_linux_imgact_try(struct image_params *iparams);
780 exec_linux_imgact_try(struct image_params *imgp)
782 const char *head = (const char *)imgp->image_header;
787 * The interpreter for shell scripts run from a linux binary needs
788 * to be located in /compat/linux if possible in order to recursively
789 * maintain linux path emulation.
791 if (((const short *)head)[0] == SHELLMAGIC) {
793 * Run our normal shell image activator. If it succeeds attempt
794 * to use the alternate path for the interpreter. If an
795 * alternate path is found, use our stringspace to store it.
797 if ((error = exec_shell_imgact(imgp)) == 0) {
798 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
799 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
802 imgp->args->fname_buf =
803 imgp->interpreter_name = rpath;
810 * Clear registers on exec
811 * XXX copied from ia32_signal.c.
/*
 * sv_setregs hook: reset the thread's register state for a newly exec'd
 * 32-bit Linux image.
 *
 * td:    thread being set up; its trapframe and pcb are rewritten.
 * imgp:  supplies the entry point (entry_addr) and ps_strings address.
 * stack: initial user stack pointer.
 *
 * Visible effects: the FS and kernel-GS base MSRs are cleared, the initial
 * FPU control word is set to __LINUX_NPXCW__, the trapframe is zeroed and
 * then loaded with the entry point, stack pointer, 32-bit user selectors
 * and %rbx = imgp->ps_strings, PCB_32BIT | PCB_FULL_IRET are set, and
 * td_retval[1] is cleared.  The LDT handling around dt_lock is only
 * partly visible in this view.
 *
 * NOTE(review): tf_rflags is read right after the bzero() of the whole
 * trapframe, so the "& PSL_T" term always yields 0 and the previous trace
 * flag is never carried over -- confirm whether that is intended.
 */
814 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
816 struct trapframe *regs = td->td_frame;
817 struct pcb *pcb = td->td_pcb;
820 if (td->td_proc->p_md.md_ldt != NULL)
823 mtx_unlock(&dt_lock);
826 wrmsr(MSR_FSBASE, 0);
827 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
831 pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
833 bzero((char *)regs, sizeof(struct trapframe));
834 regs->tf_rip = imgp->entry_addr;
835 regs->tf_rsp = stack;
836 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
837 regs->tf_gs = _ugssel;
838 regs->tf_fs = _ufssel;
839 regs->tf_es = _udatasel;
840 regs->tf_ds = _udatasel;
841 regs->tf_ss = _udatasel;
842 regs->tf_flags = TF_HASSEGS;
843 regs->tf_cs = _ucode32sel;
844 regs->tf_rbx = imgp->ps_strings;
848 /* Do full restore on return so that we can change to a different %cs */
849 set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
850 td->td_retval[1] = 0;
854 * XXX copied from ia32_sysvec.c.
/*
 * sv_copyout_strings hook: lay out the argument and environment strings,
 * the platform string and the argv/envp pointer vectors at the top of the
 * new user stack, using 32-bit pointers.
 *
 * imgp: exec image parameters; args->begin_argv, argc, envc and
 *       stringspace describe the strings to copy.
 *
 * Returns stack_base cast to register_t *; per the comment in the body it
 * is the start of the pointer vector (its assignment is not visible in
 * this view).
 *
 * Layout, from high to low addresses: the linux32_ps_strings structure at
 * LINUX32_PS_STRINGS, linux_platform immediately below it, the string area
 * at `destp`, then the pointer vector at `vectp`.  When auxargs are present
 * the vector is sized with extra room for the aux entries that
 * elf_linux_fixup() writes later.  The ps_strings fields are filled in
 * with suword32().
 *
 * The results of copyout() and suword32() are not checked here.  The
 * condition selecting between the two vectp computations and the bodies of
 * the two string-walking loops are only partly visible in this view.
 */
857 linux_copyout_strings(struct image_params *imgp)
861 char *stringp, *destp;
862 u_int32_t *stack_base;
863 struct linux32_ps_strings *arginfo;
866 * Calculate string base and vector table pointers.
867 * Also deal with signal trampoline code for this exec type.
869 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
870 destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
871 roundup((ARG_MAX - imgp->args->stringspace),
875 * Install LINUX_PLATFORM
877 copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
881 * If we have a valid auxargs ptr, prepare some room
886 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
887 * lower compatibility.
889 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
890 (LINUX_AT_COUNT * 2);
892 * The '+ 2' is for the null pointers at the end of each of
893 * the arg and env vector sets,and imgp->auxarg_size is room
894 * for argument of Runtime loader.
896 vectp = (u_int32_t *) (destp - (imgp->args->argc +
897 imgp->args->envc + 2 + imgp->auxarg_size) *
902 * The '+ 2' is for the null pointers at the end of each of
903 * the arg and env vector sets
905 vectp = (u_int32_t *)(destp - (imgp->args->argc +
906 imgp->args->envc + 2) * sizeof(u_int32_t));
909 * vectp also becomes our initial stack base
913 stringp = imgp->args->begin_argv;
914 argc = imgp->args->argc;
915 envc = imgp->args->envc;
917 * Copy out strings - arguments and environment.
919 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
922 * Fill in "ps_strings" struct for ps, w, etc.
924 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
925 suword32(&arginfo->ps_nargvstr, argc);
928 * Fill in argument portion of vector table.
930 for (; argc > 0; --argc) {
931 suword32(vectp++, (uint32_t)(intptr_t)destp);
932 while (*stringp++ != 0)
937 /* a null vector table pointer separates the argp's from the envp's */
938 suword32(vectp++, 0);
940 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
941 suword32(&arginfo->ps_nenvstr, envc);
944 * Fill in environment portion of vector table.
946 for (; envc > 0; --envc) {
947 suword32(vectp++, (uint32_t)(intptr_t)destp);
948 while (*stringp++ != 0)
953 /* end of vector table is a null pointer */
956 return ((register_t *)stack_base);
/*
 * compat.linux32 sysctl node with three read-write tunables (maxdsiz,
 * maxssiz, maxvmem).  linux32_fixlimit() clamps resource limits to these
 * values; a value of 0 disables the corresponding clamp.  maxssiz is also
 * exported through elf_linux_sysvec.sv_maxssiz.
 */
959 static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
960 "32-bit Linux emulation");
962 static u_long linux32_maxdsiz = LINUX32_MAXDSIZ;
963 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
964 &linux32_maxdsiz, 0, "");
965 static u_long linux32_maxssiz = LINUX32_MAXSSIZ;
966 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
967 &linux32_maxssiz, 0, "");
968 static u_long linux32_maxvmem = LINUX32_MAXVMEM;
969 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
970 &linux32_maxvmem, 0, "");
/*
 * sv_fixlimit hook: clamp a resource limit to the compat.linux32 tunables.
 *
 * rl:    limit to adjust in place; both rlim_cur and rlim_max are lowered
 *        to the tunable when they exceed it.
 * which: selects the resource.  The dispatch on `which` is not visible in
 *        this view; presumably the three groups handle the data-size,
 *        stack-size and address-space limits respectively (TODO confirm).
 *
 * A tunable set to 0 leaves the corresponding limit untouched.
 */
973 linux32_fixlimit(struct rlimit *rl, int which)
978 if (linux32_maxdsiz != 0) {
979 if (rl->rlim_cur > linux32_maxdsiz)
980 rl->rlim_cur = linux32_maxdsiz;
981 if (rl->rlim_max > linux32_maxdsiz)
982 rl->rlim_max = linux32_maxdsiz;
986 if (linux32_maxssiz != 0) {
987 if (rl->rlim_cur > linux32_maxssiz)
988 rl->rlim_cur = linux32_maxssiz;
989 if (rl->rlim_max > linux32_maxssiz)
990 rl->rlim_max = linux32_maxssiz;
994 if (linux32_maxvmem != 0) {
995 if (rl->rlim_cur > linux32_maxvmem)
996 rl->rlim_cur = linux32_maxvmem;
997 if (rl->rlim_max > linux32_maxvmem)
998 rl->rlim_max = linux32_maxvmem;
/*
 * System entry vector for 32-bit Linux ELF processes.  It ties together
 * the syscall table (linux_sysent), the signal and errno translation
 * tables, the signal trampoline (linux_sigcode), and the exec/signal
 * callbacks defined in this file, and describes the 32-bit address space
 * layout (LINUX32_MAXUSER, LINUX32_USRSTACK, LINUX32_PS_STRINGS,
 * LINUX32_SHAREDPAGE).  INIT_SYSENTVEC registers it for initialisation.
 */
1004 struct sysentvec elf_linux_sysvec = {
1005 .sv_size = LINUX_SYS_MAXSYSCALL,
1006 .sv_table = linux_sysent,
1008 .sv_sigsize = LINUX_SIGTBLSZ,
1009 .sv_sigtbl = bsd_to_linux_signal,
1010 .sv_errsize = ELAST + 1,
1011 .sv_errtbl = bsd_to_linux_errno,
1012 .sv_transtrap = translate_traps,
1013 .sv_fixup = elf_linux_fixup,
1014 .sv_sendsig = linux_sendsig,
1015 .sv_sigcode = linux_sigcode,
1016 .sv_szsigcode = &linux_szsigcode,
1017 .sv_prepsyscall = NULL,
1018 .sv_name = "Linux ELF32",
1019 .sv_coredump = elf32_coredump,
1020 .sv_imgact_try = exec_linux_imgact_try,
1021 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1022 .sv_pagesize = PAGE_SIZE,
1023 .sv_minuser = VM_MIN_ADDRESS,
1024 .sv_maxuser = LINUX32_MAXUSER,
1025 .sv_usrstack = LINUX32_USRSTACK,
1026 .sv_psstrings = LINUX32_PS_STRINGS,
1027 .sv_stackprot = VM_PROT_ALL,
1028 .sv_copyout_strings = linux_copyout_strings,
1029 .sv_setregs = exec_linux_setregs,
1030 .sv_fixlimit = linux32_fixlimit,
1031 .sv_maxssiz = &linux32_maxssiz,
1032 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP,
1033 .sv_set_syscall_retval = cpu_set_syscall_retval,
1034 .sv_fetch_syscall_args = linux32_fetch_syscall_args,
1035 .sv_syscallnames = NULL,
1036 .sv_shared_page_base = LINUX32_SHAREDPAGE,
1037 .sv_shared_page_len = PAGE_SIZE,
1038 .sv_schedtail = linux_schedtail,
1040 INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
/*
 * Brand-note constants: the note vendor name used by linux32_brandnote and
 * the value linux32_trans_osrel() expects in the first descriptor word.
 */
1042 static char GNU_ABI_VENDOR[] = "GNU";
1043 static int GNULINUX_ABI_DESC = 0;
/*
 * Brand-note callback: derive the emulated OS release from an ELF note.
 *
 * note:  note header; the descriptor is located by skipping the header and
 *        the name, with the name length rounded up to sizeof(Elf32_Addr).
 * osrel: out; set to desc[1] * 1000000 + desc[2] * 1000 + desc[3].
 *
 * The note is only accepted when desc[0] equals GNULINUX_ABI_DESC.  The
 * return statements are not visible in this view.
 */
1046 linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
1048 const Elf32_Word *desc;
1051 p = (uintptr_t)(note + 1);
1052 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1054 desc = (const Elf32_Word *)p;
1055 if (desc[0] != GNULINUX_ABI_DESC)
1059 * For linux we encode osrel as follows (see linux_mib.c):
1060 * VVVMMMIII (version, major, minor), see linux_mib.c.
1062 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
/*
 * Brand note shared by both Linux brands: matches notes whose vendor name
 * is GNU_ABI_VENDOR with a 16-byte descriptor, and asks for the OS release
 * to be translated by linux32_trans_osrel() (BN_TRANSLATE_OSREL).
 */
1067 static Elf_Brandnote linux32_brandnote = {
1068 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1069 .hdr.n_descsz = 16, /* XXX at least 16 */
1071 .vendor = GNU_ABI_VENDOR,
1072 .flags = BN_TRANSLATE_OSREL,
1073 .trans_osrel = linux32_trans_osrel
/*
 * ELF brand for Linux binaries whose interpreter is /lib/ld-linux.so.1.
 * Uses elf_linux_sysvec, the /compat/linux emulation root and the shared
 * brand note.
 */
1076 static Elf32_Brandinfo linux_brand = {
1077 .brand = ELFOSABI_LINUX,
1079 .compat_3_brand = "Linux",
1080 .emul_path = "/compat/linux",
1081 .interp_path = "/lib/ld-linux.so.1",
1082 .sysvec = &elf_linux_sysvec,
1083 .interp_newpath = NULL,
1084 .brand_note = &linux32_brandnote,
1085 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * ELF brand for Linux binaries whose interpreter is /lib/ld-linux.so.2;
 * otherwise identical to linux_brand in the fields visible here.
 */
1088 static Elf32_Brandinfo linux_glibc2brand = {
1089 .brand = ELFOSABI_LINUX,
1091 .compat_3_brand = "Linux",
1092 .emul_path = "/compat/linux",
1093 .interp_path = "/lib/ld-linux.so.2",
1094 .sysvec = &elf_linux_sysvec,
1095 .interp_newpath = NULL,
1096 .brand_note = &linux32_brandnote,
1097 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
/*
 * List of brands that linux_elf_modevent() walks until it reaches a NULL
 * entry.  The initialiser entries are not visible in this view.
 */
1100 Elf32_Brandinfo *linux_brandlist[] = {
/*
 * Module event handler for the Linux ELF emulation.
 *
 * mod, data: not used in the visible code.
 * type:      module event; the dispatch on it is not visible in this view.
 *            The first half presumably runs on load and the second on
 *            unload (TODO confirm).
 *
 * First half: insert every brand from linux_brandlist, register the ioctl
 * and device handlers from the linker sets, initialise emul_lock,
 * emul_shared_lock, the futex list and futex_mtx, hook process_exit and
 * process_exec, compute linux_szplatform from linux_platform, register the
 * jail OSD and set stclohz from stathz (falling back to hz).
 *
 * Second half: check whether any brand is still in use, remove the brands,
 * then undo each of the registrations and initialisations above.
 *
 * Error bookkeeping and return statements are not visible in this view.
 */
1107 linux_elf_modevent(module_t mod, int type, void *data)
1109 Elf32_Brandinfo **brandinfo;
1111 struct linux_ioctl_handler **lihp;
1112 struct linux_device_handler **ldhp;
1118 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1120 if (elf32_insert_brand_entry(*brandinfo) < 0)
1123 SET_FOREACH(lihp, linux_ioctl_handler_set)
1124 linux_ioctl_register_handler(*lihp);
1125 SET_FOREACH(ldhp, linux_device_handler_set)
1126 linux_device_register_handler(*ldhp);
1127 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1128 sx_init(&emul_shared_lock, "emuldata->shared lock");
1129 LIST_INIT(&futex_list);
1130 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1131 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1132 linux_proc_exit, NULL, 1000);
1133 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1134 linux_proc_exec, NULL, 1000);
1135 linux_szplatform = roundup(strlen(linux_platform) + 1,
1137 linux_osd_jail_register();
1138 stclohz = (stathz ? stathz : hz);
1140 printf("Linux ELF exec handler installed\n");
1142 printf("cannot insert Linux ELF brand handler\n");
1145 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1147 if (elf32_brand_inuse(*brandinfo))
1150 for (brandinfo = &linux_brandlist[0];
1151 *brandinfo != NULL; ++brandinfo)
1152 if (elf32_remove_brand_entry(*brandinfo) < 0)
1156 SET_FOREACH(lihp, linux_ioctl_handler_set)
1157 linux_ioctl_unregister_handler(*lihp);
1158 SET_FOREACH(ldhp, linux_device_handler_set)
1159 linux_device_unregister_handler(*ldhp);
1160 mtx_destroy(&emul_lock);
1161 sx_destroy(&emul_shared_lock);
1162 mtx_destroy(&futex_mtx);
1163 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1164 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1165 linux_osd_jail_deregister();
1167 printf("Linux ELF exec handler removed\n");
1169 printf("Could not deinstall ELF interpreter entry\n");
/*
 * Module glue: linux_elf_mod is declared as module "linuxelf" at
 * SI_SUB_EXEC / SI_ORDER_ANY.  The moduledata_t initialiser fields are not
 * visible in this view.
 */
1177 static moduledata_t linux_elf_mod = {
1183 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);