]> CyberLeo.Net >> Repos - FreeBSD/releng/9.2.git/blob - sys/compat/linux/linux_emul.c
- Copy stable/9 to releng/9.2 as part of the 9.2-RELEASE cycle.
[FreeBSD/releng/9.2.git] / sys / compat / linux / linux_emul.c
1 /*-
2  * Copyright (c) 2006 Roman Divacky
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include "opt_compat.h"
33 #include "opt_kdtrace.h"
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/imgact.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/mutex.h>
42 #include <sys/sdt.h>
43 #include <sys/sx.h>
44 #include <sys/proc.h>
45 #include <sys/syscallsubr.h>
46 #include <sys/sysent.h>
47 #include <sys/sysproto.h>
48 #include <sys/unistd.h>
49
50 #ifdef COMPAT_LINUX32
51 #include <machine/../linux32/linux.h>
52 #include <machine/../linux32/linux32_proto.h>
53 #else
54 #include <machine/../linux/linux.h>
55 #include <machine/../linux/linux_proto.h>
56 #endif
57
58 #include <compat/linux/linux_dtrace.h>
59 #include <compat/linux/linux_emul.h>
60 #include <compat/linux/linux_futex.h>
61
62 /**
63  * Special DTrace provider for the linuxulator.
64  *
65  * In this file we define the provider for the entire linuxulator. All
66  * modules (= files of the linuxulator) use it.
67  *
68  * We define a different name depending on the emulated bitsize, see
69  * ../../<ARCH>/linux{,32}/linux.h, e.g.:
70  *      native bitsize          = linuxulator
71  *      amd64, 32bit emulation  = linuxulator32
72  */
73 LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE);
74
75 /**
76  * Special DTrace module "locks", it covers some linuxulator internal
77  * locks.
78  */
79 LIN_SDT_PROBE_DEFINE1(locks, emul_lock, locked, "struct mtx *");
80 LIN_SDT_PROBE_DEFINE1(locks, emul_lock, unlock, "struct mtx *");
81 LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, locked, "struct sx *");
82 LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, unlock, "struct sx *");
83 LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, locked, "struct sx *");
84 LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, unlock, "struct sx *");
85
86 /**
87  * DTrace probes in this module.
88  */
89 LIN_SDT_PROBE_DEFINE2(emul, em_find, entry, "struct proc *", "int");
90 LIN_SDT_PROBE_DEFINE0(emul, em_find, return);
91 LIN_SDT_PROBE_DEFINE3(emul, proc_init, entry, "struct thread *", "pid_t",
92     "int");
93 LIN_SDT_PROBE_DEFINE0(emul, proc_init, create_thread);
94 LIN_SDT_PROBE_DEFINE0(emul, proc_init, fork);
95 LIN_SDT_PROBE_DEFINE0(emul, proc_init, exec);
96 LIN_SDT_PROBE_DEFINE0(emul, proc_init, return);
97 LIN_SDT_PROBE_DEFINE1(emul, proc_exit, entry, "struct proc *");
98 LIN_SDT_PROBE_DEFINE0(emul, proc_exit, futex_failed);
99 LIN_SDT_PROBE_DEFINE3(emul, proc_exit, reparent, "pid_t", "pid_t",
100     "struct proc *");
101 LIN_SDT_PROBE_DEFINE1(emul, proc_exit, child_clear_tid_error, "int");
102 LIN_SDT_PROBE_DEFINE0(emul, proc_exit, return);
103 LIN_SDT_PROBE_DEFINE2(emul, proc_exec, entry, "struct proc *",
104     "struct image_params *");
105 LIN_SDT_PROBE_DEFINE0(emul, proc_exec, return);
106 LIN_SDT_PROBE_DEFINE0(emul, linux_schedtail, entry);
107 LIN_SDT_PROBE_DEFINE1(emul, linux_schedtail, copyout_error, "int");
108 LIN_SDT_PROBE_DEFINE0(emul, linux_schedtail, return);
109 LIN_SDT_PROBE_DEFINE1(emul, linux_set_tid_address, entry, "int *");
110 LIN_SDT_PROBE_DEFINE0(emul, linux_set_tid_address, return);
111 LIN_SDT_PROBE_DEFINE2(emul, linux_kill_threads, entry, "struct thread *",
112     "int");
113 LIN_SDT_PROBE_DEFINE1(emul, linux_kill_threads, kill, "pid_t");
114 LIN_SDT_PROBE_DEFINE0(emul, linux_kill_threads, return);
115
116 struct sx       emul_shared_lock;
117 struct mtx      emul_lock;
118
119 /* this returns locked reference to the emuldata entry (if found) */
120 struct linux_emuldata *
121 em_find(struct proc *p, int locked)
122 {
123         struct linux_emuldata *em;
124
125         LIN_SDT_PROBE2(emul, em_find, entry, p, locked);
126
127         if (locked == EMUL_DOLOCK)
128                 EMUL_LOCK(&emul_lock);
129
130         em = p->p_emuldata;
131
132         if (em == NULL && locked == EMUL_DOLOCK)
133                 EMUL_UNLOCK(&emul_lock);
134
135         LIN_SDT_PROBE1(emul, em_find, return, em);
136         return (em);
137 }
138
139 int
140 linux_proc_init(struct thread *td, pid_t child, int flags)
141 {
142         struct linux_emuldata *em, *p_em;
143         struct proc *p;
144
145         LIN_SDT_PROBE3(emul, proc_init, entry, td, child, flags);
146
147         if (child != 0) {
148                 /* fork or create a thread */
149                 em = malloc(sizeof *em, M_LINUX, M_WAITOK | M_ZERO);
150                 em->pid = child;
151                 em->pdeath_signal = 0;
152                 em->flags = 0;
153                 em->robust_futexes = NULL;
154                 if (flags & LINUX_CLONE_THREAD) {
155                         /* handled later in the code */
156                         LIN_SDT_PROBE0(emul, proc_init, create_thread);
157                 } else {
158                         struct linux_emuldata_shared *s;
159
160                         LIN_SDT_PROBE0(emul, proc_init, fork);
161
162                         s = malloc(sizeof *s, M_LINUX, M_WAITOK | M_ZERO);
163                         s->refs = 1;
164                         s->group_pid = child;
165
166                         LIST_INIT(&s->threads);
167                         em->shared = s;
168                 }
169         } else {
170                 /* exec */
171                 LIN_SDT_PROBE0(emul, proc_init, exec);
172
173                 /* lookup the old one */
174                 em = em_find(td->td_proc, EMUL_DOLOCK);
175                 KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n"));
176         }
177
178         em->child_clear_tid = NULL;
179         em->child_set_tid = NULL;
180
181         /*
182          * allocate the shared struct only in clone()/fork cases in the case
183          * of clone() td = calling proc and child = pid of the newly created
184          * proc
185          */
186         if (child != 0) {
187                 if (flags & LINUX_CLONE_THREAD) {
188                         /* lookup the parent */
189                         /* 
190                          * we dont have to lock the p_em because
191                          * its waiting for us in linux_clone so
192                          * there is no chance of it changing the
193                          * p_em->shared address
194                          */
195                         p_em = em_find(td->td_proc, EMUL_DONTLOCK);
196                         KASSERT(p_em != NULL, ("proc_init: parent emuldata not found for CLONE_THREAD\n"));
197                         em->shared = p_em->shared;
198                         EMUL_SHARED_WLOCK(&emul_shared_lock);
199                         em->shared->refs++;
200                         EMUL_SHARED_WUNLOCK(&emul_shared_lock);
201                 } else {
202                         /*
203                          * handled earlier to avoid malloc(M_WAITOK) with
204                          * rwlock held
205                          */
206                 }
207
208                 EMUL_SHARED_WLOCK(&emul_shared_lock);
209                 LIST_INSERT_HEAD(&em->shared->threads, em, threads);
210                 EMUL_SHARED_WUNLOCK(&emul_shared_lock);
211
212                 p = pfind(child);
213                 KASSERT(p != NULL, ("process not found in proc_init\n"));
214                 p->p_emuldata = em;
215                 PROC_UNLOCK(p);
216         } else
217                 EMUL_UNLOCK(&emul_lock);
218
219         LIN_SDT_PROBE0(emul, proc_init, return);
220         return (0);
221 }
222
223 void
224 linux_proc_exit(void *arg __unused, struct proc *p)
225 {
226         struct linux_emuldata *em;
227         int error, shared_flags, shared_xstat;
228         struct thread *td = FIRST_THREAD_IN_PROC(p);
229         int *child_clear_tid;
230         struct proc *q, *nq;
231
232         if (__predict_true(p->p_sysent != &elf_linux_sysvec))
233                 return;
234
235         LIN_SDT_PROBE1(emul, proc_exit, entry, p);
236
237         release_futexes(p);
238
239         /* find the emuldata */
240         em = em_find(p, EMUL_DOLOCK);
241
242         KASSERT(em != NULL, ("proc_exit: emuldata not found.\n"));
243
244         /* reparent all procs that are not a thread leader to initproc */
245         if (em->shared->group_pid != p->p_pid) {
246                 LIN_SDT_PROBE3(emul, proc_exit, reparent,
247                     em->shared->group_pid, p->p_pid, p);
248
249                 child_clear_tid = em->child_clear_tid;
250                 EMUL_UNLOCK(&emul_lock);
251                 sx_xlock(&proctree_lock);
252                 wakeup(initproc);
253                 PROC_LOCK(p);
254                 proc_reparent(p, initproc);
255                 p->p_sigparent = SIGCHLD;
256                 PROC_UNLOCK(p);
257                 sx_xunlock(&proctree_lock);
258         } else {
259                 child_clear_tid = em->child_clear_tid;
260                 EMUL_UNLOCK(&emul_lock);        
261         }
262
263         EMUL_SHARED_WLOCK(&emul_shared_lock);
264         shared_flags = em->shared->flags;
265         shared_xstat = em->shared->xstat;
266         LIST_REMOVE(em, threads);
267
268         em->shared->refs--;
269         if (em->shared->refs == 0) {
270                 EMUL_SHARED_WUNLOCK(&emul_shared_lock);
271                 free(em->shared, M_LINUX);
272         } else  
273                 EMUL_SHARED_WUNLOCK(&emul_shared_lock);
274
275         if ((shared_flags & EMUL_SHARED_HASXSTAT) != 0)
276                 p->p_xstat = shared_xstat;
277
278         if (child_clear_tid != NULL) {
279                 struct linux_sys_futex_args cup;
280                 int null = 0;
281
282                 error = copyout(&null, child_clear_tid, sizeof(null));
283                 if (error) {
284                         LIN_SDT_PROBE1(emul, proc_exit,
285                             child_clear_tid_error, error);
286
287                         free(em, M_LINUX);
288
289                         LIN_SDT_PROBE0(emul, proc_exit, return);
290                         return;
291                 }
292
293                 /* futexes stuff */
294                 cup.uaddr = child_clear_tid;
295                 cup.op = LINUX_FUTEX_WAKE;
296                 cup.val = 0x7fffffff;   /* Awake everyone */
297                 cup.timeout = NULL;
298                 cup.uaddr2 = NULL;
299                 cup.val3 = 0;
300                 error = linux_sys_futex(FIRST_THREAD_IN_PROC(p), &cup);
301                 /*
302                  * this cannot happen at the moment and if this happens it
303                  * probably means there is a user space bug
304                  */
305                 if (error) {
306                         LIN_SDT_PROBE0(emul, proc_exit, futex_failed);
307                         printf(LMSG("futex stuff in proc_exit failed.\n"));
308                 }
309         }
310
311         /* clean the stuff up */
312         free(em, M_LINUX);
313
314         /* this is a little weird but rewritten from exit1() */
315         sx_xlock(&proctree_lock);
316         q = LIST_FIRST(&p->p_children);
317         for (; q != NULL; q = nq) {
318                 nq = LIST_NEXT(q, p_sibling);
319                 if (q->p_flag & P_WEXIT)
320                         continue;
321                 if (__predict_false(q->p_sysent != &elf_linux_sysvec))
322                         continue;
323                 em = em_find(q, EMUL_DOLOCK);
324                 KASSERT(em != NULL, ("linux_reparent: emuldata not found: %i\n", q->p_pid));
325                 PROC_LOCK(q);
326                 if ((q->p_flag & P_WEXIT) == 0 && em->pdeath_signal != 0) {
327                         kern_psignal(q, em->pdeath_signal);
328                 }
329                 PROC_UNLOCK(q);
330                 EMUL_UNLOCK(&emul_lock);
331         }
332         sx_xunlock(&proctree_lock);
333
334         LIN_SDT_PROBE0(emul, proc_exit, return);
335 }
336
337 /*
338  * This is used in a case of transition from FreeBSD binary execing to linux binary
339  * in this case we create linux emuldata proc entry with the pid of the currently running
340  * process.
341  */
342 void 
343 linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp)
344 {
345         if (__predict_false(imgp->sysent == &elf_linux_sysvec)) {
346                 LIN_SDT_PROBE2(emul, proc_exec, entry, p, imgp);
347         }
348         if (__predict_false(imgp->sysent == &elf_linux_sysvec
349             && p->p_sysent != &elf_linux_sysvec))
350                 linux_proc_init(FIRST_THREAD_IN_PROC(p), p->p_pid, 0);
351         if (__predict_false((p->p_sysent->sv_flags & SV_ABI_MASK) ==
352             SV_ABI_LINUX))
353                 /* Kill threads regardless of imgp->sysent value */
354                 linux_kill_threads(FIRST_THREAD_IN_PROC(p), SIGKILL);
355         if (__predict_false(imgp->sysent != &elf_linux_sysvec
356             && p->p_sysent == &elf_linux_sysvec)) {
357                 struct linux_emuldata *em;
358
359                 /* 
360                  * XXX:There's a race because here we assign p->p_emuldata NULL
361                  * but the process is still counted as linux one for a short
362                  * time so some other process might reference it and try to
363                  * access its p->p_emuldata and panicing on a NULL reference.
364                  */
365                 em = em_find(p, EMUL_DONTLOCK);
366
367                 KASSERT(em != NULL, ("proc_exec: emuldata not found.\n"));
368
369                 EMUL_SHARED_WLOCK(&emul_shared_lock);
370                 LIST_REMOVE(em, threads);
371
372                 PROC_LOCK(p);
373                 p->p_emuldata = NULL;
374                 PROC_UNLOCK(p);
375
376                 em->shared->refs--;
377                 if (em->shared->refs == 0) {
378                         EMUL_SHARED_WUNLOCK(&emul_shared_lock);
379                         free(em->shared, M_LINUX);
380                 } else
381                         EMUL_SHARED_WUNLOCK(&emul_shared_lock);
382
383                 free(em, M_LINUX);
384         }
385
386         if (__predict_false(imgp->sysent == &elf_linux_sysvec)) {
387                 LIN_SDT_PROBE0(emul, proc_exec, return);
388         }
389 }
390
391 void
392 linux_schedtail(struct thread *td)
393 {
394         struct linux_emuldata *em;
395         struct proc *p;
396         int error = 0;
397         int *child_set_tid;
398
399         p = td->td_proc;
400
401         LIN_SDT_PROBE1(emul, linux_schedtail, entry, p);
402
403         /* find the emuldata */
404         em = em_find(p, EMUL_DOLOCK);
405
406         KASSERT(em != NULL, ("linux_schedtail: emuldata not found.\n"));
407         child_set_tid = em->child_set_tid;
408         EMUL_UNLOCK(&emul_lock);
409
410         if (child_set_tid != NULL) {
411                 error = copyout(&p->p_pid, (int *)child_set_tid,
412                     sizeof(p->p_pid));
413
414                 if (error != 0) {
415                         LIN_SDT_PROBE1(emul, linux_schedtail, copyout_error,
416                             error);
417                 }
418         }
419
420         LIN_SDT_PROBE0(emul, linux_schedtail, return);
421
422         return;
423 }
424
425 int
426 linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args)
427 {
428         struct linux_emuldata *em;
429
430         LIN_SDT_PROBE1(emul, linux_set_tid_address, entry, args->tidptr);
431
432         /* find the emuldata */
433         em = em_find(td->td_proc, EMUL_DOLOCK);
434
435         KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n"));
436
437         em->child_clear_tid = args->tidptr;
438         td->td_retval[0] = td->td_proc->p_pid;
439
440         EMUL_UNLOCK(&emul_lock);
441
442         LIN_SDT_PROBE0(emul, linux_set_tid_address, return);
443         return 0;
444 }
445
446 void
447 linux_kill_threads(struct thread *td, int sig)
448 {
449         struct linux_emuldata *em, *td_em, *tmp_em;
450         struct proc *sp;
451
452         LIN_SDT_PROBE2(emul, linux_kill_threads, entry, td, sig);
453
454         td_em = em_find(td->td_proc, EMUL_DONTLOCK);
455
456         KASSERT(td_em != NULL, ("linux_kill_threads: emuldata not found.\n"));
457
458         EMUL_SHARED_RLOCK(&emul_shared_lock);
459         LIST_FOREACH_SAFE(em, &td_em->shared->threads, threads, tmp_em) {
460                 if (em->pid == td_em->pid)
461                         continue;
462
463                 sp = pfind(em->pid);
464                 if ((sp->p_flag & P_WEXIT) == 0)
465                         kern_psignal(sp, sig);
466                 PROC_UNLOCK(sp);
467
468                 LIN_SDT_PROBE1(emul, linux_kill_threads, kill, em->pid);
469         }
470         EMUL_SHARED_RUNLOCK(&emul_shared_lock);
471
472         LIN_SDT_PROBE0(emul, linux_kill_threads, return);
473 }