CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/kern/link_elf_obj.c
Kernel linkers: add emergency sysctl to restore old behavior
[FreeBSD/FreeBSD.git] / sys / kern / link_elf_obj.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1998-2000 Doug Rabson
5  * Copyright (c) 2004 Peter Wemm
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include "opt_ddb.h"
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/fcntl.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/linker.h>
42 #include <sys/mutex.h>
43 #include <sys/mount.h>
44 #include <sys/namei.h>
45 #include <sys/proc.h>
46 #include <sys/rwlock.h>
47 #include <sys/sysctl.h>
48 #include <sys/vnode.h>
49
50 #include <machine/elf.h>
51
52 #include <net/vnet.h>
53
54 #include <security/mac/mac_framework.h>
55
56 #include <vm/vm.h>
57 #include <vm/vm_param.h>
58 #include <vm/pmap.h>
59 #include <vm/vm_extern.h>
60 #include <vm/vm_kern.h>
61 #include <vm/vm_map.h>
62 #include <vm/vm_object.h>
63 #include <vm/vm_page.h>
64 #include <vm/vm_pager.h>
65
66 #include <sys/link_elf.h>
67
68 #ifdef DDB_CTF
69 #include <contrib/zlib/zlib.h>
70 #endif
71
72 #include "linker_if.h"
73
/*
 * One entry of elf_file.progtab: a section of the object that occupies
 * memory.  link_elf_link_preload() fills one of these for each
 * PROGBITS/NOBITS/INIT_ARRAY/FINI_ARRAY (and, on amd64, UNWIND) section
 * that has a nonzero sh_addr.
 */
typedef struct {
        void            *addr;  /* Where the section contents live. */
        Elf_Off         size;   /* Section size, copied from sh_size. */
        int             flags;  /* Section flags. */
        int             sec;    /* Original section number. */
        char            *name;  /* Section name or "<<TYPE>>" placeholder. */
} Elf_progent;
81
/*
 * One entry of elf_file.reltab, describing a single SHT_REL section.
 */
typedef struct {
        Elf_Rel         *rel;   /* Relocation records. */
        int             nrel;   /* Number of records in rel. */
        int             sec;    /* sh_info of the relocation section. */
} Elf_relent;
87
/*
 * One entry of elf_file.relatab, describing a single SHT_RELA section.
 */
typedef struct {
        Elf_Rela        *rela;  /* Relocation records (with addends). */
        int             nrela;  /* Number of records in rela. */
        int             sec;    /* sh_info of the relocation section. */
} Elf_relaent;
93
/*
 * Per-file state of this linker class.  The generic struct linker_file is
 * the first member because the code converts between linker_file_t and
 * elf_file_t with plain casts.
 */
typedef struct elf_file {
        struct linker_file lf;          /* Common fields */

        int             preloaded;      /* Nonzero for preloaded files */
        caddr_t         address;        /* Relocation address */
        vm_object_t     object;         /* VM object to hold file pages */
        Elf_Shdr        *e_shdr;        /* Section header table */

        Elf_progent     *progtab;       /* Sections occupying memory */
        u_int           nprogtab;       /* Number of progtab entries */

        Elf_relaent     *relatab;       /* SHT_RELA relocation tables */
        u_int           nrelatab;       /* Number of relatab entries */

        Elf_relent      *reltab;        /* SHT_REL relocation tables */
        int             nreltab;        /* Number of reltab entries */

        Elf_Sym         *ddbsymtab;     /* The symbol table we are using */
        long            ddbsymcnt;      /* Number of symbols */
        caddr_t         ddbstrtab;      /* String table */
        long            ddbstrcnt;      /* number of bytes in string table */

        caddr_t         shstrtab;       /* Section name string table */
        long            shstrcnt;       /* number of bytes in string table */

        caddr_t         ctftab;         /* CTF table */
        long            ctfcnt;         /* number of bytes in CTF table */
        caddr_t         ctfoff;         /* CTF offset table */
        caddr_t         typoff;         /* Type offset table */
        long            typlen;         /* Number of type entries. */

} *elf_file_t;
126
127 #include <kern/kern_ctf.c>
128
/*
 * Prototypes for the file-local functions that are entered into the
 * linker method table below, followed by the local symbol lookup helper.
 */
static int      link_elf_link_preload(linker_class_t cls,
                    const char *, linker_file_t *);
static int      link_elf_link_preload_finish(linker_file_t);
static int      link_elf_load_file(linker_class_t, const char *, linker_file_t *);
static int      link_elf_lookup_symbol(linker_file_t, const char *,
                    c_linker_sym_t *);
static int      link_elf_lookup_debug_symbol(linker_file_t, const char *,
                    c_linker_sym_t *);
static int      link_elf_symbol_values(linker_file_t, c_linker_sym_t,
                    linker_symval_t *);
static int      link_elf_debug_symbol_values(linker_file_t, c_linker_sym_t,
                    linker_symval_t *);
static int      link_elf_search_symbol(linker_file_t, caddr_t value,
                    c_linker_sym_t *sym, long *diffp);

static void     link_elf_unload_file(linker_file_t);
static int      link_elf_lookup_set(linker_file_t, const char *,
                    void ***, void ***, int *);
static int      link_elf_each_function_name(linker_file_t,
                    int (*)(const char *, void *), void *);
static int      link_elf_each_function_nameval(linker_file_t,
                                linker_function_nameval_callback_t,
                                void *);
static int      link_elf_reloc_local(linker_file_t, bool);
static long     link_elf_symtab_get(linker_file_t, const Elf_Sym **);
static long     link_elf_strtab_get(linker_file_t, caddr_t *);

static int      elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps,
                    Elf_Addr *);
158
/*
 * Method table of this linker class: maps each linker interface method
 * to its implementation.  The table is terminated by KOBJMETHOD_END.
 */
static kobj_method_t link_elf_methods[] = {
        KOBJMETHOD(linker_lookup_symbol,        link_elf_lookup_symbol),
        KOBJMETHOD(linker_lookup_debug_symbol,  link_elf_lookup_debug_symbol),
        KOBJMETHOD(linker_symbol_values,        link_elf_symbol_values),
        KOBJMETHOD(linker_debug_symbol_values,  link_elf_debug_symbol_values),
        KOBJMETHOD(linker_search_symbol,        link_elf_search_symbol),
        KOBJMETHOD(linker_unload,               link_elf_unload_file),
        KOBJMETHOD(linker_load_file,            link_elf_load_file),
        KOBJMETHOD(linker_link_preload,         link_elf_link_preload),
        KOBJMETHOD(linker_link_preload_finish,  link_elf_link_preload_finish),
        KOBJMETHOD(linker_lookup_set,           link_elf_lookup_set),
        KOBJMETHOD(linker_each_function_name,   link_elf_each_function_name),
        KOBJMETHOD(linker_each_function_nameval, link_elf_each_function_nameval),
        KOBJMETHOD(linker_ctf_get,              link_elf_ctf_get),
        KOBJMETHOD(linker_symtab_get,           link_elf_symtab_get),
        KOBJMETHOD(linker_strtab_get,           link_elf_strtab_get),
        KOBJMETHOD_END
};
177
/*
 * The linker class registered by link_elf_init().  Its name depends on
 * the target ELF class, and each file created for the class is allocated
 * with room for a struct elf_file.
 */
static struct linker_class link_elf_class = {
#if ELF_TARG_CLASS == ELFCLASS32
        "elf32_obj",
#else
        "elf64_obj",
#endif
        link_elf_methods, sizeof(struct elf_file)
};
186
/*
 * debug.link_elf_obj_leak_locals: boolean knob, true by default, writable
 * at run time and settable as a tunable (CTLFLAG_RWTUN).  Per its
 * description it decides whether local symbols may take part in global
 * module symbol resolution; the code consulting it is not in this part of
 * the file.
 */
static bool link_elf_obj_leak_locals = true;
SYSCTL_BOOL(_debug, OID_AUTO, link_elf_obj_leak_locals,
    CTLFLAG_RWTUN, &link_elf_obj_leak_locals, 0,
    "Allow local symbols to participate in global module symbol resolution");

/* Further file-local helpers, declared here ahead of their first use. */
static int      relocate_file(elf_file_t ef);
static void     elf_obj_cleanup_globals_cache(elf_file_t);
194
/*
 * Print a "kldload:" diagnostic.  The file name is included in the
 * message only when the caller supplies one.
 */
static void
link_elf_error(const char *filename, const char *s)
{
        if (filename != NULL) {
                printf("kldload: %s: %s\n", filename, s);
                return;
        }
        printf("kldload: %s\n", s);
}
203
/*
 * SYSINIT hook: register this file's linker class with the kernel linker.
 * 'arg' is unused (the SYSINIT below passes NULL).
 */
static void
link_elf_init(void *arg)
{

        linker_add_class(&link_elf_class);
}
SYSINIT(link_elf_obj, SI_SUB_KLD, SI_ORDER_SECOND, link_elf_init, NULL);
211
212 static void
213 link_elf_protect_range(elf_file_t ef, vm_offset_t start, vm_offset_t end,
214     vm_prot_t prot)
215 {
216         int error __unused;
217
218         KASSERT(start <= end && start >= (vm_offset_t)ef->address &&
219             end <= round_page((vm_offset_t)ef->address + ef->lf.size),
220             ("link_elf_protect_range: invalid range %#jx-%#jx",
221             (uintmax_t)start, (uintmax_t)end));
222
223         if (start == end)
224                 return;
225         if (ef->preloaded) {
226 #ifdef __amd64__
227                 error = pmap_change_prot(start, end - start, prot);
228                 KASSERT(error == 0,
229                     ("link_elf_protect_range: pmap_change_prot() returned %d",
230                     error));
231 #endif
232                 return;
233         }
234         error = vm_map_protect(kernel_map, start, end, prot, 0,
235             VM_MAP_PROTECT_SET_PROT);
236         KASSERT(error == KERN_SUCCESS,
237             ("link_elf_protect_range: vm_map_protect() returned %d", error));
238 }
239
240 /*
241  * Restrict permissions on linker file memory based on section flags.
242  * Sections need not be page-aligned, so overlap within a page is possible.
243  */
244 static void
245 link_elf_protect(elf_file_t ef)
246 {
247         vm_offset_t end, segend, segstart, start;
248         vm_prot_t gapprot, prot, segprot;
249         int i;
250
251         /*
252          * If the file was preloaded, the last page may contain other preloaded
253          * data which may need to be writeable.  ELF files are always
254          * page-aligned, but other preloaded data, such as entropy or CPU
255          * microcode may be loaded with a smaller alignment.
256          */
257         gapprot = ef->preloaded ? VM_PROT_RW : VM_PROT_READ;
258
259         start = end = (vm_offset_t)ef->address;
260         prot = VM_PROT_READ;
261         for (i = 0; i < ef->nprogtab; i++) {
262                 /*
263                  * VNET and DPCPU sections have their memory allocated by their
264                  * respective subsystems.
265                  */
266                 if (ef->progtab[i].name != NULL && (
267 #ifdef VIMAGE
268                     strcmp(ef->progtab[i].name, VNET_SETNAME) == 0 ||
269 #endif
270                     strcmp(ef->progtab[i].name, DPCPU_SETNAME) == 0))
271                         continue;
272
273                 segstart = trunc_page((vm_offset_t)ef->progtab[i].addr);
274                 segend = round_page((vm_offset_t)ef->progtab[i].addr +
275                     ef->progtab[i].size);
276                 segprot = VM_PROT_READ;
277                 if ((ef->progtab[i].flags & SHF_WRITE) != 0)
278                         segprot |= VM_PROT_WRITE;
279                 if ((ef->progtab[i].flags & SHF_EXECINSTR) != 0)
280                         segprot |= VM_PROT_EXECUTE;
281
282                 if (end <= segstart) {
283                         /*
284                          * Case 1: there is no overlap between the previous
285                          * segment and this one.  Apply protections to the
286                          * previous segment, and protect the gap between the
287                          * previous and current segments, if any.
288                          */
289                         link_elf_protect_range(ef, start, end, prot);
290                         link_elf_protect_range(ef, end, segstart, gapprot);
291
292                         start = segstart;
293                         end = segend;
294                         prot = segprot;
295                 } else if (start < segstart && end == segend) {
296                         /*
297                          * Case 2: the current segment is a subrange of the
298                          * previous segment.  Apply protections to the
299                          * non-overlapping portion of the previous segment.
300                          */
301                         link_elf_protect_range(ef, start, segstart, prot);
302
303                         start = segstart;
304                         prot |= segprot;
305                 } else if (end < segend) {
306                         /*
307                          * Case 3: there is partial overlap between the previous
308                          * and current segments.  Apply protections to the
309                          * non-overlapping portion of the previous segment, and
310                          * then the overlap, which must use the union of the two
311                          * segments' protections.
312                          */
313                         link_elf_protect_range(ef, start, segstart, prot);
314                         link_elf_protect_range(ef, segstart, end,
315                             prot | segprot);
316                         start = end;
317                         end = segend;
318                         prot = segprot;
319                 } else {
320                         /*
321                          * Case 4: the two segments reside in the same page.
322                          */
323                         prot |= segprot;
324                 }
325         }
326
327         /*
328          * Fix up the last unprotected segment and trailing data.
329          */
330         link_elf_protect_range(ef, start, end, prot);
331         link_elf_protect_range(ef, end,
332             round_page((vm_offset_t)ef->address + ef->lf.size), gapprot);
333 }
334
/*
 * Create a linker file for an object module that was preloaded into
 * memory before the kernel started.
 *
 * The preload metadata for 'filename' supplies the module type, base
 * address, size, ELF header and section header table.  After validating
 * them the function:
 *   1. counts the sections and relocation tables that occupy memory;
 *   2. rebases every nonzero sh_addr so that the lowest one lands on the
 *      module base address;
 *   3. fills progtab/reltab/relatab, moving DPCPU (and, with VIMAGE, VNET)
 *      set contents into space obtained from those subsystems and
 *      recording constructor/destructor sections;
 *   4. adds each section's final address to the symbols defined in it;
 *   5. makes the whole file VM_PROT_ALL and applies local relocations.
 *
 * Returns 0 and stores the new file in *result on success.  Errors:
 * ENOENT (not preloaded), EFTYPE (wrong module type or ELF header),
 * EINVAL (missing metadata), ENOMEM, ENOEXEC (bad section headers or
 * inconsistent counts), ENOSPC (DPCPU/VNET space exhausted), or whatever
 * link_elf_reloc_local() returns.  Once the linker file exists, every
 * failure unloads it again before returning.
 */
static int
link_elf_link_preload(linker_class_t cls, const char *filename,
    linker_file_t *result)
{
        Elf_Ehdr *hdr;
        Elf_Shdr *shdr;
        Elf_Sym *es;
        void *modptr, *baseptr, *sizeptr;
        char *type;
        elf_file_t ef;
        linker_file_t lf;
        Elf_Addr off;
        int error, i, j, pb, ra, rl, shstrindex, symstrindex, symtabindex;

        /* Look to see if we have the file preloaded */
        modptr = preload_search_by_name(filename);
        if (modptr == NULL)
                return ENOENT;

        type = (char *)preload_search_info(modptr, MODINFO_TYPE);
        baseptr = preload_search_info(modptr, MODINFO_ADDR);
        sizeptr = preload_search_info(modptr, MODINFO_SIZE);
        hdr = (Elf_Ehdr *)preload_search_info(modptr, MODINFO_METADATA |
            MODINFOMD_ELFHDR);
        shdr = (Elf_Shdr *)preload_search_info(modptr, MODINFO_METADATA |
            MODINFOMD_SHDR);
        /* Accept both the word-size-qualified and the generic type string. */
        if (type == NULL || (strcmp(type, "elf" __XSTRING(__ELF_WORD_SIZE)
            " obj module") != 0 &&
            strcmp(type, "elf obj module") != 0)) {
                return (EFTYPE);
        }
        if (baseptr == NULL || sizeptr == NULL || hdr == NULL ||
            shdr == NULL)
                return (EINVAL);

        lf = linker_make_file(filename, &link_elf_class);
        if (lf == NULL)
                return (ENOMEM);

        /* From here on, failures go through 'out' to unload lf. */
        ef = (elf_file_t)lf;
        ef->preloaded = 1;
        ef->address = *(caddr_t *)baseptr;
        lf->address = *(caddr_t *)baseptr;
        lf->size = *(size_t *)sizeptr;

        if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
            hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
            hdr->e_ident[EI_VERSION] != EV_CURRENT ||
            hdr->e_version != EV_CURRENT ||
            hdr->e_type != ET_REL ||
            hdr->e_machine != ELF_TARG_MACH) {
                error = EFTYPE;
                goto out;
        }
        ef->e_shdr = shdr;

        /* Scan the section header for information and table sizing. */
        symtabindex = -1;
        symstrindex = -1;
        for (i = 0; i < hdr->e_shnum; i++) {
                switch (shdr[i].sh_type) {
                case SHT_PROGBITS:
                case SHT_NOBITS:
#ifdef __amd64__
                case SHT_X86_64_UNWIND:
#endif
                case SHT_INIT_ARRAY:
                case SHT_FINI_ARRAY:
                        /* Ignore sections not loaded by the loader. */
                        if (shdr[i].sh_addr == 0)
                                break;
                        ef->nprogtab++;
                        break;
                case SHT_SYMTAB:
                        symtabindex = i;
                        symstrindex = shdr[i].sh_link;
                        break;
                case SHT_REL:
                        /*
                         * Ignore relocation tables for sections not
                         * loaded by the loader.
                         */
                        if (shdr[shdr[i].sh_info].sh_addr == 0)
                                break;
                        ef->nreltab++;
                        break;
                case SHT_RELA:
                        if (shdr[shdr[i].sh_info].sh_addr == 0)
                                break;
                        ef->nrelatab++;
                        break;
                }
        }

        /*
         * symtabindex and symstrindex are only ever set together, so the
         * symstrindex test below also rejects a file without a symbol table.
         */
        shstrindex = hdr->e_shstrndx;
        if (ef->nprogtab == 0 || symstrindex < 0 ||
            symstrindex >= hdr->e_shnum ||
            shdr[symstrindex].sh_type != SHT_STRTAB || shstrindex == 0 ||
            shstrindex >= hdr->e_shnum ||
            shdr[shstrindex].sh_type != SHT_STRTAB) {
                printf("%s: bad/missing section headers\n", filename);
                error = ENOEXEC;
                goto out;
        }

        /* Allocate space for tracking the load chunks */
        if (ef->nprogtab != 0)
                ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab),
                    M_LINKER, M_WAITOK | M_ZERO);
        if (ef->nreltab != 0)
                ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab),
                    M_LINKER, M_WAITOK | M_ZERO);
        if (ef->nrelatab != 0)
                ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab),
                    M_LINKER, M_WAITOK | M_ZERO);
        if ((ef->nprogtab != 0 && ef->progtab == NULL) ||
            (ef->nreltab != 0 && ef->reltab == NULL) ||
            (ef->nrelatab != 0 && ef->relatab == NULL)) {
                error = ENOMEM;
                goto out;
        }

        /* XXX, relocate the sh_addr fields saved by the loader. */
        /* 'off' becomes the lowest nonzero sh_addr in the table ... */
        off = 0;
        for (i = 0; i < hdr->e_shnum; i++) {
                if (shdr[i].sh_addr != 0 && (off == 0 || shdr[i].sh_addr < off))
                        off = shdr[i].sh_addr;
        }
        /* ... and every nonzero sh_addr is rebased onto the module base. */
        for (i = 0; i < hdr->e_shnum; i++) {
                if (shdr[i].sh_addr != 0)
                        shdr[i].sh_addr = shdr[i].sh_addr - off +
                            (Elf_Addr)ef->address;
        }

        /* The symbol and string tables are used in place at their sh_addr. */
        ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym);
        ef->ddbsymtab = (Elf_Sym *)shdr[symtabindex].sh_addr;
        ef->ddbstrcnt = shdr[symstrindex].sh_size;
        ef->ddbstrtab = (char *)shdr[symstrindex].sh_addr;
        ef->shstrcnt = shdr[shstrindex].sh_size;
        ef->shstrtab = (char *)shdr[shstrindex].sh_addr;

        /* Now fill out progtab and the relocation tables. */
        pb = 0;
        rl = 0;
        ra = 0;
        for (i = 0; i < hdr->e_shnum; i++) {
                switch (shdr[i].sh_type) {
                case SHT_PROGBITS:
                case SHT_NOBITS:
#ifdef __amd64__
                case SHT_X86_64_UNWIND:
#endif
                case SHT_INIT_ARRAY:
                case SHT_FINI_ARRAY:
                        if (shdr[i].sh_addr == 0)
                                break;
                        ef->progtab[pb].addr = (void *)shdr[i].sh_addr;
                        /*
                         * Start with a placeholder name for the section type;
                         * it is replaced below when the section has a name.
                         */
                        if (shdr[i].sh_type == SHT_PROGBITS)
                                ef->progtab[pb].name = "<<PROGBITS>>";
#ifdef __amd64__
                        else if (shdr[i].sh_type == SHT_X86_64_UNWIND)
                                ef->progtab[pb].name = "<<UNWIND>>";
#endif
                        else if (shdr[i].sh_type == SHT_INIT_ARRAY)
                                ef->progtab[pb].name = "<<INIT_ARRAY>>";
                        else if (shdr[i].sh_type == SHT_FINI_ARRAY)
                                ef->progtab[pb].name = "<<FINI_ARRAY>>";
                        else
                                ef->progtab[pb].name = "<<NOBITS>>";
                        ef->progtab[pb].size = shdr[i].sh_size;
                        ef->progtab[pb].flags = shdr[i].sh_flags;
                        ef->progtab[pb].sec = i;
                        if (ef->shstrtab && shdr[i].sh_name != 0)
                                ef->progtab[pb].name =
                                    ef->shstrtab + shdr[i].sh_name;
                        /*
                         * DPCPU set: copy the preloaded contents into space
                         * from dpcpu_alloc() and point the entry there.
                         */
                        if (ef->progtab[pb].name != NULL && 
                            !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) {
                                void *dpcpu;

                                dpcpu = dpcpu_alloc(shdr[i].sh_size);
                                if (dpcpu == NULL) {
                                        printf("%s: pcpu module space is out "
                                            "of space; cannot allocate %#jx "
                                            "for %s\n", __func__,
                                            (uintmax_t)shdr[i].sh_size,
                                            filename);
                                        error = ENOSPC;
                                        goto out;
                                }
                                memcpy(dpcpu, ef->progtab[pb].addr,
                                    ef->progtab[pb].size);
                                dpcpu_copy(dpcpu, shdr[i].sh_size);
                                ef->progtab[pb].addr = dpcpu;
#ifdef VIMAGE
                        /* VNET set: same treatment, using vnet_data_alloc(). */
                        } else if (ef->progtab[pb].name != NULL &&
                            !strcmp(ef->progtab[pb].name, VNET_SETNAME)) {
                                void *vnet_data;

                                vnet_data = vnet_data_alloc(shdr[i].sh_size);
                                if (vnet_data == NULL) {
                                        printf("%s: vnet module space is out "
                                            "of space; cannot allocate %#jx "
                                            "for %s\n", __func__,
                                            (uintmax_t)shdr[i].sh_size,
                                            filename);
                                        error = ENOSPC;
                                        goto out;
                                }
                                memcpy(vnet_data, ef->progtab[pb].addr,
                                    ef->progtab[pb].size);
                                vnet_data_copy(vnet_data, shdr[i].sh_size);
                                ef->progtab[pb].addr = vnet_data;
#endif
                        /*
                         * Constructor/destructor sections: only the first of
                         * each kind is recorded; later ones are reported and
                         * otherwise ignored.
                         */
                        } else if ((ef->progtab[pb].name != NULL &&
                            strcmp(ef->progtab[pb].name, ".ctors") == 0) ||
                            shdr[i].sh_type == SHT_INIT_ARRAY) {
                                if (lf->ctors_addr != 0) {
                                        printf(
                                    "%s: multiple ctor sections in %s\n",
                                            __func__, filename);
                                } else {
                                        lf->ctors_addr = ef->progtab[pb].addr;
                                        lf->ctors_size = shdr[i].sh_size;
                                }
                        } else if ((ef->progtab[pb].name != NULL &&
                            strcmp(ef->progtab[pb].name, ".dtors") == 0) ||
                            shdr[i].sh_type == SHT_FINI_ARRAY) {
                                if (lf->dtors_addr != 0) {
                                        printf(
                                    "%s: multiple dtor sections in %s\n",
                                            __func__, filename);
                                } else {
                                        lf->dtors_addr = ef->progtab[pb].addr;
                                        lf->dtors_size = shdr[i].sh_size;
                                }
                        }

                        /* Update all symbol values with the offset. */
                        for (j = 0; j < ef->ddbsymcnt; j++) {
                                es = &ef->ddbsymtab[j];
                                if (es->st_shndx != i)
                                        continue;
                                es->st_value += (Elf_Addr)ef->progtab[pb].addr;
                        }
                        pb++;
                        break;
                case SHT_REL:
                        if (shdr[shdr[i].sh_info].sh_addr == 0)
                                break;
                        ef->reltab[rl].rel = (Elf_Rel *)shdr[i].sh_addr;
                        ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel);
                        ef->reltab[rl].sec = shdr[i].sh_info;
                        rl++;
                        break;
                case SHT_RELA:
                        if (shdr[shdr[i].sh_info].sh_addr == 0)
                                break;
                        ef->relatab[ra].rela = (Elf_Rela *)shdr[i].sh_addr;
                        ef->relatab[ra].nrela =
                            shdr[i].sh_size / sizeof(Elf_Rela);
                        ef->relatab[ra].sec = shdr[i].sh_info;
                        ra++;
                        break;
                }
        }
        /* The fill pass must have produced exactly what the sizing pass counted. */
        if (pb != ef->nprogtab) {
                printf("%s: lost progbits\n", filename);
                error = ENOEXEC;
                goto out;
        }
        if (rl != ef->nreltab) {
                printf("%s: lost reltab\n", filename);
                error = ENOEXEC;
                goto out;
        }
        if (ra != ef->nrelatab) {
                printf("%s: lost relatab\n", filename);
                error = ENOEXEC;
                goto out;
        }

        /*
         * The file needs to be writeable and executable while applying
         * relocations.  Mapping protections are applied once relocation
         * processing is complete.
         */
        link_elf_protect_range(ef, (vm_offset_t)ef->address,
            round_page((vm_offset_t)ef->address + ef->lf.size), VM_PROT_ALL);

        /* Local intra-module relocations */
        error = link_elf_reloc_local(lf, false);
        if (error != 0)
                goto out;
        *result = lf;
        return (0);

out:
        /* preload not done this way */
        linker_file_unload(lf, LINKER_UNLOAD_FORCE);
        return (error);
}
636
/*
 * Call, in order, every non-NULL entry of an array of
 * "void (*)(void)" function pointers.
 *
 * addr: start of the array; NULL means there is nothing to call.
 * size: size of the array in bytes; a trailing partial entry is ignored.
 */
static void
link_elf_invoke_cbs(caddr_t addr, size_t size)
{
        void (**fn)(void);
        void (**limit)(void);

        if (addr == NULL || size == 0)
                return;

        fn = (void *)addr;
        limit = fn + size / sizeof(*fn);
        for (; fn < limit; fn++) {
                if (*fn == NULL)
                        continue;
                (**fn)();
        }
}
652
653 static int
654 link_elf_link_preload_finish(linker_file_t lf)
655 {
656         elf_file_t ef;
657         int error;
658
659         ef = (elf_file_t)lf;
660         error = relocate_file(ef);
661         if (error)
662                 return (error);
663
664         /* Notify MD code that a module is being loaded. */
665         error = elf_cpu_load_file(lf);
666         if (error)
667                 return (error);
668
669 #if defined(__i386__) || defined(__amd64__)
670         /* Now ifuncs. */
671         error = link_elf_reloc_local(lf, true);
672         if (error != 0)
673                 return (error);
674 #endif
675
676         /* Apply protections now that relocation processing is complete. */
677         link_elf_protect(ef);
678
679         link_elf_invoke_cbs(lf->ctors_addr, lf->ctors_size);
680         return (0);
681 }
682
683 static int
684 link_elf_load_file(linker_class_t cls, const char *filename,
685     linker_file_t *result)
686 {
687         struct nameidata *nd;
688         struct thread *td = curthread;  /* XXX */
689         Elf_Ehdr *hdr;
690         Elf_Shdr *shdr;
691         Elf_Sym *es;
692         int nbytes, i, j;
693         vm_offset_t mapbase;
694         size_t mapsize;
695         int error = 0;
696         ssize_t resid;
697         int flags;
698         elf_file_t ef;
699         linker_file_t lf;
700         int symtabindex;
701         int symstrindex;
702         int shstrindex;
703         int nsym;
704         int pb, rl, ra;
705         int alignmask;
706
707         shdr = NULL;
708         lf = NULL;
709         mapsize = 0;
710         hdr = NULL;
711
712         nd = malloc(sizeof(struct nameidata), M_TEMP, M_WAITOK);
713         NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, filename);
714         flags = FREAD;
715         error = vn_open(nd, &flags, 0, NULL);
716         if (error) {
717                 free(nd, M_TEMP);
718                 return error;
719         }
720         NDFREE(nd, NDF_ONLY_PNBUF);
721         if (nd->ni_vp->v_type != VREG) {
722                 error = ENOEXEC;
723                 goto out;
724         }
725 #ifdef MAC
726         error = mac_kld_check_load(td->td_ucred, nd->ni_vp);
727         if (error) {
728                 goto out;
729         }
730 #endif
731
732         /* Read the elf header from the file. */
733         hdr = malloc(sizeof(*hdr), M_LINKER, M_WAITOK);
734         error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)hdr, sizeof(*hdr), 0,
735             UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
736             &resid, td);
737         if (error)
738                 goto out;
739         if (resid != 0){
740                 error = ENOEXEC;
741                 goto out;
742         }
743
744         if (!IS_ELF(*hdr)) {
745                 error = ENOEXEC;
746                 goto out;
747         }
748
749         if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS
750             || hdr->e_ident[EI_DATA] != ELF_TARG_DATA) {
751                 link_elf_error(filename, "Unsupported file layout");
752                 error = ENOEXEC;
753                 goto out;
754         }
755         if (hdr->e_ident[EI_VERSION] != EV_CURRENT
756             || hdr->e_version != EV_CURRENT) {
757                 link_elf_error(filename, "Unsupported file version");
758                 error = ENOEXEC;
759                 goto out;
760         }
761         if (hdr->e_type != ET_REL) {
762                 error = ENOSYS;
763                 goto out;
764         }
765         if (hdr->e_machine != ELF_TARG_MACH) {
766                 link_elf_error(filename, "Unsupported machine");
767                 error = ENOEXEC;
768                 goto out;
769         }
770
771         lf = linker_make_file(filename, &link_elf_class);
772         if (!lf) {
773                 error = ENOMEM;
774                 goto out;
775         }
776         ef = (elf_file_t) lf;
777         ef->nprogtab = 0;
778         ef->e_shdr = 0;
779         ef->nreltab = 0;
780         ef->nrelatab = 0;
781
782         /* Allocate and read in the section header */
783         nbytes = hdr->e_shnum * hdr->e_shentsize;
784         if (nbytes == 0 || hdr->e_shoff == 0 ||
785             hdr->e_shentsize != sizeof(Elf_Shdr)) {
786                 error = ENOEXEC;
787                 goto out;
788         }
789         shdr = malloc(nbytes, M_LINKER, M_WAITOK);
790         ef->e_shdr = shdr;
791         error = vn_rdwr(UIO_READ, nd->ni_vp, (caddr_t)shdr, nbytes,
792             hdr->e_shoff, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
793             NOCRED, &resid, td);
794         if (error)
795                 goto out;
796         if (resid) {
797                 error = ENOEXEC;
798                 goto out;
799         }
800
801         /* Scan the section header for information and table sizing. */
802         nsym = 0;
803         symtabindex = -1;
804         symstrindex = -1;
805         for (i = 0; i < hdr->e_shnum; i++) {
806                 if (shdr[i].sh_size == 0)
807                         continue;
808                 switch (shdr[i].sh_type) {
809                 case SHT_PROGBITS:
810                 case SHT_NOBITS:
811 #ifdef __amd64__
812                 case SHT_X86_64_UNWIND:
813 #endif
814                 case SHT_INIT_ARRAY:
815                 case SHT_FINI_ARRAY:
816                         if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
817                                 break;
818                         ef->nprogtab++;
819                         break;
820                 case SHT_SYMTAB:
821                         nsym++;
822                         symtabindex = i;
823                         symstrindex = shdr[i].sh_link;
824                         break;
825                 case SHT_REL:
826                         /*
827                          * Ignore relocation tables for unallocated
828                          * sections.
829                          */
830                         if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
831                                 break;
832                         ef->nreltab++;
833                         break;
834                 case SHT_RELA:
835                         if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
836                                 break;
837                         ef->nrelatab++;
838                         break;
839                 case SHT_STRTAB:
840                         break;
841                 }
842         }
843         if (ef->nprogtab == 0) {
844                 link_elf_error(filename, "file has no contents");
845                 error = ENOEXEC;
846                 goto out;
847         }
848         if (nsym != 1) {
849                 /* Only allow one symbol table for now */
850                 link_elf_error(filename,
851                     "file must have exactly one symbol table");
852                 error = ENOEXEC;
853                 goto out;
854         }
855         if (symstrindex < 0 || symstrindex > hdr->e_shnum ||
856             shdr[symstrindex].sh_type != SHT_STRTAB) {
857                 link_elf_error(filename, "file has invalid symbol strings");
858                 error = ENOEXEC;
859                 goto out;
860         }
861
862         /* Allocate space for tracking the load chunks */
863         if (ef->nprogtab != 0)
864                 ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab),
865                     M_LINKER, M_WAITOK | M_ZERO);
866         if (ef->nreltab != 0)
867                 ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab),
868                     M_LINKER, M_WAITOK | M_ZERO);
869         if (ef->nrelatab != 0)
870                 ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab),
871                     M_LINKER, M_WAITOK | M_ZERO);
872
873         if (symtabindex == -1) {
874                 link_elf_error(filename, "lost symbol table index");
875                 error = ENOEXEC;
876                 goto out;
877         }
878         /* Allocate space for and load the symbol table */
879         ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym);
880         ef->ddbsymtab = malloc(shdr[symtabindex].sh_size, M_LINKER, M_WAITOK);
881         error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)ef->ddbsymtab,
882             shdr[symtabindex].sh_size, shdr[symtabindex].sh_offset,
883             UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
884             &resid, td);
885         if (error)
886                 goto out;
887         if (resid != 0){
888                 error = EINVAL;
889                 goto out;
890         }
891
892         /* Allocate space for and load the symbol strings */
893         ef->ddbstrcnt = shdr[symstrindex].sh_size;
894         ef->ddbstrtab = malloc(shdr[symstrindex].sh_size, M_LINKER, M_WAITOK);
895         error = vn_rdwr(UIO_READ, nd->ni_vp, ef->ddbstrtab,
896             shdr[symstrindex].sh_size, shdr[symstrindex].sh_offset,
897             UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
898             &resid, td);
899         if (error)
900                 goto out;
901         if (resid != 0){
902                 error = EINVAL;
903                 goto out;
904         }
905
906         /* Do we have a string table for the section names?  */
907         shstrindex = -1;
908         if (hdr->e_shstrndx != 0 &&
909             shdr[hdr->e_shstrndx].sh_type == SHT_STRTAB) {
910                 shstrindex = hdr->e_shstrndx;
911                 ef->shstrcnt = shdr[shstrindex].sh_size;
912                 ef->shstrtab = malloc(shdr[shstrindex].sh_size, M_LINKER,
913                     M_WAITOK);
914                 error = vn_rdwr(UIO_READ, nd->ni_vp, ef->shstrtab,
915                     shdr[shstrindex].sh_size, shdr[shstrindex].sh_offset,
916                     UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
917                     &resid, td);
918                 if (error)
919                         goto out;
920                 if (resid != 0){
921                         error = EINVAL;
922                         goto out;
923                 }
924         }
925
926         /* Size up code/data(progbits) and bss(nobits). */
927         alignmask = 0;
928         for (i = 0; i < hdr->e_shnum; i++) {
929                 if (shdr[i].sh_size == 0)
930                         continue;
931                 switch (shdr[i].sh_type) {
932                 case SHT_PROGBITS:
933                 case SHT_NOBITS:
934 #ifdef __amd64__
935                 case SHT_X86_64_UNWIND:
936 #endif
937                 case SHT_INIT_ARRAY:
938                 case SHT_FINI_ARRAY:
939                         if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
940                                 break;
941                         alignmask = shdr[i].sh_addralign - 1;
942                         mapsize += alignmask;
943                         mapsize &= ~alignmask;
944                         mapsize += shdr[i].sh_size;
945                         break;
946                 }
947         }
948
949         /*
950          * We know how much space we need for the text/data/bss/etc.
951          * This stuff needs to be in a single chunk so that profiling etc
952          * can get the bounds and gdb can associate offsets with modules
953          */
954         ef->object = vm_pager_allocate(OBJT_PHYS, NULL, round_page(mapsize),
955             VM_PROT_ALL, 0, thread0.td_ucred);
956         if (ef->object == NULL) {
957                 error = ENOMEM;
958                 goto out;
959         }
960 #if VM_NRESERVLEVEL > 0
961         vm_object_color(ef->object, 0);
962 #endif
963
964         /*
965          * In order to satisfy amd64's architectural requirements on the
966          * location of code and data in the kernel's address space, request a
967          * mapping that is above the kernel.
968          *
969          * Protections will be restricted once relocations are applied.
970          */
971 #ifdef __amd64__
972         mapbase = KERNBASE;
973 #else
974         mapbase = VM_MIN_KERNEL_ADDRESS;
975 #endif
976         error = vm_map_find(kernel_map, ef->object, 0, &mapbase,
977             round_page(mapsize), 0, VMFS_OPTIMAL_SPACE, VM_PROT_ALL,
978             VM_PROT_ALL, 0);
979         if (error != KERN_SUCCESS) {
980                 vm_object_deallocate(ef->object);
981                 ef->object = NULL;
982                 error = ENOMEM;
983                 goto out;
984         }
985
986         /* Wire the pages */
987         error = vm_map_wire(kernel_map, mapbase,
988             mapbase + round_page(mapsize),
989             VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
990         if (error != KERN_SUCCESS) {
991                 error = ENOMEM;
992                 goto out;
993         }
994
995         /* Inform the kld system about the situation */
996         lf->address = ef->address = (caddr_t)mapbase;
997         lf->size = mapsize;
998
999         /*
1000          * Now load code/data(progbits), zero bss(nobits), allocate space for
1001          * and load relocs
1002          */
1003         pb = 0;
1004         rl = 0;
1005         ra = 0;
1006         alignmask = 0;
1007         for (i = 0; i < hdr->e_shnum; i++) {
1008                 if (shdr[i].sh_size == 0)
1009                         continue;
1010                 switch (shdr[i].sh_type) {
1011                 case SHT_PROGBITS:
1012                 case SHT_NOBITS:
1013 #ifdef __amd64__
1014                 case SHT_X86_64_UNWIND:
1015 #endif
1016                 case SHT_INIT_ARRAY:
1017                 case SHT_FINI_ARRAY:
1018                         if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
1019                                 break;
1020                         alignmask = shdr[i].sh_addralign - 1;
1021                         mapbase += alignmask;
1022                         mapbase &= ~alignmask;
1023                         if (ef->shstrtab != NULL && shdr[i].sh_name != 0) {
1024                                 ef->progtab[pb].name =
1025                                     ef->shstrtab + shdr[i].sh_name;
1026                                 if (!strcmp(ef->progtab[pb].name, ".ctors") ||
1027                                     shdr[i].sh_type == SHT_INIT_ARRAY) {
1028                                         if (lf->ctors_addr != 0) {
1029                                                 printf(
1030                                     "%s: multiple ctor sections in %s\n",
1031                                                     __func__, filename);
1032                                         } else {
1033                                                 lf->ctors_addr =
1034                                                     (caddr_t)mapbase;
1035                                                 lf->ctors_size =
1036                                                     shdr[i].sh_size;
1037                                         }
1038                                 } else if (!strcmp(ef->progtab[pb].name,
1039                                     ".dtors") ||
1040                                     shdr[i].sh_type == SHT_FINI_ARRAY) {
1041                                         if (lf->dtors_addr != 0) {
1042                                                 printf(
1043                                     "%s: multiple dtor sections in %s\n",
1044                                                     __func__, filename);
1045                                         } else {
1046                                                 lf->dtors_addr =
1047                                                     (caddr_t)mapbase;
1048                                                 lf->dtors_size =
1049                                                     shdr[i].sh_size;
1050                                         }
1051                                 }
1052                         } else if (shdr[i].sh_type == SHT_PROGBITS)
1053                                 ef->progtab[pb].name = "<<PROGBITS>>";
1054 #ifdef __amd64__
1055                         else if (shdr[i].sh_type == SHT_X86_64_UNWIND)
1056                                 ef->progtab[pb].name = "<<UNWIND>>";
1057 #endif
1058                         else
1059                                 ef->progtab[pb].name = "<<NOBITS>>";
1060                         if (ef->progtab[pb].name != NULL && 
1061                             !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) {
1062                                 ef->progtab[pb].addr =
1063                                     dpcpu_alloc(shdr[i].sh_size);
1064                                 if (ef->progtab[pb].addr == NULL) {
1065                                         printf("%s: pcpu module space is out "
1066                                             "of space; cannot allocate %#jx "
1067                                             "for %s\n", __func__,
1068                                             (uintmax_t)shdr[i].sh_size,
1069                                             filename);
1070                                 }
1071                         }
1072 #ifdef VIMAGE
1073                         else if (ef->progtab[pb].name != NULL &&
1074                             !strcmp(ef->progtab[pb].name, VNET_SETNAME)) {
1075                                 ef->progtab[pb].addr =
1076                                     vnet_data_alloc(shdr[i].sh_size);
1077                                 if (ef->progtab[pb].addr == NULL) {
1078                                         printf("%s: vnet module space is out "
1079                                             "of space; cannot allocate %#jx "
1080                                             "for %s\n", __func__,
1081                                             (uintmax_t)shdr[i].sh_size,
1082                                             filename);
1083                                 }
1084                         }
1085 #endif
1086                         else
1087                                 ef->progtab[pb].addr =
1088                                     (void *)(uintptr_t)mapbase;
1089                         if (ef->progtab[pb].addr == NULL) {
1090                                 error = ENOSPC;
1091                                 goto out;
1092                         }
1093                         ef->progtab[pb].size = shdr[i].sh_size;
1094                         ef->progtab[pb].flags = shdr[i].sh_flags;
1095                         ef->progtab[pb].sec = i;
1096                         if (shdr[i].sh_type == SHT_PROGBITS
1097 #ifdef __amd64__
1098                             || shdr[i].sh_type == SHT_X86_64_UNWIND
1099 #endif
1100                             ) {
1101                                 error = vn_rdwr(UIO_READ, nd->ni_vp,
1102                                     ef->progtab[pb].addr,
1103                                     shdr[i].sh_size, shdr[i].sh_offset,
1104                                     UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
1105                                     NOCRED, &resid, td);
1106                                 if (error)
1107                                         goto out;
1108                                 if (resid != 0){
1109                                         error = EINVAL;
1110                                         goto out;
1111                                 }
1112                                 /* Initialize the per-cpu or vnet area. */
1113                                 if (ef->progtab[pb].addr != (void *)mapbase &&
1114                                     !strcmp(ef->progtab[pb].name, DPCPU_SETNAME))
1115                                         dpcpu_copy(ef->progtab[pb].addr,
1116                                             shdr[i].sh_size);
1117 #ifdef VIMAGE
1118                                 else if (ef->progtab[pb].addr !=
1119                                     (void *)mapbase &&
1120                                     !strcmp(ef->progtab[pb].name, VNET_SETNAME))
1121                                         vnet_data_copy(ef->progtab[pb].addr,
1122                                             shdr[i].sh_size);
1123 #endif
1124                         } else
1125                                 bzero(ef->progtab[pb].addr, shdr[i].sh_size);
1126
1127                         /* Update all symbol values with the offset. */
1128                         for (j = 0; j < ef->ddbsymcnt; j++) {
1129                                 es = &ef->ddbsymtab[j];
1130                                 if (es->st_shndx != i)
1131                                         continue;
1132                                 es->st_value += (Elf_Addr)ef->progtab[pb].addr;
1133                         }
1134                         mapbase += shdr[i].sh_size;
1135                         pb++;
1136                         break;
1137                 case SHT_REL:
1138                         if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
1139                                 break;
1140                         ef->reltab[rl].rel = malloc(shdr[i].sh_size, M_LINKER,
1141                             M_WAITOK);
1142                         ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel);
1143                         ef->reltab[rl].sec = shdr[i].sh_info;
1144                         error = vn_rdwr(UIO_READ, nd->ni_vp,
1145                             (void *)ef->reltab[rl].rel,
1146                             shdr[i].sh_size, shdr[i].sh_offset,
1147                             UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
1148                             &resid, td);
1149                         if (error)
1150                                 goto out;
1151                         if (resid != 0){
1152                                 error = EINVAL;
1153                                 goto out;
1154                         }
1155                         rl++;
1156                         break;
1157                 case SHT_RELA:
1158                         if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
1159                                 break;
1160                         ef->relatab[ra].rela = malloc(shdr[i].sh_size, M_LINKER,
1161                             M_WAITOK);
1162                         ef->relatab[ra].nrela =
1163                             shdr[i].sh_size / sizeof(Elf_Rela);
1164                         ef->relatab[ra].sec = shdr[i].sh_info;
1165                         error = vn_rdwr(UIO_READ, nd->ni_vp,
1166                             (void *)ef->relatab[ra].rela,
1167                             shdr[i].sh_size, shdr[i].sh_offset,
1168                             UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
1169                             &resid, td);
1170                         if (error)
1171                                 goto out;
1172                         if (resid != 0){
1173                                 error = EINVAL;
1174                                 goto out;
1175                         }
1176                         ra++;
1177                         break;
1178                 }
1179         }
1180         if (pb != ef->nprogtab) {
1181                 link_elf_error(filename, "lost progbits");
1182                 error = ENOEXEC;
1183                 goto out;
1184         }
1185         if (rl != ef->nreltab) {
1186                 link_elf_error(filename, "lost reltab");
1187                 error = ENOEXEC;
1188                 goto out;
1189         }
1190         if (ra != ef->nrelatab) {
1191                 link_elf_error(filename, "lost relatab");
1192                 error = ENOEXEC;
1193                 goto out;
1194         }
1195         if (mapbase != (vm_offset_t)ef->address + mapsize) {
1196                 printf(
1197                     "%s: mapbase 0x%lx != address %p + mapsize 0x%lx (0x%lx)\n",
1198                     filename != NULL ? filename : "<none>",
1199                     (u_long)mapbase, ef->address, (u_long)mapsize,
1200                     (u_long)(vm_offset_t)ef->address + mapsize);
1201                 error = ENOMEM;
1202                 goto out;
1203         }
1204
1205         /* Local intra-module relocations */
1206         error = link_elf_reloc_local(lf, false);
1207         if (error != 0)
1208                 goto out;
1209
1210         /* Pull in dependencies */
1211         VOP_UNLOCK(nd->ni_vp);
1212         error = linker_load_dependencies(lf);
1213         vn_lock(nd->ni_vp, LK_EXCLUSIVE | LK_RETRY);
1214         if (error)
1215                 goto out;
1216
1217         /* External relocations */
1218         error = relocate_file(ef);
1219         if (error)
1220                 goto out;
1221
1222         /* Notify MD code that a module is being loaded. */
1223         error = elf_cpu_load_file(lf);
1224         if (error)
1225                 goto out;
1226
1227 #if defined(__i386__) || defined(__amd64__)
1228         /* Now ifuncs. */
1229         error = link_elf_reloc_local(lf, true);
1230         if (error != 0)
1231                 goto out;
1232 #endif
1233
1234         link_elf_protect(ef);
1235         link_elf_invoke_cbs(lf->ctors_addr, lf->ctors_size);
1236         *result = lf;
1237
1238 out:
1239         VOP_UNLOCK(nd->ni_vp);
1240         vn_close(nd->ni_vp, FREAD, td->td_ucred, td);
1241         free(nd, M_TEMP);
1242         if (error && lf)
1243                 linker_file_unload(lf, LINKER_UNLOAD_FORCE);
1244         free(hdr, M_LINKER);
1245
1246         return error;
1247 }
1248
1249 static void
1250 link_elf_unload_file(linker_file_t file)
1251 {
1252         elf_file_t ef = (elf_file_t) file;
1253         u_int i;
1254
1255         link_elf_invoke_cbs(file->dtors_addr, file->dtors_size);
1256
1257         /* Notify MD code that a module is being unloaded. */
1258         elf_cpu_unload_file(file);
1259
1260         if (ef->progtab) {
1261                 for (i = 0; i < ef->nprogtab; i++) {
1262                         if (ef->progtab[i].size == 0)
1263                                 continue;
1264                         if (ef->progtab[i].name == NULL)
1265                                 continue;
1266                         if (!strcmp(ef->progtab[i].name, DPCPU_SETNAME))
1267                                 dpcpu_free(ef->progtab[i].addr,
1268                                     ef->progtab[i].size);
1269 #ifdef VIMAGE
1270                         else if (!strcmp(ef->progtab[i].name, VNET_SETNAME))
1271                                 vnet_data_free(ef->progtab[i].addr,
1272                                     ef->progtab[i].size);
1273 #endif
1274                 }
1275         }
1276         if (ef->preloaded) {
1277                 free(ef->reltab, M_LINKER);
1278                 free(ef->relatab, M_LINKER);
1279                 free(ef->progtab, M_LINKER);
1280                 free(ef->ctftab, M_LINKER);
1281                 free(ef->ctfoff, M_LINKER);
1282                 free(ef->typoff, M_LINKER);
1283                 if (file->pathname != NULL)
1284                         preload_delete_name(file->pathname);
1285                 return;
1286         }
1287
1288         for (i = 0; i < ef->nreltab; i++)
1289                 free(ef->reltab[i].rel, M_LINKER);
1290         for (i = 0; i < ef->nrelatab; i++)
1291                 free(ef->relatab[i].rela, M_LINKER);
1292         free(ef->reltab, M_LINKER);
1293         free(ef->relatab, M_LINKER);
1294         free(ef->progtab, M_LINKER);
1295
1296         if (ef->object != NULL)
1297                 vm_map_remove(kernel_map, (vm_offset_t)ef->address,
1298                     (vm_offset_t)ef->address + ptoa(ef->object->size));
1299         free(ef->e_shdr, M_LINKER);
1300         free(ef->ddbsymtab, M_LINKER);
1301         free(ef->ddbstrtab, M_LINKER);
1302         free(ef->shstrtab, M_LINKER);
1303         free(ef->ctftab, M_LINKER);
1304         free(ef->ctfoff, M_LINKER);
1305         free(ef->typoff, M_LINKER);
1306 }
1307
1308 static const char *
1309 symbol_name(elf_file_t ef, Elf_Size r_info)
1310 {
1311         const Elf_Sym *ref;
1312
1313         if (ELF_R_SYM(r_info)) {
1314                 ref = ef->ddbsymtab + ELF_R_SYM(r_info);
1315                 return ef->ddbstrtab + ref->st_name;
1316         } else
1317                 return NULL;
1318 }
1319
1320 static Elf_Addr
1321 findbase(elf_file_t ef, int sec)
1322 {
1323         int i;
1324         Elf_Addr base = 0;
1325
1326         for (i = 0; i < ef->nprogtab; i++) {
1327                 if (sec == ef->progtab[i].sec) {
1328                         base = (Elf_Addr)ef->progtab[i].addr;
1329                         break;
1330                 }
1331         }
1332         return base;
1333 }
1334
1335 static int
1336 relocate_file1(elf_file_t ef, bool ifuncs)
1337 {
1338         const Elf_Rel *rellim;
1339         const Elf_Rel *rel;
1340         const Elf_Rela *relalim;
1341         const Elf_Rela *rela;
1342         const char *symname;
1343         const Elf_Sym *sym;
1344         int i;
1345         Elf_Size symidx;
1346         Elf_Addr base;
1347
1348         /* Perform relocations without addend if there are any: */
1349         for (i = 0; i < ef->nreltab; i++) {
1350                 rel = ef->reltab[i].rel;
1351                 if (rel == NULL) {
1352                         link_elf_error(ef->lf.filename, "lost a reltab!");
1353                         return (ENOEXEC);
1354                 }
1355                 rellim = rel + ef->reltab[i].nrel;
1356                 base = findbase(ef, ef->reltab[i].sec);
1357                 if (base == 0) {
1358                         link_elf_error(ef->lf.filename, "lost base for reltab");
1359                         return (ENOEXEC);
1360                 }
1361                 for ( ; rel < rellim; rel++) {
1362                         symidx = ELF_R_SYM(rel->r_info);
1363                         if (symidx >= ef->ddbsymcnt)
1364                                 continue;
1365                         sym = ef->ddbsymtab + symidx;
1366                         /* Local relocs are already done */
1367                         if (ELF_ST_BIND(sym->st_info) == STB_LOCAL)
1368                                 continue;
1369                         if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1370                             elf_is_ifunc_reloc(rel->r_info)) != ifuncs)
1371                                 continue;
1372                         if (elf_reloc(&ef->lf, base, rel, ELF_RELOC_REL,
1373                             elf_obj_lookup)) {
1374                                 symname = symbol_name(ef, rel->r_info);
1375                                 printf("link_elf_obj: symbol %s undefined\n",
1376                                     symname);
1377                                 return (ENOENT);
1378                         }
1379                 }
1380         }
1381
1382         /* Perform relocations with addend if there are any: */
1383         for (i = 0; i < ef->nrelatab; i++) {
1384                 rela = ef->relatab[i].rela;
1385                 if (rela == NULL) {
1386                         link_elf_error(ef->lf.filename, "lost a relatab!");
1387                         return (ENOEXEC);
1388                 }
1389                 relalim = rela + ef->relatab[i].nrela;
1390                 base = findbase(ef, ef->relatab[i].sec);
1391                 if (base == 0) {
1392                         link_elf_error(ef->lf.filename,
1393                             "lost base for relatab");
1394                         return (ENOEXEC);
1395                 }
1396                 for ( ; rela < relalim; rela++) {
1397                         symidx = ELF_R_SYM(rela->r_info);
1398                         if (symidx >= ef->ddbsymcnt)
1399                                 continue;
1400                         sym = ef->ddbsymtab + symidx;
1401                         /* Local relocs are already done */
1402                         if (ELF_ST_BIND(sym->st_info) == STB_LOCAL)
1403                                 continue;
1404                         if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1405                             elf_is_ifunc_reloc(rela->r_info)) != ifuncs)
1406                                 continue;
1407                         if (elf_reloc(&ef->lf, base, rela, ELF_RELOC_RELA,
1408                             elf_obj_lookup)) {
1409                                 symname = symbol_name(ef, rela->r_info);
1410                                 printf("link_elf_obj: symbol %s undefined\n",
1411                                     symname);
1412                                 return (ENOENT);
1413                         }
1414                 }
1415         }
1416
1417         /*
1418          * Only clean SHN_FBSD_CACHED for successful return.  If we
1419          * modified symbol table for the object but found an
1420          * unresolved symbol, there is no reason to roll back.
1421          */
1422         elf_obj_cleanup_globals_cache(ef);
1423
1424         return (0);
1425 }
1426
1427 static int
1428 relocate_file(elf_file_t ef)
1429 {
1430         int error;
1431
1432         error = relocate_file1(ef, false);
1433         if (error == 0)
1434                 error = relocate_file1(ef, true);
1435         return (error);
1436 }
1437
1438 static int
1439 link_elf_lookup_symbol1(linker_file_t lf, const char *name, c_linker_sym_t *sym,
1440     bool see_local)
1441 {
1442         elf_file_t ef = (elf_file_t)lf;
1443         const Elf_Sym *symp;
1444         const char *strp;
1445         int i;
1446
1447         for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1448                 strp = ef->ddbstrtab + symp->st_name;
1449                 if (symp->st_shndx != SHN_UNDEF && strcmp(name, strp) == 0) {
1450                         if (see_local ||
1451                             ELF_ST_BIND(symp->st_info) == STB_GLOBAL) {
1452                                 *sym = (c_linker_sym_t) symp;
1453                                 return (0);
1454                         }
1455                         return (ENOENT);
1456                 }
1457         }
1458         return (ENOENT);
1459 }
1460
1461 static int
1462 link_elf_lookup_symbol(linker_file_t lf, const char *name, c_linker_sym_t *sym)
1463 {
1464         return (link_elf_lookup_symbol1(lf, name, sym,
1465             link_elf_obj_leak_locals));
1466 }
1467
1468 static int
1469 link_elf_lookup_debug_symbol(linker_file_t lf, const char *name,
1470     c_linker_sym_t *sym)
1471 {
1472         return (link_elf_lookup_symbol1(lf, name, sym, true));
1473 }
1474
1475 static int
1476 link_elf_symbol_values1(linker_file_t lf, c_linker_sym_t sym,
1477     linker_symval_t *symval, bool see_local)
1478 {
1479         elf_file_t ef;
1480         const Elf_Sym *es;
1481         caddr_t val;
1482
1483         ef = (elf_file_t) lf;
1484         es = (const Elf_Sym*) sym;
1485         val = (caddr_t)es->st_value;
1486         if (es >= ef->ddbsymtab && es < (ef->ddbsymtab + ef->ddbsymcnt)) {
1487                 if (!see_local && ELF_ST_BIND(es->st_info) == STB_LOCAL)
1488                         return (ENOENT);
1489                 symval->name = ef->ddbstrtab + es->st_name;
1490                 val = (caddr_t)es->st_value;
1491                 if (ELF_ST_TYPE(es->st_info) == STT_GNU_IFUNC)
1492                         val = ((caddr_t (*)(void))val)();
1493                 symval->value = val;
1494                 symval->size = es->st_size;
1495                 return (0);
1496         }
1497         return (ENOENT);
1498 }
1499
1500 static int
1501 link_elf_symbol_values(linker_file_t lf, c_linker_sym_t sym,
1502     linker_symval_t *symval)
1503 {
1504         return (link_elf_symbol_values1(lf, sym, symval,
1505             link_elf_obj_leak_locals));
1506 }
1507
1508 static int
1509 link_elf_debug_symbol_values(linker_file_t lf, c_linker_sym_t sym,
1510     linker_symval_t *symval)
1511 {
1512         return (link_elf_symbol_values1(lf, sym, symval, true));
1513 }
1514
1515 static int
1516 link_elf_search_symbol(linker_file_t lf, caddr_t value,
1517     c_linker_sym_t *sym, long *diffp)
1518 {
1519         elf_file_t ef = (elf_file_t)lf;
1520         u_long off = (uintptr_t)(void *)value;
1521         u_long diff = off;
1522         u_long st_value;
1523         const Elf_Sym *es;
1524         const Elf_Sym *best = NULL;
1525         int i;
1526
1527         for (i = 0, es = ef->ddbsymtab; i < ef->ddbsymcnt; i++, es++) {
1528                 if (es->st_name == 0)
1529                         continue;
1530                 st_value = es->st_value;
1531                 if (off >= st_value) {
1532                         if (off - st_value < diff) {
1533                                 diff = off - st_value;
1534                                 best = es;
1535                                 if (diff == 0)
1536                                         break;
1537                         } else if (off - st_value == diff) {
1538                                 best = es;
1539                         }
1540                 }
1541         }
1542         if (best == NULL)
1543                 *diffp = off;
1544         else
1545                 *diffp = diff;
1546         *sym = (c_linker_sym_t) best;
1547
1548         return (0);
1549 }
1550
1551 /*
1552  * Look up a linker set on an ELF system.
1553  */
1554 static int
1555 link_elf_lookup_set(linker_file_t lf, const char *name,
1556     void ***startp, void ***stopp, int *countp)
1557 {
1558         elf_file_t ef = (elf_file_t)lf;
1559         void **start, **stop;
1560         int i, count;
1561
1562         /* Relative to section number */
1563         for (i = 0; i < ef->nprogtab; i++) {
1564                 if ((strncmp(ef->progtab[i].name, "set_", 4) == 0) &&
1565                     strcmp(ef->progtab[i].name + 4, name) == 0) {
1566                         start  = (void **)ef->progtab[i].addr;
1567                         stop = (void **)((char *)ef->progtab[i].addr +
1568                             ef->progtab[i].size);
1569                         count = stop - start;
1570                         if (startp)
1571                                 *startp = start;
1572                         if (stopp)
1573                                 *stopp = stop;
1574                         if (countp)
1575                                 *countp = count;
1576                         return (0);
1577                 }
1578         }
1579         return (ESRCH);
1580 }
1581
1582 static int
1583 link_elf_each_function_name(linker_file_t file,
1584     int (*callback)(const char *, void *), void *opaque)
1585 {
1586         elf_file_t ef = (elf_file_t)file;
1587         const Elf_Sym *symp;
1588         int i, error;
1589
1590         /* Exhaustive search */
1591         for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1592                 if (symp->st_value != 0 &&
1593                     (ELF_ST_TYPE(symp->st_info) == STT_FUNC ||
1594                     ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) {
1595                         error = callback(ef->ddbstrtab + symp->st_name, opaque);
1596                         if (error)
1597                                 return (error);
1598                 }
1599         }
1600         return (0);
1601 }
1602
1603 static int
1604 link_elf_each_function_nameval(linker_file_t file,
1605     linker_function_nameval_callback_t callback, void *opaque)
1606 {
1607         linker_symval_t symval;
1608         elf_file_t ef = (elf_file_t)file;
1609         const Elf_Sym *symp;
1610         int i, error;
1611
1612         /* Exhaustive search */
1613         for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1614                 if (symp->st_value != 0 &&
1615                     (ELF_ST_TYPE(symp->st_info) == STT_FUNC ||
1616                     ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) {
1617                         error = link_elf_debug_symbol_values(file,
1618                             (c_linker_sym_t)symp, &symval);
1619                         if (error == 0)
1620                                 error = callback(file, i, &symval, opaque);
1621                         if (error != 0)
1622                                 return (error);
1623                 }
1624         }
1625         return (0);
1626 }
1627
1628 static void
1629 elf_obj_cleanup_globals_cache(elf_file_t ef)
1630 {
1631         Elf_Sym *sym;
1632         Elf_Size i;
1633
1634         for (i = 0; i < ef->ddbsymcnt; i++) {
1635                 sym = ef->ddbsymtab + i;
1636                 if (sym->st_shndx == SHN_FBSD_CACHED) {
1637                         sym->st_shndx = SHN_UNDEF;
1638                         sym->st_value = 0;
1639                 }
1640         }
1641 }
1642
1643 /*
1644  * Symbol lookup function that can be used when the symbol index is known (ie
1645  * in relocations). It uses the symbol index instead of doing a fully fledged
1646  * hash table based lookup when such is valid. For example for local symbols.
1647  * This is not only more efficient, it's also more correct. It's not always
1648  * the case that the symbol can be found through the hash table.
1649  */
1650 static int
1651 elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps, Elf_Addr *res)
1652 {
1653         elf_file_t ef = (elf_file_t)lf;
1654         Elf_Sym *sym;
1655         const char *symbol;
1656         Elf_Addr res1;
1657
1658         /* Don't even try to lookup the symbol if the index is bogus. */
1659         if (symidx >= ef->ddbsymcnt) {
1660                 *res = 0;
1661                 return (EINVAL);
1662         }
1663
1664         sym = ef->ddbsymtab + symidx;
1665
1666         /* Quick answer if there is a definition included. */
1667         if (sym->st_shndx != SHN_UNDEF) {
1668                 res1 = (Elf_Addr)sym->st_value;
1669                 if (ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC)
1670                         res1 = ((Elf_Addr (*)(void))res1)();
1671                 *res = res1;
1672                 return (0);
1673         }
1674
1675         /* If we get here, then it is undefined and needs a lookup. */
1676         switch (ELF_ST_BIND(sym->st_info)) {
1677         case STB_LOCAL:
1678                 /* Local, but undefined? huh? */
1679                 *res = 0;
1680                 return (EINVAL);
1681
1682         case STB_GLOBAL:
1683         case STB_WEAK:
1684                 /* Relative to Data or Function name */
1685                 symbol = ef->ddbstrtab + sym->st_name;
1686
1687                 /* Force a lookup failure if the symbol name is bogus. */
1688                 if (*symbol == 0) {
1689                         *res = 0;
1690                         return (EINVAL);
1691                 }
1692                 res1 = (Elf_Addr)linker_file_lookup_symbol(lf, symbol, deps);
1693
1694                 /*
1695                  * Cache global lookups during module relocation. The failure
1696                  * case is particularly expensive for callers, who must scan
1697                  * through the entire globals table doing strcmp(). Cache to
1698                  * avoid doing such work repeatedly.
1699                  *
1700                  * After relocation is complete, undefined globals will be
1701                  * restored to SHN_UNDEF in elf_obj_cleanup_globals_cache(),
1702                  * above.
1703                  */
1704                 if (res1 != 0) {
1705                         sym->st_shndx = SHN_FBSD_CACHED;
1706                         sym->st_value = res1;
1707                         *res = res1;
1708                         return (0);
1709                 } else if (ELF_ST_BIND(sym->st_info) == STB_WEAK) {
1710                         sym->st_value = 0;
1711                         *res = 0;
1712                         return (0);
1713                 }
1714                 return (EINVAL);
1715
1716         default:
1717                 return (EINVAL);
1718         }
1719 }
1720
1721 static void
1722 link_elf_fix_link_set(elf_file_t ef)
1723 {
1724         static const char startn[] = "__start_";
1725         static const char stopn[] = "__stop_";
1726         Elf_Sym *sym;
1727         const char *sym_name, *linkset_name;
1728         Elf_Addr startp, stopp;
1729         Elf_Size symidx;
1730         int start, i;
1731
1732         startp = stopp = 0;
1733         for (symidx = 1 /* zero entry is special */;
1734                 symidx < ef->ddbsymcnt; symidx++) {
1735                 sym = ef->ddbsymtab + symidx;
1736                 if (sym->st_shndx != SHN_UNDEF)
1737                         continue;
1738
1739                 sym_name = ef->ddbstrtab + sym->st_name;
1740                 if (strncmp(sym_name, startn, sizeof(startn) - 1) == 0) {
1741                         start = 1;
1742                         linkset_name = sym_name + sizeof(startn) - 1;
1743                 }
1744                 else if (strncmp(sym_name, stopn, sizeof(stopn) - 1) == 0) {
1745                         start = 0;
1746                         linkset_name = sym_name + sizeof(stopn) - 1;
1747                 }
1748                 else
1749                         continue;
1750
1751                 for (i = 0; i < ef->nprogtab; i++) {
1752                         if (strcmp(ef->progtab[i].name, linkset_name) == 0) {
1753                                 startp = (Elf_Addr)ef->progtab[i].addr;
1754                                 stopp = (Elf_Addr)(startp + ef->progtab[i].size);
1755                                 break;
1756                         }
1757                 }
1758                 if (i == ef->nprogtab)
1759                         continue;
1760
1761                 sym->st_value = start ? startp : stopp;
1762                 sym->st_shndx = i;
1763         }
1764 }
1765
1766 static int
1767 link_elf_reloc_local(linker_file_t lf, bool ifuncs)
1768 {
1769         elf_file_t ef = (elf_file_t)lf;
1770         const Elf_Rel *rellim;
1771         const Elf_Rel *rel;
1772         const Elf_Rela *relalim;
1773         const Elf_Rela *rela;
1774         const Elf_Sym *sym;
1775         Elf_Addr base;
1776         int i;
1777         Elf_Size symidx;
1778
1779         link_elf_fix_link_set(ef);
1780
1781         /* Perform relocations without addend if there are any: */
1782         for (i = 0; i < ef->nreltab; i++) {
1783                 rel = ef->reltab[i].rel;
1784                 if (rel == NULL) {
1785                         link_elf_error(ef->lf.filename, "lost a reltab");
1786                         return (ENOEXEC);
1787                 }
1788                 rellim = rel + ef->reltab[i].nrel;
1789                 base = findbase(ef, ef->reltab[i].sec);
1790                 if (base == 0) {
1791                         link_elf_error(ef->lf.filename, "lost base for reltab");
1792                         return (ENOEXEC);
1793                 }
1794                 for ( ; rel < rellim; rel++) {
1795                         symidx = ELF_R_SYM(rel->r_info);
1796                         if (symidx >= ef->ddbsymcnt)
1797                                 continue;
1798                         sym = ef->ddbsymtab + symidx;
1799                         /* Only do local relocs */
1800                         if (ELF_ST_BIND(sym->st_info) != STB_LOCAL)
1801                                 continue;
1802                         if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1803                             elf_is_ifunc_reloc(rel->r_info)) != ifuncs)
1804                                 continue;
1805                         if (elf_reloc_local(lf, base, rel, ELF_RELOC_REL,
1806                             elf_obj_lookup) != 0)
1807                                 return (ENOEXEC);
1808                 }
1809         }
1810
1811         /* Perform relocations with addend if there are any: */
1812         for (i = 0; i < ef->nrelatab; i++) {
1813                 rela = ef->relatab[i].rela;
1814                 if (rela == NULL) {
1815                         link_elf_error(ef->lf.filename, "lost a relatab!");
1816                         return (ENOEXEC);
1817                 }
1818                 relalim = rela + ef->relatab[i].nrela;
1819                 base = findbase(ef, ef->relatab[i].sec);
1820                 if (base == 0) {
1821                         link_elf_error(ef->lf.filename, "lost base for reltab");
1822                         return (ENOEXEC);
1823                 }
1824                 for ( ; rela < relalim; rela++) {
1825                         symidx = ELF_R_SYM(rela->r_info);
1826                         if (symidx >= ef->ddbsymcnt)
1827                                 continue;
1828                         sym = ef->ddbsymtab + symidx;
1829                         /* Only do local relocs */
1830                         if (ELF_ST_BIND(sym->st_info) != STB_LOCAL)
1831                                 continue;
1832                         if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1833                             elf_is_ifunc_reloc(rela->r_info)) != ifuncs)
1834                                 continue;
1835                         if (elf_reloc_local(lf, base, rela, ELF_RELOC_RELA,
1836                             elf_obj_lookup) != 0)
1837                                 return (ENOEXEC);
1838                 }
1839         }
1840         return (0);
1841 }
1842
1843 static long
1844 link_elf_symtab_get(linker_file_t lf, const Elf_Sym **symtab)
1845 {
1846         elf_file_t ef = (elf_file_t)lf;
1847
1848         *symtab = ef->ddbsymtab;
1849         if (*symtab == NULL)
1850                 return (0);
1851         return (ef->ddbsymcnt);
1852 }
1853     
1854 static long
1855 link_elf_strtab_get(linker_file_t lf, caddr_t *strtab)
1856 {
1857         elf_file_t ef = (elf_file_t)lf;
1858
1859         *strtab = ef->ddbstrtab;
1860         if (*strtab == NULL)
1861                 return (0);
1862         return (ef->ddbstrcnt);
1863 }