]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/kern/link_elf_obj.c
zfs: merge openzfs/zfs@f3678d70f (master) into main
[FreeBSD/FreeBSD.git] / sys / kern / link_elf_obj.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1998-2000 Doug Rabson
5  * Copyright (c) 2004 Peter Wemm
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include "opt_ddb.h"
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/fcntl.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/linker.h>
42 #include <sys/mutex.h>
43 #include <sys/mount.h>
44 #include <sys/namei.h>
45 #include <sys/proc.h>
46 #include <sys/rwlock.h>
47 #include <sys/vnode.h>
48
49 #include <machine/elf.h>
50
51 #include <net/vnet.h>
52
53 #include <security/mac/mac_framework.h>
54
55 #include <vm/vm.h>
56 #include <vm/vm_param.h>
57 #include <vm/pmap.h>
58 #include <vm/vm_extern.h>
59 #include <vm/vm_kern.h>
60 #include <vm/vm_map.h>
61 #include <vm/vm_object.h>
62 #include <vm/vm_page.h>
63 #include <vm/vm_pager.h>
64
65 #include <sys/link_elf.h>
66
67 #ifdef DDB_CTF
68 #include <contrib/zlib/zlib.h>
69 #endif
70
71 #include "linker_if.h"
72
/*
 * One entry of elf_file.progtab: a section of the object file that is
 * tracked by this linker class.
 */
73 typedef struct {
74         void            *addr;  /* Address of the section contents. */
75         Elf_Off         size;   /* Section size in bytes (from sh_size). */
76         int             flags;  /* Section flags. */
77         int             sec;    /* Original section number. */
78         char            *name;  /* Section name, or a "<<TYPE>>" placeholder. */
79 } Elf_progent;
80
/*
 * One entry of elf_file.reltab: an SHT_REL relocation table.
 */
81 typedef struct {
82         Elf_Rel         *rel;   /* Relocation entries. */
83         int             nrel;   /* Number of entries in rel. */
84         int             sec;    /* sh_info of the relocation section. */
85 } Elf_relent;
86
/*
 * One entry of elf_file.relatab: an SHT_RELA relocation table.
 */
87 typedef struct {
88         Elf_Rela        *rela;  /* Relocation entries (with addends). */
89         int             nrela;  /* Number of entries in rela. */
90         int             sec;    /* sh_info of the relocation section. */
91 } Elf_relaent;
92
/*
 * Per-file state of this linker class.  The common struct linker_file is the
 * first member, so a linker_file_t handed out by linker_make_file() for this
 * class is cast directly to elf_file_t.
 */
93 typedef struct elf_file {
94         struct linker_file lf;          /* Common fields */
95
96         int             preloaded;      /* Set by link_elf_link_preload() */
97         caddr_t         address;        /* Relocation address */
98         vm_object_t     object;         /* VM object to hold file pages */
99         Elf_Shdr        *e_shdr;        /* Section header table */
100
101         Elf_progent     *progtab;       /* Tracked sections */
102         u_int           nprogtab;       /* Number of progtab entries */
103
104         Elf_relaent     *relatab;       /* SHT_RELA tables */
105         u_int           nrelatab;       /* Number of relatab entries */
106
107         Elf_relent      *reltab;        /* SHT_REL tables */
108         int             nreltab;        /* Number of reltab entries */
109
110         Elf_Sym         *ddbsymtab;     /* The symbol table we are using */
111         long            ddbsymcnt;      /* Number of symbols */
112         caddr_t         ddbstrtab;      /* String table */
113         long            ddbstrcnt;      /* number of bytes in string table */
114
115         caddr_t         shstrtab;       /* Section name string table */
116         long            shstrcnt;       /* number of bytes in string table */
117
118         caddr_t         ctftab;         /* CTF table */
119         long            ctfcnt;         /* number of bytes in CTF table */
120         caddr_t         ctfoff;         /* CTF offset table */
121         caddr_t         typoff;         /* Type offset table */
122         long            typlen;         /* Number of type entries. */
123
124 } *elf_file_t;
125
126 #include <kern/kern_ctf.c>
127
/*
 * Forward declarations.  Most of these are the implementations registered in
 * link_elf_methods; link_elf_reloc_local() and elf_obj_lookup() are file-local
 * helpers that are not part of that table.
 */
128 static int      link_elf_link_preload(linker_class_t cls,
129                     const char *, linker_file_t *);
130 static int      link_elf_link_preload_finish(linker_file_t);
131 static int      link_elf_load_file(linker_class_t, const char *, linker_file_t *);
132 static int      link_elf_lookup_symbol(linker_file_t, const char *,
133                     c_linker_sym_t *);
134 static int      link_elf_symbol_values(linker_file_t, c_linker_sym_t,
135                     linker_symval_t *);
136 static int      link_elf_search_symbol(linker_file_t, caddr_t value,
137                     c_linker_sym_t *sym, long *diffp);
138
139 static void     link_elf_unload_file(linker_file_t);
140 static int      link_elf_lookup_set(linker_file_t, const char *,
141                     void ***, void ***, int *);
142 static int      link_elf_each_function_name(linker_file_t,
143                     int (*)(const char *, void *), void *);
144 static int      link_elf_each_function_nameval(linker_file_t,
145                                 linker_function_nameval_callback_t,
146                                 void *);
147 static int      link_elf_reloc_local(linker_file_t, bool);
148 static long     link_elf_symtab_get(linker_file_t, const Elf_Sym **);
149 static long     link_elf_strtab_get(linker_file_t, caddr_t *);
150
151 static int      elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps,
152                     Elf_Addr *);
153
/*
 * kobj method table for this linker class: maps each linker_* interface
 * method to its implementation.  The table is terminated by KOBJMETHOD_END
 * and is referenced from link_elf_class.
 */
154 static kobj_method_t link_elf_methods[] = {
155         KOBJMETHOD(linker_lookup_symbol,        link_elf_lookup_symbol),
156         KOBJMETHOD(linker_symbol_values,        link_elf_symbol_values),
157         KOBJMETHOD(linker_search_symbol,        link_elf_search_symbol),
158         KOBJMETHOD(linker_unload,               link_elf_unload_file),
159         KOBJMETHOD(linker_load_file,            link_elf_load_file),
160         KOBJMETHOD(linker_link_preload,         link_elf_link_preload),
161         KOBJMETHOD(linker_link_preload_finish,  link_elf_link_preload_finish),
162         KOBJMETHOD(linker_lookup_set,           link_elf_lookup_set),
163         KOBJMETHOD(linker_each_function_name,   link_elf_each_function_name),
164         KOBJMETHOD(linker_each_function_nameval, link_elf_each_function_nameval),
165         KOBJMETHOD(linker_ctf_get,              link_elf_ctf_get),
166         KOBJMETHOD(linker_symtab_get,           link_elf_symtab_get),
167         KOBJMETHOD(linker_strtab_get,           link_elf_strtab_get),
168         KOBJMETHOD_END
169 };
170
/*
 * Linker class descriptor registered by link_elf_init().  The class name
 * depends on the target ELF class; the remaining initializers are the method
 * table and the size of the per-file object (struct elf_file).
 */
171 static struct linker_class link_elf_class = {
172 #if ELF_TARG_CLASS == ELFCLASS32
173         "elf32_obj",
174 #else
175         "elf64_obj",
176 #endif
177         link_elf_methods, sizeof(struct elf_file)
178 };
179
/* Forward declarations of file-local helpers used before their definitions. */
180 static int      relocate_file(elf_file_t ef);
181 static void     elf_obj_cleanup_globals_cache(elf_file_t);
182
/*
 * Print a load error in "kldload: ..." form.  When 'filename' is NULL only
 * the message text 's' is printed; otherwise the file name precedes it.
 */
static void
link_elf_error(const char *filename, const char *s)
{
        if (filename != NULL) {
                printf("kldload: %s: %s\n", filename, s);
                return;
        }
        printf("kldload: %s\n", s);
}
191
/*
 * Register link_elf_class with the kernel linker.  Invoked through the
 * SYSINIT below (SI_SUB_KLD, SI_ORDER_SECOND); 'arg' is passed as NULL and
 * is not used.
 */
192 static void
193 link_elf_init(void *arg)
194 {
195
196         linker_add_class(&link_elf_class);
197 }
198 SYSINIT(link_elf_obj, SI_SUB_KLD, SI_ORDER_SECOND, link_elf_init, NULL);
199
200 static void
201 link_elf_protect_range(elf_file_t ef, vm_offset_t start, vm_offset_t end,
202     vm_prot_t prot)
203 {
204         int error __unused;
205
206         KASSERT(start <= end && start >= (vm_offset_t)ef->address &&
207             end <= round_page((vm_offset_t)ef->address + ef->lf.size),
208             ("link_elf_protect_range: invalid range %#jx-%#jx",
209             (uintmax_t)start, (uintmax_t)end));
210
211         if (start == end)
212                 return;
213         if (ef->preloaded) {
214 #ifdef __amd64__
215                 error = pmap_change_prot(start, end - start, prot);
216                 KASSERT(error == 0,
217                     ("link_elf_protect_range: pmap_change_prot() returned %d",
218                     error));
219 #endif
220                 return;
221         }
222         error = vm_map_protect(kernel_map, start, end, prot, 0,
223             VM_MAP_PROTECT_SET_PROT);
224         KASSERT(error == KERN_SUCCESS,
225             ("link_elf_protect_range: vm_map_protect() returned %d", error));
226 }
227
228 /*
229  * Restrict permissions on linker file memory based on section flags.
230  * Sections need not be page-aligned, so overlap within a page is possible.
231  */
232 static void
233 link_elf_protect(elf_file_t ef)
234 {
235         vm_offset_t end, segend, segstart, start;
236         vm_prot_t gapprot, prot, segprot;
237         int i;
238
239         /*
240          * If the file was preloaded, the last page may contain other preloaded
241          * data which may need to be writeable.  ELF files are always
242          * page-aligned, but other preloaded data, such as entropy or CPU
243          * microcode may be loaded with a smaller alignment.
244          */
245         gapprot = ef->preloaded ? VM_PROT_RW : VM_PROT_READ;
246
247         start = end = (vm_offset_t)ef->address;
248         prot = VM_PROT_READ;
249         for (i = 0; i < ef->nprogtab; i++) {
250                 /*
251                  * VNET and DPCPU sections have their memory allocated by their
252                  * respective subsystems.
253                  */
254                 if (ef->progtab[i].name != NULL && (
255 #ifdef VIMAGE
256                     strcmp(ef->progtab[i].name, VNET_SETNAME) == 0 ||
257 #endif
258                     strcmp(ef->progtab[i].name, DPCPU_SETNAME) == 0))
259                         continue;
260
261                 segstart = trunc_page((vm_offset_t)ef->progtab[i].addr);
262                 segend = round_page((vm_offset_t)ef->progtab[i].addr +
263                     ef->progtab[i].size);
264                 segprot = VM_PROT_READ;
265                 if ((ef->progtab[i].flags & SHF_WRITE) != 0)
266                         segprot |= VM_PROT_WRITE;
267                 if ((ef->progtab[i].flags & SHF_EXECINSTR) != 0)
268                         segprot |= VM_PROT_EXECUTE;
269
270                 if (end <= segstart) {
271                         /*
272                          * Case 1: there is no overlap between the previous
273                          * segment and this one.  Apply protections to the
274                          * previous segment, and protect the gap between the
275                          * previous and current segments, if any.
276                          */
277                         link_elf_protect_range(ef, start, end, prot);
278                         link_elf_protect_range(ef, end, segstart, gapprot);
279
280                         start = segstart;
281                         end = segend;
282                         prot = segprot;
283                 } else if (start < segstart && end == segend) {
284                         /*
285                          * Case 2: the current segment is a subrange of the
286                          * previous segment.  Apply protections to the
287                          * non-overlapping portion of the previous segment.
288                          */
289                         link_elf_protect_range(ef, start, segstart, prot);
290
291                         start = segstart;
292                         prot |= segprot;
293                 } else if (end < segend) {
294                         /*
295                          * Case 3: there is partial overlap between the previous
296                          * and current segments.  Apply protections to the
297                          * non-overlapping portion of the previous segment, and
298                          * then the overlap, which must use the union of the two
299                          * segments' protections.
300                          */
301                         link_elf_protect_range(ef, start, segstart, prot);
302                         link_elf_protect_range(ef, segstart, end,
303                             prot | segprot);
304                         start = end;
305                         end = segend;
306                         prot = segprot;
307                 } else {
308                         /*
309                          * Case 4: the two segments reside in the same page.
310                          */
311                         prot |= segprot;
312                 }
313         }
314
315         /*
316          * Fix up the last unprotected segment and trailing data.
317          */
318         link_elf_protect_range(ef, start, end, prot);
319         link_elf_protect_range(ef, end,
320             round_page((vm_offset_t)ef->address + ef->lf.size), gapprot);
321 }
322
323 static int
324 link_elf_link_preload(linker_class_t cls, const char *filename,
325     linker_file_t *result)
326 {
327         Elf_Ehdr *hdr;
328         Elf_Shdr *shdr;
329         Elf_Sym *es;
330         void *modptr, *baseptr, *sizeptr;
331         char *type;
332         elf_file_t ef;
333         linker_file_t lf;
334         Elf_Addr off;
335         int error, i, j, pb, ra, rl, shstrindex, symstrindex, symtabindex;
336
337         /* Look to see if we have the file preloaded */
338         modptr = preload_search_by_name(filename);
339         if (modptr == NULL)
340                 return ENOENT;
341
342         type = (char *)preload_search_info(modptr, MODINFO_TYPE);
343         baseptr = preload_search_info(modptr, MODINFO_ADDR);
344         sizeptr = preload_search_info(modptr, MODINFO_SIZE);
345         hdr = (Elf_Ehdr *)preload_search_info(modptr, MODINFO_METADATA |
346             MODINFOMD_ELFHDR);
347         shdr = (Elf_Shdr *)preload_search_info(modptr, MODINFO_METADATA |
348             MODINFOMD_SHDR);
349         if (type == NULL || (strcmp(type, "elf" __XSTRING(__ELF_WORD_SIZE)
350             " obj module") != 0 &&
351             strcmp(type, "elf obj module") != 0)) {
352                 return (EFTYPE);
353         }
354         if (baseptr == NULL || sizeptr == NULL || hdr == NULL ||
355             shdr == NULL)
356                 return (EINVAL);
357
358         lf = linker_make_file(filename, &link_elf_class);
359         if (lf == NULL)
360                 return (ENOMEM);
361
362         ef = (elf_file_t)lf;
363         ef->preloaded = 1;
364         ef->address = *(caddr_t *)baseptr;
365         lf->address = *(caddr_t *)baseptr;
366         lf->size = *(size_t *)sizeptr;
367
368         if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
369             hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
370             hdr->e_ident[EI_VERSION] != EV_CURRENT ||
371             hdr->e_version != EV_CURRENT ||
372             hdr->e_type != ET_REL ||
373             hdr->e_machine != ELF_TARG_MACH) {
374                 error = EFTYPE;
375                 goto out;
376         }
377         ef->e_shdr = shdr;
378
379         /* Scan the section header for information and table sizing. */
380         symtabindex = -1;
381         symstrindex = -1;
382         for (i = 0; i < hdr->e_shnum; i++) {
383                 switch (shdr[i].sh_type) {
384                 case SHT_PROGBITS:
385                 case SHT_NOBITS:
386 #ifdef __amd64__
387                 case SHT_X86_64_UNWIND:
388 #endif
389                 case SHT_INIT_ARRAY:
390                 case SHT_FINI_ARRAY:
391                         /* Ignore sections not loaded by the loader. */
392                         if (shdr[i].sh_addr == 0)
393                                 break;
394                         ef->nprogtab++;
395                         break;
396                 case SHT_SYMTAB:
397                         symtabindex = i;
398                         symstrindex = shdr[i].sh_link;
399                         break;
400                 case SHT_REL:
401                         /*
402                          * Ignore relocation tables for sections not
403                          * loaded by the loader.
404                          */
405                         if (shdr[shdr[i].sh_info].sh_addr == 0)
406                                 break;
407                         ef->nreltab++;
408                         break;
409                 case SHT_RELA:
410                         if (shdr[shdr[i].sh_info].sh_addr == 0)
411                                 break;
412                         ef->nrelatab++;
413                         break;
414                 }
415         }
416
417         shstrindex = hdr->e_shstrndx;
418         if (ef->nprogtab == 0 || symstrindex < 0 ||
419             symstrindex >= hdr->e_shnum ||
420             shdr[symstrindex].sh_type != SHT_STRTAB || shstrindex == 0 ||
421             shstrindex >= hdr->e_shnum ||
422             shdr[shstrindex].sh_type != SHT_STRTAB) {
423                 printf("%s: bad/missing section headers\n", filename);
424                 error = ENOEXEC;
425                 goto out;
426         }
427
428         /* Allocate space for tracking the load chunks */
429         if (ef->nprogtab != 0)
430                 ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab),
431                     M_LINKER, M_WAITOK | M_ZERO);
432         if (ef->nreltab != 0)
433                 ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab),
434                     M_LINKER, M_WAITOK | M_ZERO);
435         if (ef->nrelatab != 0)
436                 ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab),
437                     M_LINKER, M_WAITOK | M_ZERO);
438         if ((ef->nprogtab != 0 && ef->progtab == NULL) ||
439             (ef->nreltab != 0 && ef->reltab == NULL) ||
440             (ef->nrelatab != 0 && ef->relatab == NULL)) {
441                 error = ENOMEM;
442                 goto out;
443         }
444
445         /* XXX, relocate the sh_addr fields saved by the loader. */
446         off = 0;
447         for (i = 0; i < hdr->e_shnum; i++) {
448                 if (shdr[i].sh_addr != 0 && (off == 0 || shdr[i].sh_addr < off))
449                         off = shdr[i].sh_addr;
450         }
451         for (i = 0; i < hdr->e_shnum; i++) {
452                 if (shdr[i].sh_addr != 0)
453                         shdr[i].sh_addr = shdr[i].sh_addr - off +
454                             (Elf_Addr)ef->address;
455         }
456
457         ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym);
458         ef->ddbsymtab = (Elf_Sym *)shdr[symtabindex].sh_addr;
459         ef->ddbstrcnt = shdr[symstrindex].sh_size;
460         ef->ddbstrtab = (char *)shdr[symstrindex].sh_addr;
461         ef->shstrcnt = shdr[shstrindex].sh_size;
462         ef->shstrtab = (char *)shdr[shstrindex].sh_addr;
463
464         /* Now fill out progtab and the relocation tables. */
465         pb = 0;
466         rl = 0;
467         ra = 0;
468         for (i = 0; i < hdr->e_shnum; i++) {
469                 switch (shdr[i].sh_type) {
470                 case SHT_PROGBITS:
471                 case SHT_NOBITS:
472 #ifdef __amd64__
473                 case SHT_X86_64_UNWIND:
474 #endif
475                 case SHT_INIT_ARRAY:
476                 case SHT_FINI_ARRAY:
477                         if (shdr[i].sh_addr == 0)
478                                 break;
479                         ef->progtab[pb].addr = (void *)shdr[i].sh_addr;
480                         if (shdr[i].sh_type == SHT_PROGBITS)
481                                 ef->progtab[pb].name = "<<PROGBITS>>";
482 #ifdef __amd64__
483                         else if (shdr[i].sh_type == SHT_X86_64_UNWIND)
484                                 ef->progtab[pb].name = "<<UNWIND>>";
485 #endif
486                         else if (shdr[i].sh_type == SHT_INIT_ARRAY)
487                                 ef->progtab[pb].name = "<<INIT_ARRAY>>";
488                         else if (shdr[i].sh_type == SHT_FINI_ARRAY)
489                                 ef->progtab[pb].name = "<<FINI_ARRAY>>";
490                         else
491                                 ef->progtab[pb].name = "<<NOBITS>>";
492                         ef->progtab[pb].size = shdr[i].sh_size;
493                         ef->progtab[pb].flags = shdr[i].sh_flags;
494                         ef->progtab[pb].sec = i;
495                         if (ef->shstrtab && shdr[i].sh_name != 0)
496                                 ef->progtab[pb].name =
497                                     ef->shstrtab + shdr[i].sh_name;
498                         if (ef->progtab[pb].name != NULL && 
499                             !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) {
500                                 void *dpcpu;
501
502                                 dpcpu = dpcpu_alloc(shdr[i].sh_size);
503                                 if (dpcpu == NULL) {
504                                         printf("%s: pcpu module space is out "
505                                             "of space; cannot allocate %#jx "
506                                             "for %s\n", __func__,
507                                             (uintmax_t)shdr[i].sh_size,
508                                             filename);
509                                         error = ENOSPC;
510                                         goto out;
511                                 }
512                                 memcpy(dpcpu, ef->progtab[pb].addr,
513                                     ef->progtab[pb].size);
514                                 dpcpu_copy(dpcpu, shdr[i].sh_size);
515                                 ef->progtab[pb].addr = dpcpu;
516 #ifdef VIMAGE
517                         } else if (ef->progtab[pb].name != NULL &&
518                             !strcmp(ef->progtab[pb].name, VNET_SETNAME)) {
519                                 void *vnet_data;
520
521                                 vnet_data = vnet_data_alloc(shdr[i].sh_size);
522                                 if (vnet_data == NULL) {
523                                         printf("%s: vnet module space is out "
524                                             "of space; cannot allocate %#jx "
525                                             "for %s\n", __func__,
526                                             (uintmax_t)shdr[i].sh_size,
527                                             filename);
528                                         error = ENOSPC;
529                                         goto out;
530                                 }
531                                 memcpy(vnet_data, ef->progtab[pb].addr,
532                                     ef->progtab[pb].size);
533                                 vnet_data_copy(vnet_data, shdr[i].sh_size);
534                                 ef->progtab[pb].addr = vnet_data;
535 #endif
536                         } else if ((ef->progtab[pb].name != NULL &&
537                             strcmp(ef->progtab[pb].name, ".ctors") == 0) ||
538                             shdr[i].sh_type == SHT_INIT_ARRAY) {
539                                 if (lf->ctors_addr != 0) {
540                                         printf(
541                                     "%s: multiple ctor sections in %s\n",
542                                             __func__, filename);
543                                 } else {
544                                         lf->ctors_addr = ef->progtab[pb].addr;
545                                         lf->ctors_size = shdr[i].sh_size;
546                                 }
547                         } else if ((ef->progtab[pb].name != NULL &&
548                             strcmp(ef->progtab[pb].name, ".dtors") == 0) ||
549                             shdr[i].sh_type == SHT_FINI_ARRAY) {
550                                 if (lf->dtors_addr != 0) {
551                                         printf(
552                                     "%s: multiple dtor sections in %s\n",
553                                             __func__, filename);
554                                 } else {
555                                         lf->dtors_addr = ef->progtab[pb].addr;
556                                         lf->dtors_size = shdr[i].sh_size;
557                                 }
558                         }
559
560                         /* Update all symbol values with the offset. */
561                         for (j = 0; j < ef->ddbsymcnt; j++) {
562                                 es = &ef->ddbsymtab[j];
563                                 if (es->st_shndx != i)
564                                         continue;
565                                 es->st_value += (Elf_Addr)ef->progtab[pb].addr;
566                         }
567                         pb++;
568                         break;
569                 case SHT_REL:
570                         if (shdr[shdr[i].sh_info].sh_addr == 0)
571                                 break;
572                         ef->reltab[rl].rel = (Elf_Rel *)shdr[i].sh_addr;
573                         ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel);
574                         ef->reltab[rl].sec = shdr[i].sh_info;
575                         rl++;
576                         break;
577                 case SHT_RELA:
578                         if (shdr[shdr[i].sh_info].sh_addr == 0)
579                                 break;
580                         ef->relatab[ra].rela = (Elf_Rela *)shdr[i].sh_addr;
581                         ef->relatab[ra].nrela =
582                             shdr[i].sh_size / sizeof(Elf_Rela);
583                         ef->relatab[ra].sec = shdr[i].sh_info;
584                         ra++;
585                         break;
586                 }
587         }
588         if (pb != ef->nprogtab) {
589                 printf("%s: lost progbits\n", filename);
590                 error = ENOEXEC;
591                 goto out;
592         }
593         if (rl != ef->nreltab) {
594                 printf("%s: lost reltab\n", filename);
595                 error = ENOEXEC;
596                 goto out;
597         }
598         if (ra != ef->nrelatab) {
599                 printf("%s: lost relatab\n", filename);
600                 error = ENOEXEC;
601                 goto out;
602         }
603
604         /*
605          * The file needs to be writeable and executable while applying
606          * relocations.  Mapping protections are applied once relocation
607          * processing is complete.
608          */
609         link_elf_protect_range(ef, (vm_offset_t)ef->address,
610             round_page((vm_offset_t)ef->address + ef->lf.size), VM_PROT_ALL);
611
612         /* Local intra-module relocations */
613         error = link_elf_reloc_local(lf, false);
614         if (error != 0)
615                 goto out;
616         *result = lf;
617         return (0);
618
619 out:
620         /* preload not done this way */
621         linker_file_unload(lf, LINKER_UNLOAD_FORCE);
622         return (error);
623 }
624
/*
 * Call every non-NULL function pointer stored in the array at 'addr'.
 * 'size' is the array size in bytes; a NULL 'addr' or a zero 'size' means
 * there is nothing to call.  Entries are invoked in ascending address order.
 */
static void
link_elf_invoke_cbs(caddr_t addr, size_t size)
{
        void (**fn)(void);
        size_t left;

        if (addr == NULL || size == 0)
                return;

        fn = (void *)addr;
        for (left = size / sizeof(*fn); left > 0; left--, fn++) {
                if (*fn != NULL)
                        (**fn)();
        }
}
640
641 static int
642 link_elf_link_preload_finish(linker_file_t lf)
643 {
644         elf_file_t ef;
645         int error;
646
647         ef = (elf_file_t)lf;
648         error = relocate_file(ef);
649         if (error)
650                 return (error);
651
652         /* Notify MD code that a module is being loaded. */
653         error = elf_cpu_load_file(lf);
654         if (error)
655                 return (error);
656
657 #if defined(__i386__) || defined(__amd64__)
658         /* Now ifuncs. */
659         error = link_elf_reloc_local(lf, true);
660         if (error != 0)
661                 return (error);
662 #endif
663
664         /* Apply protections now that relocation processing is complete. */
665         link_elf_protect(ef);
666
667         link_elf_invoke_cbs(lf->ctors_addr, lf->ctors_size);
668         return (0);
669 }
670
671 static int
672 link_elf_load_file(linker_class_t cls, const char *filename,
673     linker_file_t *result)
674 {
675         struct nameidata *nd;
676         struct thread *td = curthread;  /* XXX */
677         Elf_Ehdr *hdr;
678         Elf_Shdr *shdr;
679         Elf_Sym *es;
680         int nbytes, i, j;
681         vm_offset_t mapbase;
682         size_t mapsize;
683         int error = 0;
684         ssize_t resid;
685         int flags;
686         elf_file_t ef;
687         linker_file_t lf;
688         int symtabindex;
689         int symstrindex;
690         int shstrindex;
691         int nsym;
692         int pb, rl, ra;
693         int alignmask;
694
695         shdr = NULL;
696         lf = NULL;
697         mapsize = 0;
698         hdr = NULL;
699
700         nd = malloc(sizeof(struct nameidata), M_TEMP, M_WAITOK);
701         NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, filename, td);
702         flags = FREAD;
703         error = vn_open(nd, &flags, 0, NULL);
704         if (error) {
705                 free(nd, M_TEMP);
706                 return error;
707         }
708         NDFREE(nd, NDF_ONLY_PNBUF);
709         if (nd->ni_vp->v_type != VREG) {
710                 error = ENOEXEC;
711                 goto out;
712         }
713 #ifdef MAC
714         error = mac_kld_check_load(td->td_ucred, nd->ni_vp);
715         if (error) {
716                 goto out;
717         }
718 #endif
719
720         /* Read the elf header from the file. */
721         hdr = malloc(sizeof(*hdr), M_LINKER, M_WAITOK);
722         error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)hdr, sizeof(*hdr), 0,
723             UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
724             &resid, td);
725         if (error)
726                 goto out;
727         if (resid != 0){
728                 error = ENOEXEC;
729                 goto out;
730         }
731
732         if (!IS_ELF(*hdr)) {
733                 error = ENOEXEC;
734                 goto out;
735         }
736
737         if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS
738             || hdr->e_ident[EI_DATA] != ELF_TARG_DATA) {
739                 link_elf_error(filename, "Unsupported file layout");
740                 error = ENOEXEC;
741                 goto out;
742         }
743         if (hdr->e_ident[EI_VERSION] != EV_CURRENT
744             || hdr->e_version != EV_CURRENT) {
745                 link_elf_error(filename, "Unsupported file version");
746                 error = ENOEXEC;
747                 goto out;
748         }
749         if (hdr->e_type != ET_REL) {
750                 error = ENOSYS;
751                 goto out;
752         }
753         if (hdr->e_machine != ELF_TARG_MACH) {
754                 link_elf_error(filename, "Unsupported machine");
755                 error = ENOEXEC;
756                 goto out;
757         }
758
759         lf = linker_make_file(filename, &link_elf_class);
760         if (!lf) {
761                 error = ENOMEM;
762                 goto out;
763         }
764         ef = (elf_file_t) lf;
765         ef->nprogtab = 0;
766         ef->e_shdr = 0;
767         ef->nreltab = 0;
768         ef->nrelatab = 0;
769
770         /* Allocate and read in the section header */
771         nbytes = hdr->e_shnum * hdr->e_shentsize;
772         if (nbytes == 0 || hdr->e_shoff == 0 ||
773             hdr->e_shentsize != sizeof(Elf_Shdr)) {
774                 error = ENOEXEC;
775                 goto out;
776         }
777         shdr = malloc(nbytes, M_LINKER, M_WAITOK);
778         ef->e_shdr = shdr;
779         error = vn_rdwr(UIO_READ, nd->ni_vp, (caddr_t)shdr, nbytes,
780             hdr->e_shoff, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
781             NOCRED, &resid, td);
782         if (error)
783                 goto out;
784         if (resid) {
785                 error = ENOEXEC;
786                 goto out;
787         }
788
789         /* Scan the section header for information and table sizing. */
790         nsym = 0;
791         symtabindex = -1;
792         symstrindex = -1;
793         for (i = 0; i < hdr->e_shnum; i++) {
794                 if (shdr[i].sh_size == 0)
795                         continue;
796                 switch (shdr[i].sh_type) {
797                 case SHT_PROGBITS:
798                 case SHT_NOBITS:
799 #ifdef __amd64__
800                 case SHT_X86_64_UNWIND:
801 #endif
802                 case SHT_INIT_ARRAY:
803                 case SHT_FINI_ARRAY:
804                         if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
805                                 break;
806                         ef->nprogtab++;
807                         break;
808                 case SHT_SYMTAB:
809                         nsym++;
810                         symtabindex = i;
811                         symstrindex = shdr[i].sh_link;
812                         break;
813                 case SHT_REL:
814                         /*
815                          * Ignore relocation tables for unallocated
816                          * sections.
817                          */
818                         if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
819                                 break;
820                         ef->nreltab++;
821                         break;
822                 case SHT_RELA:
823                         if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
824                                 break;
825                         ef->nrelatab++;
826                         break;
827                 case SHT_STRTAB:
828                         break;
829                 }
830         }
831         if (ef->nprogtab == 0) {
832                 link_elf_error(filename, "file has no contents");
833                 error = ENOEXEC;
834                 goto out;
835         }
836         if (nsym != 1) {
837                 /* Only allow one symbol table for now */
838                 link_elf_error(filename,
839                     "file must have exactly one symbol table");
840                 error = ENOEXEC;
841                 goto out;
842         }
843         if (symstrindex < 0 || symstrindex > hdr->e_shnum ||
844             shdr[symstrindex].sh_type != SHT_STRTAB) {
845                 link_elf_error(filename, "file has invalid symbol strings");
846                 error = ENOEXEC;
847                 goto out;
848         }
849
850         /* Allocate space for tracking the load chunks */
851         if (ef->nprogtab != 0)
852                 ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab),
853                     M_LINKER, M_WAITOK | M_ZERO);
854         if (ef->nreltab != 0)
855                 ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab),
856                     M_LINKER, M_WAITOK | M_ZERO);
857         if (ef->nrelatab != 0)
858                 ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab),
859                     M_LINKER, M_WAITOK | M_ZERO);
860
861         if (symtabindex == -1) {
862                 link_elf_error(filename, "lost symbol table index");
863                 error = ENOEXEC;
864                 goto out;
865         }
866         /* Allocate space for and load the symbol table */
867         ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym);
868         ef->ddbsymtab = malloc(shdr[symtabindex].sh_size, M_LINKER, M_WAITOK);
869         error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)ef->ddbsymtab,
870             shdr[symtabindex].sh_size, shdr[symtabindex].sh_offset,
871             UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
872             &resid, td);
873         if (error)
874                 goto out;
875         if (resid != 0){
876                 error = EINVAL;
877                 goto out;
878         }
879
880         /* Allocate space for and load the symbol strings */
881         ef->ddbstrcnt = shdr[symstrindex].sh_size;
882         ef->ddbstrtab = malloc(shdr[symstrindex].sh_size, M_LINKER, M_WAITOK);
883         error = vn_rdwr(UIO_READ, nd->ni_vp, ef->ddbstrtab,
884             shdr[symstrindex].sh_size, shdr[symstrindex].sh_offset,
885             UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
886             &resid, td);
887         if (error)
888                 goto out;
889         if (resid != 0){
890                 error = EINVAL;
891                 goto out;
892         }
893
894         /* Do we have a string table for the section names?  */
895         shstrindex = -1;
896         if (hdr->e_shstrndx != 0 &&
897             shdr[hdr->e_shstrndx].sh_type == SHT_STRTAB) {
898                 shstrindex = hdr->e_shstrndx;
899                 ef->shstrcnt = shdr[shstrindex].sh_size;
900                 ef->shstrtab = malloc(shdr[shstrindex].sh_size, M_LINKER,
901                     M_WAITOK);
902                 error = vn_rdwr(UIO_READ, nd->ni_vp, ef->shstrtab,
903                     shdr[shstrindex].sh_size, shdr[shstrindex].sh_offset,
904                     UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
905                     &resid, td);
906                 if (error)
907                         goto out;
908                 if (resid != 0){
909                         error = EINVAL;
910                         goto out;
911                 }
912         }
913
914         /* Size up code/data(progbits) and bss(nobits). */
915         alignmask = 0;
916         for (i = 0; i < hdr->e_shnum; i++) {
917                 if (shdr[i].sh_size == 0)
918                         continue;
919                 switch (shdr[i].sh_type) {
920                 case SHT_PROGBITS:
921                 case SHT_NOBITS:
922 #ifdef __amd64__
923                 case SHT_X86_64_UNWIND:
924 #endif
925                 case SHT_INIT_ARRAY:
926                 case SHT_FINI_ARRAY:
927                         if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
928                                 break;
929                         alignmask = shdr[i].sh_addralign - 1;
930                         mapsize += alignmask;
931                         mapsize &= ~alignmask;
932                         mapsize += shdr[i].sh_size;
933                         break;
934                 }
935         }
936
937         /*
938          * We know how much space we need for the text/data/bss/etc.
939          * This stuff needs to be in a single chunk so that profiling etc
940          * can get the bounds and gdb can associate offsets with modules
941          */
942         ef->object = vm_pager_allocate(OBJT_PHYS, NULL, round_page(mapsize),
943             VM_PROT_ALL, 0, thread0.td_ucred);
944         if (ef->object == NULL) {
945                 error = ENOMEM;
946                 goto out;
947         }
948 #if VM_NRESERVLEVEL > 0
949         vm_object_color(ef->object, 0);
950 #endif
951
952         /*
953          * In order to satisfy amd64's architectural requirements on the
954          * location of code and data in the kernel's address space, request a
955          * mapping that is above the kernel.
956          *
957          * Protections will be restricted once relocations are applied.
958          */
959 #ifdef __amd64__
960         mapbase = KERNBASE;
961 #else
962         mapbase = VM_MIN_KERNEL_ADDRESS;
963 #endif
964         error = vm_map_find(kernel_map, ef->object, 0, &mapbase,
965             round_page(mapsize), 0, VMFS_OPTIMAL_SPACE, VM_PROT_ALL,
966             VM_PROT_ALL, 0);
967         if (error != KERN_SUCCESS) {
968                 vm_object_deallocate(ef->object);
969                 ef->object = NULL;
970                 error = ENOMEM;
971                 goto out;
972         }
973
974         /* Wire the pages */
975         error = vm_map_wire(kernel_map, mapbase,
976             mapbase + round_page(mapsize),
977             VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
978         if (error != KERN_SUCCESS) {
979                 error = ENOMEM;
980                 goto out;
981         }
982
983         /* Inform the kld system about the situation */
984         lf->address = ef->address = (caddr_t)mapbase;
985         lf->size = mapsize;
986
987         /*
988          * Now load code/data(progbits), zero bss(nobits), allocate space for
989          * and load relocs
990          */
991         pb = 0;
992         rl = 0;
993         ra = 0;
994         alignmask = 0;
995         for (i = 0; i < hdr->e_shnum; i++) {
996                 if (shdr[i].sh_size == 0)
997                         continue;
998                 switch (shdr[i].sh_type) {
999                 case SHT_PROGBITS:
1000                 case SHT_NOBITS:
1001 #ifdef __amd64__
1002                 case SHT_X86_64_UNWIND:
1003 #endif
1004                 case SHT_INIT_ARRAY:
1005                 case SHT_FINI_ARRAY:
1006                         if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
1007                                 break;
1008                         alignmask = shdr[i].sh_addralign - 1;
1009                         mapbase += alignmask;
1010                         mapbase &= ~alignmask;
1011                         if (ef->shstrtab != NULL && shdr[i].sh_name != 0) {
1012                                 ef->progtab[pb].name =
1013                                     ef->shstrtab + shdr[i].sh_name;
1014                                 if (!strcmp(ef->progtab[pb].name, ".ctors") ||
1015                                     shdr[i].sh_type == SHT_INIT_ARRAY) {
1016                                         if (lf->ctors_addr != 0) {
1017                                                 printf(
1018                                     "%s: multiple ctor sections in %s\n",
1019                                                     __func__, filename);
1020                                         } else {
1021                                                 lf->ctors_addr =
1022                                                     (caddr_t)mapbase;
1023                                                 lf->ctors_size =
1024                                                     shdr[i].sh_size;
1025                                         }
1026                                 } else if (!strcmp(ef->progtab[pb].name,
1027                                     ".dtors") ||
1028                                     shdr[i].sh_type == SHT_FINI_ARRAY) {
1029                                         if (lf->dtors_addr != 0) {
1030                                                 printf(
1031                                     "%s: multiple dtor sections in %s\n",
1032                                                     __func__, filename);
1033                                         } else {
1034                                                 lf->dtors_addr =
1035                                                     (caddr_t)mapbase;
1036                                                 lf->dtors_size =
1037                                                     shdr[i].sh_size;
1038                                         }
1039                                 }
1040                         } else if (shdr[i].sh_type == SHT_PROGBITS)
1041                                 ef->progtab[pb].name = "<<PROGBITS>>";
1042 #ifdef __amd64__
1043                         else if (shdr[i].sh_type == SHT_X86_64_UNWIND)
1044                                 ef->progtab[pb].name = "<<UNWIND>>";
1045 #endif
1046                         else
1047                                 ef->progtab[pb].name = "<<NOBITS>>";
1048                         if (ef->progtab[pb].name != NULL && 
1049                             !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) {
1050                                 ef->progtab[pb].addr =
1051                                     dpcpu_alloc(shdr[i].sh_size);
1052                                 if (ef->progtab[pb].addr == NULL) {
1053                                         printf("%s: pcpu module space is out "
1054                                             "of space; cannot allocate %#jx "
1055                                             "for %s\n", __func__,
1056                                             (uintmax_t)shdr[i].sh_size,
1057                                             filename);
1058                                 }
1059                         }
1060 #ifdef VIMAGE
1061                         else if (ef->progtab[pb].name != NULL &&
1062                             !strcmp(ef->progtab[pb].name, VNET_SETNAME)) {
1063                                 ef->progtab[pb].addr =
1064                                     vnet_data_alloc(shdr[i].sh_size);
1065                                 if (ef->progtab[pb].addr == NULL) {
1066                                         printf("%s: vnet module space is out "
1067                                             "of space; cannot allocate %#jx "
1068                                             "for %s\n", __func__,
1069                                             (uintmax_t)shdr[i].sh_size,
1070                                             filename);
1071                                 }
1072                         }
1073 #endif
1074                         else
1075                                 ef->progtab[pb].addr =
1076                                     (void *)(uintptr_t)mapbase;
1077                         if (ef->progtab[pb].addr == NULL) {
1078                                 error = ENOSPC;
1079                                 goto out;
1080                         }
1081                         ef->progtab[pb].size = shdr[i].sh_size;
1082                         ef->progtab[pb].flags = shdr[i].sh_flags;
1083                         ef->progtab[pb].sec = i;
1084                         if (shdr[i].sh_type == SHT_PROGBITS
1085 #ifdef __amd64__
1086                             || shdr[i].sh_type == SHT_X86_64_UNWIND
1087 #endif
1088                             ) {
1089                                 error = vn_rdwr(UIO_READ, nd->ni_vp,
1090                                     ef->progtab[pb].addr,
1091                                     shdr[i].sh_size, shdr[i].sh_offset,
1092                                     UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
1093                                     NOCRED, &resid, td);
1094                                 if (error)
1095                                         goto out;
1096                                 if (resid != 0){
1097                                         error = EINVAL;
1098                                         goto out;
1099                                 }
1100                                 /* Initialize the per-cpu or vnet area. */
1101                                 if (ef->progtab[pb].addr != (void *)mapbase &&
1102                                     !strcmp(ef->progtab[pb].name, DPCPU_SETNAME))
1103                                         dpcpu_copy(ef->progtab[pb].addr,
1104                                             shdr[i].sh_size);
1105 #ifdef VIMAGE
1106                                 else if (ef->progtab[pb].addr !=
1107                                     (void *)mapbase &&
1108                                     !strcmp(ef->progtab[pb].name, VNET_SETNAME))
1109                                         vnet_data_copy(ef->progtab[pb].addr,
1110                                             shdr[i].sh_size);
1111 #endif
1112                         } else
1113                                 bzero(ef->progtab[pb].addr, shdr[i].sh_size);
1114
1115                         /* Update all symbol values with the offset. */
1116                         for (j = 0; j < ef->ddbsymcnt; j++) {
1117                                 es = &ef->ddbsymtab[j];
1118                                 if (es->st_shndx != i)
1119                                         continue;
1120                                 es->st_value += (Elf_Addr)ef->progtab[pb].addr;
1121                         }
1122                         mapbase += shdr[i].sh_size;
1123                         pb++;
1124                         break;
1125                 case SHT_REL:
1126                         if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
1127                                 break;
1128                         ef->reltab[rl].rel = malloc(shdr[i].sh_size, M_LINKER,
1129                             M_WAITOK);
1130                         ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel);
1131                         ef->reltab[rl].sec = shdr[i].sh_info;
1132                         error = vn_rdwr(UIO_READ, nd->ni_vp,
1133                             (void *)ef->reltab[rl].rel,
1134                             shdr[i].sh_size, shdr[i].sh_offset,
1135                             UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
1136                             &resid, td);
1137                         if (error)
1138                                 goto out;
1139                         if (resid != 0){
1140                                 error = EINVAL;
1141                                 goto out;
1142                         }
1143                         rl++;
1144                         break;
1145                 case SHT_RELA:
1146                         if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
1147                                 break;
1148                         ef->relatab[ra].rela = malloc(shdr[i].sh_size, M_LINKER,
1149                             M_WAITOK);
1150                         ef->relatab[ra].nrela =
1151                             shdr[i].sh_size / sizeof(Elf_Rela);
1152                         ef->relatab[ra].sec = shdr[i].sh_info;
1153                         error = vn_rdwr(UIO_READ, nd->ni_vp,
1154                             (void *)ef->relatab[ra].rela,
1155                             shdr[i].sh_size, shdr[i].sh_offset,
1156                             UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
1157                             &resid, td);
1158                         if (error)
1159                                 goto out;
1160                         if (resid != 0){
1161                                 error = EINVAL;
1162                                 goto out;
1163                         }
1164                         ra++;
1165                         break;
1166                 }
1167         }
1168         if (pb != ef->nprogtab) {
1169                 link_elf_error(filename, "lost progbits");
1170                 error = ENOEXEC;
1171                 goto out;
1172         }
1173         if (rl != ef->nreltab) {
1174                 link_elf_error(filename, "lost reltab");
1175                 error = ENOEXEC;
1176                 goto out;
1177         }
1178         if (ra != ef->nrelatab) {
1179                 link_elf_error(filename, "lost relatab");
1180                 error = ENOEXEC;
1181                 goto out;
1182         }
1183         if (mapbase != (vm_offset_t)ef->address + mapsize) {
1184                 printf(
1185                     "%s: mapbase 0x%lx != address %p + mapsize 0x%lx (0x%lx)\n",
1186                     filename != NULL ? filename : "<none>",
1187                     (u_long)mapbase, ef->address, (u_long)mapsize,
1188                     (u_long)(vm_offset_t)ef->address + mapsize);
1189                 error = ENOMEM;
1190                 goto out;
1191         }
1192
1193         /* Local intra-module relocations */
1194         error = link_elf_reloc_local(lf, false);
1195         if (error != 0)
1196                 goto out;
1197
1198         /* Pull in dependencies */
1199         VOP_UNLOCK(nd->ni_vp);
1200         error = linker_load_dependencies(lf);
1201         vn_lock(nd->ni_vp, LK_EXCLUSIVE | LK_RETRY);
1202         if (error)
1203                 goto out;
1204
1205         /* External relocations */
1206         error = relocate_file(ef);
1207         if (error)
1208                 goto out;
1209
1210         /* Notify MD code that a module is being loaded. */
1211         error = elf_cpu_load_file(lf);
1212         if (error)
1213                 goto out;
1214
1215 #if defined(__i386__) || defined(__amd64__)
1216         /* Now ifuncs. */
1217         error = link_elf_reloc_local(lf, true);
1218         if (error != 0)
1219                 goto out;
1220 #endif
1221
1222         link_elf_protect(ef);
1223         link_elf_invoke_cbs(lf->ctors_addr, lf->ctors_size);
1224         *result = lf;
1225
1226 out:
1227         VOP_UNLOCK(nd->ni_vp);
1228         vn_close(nd->ni_vp, FREAD, td->td_ucred, td);
1229         free(nd, M_TEMP);
1230         if (error && lf)
1231                 linker_file_unload(lf, LINKER_UNLOAD_FORCE);
1232         free(hdr, M_LINKER);
1233
1234         return error;
1235 }
1236
1237 static void
1238 link_elf_unload_file(linker_file_t file)
1239 {
1240         elf_file_t ef = (elf_file_t) file;
1241         u_int i;
1242
1243         link_elf_invoke_cbs(file->dtors_addr, file->dtors_size);
1244
1245         /* Notify MD code that a module is being unloaded. */
1246         elf_cpu_unload_file(file);
1247
1248         if (ef->progtab) {
1249                 for (i = 0; i < ef->nprogtab; i++) {
1250                         if (ef->progtab[i].size == 0)
1251                                 continue;
1252                         if (ef->progtab[i].name == NULL)
1253                                 continue;
1254                         if (!strcmp(ef->progtab[i].name, DPCPU_SETNAME))
1255                                 dpcpu_free(ef->progtab[i].addr,
1256                                     ef->progtab[i].size);
1257 #ifdef VIMAGE
1258                         else if (!strcmp(ef->progtab[i].name, VNET_SETNAME))
1259                                 vnet_data_free(ef->progtab[i].addr,
1260                                     ef->progtab[i].size);
1261 #endif
1262                 }
1263         }
1264         if (ef->preloaded) {
1265                 free(ef->reltab, M_LINKER);
1266                 free(ef->relatab, M_LINKER);
1267                 free(ef->progtab, M_LINKER);
1268                 free(ef->ctftab, M_LINKER);
1269                 free(ef->ctfoff, M_LINKER);
1270                 free(ef->typoff, M_LINKER);
1271                 if (file->pathname != NULL)
1272                         preload_delete_name(file->pathname);
1273                 return;
1274         }
1275
1276         for (i = 0; i < ef->nreltab; i++)
1277                 free(ef->reltab[i].rel, M_LINKER);
1278         for (i = 0; i < ef->nrelatab; i++)
1279                 free(ef->relatab[i].rela, M_LINKER);
1280         free(ef->reltab, M_LINKER);
1281         free(ef->relatab, M_LINKER);
1282         free(ef->progtab, M_LINKER);
1283
1284         if (ef->object != NULL)
1285                 vm_map_remove(kernel_map, (vm_offset_t)ef->address,
1286                     (vm_offset_t)ef->address + ptoa(ef->object->size));
1287         free(ef->e_shdr, M_LINKER);
1288         free(ef->ddbsymtab, M_LINKER);
1289         free(ef->ddbstrtab, M_LINKER);
1290         free(ef->shstrtab, M_LINKER);
1291         free(ef->ctftab, M_LINKER);
1292         free(ef->ctfoff, M_LINKER);
1293         free(ef->typoff, M_LINKER);
1294 }
1295
1296 static const char *
1297 symbol_name(elf_file_t ef, Elf_Size r_info)
1298 {
1299         const Elf_Sym *ref;
1300
1301         if (ELF_R_SYM(r_info)) {
1302                 ref = ef->ddbsymtab + ELF_R_SYM(r_info);
1303                 return ef->ddbstrtab + ref->st_name;
1304         } else
1305                 return NULL;
1306 }
1307
1308 static Elf_Addr
1309 findbase(elf_file_t ef, int sec)
1310 {
1311         int i;
1312         Elf_Addr base = 0;
1313
1314         for (i = 0; i < ef->nprogtab; i++) {
1315                 if (sec == ef->progtab[i].sec) {
1316                         base = (Elf_Addr)ef->progtab[i].addr;
1317                         break;
1318                 }
1319         }
1320         return base;
1321 }
1322
1323 static int
1324 relocate_file(elf_file_t ef)
1325 {
1326         const Elf_Rel *rellim;
1327         const Elf_Rel *rel;
1328         const Elf_Rela *relalim;
1329         const Elf_Rela *rela;
1330         const char *symname;
1331         const Elf_Sym *sym;
1332         int i;
1333         Elf_Size symidx;
1334         Elf_Addr base;
1335
1336         /* Perform relocations without addend if there are any: */
1337         for (i = 0; i < ef->nreltab; i++) {
1338                 rel = ef->reltab[i].rel;
1339                 if (rel == NULL) {
1340                         link_elf_error(ef->lf.filename, "lost a reltab!");
1341                         return (ENOEXEC);
1342                 }
1343                 rellim = rel + ef->reltab[i].nrel;
1344                 base = findbase(ef, ef->reltab[i].sec);
1345                 if (base == 0) {
1346                         link_elf_error(ef->lf.filename, "lost base for reltab");
1347                         return (ENOEXEC);
1348                 }
1349                 for ( ; rel < rellim; rel++) {
1350                         symidx = ELF_R_SYM(rel->r_info);
1351                         if (symidx >= ef->ddbsymcnt)
1352                                 continue;
1353                         sym = ef->ddbsymtab + symidx;
1354                         /* Local relocs are already done */
1355                         if (ELF_ST_BIND(sym->st_info) == STB_LOCAL)
1356                                 continue;
1357                         if (elf_reloc(&ef->lf, base, rel, ELF_RELOC_REL,
1358                             elf_obj_lookup)) {
1359                                 symname = symbol_name(ef, rel->r_info);
1360                                 printf("link_elf_obj: symbol %s undefined\n",
1361                                     symname);
1362                                 return (ENOENT);
1363                         }
1364                 }
1365         }
1366
1367         /* Perform relocations with addend if there are any: */
1368         for (i = 0; i < ef->nrelatab; i++) {
1369                 rela = ef->relatab[i].rela;
1370                 if (rela == NULL) {
1371                         link_elf_error(ef->lf.filename, "lost a relatab!");
1372                         return (ENOEXEC);
1373                 }
1374                 relalim = rela + ef->relatab[i].nrela;
1375                 base = findbase(ef, ef->relatab[i].sec);
1376                 if (base == 0) {
1377                         link_elf_error(ef->lf.filename,
1378                             "lost base for relatab");
1379                         return (ENOEXEC);
1380                 }
1381                 for ( ; rela < relalim; rela++) {
1382                         symidx = ELF_R_SYM(rela->r_info);
1383                         if (symidx >= ef->ddbsymcnt)
1384                                 continue;
1385                         sym = ef->ddbsymtab + symidx;
1386                         /* Local relocs are already done */
1387                         if (ELF_ST_BIND(sym->st_info) == STB_LOCAL)
1388                                 continue;
1389                         if (elf_reloc(&ef->lf, base, rela, ELF_RELOC_RELA,
1390                             elf_obj_lookup)) {
1391                                 symname = symbol_name(ef, rela->r_info);
1392                                 printf("link_elf_obj: symbol %s undefined\n",
1393                                     symname);
1394                                 return (ENOENT);
1395                         }
1396                 }
1397         }
1398
1399         /*
1400          * Only clean SHN_FBSD_CACHED for successful return.  If we
1401          * modified symbol table for the object but found an
1402          * unresolved symbol, there is no reason to roll back.
1403          */
1404         elf_obj_cleanup_globals_cache(ef);
1405
1406         return (0);
1407 }
1408
1409 static int
1410 link_elf_lookup_symbol(linker_file_t lf, const char *name, c_linker_sym_t *sym)
1411 {
1412         elf_file_t ef = (elf_file_t) lf;
1413         const Elf_Sym *symp;
1414         const char *strp;
1415         int i;
1416
1417         for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1418                 strp = ef->ddbstrtab + symp->st_name;
1419                 if (symp->st_shndx != SHN_UNDEF && strcmp(name, strp) == 0) {
1420                         *sym = (c_linker_sym_t) symp;
1421                         return 0;
1422                 }
1423         }
1424         return ENOENT;
1425 }
1426
1427 static int
1428 link_elf_symbol_values(linker_file_t lf, c_linker_sym_t sym,
1429     linker_symval_t *symval)
1430 {
1431         elf_file_t ef;
1432         const Elf_Sym *es;
1433         caddr_t val;
1434
1435         ef = (elf_file_t) lf;
1436         es = (const Elf_Sym*) sym;
1437         val = (caddr_t)es->st_value;
1438         if (es >= ef->ddbsymtab && es < (ef->ddbsymtab + ef->ddbsymcnt)) {
1439                 symval->name = ef->ddbstrtab + es->st_name;
1440                 val = (caddr_t)es->st_value;
1441                 if (ELF_ST_TYPE(es->st_info) == STT_GNU_IFUNC)
1442                         val = ((caddr_t (*)(void))val)();
1443                 symval->value = val;
1444                 symval->size = es->st_size;
1445                 return 0;
1446         }
1447         return ENOENT;
1448 }
1449
1450 static int
1451 link_elf_search_symbol(linker_file_t lf, caddr_t value,
1452     c_linker_sym_t *sym, long *diffp)
1453 {
1454         elf_file_t ef = (elf_file_t) lf;
1455         u_long off = (uintptr_t) (void *) value;
1456         u_long diff = off;
1457         u_long st_value;
1458         const Elf_Sym *es;
1459         const Elf_Sym *best = NULL;
1460         int i;
1461
1462         for (i = 0, es = ef->ddbsymtab; i < ef->ddbsymcnt; i++, es++) {
1463                 if (es->st_name == 0)
1464                         continue;
1465                 st_value = es->st_value;
1466                 if (off >= st_value) {
1467                         if (off - st_value < diff) {
1468                                 diff = off - st_value;
1469                                 best = es;
1470                                 if (diff == 0)
1471                                         break;
1472                         } else if (off - st_value == diff) {
1473                                 best = es;
1474                         }
1475                 }
1476         }
1477         if (best == NULL)
1478                 *diffp = off;
1479         else
1480                 *diffp = diff;
1481         *sym = (c_linker_sym_t) best;
1482
1483         return 0;
1484 }
1485
1486 /*
1487  * Look up a linker set on an ELF system.
1488  */
1489 static int
1490 link_elf_lookup_set(linker_file_t lf, const char *name,
1491     void ***startp, void ***stopp, int *countp)
1492 {
1493         elf_file_t ef = (elf_file_t)lf;
1494         void **start, **stop;
1495         int i, count;
1496
1497         /* Relative to section number */
1498         for (i = 0; i < ef->nprogtab; i++) {
1499                 if ((strncmp(ef->progtab[i].name, "set_", 4) == 0) &&
1500                     strcmp(ef->progtab[i].name + 4, name) == 0) {
1501                         start  = (void **)ef->progtab[i].addr;
1502                         stop = (void **)((char *)ef->progtab[i].addr +
1503                             ef->progtab[i].size);
1504                         count = stop - start;
1505                         if (startp)
1506                                 *startp = start;
1507                         if (stopp)
1508                                 *stopp = stop;
1509                         if (countp)
1510                                 *countp = count;
1511                         return (0);
1512                 }
1513         }
1514         return (ESRCH);
1515 }
1516
1517 static int
1518 link_elf_each_function_name(linker_file_t file,
1519     int (*callback)(const char *, void *), void *opaque)
1520 {
1521         elf_file_t ef = (elf_file_t)file;
1522         const Elf_Sym *symp;
1523         int i, error;
1524
1525         /* Exhaustive search */
1526         for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1527                 if (symp->st_value != 0 &&
1528                     (ELF_ST_TYPE(symp->st_info) == STT_FUNC ||
1529                     ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) {
1530                         error = callback(ef->ddbstrtab + symp->st_name, opaque);
1531                         if (error)
1532                                 return (error);
1533                 }
1534         }
1535         return (0);
1536 }
1537
1538 static int
1539 link_elf_each_function_nameval(linker_file_t file,
1540     linker_function_nameval_callback_t callback, void *opaque)
1541 {
1542         linker_symval_t symval;
1543         elf_file_t ef = (elf_file_t)file;
1544         const Elf_Sym* symp;
1545         int i, error;
1546
1547         /* Exhaustive search */
1548         for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1549                 if (symp->st_value != 0 &&
1550                     (ELF_ST_TYPE(symp->st_info) == STT_FUNC ||
1551                     ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) {
1552                         error = link_elf_symbol_values(file,
1553                             (c_linker_sym_t)symp, &symval);
1554                         if (error)
1555                                 return (error);
1556                         error = callback(file, i, &symval, opaque);
1557                         if (error)
1558                                 return (error);
1559                 }
1560         }
1561         return (0);
1562 }
1563
1564 static void
1565 elf_obj_cleanup_globals_cache(elf_file_t ef)
1566 {
1567         Elf_Sym *sym;
1568         Elf_Size i;
1569
1570         for (i = 0; i < ef->ddbsymcnt; i++) {
1571                 sym = ef->ddbsymtab + i;
1572                 if (sym->st_shndx == SHN_FBSD_CACHED) {
1573                         sym->st_shndx = SHN_UNDEF;
1574                         sym->st_value = 0;
1575                 }
1576         }
1577 }
1578
1579 /*
1580  * Symbol lookup function that can be used when the symbol index is known (ie
1581  * in relocations). It uses the symbol index instead of doing a fully fledged
1582  * hash table based lookup when such is valid. For example for local symbols.
1583  * This is not only more efficient, it's also more correct. It's not always
1584  * the case that the symbol can be found through the hash table.
1585  */
1586 static int
1587 elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps, Elf_Addr *res)
1588 {
1589         elf_file_t ef = (elf_file_t)lf;
1590         Elf_Sym *sym;
1591         const char *symbol;
1592         Elf_Addr res1;
1593
1594         /* Don't even try to lookup the symbol if the index is bogus. */
1595         if (symidx >= ef->ddbsymcnt) {
1596                 *res = 0;
1597                 return (EINVAL);
1598         }
1599
1600         sym = ef->ddbsymtab + symidx;
1601
1602         /* Quick answer if there is a definition included. */
1603         if (sym->st_shndx != SHN_UNDEF) {
1604                 res1 = (Elf_Addr)sym->st_value;
1605                 if (ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC)
1606                         res1 = ((Elf_Addr (*)(void))res1)();
1607                 *res = res1;
1608                 return (0);
1609         }
1610
1611         /* If we get here, then it is undefined and needs a lookup. */
1612         switch (ELF_ST_BIND(sym->st_info)) {
1613         case STB_LOCAL:
1614                 /* Local, but undefined? huh? */
1615                 *res = 0;
1616                 return (EINVAL);
1617
1618         case STB_GLOBAL:
1619         case STB_WEAK:
1620                 /* Relative to Data or Function name */
1621                 symbol = ef->ddbstrtab + sym->st_name;
1622
1623                 /* Force a lookup failure if the symbol name is bogus. */
1624                 if (*symbol == 0) {
1625                         *res = 0;
1626                         return (EINVAL);
1627                 }
1628                 res1 = (Elf_Addr)linker_file_lookup_symbol(lf, symbol, deps);
1629
1630                 /*
1631                  * Cache global lookups during module relocation. The failure
1632                  * case is particularly expensive for callers, who must scan
1633                  * through the entire globals table doing strcmp(). Cache to
1634                  * avoid doing such work repeatedly.
1635                  *
1636                  * After relocation is complete, undefined globals will be
1637                  * restored to SHN_UNDEF in elf_obj_cleanup_globals_cache(),
1638                  * above.
1639                  */
1640                 if (res1 != 0) {
1641                         sym->st_shndx = SHN_FBSD_CACHED;
1642                         sym->st_value = res1;
1643                         *res = res1;
1644                         return (0);
1645                 } else if (ELF_ST_BIND(sym->st_info) == STB_WEAK) {
1646                         sym->st_value = 0;
1647                         *res = 0;
1648                         return (0);
1649                 }
1650                 return (EINVAL);
1651
1652         default:
1653                 return (EINVAL);
1654         }
1655 }
1656
1657 static void
1658 link_elf_fix_link_set(elf_file_t ef)
1659 {
1660         static const char startn[] = "__start_";
1661         static const char stopn[] = "__stop_";
1662         Elf_Sym *sym;
1663         const char *sym_name, *linkset_name;
1664         Elf_Addr startp, stopp;
1665         Elf_Size symidx;
1666         int start, i;
1667
1668         startp = stopp = 0;
1669         for (symidx = 1 /* zero entry is special */;
1670                 symidx < ef->ddbsymcnt; symidx++) {
1671                 sym = ef->ddbsymtab + symidx;
1672                 if (sym->st_shndx != SHN_UNDEF)
1673                         continue;
1674
1675                 sym_name = ef->ddbstrtab + sym->st_name;
1676                 if (strncmp(sym_name, startn, sizeof(startn) - 1) == 0) {
1677                         start = 1;
1678                         linkset_name = sym_name + sizeof(startn) - 1;
1679                 }
1680                 else if (strncmp(sym_name, stopn, sizeof(stopn) - 1) == 0) {
1681                         start = 0;
1682                         linkset_name = sym_name + sizeof(stopn) - 1;
1683                 }
1684                 else
1685                         continue;
1686
1687                 for (i = 0; i < ef->nprogtab; i++) {
1688                         if (strcmp(ef->progtab[i].name, linkset_name) == 0) {
1689                                 startp = (Elf_Addr)ef->progtab[i].addr;
1690                                 stopp = (Elf_Addr)(startp + ef->progtab[i].size);
1691                                 break;
1692                         }
1693                 }
1694                 if (i == ef->nprogtab)
1695                         continue;
1696
1697                 sym->st_value = start ? startp : stopp;
1698                 sym->st_shndx = i;
1699         }
1700 }
1701
1702 static int
1703 link_elf_reloc_local(linker_file_t lf, bool ifuncs)
1704 {
1705         elf_file_t ef = (elf_file_t)lf;
1706         const Elf_Rel *rellim;
1707         const Elf_Rel *rel;
1708         const Elf_Rela *relalim;
1709         const Elf_Rela *rela;
1710         const Elf_Sym *sym;
1711         Elf_Addr base;
1712         int i;
1713         Elf_Size symidx;
1714
1715         link_elf_fix_link_set(ef);
1716
1717         /* Perform relocations without addend if there are any: */
1718         for (i = 0; i < ef->nreltab; i++) {
1719                 rel = ef->reltab[i].rel;
1720                 if (rel == NULL) {
1721                         link_elf_error(ef->lf.filename, "lost a reltab");
1722                         return (ENOEXEC);
1723                 }
1724                 rellim = rel + ef->reltab[i].nrel;
1725                 base = findbase(ef, ef->reltab[i].sec);
1726                 if (base == 0) {
1727                         link_elf_error(ef->lf.filename, "lost base for reltab");
1728                         return (ENOEXEC);
1729                 }
1730                 for ( ; rel < rellim; rel++) {
1731                         symidx = ELF_R_SYM(rel->r_info);
1732                         if (symidx >= ef->ddbsymcnt)
1733                                 continue;
1734                         sym = ef->ddbsymtab + symidx;
1735                         /* Only do local relocs */
1736                         if (ELF_ST_BIND(sym->st_info) != STB_LOCAL)
1737                                 continue;
1738                         if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1739                             elf_is_ifunc_reloc(rel->r_info)) != ifuncs)
1740                                 continue;
1741                         if (elf_reloc_local(lf, base, rel, ELF_RELOC_REL,
1742                             elf_obj_lookup) != 0)
1743                                 return (ENOEXEC);
1744                 }
1745         }
1746
1747         /* Perform relocations with addend if there are any: */
1748         for (i = 0; i < ef->nrelatab; i++) {
1749                 rela = ef->relatab[i].rela;
1750                 if (rela == NULL) {
1751                         link_elf_error(ef->lf.filename, "lost a relatab!");
1752                         return (ENOEXEC);
1753                 }
1754                 relalim = rela + ef->relatab[i].nrela;
1755                 base = findbase(ef, ef->relatab[i].sec);
1756                 if (base == 0) {
1757                         link_elf_error(ef->lf.filename, "lost base for reltab");
1758                         return (ENOEXEC);
1759                 }
1760                 for ( ; rela < relalim; rela++) {
1761                         symidx = ELF_R_SYM(rela->r_info);
1762                         if (symidx >= ef->ddbsymcnt)
1763                                 continue;
1764                         sym = ef->ddbsymtab + symidx;
1765                         /* Only do local relocs */
1766                         if (ELF_ST_BIND(sym->st_info) != STB_LOCAL)
1767                                 continue;
1768                         if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1769                             elf_is_ifunc_reloc(rela->r_info)) != ifuncs)
1770                                 continue;
1771                         if (elf_reloc_local(lf, base, rela, ELF_RELOC_RELA,
1772                             elf_obj_lookup) != 0)
1773                                 return (ENOEXEC);
1774                 }
1775         }
1776         return (0);
1777 }
1778
1779 static long
1780 link_elf_symtab_get(linker_file_t lf, const Elf_Sym **symtab)
1781 {
1782     elf_file_t ef = (elf_file_t)lf;
1783     
1784     *symtab = ef->ddbsymtab;
1785     
1786     if (*symtab == NULL)
1787         return (0);
1788
1789     return (ef->ddbsymcnt);
1790 }
1791     
1792 static long
1793 link_elf_strtab_get(linker_file_t lf, caddr_t *strtab)
1794 {
1795     elf_file_t ef = (elf_file_t)lf;
1796
1797     *strtab = ef->ddbstrtab;
1798
1799     if (*strtab == NULL)
1800         return (0);
1801
1802     return (ef->ddbstrcnt);
1803 }