2 * Copyright (c) 1989, 1992, 1993
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software developed by the Computer Systems
6 * Engineering group at Lawrence Berkeley Laboratory under DARPA contract
7 * BG 91-66 and contributed to Berkeley.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
37 #if defined(LIBC_SCCS) && !defined(lint)
39 static char sccsid[] = "@(#)kvm.c 8.2 (Berkeley) 2/13/94";
41 #endif /* LIBC_SCCS and not lint */
43 #include <sys/param.h>
44 #include <sys/fnv_hash.h>
49 #include <sys/linker.h>
65 #include "kvm_private.h"
67 SET_DECLARE(kvm_arch, struct kvm_arch);
69 static char _kd_is_null[] = "";
71 /* from src/lib/libc/gen/nlist.c */
72 int __fdnlist(int, struct nlist *);
75 kvm_fdnlist(kvm_t *kd, struct kvm_nlist *list)
80 if (kd->resolve_symbol == NULL) {
84 for (count = 0; list[count].n_name != NULL &&
85 list[count].n_name[0] != '\0'; count++)
87 nl = calloc(count + 1, sizeof(*nl));
88 for (i = 0; i < count; i++)
89 nl[i].n_name = list[i].n_name;
90 nfail = __fdnlist(kd->nlfd, nl);
91 for (i = 0; i < count; i++) {
92 list[i].n_type = nl[i].n_type;
93 list[i].n_value = nl[i].n_value;
100 while (list->n_name != NULL && list->n_name[0] != '\0') {
101 error = kd->resolve_symbol(list->n_name, &addr);
107 list->n_value = addr;
108 list->n_type = N_DATA | N_EXT;
116 kvm_geterr(kvm_t *kd)
120 return (_kd_is_null);
127 * Report an error using printf style arguments. "program" is kd->program
128 * on hard errors, and 0 on soft errors, so that under Sun error emulation,
129 * only hard errors are printed out (otherwise, programs like gdb will
130 * generate tons of error messages when trying to access bogus pointers).
133 _kvm_err(kvm_t *kd, const char *program, const char *fmt, ...)
138 if (program != NULL) {
139 (void)fprintf(stderr, "%s: ", program);
140 (void)vfprintf(stderr, fmt, ap);
141 (void)fputc('\n', stderr);
143 (void)vsnprintf(kd->errbuf,
144 sizeof(kd->errbuf), fmt, ap);
150 _kvm_syserr(kvm_t *kd, const char *program, const char *fmt, ...)
156 if (program != NULL) {
157 (void)fprintf(stderr, "%s: ", program);
158 (void)vfprintf(stderr, fmt, ap);
159 (void)fprintf(stderr, ": %s\n", strerror(errno));
161 char *cp = kd->errbuf;
163 (void)vsnprintf(cp, sizeof(kd->errbuf), fmt, ap);
165 (void)snprintf(&cp[n], sizeof(kd->errbuf) - n, ": %s",
172 _kvm_malloc(kvm_t *kd, size_t n)
176 if ((p = calloc(n, sizeof(char))) == NULL)
177 _kvm_err(kd, kd->program, "can't allocate %zu bytes: %s",
183 _kvm_read_kernel_ehdr(kvm_t *kd)
187 if (elf_version(EV_CURRENT) == EV_NONE) {
188 _kvm_err(kd, kd->program, "Unsupported libelf");
191 elf = elf_begin(kd->nlfd, ELF_C_READ, NULL);
193 _kvm_err(kd, kd->program, "%s", elf_errmsg(0));
196 if (elf_kind(elf) != ELF_K_ELF) {
197 _kvm_err(kd, kd->program, "kernel is not an ELF file");
200 if (gelf_getehdr(elf, &kd->nlehdr) == NULL) {
201 _kvm_err(kd, kd->program, "%s", elf_errmsg(0));
207 switch (kd->nlehdr.e_ident[EI_DATA]) {
212 _kvm_err(kd, kd->program,
213 "unsupported ELF data encoding for kernel");
219 _kvm_probe_elf_kernel(kvm_t *kd, int class, int machine)
222 return (kd->nlehdr.e_ident[EI_CLASS] == class &&
223 kd->nlehdr.e_type == ET_EXEC &&
224 kd->nlehdr.e_machine == machine);
228 _kvm_is_minidump(kvm_t *kd)
234 if (pread(kd->pmfd, &minihdr, 8, 0) == 8 &&
235 memcmp(&minihdr, "minidump", 8) == 0)
241 * The powerpc backend has a hack to strip a leading kerneldump
242 * header from the core before treating it as an ELF header.
244 * We can add that here if we can get a change to libelf to support
245 * an initial offset into the file. Alternatively we could patch
246 * savecore to extract cores from a regular file instead.
249 _kvm_read_core_phdrs(kvm_t *kd, size_t *phnump, GElf_Phdr **phdrp)
256 elf = elf_begin(kd->pmfd, ELF_C_READ, NULL);
258 _kvm_err(kd, kd->program, "%s", elf_errmsg(0));
261 if (elf_kind(elf) != ELF_K_ELF) {
262 _kvm_err(kd, kd->program, "invalid core");
265 if (gelf_getclass(elf) != kd->nlehdr.e_ident[EI_CLASS]) {
266 _kvm_err(kd, kd->program, "invalid core");
269 if (gelf_getehdr(elf, &ehdr) == NULL) {
270 _kvm_err(kd, kd->program, "%s", elf_errmsg(0));
273 if (ehdr.e_type != ET_CORE) {
274 _kvm_err(kd, kd->program, "invalid core");
277 if (ehdr.e_machine != kd->nlehdr.e_machine) {
278 _kvm_err(kd, kd->program, "invalid core");
282 if (elf_getphdrnum(elf, &phnum) == -1) {
283 _kvm_err(kd, kd->program, "%s", elf_errmsg(0));
287 phdr = calloc(phnum, sizeof(*phdr));
289 _kvm_err(kd, kd->program, "failed to allocate phdrs");
293 for (i = 0; i < phnum; i++) {
294 if (gelf_getphdr(elf, i, &phdr[i]) == NULL) {
295 _kvm_err(kd, kd->program, "%s", elf_errmsg(0));
310 _kvm_hpt_insert(struct hpt *hpt, uint64_t pa, off_t off)
313 uint32_t fnv = FNV1_32_INIT;
315 fnv = fnv_32_buf(&pa, sizeof(pa), fnv);
316 fnv &= (HPT_SIZE - 1);
317 hpte = malloc(sizeof(*hpte));
320 hpte->next = hpt->hpt_head[fnv];
321 hpt->hpt_head[fnv] = hpte;
325 _kvm_hpt_init(kvm_t *kd, struct hpt *hpt, void *base, size_t len, off_t off,
326 int page_size, int word_size)
328 uint64_t bits, idx, pa;
334 for (idx = 0; idx < len / word_size; idx++) {
335 if (word_size == sizeof(uint64_t))
336 bits = _kvm64toh(kd, base64[idx]);
338 bits = _kvm32toh(kd, base32[idx]);
339 pa = idx * word_size * NBBY * page_size;
340 for (; bits != 0; bits >>= 1, pa += page_size) {
343 _kvm_hpt_insert(hpt, pa, off);
350 _kvm_hpt_find(struct hpt *hpt, uint64_t pa)
353 uint32_t fnv = FNV1_32_INIT;
355 fnv = fnv_32_buf(&pa, sizeof(pa), fnv);
356 fnv &= (HPT_SIZE - 1);
357 for (hpte = hpt->hpt_head[fnv]; hpte != NULL; hpte = hpte->next) {
365 _kvm_hpt_free(struct hpt *hpt)
367 struct hpte *hpte, *next;
370 for (i = 0; i < HPT_SIZE; i++) {
371 for (hpte = hpt->hpt_head[i]; hpte != NULL; hpte = next) {
379 _kvm_open(kvm_t *kd, const char *uf, const char *mf, int flag, char *errout)
381 struct kvm_arch **parch;
394 else if (strlen(uf) >= MAXPATHLEN) {
395 _kvm_err(kd, kd->program, "exec file name too long");
398 if (flag & ~O_RDWR) {
399 _kvm_err(kd, kd->program, "bad flags arg");
405 if ((kd->pmfd = open(mf, flag | O_CLOEXEC, 0)) < 0) {
406 _kvm_syserr(kd, kd->program, "%s", mf);
409 if (fstat(kd->pmfd, &st) < 0) {
410 _kvm_syserr(kd, kd->program, "%s", mf);
413 if (S_ISREG(st.st_mode) && st.st_size <= 0) {
415 _kvm_syserr(kd, kd->program, "empty file");
418 if (S_ISCHR(st.st_mode)) {
420 * If this is a character special device, then check that
421 * it's /dev/mem. If so, open kmem too. (Maybe we should
422 * make it work for either /dev/mem or /dev/kmem -- in either
423 * case you're working with a live kernel.)
425 if (strcmp(mf, _PATH_DEVNULL) == 0) {
426 kd->vmfd = open(_PATH_DEVNULL, O_RDONLY | O_CLOEXEC);
428 } else if (strcmp(mf, _PATH_MEM) == 0) {
429 if ((kd->vmfd = open(_PATH_KMEM, flag | O_CLOEXEC)) <
431 _kvm_syserr(kd, kd->program, "%s", _PATH_KMEM);
439 * This is either a crash dump or a remote live system with its physical
440 * memory fully accessible via a special device.
441 * Open the namelist fd and determine the architecture.
443 if ((kd->nlfd = open(uf, O_RDONLY | O_CLOEXEC, 0)) < 0) {
444 _kvm_syserr(kd, kd->program, "%s", uf);
447 if (_kvm_read_kernel_ehdr(kd) < 0)
449 if (strncmp(mf, _PATH_FWMEM, strlen(_PATH_FWMEM)) == 0 ||
450 strncmp(mf, _PATH_DEVVMM, strlen(_PATH_DEVVMM)) == 0) {
454 SET_FOREACH(parch, kvm_arch) {
455 if ((*parch)->ka_probe(kd)) {
460 if (kd->arch == NULL) {
461 _kvm_err(kd, kd->program, "unsupported architecture");
466 * Non-native kernels require a symbol resolver.
468 if (!kd->arch->ka_native(kd) && kd->resolve_symbol == NULL) {
469 _kvm_err(kd, kd->program,
470 "non-native kernel requires a symbol resolver");
475 * Initialize the virtual address translation machinery.
477 if (kd->arch->ka_initvtop(kd) < 0)
482 * Copy out the error if doing sane error semantics.
485 strlcpy(errout, kd->errbuf, _POSIX2_LINE_MAX);
491 kvm_openfiles(const char *uf, const char *mf, const char *sf __unused, int flag,
496 if ((kd = calloc(1, sizeof(*kd))) == NULL) {
498 (void)strlcpy(errout, strerror(errno),
502 return (_kvm_open(kd, uf, mf, flag, errout));
506 kvm_open(const char *uf, const char *mf, const char *sf __unused, int flag,
511 if ((kd = calloc(1, sizeof(*kd))) == NULL) {
513 (void)fprintf(stderr, "%s: %s\n",
514 errstr, strerror(errno));
517 kd->program = errstr;
518 return (_kvm_open(kd, uf, mf, flag, NULL));
522 kvm_open2(const char *uf, const char *mf, int flag, char *errout,
523 int (*resolver)(const char *, kvaddr_t *))
527 if ((kd = calloc(1, sizeof(*kd))) == NULL) {
529 (void)strlcpy(errout, strerror(errno),
533 kd->resolve_symbol = resolver;
534 return (_kvm_open(kd, uf, mf, flag, errout));
546 if (kd->vmst != NULL)
547 kd->arch->ka_freevtop(kd);
549 error |= close(kd->pmfd);
551 error |= close(kd->vmfd);
553 error |= close(kd->nlfd);
554 if (kd->procbase != 0)
555 free((void *)kd->procbase);
557 free((void *) kd->argbuf);
559 free((void *) kd->argspc);
561 free((void *)kd->argv);
568 * Walk the list of unresolved symbols, generate a new list and prefix the
569 * symbol names, try again, and merge back what we could resolve.
572 kvm_fdnlist_prefix(kvm_t *kd, struct kvm_nlist *nl, int missing,
573 const char *prefix, kvaddr_t (*validate_fn)(kvm_t *, kvaddr_t))
575 struct kvm_nlist *n, *np, *p;
579 int slen, unresolved;
582 * Calculate the space we need to malloc for nlist and names.
583 * We are going to store the name twice for later lookups: once
584 * with the prefix and once the unmodified name delimited by \0.
588 for (p = nl; p->n_name && p->n_name[0]; ++p) {
589 if (p->n_type != N_UNDF)
591 len += sizeof(struct kvm_nlist) + strlen(prefix) +
592 2 * (strlen(p->n_name) + 1);
597 /* Add space for the terminating nlist entry. */
598 len += sizeof(struct kvm_nlist);
601 /* Alloc one chunk for (nlist, [names]) and setup pointers. */
602 n = np = malloc(len);
606 cp = ce = (char *)np;
607 cp += unresolved * sizeof(struct kvm_nlist);
610 /* Generate shortened nlist with special prefix. */
612 for (p = nl; p->n_name && p->n_name[0]; ++p) {
613 if (p->n_type != N_UNDF)
616 /* Save the new\0orig. name so we can later match it again. */
617 slen = snprintf(cp, ce - cp, "%s%s%c%s", prefix,
618 (prefix[0] != '\0' && p->n_name[0] == '_') ?
619 (p->n_name + 1) : p->n_name, '\0', p->n_name);
620 if (slen < 0 || slen >= ce - cp)
628 /* Do lookup on the reduced list. */
630 unresolved = kvm_fdnlist(kd, np);
632 /* Check if we could resolve further symbols and update the list. */
633 if (unresolved >= 0 && unresolved < missing) {
634 /* Find the first freshly resolved entry. */
635 for (; np->n_name && np->n_name[0]; np++)
636 if (np->n_type != N_UNDF)
639 * The lists are both in the same order,
640 * so we can walk them in parallel.
642 for (p = nl; np->n_name && np->n_name[0] &&
643 p->n_name && p->n_name[0]; ++p) {
644 if (p->n_type != N_UNDF)
646 /* Skip expanded name and compare to orig. one. */
647 ccp = np->n_name + strlen(np->n_name) + 1;
648 if (strcmp(ccp, p->n_name) != 0)
650 /* Update nlist with new, translated results. */
651 p->n_type = np->n_type;
653 p->n_value = (*validate_fn)(kd, np->n_value);
655 p->n_value = np->n_value;
657 /* Find next freshly resolved entry. */
658 for (np++; np->n_name && np->n_name[0]; np++)
659 if (np->n_type != N_UNDF)
663 /* We could assert missing = unresolved here. */
670 _kvm_nlist(kvm_t *kd, struct kvm_nlist *nl, int initialize)
674 struct kld_sym_lookup lookup;
676 const char *prefix = "";
677 char symname[1024]; /* XXX-BZ symbol name length limit? */
678 int tried_vnet, tried_dpcpu;
681 * If we can't use the kld symbol lookup, revert to the
685 error = kvm_fdnlist(kd, nl);
686 if (error <= 0) /* Hard error or success. */
689 if (_kvm_vnet_initialized(kd, initialize))
690 error = kvm_fdnlist_prefix(kd, nl, error,
691 VNET_SYMPREFIX, _kvm_vnet_validaddr);
693 if (error > 0 && _kvm_dpcpu_initialized(kd, initialize))
694 error = kvm_fdnlist_prefix(kd, nl, error,
695 DPCPU_SYMPREFIX, _kvm_dpcpu_validaddr);
701 * We can use the kld lookup syscall. Go through each nlist entry
702 * and look it up with a kldsym(2) syscall.
708 for (p = nl; p->n_name && p->n_name[0]; ++p) {
709 if (p->n_type != N_UNDF)
712 lookup.version = sizeof(lookup);
716 error = snprintf(symname, sizeof(symname), "%s%s", prefix,
717 (prefix[0] != '\0' && p->n_name[0] == '_') ?
718 (p->n_name + 1) : p->n_name);
719 if (error < 0 || error >= (int)sizeof(symname))
721 lookup.symname = symname;
722 if (lookup.symname[0] == '_')
725 if (kldsym(0, KLDSYM_LOOKUP, &lookup) != -1) {
727 if (_kvm_vnet_initialized(kd, initialize) &&
728 strcmp(prefix, VNET_SYMPREFIX) == 0)
730 _kvm_vnet_validaddr(kd, lookup.symvalue);
731 else if (_kvm_dpcpu_initialized(kd, initialize) &&
732 strcmp(prefix, DPCPU_SYMPREFIX) == 0)
734 _kvm_dpcpu_validaddr(kd, lookup.symvalue);
736 p->n_value = lookup.symvalue;
743 * Check the number of entries that weren't found. If they exist,
744 * try again with a prefix for virtualized or DPCPU symbol names.
746 error = ((p - nl) - nvalid);
747 if (error && _kvm_vnet_initialized(kd, initialize) && !tried_vnet) {
749 prefix = VNET_SYMPREFIX;
752 if (error && _kvm_dpcpu_initialized(kd, initialize) && !tried_dpcpu) {
754 prefix = DPCPU_SYMPREFIX;
759 * Return the number of entries that weren't found. If they exist,
760 * also fill internal error buffer.
762 error = ((p - nl) - nvalid);
764 _kvm_syserr(kd, kd->program, "kvm_nlist");
769 kvm_nlist2(kvm_t *kd, struct kvm_nlist *nl)
773 * If called via the public interface, permit initialization of
774 * further virtualized modules on demand.
776 return (_kvm_nlist(kd, nl, 1));
780 kvm_nlist(kvm_t *kd, struct nlist *nl)
782 struct kvm_nlist *kl;
786 * Avoid reporting truncated addresses by failing for non-native
789 if (!kvm_native(kd)) {
790 _kvm_err(kd, kd->program, "kvm_nlist of non-native vmcore");
794 for (count = 0; nl[count].n_name != NULL && nl[count].n_name[0] != '\0';
799 kl = calloc(count + 1, sizeof(*kl));
800 for (i = 0; i < count; i++)
801 kl[i].n_name = nl[i].n_name;
802 nfail = kvm_nlist2(kd, kl);
803 for (i = 0; i < count; i++) {
804 nl[i].n_type = kl[i].n_type;
807 nl[i].n_value = kl[i].n_value;
813 kvm_read(kvm_t *kd, u_long kva, void *buf, size_t len)
816 return (kvm_read2(kd, kva, buf, len));
820 kvm_read2(kvm_t *kd, kvaddr_t kva, void *buf, size_t len)
829 * We're using /dev/kmem. Just read straight from the
830 * device and let the active kernel do the address translation.
833 if (lseek(kd->vmfd, (off_t)kva, 0) == -1 && errno != 0) {
834 _kvm_err(kd, 0, "invalid address (0x%jx)",
838 cr = read(kd->vmfd, buf, len);
840 _kvm_syserr(kd, 0, "kvm_read");
842 } else if (cr < (ssize_t)len)
843 _kvm_err(kd, kd->program, "short read");
849 cc = kd->arch->ka_kvatop(kd, kva, &pa);
852 if (cc > (ssize_t)len)
855 if (lseek(kd->pmfd, pa, 0) == -1 && errno != 0) {
856 _kvm_syserr(kd, 0, _PATH_MEM);
859 cr = read(kd->pmfd, cp, cc);
861 _kvm_syserr(kd, kd->program, "kvm_read");
865 * If ka_kvatop returns a bogus value or our core file is
866 * truncated, we might wind up seeking beyond the end of the
867 * core file in which case the read will return 0 (EOF).
876 return (cp - (char *)buf);
880 kvm_write(kvm_t *kd, u_long kva, const void *buf, size_t len)
887 if (!ISALIVE(kd) && !kd->writable) {
888 _kvm_err(kd, kd->program,
889 "kvm_write not implemented for dead kernels");
895 * Just like kvm_read, only we write.
898 if (lseek(kd->vmfd, (off_t)kva, 0) == -1 && errno != 0) {
899 _kvm_err(kd, 0, "invalid address (%lx)", kva);
902 cc = write(kd->vmfd, buf, len);
904 _kvm_syserr(kd, 0, "kvm_write");
906 } else if ((size_t)cc < len)
907 _kvm_err(kd, kd->program, "short write");
913 cc = kd->arch->ka_kvatop(kd, kva, &pa);
916 if (cc > (ssize_t)len)
919 if (lseek(kd->pmfd, pa, 0) == -1 && errno != 0) {
920 _kvm_syserr(kd, 0, _PATH_MEM);
923 cw = write(kd->pmfd, cp, cc);
925 _kvm_syserr(kd, kd->program, "kvm_write");
929 * If ka_kvatop returns a bogus value or our core file is
930 * truncated, we might wind up seeking beyond the end of the
931 * core file in which case the read will return 0 (EOF).
940 return (cp - (const char *)buf);
944 kvm_native(kvm_t *kd)
949 return (kd->arch->ka_native(kd));