2 * Copyright (c) 2007 Sandvine Incorporated
3 * Copyright (c) 1998 John D. Polstra
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
31 #include <sys/endian.h>
32 #include <sys/param.h>
33 #include <sys/procfs.h>
34 #include <sys/ptrace.h>
35 #include <sys/queue.h>
36 #include <sys/linker_set.h>
38 #include <sys/sysctl.h>
41 #include <machine/elf.h>
42 #include <vm/vm_param.h>
45 #include <vm/vm_map.h>
61 * Code for generating ELF core dumps.
/*
 * Callback type used when walking the memory map: it receives one map
 * entry plus an opaque, caller-supplied closure pointer.
 */
64 typedef void (*segment_callback)(vm_map_entry_t, void *);
66 /* Closure for cb_put_phdr(). */
68 Elf_Phdr *phdr; /* Program header to fill in */
69 Elf_Off offset; /* Offset of segment in core file */
72 /* Closure for cb_size_segment(). */
74 int count; /* Count of writable segments. */
75 size_t size; /* Total size of all writable segments. */
/*
 * Types used for the register and process-info note payloads.  When
 * ELFCORE_COMPAT_32 is defined the 32-bit structure layouts are used and
 * real conversion routines are declared.
 */
78 #ifdef ELFCORE_COMPAT_32
79 typedef struct fpreg32 elfcore_fpregset_t;
80 typedef struct reg32 elfcore_gregset_t;
81 typedef struct prpsinfo32 elfcore_prpsinfo_t;
82 typedef struct prstatus32 elfcore_prstatus_t;
83 static void elf_convert_gregset(elfcore_gregset_t *rd, struct reg *rs);
84 static void elf_convert_fpregset(elfcore_fpregset_t *rd, struct fpreg *rs);
/*
 * NOTE(review): the typedefs below reuse the names declared above, so they
 * presumably belong to the alternative (native) branch of the conditional;
 * the directive separating the two branches is not visible here -- confirm.
 * In this variant the conversion is a plain structure assignment.  The
 * macro arguments are not parenthesized, so pass only simple pointer
 * expressions.
 */
86 typedef fpregset_t elfcore_fpregset_t;
87 typedef gregset_t elfcore_gregset_t;
88 typedef prpsinfo_t elfcore_prpsinfo_t;
89 typedef prstatus_t elfcore_prstatus_t;
90 #define elf_convert_gregset(d,s) *d = *s
91 #define elf_convert_fpregset(d,s) *d = *s
/*
 * A note generator: called with an opaque argument, it returns a buffer
 * holding the note payload and stores the payload length through the
 * size_t pointer (see the call in elf_putnote()).
 */
94 typedef void* (*notefunc_t)(void *, size_t *);
/* Forward declarations of the file-local helpers. */
96 static void cb_put_phdr(vm_map_entry_t, void *);
97 static void cb_size_segment(vm_map_entry_t, void *);
98 static void each_writable_segment(vm_map_entry_t, segment_callback,
100 static void elf_detach(void); /* atexit() handler. */
101 static void *elf_note_fpregset(void *, size_t *);
102 static void *elf_note_prpsinfo(void *, size_t *);
103 static void *elf_note_prstatus(void *, size_t *);
104 static void *elf_note_thrmisc(void *, size_t *);
105 #if defined(__i386__) || defined(__amd64__)
106 static void *elf_note_x86_xstate(void *, size_t *);
108 static void *elf_note_procstat_auxv(void *, size_t *);
109 static void *elf_note_procstat_files(void *, size_t *);
110 static void *elf_note_procstat_groups(void *, size_t *);
111 static void *elf_note_procstat_osrel(void *, size_t *);
112 static void *elf_note_procstat_proc(void *, size_t *);
113 static void *elf_note_procstat_psstrings(void *, size_t *);
114 static void *elf_note_procstat_rlimit(void *, size_t *);
115 static void *elf_note_procstat_umask(void *, size_t *);
116 static void *elf_note_procstat_vmmap(void *, size_t *);
117 static void elf_puthdr(int, pid_t, vm_map_entry_t, void *, size_t, size_t,
119 static void elf_putnote(int, notefunc_t, void *, struct sbuf *);
120 static void elf_putnotes(pid_t, struct sbuf *, size_t *);
121 static void freemap(vm_map_entry_t);
122 static vm_map_entry_t readmap(pid_t);
123 static void *procstat_sysctl(void *, int, size_t, size_t *sizep);
/*
 * File-scope state shared with elf_detach(): that handler is registered
 * through atexit() and takes no arguments, so it reads the target pid and
 * the wait status from these variables.
 */
125 static pid_t g_pid; /* Pid being dumped, global for elf_detach */
126 static int g_status; /* proc status after ptrace attach */
/*
 * Inspect the ELF header of the file open on efd.  The visible steps read
 * sizeof(hdr) bytes, decode e_machine according to the byte order recorded
 * in e_ident[EI_DATA], and test the result with ELF_MACHINE_OK().
 * The pid and binfile parameters are marked unused.
 */
129 elf_ident(int efd, pid_t pid __unused, char *binfile __unused)
135 cnt = read(efd, &hdr, sizeof(hdr));
/* Anything other than a full header read is treated separately. */
136 if (cnt != sizeof(hdr))
/* e_machine is stored in the byte order of the file, not of the host. */
140 switch (hdr.e_ident[EI_DATA]) {
142 machine = le16toh(hdr.e_machine);
145 machine = be16toh(hdr.e_machine);
150 if (!ELF_MACHINE_OK(machine))
164 * Forward any pending signals. SIGSTOP is generated by ptrace
165 * itself, so ignore it.
/* Take the stop signal from the saved wait status, or 0 if not stopped. */
167 sig = WIFSTOPPED(g_status) ? WSTOPSIG(g_status) : 0;
/* Detach from the traced process, passing sig as the data argument. */
170 ptrace(PT_DETACH, g_pid, (caddr_t)1, sig);
175 * Write an ELF coredump for the given pid to the given fd.
/*
 * Write an ELF core image of process pid to fd.  efd is passed on to
 * elf_puthdr(), which re-reads the ELF header of the executable from it.
 * Visible steps: register elf_detach() with atexit() and attach with
 * ptrace, size the segments, build header plus notes in an sbuf, write
 * that out, then copy each segment out of the target with PT_IO.
 */
178 elf_coredump(int efd, int fd, pid_t pid)
181 struct sseg_closure seginfo;
184 size_t hdrsize, notesz, segoff;
189 /* Attach to process to dump. */
191 if (atexit(elf_detach) != 0)
194 ptrace(PT_ATTACH, pid, NULL, 0);
/* The wait status is kept in g_status for later use by elf_detach(). */
197 if (waitpid(pid, &g_status, 0) == -1)
200 /* Get the program's memory map. */
203 /* Size the program segments. */
206 each_writable_segment(map, cb_size_segment, &seginfo);
209 * Build the header and the notes using sbuf and write to the file.
211 sb = sbuf_new_auto();
/* One program header per segment plus one for the note segment. */
212 hdrsize = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * (1 + seginfo.count);
213 /* Start header + notes section. */
214 sbuf_start_section(sb, NULL);
215 /* Make empty header subsection. */
216 sbuf_start_section(sb, &old_len);
218 sbuf_end_section(sb, old_len, hdrsize, 0);
/*
 * NOTE(review): the third argument on the next line appears mis-encoded.
 * elf_putnotes() takes a size_t pointer there, so the address of notesz
 * is presumably intended -- confirm against the pristine file.
 */
220 elf_putnotes(pid, sb, ¬esz);
221 /* Align up to a page boundary for the program segments. */
222 sbuf_end_section(sb, -1, PAGE_SIZE, 0);
223 if (sbuf_finish(sb) != 0)
224 err(1, "sbuf_finish");
/* The finished sbuf length is the file offset of the first segment. */
226 segoff = sbuf_len(sb);
227 /* Fill in the header. */
228 elf_puthdr(efd, pid, map, hdr, hdrsize, notesz, segoff, seginfo.count);
230 n = write(fd, hdr, segoff);
234 errx(1, "short write");
236 /* Write the contents of all of the writable segments. */
/*
 * Skip the first program header, which elf_puthdr() fills in as the
 * PT_NOTE entry, so php points at the first segment header.
 */
237 php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
238 for (i = 0; i < seginfo.count; i++) {
239 struct ptrace_io_desc iorequest;
240 uintmax_t nleft = php->p_filesz;
242 iorequest.piod_op = PIOD_READ_D;
243 iorequest.piod_offs = (caddr_t)(uintptr_t)php->p_vaddr;
/* Each transfer is bounded by the size of the local buffer. */
249 if (nleft > sizeof(buf))
253 iorequest.piod_addr = buf;
254 iorequest.piod_len = nwant;
255 ptrace(PT_IO, pid, (caddr_t)&iorequest, 0);
/* The transferred length is read back from piod_len after the call. */
256 ngot = iorequest.piod_len;
257 if ((size_t)ngot < nwant)
258 errx(1, "short read wanted %zu, got %zd",
260 ngot = write(fd, buf, nwant);
262 err(1, "write of segment %d failed", i);
263 if ((size_t)ngot != nwant)
264 errx(1, "short write");
/* Advance the read position in the target by the amount written. */
266 iorequest.piod_offs += ngot;
275 * A callback for each_writable_segment() to write out the segment's
276 * program header entry.
/*
 * Fill in one PT_LOAD program header for the given map entry.  closure is
 * a struct phdr_closure carrying the header to fill and the running file
 * offset.
 */
279 cb_put_phdr(vm_map_entry_t entry, void *closure)
281 struct phdr_closure *phc = (struct phdr_closure *)closure;
282 Elf_Phdr *phdr = phc->phdr;
/* Segment data starts on a page boundary in the core file. */
284 phc->offset = round_page(phc->offset);
286 phdr->p_type = PT_LOAD;
287 phdr->p_offset = phc->offset;
288 phdr->p_vaddr = entry->start;
/* The whole entry is dumped, so file size and memory size are equal. */
290 phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
291 phdr->p_align = PAGE_SIZE;
/* Translate the VM protection bits into ELF segment flags. */
293 if (entry->protection & VM_PROT_READ)
294 phdr->p_flags |= PF_R;
295 if (entry->protection & VM_PROT_WRITE)
296 phdr->p_flags |= PF_W;
297 if (entry->protection & VM_PROT_EXECUTE)
298 phdr->p_flags |= PF_X;
/* Move the running offset past the data of this segment. */
300 phc->offset += phdr->p_filesz;
305 * A callback for each_writable_segment() to gather information about
306 * the number of segments and their total size.
/*
 * Accumulate sizing information for one map entry.  closure is a
 * struct sseg_closure.
 */
309 cb_size_segment(vm_map_entry_t entry, void *closure)
311 struct sseg_closure *ssc = (struct sseg_closure *)closure;
/* Add the byte length of this entry to the running total. */
314 ssc->size += entry->end - entry->start;
318 * For each segment in the given memory map, call the given function
319 * with a pointer to the map entry and some arbitrary caller-supplied
/*
 * Walk the list starting at map through the next links and invoke func
 * with each entry and the caller-supplied closure.
 * NOTE(review): despite the name, no filtering is done here; every entry
 * in the list is visited.
 */
323 each_writable_segment(vm_map_entry_t map, segment_callback func, void *closure)
325 vm_map_entry_t entry;
327 for (entry = map; entry != NULL; entry = entry->next)
328 (*func)(entry, closure);
/*
 * Append the notes for process pid to sb: one NT_PRPSINFO note, a set of
 * per-thread notes for every LWP, then the NT_PROCSTAT_* notes.
 * NOTE(review): sizep presumably receives the total size computed by the
 * final sbuf_end_section() call; the assignment is not visible here.
 */
332 elf_putnotes(pid_t pid, struct sbuf *sb, size_t *sizep)
335 size_t threads, old_len;
/* Ask how many LWPs the process has, then fetch their ids. */
340 threads = ptrace(PT_GETNUMLWPS, pid, NULL, 0);
342 err(1, "PT_GETNUMLWPS");
343 tids = malloc(threads * sizeof(*tids));
345 errx(1, "out of memory");
347 ptrace(PT_GETLWPLIST, pid, (void *)tids, threads);
349 err(1, "PT_GETLWPLIST");
/* Everything emitted from here on belongs to one sbuf section. */
351 sbuf_start_section(sb, &old_len);
352 elf_putnote(NT_PRPSINFO, elf_note_prpsinfo, &pid, sb);
/* Per-thread notes; each generator receives a pointer to the LWP id. */
354 for (i = 0; i < threads; ++i) {
355 elf_putnote(NT_PRSTATUS, elf_note_prstatus, tids + i, sb);
356 elf_putnote(NT_FPREGSET, elf_note_fpregset, tids + i, sb);
357 elf_putnote(NT_THRMISC, elf_note_thrmisc, tids + i, sb);
358 #if defined(__i386__) || defined(__amd64__)
359 elf_putnote(NT_X86_XSTATE, elf_note_x86_xstate, tids + i, sb);
363 #ifndef ELFCORE_COMPAT_32
364 elf_putnote(NT_PROCSTAT_PROC, elf_note_procstat_proc, &pid, sb);
365 elf_putnote(NT_PROCSTAT_FILES, elf_note_procstat_files, &pid, sb);
366 elf_putnote(NT_PROCSTAT_VMMAP, elf_note_procstat_vmmap, &pid, sb);
367 elf_putnote(NT_PROCSTAT_GROUPS, elf_note_procstat_groups, &pid, sb);
368 elf_putnote(NT_PROCSTAT_UMASK, elf_note_procstat_umask, &pid, sb);
369 elf_putnote(NT_PROCSTAT_RLIMIT, elf_note_procstat_rlimit, &pid, sb);
370 elf_putnote(NT_PROCSTAT_OSREL, elf_note_procstat_osrel, &pid, sb);
371 elf_putnote(NT_PROCSTAT_PSSTRINGS, elf_note_procstat_psstrings, &pid,
373 elf_putnote(NT_PROCSTAT_AUXV, elf_note_procstat_auxv, &pid, sb);
/* Close the section opened above; the return value is kept in size. */
376 size = sbuf_end_section(sb, old_len, 1, 0);
378 err(1, "sbuf_end_section");
384 * Emit one note section to sbuf.
/*
 * Append one note of the given type to sb.  The payload and its length
 * come from notefunc(arg, ...).  The note header is written first, then
 * the name and the payload, each wrapped in its own sbuf section that is
 * closed with sizeof(Elf32_Size) as the third argument.
 */
387 elf_putnote(int type, notefunc_t notefunc, void *arg, struct sbuf *sb)
394 desc = notefunc(arg, &descsz);
395 note.n_namesz = 8; /* strlen("FreeBSD") + 1 */
396 note.n_descsz = descsz;
/*
 * NOTE(review): the second argument on the next line appears mis-encoded;
 * the address of the local note header is presumably intended -- confirm
 * against the pristine file.
 */
399 sbuf_bcat(sb, ¬e, sizeof(note));
400 sbuf_start_section(sb, &old_len);
/* n_namesz covers the terminating NUL of the name string. */
401 sbuf_bcat(sb, "FreeBSD", note.n_namesz);
402 sbuf_end_section(sb, old_len, sizeof(Elf32_Size), 0);
405 sbuf_start_section(sb, &old_len);
406 sbuf_bcat(sb, desc, descsz);
407 sbuf_end_section(sb, old_len, sizeof(Elf32_Size), 0);
412 * Generate the ELF coredump header.
/*
 * Fill in the ELF header and the program headers in the buffer hdr.
 * The ELF header of the executable is re-read from efd so that e_machine
 * and e_flags can be copied from it.  The first program header describes
 * the note segment; the remaining ones are produced by cb_put_phdr() for
 * each map entry.
 */
415 elf_puthdr(int efd, pid_t pid, vm_map_entry_t map, void *hdr, size_t hdrsize,
416 size_t notesz, size_t segoff, int numsegs)
418 Elf_Ehdr *ehdr, binhdr;
420 struct phdr_closure phc;
423 cnt = read(efd, &binhdr, sizeof(binhdr));
425 err(1, "Failed to re-read ELF header");
426 else if (cnt != sizeof(binhdr))
427 errx(1, "Failed to re-read ELF header");
/* The program header table follows the ELF header directly. */
429 ehdr = (Elf_Ehdr *)hdr;
430 phdr = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr));
432 ehdr->e_ident[EI_MAG0] = ELFMAG0;
433 ehdr->e_ident[EI_MAG1] = ELFMAG1;
434 ehdr->e_ident[EI_MAG2] = ELFMAG2;
435 ehdr->e_ident[EI_MAG3] = ELFMAG3;
436 ehdr->e_ident[EI_CLASS] = ELF_CLASS;
437 ehdr->e_ident[EI_DATA] = ELF_DATA;
438 ehdr->e_ident[EI_VERSION] = EV_CURRENT;
439 ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
440 ehdr->e_ident[EI_ABIVERSION] = 0;
441 ehdr->e_ident[EI_PAD] = 0;
442 ehdr->e_type = ET_CORE;
/* Machine type and flags are inherited from the executable. */
443 ehdr->e_machine = binhdr.e_machine;
444 ehdr->e_version = EV_CURRENT;
446 ehdr->e_phoff = sizeof(Elf_Ehdr);
447 ehdr->e_flags = binhdr.e_flags;
448 ehdr->e_ehsize = sizeof(Elf_Ehdr);
449 ehdr->e_phentsize = sizeof(Elf_Phdr);
/* One header per segment plus the note header. */
450 ehdr->e_phnum = numsegs + 1;
451 ehdr->e_shentsize = sizeof(Elf_Shdr);
453 ehdr->e_shstrndx = SHN_UNDEF;
456 * Fill in the program header entries.
459 /* The note segment. */
460 phdr->p_type = PT_NOTE;
/* The notes start right after the ELF and program headers. */
461 phdr->p_offset = hdrsize;
464 phdr->p_filesz = notesz;
466 phdr->p_flags = PF_R;
467 phdr->p_align = sizeof(Elf32_Size);
470 /* All the writable segments from the program. */
473 each_writable_segment(map, cb_put_phdr, &phc);
477 * Free the memory map.
/*
 * Walk the map list until it is exhausted.  The successor is saved first.
 * NOTE(review): presumably because the current entry is then released;
 * the release itself is not visible here -- confirm.
 */
480 freemap(vm_map_entry_t map)
483 while (map != NULL) {
484 vm_map_entry_t next = map->next;
491 * Read the process's memory map using kinfo_getvmmap(), and return a list of
492 * VM map entries. Only the non-device read/writable segments are
493 * returned. The map entries in the list aren't fully filled in; only
494 * the items we need are present.
496 static vm_map_entry_t
499 vm_map_entry_t ent, *linkp, map;
500 struct kinfo_vmentry *vmentl, *kve;
/* Fetch the array of kernel VM entries describing the process. */
503 vmentl = kinfo_getvmmap(pid, &nitems);
505 err(1, "cannot retrieve mappings for %u process", pid);
509 for (i = 0; i < nitems; i++) {
513 * Ignore 'malformed' segments or ones representing memory
514 * mapping with MAP_NOCORE on.
515 * If the 'full' support is disabled, just dump the most
516 * meaningful data segments.
518 if ((kve->kve_protection & KVME_PROT_READ) == 0 ||
519 (kve->kve_flags & KVME_FLAG_NOCOREDUMP) != 0 ||
520 kve->kve_type == KVME_TYPE_DEAD ||
521 kve->kve_type == KVME_TYPE_UNKNOWN ||
522 ((pflags & PFLAGS_FULL) == 0 &&
523 kve->kve_type != KVME_TYPE_DEFAULT &&
524 kve->kve_type != KVME_TYPE_VNODE &&
525 kve->kve_type != KVME_TYPE_SWAP &&
526 kve->kve_type != KVME_TYPE_PHYS))
/* A zeroed list node; only the fields assigned below are filled in. */
529 ent = calloc(1, sizeof(*ent));
531 errx(1, "out of memory");
532 ent->start = (vm_offset_t)kve->kve_start;
533 ent->end = (vm_offset_t)kve->kve_end;
/*
 * NOTE(review): write permission is recorded unconditionally; the write
 * bit of kve_protection is never consulted in the visible code, so a
 * read-only mapping is reported as writable in the core file -- confirm
 * whether that is intended.
 */
534 ent->protection = VM_PROT_READ | VM_PROT_WRITE;
535 if ((kve->kve_protection & KVME_PROT_EXEC) != 0)
536 ent->protection |= VM_PROT_EXECUTE;
546 * Miscellaneous note out functions.
/*
 * Build the NT_PRPSINFO payload.  Allocates a zeroed prpsinfo structure,
 * fills in version, size, command name, argument string and pid, and
 * stores the structure size through sizep.
 */
550 elf_note_prpsinfo(void *arg, size_t *sizep)
554 elfcore_prpsinfo_t *psinfo;
555 struct kinfo_proc kip;
560 psinfo = calloc(1, sizeof(*psinfo));
562 errx(1, "out of memory");
563 psinfo->pr_version = PRPSINFO_VERSION;
564 psinfo->pr_psinfosz = sizeof(*psinfo);
/* First query: the kinfo_proc record of the process. */
568 name[2] = KERN_PROC_PID;
571 if (sysctl(name, 4, &kip, &len, NULL, 0) == -1)
572 err(1, "kern.proc.pid.%u", pid);
/*
 * NOTE(review): this branch is a consistency check, not a failed system
 * call, yet err() appends the text for the current errno; errx() may be
 * what is intended -- confirm.
 */
573 if (kip.ki_pid != pid)
574 err(1, "kern.proc.pid.%u", pid);
575 strlcpy(psinfo->pr_fname, kip.ki_comm, sizeof(psinfo->pr_fname));
/* Second query: the argument vector, read straight into pr_psargs. */
576 name[2] = KERN_PROC_ARGS;
/* One byte is held back; the buffer was zeroed by calloc() above. */
577 len = sizeof(psinfo->pr_psargs) - 1;
578 if (sysctl(name, 4, psinfo->pr_psargs, &len, NULL, 0) == 0 && len > 0) {
579 cp = psinfo->pr_psargs;
582 cp = memchr(cp, '\0', end - cp);
/*
 * NOTE(review): presumably the fallback used when the argument string
 * could not be fetched; the branch structure is not visible -- confirm.
 */
588 strlcpy(psinfo->pr_psargs, kip.ki_comm,
589 sizeof(psinfo->pr_psargs));
590 psinfo->pr_pid = pid;
592 *sizep = sizeof(*psinfo);
/*
 * Build the NT_PRSTATUS payload for one thread.  arg points at the LWP id.
 * Allocates a zeroed prstatus structure, fills in the bookkeeping fields
 * and the general registers, and stores the structure size through sizep.
 */
597 elf_note_prstatus(void *arg, size_t *sizep)
600 elfcore_prstatus_t *status;
603 tid = *(lwpid_t *)arg;
604 status = calloc(1, sizeof(*status));
606 errx(1, "out of memory");
607 status->pr_version = PRSTATUS_VERSION;
608 status->pr_statussz = sizeof(*status);
609 status->pr_gregsetsz = sizeof(elfcore_gregset_t);
610 status->pr_fpregsetsz = sizeof(elfcore_fpregset_t);
611 status->pr_osreldate = __FreeBSD_version;
/* The pr_pid field carries the thread id, not the process id. */
612 status->pr_pid = tid;
/*
 * NOTE(review): the result of the PT_GETREGS request is not checked
 * before greg is converted; on failure greg may hold indeterminate data.
 */
613 ptrace(PT_GETREGS, tid, (void *)&greg, 0);
614 elf_convert_gregset(&status->pr_reg, &greg);
616 *sizep = sizeof(*status);
/*
 * Build the NT_FPREGSET payload for one thread.  arg points at the LWP id.
 * Allocates a zeroed fpregset structure, fills it from the floating point
 * registers of the thread, and stores the structure size through sizep.
 */
621 elf_note_fpregset(void *arg, size_t *sizep)
624 elfcore_fpregset_t *fpregset;
627 tid = *(lwpid_t *)arg;
628 fpregset = calloc(1, sizeof(*fpregset));
629 if (fpregset == NULL)
630 errx(1, "out of memory");
/*
 * NOTE(review): the result of the PT_GETFPREGS request is not checked
 * before fpreg is converted; on failure fpreg may hold indeterminate data.
 */
631 ptrace(PT_GETFPREGS, tid, (void *)&fpreg, 0);
632 elf_convert_fpregset(fpregset, &fpreg);
634 *sizep = sizeof(*fpregset);
/*
 * Build the NT_THRMISC payload for one thread.  arg points at the LWP id.
 * Allocates a zeroed thrmisc structure, copies the thread name obtained
 * through PT_LWPINFO into it, and stores the structure size through sizep.
 */
639 elf_note_thrmisc(void *arg, size_t *sizep)
642 struct ptrace_lwpinfo lwpinfo;
645 tid = *(lwpid_t *)arg;
646 thrmisc = calloc(1, sizeof(*thrmisc));
648 errx(1, "out of memory");
649 ptrace(PT_LWPINFO, tid, (void *)&lwpinfo,
651 memset(&thrmisc->_pad, 0, sizeof(thrmisc->_pad));
/*
 * NOTE(review): the memset above clears memory that calloc() already
 * zeroed.  The strcpy below is unbounded and relies on pl_tdname being
 * NUL-terminated and no longer than pr_tname; a bounded copy such as
 * strlcpy() with sizeof(thrmisc->pr_tname) would be safer -- confirm the
 * two field sizes.
 */
652 strcpy(thrmisc->pr_tname, lwpinfo.pl_tdname);
654 *sizep = sizeof(*thrmisc);
658 #if defined(__i386__) || defined(__amd64__)
/*
 * Build the NT_X86_XSTATE payload for one thread.  arg points at the LWP
 * id.  The extended state is fetched into a buffer of info.xsave_len
 * bytes and that length is stored through sizep.
 */
660 elf_note_x86_xstate(void *arg, size_t *sizep)
/* The xstate description is queried once and cached across calls. */
664 static bool xsave_checked = false;
665 static struct ptrace_xstate_info info;
667 tid = *(lwpid_t *)arg;
668 if (!xsave_checked) {
669 if (ptrace(PT_GETXSTATE_INFO, tid, (void *)&info,
672 xsave_checked = true;
/* A zero length is handled as a special case. */
674 if (info.xsave_len == 0) {
/*
 * NOTE(review): neither the calloc() result nor the PT_GETXSTATE result
 * is checked before the buffer is written to below.
 */
678 xstate = calloc(1, info.xsave_len);
679 ptrace(PT_GETXSTATE, tid, xstate, 0);
/* Record the xsave mask inside the saved area at the XCR0 offset. */
680 *(uint64_t *)(xstate + X86_XSTATE_XCR0_OFFSET) = info.xsave_mask;
681 *sizep = info.xsave_len;
/*
 * Common helper for the procstat notes.  It produces a buffer that starts
 * with an int holding structsz and continues with the raw data returned
 * by a four-element sysctl request; the total length is stored through
 * sizep.  The value of what appears in the error messages.
 * NOTE(review): the setup of the name array is not visible here;
 * presumably what selects the kern.proc node and arg points at the pid.
 */
687 procstat_sysctl(void *arg, int what, size_t structsz, size_t *sizep)
691 int name[4], structsize;
695 structsize = structsz;
/* First call, with no buffer, only reports the required length. */
701 if (sysctl(name, 4, NULL, &len, NULL, 0) == -1)
702 err(1, "kern.proc.%d.%u", what, pid);
/*
 * Allocate the reported length plus one third.
 * NOTE(review): presumably headroom in case the data grows before the
 * second call -- confirm.
 */
703 buf = calloc(1, sizeof(structsize) + len * 4 / 3);
705 errx(1, "out of memory");
/* The element size goes first; the sysctl data follows it. */
706 bcopy(&structsize, buf, sizeof(structsize));
707 p = (char *)buf + sizeof(structsize);
708 if (sysctl(name, 4, p, &len, NULL, 0) == -1)
709 err(1, "kern.proc.%d.%u", what, pid);
/* len now holds the amount actually returned by the second call. */
711 *sizep = sizeof(structsize) + len;
/*
 * NT_PROCSTAT_PROC payload: procstat_sysctl() with the KERN_PROC_PID
 * selector combined with KERN_PROC_INC_THREAD, element size that of
 * struct kinfo_proc.
 */
716 elf_note_procstat_proc(void *arg, size_t *sizep)
719 return (procstat_sysctl(arg, KERN_PROC_PID | KERN_PROC_INC_THREAD,
720 sizeof(struct kinfo_proc), sizep));
/*
 * NT_PROCSTAT_FILES payload: procstat_sysctl() with KERN_PROC_FILEDESC,
 * element size that of struct kinfo_file.
 */
724 elf_note_procstat_files(void *arg, size_t *sizep)
727 return (procstat_sysctl(arg, KERN_PROC_FILEDESC,
728 sizeof(struct kinfo_file), sizep));
/*
 * NT_PROCSTAT_VMMAP payload: procstat_sysctl() with KERN_PROC_VMMAP,
 * element size that of struct kinfo_vmentry.
 */
732 elf_note_procstat_vmmap(void *arg, size_t *sizep)
735 return (procstat_sysctl(arg, KERN_PROC_VMMAP,
736 sizeof(struct kinfo_vmentry), sizep));
/*
 * NT_PROCSTAT_GROUPS payload: procstat_sysctl() with KERN_PROC_GROUPS,
 * element size that of gid_t.
 */
740 elf_note_procstat_groups(void *arg, size_t *sizep)
743 return (procstat_sysctl(arg, KERN_PROC_GROUPS, sizeof(gid_t), sizep));
/*
 * NT_PROCSTAT_UMASK payload: procstat_sysctl() with KERN_PROC_UMASK,
 * element size that of u_short.
 */
747 elf_note_procstat_umask(void *arg, size_t *sizep)
750 return (procstat_sysctl(arg, KERN_PROC_UMASK, sizeof(u_short), sizep));
/*
 * NT_PROCSTAT_OSREL payload: procstat_sysctl() with KERN_PROC_OSREL,
 * element size that of int.
 */
754 elf_note_procstat_osrel(void *arg, size_t *sizep)
757 return (procstat_sysctl(arg, KERN_PROC_OSREL, sizeof(int), sizep));
/*
 * NT_PROCSTAT_PSSTRINGS payload: procstat_sysctl() with
 * KERN_PROC_PS_STRINGS, element size that of vm_offset_t.
 */
761 elf_note_procstat_psstrings(void *arg, size_t *sizep)
764 return (procstat_sysctl(arg, KERN_PROC_PS_STRINGS,
765 sizeof(vm_offset_t), sizep));
/*
 * NT_PROCSTAT_AUXV payload: procstat_sysctl() with KERN_PROC_AUXV,
 * element size that of Elf_Auxinfo.
 */
769 elf_note_procstat_auxv(void *arg, size_t *sizep)
772 return (procstat_sysctl(arg, KERN_PROC_AUXV,
773 sizeof(Elf_Auxinfo), sizep));
/*
 * Build the NT_PROCSTAT_RLIMIT payload.  The buffer starts with an int
 * holding the size of the whole rlimit array and is followed by
 * RLIM_NLIMITS struct rlimit records, each fetched with its own
 * five-element sysctl request.  The total length is stored through sizep.
 */
777 elf_note_procstat_rlimit(void *arg, size_t *sizep)
781 int i, name[5], structsize;
/* Unlike procstat_sysctl(), the size is known up front. */
785 structsize = sizeof(struct rlimit) * RLIM_NLIMITS;
786 buf = calloc(1, sizeof(structsize) + structsize);
788 errx(1, "out of memory");
789 bcopy(&structsize, buf, sizeof(structsize));
790 p = (char *)buf + sizeof(structsize);
793 name[2] = KERN_PROC_RLIMIT;
795 len = sizeof(struct rlimit);
796 for (i = 0; i < RLIM_NLIMITS; i++) {
798 if (sysctl(name, 5, p, &len, NULL, 0) == -1)
799 err(1, "kern.proc.rlimit.%u", pid);
/* Every request must return exactly one full record. */
800 if (len != sizeof(struct rlimit))
801 errx(1, "kern.proc.rlimit.%u: short read", pid);
805 *sizep = sizeof(structsize) + structsize;
/*
 * Dumper descriptor pairing the identification routine with the core dump
 * routine, registered in the dumpset set through TEXT_SET().
 */
809 struct dumpers __elfN(dump) = { elf_ident, elf_coredump };
810 TEXT_SET(dumpset, __elfN(dump));