2 * Copyright (c) 2002 Marcel Moolenaar
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include <sys/cdefs.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/kerneldump.h>
35 #include <sys/malloc.h>
36 #include <sys/msgbuf.h>
38 #include <sys/watchdog.h>
41 #include <vm/vm_param.h>
42 #include <vm/vm_page.h>
43 #include <vm/vm_phys.h>
44 #include <vm/vm_dumpset.h>
47 #include <machine/dump.h>
48 #include <machine/elf.h>
49 #include <machine/md_var.h>
50 #include <machine/pcb.h>
52 CTASSERT(sizeof(struct kerneldumpheader) == 512);
54 #define MD_ALIGN(x) roundup2((off_t)(x), PAGE_SIZE)
56 /* Handle buffered writes. */
59 struct dump_pa dump_map[DUMPSYS_MD_PA_NPAIRS];
61 #if !defined(__powerpc__)
63 dumpsys_gen_pa_init(void)
67 bzero(dump_map, sizeof(dump_map));
68 for (n = 0; n < nitems(dump_map); n++) {
70 if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0)
72 dump_map[n].pa_start = dump_avail[idx];
73 dump_map[n].pa_size = dump_avail[idx + 1] - dump_avail[idx];
79 dumpsys_gen_pa_next(struct dump_pa *mdp)
83 return (&dump_map[0]);
86 if (mdp->pa_size == 0)
92 dumpsys_gen_wbinv_all(void)
98 dumpsys_gen_unmap_chunk(vm_paddr_t pa __unused, size_t chunk __unused,
105 dumpsys_gen_write_aux_headers(struct dumperinfo *di)
112 dumpsys_buf_seek(struct dumperinfo *di, size_t sz)
114 static uint8_t buf[DEV_BSIZE];
118 bzero(buf, sizeof(buf));
121 nbytes = MIN(sz, sizeof(buf));
123 error = dump_append(di, buf, nbytes);
133 dumpsys_buf_write(struct dumperinfo *di, char *ptr, size_t sz)
139 len = di->blocksize - fragsz;
142 memcpy((char *)di->blockbuf + fragsz, ptr, len);
146 if (fragsz == di->blocksize) {
147 error = dump_append(di, di->blockbuf, di->blocksize);
157 dumpsys_buf_flush(struct dumperinfo *di)
164 error = dump_append(di, di->blockbuf, di->blocksize);
169 CTASSERT(PAGE_SHIFT < 20);
/*
 * Convert a page count to megabytes, rounding up to the next whole MB.
 * One megabyte is (1 << (20 - PAGE_SHIFT)) pages, so PAGE_SHIFT must be
 * below 20 for the shift counts to be valid.
 *
 * The argument is parenthesized in the expansion so that an expression
 * argument (conditional, bitwise, shift, ...) is evaluated as a unit
 * before the rounding term is added.  It is expanded exactly once.
 */
#define PG2MB(pgs) (((pgs) + (1 << (20 - PAGE_SHIFT)) - 1) >> (20 - PAGE_SHIFT))
173 dumpsys_cb_dumpdata(struct dump_pa *mdp, int seqnr, void *arg)
175 struct dumperinfo *di = (struct dumperinfo*)arg;
179 size_t counter, sz, chunk;
183 error = 0; /* catch case in which chunk size is 0 */
184 counter = 0; /* Update twiddle every 16MB */
186 pgs = mdp->pa_size / PAGE_SIZE;
188 maxdumppgs = min(di->maxiosize / PAGE_SIZE, MAXDUMPPGS);
189 if (maxdumppgs == 0) /* seatbelt */
192 printf(" chunk %d: %juMB (%ju pages)", seqnr, (uintmax_t)PG2MB(pgs),
198 if (chunk > maxdumppgs)
200 sz = chunk << PAGE_SHIFT;
203 printf(" %ju", (uintmax_t)PG2MB(pgs));
204 counter &= (1 << 24) - 1;
207 dumpsys_map_chunk(pa, chunk, &va);
208 wdog_kern_pat(WD_LASTVAL);
210 error = dump_append(di, va, sz);
211 dumpsys_unmap_chunk(pa, chunk, va);
217 /* Check for user abort. */
222 printf(" (CTRL-C to abort) ");
224 printf(" ... %s\n", (error) ? "fail" : "ok");
229 dumpsys_foreach_chunk(dumpsys_callback_t cb, void *arg)
235 mdp = dumpsys_pa_next(NULL);
236 while (mdp != NULL) {
237 error = (*cb)(mdp, seqnr++, arg);
240 mdp = dumpsys_pa_next(mdp);
245 static off_t fileofs;
248 cb_dumphdr(struct dump_pa *mdp, int seqnr, void *arg)
250 struct dumperinfo *di = (struct dumperinfo*)arg;
256 bzero(&phdr, sizeof(phdr));
257 phdr.p_type = PT_LOAD;
258 phdr.p_flags = PF_R; /* XXX */
259 phdr.p_offset = fileofs;
261 phdr.p_vaddr = (do_minidump? mdp->pa_start : ~0L);
262 phdr.p_paddr = (do_minidump? ~0L : mdp->pa_start);
264 phdr.p_vaddr = mdp->pa_start;
265 phdr.p_paddr = mdp->pa_start;
267 phdr.p_filesz = size;
269 phdr.p_align = PAGE_SIZE;
271 error = dumpsys_buf_write(di, (char*)&phdr, sizeof(phdr));
272 fileofs += phdr.p_filesz;
277 cb_size(struct dump_pa *mdp, int seqnr, void *arg)
281 sz = (uint64_t *)arg;
282 *sz += (uint64_t)mdp->pa_size;
287 dumpsys_generic(struct dumperinfo *di)
289 static struct kerneldumpheader kdh;
296 #if MINIDUMP_PAGE_TRACKING == 1
298 return (minidumpsys(di, false));
301 bzero(&ehdr, sizeof(ehdr));
302 ehdr.e_ident[EI_MAG0] = ELFMAG0;
303 ehdr.e_ident[EI_MAG1] = ELFMAG1;
304 ehdr.e_ident[EI_MAG2] = ELFMAG2;
305 ehdr.e_ident[EI_MAG3] = ELFMAG3;
306 ehdr.e_ident[EI_CLASS] = ELF_CLASS;
307 #if BYTE_ORDER == LITTLE_ENDIAN
308 ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
310 ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
312 ehdr.e_ident[EI_VERSION] = EV_CURRENT;
313 ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE; /* XXX big picture? */
314 ehdr.e_type = ET_CORE;
315 ehdr.e_machine = EM_VALUE;
316 ehdr.e_phoff = sizeof(ehdr);
318 ehdr.e_ehsize = sizeof(ehdr);
319 ehdr.e_phentsize = sizeof(Elf_Phdr);
320 ehdr.e_shentsize = sizeof(Elf_Shdr);
324 /* Calculate dump size. */
326 ehdr.e_phnum = dumpsys_foreach_chunk(cb_size, &dumpsize) +
327 DUMPSYS_NUM_AUX_HDRS;
328 hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize;
329 fileofs = MD_ALIGN(hdrsz);
331 hdrgap = fileofs - roundup2((off_t)hdrsz, di->blocksize);
333 dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_ARCH_VERSION,
336 error = dump_start(di, &kdh);
340 printf("Dumping %ju MB (%d chunks)\n", (uintmax_t)dumpsize >> 20,
341 ehdr.e_phnum - DUMPSYS_NUM_AUX_HDRS);
343 /* Dump ELF header */
344 error = dumpsys_buf_write(di, (char*)&ehdr, sizeof(ehdr));
348 /* Dump program headers */
349 error = dumpsys_foreach_chunk(cb_dumphdr, di);
352 error = dumpsys_write_aux_headers(di);
355 dumpsys_buf_flush(di);
358 * All headers are written using blocked I/O, so we know the
359 * current offset is (still) block aligned. Skip the alignment
360 * in the file to have the segment contents aligned at page
363 error = dumpsys_buf_seek(di, (size_t)hdrgap);
367 /* Dump memory chunks. */
368 error = dumpsys_foreach_chunk(dumpsys_cb_dumpdata, di);
372 error = dump_finish(di, &kdh);
376 printf("\nDump complete\n");
383 if (error == ECANCELED)
384 printf("\nDump aborted\n");
385 else if (error == E2BIG || error == ENOSPC)
386 printf("\nDump failed. Partition too small.\n");
388 printf("\n** DUMP FAILED (ERROR %d) **\n", error);
392 #if MINIDUMP_PAGE_TRACKING == 1
394 /* Minidump progress bar */
399 } progress_track[10] = {
412 static uint64_t dumpsys_pb_size;
413 static uint64_t dumpsys_pb_remaining;
414 static uint64_t dumpsys_pb_check;
416 /* Reset the progress bar for a dump of dumpsize. */
418 dumpsys_pb_init(uint64_t dumpsize)
422 dumpsys_pb_size = dumpsys_pb_remaining = dumpsize;
423 dumpsys_pb_check = 0;
425 for (i = 0; i < nitems(progress_track); i++)
426 progress_track[i].visited = false;
430 * Update the progress according to the delta bytes that were written out.
431 * Check and print the progress percentage.
434 dumpsys_pb_progress(size_t delta)
438 dumpsys_pb_remaining -= delta;
439 dumpsys_pb_check += delta;
442 * To save time while dumping, only loop through progress_track
445 if ((dumpsys_pb_check >> DUMPSYS_PB_CHECK_BITS) == 0)
448 dumpsys_pb_check &= (1 << DUMPSYS_PB_CHECK_BITS) - 1;
450 sofar = 100 - ((dumpsys_pb_remaining * 100) / dumpsys_pb_size);
451 for (i = 0; i < nitems(progress_track); i++) {
452 if (sofar < progress_track[i].min_per ||
453 sofar > progress_track[i].max_per)
455 if (!progress_track[i].visited) {
456 progress_track[i].visited = true;
457 printf("..%d%%", sofar);
464 minidumpsys(struct dumperinfo *di, bool livedump)
466 struct minidumpstate state;
467 struct msgbuf mb_copy;
473 KASSERT(!dumping, ("live dump invoked from incorrect context"));
476 * Before invoking cpu_minidumpsys() on the live system, we
477 * must snapshot some required global state: the message
478 * buffer, and the page dump bitset. They may be modified at
479 * any moment, so for the sake of the live dump it is best to
480 * have an unchanging snapshot to work with. Both are included
481 * as part of the dump and consumed by userspace tools.
483 * Other global state important to the minidump code is the
484 * dump_avail array and the kernel's page tables, but snapshots
485 * are not taken of these. For one, dump_avail[] is expected
486 * not to change after boot. Snapshotting the kernel page
487 * tables would involve an additional walk, so this is avoided
490 * This means live dumps are best effort, and the result may or
491 * may not be usable; there are no guarantees about the
492 * consistency of the dump's contents. Any of the following
493 * (and likely more) may affect the live dump:
495 * - Data may be modified, freed, or remapped during the
496 * course of the dump, such that the contents written out
497 * are partially or entirely unrecognizable. This means
498 * valid references may point to destroyed/mangled objects,
501 * - The dumped context of any threads that ran during the
502 * dump process may be unreliable.
504 * - The set of kernel page tables included in the dump likely
505 * won't correspond exactly to the copy of the dump bitset.
506 * This means some pages will be dumped without any way to
507 * locate them, and some pages may not have been dumped
508 * despite appearing as if they should.
510 msg_ptr = malloc(msgbufsize, M_TEMP, M_WAITOK);
511 msgbuf_duplicate(msgbufp, &mb_copy, msg_ptr);
512 state.msgbufp = &mb_copy;
514 sz = BITSET_SIZE(vm_page_dump_pages);
515 state.dump_bitset = malloc(sz, M_TEMP, M_WAITOK);
516 BIT_COPY_STORE_REL(sz, vm_page_dump, state.dump_bitset);
518 KASSERT(dumping, ("minidump invoked outside of doadump()"));
520 /* Use the globals. */
521 state.msgbufp = msgbufp;
522 state.dump_bitset = vm_page_dump;
525 error = cpu_minidumpsys(di, &state);
527 free(msg_ptr, M_TEMP);
528 free(state.dump_bitset, M_TEMP);
533 #endif /* MINIDUMP_PAGE_TRACKING == 1 */