From c9114f9f86f92742eacd1d802c34009a57e81055 Mon Sep 17 00:00:00 2001 From: Mitchell Horne Date: Tue, 23 Mar 2021 17:47:14 -0300 Subject: [PATCH] Add new vnode dumper to support live minidumps This dumper can instantiate and write the dump's contents to a file-backed vnode. Unlike existing disk or network dumpers, the vnode dumper should not be invoked during a system panic, and therefore is not added to the global dumper_configs list. Instead, the vnode dumper is constructed ad-hoc when a live dump is requested using the new ioctl on /dev/mem. This is similar in spirit to a kgdb session against the live system via /dev/mem. As described briefly in the mem(4) man page, live dumps are not guaranteed to result in a usable output file, but offer some debugging value where forcefully panicking a system to dump its memory is not desirable/feasible. A future change to savecore(8) will add an option to save a live dump. Reviewed by: markj, Pau Amma (manpages) Discussed with: kib MFC after: 3 weeks Sponsored by: Juniper Networks, Inc. Sponsored by: Klara, Inc. Differential Revision: https://reviews.freebsd.org/D33813 --- share/man/man4/mem.4 | 62 +++++++++++ sys/conf/files | 1 + sys/dev/mem/memdev.c | 6 ++ sys/kern/kern_shutdown.c | 14 ++- sys/kern/kern_vnodedumper.c | 202 ++++++++++++++++++++++++++++++++++++ sys/sys/conf.h | 1 + sys/sys/kerneldump.h | 2 + sys/sys/memrange.h | 10 ++ 8 files changed, 296 insertions(+), 2 deletions(-) create mode 100644 sys/kern/kern_vnodedumper.c diff --git a/share/man/man4/mem.4 b/share/man/man4/mem.4 index f860df03642..6370d2a9552 100644 --- a/share/man/man4/mem.4 +++ b/share/man/man4/mem.4 @@ -202,6 +202,50 @@ to update an existing or establish a new range, or to .Dv MEMRANGE_SET_REMOVE to remove a range. .El +.Ss Live Kernel Dumps +.Pp +The +.Dv MEM_KERNELDUMP +ioctl will initiate a kernel dump against the running system, the contents of +which will be written to a process-owned file descriptor. 
+The resulting dump output will be in minidump format. +The request is described by +.Bd -literal +struct mem_livedump_arg { + int fd; /* input */ + int flags; /* input */ + uint8_t compression; /* input */ +}; +.Ed +.Pp +The +.Va fd +field is used to pass the file descriptor. +.Pp +The +.Va flags +field is currently unused and must be set to zero. +.Pp +The +.Va compression +field can be used to specify the desired compression to +be applied to the dump output. +The supported values are defined in +.In sys/kerneldump.h ; +that is, +.Dv KERNELDUMP_COMP_NONE , +.Dv KERNELDUMP_COMP_GZIP , +or +.Dv KERNELDUMP_COMP_ZSTD . +.Pp +Kernel dumps taken against the running system may have inconsistent kernel data +structures due to allocation, deallocation, or modification of memory +concurrent to the dump procedure. +Thus, the resulting core dump is not guaranteed to be usable. +A system under load is more likely to produce an inconsistent result. +Despite this, live kernel dumps can be useful for offline debugging of certain +types of kernel bugs, such as deadlocks, or in inspecting a particular part of +the system's state. .Sh RETURN VALUES .Ss MEM_EXTRACT_PADDR The @@ -229,6 +273,24 @@ base/length supplied. An attempt to remove a range failed because the range is permanently enabled. .El +.Ss MEM_KERNELDUMP +.Bl -tag -width Er +.It Bq Er EOPNOTSUPP +Kernel minidumps are not supported on this architecture. +.It Bq Er EPERM +An attempt to begin the kernel dump failed because the calling thread lacks the +.Dv PRIV_KMEM_READ +privilege. +.It Bq Er EBADF +The supplied file descriptor was invalid, or does not have write permission. +.It Bq Er EBUSY +An attempt to begin the kernel dump failed because one is already in progress. +.It Bq Er EINVAL +An invalid or unsupported value was specified in +.Va flags . +.It Bq Er EINVAL +An invalid or unsupported compression type was specified. 
+.El .Sh FILES .Bl -tag -width /dev/kmem -compact .It Pa /dev/mem diff --git a/sys/conf/files b/sys/conf/files index 57bd2693f53..9b907da0dd4 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3839,6 +3839,7 @@ kern/kern_tslog.c optional tslog kern/kern_ubsan.c optional kubsan kern/kern_umtx.c standard kern/kern_uuid.c standard +kern/kern_vnodedumper.c standard kern/kern_xxx.c standard kern/link_elf.c standard kern/linker_if.m standard diff --git a/sys/dev/mem/memdev.c b/sys/dev/mem/memdev.c index f03550aaa49..7d33066f567 100644 --- a/sys/dev/mem/memdev.c +++ b/sys/dev/mem/memdev.c @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -96,6 +97,7 @@ memioctl(struct cdev *dev, u_long cmd, caddr_t data, int flags, { vm_map_t map; vm_map_entry_t entry; + const struct mem_livedump_arg *marg; struct mem_extract *me; int error; @@ -120,6 +122,10 @@ memioctl(struct cdev *dev, u_long cmd, caddr_t data, int flags, } vm_map_unlock_read(map); break; + case MEM_KERNELDUMP: + marg = (const struct mem_livedump_arg *)data; + error = livedump_start(marg->fd, marg->flags, marg->compression); + break; default: error = memioctl_md(dev, cmd, data, flags, td); break; diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c index 7d0f913961c..f7e72d53a56 100644 --- a/sys/kern/kern_shutdown.c +++ b/sys/kern/kern_shutdown.c @@ -390,6 +390,17 @@ print_uptime(void) printf("%lds\n", (long)ts.tv_sec); } +/* + * Set up a context that can be extracted from the dump. 
+ */ +void +dump_savectx(void) +{ + + savectx(&dumppcb); + dumptid = curthread->td_tid; +} + int doadump(boolean_t textdump) { @@ -402,8 +413,7 @@ doadump(boolean_t textdump) if (TAILQ_EMPTY(&dumper_configs)) return (ENXIO); - savectx(&dumppcb); - dumptid = curthread->td_tid; + dump_savectx(); dumping++; coredump = TRUE; diff --git a/sys/kern/kern_vnodedumper.c b/sys/kern/kern_vnodedumper.c new file mode 100644 index 00000000000..c8fdce5e550 --- /dev/null +++ b/sys/kern/kern_vnodedumper.c @@ -0,0 +1,202 @@ +/*- + * Copyright (c) 2021-2022 Juniper Networks + * + * This software was developed by Mitchell Horne + * under sponsorship from Juniper Networks and Klara Systems. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static dumper_start_t vnode_dumper_start; +static dumper_t vnode_dump; +static dumper_hdr_t vnode_write_headers; + +static struct sx livedump_sx; +SX_SYSINIT(livedump, &livedump_sx, "Livedump sx"); + +/* + * Invoke a live minidump on the system. + */ +int +livedump_start(int fd, int flags, uint8_t compression) +{ +#if MINIDUMP_PAGE_TRACKING == 1 + struct dumperinfo di, *livedi; + struct diocskerneldump_arg kda; + struct vnode *vp; + struct file *fp; + void *rl_cookie; + int error; + + error = priv_check(curthread, PRIV_KMEM_READ); + if (error != 0) + return (error); + + if (flags != 0) + return (EINVAL); + + error = getvnode(curthread, fd, &cap_write_rights, &fp); + if (error != 0) + return (error); + vp = fp->f_vnode; + + if ((fp->f_flag & FWRITE) == 0) { + error = EBADF; + goto drop; + } + + /* Set up a new dumper. */ + bzero(&di, sizeof(di)); + di.dumper_start = vnode_dumper_start; + di.dumper = vnode_dump; + di.dumper_hdr = vnode_write_headers; + di.blocksize = PAGE_SIZE; /* Arbitrary. */ + di.maxiosize = MAXDUMPPGS * PAGE_SIZE; + + bzero(&kda, sizeof(kda)); + kda.kda_compression = compression; + error = dumper_create(&di, "livedump", &kda, &livedi); + if (error != 0) + goto drop; + + /* Only allow one livedump to proceed at a time. */ + if (sx_try_xlock(&livedump_sx) == 0) { + dumper_destroy(livedi); + error = EBUSY; + goto drop; + } + + /* To be used by the callback functions. */ + livedi->priv = vp; + + /* Lock the entire file range and vnode. 
*/ + rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + + dump_savectx(); + error = minidumpsys(livedi, true); + + VOP_UNLOCK(vp); + vn_rangelock_unlock(vp, rl_cookie); + sx_xunlock(&livedump_sx); + dumper_destroy(livedi); +drop: + fdrop(fp, curthread); + return (error); +#else + return (EOPNOTSUPP); +#endif /* MINIDUMP_PAGE_TRACKING == 1 */ +} + +int +vnode_dumper_start(struct dumperinfo *di, void *key, uint32_t keysize) +{ + + /* Always begin with an offset of zero. */ + di->dumpoff = 0; + + KASSERT(keysize == 0, ("encryption not supported for livedumps")); + return (0); +} + +/* + * Callback from dumpsys() to dump a chunk of memory. + * + * Parameters: + * arg Opaque private pointer to vnode + * virtual Virtual address (where to read the data from) + * physical Physical memory address (unused) + * offset Offset from start of core file + * length Data length + * + * Return value: + * 0 on success + * errno on error + */ +int +vnode_dump(void *arg, void *virtual, vm_offset_t physical __unused, + off_t offset, size_t length) +{ + struct vnode *vp; + int error = 0; + + vp = arg; + MPASS(vp != NULL); + ASSERT_VOP_LOCKED(vp, __func__); + + /* Done? */ + if (virtual == NULL) + return (0); + + error = vn_rdwr(UIO_WRITE, vp, virtual, length, offset, UIO_SYSSPACE, + IO_NODELOCKED, curthread->td_ucred, NOCRED, NULL, curthread); + if (error != 0) + uprintf("%s: error writing livedump block at offset %jx: %d\n", + __func__, (uintmax_t)offset, error); + return (error); +} + +/* + * Callback from dumpsys() to write out the dump header, placed at the end. + */ +int +vnode_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh) +{ + struct vnode *vp; + int error; + off_t offset; + + vp = di->priv; + MPASS(vp != NULL); + ASSERT_VOP_LOCKED(vp, __func__); + + /* Compensate for compression/encryption adjustment of dumpoff. */ + offset = roundup2(di->dumpoff, di->blocksize); + + /* Write the kernel dump header to the end of the file. 
*/ + error = vn_rdwr(UIO_WRITE, vp, kdh, sizeof(*kdh), offset, + UIO_SYSSPACE, IO_NODELOCKED, curthread->td_ucred, NOCRED, NULL, + curthread); + if (error != 0) + uprintf("%s: error writing livedump header: %d\n", __func__, + error); + return (error); +} diff --git a/sys/sys/conf.h b/sys/sys/conf.h index 6f84a3f03db..4808de511d6 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h @@ -362,6 +362,7 @@ struct dumperinfo { extern int dumping; /* system is dumping */ +void dump_savectx(void); int doadump(boolean_t); struct diocskerneldump_arg; int dumper_create(const struct dumperinfo *di_template, const char *devname, diff --git a/sys/sys/kerneldump.h b/sys/sys/kerneldump.h index c293491eadc..2c73790bc81 100644 --- a/sys/sys/kerneldump.h +++ b/sys/sys/kerneldump.h @@ -162,6 +162,8 @@ void dumpsys_pb_progress(size_t); extern int do_minidump; +int livedump_start(int, int, uint8_t); + #endif #endif /* _SYS_KERNELDUMP_H */ diff --git a/sys/sys/memrange.h b/sys/sys/memrange.h index 454b033775f..d3eeeb79b66 100644 --- a/sys/sys/memrange.h +++ b/sys/sys/memrange.h @@ -59,6 +59,16 @@ struct mem_extract { #define MEM_EXTRACT_PADDR _IOWR('m', 52, struct mem_extract) +struct mem_livedump_arg { + int fd; + int flags; + uint8_t compression; + uint8_t pad1[7]; + uint64_t pad2[2]; +}; + +#define MEM_KERNELDUMP _IOW('m', 53, struct mem_livedump_arg) + #ifdef _KERNEL MALLOC_DECLARE(M_MEMDESC); -- 2.45.0