From 1f0a237dbc5af0dec65b8a93821d446ad5d52799 Mon Sep 17 00:00:00 2001 From: mav Date: Mon, 21 Mar 2016 00:13:39 +0000 Subject: [PATCH] MFC r296510, r296563, r296567: MFV r296505: 6531 Provide mechanism to artificially limit disk performance Reviewed by: Paul Dagnelie Reviewed by: Matthew Ahrens Reviewed by: George Wilson Approved by: Dan McDonald Author: Prakash Surya illumos/illumos-gate@97e81309571898df9fdd94aab1216dfcf23e060b git-svn-id: svn://svn.freebsd.org/base/stable/10@297108 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- .../contrib/opensolaris/cmd/zinject/zinject.c | 111 +++- .../lib/libzfs/common/libzfs_compat.c | 3 + sys/cddl/compat/opensolaris/sys/callo.h | 37 ++ sys/cddl/compat/opensolaris/sys/systm.h | 3 + .../opensolaris/common/zfs/zfs_ioctl_compat.c | 493 +++++++++++++----- .../opensolaris/common/zfs/zfs_ioctl_compat.h | 72 ++- .../uts/common/fs/zfs/sys/zfs_context.h | 4 +- .../uts/common/fs/zfs/sys/zfs_ioctl.h | 1 + .../opensolaris/uts/common/fs/zfs/sys/zio.h | 5 +- .../opensolaris/uts/common/fs/zfs/vdev_disk.c | 5 +- .../opensolaris/uts/common/fs/zfs/vdev_file.c | 5 +- .../opensolaris/uts/common/fs/zfs/vdev_geom.c | 3 +- .../uts/common/fs/zfs/vdev_queue.c | 3 - .../opensolaris/uts/common/fs/zfs/zfs_ioctl.c | 8 + .../opensolaris/uts/common/fs/zfs/zio.c | 52 ++ .../uts/common/fs/zfs/zio_inject.c | 257 ++++++++- 16 files changed, 896 insertions(+), 166 deletions(-) create mode 100644 sys/cddl/compat/opensolaris/sys/callo.h diff --git a/cddl/contrib/opensolaris/cmd/zinject/zinject.c b/cddl/contrib/opensolaris/cmd/zinject/zinject.c index ddcdb1ffd..bf42bc483 100644 --- a/cddl/contrib/opensolaris/cmd/zinject/zinject.c +++ b/cddl/contrib/opensolaris/cmd/zinject/zinject.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. */ /* @@ -229,21 +229,57 @@ usage(void) "\t\tall records if 'all' is specificed.\n" "\n" "\tzinject -p pool\n" + "\n" "\t\tInject a panic fault at the specified function. Only \n" "\t\tfunctions which call spa_vdev_config_exit(), or \n" "\t\tspa_vdev_exit() will trigger a panic.\n" "\n" "\tzinject -d device [-e errno] [-L ] [-F]\n" "\t [-T pool\n" + "\n" "\t\tInject a fault into a particular device or the device's\n" "\t\tlabel. Label injection can either be 'nvlist', 'uber',\n " "\t\t'pad1', or 'pad2'.\n" "\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n" "\n" "\tzinject -d device -A pool\n" + "\n" "\t\tPerform a specific action on a particular device\n" "\n" + "\tzinject -d device -D latency:lanes pool\n" + "\n" + "\t\tAdd an artificial delay to IO requests on a particular\n" + "\t\tdevice, such that the requests take a minimum of 'latency'\n" + "\t\tmilliseconds to complete. Each delay has an associated\n" + "\t\tnumber of 'lanes' which defines the number of concurrent\n" + "\t\tIO requests that can be processed.\n" + "\n" + "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n" + "\t\tthe device will only be able to service a single IO request\n" + "\t\tat a time with each request taking 10 ms to complete. So,\n" + "\t\tif only a single request is submitted every 10 ms, the\n" + "\t\taverage latency will be 10 ms; but if more than one request\n" + "\t\tis submitted every 10 ms, the average latency will be more\n" + "\t\tthan 10 ms.\n" + "\n" + "\t\tSimilarly, if a delay of 10 ms is specified to have two\n" + "\t\tlanes (-D 10:2), then the device will be able to service\n" + "\t\ttwo requests at a time, each with a minimum latency of\n" + "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n" + "\t\tthe average latency will be 10 ms; but if more than two\n" + "\t\trequests are submitted every 10 ms, the average latency\n" + "\t\twill be more than 10 ms.\n" + "\n" + "\t\tAlso note, these delays are additive. So two invocations\n" + "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n" + "\t\tof '-D 10:2'. This also means, one can specify multiple\n" + "\t\tlanes with differing target latencies. For example, an\n" + "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n" + "\t\tcreate 3 lanes on the device; one lane with a latency\n" + "\t\tof 10 ms and two lanes with a 25 ms latency.\n" + "\n" "\tzinject -I [-s | -g ] pool\n" + "\n" "\t\tCause the pool to stop writing blocks yet not\n" "\t\treport errors for a duration. Simulates buggy hardware\n" "\t\tthat fails to honor cache flush requests.\n" @@ -357,6 +393,9 @@ print_device_handler(int id, const char *pool, zinject_record_t *record, if (record->zi_guid == 0 || record->zi_func[0] != '\0') return (0); + if (record->zi_cmd == ZINJECT_DELAY_IO) + return (0); + if (*count == 0) { (void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID"); (void) printf("--- --------------- ----------------\n"); @@ -370,6 +409,35 @@ print_device_handler(int id, const char *pool, zinject_record_t *record, return (0); } +static int +print_delay_handler(int id, const char *pool, zinject_record_t *record, + void *data) +{ + int *count = data; + + if (record->zi_guid == 0 || record->zi_func[0] != '\0') + return (0); + + if (record->zi_cmd != ZINJECT_DELAY_IO) + return (0); + + if (*count == 0) { + (void) printf("%3s %-15s %-15s %-15s %s\n", + "ID", "POOL", "DELAY (ms)", "LANES", "GUID"); + (void) printf("--- --------------- --------------- " + "--------------- ----------------\n"); + } + + *count += 1; + + (void) printf("%3d %-15s %-15llu %-15llu %llx\n", id, pool, + (u_longlong_t)NSEC2MSEC(record->zi_timer), + (u_longlong_t)record->zi_nlanes, + (u_longlong_t)record->zi_guid); + + return (0); +} + static int print_panic_handler(int id, const char *pool, zinject_record_t *record, void *data) @@ -407,6 +475,13 @@ print_all_handlers(void) count = 0; } + (void) iter_handlers(print_delay_handler, &count); + if (count > 0) { + total += count; + (void) printf("\n"); + count = 0; + } + (void) iter_handlers(print_data_handler, &count); if (count > 0) { total += count; @@ -549,6 +624,35 @@ perform_action(const char *pool, zinject_record_t *record, int cmd) return (1); } +static int +parse_delay(char *str, uint64_t *delay, uint64_t *nlanes) +{ + unsigned long scan_delay; + unsigned long scan_nlanes; + + if (sscanf(str, "%lu:%lu", &scan_delay, &scan_nlanes) != 2) + return (1); + + /* + * We explicitly disallow a delay of zero here, because we key + * off this value being non-zero in translate_device(), to + * determine if the fault is a ZINJECT_DELAY_IO fault or not. + */ + if (scan_delay == 0) + return (1); + + /* + * The units for the CLI delay parameter is milliseconds, but + * the data passed to the kernel is interpreted as nanoseconds. + * Thus we scale the milliseconds to nanoseconds here, and this + * nanosecond value is used to pass the delay to the kernel. + */ + *delay = MSEC2NSEC(scan_delay); + *nlanes = scan_nlanes; + + return (0); +} + int main(int argc, char **argv) { @@ -632,8 +736,9 @@ main(int argc, char **argv) device = optarg; break; case 'D': - record.zi_timer = strtoull(optarg, &end, 10); - if (errno != 0 || *end != '\0') { + ret = parse_delay(optarg, &record.zi_timer, + &record.zi_nlanes); + if (ret != 0) { (void) fprintf(stderr, "invalid i/o delay " "value: '%s'\n", optarg); usage(); diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.c index 833e1b675..b9ecd9dcc 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_compat.c @@ -74,6 +74,9 @@ zcmd_ioctl(int fd, int request, zfs_cmd_t *zc) if (zfs_ioctl_version >= ZFS_IOCVER_DEADMAN) { switch (zfs_ioctl_version) { + case ZFS_IOCVER_RESUME: + cflag = ZFS_CMD_COMPAT_RESUME; + break; case ZFS_IOCVER_EDBP: cflag = ZFS_CMD_COMPAT_EDBP; break; diff --git a/sys/cddl/compat/opensolaris/sys/callo.h b/sys/cddl/compat/opensolaris/sys/callo.h new file mode 100644 index 000000000..df2ae694c --- /dev/null +++ b/sys/cddl/compat/opensolaris/sys/callo.h @@ -0,0 +1,37 @@ +/*- + * Copyright (c) 2016 Alexander Motin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _OPENSOLARIS_SYS_CALLO_H_ +#define _OPENSOLARIS_SYS_CALLO_H_ + +#include_next + +#define CALLOUT_REALTIME 0 /* realtime callout type */ +#define CALLOUT_NORMAL 1 /* normal callout type */ + +#endif /* !_OPENSOLARIS_SYS_CALLO_H_ */ diff --git a/sys/cddl/compat/opensolaris/sys/systm.h b/sys/cddl/compat/opensolaris/sys/systm.h index fe0e1998c..f6a0dce45 100644 --- a/sys/cddl/compat/opensolaris/sys/systm.h +++ b/sys/cddl/compat/opensolaris/sys/systm.h @@ -42,6 +42,9 @@ #define delay(x) pause("soldelay", (x)) +#define timeout_generic(type, fn, arg, t, r, f) \ + timeout(fn, arg, t / (NANOSEC/hz) + 1) + #endif /* _KERNEL */ #endif /* _OPENSOLARIS_SYS_SYSTM_H_ */ diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c b/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c index 39ad7ce5f..0d8971cde 100644 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c +++ b/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c @@ -54,8 +54,69 @@ zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag) zfs_cmd_deadman_t *zcdm_c; zfs_cmd_zcmd_t *zcmd_c; zfs_cmd_edbp_t *edbp_c; + zfs_cmd_resume_t *resume_c; switch (cflag) { + case ZFS_CMD_COMPAT_RESUME: + resume_c = (void *)addr; + /* zc */ + strlcpy(zc->zc_name, resume_c->zc_name, MAXPATHLEN); + strlcpy(zc->zc_value, resume_c->zc_value, MAXPATHLEN * 2); + strlcpy(zc->zc_string, resume_c->zc_string, MAXPATHLEN); + +#define FIELD_COPY(field) zc->field = resume_c->field + FIELD_COPY(zc_nvlist_src); + FIELD_COPY(zc_nvlist_src_size); + FIELD_COPY(zc_nvlist_dst); + FIELD_COPY(zc_nvlist_dst_size); + FIELD_COPY(zc_nvlist_dst_filled); + FIELD_COPY(zc_pad2); + FIELD_COPY(zc_history); + FIELD_COPY(zc_guid); + FIELD_COPY(zc_nvlist_conf); + FIELD_COPY(zc_nvlist_conf_size); + FIELD_COPY(zc_cookie); + FIELD_COPY(zc_objset_type); + FIELD_COPY(zc_perm_action); + FIELD_COPY(zc_history_len); + FIELD_COPY(zc_history_offset); + FIELD_COPY(zc_obj); + FIELD_COPY(zc_iflags); + FIELD_COPY(zc_share); + FIELD_COPY(zc_jailid); + FIELD_COPY(zc_objset_stats); + FIELD_COPY(zc_begin_record); + FIELD_COPY(zc_inject_record.zi_objset); + FIELD_COPY(zc_inject_record.zi_object); + FIELD_COPY(zc_inject_record.zi_start); + FIELD_COPY(zc_inject_record.zi_end); + FIELD_COPY(zc_inject_record.zi_guid); + FIELD_COPY(zc_inject_record.zi_level); + FIELD_COPY(zc_inject_record.zi_error); + FIELD_COPY(zc_inject_record.zi_type); + FIELD_COPY(zc_inject_record.zi_freq); + FIELD_COPY(zc_inject_record.zi_failfast); + strlcpy(zc->zc_inject_record.zi_func, + resume_c->zc_inject_record.zi_func, MAXNAMELEN); + FIELD_COPY(zc_inject_record.zi_iotype); + FIELD_COPY(zc_inject_record.zi_duration); + FIELD_COPY(zc_inject_record.zi_timer); + zc->zc_inject_record.zi_nlanes = 1; + FIELD_COPY(zc_inject_record.zi_cmd); + FIELD_COPY(zc_inject_record.zi_pad); + FIELD_COPY(zc_defer_destroy); + FIELD_COPY(zc_flags); + FIELD_COPY(zc_action_handle); + FIELD_COPY(zc_cleanup_fd); + FIELD_COPY(zc_simple); + FIELD_COPY(zc_resumable); + FIELD_COPY(zc_sendobj); + FIELD_COPY(zc_fromobj); + FIELD_COPY(zc_createtxg); + FIELD_COPY(zc_stat); +#undef FIELD_COPY + break; + case ZFS_CMD_COMPAT_EDBP: edbp_c = (void *)addr; /* zc */ @@ -63,40 +124,57 @@ zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag) strlcpy(zc->zc_value, edbp_c->zc_value, MAXPATHLEN * 2); strlcpy(zc->zc_string, edbp_c->zc_string, MAXPATHLEN); -#define ZCMD_COPY(field) zc->field = edbp_c->field - ZCMD_COPY(zc_nvlist_src); - ZCMD_COPY(zc_nvlist_src_size); - ZCMD_COPY(zc_nvlist_dst); - ZCMD_COPY(zc_nvlist_dst_size); - ZCMD_COPY(zc_nvlist_dst_filled); - ZCMD_COPY(zc_pad2); - ZCMD_COPY(zc_history); - ZCMD_COPY(zc_guid); - ZCMD_COPY(zc_nvlist_conf); - ZCMD_COPY(zc_nvlist_conf_size); - ZCMD_COPY(zc_cookie); - ZCMD_COPY(zc_objset_type); - ZCMD_COPY(zc_perm_action); - ZCMD_COPY(zc_history_len); - ZCMD_COPY(zc_history_offset); - ZCMD_COPY(zc_obj); - ZCMD_COPY(zc_iflags); - ZCMD_COPY(zc_share); - ZCMD_COPY(zc_jailid); - ZCMD_COPY(zc_objset_stats); +#define FIELD_COPY(field) zc->field = edbp_c->field + FIELD_COPY(zc_nvlist_src); + FIELD_COPY(zc_nvlist_src_size); + FIELD_COPY(zc_nvlist_dst); + FIELD_COPY(zc_nvlist_dst_size); + FIELD_COPY(zc_nvlist_dst_filled); + FIELD_COPY(zc_pad2); + FIELD_COPY(zc_history); + FIELD_COPY(zc_guid); + FIELD_COPY(zc_nvlist_conf); + FIELD_COPY(zc_nvlist_conf_size); + FIELD_COPY(zc_cookie); + FIELD_COPY(zc_objset_type); + FIELD_COPY(zc_perm_action); + FIELD_COPY(zc_history_len); + FIELD_COPY(zc_history_offset); + FIELD_COPY(zc_obj); + FIELD_COPY(zc_iflags); + FIELD_COPY(zc_share); + FIELD_COPY(zc_jailid); + FIELD_COPY(zc_objset_stats); zc->zc_begin_record.drr_u.drr_begin = edbp_c->zc_begin_record; - ZCMD_COPY(zc_inject_record); - ZCMD_COPY(zc_defer_destroy); - ZCMD_COPY(zc_flags); - ZCMD_COPY(zc_action_handle); - ZCMD_COPY(zc_cleanup_fd); - ZCMD_COPY(zc_simple); + FIELD_COPY(zc_inject_record.zi_objset); + FIELD_COPY(zc_inject_record.zi_object); + FIELD_COPY(zc_inject_record.zi_start); + FIELD_COPY(zc_inject_record.zi_end); + FIELD_COPY(zc_inject_record.zi_guid); + FIELD_COPY(zc_inject_record.zi_level); + FIELD_COPY(zc_inject_record.zi_error); + FIELD_COPY(zc_inject_record.zi_type); + FIELD_COPY(zc_inject_record.zi_freq); + FIELD_COPY(zc_inject_record.zi_failfast); + strlcpy(zc->zc_inject_record.zi_func, + edbp_c->zc_inject_record.zi_func, MAXNAMELEN); + FIELD_COPY(zc_inject_record.zi_iotype); + FIELD_COPY(zc_inject_record.zi_duration); + FIELD_COPY(zc_inject_record.zi_timer); + zc->zc_inject_record.zi_nlanes = 1; + FIELD_COPY(zc_inject_record.zi_cmd); + FIELD_COPY(zc_inject_record.zi_pad); + FIELD_COPY(zc_defer_destroy); + FIELD_COPY(zc_flags); + FIELD_COPY(zc_action_handle); + FIELD_COPY(zc_cleanup_fd); + FIELD_COPY(zc_simple); zc->zc_resumable = B_FALSE; - ZCMD_COPY(zc_sendobj); - ZCMD_COPY(zc_fromobj); - ZCMD_COPY(zc_createtxg); - ZCMD_COPY(zc_stat); -#undef ZCMD_COPY + FIELD_COPY(zc_sendobj); + FIELD_COPY(zc_fromobj); + FIELD_COPY(zc_createtxg); + FIELD_COPY(zc_stat); +#undef FIELD_COPY break; case ZFS_CMD_COMPAT_ZCMD: @@ -106,43 +184,60 @@ zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag) strlcpy(zc->zc_value, zcmd_c->zc_value, MAXPATHLEN * 2); strlcpy(zc->zc_string, zcmd_c->zc_string, MAXPATHLEN); -#define ZCMD_COPY(field) zc->field = zcmd_c->field - ZCMD_COPY(zc_nvlist_src); - ZCMD_COPY(zc_nvlist_src_size); - ZCMD_COPY(zc_nvlist_dst); - ZCMD_COPY(zc_nvlist_dst_size); - ZCMD_COPY(zc_nvlist_dst_filled); - ZCMD_COPY(zc_pad2); - ZCMD_COPY(zc_history); - ZCMD_COPY(zc_guid); - ZCMD_COPY(zc_nvlist_conf); - ZCMD_COPY(zc_nvlist_conf_size); - ZCMD_COPY(zc_cookie); - ZCMD_COPY(zc_objset_type); - ZCMD_COPY(zc_perm_action); - ZCMD_COPY(zc_history_len); - ZCMD_COPY(zc_history_offset); - ZCMD_COPY(zc_obj); - ZCMD_COPY(zc_iflags); - ZCMD_COPY(zc_share); - ZCMD_COPY(zc_jailid); - ZCMD_COPY(zc_objset_stats); +#define FIELD_COPY(field) zc->field = zcmd_c->field + FIELD_COPY(zc_nvlist_src); + FIELD_COPY(zc_nvlist_src_size); + FIELD_COPY(zc_nvlist_dst); + FIELD_COPY(zc_nvlist_dst_size); + FIELD_COPY(zc_nvlist_dst_filled); + FIELD_COPY(zc_pad2); + FIELD_COPY(zc_history); + FIELD_COPY(zc_guid); + FIELD_COPY(zc_nvlist_conf); + FIELD_COPY(zc_nvlist_conf_size); + FIELD_COPY(zc_cookie); + FIELD_COPY(zc_objset_type); + FIELD_COPY(zc_perm_action); + FIELD_COPY(zc_history_len); + FIELD_COPY(zc_history_offset); + FIELD_COPY(zc_obj); + FIELD_COPY(zc_iflags); + FIELD_COPY(zc_share); + FIELD_COPY(zc_jailid); + FIELD_COPY(zc_objset_stats); zc->zc_begin_record.drr_u.drr_begin = zcmd_c->zc_begin_record; - ZCMD_COPY(zc_inject_record); + FIELD_COPY(zc_inject_record.zi_objset); + FIELD_COPY(zc_inject_record.zi_object); + FIELD_COPY(zc_inject_record.zi_start); + FIELD_COPY(zc_inject_record.zi_end); + FIELD_COPY(zc_inject_record.zi_guid); + FIELD_COPY(zc_inject_record.zi_level); + FIELD_COPY(zc_inject_record.zi_error); + FIELD_COPY(zc_inject_record.zi_type); + FIELD_COPY(zc_inject_record.zi_freq); + FIELD_COPY(zc_inject_record.zi_failfast); + strlcpy(zc->zc_inject_record.zi_func, + zcmd_c->zc_inject_record.zi_func, MAXNAMELEN); + FIELD_COPY(zc_inject_record.zi_iotype); + FIELD_COPY(zc_inject_record.zi_duration); + FIELD_COPY(zc_inject_record.zi_timer); + zc->zc_inject_record.zi_nlanes = 1; + FIELD_COPY(zc_inject_record.zi_cmd); + FIELD_COPY(zc_inject_record.zi_pad); /* boolean_t -> uint32_t */ zc->zc_defer_destroy = (uint32_t)(zcmd_c->zc_defer_destroy); zc->zc_flags = 0; - ZCMD_COPY(zc_action_handle); - ZCMD_COPY(zc_cleanup_fd); - ZCMD_COPY(zc_simple); + FIELD_COPY(zc_action_handle); + FIELD_COPY(zc_cleanup_fd); + FIELD_COPY(zc_simple); zc->zc_resumable = B_FALSE; - ZCMD_COPY(zc_sendobj); - ZCMD_COPY(zc_fromobj); - ZCMD_COPY(zc_createtxg); - ZCMD_COPY(zc_stat); -#undef ZCMD_COPY + FIELD_COPY(zc_sendobj); + FIELD_COPY(zc_fromobj); + FIELD_COPY(zc_createtxg); + FIELD_COPY(zc_stat); +#undef FIELD_COPY break; @@ -152,6 +247,8 @@ zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag) strlcpy(zc->zc_name, zcdm_c->zc_name, MAXPATHLEN); strlcpy(zc->zc_value, zcdm_c->zc_value, MAXPATHLEN * 2); strlcpy(zc->zc_string, zcdm_c->zc_string, MAXPATHLEN); + +#define FIELD_COPY(field) zc->field = zcdm_c->field zc->zc_guid = zcdm_c->zc_guid; zc->zc_nvlist_conf = zcdm_c->zc_nvlist_conf; zc->zc_nvlist_conf_size = zcdm_c->zc_nvlist_conf_size; @@ -181,12 +278,28 @@ zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag) zc->zc_fromobj = zcdm_c->zc_fromobj; zc->zc_createtxg = zcdm_c->zc_createtxg; zc->zc_stat = zcdm_c->zc_stat; - - /* zc_inject_record doesn't change in libzfs_core */ - zcdm_c->zc_inject_record = zc->zc_inject_record; + FIELD_COPY(zc_inject_record.zi_objset); + FIELD_COPY(zc_inject_record.zi_object); + FIELD_COPY(zc_inject_record.zi_start); + FIELD_COPY(zc_inject_record.zi_end); + FIELD_COPY(zc_inject_record.zi_guid); + FIELD_COPY(zc_inject_record.zi_level); + FIELD_COPY(zc_inject_record.zi_error); + FIELD_COPY(zc_inject_record.zi_type); + FIELD_COPY(zc_inject_record.zi_freq); + FIELD_COPY(zc_inject_record.zi_failfast); + strlcpy(zc->zc_inject_record.zi_func, + resume_c->zc_inject_record.zi_func, MAXNAMELEN); + FIELD_COPY(zc_inject_record.zi_iotype); + FIELD_COPY(zc_inject_record.zi_duration); + FIELD_COPY(zc_inject_record.zi_timer); + zc->zc_inject_record.zi_nlanes = 1; + FIELD_COPY(zc_inject_record.zi_cmd); + FIELD_COPY(zc_inject_record.zi_pad); /* we always assume zc_nvlist_dst_filled is true */ zc->zc_nvlist_dst_filled = B_TRUE; +#undef FIELD_COPY break; case ZFS_CMD_COMPAT_V28: @@ -255,6 +368,7 @@ zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag) zc28_c->zc_inject_record.zi_duration; zc->zc_inject_record.zi_timer = zc28_c->zc_inject_record.zi_timer; + zc->zc_inject_record.zi_nlanes = 1; zc->zc_inject_record.zi_cmd = ZINJECT_UNINITIALIZED; zc->zc_inject_record.zi_pad = 0; break; @@ -319,47 +433,121 @@ zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int request, zfs_cmd_deadman_t *zcdm_c; zfs_cmd_zcmd_t *zcmd_c; zfs_cmd_edbp_t *edbp_c; + zfs_cmd_resume_t *resume_c; switch (cflag) { + case ZFS_CMD_COMPAT_RESUME: + resume_c = (void *)addr; + strlcpy(resume_c->zc_name, zc->zc_name, MAXPATHLEN); + strlcpy(resume_c->zc_value, zc->zc_value, MAXPATHLEN * 2); + strlcpy(resume_c->zc_string, zc->zc_string, MAXPATHLEN); + +#define FIELD_COPY(field) resume_c->field = zc->field + FIELD_COPY(zc_nvlist_src); + FIELD_COPY(zc_nvlist_src_size); + FIELD_COPY(zc_nvlist_dst); + FIELD_COPY(zc_nvlist_dst_size); + FIELD_COPY(zc_nvlist_dst_filled); + FIELD_COPY(zc_pad2); + FIELD_COPY(zc_history); + FIELD_COPY(zc_guid); + FIELD_COPY(zc_nvlist_conf); + FIELD_COPY(zc_nvlist_conf_size); + FIELD_COPY(zc_cookie); + FIELD_COPY(zc_objset_type); + FIELD_COPY(zc_perm_action); + FIELD_COPY(zc_history_len); + FIELD_COPY(zc_history_offset); + FIELD_COPY(zc_obj); + FIELD_COPY(zc_iflags); + FIELD_COPY(zc_share); + FIELD_COPY(zc_jailid); + FIELD_COPY(zc_objset_stats); + FIELD_COPY(zc_begin_record); + FIELD_COPY(zc_inject_record.zi_objset); + FIELD_COPY(zc_inject_record.zi_object); + FIELD_COPY(zc_inject_record.zi_start); + FIELD_COPY(zc_inject_record.zi_end); + FIELD_COPY(zc_inject_record.zi_guid); + FIELD_COPY(zc_inject_record.zi_level); + FIELD_COPY(zc_inject_record.zi_error); + FIELD_COPY(zc_inject_record.zi_type); + FIELD_COPY(zc_inject_record.zi_freq); + FIELD_COPY(zc_inject_record.zi_failfast); + strlcpy(resume_c->zc_inject_record.zi_func, + zc->zc_inject_record.zi_func, MAXNAMELEN); + FIELD_COPY(zc_inject_record.zi_iotype); + FIELD_COPY(zc_inject_record.zi_duration); + FIELD_COPY(zc_inject_record.zi_timer); + FIELD_COPY(zc_inject_record.zi_cmd); + FIELD_COPY(zc_inject_record.zi_pad); + FIELD_COPY(zc_defer_destroy); + FIELD_COPY(zc_flags); + FIELD_COPY(zc_action_handle); + FIELD_COPY(zc_cleanup_fd); + FIELD_COPY(zc_simple); + FIELD_COPY(zc_sendobj); + FIELD_COPY(zc_fromobj); + FIELD_COPY(zc_createtxg); + FIELD_COPY(zc_stat); +#undef FIELD_COPY + break; + case ZFS_CMD_COMPAT_EDBP: edbp_c = (void *)addr; strlcpy(edbp_c->zc_name, zc->zc_name, MAXPATHLEN); strlcpy(edbp_c->zc_value, zc->zc_value, MAXPATHLEN * 2); strlcpy(edbp_c->zc_string, zc->zc_string, MAXPATHLEN); -#define ZCMD_COPY(field) edbp_c->field = zc->field - ZCMD_COPY(zc_nvlist_src); - ZCMD_COPY(zc_nvlist_src_size); - ZCMD_COPY(zc_nvlist_dst); - ZCMD_COPY(zc_nvlist_dst_size); - ZCMD_COPY(zc_nvlist_dst_filled); - ZCMD_COPY(zc_pad2); - ZCMD_COPY(zc_history); - ZCMD_COPY(zc_guid); - ZCMD_COPY(zc_nvlist_conf); - ZCMD_COPY(zc_nvlist_conf_size); - ZCMD_COPY(zc_cookie); - ZCMD_COPY(zc_objset_type); - ZCMD_COPY(zc_perm_action); - ZCMD_COPY(zc_history_len); - ZCMD_COPY(zc_history_offset); - ZCMD_COPY(zc_obj); - ZCMD_COPY(zc_iflags); - ZCMD_COPY(zc_share); - ZCMD_COPY(zc_jailid); - ZCMD_COPY(zc_objset_stats); +#define FIELD_COPY(field) edbp_c->field = zc->field + FIELD_COPY(zc_nvlist_src); + FIELD_COPY(zc_nvlist_src_size); + FIELD_COPY(zc_nvlist_dst); + FIELD_COPY(zc_nvlist_dst_size); + FIELD_COPY(zc_nvlist_dst_filled); + FIELD_COPY(zc_pad2); + FIELD_COPY(zc_history); + FIELD_COPY(zc_guid); + FIELD_COPY(zc_nvlist_conf); + FIELD_COPY(zc_nvlist_conf_size); + FIELD_COPY(zc_cookie); + FIELD_COPY(zc_objset_type); + FIELD_COPY(zc_perm_action); + FIELD_COPY(zc_history_len); + FIELD_COPY(zc_history_offset); + FIELD_COPY(zc_obj); + FIELD_COPY(zc_iflags); + FIELD_COPY(zc_share); + FIELD_COPY(zc_jailid); + FIELD_COPY(zc_objset_stats); edbp_c->zc_begin_record = zc->zc_begin_record.drr_u.drr_begin; - ZCMD_COPY(zc_inject_record); - ZCMD_COPY(zc_defer_destroy); - ZCMD_COPY(zc_flags); - ZCMD_COPY(zc_action_handle); - ZCMD_COPY(zc_cleanup_fd); - ZCMD_COPY(zc_simple); - ZCMD_COPY(zc_sendobj); - ZCMD_COPY(zc_fromobj); - ZCMD_COPY(zc_createtxg); - ZCMD_COPY(zc_stat); -#undef ZCMD_COPY + FIELD_COPY(zc_inject_record.zi_objset); + FIELD_COPY(zc_inject_record.zi_object); + FIELD_COPY(zc_inject_record.zi_start); + FIELD_COPY(zc_inject_record.zi_end); + FIELD_COPY(zc_inject_record.zi_guid); + FIELD_COPY(zc_inject_record.zi_level); + FIELD_COPY(zc_inject_record.zi_error); + FIELD_COPY(zc_inject_record.zi_type); + FIELD_COPY(zc_inject_record.zi_freq); + FIELD_COPY(zc_inject_record.zi_failfast); + strlcpy(resume_c->zc_inject_record.zi_func, + zc->zc_inject_record.zi_func, MAXNAMELEN); + FIELD_COPY(zc_inject_record.zi_iotype); + FIELD_COPY(zc_inject_record.zi_duration); + FIELD_COPY(zc_inject_record.zi_timer); + FIELD_COPY(zc_inject_record.zi_cmd); + FIELD_COPY(zc_inject_record.zi_pad); + FIELD_COPY(zc_defer_destroy); + FIELD_COPY(zc_flags); + FIELD_COPY(zc_action_handle); + FIELD_COPY(zc_cleanup_fd); + FIELD_COPY(zc_simple); + FIELD_COPY(zc_sendobj); + FIELD_COPY(zc_fromobj); + FIELD_COPY(zc_createtxg); + FIELD_COPY(zc_stat); +#undef FIELD_COPY break; case ZFS_CMD_COMPAT_ZCMD: @@ -369,42 +557,58 @@ zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int request, strlcpy(zcmd_c->zc_value, zc->zc_value, MAXPATHLEN * 2); strlcpy(zcmd_c->zc_string, zc->zc_string, MAXPATHLEN); -#define ZCMD_COPY(field) zcmd_c->field = zc->field - ZCMD_COPY(zc_nvlist_src); - ZCMD_COPY(zc_nvlist_src_size); - ZCMD_COPY(zc_nvlist_dst); - ZCMD_COPY(zc_nvlist_dst_size); - ZCMD_COPY(zc_nvlist_dst_filled); - ZCMD_COPY(zc_pad2); - ZCMD_COPY(zc_history); - ZCMD_COPY(zc_guid); - ZCMD_COPY(zc_nvlist_conf); - ZCMD_COPY(zc_nvlist_conf_size); - ZCMD_COPY(zc_cookie); - ZCMD_COPY(zc_objset_type); - ZCMD_COPY(zc_perm_action); - ZCMD_COPY(zc_history_len); - ZCMD_COPY(zc_history_offset); - ZCMD_COPY(zc_obj); - ZCMD_COPY(zc_iflags); - ZCMD_COPY(zc_share); - ZCMD_COPY(zc_jailid); - ZCMD_COPY(zc_objset_stats); +#define FIELD_COPY(field) zcmd_c->field = zc->field + FIELD_COPY(zc_nvlist_src); + FIELD_COPY(zc_nvlist_src_size); + FIELD_COPY(zc_nvlist_dst); + FIELD_COPY(zc_nvlist_dst_size); + FIELD_COPY(zc_nvlist_dst_filled); + FIELD_COPY(zc_pad2); + FIELD_COPY(zc_history); + FIELD_COPY(zc_guid); + FIELD_COPY(zc_nvlist_conf); + FIELD_COPY(zc_nvlist_conf_size); + FIELD_COPY(zc_cookie); + FIELD_COPY(zc_objset_type); + FIELD_COPY(zc_perm_action); + FIELD_COPY(zc_history_len); + FIELD_COPY(zc_history_offset); + FIELD_COPY(zc_obj); + FIELD_COPY(zc_iflags); + FIELD_COPY(zc_share); + FIELD_COPY(zc_jailid); + FIELD_COPY(zc_objset_stats); zcmd_c->zc_begin_record = zc->zc_begin_record.drr_u.drr_begin; - ZCMD_COPY(zc_inject_record); + FIELD_COPY(zc_inject_record.zi_objset); + FIELD_COPY(zc_inject_record.zi_object); + FIELD_COPY(zc_inject_record.zi_start); + FIELD_COPY(zc_inject_record.zi_end); + FIELD_COPY(zc_inject_record.zi_guid); + FIELD_COPY(zc_inject_record.zi_level); + FIELD_COPY(zc_inject_record.zi_error); + FIELD_COPY(zc_inject_record.zi_type); + FIELD_COPY(zc_inject_record.zi_freq); + FIELD_COPY(zc_inject_record.zi_failfast); + strlcpy(resume_c->zc_inject_record.zi_func, + zc->zc_inject_record.zi_func, MAXNAMELEN); + FIELD_COPY(zc_inject_record.zi_iotype); + FIELD_COPY(zc_inject_record.zi_duration); + FIELD_COPY(zc_inject_record.zi_timer); + FIELD_COPY(zc_inject_record.zi_cmd); + FIELD_COPY(zc_inject_record.zi_pad); /* boolean_t -> uint32_t */ zcmd_c->zc_defer_destroy = (uint32_t)(zc->zc_defer_destroy); zcmd_c->zc_temphold = 0; - ZCMD_COPY(zc_action_handle); - ZCMD_COPY(zc_cleanup_fd); - ZCMD_COPY(zc_simple); - ZCMD_COPY(zc_sendobj); - ZCMD_COPY(zc_fromobj); - ZCMD_COPY(zc_createtxg); - ZCMD_COPY(zc_stat); -#undef ZCMD_COPY + FIELD_COPY(zc_action_handle); + FIELD_COPY(zc_cleanup_fd); + FIELD_COPY(zc_simple); + FIELD_COPY(zc_sendobj); + FIELD_COPY(zc_fromobj); + FIELD_COPY(zc_createtxg); + FIELD_COPY(zc_stat); +#undef FIELD_COPY break; @@ -414,6 +618,8 @@ zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int request, strlcpy(zcdm_c->zc_name, zc->zc_name, MAXPATHLEN); strlcpy(zcdm_c->zc_value, zc->zc_value, MAXPATHLEN * 2); strlcpy(zcdm_c->zc_string, zc->zc_string, MAXPATHLEN); + +#define FIELD_COPY(field) zcdm_c->field = zc->field zcdm_c->zc_guid = zc->zc_guid; zcdm_c->zc_nvlist_conf = zc->zc_nvlist_conf; zcdm_c->zc_nvlist_conf_size = zc->zc_nvlist_conf_size; @@ -442,9 +648,24 @@ zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int request, zcdm_c->zc_fromobj = zc->zc_fromobj; zcdm_c->zc_createtxg = zc->zc_createtxg; zcdm_c->zc_stat = zc->zc_stat; - - /* zc_inject_record doesn't change in libzfs_core */ - zc->zc_inject_record = zcdm_c->zc_inject_record; + FIELD_COPY(zc_inject_record.zi_objset); + FIELD_COPY(zc_inject_record.zi_object); + FIELD_COPY(zc_inject_record.zi_start); + FIELD_COPY(zc_inject_record.zi_end); + FIELD_COPY(zc_inject_record.zi_guid); + FIELD_COPY(zc_inject_record.zi_level); + FIELD_COPY(zc_inject_record.zi_error); + FIELD_COPY(zc_inject_record.zi_type); + FIELD_COPY(zc_inject_record.zi_freq); + FIELD_COPY(zc_inject_record.zi_failfast); + strlcpy(resume_c->zc_inject_record.zi_func, + zc->zc_inject_record.zi_func, MAXNAMELEN); + FIELD_COPY(zc_inject_record.zi_iotype); + FIELD_COPY(zc_inject_record.zi_duration); + FIELD_COPY(zc_inject_record.zi_timer); + FIELD_COPY(zc_inject_record.zi_cmd); + FIELD_COPY(zc_inject_record.zi_pad); +#undef FIELD_COPY #ifndef _KERNEL if (request == ZFS_IOC_RECV) strlcpy(zcdm_c->zc_top_ds, @@ -766,6 +987,12 @@ zcmd_ioctl_compat(int fd, int request, zfs_cmd_t *zc, const int cflag) zp.zfs_cmd_size = sizeof(zfs_cmd_t); zp.zfs_ioctl_version = ZFS_IOCVER_CURRENT; return (ioctl(fd, ncmd, &zp)); + case ZFS_CMD_COMPAT_RESUME: + ncmd = _IOWR('Z', request, struct zfs_iocparm); + zp.zfs_cmd = (uint64_t)zc; + zp.zfs_cmd_size = sizeof(zfs_cmd_resume_t); + zp.zfs_ioctl_version = ZFS_IOCVER_RESUME; + return (ioctl(fd, ncmd, &zp)); case ZFS_CMD_COMPAT_EDBP: ncmd = _IOWR('Z', request, struct zfs_iocparm); zp.zfs_cmd = (uint64_t)zc; @@ -876,7 +1103,8 @@ zfs_ioctl_compat_innvl(zfs_cmd_t *zc, nvlist_t * innvl, const int vec, int err; if (cflag == ZFS_CMD_COMPAT_NONE || cflag == ZFS_CMD_COMPAT_LZC || - cflag == ZFS_CMD_COMPAT_ZCMD || cflag == ZFS_CMD_COMPAT_EDBP) + cflag == ZFS_CMD_COMPAT_ZCMD || cflag == ZFS_CMD_COMPAT_EDBP || + cflag == ZFS_CMD_COMPAT_RESUME) goto out; switch (vec) { @@ -1028,7 +1256,8 @@ zfs_ioctl_compat_outnvl(zfs_cmd_t *zc, nvlist_t * outnvl, const int vec, nvlist_t *tmpnvl; if (cflag == ZFS_CMD_COMPAT_NONE || cflag == ZFS_CMD_COMPAT_LZC || - cflag == ZFS_CMD_COMPAT_ZCMD || cflag == ZFS_CMD_COMPAT_EDBP) + cflag == ZFS_CMD_COMPAT_ZCMD || cflag == ZFS_CMD_COMPAT_EDBP || + cflag == ZFS_CMD_COMPAT_RESUME) return (outnvl); switch (vec) { diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h b/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h index 8361aa32b..6f24380a7 100644 --- a/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h +++ b/sys/cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.h @@ -53,7 +53,8 @@ extern "C" { #define ZFS_IOCVER_ZCMD 3 #define ZFS_IOCVER_EDBP 4 #define ZFS_IOCVER_RESUME 5 -#define ZFS_IOCVER_CURRENT ZFS_IOCVER_RESUME +#define ZFS_IOCVER_INLANES 6 +#define ZFS_IOCVER_CURRENT ZFS_IOCVER_INLANES /* compatibility conversion flag */ #define ZFS_CMD_COMPAT_NONE 0 @@ -63,6 +64,7 @@ extern "C" { #define ZFS_CMD_COMPAT_LZC 4 #define ZFS_CMD_COMPAT_ZCMD 5 #define ZFS_CMD_COMPAT_EDBP 6 +#define ZFS_CMD_COMPAT_RESUME 7 #define ZFS_IOC_COMPAT_PASS 254 #define ZFS_IOC_COMPAT_FAIL 255 @@ -167,6 +169,25 @@ typedef struct zfs_cmd_v28 { zfs_stat_t zc_stat; } zfs_cmd_v28_t; +typedef struct zinject_record_deadman { + uint64_t zi_objset; + uint64_t zi_object; + uint64_t zi_start; + uint64_t zi_end; + uint64_t zi_guid; + uint32_t zi_level; + uint32_t zi_error; + uint64_t zi_type; + uint32_t zi_freq; + uint32_t zi_failfast; + char zi_func[MAXNAMELEN]; + uint32_t zi_iotype; + int32_t zi_duration; + uint64_t zi_timer; + uint32_t zi_cmd; + uint32_t zi_pad; +} zinject_record_deadman_t; + typedef struct zfs_cmd_deadman { char zc_name[MAXPATHLEN]; char zc_value[MAXPATHLEN * 2]; @@ -192,7 +213,7 @@ typedef struct zfs_cmd_deadman { dmu_objset_stats_t zc_objset_stats; struct drr_begin zc_begin_record; /* zc_inject_record doesn't change in libzfs_core */ - zinject_record_t zc_inject_record; + zinject_record_deadman_t zc_inject_record; boolean_t zc_defer_destroy; boolean_t zc_temphold; uint64_t zc_action_handle; @@ -235,7 +256,7 @@ typedef struct zfs_cmd_zcmd { uint64_t zc_jailid; dmu_objset_stats_t zc_objset_stats; struct drr_begin zc_begin_record; - zinject_record_t zc_inject_record; + zinject_record_deadman_t zc_inject_record; boolean_t zc_defer_destroy; boolean_t zc_temphold; uint64_t zc_action_handle; @@ -278,7 +299,7 @@ typedef struct zfs_cmd_edbp { uint64_t zc_jailid; dmu_objset_stats_t zc_objset_stats; struct drr_begin zc_begin_record; - zinject_record_t zc_inject_record; + zinject_record_deadman_t zc_inject_record; uint32_t zc_defer_destroy; uint32_t zc_flags; uint64_t zc_action_handle; @@ -291,6 +312,49 @@ typedef struct zfs_cmd_edbp { zfs_stat_t zc_stat; } zfs_cmd_edbp_t; +typedef struct zfs_cmd_resume { + char zc_name[MAXPATHLEN]; /* name of pool or dataset */ + uint64_t zc_nvlist_src; /* really (char *) */ + uint64_t zc_nvlist_src_size; + uint64_t zc_nvlist_dst; /* really (char *) */ + uint64_t zc_nvlist_dst_size; + boolean_t zc_nvlist_dst_filled; /* put an nvlist in dst? */ + int zc_pad2; + + /* + * The following members are for legacy ioctls which haven't been + * converted to the new method. + */ + uint64_t zc_history; /* really (char *) */ + char zc_value[MAXPATHLEN * 2]; + char zc_string[MAXNAMELEN]; + uint64_t zc_guid; + uint64_t zc_nvlist_conf; /* really (char *) */ + uint64_t zc_nvlist_conf_size; + uint64_t zc_cookie; + uint64_t zc_objset_type; + uint64_t zc_perm_action; + uint64_t zc_history_len; + uint64_t zc_history_offset; + uint64_t zc_obj; + uint64_t zc_iflags; /* internal to zfs(7fs) */ + zfs_share_t zc_share; + uint64_t zc_jailid; + dmu_objset_stats_t zc_objset_stats; + dmu_replay_record_t zc_begin_record; + zinject_record_deadman_t zc_inject_record; + uint32_t zc_defer_destroy; + uint32_t zc_flags; + uint64_t zc_action_handle; + int zc_cleanup_fd; + uint8_t zc_simple; + boolean_t zc_resumable; + uint64_t zc_sendobj; + uint64_t zc_fromobj; + uint64_t zc_createtxg; + zfs_stat_t zc_stat; +} zfs_cmd_resume_t; + #ifdef _KERNEL unsigned static long zfs_ioctl_v15_to_v28[] = { 0, /* 0 ZFS_IOC_POOL_CREATE */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h index 510204042..17503d8d6 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h @@ -94,10 +94,8 @@ extern "C" { #include #ifdef illumos #include -#include -#else /* FreeBSD */ -#include #endif +#include #include #include diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h index 2a50e191b..a0612d3ad 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -308,6 +308,7 @@ typedef struct zinject_record { uint32_t zi_iotype; int32_t zi_duration; uint64_t zi_timer; + uint64_t zi_nlanes; uint32_t zi_cmd; uint32_t zi_pad; } zinject_record_t; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h index eac4b905c..56c821a0a 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h @@ -455,6 +455,7 @@ struct zio { uint64_t io_offset; hrtime_t io_timestamp; + hrtime_t io_target_timestamp; avl_node_t io_queue_node; avl_node_t io_offset_node; @@ -548,6 +549,8 @@ extern int zio_wait(zio_t *zio); extern void zio_nowait(zio_t *zio); extern void zio_execute(zio_t *zio); extern void zio_interrupt(zio_t *zio); +extern void zio_delay_init(zio_t *zio); +extern void zio_delay_interrupt(zio_t *zio); extern zio_t *zio_walk_parents(zio_t *cio); extern zio_t *zio_walk_children(zio_t *pio); @@ -609,7 +612,7 @@ extern int zio_handle_fault_injection(zio_t *zio, int error); extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error); extern int zio_handle_label_injection(zio_t *zio, int error); extern void zio_handle_ignored_writes(zio_t *zio); -extern uint64_t zio_handle_io_delay(zio_t *zio); +extern hrtime_t zio_handle_io_delay(zio_t *zio); /* * Checksum ereport functions diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c index c397891d4..681a67046 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright 2013 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 Joyent, Inc. All rights reserved. */ @@ -691,7 +691,7 @@ vdev_disk_io_intr(buf_t *bp) kmem_free(vb, sizeof (vdev_buf_t)); - zio_interrupt(zio); + zio_delay_interrupt(zio); } static void @@ -797,6 +797,7 @@ vdev_disk_io_start(zio_t *zio) } ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE); + zio->io_target_timestamp = zio_handle_io_delay(zio); vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c index 01ef7563e..8228af1e1 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. */ #include @@ -188,6 +188,7 @@ vdev_file_io_start(zio_t *zio) } ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE); + zio->io_target_timestamp = zio_handle_io_delay(zio); zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ? UIO_READ : UIO_WRITE, vp, zio->io_data, zio->io_size, @@ -196,7 +197,7 @@ vdev_file_io_start(zio_t *zio) if (resid != 0 && zio->io_error == 0) zio->io_error = ENOSPC; - zio_interrupt(zio); + zio_delay_interrupt(zio); #ifdef illumos VERIFY3U(taskq_dispatch(system_taskq, vdev_file_io_strategy, bp, diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c index 60fd73b09..8f9f295db 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c @@ -876,7 +876,7 @@ vdev_geom_io_intr(struct bio *bp) break; } g_destroy_bio(bp); - zio_interrupt(zio); + zio_delay_interrupt(zio); } static void @@ -939,6 +939,7 @@ vdev_geom_io_start(zio_t *zio) switch (zio->io_type) { case ZIO_TYPE_READ: case ZIO_TYPE_WRITE: + zio->io_target_timestamp = zio_handle_io_delay(zio); bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE; bp->bio_data = zio->io_data; bp->bio_offset = zio->io_offset; diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c index 9f83abb41..7a0791198 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c @@ -883,9 +883,6 @@ vdev_queue_io_done(zio_t *zio) vdev_queue_t *vq = &zio->io_vd->vdev_queue; zio_t *nio; - if (zio_injection_enabled) - delay(SEC_TO_TICK(zio_handle_io_delay(zio))); - mutex_enter(&vq->vq_lock); vdev_queue_pending_remove(vq, zio); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c index 4ff601312..b37f0a3f6 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -6220,6 +6220,14 @@ zfsdev_ioctl(struct cdev *dev, u_long zcmd, caddr_t arg, int flag, goto out; } break; + case ZFS_IOCVER_RESUME: + if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_resume_t)) { + error = SET_ERROR(EFAULT); + goto out; + } + compat = B_TRUE; + cflag = ZFS_CMD_COMPAT_RESUME; + break; case ZFS_IOCVER_EDBP: if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_edbp_t)) { error = SET_ERROR(EFAULT); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c index 5a80d9b27..ee5deb91f 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c @@ -1447,6 +1447,58 @@ zio_interrupt(zio_t *zio) zio_taskq_dispatch(zio, ZIO_TASKQ_INTERRUPT, B_FALSE); } +void +zio_delay_interrupt(zio_t *zio) +{ + /* + * The timeout_generic() function isn't defined in userspace, so + * rather than trying to implement the function, the zio delay + * functionality has been disabled for userspace builds. + */ + +#ifdef _KERNEL + /* + * If io_target_timestamp is zero, then no delay has been registered + * for this IO, thus jump to the end of this function and "skip" the + * delay; issuing it directly to the zio layer. + */ + if (zio->io_target_timestamp != 0) { + hrtime_t now = gethrtime(); + + if (now >= zio->io_target_timestamp) { + /* + * This IO has already taken longer than the target + * delay to complete, so we don't want to delay it + * any longer; we "miss" the delay and issue it + * directly to the zio layer. This is likely due to + * the target latency being set to a value less than + * the underlying hardware can satisfy (e.g. delay + * set to 1ms, but the disks take 10ms to complete an + * IO request). + */ + + DTRACE_PROBE2(zio__delay__miss, zio_t *, zio, + hrtime_t, now); + + zio_interrupt(zio); + } else { + hrtime_t diff = zio->io_target_timestamp - now; + + DTRACE_PROBE3(zio__delay__hit, zio_t *, zio, + hrtime_t, now, hrtime_t, diff); + + (void) timeout_generic(CALLOUT_NORMAL, + (void (*)(void *))zio_interrupt, zio, diff, 1, 0); + } + + return; + } +#endif + + DTRACE_PROBE1(zio__delay__skip, zio_t *, zio); + zio_interrupt(zio); +} + /* * Execute the I/O pipeline until one of the following occurs: * diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c index 0a7f4e4b3..26f59af99 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. */ /* @@ -49,15 +49,53 @@ uint32_t zio_injection_enabled; +/* + * Data describing each zinject handler registered on the system, and + * contains the list node linking the handler in the global zinject + * handler list. + */ typedef struct inject_handler { int zi_id; spa_t *zi_spa; zinject_record_t zi_record; + uint64_t *zi_lanes; + int zi_next_lane; list_node_t zi_link; } inject_handler_t; +/* + * List of all zinject handlers registered on the system, protected by + * the inject_lock defined below. + */ static list_t inject_handlers; + +/* + * This protects insertion into, and traversal of, the inject handler + * list defined above; as well as the inject_delay_count. Any time a + * handler is inserted or removed from the list, this lock should be + * taken as a RW_WRITER; and any time traversal is done over the list + * (without modification to it) this lock should be taken as a RW_READER. + */ static krwlock_t inject_lock; + +/* + * This holds the number of zinject delay handlers that have been + * registered on the system. It is protected by the inject_lock defined + * above. Thus modifications to this count must be a RW_WRITER of the + * inject_lock, and reads of this count must be (at least) a RW_READER + * of the lock. + */ +static int inject_delay_count = 0; + +/* + * This lock is used only in zio_handle_io_delay(), refer to the comment + * in that function for more details. + */ +static kmutex_t inject_delay_mtx; + +/* + * Used to assign unique identifying numbers to each new zinject handler. + */ static int inject_next_id = 1; /* @@ -360,32 +398,164 @@ spa_handle_ignored_writes(spa_t *spa) rw_exit(&inject_lock); } -uint64_t +hrtime_t zio_handle_io_delay(zio_t *zio) { vdev_t *vd = zio->io_vd; - inject_handler_t *handler; - uint64_t seconds = 0; - - if (zio_injection_enabled == 0) - return (0); + inject_handler_t *min_handler = NULL; + hrtime_t min_target = 0; rw_enter(&inject_lock, RW_READER); - for (handler = list_head(&inject_handlers); handler != NULL; - handler = list_next(&inject_handlers, handler)) { + /* + * inject_delay_count is a subset of zio_injection_enabled that + * is only incremented for delay handlers. These checks are + * mainly added to remind the reader why we're not explicitly + * checking zio_injection_enabled like the other functions. + */ + IMPLY(inject_delay_count > 0, zio_injection_enabled > 0); + IMPLY(zio_injection_enabled == 0, inject_delay_count == 0); + + /* + * If there aren't any inject delay handlers registered, then we + * can short circuit and simply return 0 here. A value of zero + * informs zio_delay_interrupt() that this request should not be + * delayed. This short circuit keeps us from acquiring the + * inject_delay_mutex unnecessarily. + */ + if (inject_delay_count == 0) { + rw_exit(&inject_lock); + return (0); + } + + /* + * Each inject handler has a number of "lanes" associated with + * it. Each lane is able to handle requests independently of one + * another, and at a latency defined by the inject handler + * record's zi_timer field. Thus if a handler in configured with + * a single lane with a 10ms latency, it will delay requests + * such that only a single request is completed every 10ms. So, + * if more than one request is attempted per each 10ms interval, + * the average latency of the requests will be greater than + * 10ms; but if only a single request is submitted each 10ms + * interval the average latency will be 10ms. + * + * We need to acquire this mutex to prevent multiple concurrent + * threads being assigned to the same lane of a given inject + * handler. The mutex allows us to perform the following two + * operations atomically: + * + * 1. determine the minimum handler and minimum target + * value of all the possible handlers + * 2. update that minimum handler's lane array + * + * Without atomicity, two (or more) threads could pick the same + * lane in step (1), and then conflict with each other in step + * (2). This could allow a single lane handler to process + * multiple requests simultaneously, which shouldn't be possible. + */ + mutex_enter(&inject_delay_mtx); + for (inject_handler_t *handler = list_head(&inject_handlers); + handler != NULL; handler = list_next(&inject_handlers, handler)) { if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO) continue; - if (vd->vdev_guid == handler->zi_record.zi_guid) { - seconds = handler->zi_record.zi_timer; - break; + if (vd->vdev_guid != handler->zi_record.zi_guid) + continue; + + /* + * Defensive; should never happen as the array allocation + * occurs prior to inserting this handler on the list. + */ + ASSERT3P(handler->zi_lanes, !=, NULL); + + /* + * This should never happen, the zinject command should + * prevent a user from setting an IO delay with zero lanes. + */ + ASSERT3U(handler->zi_record.zi_nlanes, !=, 0); + + ASSERT3U(handler->zi_record.zi_nlanes, >, + handler->zi_next_lane); + + /* + * We want to issue this IO to the lane that will become + * idle the soonest, so we compare the soonest this + * specific handler can complete the IO with all other + * handlers, to find the lowest value of all possible + * lanes. We then use this lane to submit the request. + * + * Since each handler has a constant value for its + * delay, we can just use the "next" lane for that + * handler; as it will always be the lane with the + * lowest value for that particular handler (i.e. the + * lane that will become idle the soonest). This saves a + * scan of each handler's lanes array. + * + * There's two cases to consider when determining when + * this specific IO request should complete. If this + * lane is idle, we want to "submit" the request now so + * it will complete after zi_timer milliseconds. Thus, + * we set the target to now + zi_timer. + * + * If the lane is busy, we want this request to complete + * zi_timer milliseconds after the lane becomes idle. + * Since the 'zi_lanes' array holds the time at which + * each lane will become idle, we use that value to + * determine when this request should complete. + */ + hrtime_t idle = handler->zi_record.zi_timer + gethrtime(); + hrtime_t busy = handler->zi_record.zi_timer + + handler->zi_lanes[handler->zi_next_lane]; + hrtime_t target = MAX(idle, busy); + + if (min_handler == NULL) { + min_handler = handler; + min_target = target; + continue; } + ASSERT3P(min_handler, !=, NULL); + ASSERT3U(min_target, !=, 0); + + /* + * We don't yet increment the "next lane" variable since + * we still might find a lower value lane in another + * handler during any remaining iterations. Once we're + * sure we've selected the absolute minimum, we'll claim + * the lane and increment the handler's "next lane" + * field below. + */ + + if (target < min_target) { + min_handler = handler; + min_target = target; + } } + + /* + * 'min_handler' will be NULL if no IO delays are registered for + * this vdev, otherwise it will point to the handler containing + * the lane that will become idle the soonest. + */ + if (min_handler != NULL) { + ASSERT3U(min_target, !=, 0); + min_handler->zi_lanes[min_handler->zi_next_lane] = min_target; + + /* + * If we've used all possible lanes for this handler, + * loop back and start using the first lane again; + * otherwise, just increment the lane index. + */ + min_handler->zi_next_lane = (min_handler->zi_next_lane + 1) % + min_handler->zi_record.zi_nlanes; + } + + mutex_exit(&inject_delay_mtx); rw_exit(&inject_lock); - return (seconds); + + return (min_target); } /* @@ -409,6 +579,24 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record) if ((error = spa_reset(name)) != 0) return (error); + if (record->zi_cmd == ZINJECT_DELAY_IO) { + /* + * A value of zero for the number of lanes or for the + * delay time doesn't make sense. + */ + if (record->zi_timer == 0 || record->zi_nlanes == 0) + return (SET_ERROR(EINVAL)); + + /* + * The number of lanes is directly mapped to the size of + * an array used by the handler. Thus, to ensure the + * user doesn't trigger an allocation that's "too large" + * we cap the number of lanes here. + */ + if (record->zi_nlanes >= UINT16_MAX) + return (SET_ERROR(EINVAL)); + } + if (!(flags & ZINJECT_NULL)) { /* * spa_inject_ref() will add an injection reference, which will @@ -420,11 +608,34 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record) handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP); + handler->zi_spa = spa; + handler->zi_record = *record; + + if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) { + handler->zi_lanes = kmem_zalloc( + sizeof (*handler->zi_lanes) * + handler->zi_record.zi_nlanes, KM_SLEEP); + handler->zi_next_lane = 0; + } else { + handler->zi_lanes = NULL; + handler->zi_next_lane = 0; + } + rw_enter(&inject_lock, RW_WRITER); + /* + * We can't move this increment into the conditional + * above because we need to hold the RW_WRITER lock of + * inject_lock, and we don't want to hold that while + * allocating the handler's zi_lanes array. + */ + if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) { + ASSERT3S(inject_delay_count, >=, 0); + inject_delay_count++; + ASSERT3S(inject_delay_count, >, 0); + } + *id = handler->zi_id = inject_next_id++; - handler->zi_spa = spa; - handler->zi_record = *record; list_insert_tail(&inject_handlers, handler); atomic_inc_32(&zio_injection_enabled); @@ -502,9 +713,23 @@ zio_clear_fault(int id) return (SET_ERROR(ENOENT)); } + if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) { + ASSERT3S(inject_delay_count, >, 0); + inject_delay_count--; + ASSERT3S(inject_delay_count, >=, 0); + } + list_remove(&inject_handlers, handler); rw_exit(&inject_lock); + if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) { + ASSERT3P(handler->zi_lanes, !=, NULL); + kmem_free(handler->zi_lanes, sizeof (*handler->zi_lanes) * + handler->zi_record.zi_nlanes); + } else { + ASSERT3P(handler->zi_lanes, ==, NULL); + } + spa_inject_delref(handler->zi_spa); kmem_free(handler, sizeof (inject_handler_t)); atomic_dec_32(&zio_injection_enabled); @@ -516,6 +741,7 @@ void zio_inject_init(void) { rw_init(&inject_lock, NULL, RW_DEFAULT, NULL); + mutex_init(&inject_delay_mtx, NULL, MUTEX_DEFAULT, NULL); list_create(&inject_handlers, sizeof (inject_handler_t), offsetof(inject_handler_t, zi_link)); } @@ -524,5 +750,6 @@ void zio_inject_fini(void) { list_destroy(&inject_handlers); + mutex_destroy(&inject_delay_mtx); rw_destroy(&inject_lock); } -- 2.45.0