From 612455a9f079d9c4f804d7bafa4ef9a562840047 Mon Sep 17 00:00:00 2001 From: avg Date: Mon, 17 Feb 2014 17:43:13 +0000 Subject: [PATCH] MFC r254591,255753: Enhance the ZFS vdev layer to maintain both a logical and a physical minimum allocation size for devices Missed userland bits. git-svn-id: svn://svn.freebsd.org/base/stable/9@262100 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- .../opensolaris/cmd/zpool/zpool_main.c | 24 ++++- .../opensolaris/lib/libzfs/common/libzfs.h | 1 + .../lib/libzfs/common/libzfs_status.c | 87 +++++++++++-------- .../opensolaris/lib/libzpool/common/kernel.c | 6 ++ .../lib/libzpool/common/sys/zfs_context.h | 46 ++++++++++ 5 files changed, 127 insertions(+), 37 deletions(-) diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c index dfa8cbdee..713456aee 100644 --- a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c +++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c @@ -1294,12 +1294,13 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int namewidth, int depth, boolean_t isspare) { nvlist_t **child; - uint_t c, children; + uint_t c, vsc, children; pool_scan_stat_t *ps = NULL; vdev_stat_t *vs; char rbuf[6], wbuf[6], cbuf[6]; char *vname; uint64_t notpresent; + uint64_t ashift; spare_cbdata_t cb; const char *state; @@ -1308,7 +1309,7 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv, children = 0; verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, - (uint64_t **)&vs, &c) == 0); + (uint64_t **)&vs, &vsc) == 0); state = zpool_state_to_name(vs->vs_state, vs->vs_aux); if (isspare) { @@ -1362,6 +1363,10 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv, (void) printf(gettext("unsupported feature(s)")); break; + case VDEV_AUX_ASHIFT_TOO_BIG: + (void) printf(gettext("unsupported minimum blocksize")); + break; + case VDEV_AUX_SPARED: verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &cb.cb_guid) == 0); @@ -1404,6 +1409,12 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv, (void) printf(gettext("corrupted data")); break; } + } else if (children == 0 && !isspare && + VDEV_STAT_VALID(vs_physical_ashift, vsc) && + vs->vs_configured_ashift < vs->vs_physical_ashift) { + (void) printf( + gettext(" block size: %dB configured, %dB native"), + 1 << vs->vs_configured_ashift, 1 << vs->vs_physical_ashift); } (void) nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_SCAN_STATS, @@ -4267,6 +4278,15 @@ status_callback(zpool_handle_t *zhp, void *data) "'zpool clear'.\n")); break; + case ZPOOL_STATUS_NON_NATIVE_ASHIFT: + (void) printf(gettext("status: One or more devices are " + "configured to use a non-native block size.\n" + "\tExpect reduced performance.\n")); + (void) printf(gettext("action: Replace affected devices with " + "devices that support the\n\tconfigured block size, or " + "migrate data to a properly configured\n\tpool.\n")); + break; + default: /* * The remaining errors can't actually be generated, yet. diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h index 981e9bd9e..0a5ca82e4 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h @@ -326,6 +326,7 @@ typedef enum { ZPOOL_STATUS_RESILVERING, /* device being resilvered */ ZPOOL_STATUS_OFFLINE_DEV, /* device online */ ZPOOL_STATUS_REMOVED_DEV, /* removed device */ + ZPOOL_STATUS_NON_NATIVE_ASHIFT, /* (e.g. 512e dev with ashift of 9) */ /* * Finally, the following indicates a healthy pool. diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c index 71b27a121..906883ca6 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_status.c @@ -73,57 +73,66 @@ static char *zfs_msgid_table[] = { /* ARGSUSED */ static int -vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) +vdev_missing(vdev_stat_t *vs, uint_t vsc) { - return (state == VDEV_STATE_CANT_OPEN && - aux == VDEV_AUX_OPEN_FAILED); + return (vs->vs_state == VDEV_STATE_CANT_OPEN && + vs->vs_aux == VDEV_AUX_OPEN_FAILED); } /* ARGSUSED */ static int -vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs) +vdev_faulted(vdev_stat_t *vs, uint_t vsc) { - return (state == VDEV_STATE_FAULTED); + return (vs->vs_state == VDEV_STATE_FAULTED); } /* ARGSUSED */ static int -vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) +vdev_errors(vdev_stat_t *vs, uint_t vsc) { - return (state == VDEV_STATE_DEGRADED || errs != 0); + return (vs->vs_state == VDEV_STATE_DEGRADED || + vs->vs_read_errors != 0 || vs->vs_write_errors != 0 || + vs->vs_checksum_errors != 0); } /* ARGSUSED */ static int -vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) +vdev_broken(vdev_stat_t *vs, uint_t vsc) { - return (state == VDEV_STATE_CANT_OPEN); + return (vs->vs_state == VDEV_STATE_CANT_OPEN); } /* ARGSUSED */ static int -vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) +vdev_offlined(vdev_stat_t *vs, uint_t vsc) { - return (state == VDEV_STATE_OFFLINE); + return (vs->vs_state == VDEV_STATE_OFFLINE); } /* ARGSUSED */ static int -vdev_removed(uint64_t state, uint64_t aux, uint64_t errs) +vdev_removed(vdev_stat_t *vs, uint_t vsc) { - return (state == VDEV_STATE_REMOVED); + return (vs->vs_state == VDEV_STATE_REMOVED); +} + +static int +vdev_non_native_ashift(vdev_stat_t *vs, uint_t vsc) +{ + return (VDEV_STAT_VALID(vs_physical_ashift, vsc) && + vs->vs_configured_ashift < vs->vs_physical_ashift); } /* * Detect if any leaf devices that have seen errors or could not be opened. */ static boolean_t -find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) +find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t), + boolean_t ignore_replacing) { nvlist_t **child; vdev_stat_t *vs; - uint_t c, children; - char *type; + uint_t c, vsc, children; /* * Ignore problems within a 'replacing' vdev, since we're presumably in @@ -131,23 +140,25 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) * out again. We'll pick up the fact that a resilver is happening * later. */ - verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); - if (strcmp(type, VDEV_TYPE_REPLACING) == 0) - return (B_FALSE); + if (ignore_replacing == B_TRUE) { + char *type; + + verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, + &type) == 0); + if (strcmp(type, VDEV_TYPE_REPLACING) == 0) + return (B_FALSE); + } if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (c = 0; c < children; c++) - if (find_vdev_problem(child[c], func)) + if (find_vdev_problem(child[c], func, ignore_replacing)) return (B_TRUE); } else { verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS, - (uint64_t **)&vs, &c) == 0); + (uint64_t **)&vs, &vsc) == 0); - if (func(vs->vs_state, vs->vs_aux, - vs->vs_read_errors + - vs->vs_write_errors + - vs->vs_checksum_errors)) + if (func(vs, vsc) != 0) return (B_TRUE); } @@ -157,7 +168,7 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) { for (c = 0; c < children; c++) - if (find_vdev_problem(child[c], func)) + if (find_vdev_problem(child[c], func, ignore_replacing)) return (B_TRUE); } @@ -270,15 +281,15 @@ check_status(nvlist_t *config, boolean_t isimport) * Bad devices in non-replicated config. */ if (vs->vs_state == VDEV_STATE_CANT_OPEN && - find_vdev_problem(nvroot, vdev_faulted)) + find_vdev_problem(nvroot, vdev_faulted, B_TRUE)) return (ZPOOL_STATUS_FAULTED_DEV_NR); if (vs->vs_state == VDEV_STATE_CANT_OPEN && - find_vdev_problem(nvroot, vdev_missing)) + find_vdev_problem(nvroot, vdev_missing, B_TRUE)) return (ZPOOL_STATUS_MISSING_DEV_NR); if (vs->vs_state == VDEV_STATE_CANT_OPEN && - find_vdev_problem(nvroot, vdev_broken)) + find_vdev_problem(nvroot, vdev_broken, B_TRUE)) return (ZPOOL_STATUS_CORRUPT_LABEL_NR); /* @@ -300,31 +311,37 @@ check_status(nvlist_t *config, boolean_t isimport) /* * Missing devices in a replicated config. */ - if (find_vdev_problem(nvroot, vdev_faulted)) + if (find_vdev_problem(nvroot, vdev_faulted, B_TRUE)) return (ZPOOL_STATUS_FAULTED_DEV_R); - if (find_vdev_problem(nvroot, vdev_missing)) + if (find_vdev_problem(nvroot, vdev_missing, B_TRUE)) return (ZPOOL_STATUS_MISSING_DEV_R); - if (find_vdev_problem(nvroot, vdev_broken)) + if (find_vdev_problem(nvroot, vdev_broken, B_TRUE)) return (ZPOOL_STATUS_CORRUPT_LABEL_R); /* * Devices with errors */ - if (!isimport && find_vdev_problem(nvroot, vdev_errors)) + if (!isimport && find_vdev_problem(nvroot, vdev_errors, B_TRUE)) return (ZPOOL_STATUS_FAILING_DEV); /* * Offlined devices */ - if (find_vdev_problem(nvroot, vdev_offlined)) + if (find_vdev_problem(nvroot, vdev_offlined, B_TRUE)) return (ZPOOL_STATUS_OFFLINE_DEV); /* * Removed device */ - if (find_vdev_problem(nvroot, vdev_removed)) + if (find_vdev_problem(nvroot, vdev_removed, B_TRUE)) return (ZPOOL_STATUS_REMOVED_DEV); + /* + * Suboptimal, but usable, ashift configuration. + */ + if (find_vdev_problem(nvroot, vdev_non_native_ashift, B_FALSE)) + return (ZPOOL_STATUS_NON_NATIVE_ASHIFT); + /* * Outdated, but usable, version */ diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c b/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c index 520fdbf6a..c4e2d2e78 100644 --- a/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c +++ b/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c @@ -626,6 +626,12 @@ dprintf_setup(int *argc, char **argv) dprintf_print_all = 1; } +int +sysctl_handle_64(SYSCTL_HANDLER_ARGS) +{ + return (0); +} + /* * ========================================================================= * debug printfs diff --git a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h index f34767e48..87ed1cace 100644 --- a/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h +++ b/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h @@ -688,11 +688,55 @@ typedef uint32_t idmap_rid_t; #define SX_SYSINIT(name, lock, desc) +#define SYSCTL_HANDLER_ARGS struct sysctl_oid *oidp, void *arg1, \ + intptr_t arg2, struct sysctl_req *req + +/* + * This describes the access space for a sysctl request. This is needed + * so that we can use the interface from the kernel or from user-space. + */ +struct sysctl_req { + struct thread *td; /* used for access checking */ + int lock; /* wiring state */ + void *oldptr; + size_t oldlen; + size_t oldidx; + int (*oldfunc)(struct sysctl_req *, const void *, size_t); + void *newptr; + size_t newlen; + size_t newidx; + int (*newfunc)(struct sysctl_req *, void *, size_t); + size_t validlen; + int flags; +}; + +SLIST_HEAD(sysctl_oid_list, sysctl_oid); + +/* + * This describes one "oid" in the MIB tree. Potentially more nodes can + * be hidden behind it, expanded by the handler. + */ +struct sysctl_oid { + struct sysctl_oid_list *oid_parent; + SLIST_ENTRY(sysctl_oid) oid_link; + int oid_number; + u_int oid_kind; + void *oid_arg1; + intptr_t oid_arg2; + const char *oid_name; + int (*oid_handler)(SYSCTL_HANDLER_ARGS); + const char *oid_fmt; + int oid_refcnt; + u_int oid_running; + const char *oid_descr; +}; + #define SYSCTL_DECL(...) #define SYSCTL_NODE(...) #define SYSCTL_INT(...) #define SYSCTL_UINT(...) #define SYSCTL_ULONG(...) +#define SYSCTL_PROC(...) #define SYSCTL_QUAD(...) #define SYSCTL_UQUAD(...) #ifdef TUNABLE_INT @@ -704,6 +748,8 @@ typedef uint32_t idmap_rid_t; #define TUNABLE_ULONG(...) #define TUNABLE_QUAD(...) +int sysctl_handle_64(SYSCTL_HANDLER_ARGS); + /* Errors */ #ifndef ERESTART -- 2.45.0