]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - lib/libzfs/libzfs_pool.c
OpenZFS 9235 - rename zpool_rewind_policy_t to zpool_load_policy_t
[FreeBSD/FreeBSD.git] / lib / libzfs / libzfs_pool.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
26  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
27  * Copyright (c) 2017 Datto Inc.
28  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
29  */
30
31 #include <ctype.h>
32 #include <errno.h>
33 #include <devid.h>
34 #include <fcntl.h>
35 #include <libintl.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <strings.h>
39 #include <unistd.h>
40 #include <libgen.h>
41 #include <zone.h>
42 #include <sys/stat.h>
43 #include <sys/efi_partition.h>
44 #include <sys/systeminfo.h>
45 #include <sys/vtoc.h>
46 #include <sys/zfs_ioctl.h>
47 #include <sys/vdev_disk.h>
48 #include <dlfcn.h>
49
50 #include "zfs_namecheck.h"
51 #include "zfs_prop.h"
52 #include "libzfs_impl.h"
53 #include "zfs_comutil.h"
54 #include "zfeature_common.h"
55
56 static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
57 static boolean_t zpool_vdev_is_interior(const char *name);
58
59 typedef struct prop_flags {
60         int create:1;   /* Validate property on creation */
61         int import:1;   /* Validate property on import */
62 } prop_flags_t;
63
64 /*
65  * ====================================================================
66  *   zpool property functions
67  * ====================================================================
68  */
69
70 static int
71 zpool_get_all_props(zpool_handle_t *zhp)
72 {
73         zfs_cmd_t zc = {"\0"};
74         libzfs_handle_t *hdl = zhp->zpool_hdl;
75
76         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
77
78         if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
79                 return (-1);
80
81         while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
82                 if (errno == ENOMEM) {
83                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
84                                 zcmd_free_nvlists(&zc);
85                                 return (-1);
86                         }
87                 } else {
88                         zcmd_free_nvlists(&zc);
89                         return (-1);
90                 }
91         }
92
93         if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
94                 zcmd_free_nvlists(&zc);
95                 return (-1);
96         }
97
98         zcmd_free_nvlists(&zc);
99
100         return (0);
101 }
102
103 static int
104 zpool_props_refresh(zpool_handle_t *zhp)
105 {
106         nvlist_t *old_props;
107
108         old_props = zhp->zpool_props;
109
110         if (zpool_get_all_props(zhp) != 0)
111                 return (-1);
112
113         nvlist_free(old_props);
114         return (0);
115 }
116
117 static const char *
118 zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
119     zprop_source_t *src)
120 {
121         nvlist_t *nv, *nvl;
122         uint64_t ival;
123         char *value;
124         zprop_source_t source;
125
126         nvl = zhp->zpool_props;
127         if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
128                 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
129                 source = ival;
130                 verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
131         } else {
132                 source = ZPROP_SRC_DEFAULT;
133                 if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
134                         value = "-";
135         }
136
137         if (src)
138                 *src = source;
139
140         return (value);
141 }
142
143 uint64_t
144 zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
145 {
146         nvlist_t *nv, *nvl;
147         uint64_t value;
148         zprop_source_t source;
149
150         if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
151                 /*
152                  * zpool_get_all_props() has most likely failed because
153                  * the pool is faulted, but if all we need is the top level
154                  * vdev's guid then get it from the zhp config nvlist.
155                  */
156                 if ((prop == ZPOOL_PROP_GUID) &&
157                     (nvlist_lookup_nvlist(zhp->zpool_config,
158                     ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
159                     (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
160                     == 0)) {
161                         return (value);
162                 }
163                 return (zpool_prop_default_numeric(prop));
164         }
165
166         nvl = zhp->zpool_props;
167         if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
168                 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
169                 source = value;
170                 verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
171         } else {
172                 source = ZPROP_SRC_DEFAULT;
173                 value = zpool_prop_default_numeric(prop);
174         }
175
176         if (src)
177                 *src = source;
178
179         return (value);
180 }
181
182 /*
183  * Map VDEV STATE to printed strings.
184  */
185 const char *
186 zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
187 {
188         switch (state) {
189         case VDEV_STATE_CLOSED:
190         case VDEV_STATE_OFFLINE:
191                 return (gettext("OFFLINE"));
192         case VDEV_STATE_REMOVED:
193                 return (gettext("REMOVED"));
194         case VDEV_STATE_CANT_OPEN:
195                 if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
196                         return (gettext("FAULTED"));
197                 else if (aux == VDEV_AUX_SPLIT_POOL)
198                         return (gettext("SPLIT"));
199                 else
200                         return (gettext("UNAVAIL"));
201         case VDEV_STATE_FAULTED:
202                 return (gettext("FAULTED"));
203         case VDEV_STATE_DEGRADED:
204                 return (gettext("DEGRADED"));
205         case VDEV_STATE_HEALTHY:
206                 return (gettext("ONLINE"));
207
208         default:
209                 break;
210         }
211
212         return (gettext("UNKNOWN"));
213 }
214
215 /*
216  * Map POOL STATE to printed strings.
217  */
218 const char *
219 zpool_pool_state_to_name(pool_state_t state)
220 {
221         switch (state) {
222         default:
223                 break;
224         case POOL_STATE_ACTIVE:
225                 return (gettext("ACTIVE"));
226         case POOL_STATE_EXPORTED:
227                 return (gettext("EXPORTED"));
228         case POOL_STATE_DESTROYED:
229                 return (gettext("DESTROYED"));
230         case POOL_STATE_SPARE:
231                 return (gettext("SPARE"));
232         case POOL_STATE_L2CACHE:
233                 return (gettext("L2CACHE"));
234         case POOL_STATE_UNINITIALIZED:
235                 return (gettext("UNINITIALIZED"));
236         case POOL_STATE_UNAVAIL:
237                 return (gettext("UNAVAIL"));
238         case POOL_STATE_POTENTIALLY_ACTIVE:
239                 return (gettext("POTENTIALLY_ACTIVE"));
240         }
241
242         return (gettext("UNKNOWN"));
243 }
244
/*
 * Get a zpool property value for 'prop' and return the value in
 * a pre-allocated buffer.
 *
 * Returns 0 on success (including the unavailable-pool fallback path)
 * and -1 when the value cannot be retrieved or rendered.  When
 * 'literal' is B_TRUE numeric values are printed exactly; otherwise
 * they are humanized (unit suffixes, percent sign, ratio "x" suffix).
 * The value's source is stored through 'srctype' when it is non-NULL.
 */
int
zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
    size_t len, zprop_source_t *srctype, boolean_t literal)
{
	uint64_t intval;
	const char *strval;
	zprop_source_t src = ZPROP_SRC_NONE;
	nvlist_t *nvroot;
	vdev_stat_t *vs;
	uint_t vsc;

	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
		/*
		 * Only a handful of properties can be answered for an
		 * unavailable pool; everything else is reported as "-".
		 */
		switch (prop) {
		case ZPOOL_PROP_NAME:
			(void) strlcpy(buf, zpool_get_name(zhp), len);
			break;

		case ZPOOL_PROP_HEALTH:
			(void) strlcpy(buf, "FAULTED", len);
			break;

		case ZPOOL_PROP_GUID:
			/* May still be recoverable from the config nvlist. */
			intval = zpool_get_prop_int(zhp, prop, &src);
			(void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
			break;

		case ZPOOL_PROP_ALTROOT:
		case ZPOOL_PROP_CACHEFILE:
		case ZPOOL_PROP_COMMENT:
			if (zhp->zpool_props != NULL ||
			    zpool_get_all_props(zhp) == 0) {
				(void) strlcpy(buf,
				    zpool_get_prop_string(zhp, prop, &src),
				    len);
				break;
			}
			/* FALLTHROUGH */
		default:
			(void) strlcpy(buf, "-", len);
			break;
		}

		if (srctype != NULL)
			*srctype = src;
		return (0);
	}

	/*
	 * ZPOOL_PROP_NAME can be answered even if the props nvlist
	 * cannot be fetched; every other property requires it.
	 */
	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
	    prop != ZPOOL_PROP_NAME)
		return (-1);

	switch (zpool_prop_get_type(prop)) {
	case PROP_TYPE_STRING:
		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
		    len);
		break;

	case PROP_TYPE_NUMBER:
		intval = zpool_get_prop_int(zhp, prop, &src);

		switch (prop) {
		case ZPOOL_PROP_SIZE:
		case ZPOOL_PROP_ALLOCATED:
		case ZPOOL_PROP_FREE:
		case ZPOOL_PROP_FREEING:
		case ZPOOL_PROP_LEAKED:
		case ZPOOL_PROP_ASHIFT:
			/* Humanize unless a literal value was requested. */
			if (literal)
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			else
				(void) zfs_nicenum(intval, buf, len);
			break;

		case ZPOOL_PROP_EXPANDSZ:
			/* Zero expansion size is displayed as "-". */
			if (intval == 0) {
				(void) strlcpy(buf, "-", len);
			} else if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				(void) zfs_nicebytes(intval, buf, len);
			}
			break;

		case ZPOOL_PROP_CAPACITY:
			if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				(void) snprintf(buf, len, "%llu%%",
				    (u_longlong_t)intval);
			}
			break;

		case ZPOOL_PROP_FRAGMENTATION:
			/* UINT64_MAX marks fragmentation as unknown. */
			if (intval == UINT64_MAX) {
				(void) strlcpy(buf, "-", len);
			} else if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				(void) snprintf(buf, len, "%llu%%",
				    (u_longlong_t)intval);
			}
			break;

		case ZPOOL_PROP_DEDUPRATIO:
			/* Ratio is stored as a fixed-point value x100. */
			if (literal)
				(void) snprintf(buf, len, "%llu.%02llu",
				    (u_longlong_t)(intval / 100),
				    (u_longlong_t)(intval % 100));
			else
				(void) snprintf(buf, len, "%llu.%02llux",
				    (u_longlong_t)(intval / 100),
				    (u_longlong_t)(intval % 100));
			break;

		case ZPOOL_PROP_HEALTH:
			/* Health is derived from the root vdev's stats. */
			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
			verify(nvlist_lookup_uint64_array(nvroot,
			    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
			    == 0);

			(void) strlcpy(buf, zpool_state_to_name(intval,
			    vs->vs_aux), len);
			break;
		case ZPOOL_PROP_VERSION:
			/* Feature-flag pools print "-" instead of a number. */
			if (intval >= SPA_VERSION_FEATURES) {
				(void) snprintf(buf, len, "-");
				break;
			}
			/* FALLTHROUGH */
		default:
			(void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
		}
		break;

	case PROP_TYPE_INDEX:
		intval = zpool_get_prop_int(zhp, prop, &src);
		if (zpool_prop_index_to_string(prop, intval, &strval)
		    != 0)
			return (-1);
		(void) strlcpy(buf, strval, len);
		break;

	default:
		abort();
	}

	if (srctype)
		*srctype = src;

	return (0);
}
405
406 /*
407  * Check if the bootfs name has the same pool name as it is set to.
408  * Assuming bootfs is a valid dataset name.
409  */
410 static boolean_t
411 bootfs_name_valid(const char *pool, char *bootfs)
412 {
413         int len = strlen(pool);
414         if (bootfs[0] == '\0')
415                 return (B_TRUE);
416
417         if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
418                 return (B_FALSE);
419
420         if (strncmp(pool, bootfs, len) == 0 &&
421             (bootfs[len] == '/' || bootfs[len] == '\0'))
422                 return (B_TRUE);
423
424         return (B_FALSE);
425 }
426
427 boolean_t
428 zpool_is_bootable(zpool_handle_t *zhp)
429 {
430         char bootfs[ZFS_MAX_DATASET_NAME_LEN];
431
432         return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
433             sizeof (bootfs), NULL, B_FALSE) == 0 && strncmp(bootfs, "-",
434             sizeof (bootfs)) != 0);
435 }
436
437
/*
 * Given an nvlist of zpool properties to be set, validate that they are
 * correct, and parse any numeric properties (index, boolean, etc) if they are
 * specified as strings.
 *
 * Returns a newly allocated nvlist of validated/parsed properties (the
 * caller must free it), or NULL after reporting the error through
 * 'hdl'.  'flags' selects which creation/import-time restrictions
 * apply; 'version' is the pool's current (or to-be-created) version.
 */
static nvlist_t *
zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
    nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
{
	nvpair_t *elem;
	nvlist_t *retprops;
	zpool_prop_t prop;
	char *strval;
	uint64_t intval;
	char *slash, *check;
	struct stat64 statbuf;
	zpool_handle_t *zhp;

	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
		(void) no_memory(hdl);
		return (NULL);
	}

	elem = NULL;
	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
		const char *propname = nvpair_name(elem);

		prop = zpool_name_to_prop(propname);
		if (prop == ZPOOL_PROP_INVAL && zpool_prop_feature(propname)) {
			int err;
			/* Feature properties are named "feature@<fname>". */
			char *fname = strchr(propname, '@') + 1;

			err = zfeature_lookup_name(fname, NULL);
			if (err != 0) {
				ASSERT3U(err, ==, ENOENT);
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "invalid feature '%s'"), fname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (nvpair_type(elem) != DATA_TYPE_STRING) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' must be a string"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			(void) nvpair_value_string(elem, &strval);
			if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0 &&
			    strcmp(strval, ZFS_FEATURE_DISABLED) != 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set to "
				    "'enabled' or 'disabled'"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (!flags.create &&
			    strcmp(strval, ZFS_FEATURE_DISABLED) == 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set to "
				    "'disabled' at creation time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			/* Accepted features are recorded as a zero uint64. */
			if (nvlist_add_uint64(retprops, propname, 0) != 0) {
				(void) no_memory(hdl);
				goto error;
			}
			continue;
		}

		/*
		 * Make sure this property is valid and applies to this type.
		 */
		if (prop == ZPOOL_PROP_INVAL) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "invalid property '%s'"), propname);
			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
			goto error;
		}

		if (zpool_prop_readonly(prop)) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
			    "is readonly"), propname);
			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
			goto error;
		}

		/* Convert string values to native types, adding to retprops. */
		if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
		    &strval, &intval, errbuf) != 0)
			goto error;

		/*
		 * Perform additional checking for specific properties.
		 */
		switch (prop) {
		case ZPOOL_PROP_VERSION:
			/* Never allow downgrades or unsupported versions. */
			if (intval < version ||
			    !SPA_VERSION_IS_SUPPORTED(intval)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' number %d is invalid."),
				    propname, intval);
				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
				goto error;
			}
			break;

		case ZPOOL_PROP_ASHIFT:
			/* Zero means auto-detect; otherwise bound the shift. */
			if (intval != 0 &&
			    (intval < ASHIFT_MIN || intval > ASHIFT_MAX)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "invalid '%s=%d' property: only values "
				    "between %" PRId32 " and %" PRId32 " "
				    "are allowed.\n"),
				    propname, intval, ASHIFT_MIN, ASHIFT_MAX);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;

		case ZPOOL_PROP_BOOTFS:
			if (flags.create || flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' cannot be set at creation "
				    "or import time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (version < SPA_VERSION_BOOTFS) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "pool must be upgraded to support "
				    "'%s' property"), propname);
				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
				goto error;
			}

			/*
			 * bootfs property value has to be a dataset name and
			 * the dataset has to be in the same pool as it sets to.
			 */
			if (!bootfs_name_valid(poolname, strval)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
				    "is an invalid name"), strval);
				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
				goto error;
			}

			/* The pool must be openable for bootfs to be set. */
			if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "could not open pool '%s'"), poolname);
				(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
				goto error;
			}
			zpool_close(zhp);
			break;

		case ZPOOL_PROP_ALTROOT:
			if (!flags.create && !flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set during pool "
				    "creation or import"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (strval[0] != '/') {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "bad alternate root '%s'"), strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}
			break;

		case ZPOOL_PROP_CACHEFILE:
			/* Empty and "none" are both accepted verbatim. */
			if (strval[0] == '\0')
				break;

			if (strcmp(strval, "none") == 0)
				break;

			if (strval[0] != '/') {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' must be empty, an "
				    "absolute path, or 'none'"), propname);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			slash = strrchr(strval, '/');

			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
			    strcmp(slash, "/..") == 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' is not a valid file"), strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			/*
			 * Temporarily truncate at the last slash so the
			 * parent directory can be stat'ed; restored below.
			 */
			*slash = '\0';

			if (strval[0] != '\0' &&
			    (stat64(strval, &statbuf) != 0 ||
			    !S_ISDIR(statbuf.st_mode))) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' is not a valid directory"),
				    strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			*slash = '/';
			break;

		case ZPOOL_PROP_COMMENT:
			/* Comments must be printable and length-bounded. */
			for (check = strval; *check != '\0'; check++) {
				if (!isprint(*check)) {
					zfs_error_aux(hdl,
					    dgettext(TEXT_DOMAIN,
					    "comment may only have printable "
					    "characters"));
					(void) zfs_error(hdl, EZFS_BADPROP,
					    errbuf);
					goto error;
				}
			}
			if (strlen(strval) > ZPROP_MAX_COMMENT) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "comment must not exceed %d characters"),
				    ZPROP_MAX_COMMENT);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		case ZPOOL_PROP_READONLY:
			if (!flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set at "
				    "import time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		case ZPOOL_PROP_TNAME:
			if (!flags.create) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set at "
				    "creation time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		case ZPOOL_PROP_MULTIHOST:
			/* multihost relies on a unique, non-zero hostid. */
			if (get_system_hostid() == 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "requires a non-zero system hostid"));
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		default:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "property '%s'(%d) not defined"), propname, prop);
			break;
		}
	}

	return (retprops);
error:
	nvlist_free(retprops);
	return (NULL);
}
713
714 /*
715  * Set zpool property : propname=propval.
716  */
717 int
718 zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
719 {
720         zfs_cmd_t zc = {"\0"};
721         int ret = -1;
722         char errbuf[1024];
723         nvlist_t *nvl = NULL;
724         nvlist_t *realprops;
725         uint64_t version;
726         prop_flags_t flags = { 0 };
727
728         (void) snprintf(errbuf, sizeof (errbuf),
729             dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
730             zhp->zpool_name);
731
732         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
733                 return (no_memory(zhp->zpool_hdl));
734
735         if (nvlist_add_string(nvl, propname, propval) != 0) {
736                 nvlist_free(nvl);
737                 return (no_memory(zhp->zpool_hdl));
738         }
739
740         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
741         if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
742             zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
743                 nvlist_free(nvl);
744                 return (-1);
745         }
746
747         nvlist_free(nvl);
748         nvl = realprops;
749
750         /*
751          * Execute the corresponding ioctl() to set this property.
752          */
753         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
754
755         if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
756                 nvlist_free(nvl);
757                 return (-1);
758         }
759
760         ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
761
762         zcmd_free_nvlists(&zc);
763         nvlist_free(nvl);
764
765         if (ret)
766                 (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
767         else
768                 (void) zpool_props_refresh(zhp);
769
770         return (ret);
771 }
772
773 int
774 zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
775 {
776         libzfs_handle_t *hdl = zhp->zpool_hdl;
777         zprop_list_t *entry;
778         char buf[ZFS_MAXPROPLEN];
779         nvlist_t *features = NULL;
780         nvpair_t *nvp;
781         zprop_list_t **last;
782         boolean_t firstexpand = (NULL == *plp);
783         int i;
784
785         if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
786                 return (-1);
787
788         last = plp;
789         while (*last != NULL)
790                 last = &(*last)->pl_next;
791
792         if ((*plp)->pl_all)
793                 features = zpool_get_features(zhp);
794
795         if ((*plp)->pl_all && firstexpand) {
796                 for (i = 0; i < SPA_FEATURES; i++) {
797                         zprop_list_t *entry = zfs_alloc(hdl,
798                             sizeof (zprop_list_t));
799                         entry->pl_prop = ZPROP_INVAL;
800                         entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
801                             spa_feature_table[i].fi_uname);
802                         entry->pl_width = strlen(entry->pl_user_prop);
803                         entry->pl_all = B_TRUE;
804
805                         *last = entry;
806                         last = &entry->pl_next;
807                 }
808         }
809
810         /* add any unsupported features */
811         for (nvp = nvlist_next_nvpair(features, NULL);
812             nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) {
813                 char *propname;
814                 boolean_t found;
815                 zprop_list_t *entry;
816
817                 if (zfeature_is_supported(nvpair_name(nvp)))
818                         continue;
819
820                 propname = zfs_asprintf(hdl, "unsupported@%s",
821                     nvpair_name(nvp));
822
823                 /*
824                  * Before adding the property to the list make sure that no
825                  * other pool already added the same property.
826                  */
827                 found = B_FALSE;
828                 entry = *plp;
829                 while (entry != NULL) {
830                         if (entry->pl_user_prop != NULL &&
831                             strcmp(propname, entry->pl_user_prop) == 0) {
832                                 found = B_TRUE;
833                                 break;
834                         }
835                         entry = entry->pl_next;
836                 }
837                 if (found) {
838                         free(propname);
839                         continue;
840                 }
841
842                 entry = zfs_alloc(hdl, sizeof (zprop_list_t));
843                 entry->pl_prop = ZPROP_INVAL;
844                 entry->pl_user_prop = propname;
845                 entry->pl_width = strlen(entry->pl_user_prop);
846                 entry->pl_all = B_TRUE;
847
848                 *last = entry;
849                 last = &entry->pl_next;
850         }
851
852         for (entry = *plp; entry != NULL; entry = entry->pl_next) {
853
854                 if (entry->pl_fixed)
855                         continue;
856
857                 if (entry->pl_prop != ZPROP_INVAL &&
858                     zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
859                     NULL, B_FALSE) == 0) {
860                         if (strlen(buf) > entry->pl_width)
861                                 entry->pl_width = strlen(buf);
862                 }
863         }
864
865         return (0);
866 }
867
868 /*
869  * Get the state for the given feature on the given ZFS pool.
870  */
871 int
872 zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
873     size_t len)
874 {
875         uint64_t refcount;
876         boolean_t found = B_FALSE;
877         nvlist_t *features = zpool_get_features(zhp);
878         boolean_t supported;
879         const char *feature = strchr(propname, '@') + 1;
880
881         supported = zpool_prop_feature(propname);
882         ASSERT(supported || zpool_prop_unsupported(propname));
883
884         /*
885          * Convert from feature name to feature guid. This conversion is
886          * unnecessary for unsupported@... properties because they already
887          * use guids.
888          */
889         if (supported) {
890                 int ret;
891                 spa_feature_t fid;
892
893                 ret = zfeature_lookup_name(feature, &fid);
894                 if (ret != 0) {
895                         (void) strlcpy(buf, "-", len);
896                         return (ENOTSUP);
897                 }
898                 feature = spa_feature_table[fid].fi_guid;
899         }
900
901         if (nvlist_lookup_uint64(features, feature, &refcount) == 0)
902                 found = B_TRUE;
903
904         if (supported) {
905                 if (!found) {
906                         (void) strlcpy(buf, ZFS_FEATURE_DISABLED, len);
907                 } else  {
908                         if (refcount == 0)
909                                 (void) strlcpy(buf, ZFS_FEATURE_ENABLED, len);
910                         else
911                                 (void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len);
912                 }
913         } else {
914                 if (found) {
915                         if (refcount == 0) {
916                                 (void) strcpy(buf, ZFS_UNSUPPORTED_INACTIVE);
917                         } else {
918                                 (void) strcpy(buf, ZFS_UNSUPPORTED_READONLY);
919                         }
920                 } else {
921                         (void) strlcpy(buf, "-", len);
922                         return (ENOTSUP);
923                 }
924         }
925
926         return (0);
927 }
928
/*
 * Validate the given pool name, optionally putting an extended error message in
 * 'buf'.
 *
 * Returns B_TRUE if the name is acceptable, B_FALSE otherwise.  'hdl' may
 * be NULL, in which case no auxiliary error text is recorded.  'isopen'
 * distinguishes opening an existing pool (lenient) from creating or
 * importing one (strict reserved-name checks apply).
 */
boolean_t
zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
{
	namecheck_err_t why;
	char what;
	int ret;

	ret = pool_namecheck(pool, &why, &what);

	/*
	 * The rules for reserved pool names were extended at a later point.
	 * But we need to support users with existing pools that may now be
	 * invalid.  So we only check for this expanded set of names during a
	 * create (or import), and only in userland.
	 */
	if (ret == 0 && !isopen &&
	    (strncmp(pool, "mirror", 6) == 0 ||
	    strncmp(pool, "raidz", 5) == 0 ||
	    strncmp(pool, "spare", 5) == 0 ||
	    strcmp(pool, "log") == 0)) {
		if (hdl != NULL)
			zfs_error_aux(hdl,
			    dgettext(TEXT_DOMAIN, "name is reserved"));
		return (B_FALSE);
	}


	/* Map each namecheck failure code to a translated explanation. */
	if (ret != 0) {
		if (hdl != NULL) {
			switch (why) {
			case NAME_ERR_TOOLONG:
				zfs_error_aux(hdl,
				    dgettext(TEXT_DOMAIN, "name is too long"));
				break;

			case NAME_ERR_INVALCHAR:
				zfs_error_aux(hdl,
				    dgettext(TEXT_DOMAIN, "invalid character "
				    "'%c' in pool name"), what);
				break;

			case NAME_ERR_NOLETTER:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "name must begin with a letter"));
				break;

			case NAME_ERR_RESERVED:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "name is reserved"));
				break;

			case NAME_ERR_DISKLIKE:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "pool name is reserved"));
				break;

			case NAME_ERR_LEADING_SLASH:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "leading slash in name"));
				break;

			case NAME_ERR_EMPTY_COMPONENT:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "empty component in name"));
				break;

			case NAME_ERR_TRAILING_SLASH:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "trailing slash in name"));
				break;

			case NAME_ERR_MULTIPLE_DELIMITERS:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "multiple '@' and/or '#' delimiters in "
				    "name"));
				break;

			case NAME_ERR_NO_AT:
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "permission set is missing '@'"));
				break;

			default:
				/* Unrecognized code: report it numerically. */
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "(%d) not defined"), why);
				break;
			}
		}
		return (B_FALSE);
	}

	return (B_TRUE);
}
1026
1027 /*
1028  * Open a handle to the given pool, even if the pool is currently in the FAULTED
1029  * state.
1030  */
1031 zpool_handle_t *
1032 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
1033 {
1034         zpool_handle_t *zhp;
1035         boolean_t missing;
1036
1037         /*
1038          * Make sure the pool name is valid.
1039          */
1040         if (!zpool_name_valid(hdl, B_TRUE, pool)) {
1041                 (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1042                     dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1043                     pool);
1044                 return (NULL);
1045         }
1046
1047         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1048                 return (NULL);
1049
1050         zhp->zpool_hdl = hdl;
1051         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1052
1053         if (zpool_refresh_stats(zhp, &missing) != 0) {
1054                 zpool_close(zhp);
1055                 return (NULL);
1056         }
1057
1058         if (missing) {
1059                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
1060                 (void) zfs_error_fmt(hdl, EZFS_NOENT,
1061                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
1062                 zpool_close(zhp);
1063                 return (NULL);
1064         }
1065
1066         return (zhp);
1067 }
1068
1069 /*
1070  * Like the above, but silent on error.  Used when iterating over pools (because
1071  * the configuration cache may be out of date).
1072  */
1073 int
1074 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
1075 {
1076         zpool_handle_t *zhp;
1077         boolean_t missing;
1078
1079         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1080                 return (-1);
1081
1082         zhp->zpool_hdl = hdl;
1083         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1084
1085         if (zpool_refresh_stats(zhp, &missing) != 0) {
1086                 zpool_close(zhp);
1087                 return (-1);
1088         }
1089
1090         if (missing) {
1091                 zpool_close(zhp);
1092                 *ret = NULL;
1093                 return (0);
1094         }
1095
1096         *ret = zhp;
1097         return (0);
1098 }
1099
1100 /*
1101  * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
1102  * state.
1103  */
1104 zpool_handle_t *
1105 zpool_open(libzfs_handle_t *hdl, const char *pool)
1106 {
1107         zpool_handle_t *zhp;
1108
1109         if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
1110                 return (NULL);
1111
1112         if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
1113                 (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
1114                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
1115                 zpool_close(zhp);
1116                 return (NULL);
1117         }
1118
1119         return (zhp);
1120 }
1121
/*
 * Close the handle.  Simply frees the memory associated with the handle.
 */
void
zpool_close(zpool_handle_t *zhp)
{
	/* nvlist_free() is NULL-safe, so unset lists need no guard. */
	nvlist_free(zhp->zpool_config);
	nvlist_free(zhp->zpool_old_config);
	nvlist_free(zhp->zpool_props);
	free(zhp);
}
1133
/*
 * Return the name of the pool.
 *
 * The returned string is owned by the handle and remains valid until
 * zpool_close() is called; callers must not free it.
 */
const char *
zpool_get_name(zpool_handle_t *zhp)
{
	return (zhp->zpool_name);
}
1142
1143
1144 /*
1145  * Return the state of the pool (ACTIVE or UNAVAILABLE)
1146  */
1147 int
1148 zpool_get_state(zpool_handle_t *zhp)
1149 {
1150         return (zhp->zpool_state);
1151 }
1152
/*
 * Create the named pool, using the provided vdev list.  It is assumed
 * that the consumer has already validated the contents of the nvlist, so we
 * don't have to worry about error semantics.
 *
 * 'props' are pool properties, 'fsprops' are properties for the pool's
 * root dataset; both are optional (may be NULL).  Returns 0 on success;
 * on failure returns -1 (or the ioctl error) with the error reported via
 * the libzfs error facility.  All nvlists built here, and any wrapping
 * key material produced by zfs_crypto_create(), are released on every
 * exit path through the create_failed label.
 */
int
zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
    nvlist_t *props, nvlist_t *fsprops)
{
	zfs_cmd_t zc = {"\0"};
	nvlist_t *zc_fsprops = NULL;
	nvlist_t *zc_props = NULL;
	nvlist_t *hidden_args = NULL;
	uint8_t *wkeydata = NULL;
	uint_t wkeylen = 0;
	char msg[1024];
	int ret = -1;

	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
	    "cannot create '%s'"), pool);

	if (!zpool_name_valid(hdl, B_FALSE, pool))
		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));

	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);

	/* Validate pool properties against the initial pool version. */
	if (props) {
		prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };

		if ((zc_props = zpool_valid_proplist(hdl, pool, props,
		    SPA_VERSION_1, flags, msg)) == NULL) {
			goto create_failed;
		}
	}

	if (fsprops) {
		uint64_t zoned;
		char *zonestr;

		/* The root dataset is zoned iff zoned=on was requested. */
		zoned = ((nvlist_lookup_string(fsprops,
		    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
		    strcmp(zonestr, "on") == 0);

		if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM,
		    fsprops, zoned, NULL, NULL, B_TRUE, msg)) == NULL) {
			goto create_failed;
		}
		if (!zc_props &&
		    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
			goto create_failed;
		}
		/*
		 * Set up encryption for the root dataset if requested;
		 * this may hand back raw wrapping key material in
		 * wkeydata, which we must pass down and later free.
		 */
		if (zfs_crypto_create(hdl, NULL, zc_fsprops, props,
		    &wkeydata, &wkeylen) != 0) {
			zfs_error(hdl, EZFS_CRYPTOFAILED, msg);
			goto create_failed;
		}
		if (nvlist_add_nvlist(zc_props,
		    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
			goto create_failed;
		}
		/* Wrapping keys ride in a separate "hidden args" nvlist. */
		if (wkeydata != NULL) {
			if (nvlist_alloc(&hidden_args, NV_UNIQUE_NAME, 0) != 0)
				goto create_failed;

			if (nvlist_add_uint8_array(hidden_args, "wkeydata",
			    wkeydata, wkeylen) != 0)
				goto create_failed;

			if (nvlist_add_nvlist(zc_props, ZPOOL_HIDDEN_ARGS,
			    hidden_args) != 0)
				goto create_failed;
		}
	}

	if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
		goto create_failed;

	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));

	if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {

		/* Release everything before mapping errno to a message. */
		zcmd_free_nvlists(&zc);
		nvlist_free(zc_props);
		nvlist_free(zc_fsprops);
		nvlist_free(hidden_args);
		if (wkeydata != NULL)
			free(wkeydata);

		switch (errno) {
		case EBUSY:
			/*
			 * This can happen if the user has specified the same
			 * device multiple times.  We can't reliably detect this
			 * until we try to add it and see we already have a
			 * label.  This can also happen if the device is
			 * part of an active md or lvm device.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more vdevs refer to the same device, or "
			    "one of\nthe devices is part of an active md or "
			    "lvm device"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ERANGE:
			/*
			 * This happens if the record size is smaller or larger
			 * than the allowed size range, or not a power of 2.
			 *
			 * NOTE: although zfs_valid_proplist is called earlier,
			 * this case may have slipped through since the
			 * pool does not exist yet and it is therefore
			 * impossible to read properties e.g. max blocksize
			 * from the pool.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "record size invalid"));
			return (zfs_error(hdl, EZFS_BADPROP, msg));

		case EOVERFLOW:
			/*
			 * This occurs when one of the devices is below
			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
			 * device was the problem device since there's no
			 * reliable way to determine device size from userland.
			 */
			{
				char buf[64];

				zfs_nicebytes(SPA_MINDEVSIZE, buf,
				    sizeof (buf));

				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "one or more devices is less than the "
				    "minimum size (%s)"), buf);
			}
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ENOSPC:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more devices is out of space"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ENOTBLK:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cache device must be a disk or disk slice"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		default:
			return (zpool_standard_error(hdl, errno, msg));
		}
	}

	/* Success also flows through here: ret is 0 and we just clean up. */
create_failed:
	zcmd_free_nvlists(&zc);
	nvlist_free(zc_props);
	nvlist_free(zc_fsprops);
	nvlist_free(hidden_args);
	if (wkeydata != NULL)
		free(wkeydata);
	return (ret);
}
1315
1316 /*
1317  * Destroy the given pool.  It is up to the caller to ensure that there are no
1318  * datasets left in the pool.
1319  */
1320 int
1321 zpool_destroy(zpool_handle_t *zhp, const char *log_str)
1322 {
1323         zfs_cmd_t zc = {"\0"};
1324         zfs_handle_t *zfp = NULL;
1325         libzfs_handle_t *hdl = zhp->zpool_hdl;
1326         char msg[1024];
1327
1328         if (zhp->zpool_state == POOL_STATE_ACTIVE &&
1329             (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
1330                 return (-1);
1331
1332         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1333         zc.zc_history = (uint64_t)(uintptr_t)log_str;
1334
1335         if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
1336                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1337                     "cannot destroy '%s'"), zhp->zpool_name);
1338
1339                 if (errno == EROFS) {
1340                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1341                             "one or more devices is read only"));
1342                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1343                 } else {
1344                         (void) zpool_standard_error(hdl, errno, msg);
1345                 }
1346
1347                 if (zfp)
1348                         zfs_close(zfp);
1349                 return (-1);
1350         }
1351
1352         if (zfp) {
1353                 remove_mountpoint(zfp);
1354                 zfs_close(zfp);
1355         }
1356
1357         return (0);
1358 }
1359
/*
 * Add the given vdevs to the pool.  The caller must have already performed the
 * necessary verification to ensure that the vdev specification is well-formed.
 *
 * Returns 0 on success, or -1 with the error reported via the libzfs
 * error facility.
 */
int
zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
{
	zfs_cmd_t zc = {"\0"};
	int ret;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	char msg[1024];
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
	    "cannot add to '%s'"), zhp->zpool_name);

	/* Hot spares require at least SPA_VERSION_SPARES on-disk. */
	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
	    SPA_VERSION_SPARES &&
	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
		    "upgraded to add hot spares"));
		return (zfs_error(hdl, EZFS_BADVERSION, msg));
	}

	/* L2ARC cache devices require at least SPA_VERSION_L2CACHE. */
	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
	    SPA_VERSION_L2CACHE &&
	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
		    "upgraded to add cache devices"));
		return (zfs_error(hdl, EZFS_BADVERSION, msg));
	}

	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);
	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
		switch (errno) {
		case EBUSY:
			/*
			 * This can happen if the user has specified the same
			 * device multiple times.  We can't reliably detect this
			 * until we try to add it and see we already have a
			 * label.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more vdevs refer to the same device"));
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		case EINVAL:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "invalid config; a pool with removing/removed "
			    "vdevs does not support adding raidz vdevs"));
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		case EOVERFLOW:
			/*
			 * This occurs when one of the devices is below
			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
			 * device was the problem device since there's no
			 * reliable way to determine device size from userland.
			 */
			{
				char buf[64];

				zfs_nicebytes(SPA_MINDEVSIZE, buf,
				    sizeof (buf));

				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "device is less than the minimum "
				    "size (%s)"), buf);
			}
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		case ENOTSUP:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "pool must be upgraded to add these vdevs"));
			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
			break;

		case ENOTBLK:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cache device must be a disk or disk slice"));
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		default:
			(void) zpool_standard_error(hdl, errno, msg);
		}

		ret = -1;
	} else {
		ret = 0;
	}

	zcmd_free_nvlists(&zc);

	return (ret);
}
1465
/*
 * Exports the pool from the system.  The caller must ensure that there are no
 * mounted datasets in the pool.
 *
 * 'force' and 'hardforce' are passed to the kernel in zc_cookie and
 * zc_guid respectively; 'log_str' is recorded in the pool history.
 * Returns 0 on success, -1 with the error reported otherwise.
 */
static int
zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce,
    const char *log_str)
{
	zfs_cmd_t zc = {"\0"};
	char msg[1024];

	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
	    "cannot export '%s'"), zhp->zpool_name);

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	zc.zc_cookie = force;
	zc.zc_guid = hardforce;
	zc.zc_history = (uint64_t)(uintptr_t)log_str;

	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
		switch (errno) {
		case EXDEV:
			/* Shared spares would leak to other pools on export. */
			zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
			    "use '-f' to override the following errors:\n"
			    "'%s' has an active shared spare which could be"
			    " used by other pools once '%s' is exported."),
			    zhp->zpool_name, zhp->zpool_name);
			return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
			    msg));
		default:
			return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
			    msg));
		}
	}

	return (0);
}
1503
/* Export the pool; 'force' overrides shared-spare errors.  Not a hard-force. */
int
zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)
{
	return (zpool_export_common(zhp, force, B_FALSE, log_str));
}
1509
/* Export the pool unconditionally (both force and hard-force set). */
int
zpool_export_force(zpool_handle_t *zhp, const char *log_str)
{
	return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str));
}
1515
/*
 * Print a user-facing summary of a pool rewind: the timestamp the pool
 * was (or would be) rewound to, and an estimate of the transaction time
 * discarded.  Silent unless the handle has error printing enabled and
 * the config carries rewind information.  'dryrun' switches the wording
 * from "returned" to "would be able to return".
 */
static void
zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
    nvlist_t *config)
{
	nvlist_t *nv = NULL;
	uint64_t rewindto;
	int64_t loss = -1;
	struct tm t;
	char timestr[128];

	if (!hdl->libzfs_printerr || config == NULL)
		return;

	/* Drill down to the rewind info nested inside the load info. */
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
	    nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) {
		return;
	}

	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
		return;
	/* Loss is optional; -1 (unknown) suppresses the discard message. */
	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);

	if (localtime_r((time_t *)&rewindto, &t) != NULL &&
	    strftime(timestr, 128, "%c", &t) != 0) {
		if (dryrun) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "Would be able to return %s "
			    "to its state as of %s.\n"),
			    name, timestr);
		} else {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "Pool %s returned to its state as of %s.\n"),
			    name, timestr);
		}
		/* Over two minutes of loss is reported in rounded minutes. */
		if (loss > 120) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "%s approximately %lld "),
			    dryrun ? "Would discard" : "Discarded",
			    ((longlong_t)loss + 30) / 60);
			(void) printf(dgettext(TEXT_DOMAIN,
			    "minutes of transactions.\n"));
		} else if (loss > 0) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "%s approximately %lld "),
			    dryrun ? "Would discard" : "Discarded",
			    (longlong_t)loss);
			(void) printf(dgettext(TEXT_DOMAIN,
			    "seconds of transactions.\n"));
		}
	}
}
1567
/*
 * Print advice explaining whether (and how) a pool that failed to load can
 * be recovered by rewinding to an earlier transaction group.  When
 * 'reason' is non-negative the text is prefixed with "action: " (status
 * output style); otherwise it is indented with a tab.  The rewind details
 * are taken from the ZPOOL_CONFIG_LOAD_INFO/ZPOOL_CONFIG_REWIND_INFO
 * nvlists inside 'config'.  Produces no output unless the handle has
 * error printing enabled.
 */
void
zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
    nvlist_t *config)
{
	nvlist_t *nv = NULL;
	int64_t loss = -1;		/* seconds of transactions lost */
	uint64_t edata = UINT64_MAX;	/* persistent data errors after rewind */
	uint64_t rewindto;		/* timestamp the rewind would return to */
	struct tm t;
	char timestr[128];

	if (!hdl->libzfs_printerr)
		return;

	if (reason >= 0)
		(void) printf(dgettext(TEXT_DOMAIN, "action: "));
	else
		(void) printf(dgettext(TEXT_DOMAIN, "\t"));

	/* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
	    nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 ||
	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
		goto no_info;

	/* Optional details; 'loss' and 'edata' keep their sentinels if absent */
	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
	    &edata);

	(void) printf(dgettext(TEXT_DOMAIN,
	    "Recovery is possible, but will result in some data loss.\n"));

	/* Mention the rewind target time only if it can be formatted. */
	if (localtime_r((time_t *)&rewindto, &t) != NULL &&
	    strftime(timestr, 128, "%c", &t) != 0) {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "\tReturning the pool to its state as of %s\n"
		    "\tshould correct the problem.  "),
		    timestr);
	} else {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "\tReverting the pool to an earlier state "
		    "should correct the problem.\n\t"));
	}

	/* Report the loss in minutes when over two minutes, else seconds. */
	if (loss > 120) {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "Approximately %lld minutes of data\n"
		    "\tmust be discarded, irreversibly.  "),
		    ((longlong_t)loss + 30) / 60);
	} else if (loss > 0) {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "Approximately %lld seconds of data\n"
		    "\tmust be discarded, irreversibly.  "),
		    (longlong_t)loss);
	}
	/* UINT64_MAX means the error count was not reported at all. */
	if (edata != 0 && edata != UINT64_MAX) {
		if (edata == 1) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "After rewind, at least\n"
			    "\tone persistent user-data error will remain.  "));
		} else {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "After rewind, several\n"
			    "\tpersistent user-data errors will remain.  "));
		}
	}
	(void) printf(dgettext(TEXT_DOMAIN,
	    "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
	    reason >= 0 ? "clear" : "import", name);

	(void) printf(dgettext(TEXT_DOMAIN,
	    "A scrub of the pool\n"
	    "\tis strongly recommended after recovery.\n"));
	return;

no_info:
	(void) printf(dgettext(TEXT_DOMAIN,
	    "Destroy and re-create the pool from\n\ta backup source.\n"));
}
1647
1648 /*
1649  * zpool_import() is a contracted interface. Should be kept the same
1650  * if possible.
1651  *
1652  * Applications should use zpool_import_props() to import a pool with
1653  * new properties value to be set.
1654  */
1655 int
1656 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1657     char *altroot)
1658 {
1659         nvlist_t *props = NULL;
1660         int ret;
1661
1662         if (altroot != NULL) {
1663                 if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1664                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1665                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1666                             newname));
1667                 }
1668
1669                 if (nvlist_add_string(props,
1670                     zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
1671                     nvlist_add_string(props,
1672                     zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
1673                         nvlist_free(props);
1674                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1675                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1676                             newname));
1677                 }
1678         }
1679
1680         ret = zpool_import_props(hdl, config, newname, props,
1681             ZFS_IMPORT_NORMAL);
1682         nvlist_free(props);
1683         return (ret);
1684 }
1685
1686 static void
1687 print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
1688     int indent)
1689 {
1690         nvlist_t **child;
1691         uint_t c, children;
1692         char *vname;
1693         uint64_t is_log = 0;
1694
1695         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
1696             &is_log);
1697
1698         if (name != NULL)
1699                 (void) printf("\t%*s%s%s\n", indent, "", name,
1700                     is_log ? " [log]" : "");
1701
1702         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1703             &child, &children) != 0)
1704                 return;
1705
1706         for (c = 0; c < children; c++) {
1707                 vname = zpool_vdev_name(hdl, NULL, child[c], VDEV_NAME_TYPE_ID);
1708                 print_vdev_tree(hdl, vname, child[c], indent + 2);
1709                 free(vname);
1710         }
1711 }
1712
1713 void
1714 zpool_print_unsup_feat(nvlist_t *config)
1715 {
1716         nvlist_t *nvinfo, *unsup_feat;
1717         nvpair_t *nvp;
1718
1719         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) ==
1720             0);
1721         verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT,
1722             &unsup_feat) == 0);
1723
1724         for (nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL;
1725             nvp = nvlist_next_nvpair(unsup_feat, nvp)) {
1726                 char *desc;
1727
1728                 verify(nvpair_type(nvp) == DATA_TYPE_STRING);
1729                 verify(nvpair_value_string(nvp, &desc) == 0);
1730
1731                 if (strlen(desc) > 0)
1732                         (void) printf("\t%s (%s)\n", nvpair_name(nvp), desc);
1733                 else
1734                         (void) printf("\t%s\n", nvpair_name(nvp));
1735         }
1736 }
1737
1738 /*
1739  * Import the given pool using the known configuration and a list of
1740  * properties to be set. The configuration should have come from
1741  * zpool_find_import(). The 'newname' parameters control whether the pool
1742  * is imported with a different name.
1743  */
1744 int
1745 zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1746     nvlist_t *props, int flags)
1747 {
1748         zfs_cmd_t zc = {"\0"};
1749         zpool_load_policy_t policy;
1750         nvlist_t *nv = NULL;
1751         nvlist_t *nvinfo = NULL;
1752         nvlist_t *missing = NULL;
1753         char *thename;
1754         char *origname;
1755         int ret;
1756         int error = 0;
1757         char errbuf[1024];
1758
1759         verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1760             &origname) == 0);
1761
1762         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1763             "cannot import pool '%s'"), origname);
1764
1765         if (newname != NULL) {
1766                 if (!zpool_name_valid(hdl, B_FALSE, newname))
1767                         return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1768                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1769                             newname));
1770                 thename = (char *)newname;
1771         } else {
1772                 thename = origname;
1773         }
1774
1775         if (props != NULL) {
1776                 uint64_t version;
1777                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
1778
1779                 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1780                     &version) == 0);
1781
1782                 if ((props = zpool_valid_proplist(hdl, origname,
1783                     props, version, flags, errbuf)) == NULL)
1784                         return (-1);
1785                 if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1786                         nvlist_free(props);
1787                         return (-1);
1788                 }
1789                 nvlist_free(props);
1790         }
1791
1792         (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1793
1794         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1795             &zc.zc_guid) == 0);
1796
1797         if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1798                 zcmd_free_nvlists(&zc);
1799                 return (-1);
1800         }
1801         if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
1802                 zcmd_free_nvlists(&zc);
1803                 return (-1);
1804         }
1805
1806         zc.zc_cookie = flags;
1807         while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
1808             errno == ENOMEM) {
1809                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
1810                         zcmd_free_nvlists(&zc);
1811                         return (-1);
1812                 }
1813         }
1814         if (ret != 0)
1815                 error = errno;
1816
1817         (void) zcmd_read_dst_nvlist(hdl, &zc, &nv);
1818
1819         zcmd_free_nvlists(&zc);
1820
1821         zpool_get_load_policy(config, &policy);
1822
1823         if (error) {
1824                 char desc[1024];
1825                 char aux[256];
1826
1827                 /*
1828                  * Dry-run failed, but we print out what success
1829                  * looks like if we found a best txg
1830                  */
1831                 if (policy.zlp_rewind & ZPOOL_TRY_REWIND) {
1832                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1833                             B_TRUE, nv);
1834                         nvlist_free(nv);
1835                         return (-1);
1836                 }
1837
1838                 if (newname == NULL)
1839                         (void) snprintf(desc, sizeof (desc),
1840                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1841                             thename);
1842                 else
1843                         (void) snprintf(desc, sizeof (desc),
1844                             dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1845                             origname, thename);
1846
1847                 switch (error) {
1848                 case ENOTSUP:
1849                         if (nv != NULL && nvlist_lookup_nvlist(nv,
1850                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1851                             nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) {
1852                                 (void) printf(dgettext(TEXT_DOMAIN, "This "
1853                                     "pool uses the following feature(s) not "
1854                                     "supported by this system:\n"));
1855                                 zpool_print_unsup_feat(nv);
1856                                 if (nvlist_exists(nvinfo,
1857                                     ZPOOL_CONFIG_CAN_RDONLY)) {
1858                                         (void) printf(dgettext(TEXT_DOMAIN,
1859                                             "All unsupported features are only "
1860                                             "required for writing to the pool."
1861                                             "\nThe pool can be imported using "
1862                                             "'-o readonly=on'.\n"));
1863                                 }
1864                         }
1865                         /*
1866                          * Unsupported version.
1867                          */
1868                         (void) zfs_error(hdl, EZFS_BADVERSION, desc);
1869                         break;
1870
1871                 case EREMOTEIO:
1872                         if (nv != NULL && nvlist_lookup_nvlist(nv,
1873                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0) {
1874                                 char *hostname = "<unknown>";
1875                                 uint64_t hostid = 0;
1876                                 mmp_state_t mmp_state;
1877
1878                                 mmp_state = fnvlist_lookup_uint64(nvinfo,
1879                                     ZPOOL_CONFIG_MMP_STATE);
1880
1881                                 if (nvlist_exists(nvinfo,
1882                                     ZPOOL_CONFIG_MMP_HOSTNAME))
1883                                         hostname = fnvlist_lookup_string(nvinfo,
1884                                             ZPOOL_CONFIG_MMP_HOSTNAME);
1885
1886                                 if (nvlist_exists(nvinfo,
1887                                     ZPOOL_CONFIG_MMP_HOSTID))
1888                                         hostid = fnvlist_lookup_uint64(nvinfo,
1889                                             ZPOOL_CONFIG_MMP_HOSTID);
1890
1891                                 if (mmp_state == MMP_STATE_ACTIVE) {
1892                                         (void) snprintf(aux, sizeof (aux),
1893                                             dgettext(TEXT_DOMAIN, "pool is imp"
1894                                             "orted on host '%s' (hostid=%lx).\n"
1895                                             "Export the pool on the other "
1896                                             "system, then run 'zpool import'."),
1897                                             hostname, (unsigned long) hostid);
1898                                 } else if (mmp_state == MMP_STATE_NO_HOSTID) {
1899                                         (void) snprintf(aux, sizeof (aux),
1900                                             dgettext(TEXT_DOMAIN, "pool has "
1901                                             "the multihost property on and "
1902                                             "the\nsystem's hostid is not set. "
1903                                             "Set a unique system hostid with "
1904                                             "the zgenhostid(8) command.\n"));
1905                                 }
1906
1907                                 (void) zfs_error_aux(hdl, aux);
1908                         }
1909                         (void) zfs_error(hdl, EZFS_ACTIVE_POOL, desc);
1910                         break;
1911
1912                 case EINVAL:
1913                         (void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1914                         break;
1915
1916                 case EROFS:
1917                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1918                             "one or more devices is read only"));
1919                         (void) zfs_error(hdl, EZFS_BADDEV, desc);
1920                         break;
1921
1922                 case ENXIO:
1923                         if (nv && nvlist_lookup_nvlist(nv,
1924                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1925                             nvlist_lookup_nvlist(nvinfo,
1926                             ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
1927                                 (void) printf(dgettext(TEXT_DOMAIN,
1928                                     "The devices below are missing or "
1929                                     "corrupted, use '-m' to import the pool "
1930                                     "anyway:\n"));
1931                                 print_vdev_tree(hdl, NULL, missing, 2);
1932                                 (void) printf("\n");
1933                         }
1934                         (void) zpool_standard_error(hdl, error, desc);
1935                         break;
1936
1937                 case EEXIST:
1938                         (void) zpool_standard_error(hdl, error, desc);
1939                         break;
1940
1941                 case EBUSY:
1942                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1943                             "one or more devices are already in use\n"));
1944                         (void) zfs_error(hdl, EZFS_BADDEV, desc);
1945                         break;
1946                 case ENAMETOOLONG:
1947                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1948                             "new name of at least one dataset is longer than "
1949                             "the maximum allowable length"));
1950                         (void) zfs_error(hdl, EZFS_NAMETOOLONG, desc);
1951                         break;
1952                 default:
1953                         (void) zpool_standard_error(hdl, error, desc);
1954                         zpool_explain_recover(hdl,
1955                             newname ? origname : thename, -error, nv);
1956                         break;
1957                 }
1958
1959                 nvlist_free(nv);
1960                 ret = -1;
1961         } else {
1962                 zpool_handle_t *zhp;
1963
1964                 /*
1965                  * This should never fail, but play it safe anyway.
1966                  */
1967                 if (zpool_open_silent(hdl, thename, &zhp) != 0)
1968                         ret = -1;
1969                 else if (zhp != NULL)
1970                         zpool_close(zhp);
1971                 if (policy.zlp_rewind &
1972                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
1973                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1974                             ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0), nv);
1975                 }
1976                 nvlist_free(nv);
1977                 return (0);
1978         }
1979
1980         return (ret);
1981 }
1982
1983 /*
1984  * Scan the pool.
1985  */
1986 int
1987 zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
1988 {
1989         zfs_cmd_t zc = {"\0"};
1990         char msg[1024];
1991         int err;
1992         libzfs_handle_t *hdl = zhp->zpool_hdl;
1993
1994         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1995         zc.zc_cookie = func;
1996         zc.zc_flags = cmd;
1997
1998         if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0)
1999                 return (0);
2000
2001         err = errno;
2002
2003         /* ECANCELED on a scrub means we resumed a paused scrub */
2004         if (err == ECANCELED && func == POOL_SCAN_SCRUB &&
2005             cmd == POOL_SCRUB_NORMAL)
2006                 return (0);
2007
2008         if (err == ENOENT && func != POOL_SCAN_NONE && cmd == POOL_SCRUB_NORMAL)
2009                 return (0);
2010
2011         if (func == POOL_SCAN_SCRUB) {
2012                 if (cmd == POOL_SCRUB_PAUSE) {
2013                         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2014                             "cannot pause scrubbing %s"), zc.zc_name);
2015                 } else {
2016                         assert(cmd == POOL_SCRUB_NORMAL);
2017                         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2018                             "cannot scrub %s"), zc.zc_name);
2019                 }
2020         } else if (func == POOL_SCAN_NONE) {
2021                 (void) snprintf(msg, sizeof (msg),
2022                     dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
2023                     zc.zc_name);
2024         } else {
2025                 assert(!"unexpected result");
2026         }
2027
2028         if (err == EBUSY) {
2029                 nvlist_t *nvroot;
2030                 pool_scan_stat_t *ps = NULL;
2031                 uint_t psc;
2032
2033                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
2034                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
2035                 (void) nvlist_lookup_uint64_array(nvroot,
2036                     ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
2037                 if (ps && ps->pss_func == POOL_SCAN_SCRUB) {
2038                         if (cmd == POOL_SCRUB_PAUSE)
2039                                 return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
2040                         else
2041                                 return (zfs_error(hdl, EZFS_SCRUBBING, msg));
2042                 } else {
2043                         return (zfs_error(hdl, EZFS_RESILVERING, msg));
2044                 }
2045         } else if (err == ENOENT) {
2046                 return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
2047         } else {
2048                 return (zpool_standard_error(hdl, err, msg));
2049         }
2050 }
2051
2052 /*
2053  * Find a vdev that matches the search criteria specified. We use the
2054  * the nvpair name to determine how we should look for the device.
2055  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
2056  * spare; but FALSE if its an INUSE spare.
2057  */
static nvlist_t *
vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
    boolean_t *l2cache, boolean_t *log)
{
	uint_t c, children;
	nvlist_t **child;
	nvlist_t *ret;
	uint64_t is_log;
	char *srchkey;
	/* Only the first pair of 'search' is consulted as the criterion. */
	nvpair_t *pair = nvlist_next_nvpair(search, NULL);

	/* Nothing to look for */
	if (search == NULL || pair == NULL)
		return (NULL);

	/* Obtain the key we will use to search */
	srchkey = nvpair_name(pair);

	/* First try to match this vdev itself against the criterion. */
	switch (nvpair_type(pair)) {
	case DATA_TYPE_UINT64:
		/* uint64 search: match on the vdev guid. */
		if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
			uint64_t srchval, theguid;

			verify(nvpair_value_uint64(pair, &srchval) == 0);
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
			    &theguid) == 0);
			if (theguid == srchval)
				return (nv);
		}
		break;

	case DATA_TYPE_STRING: {
		char *srchval, *val;

		verify(nvpair_value_string(pair, &srchval) == 0);
		/* Vdevs without this key (e.g. no path) cannot match. */
		if (nvlist_lookup_string(nv, srchkey, &val) != 0)
			break;

		/*
		 * Search for the requested value. Special cases:
		 *
		 * - ZPOOL_CONFIG_PATH for whole disk entries.  These end in
		 *   "-part1", or "p1".  The suffix is hidden from the user,
		 *   but included in the string, so this matches around it.
		 * - ZPOOL_CONFIG_PATH for short names zfs_strcmp_shortname()
		 *   is used to check all possible expanded paths.
		 * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
		 *
		 * Otherwise, all other searches are simple string compares.
		 */
		if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0) {
			uint64_t wholedisk = 0;

			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
			    &wholedisk);
			if (zfs_strcmp_pathname(srchval, val, wholedisk) == 0)
				return (nv);

		} else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
			char *type, *idx, *end, *p;
			uint64_t id, vdev_id;

			/*
			 * Determine our vdev type, keeping in mind
			 * that the srchval is composed of a type and
			 * vdev id pair (i.e. mirror-4).
			 */
			if ((type = strdup(srchval)) == NULL)
				return (NULL);

			if ((p = strrchr(type, '-')) == NULL) {
				free(type);
				break;
			}
			/* Split "type-id" at the last '-'. */
			idx = p + 1;
			*p = '\0';

			/*
			 * If the types don't match then keep looking.
			 */
			if (strncmp(val, type, strlen(val)) != 0) {
				free(type);
				break;
			}

			verify(zpool_vdev_is_interior(type));
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
			    &id) == 0);

			errno = 0;
			vdev_id = strtoull(idx, &end, 10);

			free(type);
			if (errno != 0)
				return (NULL);

			/*
			 * Now verify that we have the correct vdev id.
			 */
			if (vdev_id == id)
				return (nv);
		}

		/*
		 * Common case
		 */
		if (strcmp(srchval, val) == 0)
			return (nv);
		break;
	}

	default:
		break;
	}

	/* No match here: recurse into the ordinary child vdevs. */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		return (NULL);

	for (c = 0; c < children; c++) {
		if ((ret = vdev_to_nvlist_iter(child[c], search,
		    avail_spare, l2cache, NULL)) != NULL) {
			/*
			 * The 'is_log' value is only set for the toplevel
			 * vdev, not the leaf vdevs.  So we always lookup the
			 * log device from the root of the vdev tree (where
			 * 'log' is non-NULL).
			 */
			if (log != NULL &&
			    nvlist_lookup_uint64(child[c],
			    ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
			    is_log) {
				*log = B_TRUE;
			}
			return (ret);
		}
	}

	/* Then search the spares; a hit marks the result as a spare. */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if ((ret = vdev_to_nvlist_iter(child[c], search,
			    avail_spare, l2cache, NULL)) != NULL) {
				*avail_spare = B_TRUE;
				return (ret);
			}
		}
	}

	/* Finally the L2ARC devices; a hit marks the result as l2cache. */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if ((ret = vdev_to_nvlist_iter(child[c], search,
			    avail_spare, l2cache, NULL)) != NULL) {
				*l2cache = B_TRUE;
				return (ret);
			}
		}
	}

	return (NULL);
}
2220
2221 /*
2222  * Given a physical path (minus the "/devices" prefix), find the
2223  * associated vdev.
2224  */
2225 nvlist_t *
2226 zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
2227     boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
2228 {
2229         nvlist_t *search, *nvroot, *ret;
2230
2231         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2232         verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
2233
2234         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2235             &nvroot) == 0);
2236
2237         *avail_spare = B_FALSE;
2238         *l2cache = B_FALSE;
2239         if (log != NULL)
2240                 *log = B_FALSE;
2241         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2242         nvlist_free(search);
2243
2244         return (ret);
2245 }
2246
2247 /*
2248  * Determine if we have an "interior" top-level vdev (i.e mirror/raidz).
2249  */
2250 static boolean_t
2251 zpool_vdev_is_interior(const char *name)
2252 {
2253         if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
2254             strncmp(name, VDEV_TYPE_SPARE, strlen(VDEV_TYPE_SPARE)) == 0 ||
2255             strncmp(name,
2256             VDEV_TYPE_REPLACING, strlen(VDEV_TYPE_REPLACING)) == 0 ||
2257             strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
2258                 return (B_TRUE);
2259         return (B_FALSE);
2260 }
2261
2262 nvlist_t *
2263 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
2264     boolean_t *l2cache, boolean_t *log)
2265 {
2266         char *end;
2267         nvlist_t *nvroot, *search, *ret;
2268         uint64_t guid;
2269
2270         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2271
2272         guid = strtoull(path, &end, 0);
2273         if (guid != 0 && *end == '\0') {
2274                 verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
2275         } else if (zpool_vdev_is_interior(path)) {
2276                 verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
2277         } else {
2278                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
2279         }
2280
2281         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2282             &nvroot) == 0);
2283
2284         *avail_spare = B_FALSE;
2285         *l2cache = B_FALSE;
2286         if (log != NULL)
2287                 *log = B_FALSE;
2288         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2289         nvlist_free(search);
2290
2291         return (ret);
2292 }
2293
2294 static int
2295 vdev_is_online(nvlist_t *nv)
2296 {
2297         uint64_t ival;
2298
2299         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
2300             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
2301             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
2302                 return (0);
2303
2304         return (1);
2305 }
2306
2307 /*
2308  * Helper function for zpool_get_physpaths().
2309  */
2310 static int
2311 vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
2312     size_t *bytes_written)
2313 {
2314         size_t bytes_left, pos, rsz;
2315         char *tmppath;
2316         const char *format;
2317
2318         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
2319             &tmppath) != 0)
2320                 return (EZFS_NODEVICE);
2321
2322         pos = *bytes_written;
2323         bytes_left = physpath_size - pos;
2324         format = (pos == 0) ? "%s" : " %s";
2325
2326         rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
2327         *bytes_written += rsz;
2328
2329         if (rsz >= bytes_left) {
2330                 /* if physpath was not copied properly, clear it */
2331                 if (bytes_left != 0) {
2332                         physpath[pos] = 0;
2333                 }
2334                 return (EZFS_NOSPC);
2335         }
2336         return (0);
2337 }
2338
2339 static int
2340 vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
2341     size_t *rsz, boolean_t is_spare)
2342 {
2343         char *type;
2344         int ret;
2345
2346         if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
2347                 return (EZFS_INVALCONFIG);
2348
2349         if (strcmp(type, VDEV_TYPE_DISK) == 0) {
2350                 /*
2351                  * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
2352                  * For a spare vdev, we only want to boot from the active
2353                  * spare device.
2354                  */
2355                 if (is_spare) {
2356                         uint64_t spare = 0;
2357                         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
2358                             &spare);
2359                         if (!spare)
2360                                 return (EZFS_INVALCONFIG);
2361                 }
2362
2363                 if (vdev_is_online(nv)) {
2364                         if ((ret = vdev_get_one_physpath(nv, physpath,
2365                             phypath_size, rsz)) != 0)
2366                                 return (ret);
2367                 }
2368         } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
2369             strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
2370             strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
2371             (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
2372                 nvlist_t **child;
2373                 uint_t count;
2374                 int i, ret;
2375
2376                 if (nvlist_lookup_nvlist_array(nv,
2377                     ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
2378                         return (EZFS_INVALCONFIG);
2379
2380                 for (i = 0; i < count; i++) {
2381                         ret = vdev_get_physpaths(child[i], physpath,
2382                             phypath_size, rsz, is_spare);
2383                         if (ret == EZFS_NOSPC)
2384                                 return (ret);
2385                 }
2386         }
2387
2388         return (EZFS_POOL_INVALARG);
2389 }
2390
2391 /*
2392  * Get phys_path for a root pool config.
2393  * Return 0 on success; non-zero on failure.
2394  */
2395 static int
2396 zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
2397 {
2398         size_t rsz;
2399         nvlist_t *vdev_root;
2400         nvlist_t **child;
2401         uint_t count;
2402         char *type;
2403
2404         rsz = 0;
2405
2406         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2407             &vdev_root) != 0)
2408                 return (EZFS_INVALCONFIG);
2409
2410         if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
2411             nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
2412             &child, &count) != 0)
2413                 return (EZFS_INVALCONFIG);
2414
2415         /*
2416          * root pool can only have a single top-level vdev.
2417          */
2418         if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1)
2419                 return (EZFS_POOL_INVALARG);
2420
2421         (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
2422             B_FALSE);
2423
2424         /* No online devices */
2425         if (rsz == 0)
2426                 return (EZFS_NODEVICE);
2427
2428         return (0);
2429 }
2430
2431 /*
2432  * Get phys_path for a root pool
2433  * Return 0 on success; non-zero on failure.
2434  */
2435 int
2436 zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
2437 {
2438         return (zpool_get_config_physpath(zhp->zpool_config, physpath,
2439             phypath_size));
2440 }
2441
/*
 * If the device has been dynamically expanded then we need to relabel
 * the disk to use the new unallocated space.
 *
 * Returns 0 on success; on failure reports EZFS_OPENFAILED or
 * EZFS_NOCAP through 'hdl' with 'msg' as the error prefix.
 */
static int
zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
{
	int fd, error;

	/* O_DIRECT: label I/O should bypass the page cache. */
	if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
		    "relabel '%s': unable to open device: %d"), path, errno);
		return (zfs_error(hdl, EZFS_OPENFAILED, msg));
	}

	/*
	 * It's possible that we might encounter an error if the device
	 * does not have any unallocated space left. If so, we simply
	 * ignore that error and continue on.
	 *
	 * Also, we don't call efi_rescan() - that would just return EBUSY.
	 * The module will do it for us in vdev_disk_open().
	 */
	error = efi_use_whole_disk(fd);

	/* Flush the buffers to disk and invalidate the page cache. */
	(void) fsync(fd);
	(void) ioctl(fd, BLKFLSBUF);

	(void) close(fd);
	/* VT_ENOSPC (no unallocated space left) is the benign case above. */
	if (error && error != VT_ENOSPC) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
		    "relabel '%s': unable to read disk capacity"), path);
		return (zfs_error(hdl, EZFS_NOCAP, msg));
	}

	return (0);
}
2480
2481 /*
2482  * Convert a vdev path to a GUID.  Returns GUID or 0 on error.
2483  *
2484  * If is_spare, is_l2cache, or is_log is non-NULL, then store within it
2485  * if the VDEV is a spare, l2cache, or log device.  If they're NULL then
2486  * ignore them.
2487  */
2488 static uint64_t
2489 zpool_vdev_path_to_guid_impl(zpool_handle_t *zhp, const char *path,
2490     boolean_t *is_spare, boolean_t *is_l2cache, boolean_t *is_log)
2491 {
2492         uint64_t guid;
2493         boolean_t spare = B_FALSE, l2cache = B_FALSE, log = B_FALSE;
2494         nvlist_t *tgt;
2495
2496         if ((tgt = zpool_find_vdev(zhp, path, &spare, &l2cache,
2497             &log)) == NULL)
2498                 return (0);
2499
2500         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &guid) == 0);
2501         if (is_spare != NULL)
2502                 *is_spare = spare;
2503         if (is_l2cache != NULL)
2504                 *is_l2cache = l2cache;
2505         if (is_log != NULL)
2506                 *is_log = log;
2507
2508         return (guid);
2509 }
2510
2511 /* Convert a vdev path to a GUID.  Returns GUID or 0 on error. */
2512 uint64_t
2513 zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path)
2514 {
2515         return (zpool_vdev_path_to_guid_impl(zhp, path, NULL, NULL, NULL));
2516 }
2517
/*
 * Bring the specified vdev online.   The 'flags' parameter is a set of the
 * ZFS_ONLINE_* flags.
 *
 * On success, stores the resulting vdev state in *newstate and returns 0;
 * on failure returns a libzfs error reported through the pool's handle.
 */
int
zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
    vdev_state_t *newstate)
{
	zfs_cmd_t zc = {"\0"};
	char msg[1024];
	char *pathname;
	nvlist_t *tgt;
	boolean_t avail_spare, l2cache, islog;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	int error;

	if (flags & ZFS_ONLINE_EXPAND) {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
	} else {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
	}

	/* Locate the vdev (by path or GUID string) in this pool's config. */
	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
	    &islog)) == NULL)
		return (zfs_error(hdl, EZFS_NODEVICE, msg));

	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);

	/* Available hot spares cannot be onlined directly. */
	if (avail_spare)
		return (zfs_error(hdl, EZFS_ISSPARE, msg));

	/*
	 * If expansion was requested explicitly, or the pool has
	 * autoexpand set, and the vdev has a device path, relabel whole
	 * disks so newly added capacity becomes usable.
	 */
	if ((flags & ZFS_ONLINE_EXPAND ||
	    zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) &&
	    nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, &pathname) == 0) {
		uint64_t wholedisk = 0;

		(void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
		    &wholedisk);

		/*
		 * XXX - L2ARC 1.0 devices can't support expansion.
		 */
		if (l2cache) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cannot expand cache devices"));
			return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
		}

		if (wholedisk) {
			const char *fullpath = path;
			char buf[MAXPATHLEN];

			/* Resolve short names (no leading '/') first. */
			if (path[0] != '/') {
				error = zfs_resolve_shortname(path, buf,
				    sizeof (buf));
				if (error != 0)
					return (zfs_error(hdl, EZFS_NODEVICE,
					    msg));

				fullpath = buf;
			}

			error = zpool_relabel_disk(hdl, fullpath, msg);
			if (error != 0)
				return (error);
		}
	}

	zc.zc_cookie = VDEV_STATE_ONLINE;
	zc.zc_obj = flags;

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
		/*
		 * EINVAL indicates the device was split off into a new
		 * pool; onlining it here is not possible.
		 */
		if (errno == EINVAL) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
			    "from this pool into a new one.  Use '%s' "
			    "instead"), "zpool detach");
			return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
		}
		return (zpool_standard_error(hdl, errno, msg));
	}

	/* The kernel returns the resulting state in zc_cookie. */
	*newstate = zc.zc_cookie;
	return (0);
}
2605
2606 /*
2607  * Take the specified vdev offline
2608  */
2609 int
2610 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
2611 {
2612         zfs_cmd_t zc = {"\0"};
2613         char msg[1024];
2614         nvlist_t *tgt;
2615         boolean_t avail_spare, l2cache;
2616         libzfs_handle_t *hdl = zhp->zpool_hdl;
2617
2618         (void) snprintf(msg, sizeof (msg),
2619             dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
2620
2621         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2622         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2623             NULL)) == NULL)
2624                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2625
2626         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2627
2628         if (avail_spare)
2629                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2630
2631         zc.zc_cookie = VDEV_STATE_OFFLINE;
2632         zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
2633
2634         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2635                 return (0);
2636
2637         switch (errno) {
2638         case EBUSY:
2639
2640                 /*
2641                  * There are no other replicas of this device.
2642                  */
2643                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2644
2645         case EEXIST:
2646                 /*
2647                  * The log device has unplayed logs
2648                  */
2649                 return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
2650
2651         default:
2652                 return (zpool_standard_error(hdl, errno, msg));
2653         }
2654 }
2655
2656 /*
2657  * Mark the given vdev faulted.
2658  */
2659 int
2660 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2661 {
2662         zfs_cmd_t zc = {"\0"};
2663         char msg[1024];
2664         libzfs_handle_t *hdl = zhp->zpool_hdl;
2665
2666         (void) snprintf(msg, sizeof (msg),
2667             dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
2668
2669         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2670         zc.zc_guid = guid;
2671         zc.zc_cookie = VDEV_STATE_FAULTED;
2672         zc.zc_obj = aux;
2673
2674         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2675                 return (0);
2676
2677         switch (errno) {
2678         case EBUSY:
2679
2680                 /*
2681                  * There are no other replicas of this device.
2682                  */
2683                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2684
2685         default:
2686                 return (zpool_standard_error(hdl, errno, msg));
2687         }
2688
2689 }
2690
2691 /*
2692  * Mark the given vdev degraded.
2693  */
2694 int
2695 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2696 {
2697         zfs_cmd_t zc = {"\0"};
2698         char msg[1024];
2699         libzfs_handle_t *hdl = zhp->zpool_hdl;
2700
2701         (void) snprintf(msg, sizeof (msg),
2702             dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
2703
2704         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2705         zc.zc_guid = guid;
2706         zc.zc_cookie = VDEV_STATE_DEGRADED;
2707         zc.zc_obj = aux;
2708
2709         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2710                 return (0);
2711
2712         return (zpool_standard_error(hdl, errno, msg));
2713 }
2714
2715 /*
2716  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
2717  * a hot spare.
2718  */
2719 static boolean_t
2720 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
2721 {
2722         nvlist_t **child;
2723         uint_t c, children;
2724         char *type;
2725
2726         if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
2727             &children) == 0) {
2728                 verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
2729                     &type) == 0);
2730
2731                 if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
2732                     children == 2 && child[which] == tgt)
2733                         return (B_TRUE);
2734
2735                 for (c = 0; c < children; c++)
2736                         if (is_replacing_spare(child[c], tgt, which))
2737                                 return (B_TRUE);
2738         }
2739
2740         return (B_FALSE);
2741 }
2742
/*
 * Attach new_disk (fully described by nvroot) to old_disk.
 * If 'replacing' is specified, the new disk will replace the old one.
 *
 * Returns 0 on success, -1 (with a libzfs error set) on failure.
 */
int
zpool_vdev_attach(zpool_handle_t *zhp,
    const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
{
	zfs_cmd_t zc = {"\0"};
	char msg[1024];
	int ret;
	nvlist_t *tgt;
	boolean_t avail_spare, l2cache, islog;
	uint64_t val;
	char *newname;
	nvlist_t **child;
	uint_t children;
	nvlist_t *config_root;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	boolean_t rootpool = zpool_is_bootable(zhp);

	if (replacing)
		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
		    "cannot replace %s with %s"), old_disk, new_disk);
	else
		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
		    "cannot attach %s to %s"), new_disk, old_disk);

	/* The attach point must exist and not be a spare or l2cache device. */
	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
	    &islog)) == NULL)
		return (zfs_error(hdl, EZFS_NODEVICE, msg));

	if (avail_spare)
		return (zfs_error(hdl, EZFS_ISSPARE, msg));

	if (l2cache)
		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));

	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
	zc.zc_cookie = replacing;

	/* The supplied vdev tree must describe exactly one new device. */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0 || children != 1) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "new device must be a single disk"));
		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
	}

	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);

	if ((newname = zpool_vdev_name(NULL, NULL, child[0], 0)) == NULL)
		return (-1);

	/*
	 * If the target is a hot spare that has been swapped in, we can only
	 * replace it with another hot spare.
	 */
	if (replacing &&
	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
	    (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
	    NULL) == NULL || !avail_spare) &&
	    is_replacing_spare(config_root, tgt, 1)) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "can only be replaced by another hot spare"));
		free(newname);
		return (zfs_error(hdl, EZFS_BADTARGET, msg));
	}

	free(newname);

	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);

	ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);

	zcmd_free_nvlists(&zc);

	if (ret == 0) {
		if (rootpool) {
			/*
			 * XXX need a better way to prevent user from
			 * booting up a half-baked vdev.
			 */
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
			    "sure to wait until resilver is done "
			    "before rebooting.\n"));
		}
		return (0);
	}

	/* Map the kernel errno to a more descriptive libzfs error. */
	switch (errno) {
	case ENOTSUP:
		/*
		 * Can't attach to or replace this type of vdev.
		 */
		if (replacing) {
			uint64_t version = zpool_get_prop_int(zhp,
			    ZPOOL_PROP_VERSION, NULL);

			if (islog)
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "cannot replace a log with a spare"));
			else if (version >= SPA_VERSION_MULTI_REPLACE)
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "already in replacing/spare config; wait "
				    "for completion or use 'zpool detach'"));
			else
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "cannot replace a replacing device"));
		} else {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "can only attach to mirrors and top-level "
			    "disks"));
		}
		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
		break;

	case EINVAL:
		/*
		 * The new device must be a single disk.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "new device must be a single disk"));
		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
		break;

	case EBUSY:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy, "
		    "or device removal is in progress"),
		    new_disk);
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case EOVERFLOW:
		/*
		 * The new device is too small.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "device is too small"));
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case EDOM:
		/*
		 * The new device has a different optimal sector size.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "new device has a different optimal sector size; use the "
		    "option '-o ashift=N' to override the optimal size"));
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case ENAMETOOLONG:
		/*
		 * The resulting top-level vdev spec won't fit in the label.
		 */
		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
		break;

	default:
		(void) zpool_standard_error(hdl, errno, msg);
	}

	return (-1);
}
2910
2911 /*
2912  * Detach the specified device.
2913  */
2914 int
2915 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
2916 {
2917         zfs_cmd_t zc = {"\0"};
2918         char msg[1024];
2919         nvlist_t *tgt;
2920         boolean_t avail_spare, l2cache;
2921         libzfs_handle_t *hdl = zhp->zpool_hdl;
2922
2923         (void) snprintf(msg, sizeof (msg),
2924             dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
2925
2926         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2927         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2928             NULL)) == NULL)
2929                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2930
2931         if (avail_spare)
2932                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2933
2934         if (l2cache)
2935                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2936
2937         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2938
2939         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
2940                 return (0);
2941
2942         switch (errno) {
2943
2944         case ENOTSUP:
2945                 /*
2946                  * Can't detach from this type of vdev.
2947                  */
2948                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
2949                     "applicable to mirror and replacing vdevs"));
2950                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2951                 break;
2952
2953         case EBUSY:
2954                 /*
2955                  * There are no other replicas of this device.
2956                  */
2957                 (void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
2958                 break;
2959
2960         default:
2961                 (void) zpool_standard_error(hdl, errno, msg);
2962         }
2963
2964         return (-1);
2965 }
2966
2967 /*
2968  * Find a mirror vdev in the source nvlist.
2969  *
2970  * The mchild array contains a list of disks in one of the top-level mirrors
2971  * of the source pool.  The schild array contains a list of disks that the
2972  * user specified on the command line.  We loop over the mchild array to
2973  * see if any entry in the schild array matches.
2974  *
2975  * If a disk in the mchild array is found in the schild array, we return
2976  * the index of that entry.  Otherwise we return -1.
2977  */
2978 static int
2979 find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
2980     nvlist_t **schild, uint_t schildren)
2981 {
2982         uint_t mc;
2983
2984         for (mc = 0; mc < mchildren; mc++) {
2985                 uint_t sc;
2986                 char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2987                     mchild[mc], 0);
2988
2989                 for (sc = 0; sc < schildren; sc++) {
2990                         char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2991                             schild[sc], 0);
2992                         boolean_t result = (strcmp(mpath, spath) == 0);
2993
2994                         free(spath);
2995                         if (result) {
2996                                 free(mpath);
2997                                 return (mc);
2998                         }
2999                 }
3000
3001                 free(mpath);
3002         }
3003
3004         return (-1);
3005 }
3006
3007 /*
3008  * Split a mirror pool.  If newroot points to null, then a new nvlist
3009  * is generated and it is the responsibility of the caller to free it.
3010  */
3011 int
3012 zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
3013     nvlist_t *props, splitflags_t flags)
3014 {
3015         zfs_cmd_t zc = {"\0"};
3016         char msg[1024];
3017         nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
3018         nvlist_t **varray = NULL, *zc_props = NULL;
3019         uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
3020         libzfs_handle_t *hdl = zhp->zpool_hdl;
3021         uint64_t vers, readonly = B_FALSE;
3022         boolean_t freelist = B_FALSE, memory_err = B_TRUE;
3023         int retval = 0;
3024
3025         (void) snprintf(msg, sizeof (msg),
3026             dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
3027
3028         if (!zpool_name_valid(hdl, B_FALSE, newname))
3029                 return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
3030
3031         if ((config = zpool_get_config(zhp, NULL)) == NULL) {
3032                 (void) fprintf(stderr, gettext("Internal error: unable to "
3033                     "retrieve pool configuration\n"));
3034                 return (-1);
3035         }
3036
3037         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
3038             == 0);
3039         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);
3040
3041         if (props) {
3042                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
3043                 if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
3044                     props, vers, flags, msg)) == NULL)
3045                         return (-1);
3046                 (void) nvlist_lookup_uint64(zc_props,
3047                     zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly);
3048                 if (readonly) {
3049                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3050                             "property %s can only be set at import time"),
3051                             zpool_prop_to_name(ZPOOL_PROP_READONLY));
3052                         return (-1);
3053                 }
3054         }
3055
3056         if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
3057             &children) != 0) {
3058                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3059                     "Source pool is missing vdev tree"));
3060                 nvlist_free(zc_props);
3061                 return (-1);
3062         }
3063
3064         varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
3065         vcount = 0;
3066
3067         if (*newroot == NULL ||
3068             nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
3069             &newchild, &newchildren) != 0)
3070                 newchildren = 0;
3071
3072         for (c = 0; c < children; c++) {
3073                 uint64_t is_log = B_FALSE, is_hole = B_FALSE;
3074                 char *type;
3075                 nvlist_t **mchild, *vdev;
3076                 uint_t mchildren;
3077                 int entry;
3078
3079                 /*
3080                  * Unlike cache & spares, slogs are stored in the
3081                  * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
3082                  */
3083                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
3084                     &is_log);
3085                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
3086                     &is_hole);
3087                 if (is_log || is_hole) {
3088                         /*
3089                          * Create a hole vdev and put it in the config.
3090                          */
3091                         if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
3092                                 goto out;
3093                         if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
3094                             VDEV_TYPE_HOLE) != 0)
3095                                 goto out;
3096                         if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
3097                             1) != 0)
3098                                 goto out;
3099                         if (lastlog == 0)
3100                                 lastlog = vcount;
3101                         varray[vcount++] = vdev;
3102                         continue;
3103                 }
3104                 lastlog = 0;
3105                 verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
3106                     == 0);
3107                 if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
3108                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3109                             "Source pool must be composed only of mirrors\n"));
3110                         retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
3111                         goto out;
3112                 }
3113
3114                 verify(nvlist_lookup_nvlist_array(child[c],
3115                     ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
3116
3117                 /* find or add an entry for this top-level vdev */
3118                 if (newchildren > 0 &&
3119                     (entry = find_vdev_entry(zhp, mchild, mchildren,
3120                     newchild, newchildren)) >= 0) {
3121                         /* We found a disk that the user specified. */
3122                         vdev = mchild[entry];
3123                         ++found;
3124                 } else {
3125                         /* User didn't specify a disk for this vdev. */
3126                         vdev = mchild[mchildren - 1];
3127                 }
3128
3129                 if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
3130                         goto out;
3131         }
3132
3133         /* did we find every disk the user specified? */
3134         if (found != newchildren) {
3135                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
3136                     "include at most one disk from each mirror"));
3137                 retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
3138                 goto out;
3139         }
3140
3141         /* Prepare the nvlist for populating. */
3142         if (*newroot == NULL) {
3143                 if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
3144                         goto out;
3145                 freelist = B_TRUE;
3146                 if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
3147                     VDEV_TYPE_ROOT) != 0)
3148                         goto out;
3149         } else {
3150                 verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
3151         }
3152
3153         /* Add all the children we found */
3154         if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
3155             lastlog == 0 ? vcount : lastlog) != 0)
3156                 goto out;
3157
3158         /*
3159          * If we're just doing a dry run, exit now with success.
3160          */
3161         if (flags.dryrun) {
3162                 memory_err = B_FALSE;
3163                 freelist = B_FALSE;
3164                 goto out;
3165         }
3166
3167         /* now build up the config list & call the ioctl */
3168         if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
3169                 goto out;
3170
3171         if (nvlist_add_nvlist(newconfig,
3172             ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
3173             nvlist_add_string(newconfig,
3174             ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
3175             nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
3176                 goto out;
3177
3178         /*
3179          * The new pool is automatically part of the namespace unless we
3180          * explicitly export it.
3181          */
3182         if (!flags.import)
3183                 zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
3184         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3185         (void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
3186         if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
3187                 goto out;
3188         if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
3189                 goto out;
3190
3191         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
3192                 retval = zpool_standard_error(hdl, errno, msg);
3193                 goto out;
3194         }
3195
3196         freelist = B_FALSE;
3197         memory_err = B_FALSE;
3198
3199 out:
3200         if (varray != NULL) {
3201                 int v;
3202
3203                 for (v = 0; v < vcount; v++)
3204                         nvlist_free(varray[v]);
3205                 free(varray);
3206         }
3207         zcmd_free_nvlists(&zc);
3208         nvlist_free(zc_props);
3209         nvlist_free(newconfig);
3210         if (freelist) {
3211                 nvlist_free(*newroot);
3212                 *newroot = NULL;
3213         }
3214
3215         if (retval != 0)
3216                 return (retval);
3217
3218         if (memory_err)
3219                 return (no_memory(hdl));
3220
3221         return (0);
3222 }
3223
3224 /*
3225  * Remove the given device.
3226  */
3227 int
3228 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
3229 {
3230         zfs_cmd_t zc = {"\0"};
3231         char msg[1024];
3232         nvlist_t *tgt;
3233         boolean_t avail_spare, l2cache, islog;
3234         libzfs_handle_t *hdl = zhp->zpool_hdl;
3235         uint64_t version;
3236
3237         (void) snprintf(msg, sizeof (msg),
3238             dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
3239
3240         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3241         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3242             &islog)) == NULL)
3243                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3244
3245         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
3246         if (islog && version < SPA_VERSION_HOLES) {
3247                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3248                     "pool must be upgraded to support log removal"));
3249                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
3250         }
3251
3252         if (!islog && !avail_spare && !l2cache && zpool_is_bootable(zhp)) {
3253                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3254                     "root pool can not have removed devices, "
3255                     "because GRUB does not understand them"));
3256                 return (zfs_error(hdl, EINVAL, msg));
3257         }
3258
3259         zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
3260
3261         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3262                 return (0);
3263
3264         switch (errno) {
3265
3266         case EINVAL:
3267                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3268                     "invalid config; all top-level vdevs must "
3269                     "have the same sector size and not be raidz."));
3270                 (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
3271                 break;
3272
3273         case EBUSY:
3274                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3275                     "Pool busy; removal may already be in progress"));
3276                 (void) zfs_error(hdl, EZFS_BUSY, msg);
3277                 break;
3278
3279         default:
3280                 (void) zpool_standard_error(hdl, errno, msg);
3281         }
3282         return (-1);
3283 }
3284
3285 int
3286 zpool_vdev_remove_cancel(zpool_handle_t *zhp)
3287 {
3288         zfs_cmd_t zc;
3289         char msg[1024];
3290         libzfs_handle_t *hdl = zhp->zpool_hdl;
3291
3292         (void) snprintf(msg, sizeof (msg),
3293             dgettext(TEXT_DOMAIN, "cannot cancel removal"));
3294
3295         bzero(&zc, sizeof (zc));
3296         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3297         zc.zc_cookie = 1;
3298
3299         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3300                 return (0);
3301
3302         return (zpool_standard_error(hdl, errno, msg));
3303 }
3304
3305 int
3306 zpool_vdev_indirect_size(zpool_handle_t *zhp, const char *path,
3307     uint64_t *sizep)
3308 {
3309         char msg[1024];
3310         nvlist_t *tgt;
3311         boolean_t avail_spare, l2cache, islog;
3312         libzfs_handle_t *hdl = zhp->zpool_hdl;
3313
3314         (void) snprintf(msg, sizeof (msg),
3315             dgettext(TEXT_DOMAIN, "cannot determine indirect size of %s"),
3316             path);
3317
3318         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3319             &islog)) == NULL)
3320                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3321
3322         if (avail_spare || l2cache || islog) {
3323                 *sizep = 0;
3324                 return (0);
3325         }
3326
3327         if (nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_INDIRECT_SIZE, sizep) != 0) {
3328                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3329                     "indirect size not available"));
3330                 return (zfs_error(hdl, EINVAL, msg));
3331         }
3332         return (0);
3333 }
3334
3335 /*
3336  * Clear the errors for the pool, or the particular device if specified.
3337  */
3338 int
3339 zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
3340 {
3341         zfs_cmd_t zc = {"\0"};
3342         char msg[1024];
3343         nvlist_t *tgt;
3344         zpool_load_policy_t policy;
3345         boolean_t avail_spare, l2cache;
3346         libzfs_handle_t *hdl = zhp->zpool_hdl;
3347         nvlist_t *nvi = NULL;
3348         int error;
3349
3350         if (path)
3351                 (void) snprintf(msg, sizeof (msg),
3352                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3353                     path);
3354         else
3355                 (void) snprintf(msg, sizeof (msg),
3356                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3357                     zhp->zpool_name);
3358
3359         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3360         if (path) {
3361                 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
3362                     &l2cache, NULL)) == NULL)
3363                         return (zfs_error(hdl, EZFS_NODEVICE, msg));
3364
3365                 /*
3366                  * Don't allow error clearing for hot spares.  Do allow
3367                  * error clearing for l2cache devices.
3368                  */
3369                 if (avail_spare)
3370                         return (zfs_error(hdl, EZFS_ISSPARE, msg));
3371
3372                 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
3373                     &zc.zc_guid) == 0);
3374         }
3375
3376         zpool_get_load_policy(rewindnvl, &policy);
3377         zc.zc_cookie = policy.zlp_rewind;
3378
3379         if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
3380                 return (-1);
3381
3382         if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
3383                 return (-1);
3384
3385         while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
3386             errno == ENOMEM) {
3387                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3388                         zcmd_free_nvlists(&zc);
3389                         return (-1);
3390                 }
3391         }
3392
3393         if (!error || ((policy.zlp_rewind & ZPOOL_TRY_REWIND) &&
3394             errno != EPERM && errno != EACCES)) {
3395                 if (policy.zlp_rewind &
3396                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
3397                         (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
3398                         zpool_rewind_exclaim(hdl, zc.zc_name,
3399                             ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0),
3400                             nvi);
3401                         nvlist_free(nvi);
3402                 }
3403                 zcmd_free_nvlists(&zc);
3404                 return (0);
3405         }
3406
3407         zcmd_free_nvlists(&zc);
3408         return (zpool_standard_error(hdl, errno, msg));
3409 }
3410
3411 /*
3412  * Similar to zpool_clear(), but takes a GUID (used by fmd).
3413  */
3414 int
3415 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
3416 {
3417         zfs_cmd_t zc = {"\0"};
3418         char msg[1024];
3419         libzfs_handle_t *hdl = zhp->zpool_hdl;
3420
3421         (void) snprintf(msg, sizeof (msg),
3422             dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
3423             (u_longlong_t)guid);
3424
3425         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3426         zc.zc_guid = guid;
3427         zc.zc_cookie = ZPOOL_NO_REWIND;
3428
3429         if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
3430                 return (0);
3431
3432         return (zpool_standard_error(hdl, errno, msg));
3433 }
3434
3435 /*
3436  * Change the GUID for a pool.
3437  */
3438 int
3439 zpool_reguid(zpool_handle_t *zhp)
3440 {
3441         char msg[1024];
3442         libzfs_handle_t *hdl = zhp->zpool_hdl;
3443         zfs_cmd_t zc = {"\0"};
3444
3445         (void) snprintf(msg, sizeof (msg),
3446             dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name);
3447
3448         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3449         if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0)
3450                 return (0);
3451
3452         return (zpool_standard_error(hdl, errno, msg));
3453 }
3454
3455 /*
3456  * Reopen the pool.
3457  */
3458 int
3459 zpool_reopen_one(zpool_handle_t *zhp, void *data)
3460 {
3461         libzfs_handle_t *hdl = zpool_get_handle(zhp);
3462         const char *pool_name = zpool_get_name(zhp);
3463         boolean_t *scrub_restart = data;
3464         int error;
3465
3466         error = lzc_reopen(pool_name, *scrub_restart);
3467         if (error) {
3468                 return (zpool_standard_error_fmt(hdl, error,
3469                     dgettext(TEXT_DOMAIN, "cannot reopen '%s'"), pool_name));
3470         }
3471
3472         return (0);
3473 }
3474
3475 /* call into libzfs_core to execute the sync IOCTL per pool */
3476 int
3477 zpool_sync_one(zpool_handle_t *zhp, void *data)
3478 {
3479         int ret;
3480         libzfs_handle_t *hdl = zpool_get_handle(zhp);
3481         const char *pool_name = zpool_get_name(zhp);
3482         boolean_t *force = data;
3483         nvlist_t *innvl = fnvlist_alloc();
3484
3485         fnvlist_add_boolean_value(innvl, "force", *force);
3486         if ((ret = lzc_sync(pool_name, innvl, NULL)) != 0) {
3487                 nvlist_free(innvl);
3488                 return (zpool_standard_error_fmt(hdl, ret,
3489                     dgettext(TEXT_DOMAIN, "sync '%s' failed"), pool_name));
3490         }
3491         nvlist_free(innvl);
3492
3493         return (0);
3494 }
3495
3496 #if defined(__sun__) || defined(__sun)
3497 /*
3498  * Convert from a devid string to a path.
3499  */
3500 static char *
3501 devid_to_path(char *devid_str)
3502 {
3503         ddi_devid_t devid;
3504         char *minor;
3505         char *path;
3506         devid_nmlist_t *list = NULL;
3507         int ret;
3508
3509         if (devid_str_decode(devid_str, &devid, &minor) != 0)
3510                 return (NULL);
3511
3512         ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
3513
3514         devid_str_free(minor);
3515         devid_free(devid);
3516
3517         if (ret != 0)
3518                 return (NULL);
3519
3520         /*
3521          * In a case the strdup() fails, we will just return NULL below.
3522          */
3523         path = strdup(list[0].devname);
3524
3525         devid_free_nmlist(list);
3526
3527         return (path);
3528 }
3529
3530 /*
3531  * Convert from a path to a devid string.
3532  */
3533 static char *
3534 path_to_devid(const char *path)
3535 {
3536         int fd;
3537         ddi_devid_t devid;
3538         char *minor, *ret;
3539
3540         if ((fd = open(path, O_RDONLY)) < 0)
3541                 return (NULL);
3542
3543         minor = NULL;
3544         ret = NULL;
3545         if (devid_get(fd, &devid) == 0) {
3546                 if (devid_get_minor_name(fd, &minor) == 0)
3547                         ret = devid_str_encode(devid, minor);
3548                 if (minor != NULL)
3549                         devid_str_free(minor);
3550                 devid_free(devid);
3551         }
3552         (void) close(fd);
3553
3554         return (ret);
3555 }
3556
3557 /*
3558  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
3559  * ignore any failure here, since a common case is for an unprivileged user to
3560  * type 'zpool status', and we'll display the correct information anyway.
3561  */
3562 static void
3563 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
3564 {
3565         zfs_cmd_t zc = {"\0"};
3566
3567         (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3568         (void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
3569         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3570             &zc.zc_guid) == 0);
3571
3572         (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
3573 }
3574 #endif /* sun */
3575
3576 /*
3577  * Remove partition suffix from a vdev path.  Partition suffixes may take three
3578  * forms: "-partX", "pX", or "X", where X is a string of digits.  The second
3579  * case only occurs when the suffix is preceded by a digit, i.e. "md0p0" The
3580  * third case only occurs when preceded by a string matching the regular
3581  * expression "^([hsv]|xv)d[a-z]+", i.e. a scsi, ide, virtio or xen disk.
3582  *
3583  * caller must free the returned string
3584  */
3585 char *
3586 zfs_strip_partition(char *path)
3587 {
3588         char *tmp = strdup(path);
3589         char *part = NULL, *d = NULL;
3590         if (!tmp)
3591                 return (NULL);
3592
3593         if ((part = strstr(tmp, "-part")) && part != tmp) {
3594                 d = part + 5;
3595         } else if ((part = strrchr(tmp, 'p')) &&
3596             part > tmp + 1 && isdigit(*(part-1))) {
3597                 d = part + 1;
3598         } else if ((tmp[0] == 'h' || tmp[0] == 's' || tmp[0] == 'v') &&
3599             tmp[1] == 'd') {
3600                 for (d = &tmp[2]; isalpha(*d); part = ++d) { }
3601         } else if (strncmp("xvd", tmp, 3) == 0) {
3602                 for (d = &tmp[3]; isalpha(*d); part = ++d) { }
3603         }
3604         if (part && d && *d != '\0') {
3605                 for (; isdigit(*d); d++) { }
3606                 if (*d == '\0')
3607                         *part = '\0';
3608         }
3609
3610         return (tmp);
3611 }
3612
3613 /*
3614  * Same as zfs_strip_partition, but allows "/dev/" to be in the pathname
3615  *
3616  * path:        /dev/sda1
3617  * returns:     /dev/sda
3618  *
3619  * Returned string must be freed.
3620  */
char *
zfs_strip_partition_path(char *path)
{
	char *newpath = strdup(path);
	char *sd_offset;
	char *new_sd;

	if (!newpath)
		return (NULL);

	/*
	 * Point to the "sda1" part of "/dev/sda1".  If the path contains
	 * no '/', strrchr() returns NULL; guard that case (the original
	 * code incremented the NULL pointer) and use the whole string.
	 */
	sd_offset = strrchr(newpath, '/');
	sd_offset = (sd_offset == NULL) ? newpath : sd_offset + 1;

	/* Get our new name "sda" */
	new_sd = zfs_strip_partition(sd_offset);
	if (!new_sd) {
		free(newpath);
		return (NULL);
	}

	/* Paste the "sda" where "sda1" was */
	strlcpy(sd_offset, new_sd, strlen(sd_offset) + 1);

	/* Free temporary "sda" */
	free(new_sd);

	return (newpath);
}
3649
3650 #define PATH_BUF_LEN    64
3651
3652 /*
3653  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
3654  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
3655  * We also check if this is a whole disk, in which case we strip off the
3656  * trailing 's0' slice name.
3657  *
3658  * This routine is also responsible for identifying when disks have been
3659  * reconfigured in a new location.  The kernel will have opened the device by
3660  * devid, but the path will still refer to the old location.  To catch this, we
3661  * first do a path -> devid translation (which is fast for the common case).  If
3662  * the devid matches, we're done.  If not, we do a reverse devid -> path
3663  * translation and issue the appropriate ioctl() to update the path of the vdev.
3664  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
3665  * of these checks.
3666  */
3667 char *
3668 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
3669     int name_flags)
3670 {
3671         char *path, *type, *env;
3672         uint64_t value;
3673         char buf[PATH_BUF_LEN];
3674         char tmpbuf[PATH_BUF_LEN];
3675
3676         /*
3677          * vdev_name will be "root"/"root-0" for the root vdev, but it is the
3678          * zpool name that will be displayed to the user.
3679          */
3680         verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
3681         if (zhp != NULL && strcmp(type, "root") == 0)
3682                 return (zfs_strdup(hdl, zpool_get_name(zhp)));
3683
3684         env = getenv("ZPOOL_VDEV_NAME_PATH");
3685         if (env && (strtoul(env, NULL, 0) > 0 ||
3686             !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3687                 name_flags |= VDEV_NAME_PATH;
3688
3689         env = getenv("ZPOOL_VDEV_NAME_GUID");
3690         if (env && (strtoul(env, NULL, 0) > 0 ||
3691             !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3692                 name_flags |= VDEV_NAME_GUID;
3693
3694         env = getenv("ZPOOL_VDEV_NAME_FOLLOW_LINKS");
3695         if (env && (strtoul(env, NULL, 0) > 0 ||
3696             !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3697                 name_flags |= VDEV_NAME_FOLLOW_LINKS;
3698
3699         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &value) == 0 ||
3700             name_flags & VDEV_NAME_GUID) {
3701                 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value);
3702                 (void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)value);
3703                 path = buf;
3704         } else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
3705 #if defined(__sun__) || defined(__sun)
3706                 /*
3707                  * Live VDEV path updates to a kernel VDEV during a
3708                  * zpool_vdev_name lookup are not supported on Linux.
3709                  */
3710                 char *devid;
3711                 vdev_stat_t *vs;
3712                 uint_t vsc;
3713
3714                 /*
3715                  * If the device is dead (faulted, offline, etc) then don't
3716                  * bother opening it.  Otherwise we may be forcing the user to
3717                  * open a misbehaving device, which can have undesirable
3718                  * effects.
3719                  */
3720                 if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
3721                     (uint64_t **)&vs, &vsc) != 0 ||
3722                     vs->vs_state >= VDEV_STATE_DEGRADED) &&
3723                     zhp != NULL &&
3724                     nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
3725                         /*
3726                          * Determine if the current path is correct.
3727                          */
3728                         char *newdevid = path_to_devid(path);
3729
3730                         if (newdevid == NULL ||
3731                             strcmp(devid, newdevid) != 0) {
3732                                 char *newpath;
3733
3734                                 if ((newpath = devid_to_path(devid)) != NULL) {
3735                                         /*
3736                                          * Update the path appropriately.
3737                                          */
3738                                         set_path(zhp, nv, newpath);
3739                                         if (nvlist_add_string(nv,
3740                                             ZPOOL_CONFIG_PATH, newpath) == 0)
3741                                                 verify(nvlist_lookup_string(nv,
3742                                                     ZPOOL_CONFIG_PATH,
3743                                                     &path) == 0);
3744                                         free(newpath);
3745                                 }
3746                         }
3747
3748                         if (newdevid)
3749                                 devid_str_free(newdevid);
3750                 }
3751 #endif /* sun */
3752
3753                 if (name_flags & VDEV_NAME_FOLLOW_LINKS) {
3754                         char *rp = realpath(path, NULL);
3755                         if (rp) {
3756                                 strlcpy(buf, rp, sizeof (buf));
3757                                 path = buf;
3758                                 free(rp);
3759                         }
3760                 }
3761
3762                 /*
3763                  * For a block device only use the name.
3764                  */
3765                 if ((strcmp(type, VDEV_TYPE_DISK) == 0) &&
3766                     !(name_flags & VDEV_NAME_PATH)) {
3767                         path = strrchr(path, '/');
3768                         path++;
3769                 }
3770
3771                 /*
3772                  * Remove the partition from the path it this is a whole disk.
3773                  */
3774                 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value)
3775                     == 0 && value && !(name_flags & VDEV_NAME_PATH)) {
3776                         return (zfs_strip_partition(path));
3777                 }
3778         } else {
3779                 path = type;
3780
3781                 /*
3782                  * If it's a raidz device, we need to stick in the parity level.
3783                  */
3784                 if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
3785                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
3786                             &value) == 0);
3787                         (void) snprintf(buf, sizeof (buf), "%s%llu", path,
3788                             (u_longlong_t)value);
3789                         path = buf;
3790                 }
3791
3792                 /*
3793                  * We identify each top-level vdev by using a <type-id>
3794                  * naming convention.
3795                  */
3796                 if (name_flags & VDEV_NAME_TYPE_ID) {
3797                         uint64_t id;
3798                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
3799                             &id) == 0);
3800                         (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s-%llu",
3801                             path, (u_longlong_t)id);
3802                         path = tmpbuf;
3803                 }
3804         }
3805
3806         return (zfs_strdup(hdl, path));
3807 }
3808
3809 static int
3810 zbookmark_mem_compare(const void *a, const void *b)
3811 {
3812         return (memcmp(a, b, sizeof (zbookmark_phys_t)));
3813 }
3814
3815 /*
3816  * Retrieve the persistent error log, uniquify the members, and return to the
3817  * caller.
3818  */
3819 int
3820 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
3821 {
3822         zfs_cmd_t zc = {"\0"};
3823         libzfs_handle_t *hdl = zhp->zpool_hdl;
3824         uint64_t count;
3825         zbookmark_phys_t *zb = NULL;
3826         int i;
3827
3828         /*
3829          * Retrieve the raw error list from the kernel.  If the number of errors
3830          * has increased, allocate more space and continue until we get the
3831          * entire list.
3832          */
3833         verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
3834             &count) == 0);
3835         if (count == 0)
3836                 return (0);
3837         zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
3838             count * sizeof (zbookmark_phys_t));
3839         zc.zc_nvlist_dst_size = count;
3840         (void) strcpy(zc.zc_name, zhp->zpool_name);
3841         for (;;) {
3842                 if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
3843                     &zc) != 0) {
3844                         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3845                         if (errno == ENOMEM) {
3846                                 void *dst;
3847
3848                                 count = zc.zc_nvlist_dst_size;
3849                                 dst = zfs_alloc(zhp->zpool_hdl, count *
3850                                     sizeof (zbookmark_phys_t));
3851                                 zc.zc_nvlist_dst = (uintptr_t)dst;
3852                         } else {
3853                                 return (zpool_standard_error_fmt(hdl, errno,
3854                                     dgettext(TEXT_DOMAIN, "errors: List of "
3855                                     "errors unavailable")));
3856                         }
3857                 } else {
3858                         break;
3859                 }
3860         }
3861
3862         /*
3863          * Sort the resulting bookmarks.  This is a little confusing due to the
3864          * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
3865          * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks
3866          * _not_ copied as part of the process.  So we point the start of our
3867          * array appropriate and decrement the total number of elements.
3868          */
3869         zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) +
3870             zc.zc_nvlist_dst_size;
3871         count -= zc.zc_nvlist_dst_size;
3872
3873         qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare);
3874
3875         verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
3876
3877         /*
3878          * Fill in the nverrlistp with nvlist's of dataset and object numbers.
3879          */
3880         for (i = 0; i < count; i++) {
3881                 nvlist_t *nv;
3882
3883                 /* ignoring zb_blkid and zb_level for now */
3884                 if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
3885                     zb[i-1].zb_object == zb[i].zb_object)
3886                         continue;
3887
3888                 if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
3889                         goto nomem;
3890                 if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
3891                     zb[i].zb_objset) != 0) {
3892                         nvlist_free(nv);
3893                         goto nomem;
3894                 }
3895                 if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
3896                     zb[i].zb_object) != 0) {
3897                         nvlist_free(nv);
3898                         goto nomem;
3899                 }
3900                 if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
3901                         nvlist_free(nv);
3902                         goto nomem;
3903                 }
3904                 nvlist_free(nv);
3905         }
3906
3907         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3908         return (0);
3909
3910 nomem:
3911         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3912         return (no_memory(zhp->zpool_hdl));
3913 }
3914
3915 /*
3916  * Upgrade a ZFS pool to the latest on-disk version.
3917  */
3918 int
3919 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
3920 {
3921         zfs_cmd_t zc = {"\0"};
3922         libzfs_handle_t *hdl = zhp->zpool_hdl;
3923
3924         (void) strcpy(zc.zc_name, zhp->zpool_name);
3925         zc.zc_cookie = new_version;
3926
3927         if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
3928                 return (zpool_standard_error_fmt(hdl, errno,
3929                     dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
3930                     zhp->zpool_name));
3931         return (0);
3932 }
3933
3934 void
3935 zfs_save_arguments(int argc, char **argv, char *string, int len)
3936 {
3937         int i;
3938
3939         (void) strlcpy(string, basename(argv[0]), len);
3940         for (i = 1; i < argc; i++) {
3941                 (void) strlcat(string, " ", len);
3942                 (void) strlcat(string, argv[i], len);
3943         }
3944 }
3945
3946 int
3947 zpool_log_history(libzfs_handle_t *hdl, const char *message)
3948 {
3949         zfs_cmd_t zc = {"\0"};
3950         nvlist_t *args;
3951         int err;
3952
3953         args = fnvlist_alloc();
3954         fnvlist_add_string(args, "message", message);
3955         err = zcmd_write_src_nvlist(hdl, &zc, args);
3956         if (err == 0)
3957                 err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc);
3958         nvlist_free(args);
3959         zcmd_free_nvlists(&zc);
3960         return (err);
3961 }
3962
3963 /*
3964  * Perform ioctl to get some command history of a pool.
3965  *
3966  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
3967  * logical offset of the history buffer to start reading from.
3968  *
3969  * Upon return, 'off' is the next logical offset to read from and
3970  * 'len' is the actual amount of bytes read into 'buf'.
3971  */
3972 static int
3973 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
3974 {
3975         zfs_cmd_t zc = {"\0"};
3976         libzfs_handle_t *hdl = zhp->zpool_hdl;
3977
3978         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3979
3980         zc.zc_history = (uint64_t)(uintptr_t)buf;
3981         zc.zc_history_len = *len;
3982         zc.zc_history_offset = *off;
3983
3984         if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
3985                 switch (errno) {
3986                 case EPERM:
3987                         return (zfs_error_fmt(hdl, EZFS_PERM,
3988                             dgettext(TEXT_DOMAIN,
3989                             "cannot show history for pool '%s'"),
3990                             zhp->zpool_name));
3991                 case ENOENT:
3992                         return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
3993                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
3994                             "'%s'"), zhp->zpool_name));
3995                 case ENOTSUP:
3996                         return (zfs_error_fmt(hdl, EZFS_BADVERSION,
3997                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
3998                             "'%s', pool must be upgraded"), zhp->zpool_name));
3999                 default:
4000                         return (zpool_standard_error_fmt(hdl, errno,
4001                             dgettext(TEXT_DOMAIN,
4002                             "cannot get history for '%s'"), zhp->zpool_name));
4003                 }
4004         }
4005
4006         *len = zc.zc_history_len;
4007         *off = zc.zc_history_offset;
4008
4009         return (0);
4010 }
4011
4012 /*
4013  * Process the buffer of nvlists, unpacking and storing each nvlist record
4014  * into 'records'.  'leftover' is set to the number of bytes that weren't
4015  * processed as there wasn't a complete record.
4016  */
4017 int
4018 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
4019     nvlist_t ***records, uint_t *numrecords)
4020 {
4021         uint64_t reclen;
4022         nvlist_t *nv;
4023         int i;
4024         void *tmp;
4025
4026         while (bytes_read > sizeof (reclen)) {
4027
4028                 /* get length of packed record (stored as little endian) */
4029                 for (i = 0, reclen = 0; i < sizeof (reclen); i++)
4030                         reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
4031
4032                 if (bytes_read < sizeof (reclen) + reclen)
4033                         break;
4034
4035                 /* unpack record */
4036                 if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
4037                         return (ENOMEM);
4038                 bytes_read -= sizeof (reclen) + reclen;
4039                 buf += sizeof (reclen) + reclen;
4040
4041                 /* add record to nvlist array */
4042                 (*numrecords)++;
4043                 if (ISP2(*numrecords + 1)) {
4044                         tmp = realloc(*records,
4045                             *numrecords * 2 * sizeof (nvlist_t *));
4046                         if (tmp == NULL) {
4047                                 nvlist_free(nv);
4048                                 (*numrecords)--;
4049                                 return (ENOMEM);
4050                         }
4051                         *records = tmp;
4052                 }
4053                 (*records)[*numrecords - 1] = nv;
4054         }
4055
4056         *leftover = bytes_read;
4057         return (0);
4058 }
4059
4060 /*
4061  * Retrieve the command history of a pool.
4062  */
4063 int
4064 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
4065 {
4066         char *buf;
4067         int buflen = 128 * 1024;
4068         uint64_t off = 0;
4069         nvlist_t **records = NULL;
4070         uint_t numrecords = 0;
4071         int err, i;
4072
4073         buf = malloc(buflen);
4074         if (buf == NULL)
4075                 return (ENOMEM);
4076         do {
4077                 uint64_t bytes_read = buflen;
4078                 uint64_t leftover;
4079
4080                 if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
4081                         break;
4082
4083                 /* if nothing else was read in, we're at EOF, just return */
4084                 if (!bytes_read)
4085                         break;
4086
4087                 if ((err = zpool_history_unpack(buf, bytes_read,
4088                     &leftover, &records, &numrecords)) != 0)
4089                         break;
4090                 off -= leftover;
4091                 if (leftover == bytes_read) {
4092                         /*
4093                          * no progress made, because buffer is not big enough
4094                          * to hold this record; resize and retry.
4095                          */
4096                         buflen *= 2;
4097                         free(buf);
4098                         buf = malloc(buflen);
4099                         if (buf == NULL)
4100                                 return (ENOMEM);
4101                 }
4102
4103                 /* CONSTCOND */
4104         } while (1);
4105
4106         free(buf);
4107
4108         if (!err) {
4109                 verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
4110                 verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
4111                     records, numrecords) == 0);
4112         }
4113         for (i = 0; i < numrecords; i++)
4114                 nvlist_free(records[i]);
4115         free(records);
4116
4117         return (err);
4118 }
4119
4120 /*
4121  * Retrieve the next event given the passed 'zevent_fd' file descriptor.
4122  * If there is a new event available 'nvp' will contain a newly allocated
4123  * nvlist and 'dropped' will be set to the number of missed events since
4124  * the last call to this function.  When 'nvp' is set to NULL it indicates
4125  * no new events are available.  In either case the function returns 0 and
4126  * it is up to the caller to free 'nvp'.  In the case of a fatal error the
4127  * function will return a non-zero value.  When the function is called in
4128  * blocking mode (the default, unless the ZEVENT_NONBLOCK flag is passed),
4129  * it will not return until a new event is available.
4130  */
4131 int
4132 zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp,
4133     int *dropped, unsigned flags, int zevent_fd)
4134 {
4135         zfs_cmd_t zc = {"\0"};
4136         int error = 0;
4137
4138         *nvp = NULL;
4139         *dropped = 0;
4140         zc.zc_cleanup_fd = zevent_fd;
4141
4142         if (flags & ZEVENT_NONBLOCK)
4143                 zc.zc_guid = ZEVENT_NONBLOCK;
4144
4145         if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0)
4146                 return (-1);
4147
4148 retry:
4149         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) {
4150                 switch (errno) {
4151                 case ESHUTDOWN:
4152                         error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
4153                             dgettext(TEXT_DOMAIN, "zfs shutdown"));
4154                         goto out;
4155                 case ENOENT:
4156                         /* Blocking error case should not occur */
4157                         if (!(flags & ZEVENT_NONBLOCK))
4158                                 error = zpool_standard_error_fmt(hdl, errno,
4159                                     dgettext(TEXT_DOMAIN, "cannot get event"));
4160
4161                         goto out;
4162                 case ENOMEM:
4163                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
4164                                 error = zfs_error_fmt(hdl, EZFS_NOMEM,
4165                                     dgettext(TEXT_DOMAIN, "cannot get event"));
4166                                 goto out;
4167                         } else {
4168                                 goto retry;
4169                         }
4170                 default:
4171                         error = zpool_standard_error_fmt(hdl, errno,
4172                             dgettext(TEXT_DOMAIN, "cannot get event"));
4173                         goto out;
4174                 }
4175         }
4176
4177         error = zcmd_read_dst_nvlist(hdl, &zc, nvp);
4178         if (error != 0)
4179                 goto out;
4180
4181         *dropped = (int)zc.zc_cookie;
4182 out:
4183         zcmd_free_nvlists(&zc);
4184
4185         return (error);
4186 }
4187
4188 /*
4189  * Clear all events.
4190  */
4191 int
4192 zpool_events_clear(libzfs_handle_t *hdl, int *count)
4193 {
4194         zfs_cmd_t zc = {"\0"};
4195         char msg[1024];
4196
4197         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
4198             "cannot clear events"));
4199
4200         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0)
4201                 return (zpool_standard_error_fmt(hdl, errno, msg));
4202
4203         if (count != NULL)
4204                 *count = (int)zc.zc_cookie; /* # of events cleared */
4205
4206         return (0);
4207 }
4208
4209 /*
4210  * Seek to a specific EID, ZEVENT_SEEK_START, or ZEVENT_SEEK_END for
4211  * the passed zevent_fd file handle.  On success zero is returned,
4212  * otherwise -1 is returned and hdl->libzfs_error is set to the errno.
4213  */
4214 int
4215 zpool_events_seek(libzfs_handle_t *hdl, uint64_t eid, int zevent_fd)
4216 {
4217         zfs_cmd_t zc = {"\0"};
4218         int error = 0;
4219
4220         zc.zc_guid = eid;
4221         zc.zc_cleanup_fd = zevent_fd;
4222
4223         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_SEEK, &zc) != 0) {
4224                 switch (errno) {
4225                 case ENOENT:
4226                         error = zfs_error_fmt(hdl, EZFS_NOENT,
4227                             dgettext(TEXT_DOMAIN, "cannot get event"));
4228                         break;
4229
4230                 case ENOMEM:
4231                         error = zfs_error_fmt(hdl, EZFS_NOMEM,
4232                             dgettext(TEXT_DOMAIN, "cannot get event"));
4233                         break;
4234
4235                 default:
4236                         error = zpool_standard_error_fmt(hdl, errno,
4237                             dgettext(TEXT_DOMAIN, "cannot get event"));
4238                         break;
4239                 }
4240         }
4241
4242         return (error);
4243 }
4244
/*
 * Translate a <dataset object, object> pair into a human-readable path,
 * writing the result into 'pathname' (at most 'len' bytes).  Falls back
 * to raw "<0xobjset>:<0xobj>" style output whenever the dataset name or
 * object path cannot be resolved.
 */
void
zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
    char *pathname, size_t len)
{
        zfs_cmd_t zc = {"\0"};
        boolean_t mounted = B_FALSE;
        char *mntpnt = NULL;
        char dsname[ZFS_MAX_DATASET_NAME_LEN];

        if (dsobj == 0) {
                /* special case for the MOS */
                (void) snprintf(pathname, len, "<metadata>:<0x%llx>",
                    (longlong_t)obj);
                return;
        }

        /* get the dataset's name */
        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
        zc.zc_obj = dsobj;
        if (ioctl(zhp->zpool_hdl->libzfs_fd,
            ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
                /* just write out a path of two object numbers */
                (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
                    (longlong_t)dsobj, (longlong_t)obj);
                return;
        }
        (void) strlcpy(dsname, zc.zc_value, sizeof (dsname));

        /* find out if the dataset is mounted */
        mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);

        /* get the corrupted object's path */
        (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
        zc.zc_obj = obj;
        if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
            &zc) == 0) {
                if (mounted) {
                        /* prefix with the mountpoint for a usable path */
                        (void) snprintf(pathname, len, "%s%s", mntpnt,
                            zc.zc_value);
                } else {
                        (void) snprintf(pathname, len, "%s:%s",
                            dsname, zc.zc_value);
                }
        } else {
                (void) snprintf(pathname, len, "%s:<0x%llx>", dsname,
                    (longlong_t)obj);
        }
        /* is_mounted() allocated mntpnt (may be NULL); free(NULL) is a no-op */
        free(mntpnt);
}
4294
4295 /*
4296  * Read the EFI label from the config, if a label does not exist then
4297  * pass back the error to the caller. If the caller has passed a non-NULL
4298  * diskaddr argument then we set it to the starting address of the EFI
4299  * partition.
4300  */
4301 static int
4302 read_efi_label(nvlist_t *config, diskaddr_t *sb)
4303 {
4304         char *path;
4305         int fd;
4306         char diskname[MAXPATHLEN];
4307         int err = -1;
4308
4309         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
4310                 return (err);
4311
4312         (void) snprintf(diskname, sizeof (diskname), "%s%s", DISK_ROOT,
4313             strrchr(path, '/'));
4314         if ((fd = open(diskname, O_RDONLY|O_DIRECT)) >= 0) {
4315                 struct dk_gpt *vtoc;
4316
4317                 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
4318                         if (sb != NULL)
4319                                 *sb = vtoc->efi_parts[0].p_start;
4320                         efi_free(vtoc);
4321                 }
4322                 (void) close(fd);
4323         }
4324         return (err);
4325 }
4326
4327 /*
4328  * determine where a partition starts on a disk in the current
4329  * configuration
4330  */
4331 static diskaddr_t
4332 find_start_block(nvlist_t *config)
4333 {
4334         nvlist_t **child;
4335         uint_t c, children;
4336         diskaddr_t sb = MAXOFFSET_T;
4337         uint64_t wholedisk;
4338
4339         if (nvlist_lookup_nvlist_array(config,
4340             ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
4341                 if (nvlist_lookup_uint64(config,
4342                     ZPOOL_CONFIG_WHOLE_DISK,
4343                     &wholedisk) != 0 || !wholedisk) {
4344                         return (MAXOFFSET_T);
4345                 }
4346                 if (read_efi_label(config, &sb) < 0)
4347                         sb = MAXOFFSET_T;
4348                 return (sb);
4349         }
4350
4351         for (c = 0; c < children; c++) {
4352                 sb = find_start_block(child[c]);
4353                 if (sb != MAXOFFSET_T) {
4354                         return (sb);
4355                 }
4356         }
4357         return (MAXOFFSET_T);
4358 }
4359
4360 static int
4361 zpool_label_disk_check(char *path)
4362 {
4363         struct dk_gpt *vtoc;
4364         int fd, err;
4365
4366         if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
4367                 return (errno);
4368
4369         if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
4370                 (void) close(fd);
4371                 return (err);
4372         }
4373
4374         if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
4375                 efi_free(vtoc);
4376                 (void) close(fd);
4377                 return (EIDRM);
4378         }
4379
4380         efi_free(vtoc);
4381         (void) close(fd);
4382         return (0);
4383 }
4384
4385 /*
4386  * Generate a unique partition name for the ZFS member.  Partitions must
4387  * have unique names to ensure udev will be able to create symlinks under
4388  * /dev/disk/by-partlabel/ for all pool members.  The partition names are
4389  * of the form <pool>-<unique-id>.
4390  */
4391 static void
4392 zpool_label_name(char *label_name, int label_size)
4393 {
4394         uint64_t id = 0;
4395         int fd;
4396
4397         fd = open("/dev/urandom", O_RDONLY);
4398         if (fd >= 0) {
4399                 if (read(fd, &id, sizeof (id)) != sizeof (id))
4400                         id = 0;
4401
4402                 close(fd);
4403         }
4404
4405         if (id == 0)
4406                 id = (((uint64_t)rand()) << 32) | (uint64_t)rand();
4407
4408         snprintf(label_name, label_size, "zfs-%016llx", (u_longlong_t)id);
4409 }
4410
4411 /*
4412  * Label an individual disk.  The name provided is the short name,
4413  * stripped of any leading /dev path.
4414  */
4415 int
4416 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
4417 {
4418         char path[MAXPATHLEN];
4419         struct dk_gpt *vtoc;
4420         int rval, fd;
4421         size_t resv = EFI_MIN_RESV_SIZE;
4422         uint64_t slice_size;
4423         diskaddr_t start_block;
4424         char errbuf[1024];
4425
4426         /* prepare an error message just in case */
4427         (void) snprintf(errbuf, sizeof (errbuf),
4428             dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
4429
4430         if (zhp) {
4431                 nvlist_t *nvroot;
4432
4433                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
4434                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
4435
4436                 if (zhp->zpool_start_block == 0)
4437                         start_block = find_start_block(nvroot);
4438                 else
4439                         start_block = zhp->zpool_start_block;
4440                 zhp->zpool_start_block = start_block;
4441         } else {
4442                 /* new pool */
4443                 start_block = NEW_START_BLOCK;
4444         }
4445
4446         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4447
4448         if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) {
4449                 /*
4450                  * This shouldn't happen.  We've long since verified that this
4451                  * is a valid device.
4452                  */
4453                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
4454                     "label '%s': unable to open device: %d"), path, errno);
4455                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
4456         }
4457
4458         if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
4459                 /*
4460                  * The only way this can fail is if we run out of memory, or we
4461                  * were unable to read the disk's capacity
4462                  */
4463                 if (errno == ENOMEM)
4464                         (void) no_memory(hdl);
4465
4466                 (void) close(fd);
4467                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
4468                     "label '%s': unable to read disk capacity"), path);
4469
4470                 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
4471         }
4472
4473         slice_size = vtoc->efi_last_u_lba + 1;
4474         slice_size -= EFI_MIN_RESV_SIZE;
4475         if (start_block == MAXOFFSET_T)
4476                 start_block = NEW_START_BLOCK;
4477         slice_size -= start_block;
4478         slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT);
4479
4480         vtoc->efi_parts[0].p_start = start_block;
4481         vtoc->efi_parts[0].p_size = slice_size;
4482
4483         /*
4484          * Why we use V_USR: V_BACKUP confuses users, and is considered
4485          * disposable by some EFI utilities (since EFI doesn't have a backup
4486          * slice).  V_UNASSIGNED is supposed to be used only for zero size
4487          * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
4488          * etc. were all pretty specific.  V_USR is as close to reality as we
4489          * can get, in the absence of V_OTHER.
4490          */
4491         vtoc->efi_parts[0].p_tag = V_USR;
4492         zpool_label_name(vtoc->efi_parts[0].p_name, EFI_PART_NAME_LEN);
4493
4494         vtoc->efi_parts[8].p_start = slice_size + start_block;
4495         vtoc->efi_parts[8].p_size = resv;
4496         vtoc->efi_parts[8].p_tag = V_RESERVED;
4497
4498         rval = efi_write(fd, vtoc);
4499
4500         /* Flush the buffers to disk and invalidate the page cache. */
4501         (void) fsync(fd);
4502         (void) ioctl(fd, BLKFLSBUF);
4503
4504         if (rval == 0)
4505                 rval = efi_rescan(fd);
4506
4507         /*
4508          * Some block drivers (like pcata) may not support EFI GPT labels.
4509          * Print out a helpful error message directing the user to manually
4510          * label the disk and give a specific slice.
4511          */
4512         if (rval != 0) {
4513                 (void) close(fd);
4514                 efi_free(vtoc);
4515
4516                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
4517                     "parted(8) and then provide a specific slice: %d"), rval);
4518                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4519         }
4520
4521         (void) close(fd);
4522         efi_free(vtoc);
4523
4524         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4525         (void) zfs_append_partition(path, MAXPATHLEN);
4526
4527         /* Wait to udev to signal use the device has settled. */
4528         rval = zpool_label_disk_wait(path, DISK_LABEL_WAIT);
4529         if (rval) {
4530                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
4531                     "detect device partitions on '%s': %d"), path, rval);
4532                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4533         }
4534
4535         /* We can't be to paranoid.  Read the label back and verify it. */
4536         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4537         rval = zpool_label_disk_check(path);
4538         if (rval) {
4539                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
4540                     "EFI label on '%s' is damaged.  Ensure\nthis device "
4541                     "is not in in use, and is functioning properly: %d"),
4542                     path, rval);
4543                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4544         }
4545
4546         return (0);
4547 }
4548
4549 /*
4550  * Allocate and return the underlying device name for a device mapper device.
4551  * If a device mapper device maps to multiple devices, return the first device.
4552  *
4553  * For example, dm_name = "/dev/dm-0" could return "/dev/sda". Symlinks to a
4554  * DM device (like /dev/disk/by-vdev/A0) are also allowed.
4555  *
4556  * Returns device name, or NULL on error or no match.  If dm_name is not a DM
4557  * device then return NULL.
4558  *
4559  * NOTE: The returned name string must be *freed*.
4560  */
4561 char *
4562 dm_get_underlying_path(char *dm_name)
4563 {
4564         DIR *dp = NULL;
4565         struct dirent *ep;
4566         char *realp;
4567         char *tmp = NULL;
4568         char *path = NULL;
4569         char *dev_str;
4570         int size;
4571
4572         if (dm_name == NULL)
4573                 return (NULL);
4574
4575         /* dm name may be a symlink (like /dev/disk/by-vdev/A0) */
4576         realp = realpath(dm_name, NULL);
4577         if (realp == NULL)
4578                 return (NULL);
4579
4580         /*
4581          * If they preface 'dev' with a path (like "/dev") then strip it off.
4582          * We just want the 'dm-N' part.
4583          */
4584         tmp = strrchr(realp, '/');
4585         if (tmp != NULL)
4586                 dev_str = tmp + 1;    /* +1 since we want the chr after '/' */
4587         else
4588                 dev_str = tmp;
4589
4590         size = asprintf(&tmp, "/sys/block/%s/slaves/", dev_str);
4591         if (size == -1 || !tmp)
4592                 goto end;
4593
4594         dp = opendir(tmp);
4595         if (dp == NULL)
4596                 goto end;
4597
4598         /* Return first sd* entry in /sys/block/dm-N/slaves/ */
4599         while ((ep = readdir(dp))) {
4600                 if (ep->d_type != DT_DIR) {     /* skip "." and ".." dirs */
4601                         size = asprintf(&path, "/dev/%s", ep->d_name);
4602                         break;
4603                 }
4604         }
4605
4606 end:
4607         if (dp != NULL)
4608                 closedir(dp);
4609         free(tmp);
4610         free(realp);
4611         return (path);
4612 }
4613
4614 /*
4615  * Return 1 if device is a device mapper or multipath device.
4616  * Return 0 if not.
4617  */
4618 int
4619 zfs_dev_is_dm(char *dev_name)
4620 {
4621
4622         char *tmp;
4623         tmp = dm_get_underlying_path(dev_name);
4624         if (tmp == NULL)
4625                 return (0);
4626
4627         free(tmp);
4628         return (1);
4629 }
4630
4631 /*
4632  * By "whole disk" we mean an entire physical disk (something we can
4633  * label, toggle the write cache on, etc.) as opposed to the full
4634  * capacity of a pseudo-device such as lofi or did.  We act as if we
4635  * are labeling the disk, which should be a pretty good test of whether
4636  * it's a viable device or not.  Returns B_TRUE if it is and B_FALSE if
4637  * it isn't.
4638  */
4639 int
4640 zfs_dev_is_whole_disk(char *dev_name)
4641 {
4642         struct dk_gpt *label;
4643         int fd;
4644
4645         if ((fd = open(dev_name, O_RDONLY | O_DIRECT)) < 0)
4646                 return (0);
4647
4648         if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
4649                 (void) close(fd);
4650                 return (0);
4651         }
4652
4653         efi_free(label);
4654         (void) close(fd);
4655
4656         return (1);
4657 }
4658
4659 /*
4660  * Lookup the underlying device for a device name
4661  *
4662  * Often you'll have a symlink to a device, a partition device,
4663  * or a multipath device, and want to look up the underlying device.
4664  * This function returns the underlying device name.  If the device
4665  * name is already the underlying device, then just return the same
4666  * name.  If the device is a DM device with multiple underlying devices
4667  * then return the first one.
4668  *
4669  * For example:
4670  *
4671  * 1. /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 -> ../../sda
4672  * dev_name:    /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001
4673  * returns:     /dev/sda
4674  *
4675  * 2. /dev/mapper/mpatha (made up of /dev/sda and /dev/sdb)
4676  * dev_name:    /dev/mapper/mpatha
4677  * returns:     /dev/sda (first device)
4678  *
4679  * 3. /dev/sda (already the underlying device)
4680  * dev_name:    /dev/sda
4681  * returns:     /dev/sda
4682  *
4683  * 4. /dev/dm-3 (mapped to /dev/sda)
4684  * dev_name:    /dev/dm-3
4685  * returns:     /dev/sda
4686  *
4687  * 5. /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 -> ../../sdb9
4688  * dev_name:    /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9
4689  * returns:     /dev/sdb
4690  *
4691  * 6. /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a -> ../dev/sda2
4692  * dev_name:    /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a
4693  * returns:     /dev/sda
4694  *
4695  * Returns underlying device name, or NULL on error or no match.
4696  *
4697  * NOTE: The returned name string must be *freed*.
4698  */
4699 char *
4700 zfs_get_underlying_path(char *dev_name)
4701 {
4702         char *name = NULL;
4703         char *tmp;
4704
4705         if (dev_name == NULL)
4706                 return (NULL);
4707
4708         tmp = dm_get_underlying_path(dev_name);
4709
4710         /* dev_name not a DM device, so just un-symlinkize it */
4711         if (tmp == NULL)
4712                 tmp = realpath(dev_name, NULL);
4713
4714         if (tmp != NULL) {
4715                 name = zfs_strip_partition_path(tmp);
4716                 free(tmp);
4717         }
4718
4719         return (name);
4720 }
4721
4722 /*
4723  * Given a dev name like "sda", return the full enclosure sysfs path to
4724  * the disk.  You can also pass in the name with "/dev" prepended
4725  * to it (like /dev/sda).
4726  *
4727  * For example, disk "sda" in enclosure slot 1:
4728  *     dev:            "sda"
4729  *     returns:        "/sys/class/enclosure/1:0:3:0/Slot 1"
4730  *
4731  * 'dev' must be a non-devicemapper device.
4732  *
4733  * Returned string must be freed.
4734  */
4735 char *
4736 zfs_get_enclosure_sysfs_path(char *dev_name)
4737 {
4738         DIR *dp = NULL;
4739         struct dirent *ep;
4740         char buf[MAXPATHLEN];
4741         char *tmp1 = NULL;
4742         char *tmp2 = NULL;
4743         char *tmp3 = NULL;
4744         char *path = NULL;
4745         size_t size;
4746         int tmpsize;
4747
4748         if (dev_name == NULL)
4749                 return (NULL);
4750
4751         /* If they preface 'dev' with a path (like "/dev") then strip it off */
4752         tmp1 = strrchr(dev_name, '/');
4753         if (tmp1 != NULL)
4754                 dev_name = tmp1 + 1;    /* +1 since we want the chr after '/' */
4755
4756         tmpsize = asprintf(&tmp1, "/sys/block/%s/device", dev_name);
4757         if (tmpsize == -1 || tmp1 == NULL) {
4758                 tmp1 = NULL;
4759                 goto end;
4760         }
4761
4762         dp = opendir(tmp1);
4763         if (dp == NULL) {
4764                 tmp1 = NULL;    /* To make free() at the end a NOP */
4765                 goto end;
4766         }
4767
4768         /*
4769          * Look though all sysfs entries in /sys/block/<dev>/device for
4770          * the enclosure symlink.
4771          */
4772         while ((ep = readdir(dp))) {
4773                 /* Ignore everything that's not our enclosure_device link */
4774                 if (strstr(ep->d_name, "enclosure_device") == NULL)
4775                         continue;
4776
4777                 if (asprintf(&tmp2, "%s/%s", tmp1, ep->d_name) == -1 ||
4778                     tmp2 == NULL)
4779                         break;
4780
4781                 size = readlink(tmp2, buf, sizeof (buf));
4782
4783                 /* Did readlink fail or crop the link name? */
4784                 if (size == -1 || size >= sizeof (buf)) {
4785                         free(tmp2);
4786                         tmp2 = NULL;    /* To make free() at the end a NOP */
4787                         break;
4788                 }
4789
4790                 /*
4791                  * We got a valid link.  readlink() doesn't terminate strings
4792                  * so we have to do it.
4793                  */
4794                 buf[size] = '\0';
4795
4796                 /*
4797                  * Our link will look like:
4798                  *
4799                  * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1"
4800                  *
4801                  * We want to grab the "enclosure/1:0:3:0/SLOT 1" part
4802                  */
4803                 tmp3 = strstr(buf, "enclosure");
4804                 if (tmp3 == NULL)
4805                         break;
4806
4807                 if (asprintf(&path, "/sys/class/%s", tmp3) == -1) {
4808                         /* If asprintf() fails, 'path' is undefined */
4809                         path = NULL;
4810                         break;
4811                 }
4812
4813                 if (path == NULL)
4814                         break;
4815         }
4816
4817 end:
4818         free(tmp2);
4819         free(tmp1);
4820
4821         if (dp != NULL)
4822                 closedir(dp);
4823
4824         return (path);
4825 }