/*
 * libzfs_pool.c -- zpool property and configuration support routines.
 * FreeBSD stable/10: cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
 * (MFC r289500: 6298 zfs_create_008_neg and zpool_create_023_neg fixes)
 */
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
26  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
27  */
28
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <ctype.h>
32 #include <errno.h>
33 #include <devid.h>
34 #include <fcntl.h>
35 #include <libintl.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <strings.h>
39 #include <unistd.h>
40 #include <libgen.h>
41 #include <sys/zfs_ioctl.h>
42 #include <dlfcn.h>
43
44 #include "zfs_namecheck.h"
45 #include "zfs_prop.h"
46 #include "libzfs_impl.h"
47 #include "zfs_comutil.h"
48 #include "zfeature_common.h"
49
/* Forward declaration: read the EFI label from a vdev config (see below). */
static int read_efi_label(nvlist_t *config, diskaddr_t *sb);

/* Solaris device-node roots and the traditional whole-disk backup slice. */
#define DISK_ROOT       "/dev/dsk"
#define RDISK_ROOT      "/dev/rdsk"
#define BACKUP_SLICE    "s2"

/*
 * Context flags handed to zpool_valid_proplist() describing when the
 * property set is being validated; some properties are only legal at
 * pool creation or import time.
 */
typedef struct prop_flags {
	int create:1;	/* Validate property on creation */
	int import:1;	/* Validate property on import */
} prop_flags_t;
60
61 /*
62  * ====================================================================
63  *   zpool property functions
64  * ====================================================================
65  */
66
67 static int
68 zpool_get_all_props(zpool_handle_t *zhp)
69 {
70         zfs_cmd_t zc = { 0 };
71         libzfs_handle_t *hdl = zhp->zpool_hdl;
72
73         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
74
75         if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
76                 return (-1);
77
78         while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
79                 if (errno == ENOMEM) {
80                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
81                                 zcmd_free_nvlists(&zc);
82                                 return (-1);
83                         }
84                 } else {
85                         zcmd_free_nvlists(&zc);
86                         return (-1);
87                 }
88         }
89
90         if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
91                 zcmd_free_nvlists(&zc);
92                 return (-1);
93         }
94
95         zcmd_free_nvlists(&zc);
96
97         return (0);
98 }
99
100 static int
101 zpool_props_refresh(zpool_handle_t *zhp)
102 {
103         nvlist_t *old_props;
104
105         old_props = zhp->zpool_props;
106
107         if (zpool_get_all_props(zhp) != 0)
108                 return (-1);
109
110         nvlist_free(old_props);
111         return (0);
112 }
113
114 static char *
115 zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
116     zprop_source_t *src)
117 {
118         nvlist_t *nv, *nvl;
119         uint64_t ival;
120         char *value;
121         zprop_source_t source;
122
123         nvl = zhp->zpool_props;
124         if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
125                 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
126                 source = ival;
127                 verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
128         } else {
129                 source = ZPROP_SRC_DEFAULT;
130                 if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
131                         value = "-";
132         }
133
134         if (src)
135                 *src = source;
136
137         return (value);
138 }
139
/*
 * Return the numeric value of the given pool property, reporting its
 * source via 'src' when non-NULL.  If the property nvlist cannot be
 * fetched (typically a faulted pool), falls back to the top-level vdev
 * guid from the cached config for ZPOOL_PROP_GUID, or to the property's
 * default numeric value otherwise.
 */
uint64_t
zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
{
	nvlist_t *nv, *nvl;
	uint64_t value;
	zprop_source_t source;

	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
		/*
		 * zpool_get_all_props() has most likely failed because
		 * the pool is faulted, but if all we need is the top level
		 * vdev's guid then get it from the zhp config nvlist.
		 */
		if ((prop == ZPOOL_PROP_GUID) &&
		    (nvlist_lookup_nvlist(zhp->zpool_config,
		    ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
		    (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
		    == 0)) {
			return (value);
		}
		return (zpool_prop_default_numeric(prop));
	}

	nvl = zhp->zpool_props;
	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
		/* 'value' is reused: it first holds the source, then the value */
		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
		source = value;
		verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
	} else {
		source = ZPROP_SRC_DEFAULT;
		value = zpool_prop_default_numeric(prop);
	}

	if (src)
		*src = source;

	return (value);
}
178
179 /*
180  * Map VDEV STATE to printed strings.
181  */
182 const char *
183 zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
184 {
185         switch (state) {
186         case VDEV_STATE_CLOSED:
187         case VDEV_STATE_OFFLINE:
188                 return (gettext("OFFLINE"));
189         case VDEV_STATE_REMOVED:
190                 return (gettext("REMOVED"));
191         case VDEV_STATE_CANT_OPEN:
192                 if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
193                         return (gettext("FAULTED"));
194                 else if (aux == VDEV_AUX_SPLIT_POOL)
195                         return (gettext("SPLIT"));
196                 else
197                         return (gettext("UNAVAIL"));
198         case VDEV_STATE_FAULTED:
199                 return (gettext("FAULTED"));
200         case VDEV_STATE_DEGRADED:
201                 return (gettext("DEGRADED"));
202         case VDEV_STATE_HEALTHY:
203                 return (gettext("ONLINE"));
204         }
205
206         return (gettext("UNKNOWN"));
207 }
208
209 /*
210  * Map POOL STATE to printed strings.
211  */
212 const char *
213 zpool_pool_state_to_name(pool_state_t state)
214 {
215         switch (state) {
216         case POOL_STATE_ACTIVE:
217                 return (gettext("ACTIVE"));
218         case POOL_STATE_EXPORTED:
219                 return (gettext("EXPORTED"));
220         case POOL_STATE_DESTROYED:
221                 return (gettext("DESTROYED"));
222         case POOL_STATE_SPARE:
223                 return (gettext("SPARE"));
224         case POOL_STATE_L2CACHE:
225                 return (gettext("L2CACHE"));
226         case POOL_STATE_UNINITIALIZED:
227                 return (gettext("UNINITIALIZED"));
228         case POOL_STATE_UNAVAIL:
229                 return (gettext("UNAVAIL"));
230         case POOL_STATE_POTENTIALLY_ACTIVE:
231                 return (gettext("POTENTIALLY_ACTIVE"));
232         }
233
234         return (gettext("UNKNOWN"));
235 }
236
237 /*
238  * Get a zpool property value for 'prop' and return the value in
239  * a pre-allocated buffer.
240  */
241 int
242 zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
243     zprop_source_t *srctype, boolean_t literal)
244 {
245         uint64_t intval;
246         const char *strval;
247         zprop_source_t src = ZPROP_SRC_NONE;
248         nvlist_t *nvroot;
249         vdev_stat_t *vs;
250         uint_t vsc;
251
252         if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
253                 switch (prop) {
254                 case ZPOOL_PROP_NAME:
255                         (void) strlcpy(buf, zpool_get_name(zhp), len);
256                         break;
257
258                 case ZPOOL_PROP_HEALTH:
259                         (void) strlcpy(buf,
260                             zpool_pool_state_to_name(POOL_STATE_UNAVAIL), len);
261                         break;
262
263                 case ZPOOL_PROP_GUID:
264                         intval = zpool_get_prop_int(zhp, prop, &src);
265                         (void) snprintf(buf, len, "%llu", intval);
266                         break;
267
268                 case ZPOOL_PROP_ALTROOT:
269                 case ZPOOL_PROP_CACHEFILE:
270                 case ZPOOL_PROP_COMMENT:
271                         if (zhp->zpool_props != NULL ||
272                             zpool_get_all_props(zhp) == 0) {
273                                 (void) strlcpy(buf,
274                                     zpool_get_prop_string(zhp, prop, &src),
275                                     len);
276                                 break;
277                         }
278                         /* FALLTHROUGH */
279                 default:
280                         (void) strlcpy(buf, "-", len);
281                         break;
282                 }
283
284                 if (srctype != NULL)
285                         *srctype = src;
286                 return (0);
287         }
288
289         if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
290             prop != ZPOOL_PROP_NAME)
291                 return (-1);
292
293         switch (zpool_prop_get_type(prop)) {
294         case PROP_TYPE_STRING:
295                 (void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
296                     len);
297                 break;
298
299         case PROP_TYPE_NUMBER:
300                 intval = zpool_get_prop_int(zhp, prop, &src);
301
302                 switch (prop) {
303                 case ZPOOL_PROP_SIZE:
304                 case ZPOOL_PROP_ALLOCATED:
305                 case ZPOOL_PROP_FREE:
306                 case ZPOOL_PROP_FREEING:
307                 case ZPOOL_PROP_LEAKED:
308                         if (literal) {
309                                 (void) snprintf(buf, len, "%llu",
310                                     (u_longlong_t)intval);
311                         } else {
312                                 (void) zfs_nicenum(intval, buf, len);
313                         }
314                         break;
315                 case ZPOOL_PROP_EXPANDSZ:
316                         if (intval == 0) {
317                                 (void) strlcpy(buf, "-", len);
318                         } else if (literal) {
319                                 (void) snprintf(buf, len, "%llu",
320                                     (u_longlong_t)intval);
321                         } else {
322                                 (void) zfs_nicenum(intval, buf, len);
323                         }
324                         break;
325                 case ZPOOL_PROP_CAPACITY:
326                         if (literal) {
327                                 (void) snprintf(buf, len, "%llu",
328                                     (u_longlong_t)intval);
329                         } else {
330                                 (void) snprintf(buf, len, "%llu%%",
331                                     (u_longlong_t)intval);
332                         }
333                         break;
334                 case ZPOOL_PROP_FRAGMENTATION:
335                         if (intval == UINT64_MAX) {
336                                 (void) strlcpy(buf, "-", len);
337                         } else {
338                                 (void) snprintf(buf, len, "%llu%%",
339                                     (u_longlong_t)intval);
340                         }
341                         break;
342                 case ZPOOL_PROP_DEDUPRATIO:
343                         (void) snprintf(buf, len, "%llu.%02llux",
344                             (u_longlong_t)(intval / 100),
345                             (u_longlong_t)(intval % 100));
346                         break;
347                 case ZPOOL_PROP_HEALTH:
348                         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
349                             ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
350                         verify(nvlist_lookup_uint64_array(nvroot,
351                             ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
352                             == 0);
353
354                         (void) strlcpy(buf, zpool_state_to_name(intval,
355                             vs->vs_aux), len);
356                         break;
357                 case ZPOOL_PROP_VERSION:
358                         if (intval >= SPA_VERSION_FEATURES) {
359                                 (void) snprintf(buf, len, "-");
360                                 break;
361                         }
362                         /* FALLTHROUGH */
363                 default:
364                         (void) snprintf(buf, len, "%llu", intval);
365                 }
366                 break;
367
368         case PROP_TYPE_INDEX:
369                 intval = zpool_get_prop_int(zhp, prop, &src);
370                 if (zpool_prop_index_to_string(prop, intval, &strval)
371                     != 0)
372                         return (-1);
373                 (void) strlcpy(buf, strval, len);
374                 break;
375
376         default:
377                 abort();
378         }
379
380         if (srctype)
381                 *srctype = src;
382
383         return (0);
384 }
385
386 /*
387  * Check if the bootfs name has the same pool name as it is set to.
388  * Assuming bootfs is a valid dataset name.
389  */
390 static boolean_t
391 bootfs_name_valid(const char *pool, char *bootfs)
392 {
393         int len = strlen(pool);
394
395         if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
396                 return (B_FALSE);
397
398         if (strncmp(pool, bootfs, len) == 0 &&
399             (bootfs[len] == '/' || bootfs[len] == '\0'))
400                 return (B_TRUE);
401
402         return (B_FALSE);
403 }
404
/*
 * Inspect the configuration to determine if any of the devices contain
 * an EFI label.  Only meaningful on Solaris; non-sun builds compile out
 * the check and always report B_FALSE.
 */
static boolean_t
pool_uses_efi(nvlist_t *config)
{
#ifdef sun
	nvlist_t **child;
	uint_t c, children;

	/* A leaf vdev has no children array; probe its own label. */
	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		return (read_efi_label(config, NULL) >= 0);

	/* Interior vdev: recurse into every child. */
	for (c = 0; c < children; c++) {
		if (pool_uses_efi(child[c]))
			return (B_TRUE);
	}
#endif	/* sun */
	return (B_FALSE);
}
427
428 boolean_t
429 zpool_is_bootable(zpool_handle_t *zhp)
430 {
431         char bootfs[ZPOOL_MAXNAMELEN];
432
433         return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
434             sizeof (bootfs), NULL, B_FALSE) == 0 && strncmp(bootfs, "-",
435             sizeof (bootfs)) != 0);
436 }
437
438
439 /*
440  * Given an nvlist of zpool properties to be set, validate that they are
441  * correct, and parse any numeric properties (index, boolean, etc) if they are
442  * specified as strings.
443  */
444 static nvlist_t *
445 zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
446     nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
447 {
448         nvpair_t *elem;
449         nvlist_t *retprops;
450         zpool_prop_t prop;
451         char *strval;
452         uint64_t intval;
453         char *slash, *check;
454         struct stat64 statbuf;
455         zpool_handle_t *zhp;
456         nvlist_t *nvroot;
457
458         if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
459                 (void) no_memory(hdl);
460                 return (NULL);
461         }
462
463         elem = NULL;
464         while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
465                 const char *propname = nvpair_name(elem);
466
467                 prop = zpool_name_to_prop(propname);
468                 if (prop == ZPROP_INVAL && zpool_prop_feature(propname)) {
469                         int err;
470                         char *fname = strchr(propname, '@') + 1;
471
472                         err = zfeature_lookup_name(fname, NULL);
473                         if (err != 0) {
474                                 ASSERT3U(err, ==, ENOENT);
475                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
476                                     "invalid feature '%s'"), fname);
477                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
478                                 goto error;
479                         }
480
481                         if (nvpair_type(elem) != DATA_TYPE_STRING) {
482                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
483                                     "'%s' must be a string"), propname);
484                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
485                                 goto error;
486                         }
487
488                         (void) nvpair_value_string(elem, &strval);
489                         if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0) {
490                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
491                                     "property '%s' can only be set to "
492                                     "'enabled'"), propname);
493                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
494                                 goto error;
495                         }
496
497                         if (nvlist_add_uint64(retprops, propname, 0) != 0) {
498                                 (void) no_memory(hdl);
499                                 goto error;
500                         }
501                         continue;
502                 }
503
504                 /*
505                  * Make sure this property is valid and applies to this type.
506                  */
507                 if (prop == ZPROP_INVAL) {
508                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
509                             "invalid property '%s'"), propname);
510                         (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
511                         goto error;
512                 }
513
514                 if (zpool_prop_readonly(prop)) {
515                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
516                             "is readonly"), propname);
517                         (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
518                         goto error;
519                 }
520
521                 if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
522                     &strval, &intval, errbuf) != 0)
523                         goto error;
524
525                 /*
526                  * Perform additional checking for specific properties.
527                  */
528                 switch (prop) {
529                 case ZPOOL_PROP_VERSION:
530                         if (intval < version ||
531                             !SPA_VERSION_IS_SUPPORTED(intval)) {
532                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
533                                     "property '%s' number %d is invalid."),
534                                     propname, intval);
535                                 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
536                                 goto error;
537                         }
538                         break;
539
540                 case ZPOOL_PROP_BOOTFS:
541                         if (flags.create || flags.import) {
542                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
543                                     "property '%s' cannot be set at creation "
544                                     "or import time"), propname);
545                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
546                                 goto error;
547                         }
548
549                         if (version < SPA_VERSION_BOOTFS) {
550                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
551                                     "pool must be upgraded to support "
552                                     "'%s' property"), propname);
553                                 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
554                                 goto error;
555                         }
556
557                         /*
558                          * bootfs property value has to be a dataset name and
559                          * the dataset has to be in the same pool as it sets to.
560                          */
561                         if (strval[0] != '\0' && !bootfs_name_valid(poolname,
562                             strval)) {
563                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
564                                     "is an invalid name"), strval);
565                                 (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
566                                 goto error;
567                         }
568
569                         if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
570                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
571                                     "could not open pool '%s'"), poolname);
572                                 (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
573                                 goto error;
574                         }
575                         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
576                             ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
577
578 #ifdef sun
579                         /*
580                          * bootfs property cannot be set on a disk which has
581                          * been EFI labeled.
582                          */
583                         if (pool_uses_efi(nvroot)) {
584                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
585                                     "property '%s' not supported on "
586                                     "EFI labeled devices"), propname);
587                                 (void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf);
588                                 zpool_close(zhp);
589                                 goto error;
590                         }
591 #endif  /* sun */
592                         zpool_close(zhp);
593                         break;
594
595                 case ZPOOL_PROP_ALTROOT:
596                         if (!flags.create && !flags.import) {
597                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
598                                     "property '%s' can only be set during pool "
599                                     "creation or import"), propname);
600                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
601                                 goto error;
602                         }
603
604                         if (strval[0] != '/') {
605                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
606                                     "bad alternate root '%s'"), strval);
607                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
608                                 goto error;
609                         }
610                         break;
611
612                 case ZPOOL_PROP_CACHEFILE:
613                         if (strval[0] == '\0')
614                                 break;
615
616                         if (strcmp(strval, "none") == 0)
617                                 break;
618
619                         if (strval[0] != '/') {
620                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
621                                     "property '%s' must be empty, an "
622                                     "absolute path, or 'none'"), propname);
623                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
624                                 goto error;
625                         }
626
627                         slash = strrchr(strval, '/');
628
629                         if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
630                             strcmp(slash, "/..") == 0) {
631                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
632                                     "'%s' is not a valid file"), strval);
633                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
634                                 goto error;
635                         }
636
637                         *slash = '\0';
638
639                         if (strval[0] != '\0' &&
640                             (stat64(strval, &statbuf) != 0 ||
641                             !S_ISDIR(statbuf.st_mode))) {
642                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
643                                     "'%s' is not a valid directory"),
644                                     strval);
645                                 (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
646                                 goto error;
647                         }
648
649                         *slash = '/';
650                         break;
651
652                 case ZPOOL_PROP_COMMENT:
653                         for (check = strval; *check != '\0'; check++) {
654                                 if (!isprint(*check)) {
655                                         zfs_error_aux(hdl,
656                                             dgettext(TEXT_DOMAIN,
657                                             "comment may only have printable "
658                                             "characters"));
659                                         (void) zfs_error(hdl, EZFS_BADPROP,
660                                             errbuf);
661                                         goto error;
662                                 }
663                         }
664                         if (strlen(strval) > ZPROP_MAX_COMMENT) {
665                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
666                                     "comment must not exceed %d characters"),
667                                     ZPROP_MAX_COMMENT);
668                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
669                                 goto error;
670                         }
671                         break;
672                 case ZPOOL_PROP_READONLY:
673                         if (!flags.import) {
674                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
675                                     "property '%s' can only be set at "
676                                     "import time"), propname);
677                                 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
678                                 goto error;
679                         }
680                         break;
681                 }
682         }
683
684         return (retprops);
685 error:
686         nvlist_free(retprops);
687         return (NULL);
688 }
689
690 /*
691  * Set zpool property : propname=propval.
692  */
693 int
694 zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
695 {
696         zfs_cmd_t zc = { 0 };
697         int ret = -1;
698         char errbuf[1024];
699         nvlist_t *nvl = NULL;
700         nvlist_t *realprops;
701         uint64_t version;
702         prop_flags_t flags = { 0 };
703
704         (void) snprintf(errbuf, sizeof (errbuf),
705             dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
706             zhp->zpool_name);
707
708         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
709                 return (no_memory(zhp->zpool_hdl));
710
711         if (nvlist_add_string(nvl, propname, propval) != 0) {
712                 nvlist_free(nvl);
713                 return (no_memory(zhp->zpool_hdl));
714         }
715
716         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
717         if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
718             zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
719                 nvlist_free(nvl);
720                 return (-1);
721         }
722
723         nvlist_free(nvl);
724         nvl = realprops;
725
726         /*
727          * Execute the corresponding ioctl() to set this property.
728          */
729         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
730
731         if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
732                 nvlist_free(nvl);
733                 return (-1);
734         }
735
736         ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
737
738         zcmd_free_nvlists(&zc);
739         nvlist_free(nvl);
740
741         if (ret)
742                 (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
743         else
744                 (void) zpool_props_refresh(zhp);
745
746         return (ret);
747 }
748
/*
 * Expand the property list '*plp' for display: append entries for every
 * known feature (feature@...) on the first expansion, then any features
 * active on this pool that the library does not recognize
 * (unsupported@...), and finally widen each fixed-property column to fit
 * this pool's values.  Returns 0 on success, -1 on failure.
 */
int
zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
{
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	zprop_list_t *entry;
	char buf[ZFS_MAXPROPLEN];
	nvlist_t *features = NULL;
	zprop_list_t **last;
	/* Only seed the feature@ entries the first time the list is built. */
	boolean_t firstexpand = (NULL == *plp);

	if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
		return (-1);

	/* 'last' tracks the tail pointer so appends are O(1). */
	last = plp;
	while (*last != NULL)
		last = &(*last)->pl_next;

	if ((*plp)->pl_all)
		features = zpool_get_features(zhp);

	if ((*plp)->pl_all && firstexpand) {
		/* Append one entry per feature in the static feature table. */
		for (int i = 0; i < SPA_FEATURES; i++) {
			zprop_list_t *entry = zfs_alloc(hdl,
			    sizeof (zprop_list_t));
			entry->pl_prop = ZPROP_INVAL;
			entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
			    spa_feature_table[i].fi_uname);
			entry->pl_width = strlen(entry->pl_user_prop);
			entry->pl_all = B_TRUE;

			*last = entry;
			last = &entry->pl_next;
		}
	}

	/* add any unsupported features */
	for (nvpair_t *nvp = nvlist_next_nvpair(features, NULL);
	    nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) {
		char *propname;
		boolean_t found;
		zprop_list_t *entry;

		if (zfeature_is_supported(nvpair_name(nvp)))
			continue;

		propname = zfs_asprintf(hdl, "unsupported@%s",
		    nvpair_name(nvp));

		/*
		 * Before adding the property to the list make sure that no
		 * other pool already added the same property.
		 */
		found = B_FALSE;
		entry = *plp;
		while (entry != NULL) {
			if (entry->pl_user_prop != NULL &&
			    strcmp(propname, entry->pl_user_prop) == 0) {
				found = B_TRUE;
				break;
			}
			entry = entry->pl_next;
		}
		if (found) {
			free(propname);
			continue;
		}

		entry = zfs_alloc(hdl, sizeof (zprop_list_t));
		entry->pl_prop = ZPROP_INVAL;
		entry->pl_user_prop = propname;
		entry->pl_width = strlen(entry->pl_user_prop);
		entry->pl_all = B_TRUE;

		*last = entry;
		last = &entry->pl_next;
	}

	/* Widen each non-fixed column to fit this pool's rendered value. */
	for (entry = *plp; entry != NULL; entry = entry->pl_next) {

		if (entry->pl_fixed)
			continue;

		if (entry->pl_prop != ZPROP_INVAL &&
		    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
		    NULL, B_FALSE) == 0) {
			if (strlen(buf) > entry->pl_width)
				entry->pl_width = strlen(buf);
		}
	}

	return (0);
}
841
842 /*
843  * Get the state for the given feature on the given ZFS pool.
844  */
845 int
846 zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
847     size_t len)
848 {
849         uint64_t refcount;
850         boolean_t found = B_FALSE;
851         nvlist_t *features = zpool_get_features(zhp);
852         boolean_t supported;
853         const char *feature = strchr(propname, '@') + 1;
854
855         supported = zpool_prop_feature(propname);
856         ASSERT(supported || zpool_prop_unsupported(propname));
857
858         /*
859          * Convert from feature name to feature guid. This conversion is
860          * unecessary for unsupported@... properties because they already
861          * use guids.
862          */
863         if (supported) {
864                 int ret;
865                 spa_feature_t fid;
866
867                 ret = zfeature_lookup_name(feature, &fid);
868                 if (ret != 0) {
869                         (void) strlcpy(buf, "-", len);
870                         return (ENOTSUP);
871                 }
872                 feature = spa_feature_table[fid].fi_guid;
873         }
874
875         if (nvlist_lookup_uint64(features, feature, &refcount) == 0)
876                 found = B_TRUE;
877
878         if (supported) {
879                 if (!found) {
880                         (void) strlcpy(buf, ZFS_FEATURE_DISABLED, len);
881                 } else  {
882                         if (refcount == 0)
883                                 (void) strlcpy(buf, ZFS_FEATURE_ENABLED, len);
884                         else
885                                 (void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len);
886                 }
887         } else {
888                 if (found) {
889                         if (refcount == 0) {
890                                 (void) strcpy(buf, ZFS_UNSUPPORTED_INACTIVE);
891                         } else {
892                                 (void) strcpy(buf, ZFS_UNSUPPORTED_READONLY);
893                         }
894                 } else {
895                         (void) strlcpy(buf, "-", len);
896                         return (ENOTSUP);
897                 }
898         }
899
900         return (0);
901 }
902
903 /*
904  * Don't start the slice at the default block of 34; many storage
905  * devices will use a stripe width of 128k, so start there instead.
906  */
907 #define NEW_START_BLOCK 256
908
909 /*
910  * Validate the given pool name, optionally putting an extended error message in
911  * 'buf'.
912  */
913 boolean_t
914 zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
915 {
916         namecheck_err_t why;
917         char what;
918         int ret;
919
920         ret = pool_namecheck(pool, &why, &what);
921
922         /*
923          * The rules for reserved pool names were extended at a later point.
924          * But we need to support users with existing pools that may now be
925          * invalid.  So we only check for this expanded set of names during a
926          * create (or import), and only in userland.
927          */
928         if (ret == 0 && !isopen &&
929             (strncmp(pool, "mirror", 6) == 0 ||
930             strncmp(pool, "raidz", 5) == 0 ||
931             strncmp(pool, "spare", 5) == 0 ||
932             strcmp(pool, "log") == 0)) {
933                 if (hdl != NULL)
934                         zfs_error_aux(hdl,
935                             dgettext(TEXT_DOMAIN, "name is reserved"));
936                 return (B_FALSE);
937         }
938
939
940         if (ret != 0) {
941                 if (hdl != NULL) {
942                         switch (why) {
943                         case NAME_ERR_TOOLONG:
944                                 zfs_error_aux(hdl,
945                                     dgettext(TEXT_DOMAIN, "name is too long"));
946                                 break;
947
948                         case NAME_ERR_INVALCHAR:
949                                 zfs_error_aux(hdl,
950                                     dgettext(TEXT_DOMAIN, "invalid character "
951                                     "'%c' in pool name"), what);
952                                 break;
953
954                         case NAME_ERR_NOLETTER:
955                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
956                                     "name must begin with a letter"));
957                                 break;
958
959                         case NAME_ERR_RESERVED:
960                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
961                                     "name is reserved"));
962                                 break;
963
964                         case NAME_ERR_DISKLIKE:
965                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
966                                     "pool name is reserved"));
967                                 break;
968
969                         case NAME_ERR_LEADING_SLASH:
970                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
971                                     "leading slash in name"));
972                                 break;
973
974                         case NAME_ERR_EMPTY_COMPONENT:
975                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
976                                     "empty component in name"));
977                                 break;
978
979                         case NAME_ERR_TRAILING_SLASH:
980                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
981                                     "trailing slash in name"));
982                                 break;
983
984                         case NAME_ERR_MULTIPLE_AT:
985                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
986                                     "multiple '@' delimiters in name"));
987                                 break;
988
989                         }
990                 }
991                 return (B_FALSE);
992         }
993
994         return (B_TRUE);
995 }
996
997 /*
998  * Open a handle to the given pool, even if the pool is currently in the FAULTED
999  * state.
1000  */
1001 zpool_handle_t *
1002 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
1003 {
1004         zpool_handle_t *zhp;
1005         boolean_t missing;
1006
1007         /*
1008          * Make sure the pool name is valid.
1009          */
1010         if (!zpool_name_valid(hdl, B_TRUE, pool)) {
1011                 (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1012                     dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1013                     pool);
1014                 return (NULL);
1015         }
1016
1017         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1018                 return (NULL);
1019
1020         zhp->zpool_hdl = hdl;
1021         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1022
1023         if (zpool_refresh_stats(zhp, &missing) != 0) {
1024                 zpool_close(zhp);
1025                 return (NULL);
1026         }
1027
1028         if (missing) {
1029                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
1030                 (void) zfs_error_fmt(hdl, EZFS_NOENT,
1031                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
1032                 zpool_close(zhp);
1033                 return (NULL);
1034         }
1035
1036         return (zhp);
1037 }
1038
1039 /*
1040  * Like the above, but silent on error.  Used when iterating over pools (because
1041  * the configuration cache may be out of date).
1042  */
1043 int
1044 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
1045 {
1046         zpool_handle_t *zhp;
1047         boolean_t missing;
1048
1049         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1050                 return (-1);
1051
1052         zhp->zpool_hdl = hdl;
1053         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1054
1055         if (zpool_refresh_stats(zhp, &missing) != 0) {
1056                 zpool_close(zhp);
1057                 return (-1);
1058         }
1059
1060         if (missing) {
1061                 zpool_close(zhp);
1062                 *ret = NULL;
1063                 return (0);
1064         }
1065
1066         *ret = zhp;
1067         return (0);
1068 }
1069
1070 /*
1071  * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
1072  * state.
1073  */
1074 zpool_handle_t *
1075 zpool_open(libzfs_handle_t *hdl, const char *pool)
1076 {
1077         zpool_handle_t *zhp;
1078
1079         if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
1080                 return (NULL);
1081
1082         if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
1083                 (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
1084                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
1085                 zpool_close(zhp);
1086                 return (NULL);
1087         }
1088
1089         return (zhp);
1090 }
1091
1092 /*
1093  * Close the handle.  Simply frees the memory associated with the handle.
1094  */
1095 void
1096 zpool_close(zpool_handle_t *zhp)
1097 {
1098         if (zhp->zpool_config)
1099                 nvlist_free(zhp->zpool_config);
1100         if (zhp->zpool_old_config)
1101                 nvlist_free(zhp->zpool_old_config);
1102         if (zhp->zpool_props)
1103                 nvlist_free(zhp->zpool_props);
1104         free(zhp);
1105 }
1106
1107 /*
1108  * Return the name of the pool.
1109  */
const char *
zpool_get_name(zpool_handle_t *zhp)
{
	/*
	 * The name is stored inline in the handle, so the returned pointer
	 * remains valid until the handle is freed by zpool_close().
	 */
	return (zhp->zpool_name);
}
1115
1116
1117 /*
1118  * Return the state of the pool (ACTIVE or UNAVAILABLE)
1119  */
int
zpool_get_state(zpool_handle_t *zhp)
{
	/* State is cached in the handle by zpool_refresh_stats(). */
	return (zhp->zpool_state);
}
1125
1126 /*
1127  * Create the named pool, using the provided vdev list.  It is assumed
1128  * that the consumer has already validated the contents of the nvlist, so we
1129  * don't have to worry about error semantics.
1130  */
int
zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
    nvlist_t *props, nvlist_t *fsprops)
{
	zfs_cmd_t zc = { 0 };
	nvlist_t *zc_fsprops = NULL;
	nvlist_t *zc_props = NULL;
	char msg[1024];
	int ret = -1;

	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
	    "cannot create '%s'"), pool);

	if (!zpool_name_valid(hdl, B_FALSE, pool))
		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));

	/* Pack the vdev configuration into the ioctl request. */
	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);

	/* Validate any pool-level properties supplied by the caller. */
	if (props) {
		prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };

		if ((zc_props = zpool_valid_proplist(hdl, pool, props,
		    SPA_VERSION_1, flags, msg)) == NULL) {
			goto create_failed;
		}
	}

	/*
	 * Validate any root-dataset properties and nest them under
	 * ZPOOL_ROOTFS_PROPS within the pool property list.
	 */
	if (fsprops) {
		uint64_t zoned;
		char *zonestr;

		/* "zoned=on" changes which dataset properties are valid. */
		zoned = ((nvlist_lookup_string(fsprops,
		    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
		    strcmp(zonestr, "on") == 0);

		if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM,
		    fsprops, zoned, NULL, NULL, msg)) == NULL) {
			goto create_failed;
		}
		if (!zc_props &&
		    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
			goto create_failed;
		}
		if (nvlist_add_nvlist(zc_props,
		    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
			goto create_failed;
		}
	}

	if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
		goto create_failed;

	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));

	if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {

		zcmd_free_nvlists(&zc);
		nvlist_free(zc_props);
		nvlist_free(zc_fsprops);

		/* Translate the common create failures into useful errors. */
		switch (errno) {
		case EBUSY:
			/*
			 * This can happen if the user has specified the same
			 * device multiple times.  We can't reliably detect this
			 * until we try to add it and see we already have a
			 * label.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more vdevs refer to the same device"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ERANGE:
			/*
			 * This happens if the record size is smaller or larger
			 * than the allowed size range, or not a power of 2.
			 *
			 * NOTE: although zfs_valid_proplist is called earlier,
			 * this case may have slipped through since the
			 * pool does not exist yet and it is therefore
			 * impossible to read properties e.g. max blocksize
			 * from the pool.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "record size invalid"));
			return (zfs_error(hdl, EZFS_BADPROP, msg));

		case EOVERFLOW:
			/*
			 * This occurs when one of the devices is below
			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
			 * device was the problem device since there's no
			 * reliable way to determine device size from userland.
			 */
			{
				char buf[64];

				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));

				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "one or more devices is less than the "
				    "minimum size (%s)"), buf);
			}
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ENOSPC:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more devices is out of space"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ENOTBLK:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cache device must be a disk or disk slice"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		default:
			return (zpool_standard_error(hdl, errno, msg));
		}
	}

	/* The success path (ret == 0) also cleans up here. */
create_failed:
	zcmd_free_nvlists(&zc);
	nvlist_free(zc_props);
	nvlist_free(zc_fsprops);
	return (ret);
}
1258
1259 /*
1260  * Destroy the given pool.  It is up to the caller to ensure that there are no
1261  * datasets left in the pool.
1262  */
1263 int
1264 zpool_destroy(zpool_handle_t *zhp, const char *log_str)
1265 {
1266         zfs_cmd_t zc = { 0 };
1267         zfs_handle_t *zfp = NULL;
1268         libzfs_handle_t *hdl = zhp->zpool_hdl;
1269         char msg[1024];
1270
1271         if (zhp->zpool_state == POOL_STATE_ACTIVE &&
1272             (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
1273                 return (-1);
1274
1275         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1276         zc.zc_history = (uint64_t)(uintptr_t)log_str;
1277
1278         if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
1279                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1280                     "cannot destroy '%s'"), zhp->zpool_name);
1281
1282                 if (errno == EROFS) {
1283                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1284                             "one or more devices is read only"));
1285                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1286                 } else {
1287                         (void) zpool_standard_error(hdl, errno, msg);
1288                 }
1289
1290                 if (zfp)
1291                         zfs_close(zfp);
1292                 return (-1);
1293         }
1294
1295         if (zfp) {
1296                 remove_mountpoint(zfp);
1297                 zfs_close(zfp);
1298         }
1299
1300         return (0);
1301 }
1302
1303 /*
1304  * Add the given vdevs to the pool.  The caller must have already performed the
1305  * necessary verification to ensure that the vdev specification is well-formed.
1306  */
int
zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
{
	zfs_cmd_t zc = { 0 };
	int ret;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	char msg[1024];
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
	    "cannot add to '%s'"), zhp->zpool_name);

	/* Hot spares require at least SPA_VERSION_SPARES. */
	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
	    SPA_VERSION_SPARES &&
	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
		    "upgraded to add hot spares"));
		return (zfs_error(hdl, EZFS_BADVERSION, msg));
	}

	/* Bootable (root) pools cannot use spares carrying an EFI label. */
	if (zpool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) {
		uint64_t s;

		for (s = 0; s < nspares; s++) {
			char *path;

			/*
			 * NOTE(review): the string returned by
			 * zpool_vdev_name() below appears to be leaked on
			 * this error path -- confirm and free if so.
			 */
			if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
			    &path) == 0 && pool_uses_efi(spares[s])) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "device '%s' contains an EFI label and "
				    "cannot be used on root pools."),
				    zpool_vdev_name(hdl, NULL, spares[s],
				    B_FALSE));
				return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
			}
		}
	}

	/* L2ARC cache devices require at least SPA_VERSION_L2CACHE. */
	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
	    SPA_VERSION_L2CACHE &&
	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
		    "upgraded to add cache devices"));
		return (zfs_error(hdl, EZFS_BADVERSION, msg));
	}

	/* Pack the new vdev configuration and issue the add ioctl. */
	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);
	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
		/* Translate the common add failures into useful errors. */
		switch (errno) {
		case EBUSY:
			/*
			 * This can happen if the user has specified the same
			 * device multiple times.  We can't reliably detect this
			 * until we try to add it and see we already have a
			 * label.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more vdevs refer to the same device"));
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		case EOVERFLOW:
			/*
			 * This occurs when one of the devices is below
			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
			 * device was the problem device since there's no
			 * reliable way to determine device size from userland.
			 */
			{
				char buf[64];

				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));

				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "device is less than the minimum "
				    "size (%s)"), buf);
			}
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		case ENOTSUP:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "pool must be upgraded to add these vdevs"));
			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
			break;

		case EDOM:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "root pool can not have multiple vdevs"
			    " or separate logs"));
			(void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
			break;

		case ENOTBLK:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cache device must be a disk or disk slice"));
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		default:
			(void) zpool_standard_error(hdl, errno, msg);
		}

		ret = -1;
	} else {
		ret = 0;
	}

	zcmd_free_nvlists(&zc);

	return (ret);
}
1426
1427 /*
1428  * Exports the pool from the system.  The caller must ensure that there are no
1429  * mounted datasets in the pool.
1430  */
1431 static int
1432 zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce,
1433     const char *log_str)
1434 {
1435         zfs_cmd_t zc = { 0 };
1436         char msg[1024];
1437
1438         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1439             "cannot export '%s'"), zhp->zpool_name);
1440
1441         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1442         zc.zc_cookie = force;
1443         zc.zc_guid = hardforce;
1444         zc.zc_history = (uint64_t)(uintptr_t)log_str;
1445
1446         if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
1447                 switch (errno) {
1448                 case EXDEV:
1449                         zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
1450                             "use '-f' to override the following errors:\n"
1451                             "'%s' has an active shared spare which could be"
1452                             " used by other pools once '%s' is exported."),
1453                             zhp->zpool_name, zhp->zpool_name);
1454                         return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
1455                             msg));
1456                 default:
1457                         return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1458                             msg));
1459                 }
1460         }
1461
1462         return (0);
1463 }
1464
int
zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)
{
	/* Normal export: 'force' as requested, no hard-force. */
	return (zpool_export_common(zhp, force, B_FALSE, log_str));
}
1470
int
zpool_export_force(zpool_handle_t *zhp, const char *log_str)
{
	/* Hard-forced export: both force and hardforce set. */
	return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str));
}
1476
1477 static void
1478 zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
1479     nvlist_t *config)
1480 {
1481         nvlist_t *nv = NULL;
1482         uint64_t rewindto;
1483         int64_t loss = -1;
1484         struct tm t;
1485         char timestr[128];
1486
1487         if (!hdl->libzfs_printerr || config == NULL)
1488                 return;
1489
1490         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1491             nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) {
1492                 return;
1493         }
1494
1495         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1496                 return;
1497         (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1498
1499         if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1500             strftime(timestr, 128, 0, &t) != 0) {
1501                 if (dryrun) {
1502                         (void) printf(dgettext(TEXT_DOMAIN,
1503                             "Would be able to return %s "
1504                             "to its state as of %s.\n"),
1505                             name, timestr);
1506                 } else {
1507                         (void) printf(dgettext(TEXT_DOMAIN,
1508                             "Pool %s returned to its state as of %s.\n"),
1509                             name, timestr);
1510                 }
1511                 if (loss > 120) {
1512                         (void) printf(dgettext(TEXT_DOMAIN,
1513                             "%s approximately %lld "),
1514                             dryrun ? "Would discard" : "Discarded",
1515                             (loss + 30) / 60);
1516                         (void) printf(dgettext(TEXT_DOMAIN,
1517                             "minutes of transactions.\n"));
1518                 } else if (loss > 0) {
1519                         (void) printf(dgettext(TEXT_DOMAIN,
1520                             "%s approximately %lld "),
1521                             dryrun ? "Would discard" : "Discarded", loss);
1522                         (void) printf(dgettext(TEXT_DOMAIN,
1523                             "seconds of transactions.\n"));
1524                 }
1525         }
1526 }
1527
1528 void
1529 zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
1530     nvlist_t *config)
1531 {
1532         nvlist_t *nv = NULL;
1533         int64_t loss = -1;
1534         uint64_t edata = UINT64_MAX;
1535         uint64_t rewindto;
1536         struct tm t;
1537         char timestr[128];
1538
1539         if (!hdl->libzfs_printerr)
1540                 return;
1541
1542         if (reason >= 0)
1543                 (void) printf(dgettext(TEXT_DOMAIN, "action: "));
1544         else
1545                 (void) printf(dgettext(TEXT_DOMAIN, "\t"));
1546
1547         /* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
1548         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
1549             nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 ||
1550             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
1551                 goto no_info;
1552
1553         (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
1554         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
1555             &edata);
1556
1557         (void) printf(dgettext(TEXT_DOMAIN,
1558             "Recovery is possible, but will result in some data loss.\n"));
1559
1560         if (localtime_r((time_t *)&rewindto, &t) != NULL &&
1561             strftime(timestr, 128, 0, &t) != 0) {
1562                 (void) printf(dgettext(TEXT_DOMAIN,
1563                     "\tReturning the pool to its state as of %s\n"
1564                     "\tshould correct the problem.  "),
1565                     timestr);
1566         } else {
1567                 (void) printf(dgettext(TEXT_DOMAIN,
1568                     "\tReverting the pool to an earlier state "
1569                     "should correct the problem.\n\t"));
1570         }
1571
1572         if (loss > 120) {
1573                 (void) printf(dgettext(TEXT_DOMAIN,
1574                     "Approximately %lld minutes of data\n"
1575                     "\tmust be discarded, irreversibly.  "), (loss + 30) / 60);
1576         } else if (loss > 0) {
1577                 (void) printf(dgettext(TEXT_DOMAIN,
1578                     "Approximately %lld seconds of data\n"
1579                     "\tmust be discarded, irreversibly.  "), loss);
1580         }
1581         if (edata != 0 && edata != UINT64_MAX) {
1582                 if (edata == 1) {
1583                         (void) printf(dgettext(TEXT_DOMAIN,
1584                             "After rewind, at least\n"
1585                             "\tone persistent user-data error will remain.  "));
1586                 } else {
1587                         (void) printf(dgettext(TEXT_DOMAIN,
1588                             "After rewind, several\n"
1589                             "\tpersistent user-data errors will remain.  "));
1590                 }
1591         }
1592         (void) printf(dgettext(TEXT_DOMAIN,
1593             "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
1594             reason >= 0 ? "clear" : "import", name);
1595
1596         (void) printf(dgettext(TEXT_DOMAIN,
1597             "A scrub of the pool\n"
1598             "\tis strongly recommended after recovery.\n"));
1599         return;
1600
1601 no_info:
1602         (void) printf(dgettext(TEXT_DOMAIN,
1603             "Destroy and re-create the pool from\n\ta backup source.\n"));
1604 }
1605
1606 /*
1607  * zpool_import() is a contracted interface. Should be kept the same
1608  * if possible.
1609  *
1610  * Applications should use zpool_import_props() to import a pool with
1611  * new properties value to be set.
1612  */
1613 int
1614 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1615     char *altroot)
1616 {
1617         nvlist_t *props = NULL;
1618         int ret;
1619
1620         if (altroot != NULL) {
1621                 if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1622                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1623                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1624                             newname));
1625                 }
1626
1627                 if (nvlist_add_string(props,
1628                     zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
1629                     nvlist_add_string(props,
1630                     zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
1631                         nvlist_free(props);
1632                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1633                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1634                             newname));
1635                 }
1636         }
1637
1638         ret = zpool_import_props(hdl, config, newname, props,
1639             ZFS_IMPORT_NORMAL);
1640         if (props)
1641                 nvlist_free(props);
1642         return (ret);
1643 }
1644
1645 static void
1646 print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
1647     int indent)
1648 {
1649         nvlist_t **child;
1650         uint_t c, children;
1651         char *vname;
1652         uint64_t is_log = 0;
1653
1654         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
1655             &is_log);
1656
1657         if (name != NULL)
1658                 (void) printf("\t%*s%s%s\n", indent, "", name,
1659                     is_log ? " [log]" : "");
1660
1661         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1662             &child, &children) != 0)
1663                 return;
1664
1665         for (c = 0; c < children; c++) {
1666                 vname = zpool_vdev_name(hdl, NULL, child[c], B_TRUE);
1667                 print_vdev_tree(hdl, vname, child[c], indent + 2);
1668                 free(vname);
1669         }
1670 }
1671
1672 void
1673 zpool_print_unsup_feat(nvlist_t *config)
1674 {
1675         nvlist_t *nvinfo, *unsup_feat;
1676
1677         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) ==
1678             0);
1679         verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT,
1680             &unsup_feat) == 0);
1681
1682         for (nvpair_t *nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL;
1683             nvp = nvlist_next_nvpair(unsup_feat, nvp)) {
1684                 char *desc;
1685
1686                 verify(nvpair_type(nvp) == DATA_TYPE_STRING);
1687                 verify(nvpair_value_string(nvp, &desc) == 0);
1688
1689                 if (strlen(desc) > 0)
1690                         (void) printf("\t%s (%s)\n", nvpair_name(nvp), desc);
1691                 else
1692                         (void) printf("\t%s\n", nvpair_name(nvp));
1693         }
1694 }
1695
1696 /*
1697  * Import the given pool using the known configuration and a list of
1698  * properties to be set. The configuration should have come from
1699  * zpool_find_import(). The 'newname' parameters control whether the pool
1700  * is imported with a different name.
1701  */
int
zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
    nvlist_t *props, int flags)
{
	zfs_cmd_t zc = { 0 };
	zpool_rewind_policy_t policy;
	nvlist_t *nv = NULL;
	nvlist_t *nvinfo = NULL;
	nvlist_t *missing = NULL;
	char *thename;
	char *origname;
	int ret;
	int error = 0;
	char errbuf[1024];

	/* The pool's original name always comes from the config itself. */
	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
	    &origname) == 0);

	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
	    "cannot import pool '%s'"), origname);

	/* Import under the new name when one was given, else the original. */
	if (newname != NULL) {
		if (!zpool_name_valid(hdl, B_FALSE, newname))
			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
			    newname));
		thename = (char *)newname;
	} else {
		thename = origname;
	}

	if (props != NULL) {
		uint64_t version;
		/* Validate the properties in "import" mode, not "create". */
		prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };

		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
		    &version) == 0);

		/*
		 * zpool_valid_proplist() returns a new list; the caller's
		 * list is left untouched and the new one is consumed here.
		 */
		if ((props = zpool_valid_proplist(hdl, origname,
		    props, version, flags, errbuf)) == NULL)
			return (-1);
		if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
			nvlist_free(props);
			return (-1);
		}
		nvlist_free(props);
	}

	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));

	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    &zc.zc_guid) == 0);

	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
		zcmd_free_nvlists(&zc);
		return (-1);
	}
	/*
	 * Preallocate a destination buffer for the kernel's reply (load
	 * info / rewind data), sized generously from the config size.
	 */
	if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
		zcmd_free_nvlists(&zc);
		return (-1);
	}

	zc.zc_cookie = flags;
	/* Retry with a larger destination buffer while the reply won't fit. */
	while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
	    errno == ENOMEM) {
		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
			zcmd_free_nvlists(&zc);
			return (-1);
		}
	}
	if (ret != 0)
		error = errno;

	/* The reply nvlist may carry load/rewind info even on failure. */
	(void) zcmd_read_dst_nvlist(hdl, &zc, &nv);

	zcmd_free_nvlists(&zc);

	zpool_get_rewind_policy(config, &policy);

	if (error) {
		char desc[1024];

		/*
		 * Dry-run failed, but we print out what success
		 * looks like if we found a best txg
		 */
		if (policy.zrp_request & ZPOOL_TRY_REWIND) {
			zpool_rewind_exclaim(hdl, newname ? origname : thename,
			    B_TRUE, nv);
			nvlist_free(nv);
			return (-1);
		}

		if (newname == NULL)
			(void) snprintf(desc, sizeof (desc),
			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
			    thename);
		else
			(void) snprintf(desc, sizeof (desc),
			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
			    origname, thename);

		switch (error) {
		case ENOTSUP:
			/*
			 * The pool uses features (or a version) this system
			 * lacks; list unsupported features when the kernel
			 * reported them, and note read-only importability.
			 */
			if (nv != NULL && nvlist_lookup_nvlist(nv,
			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
			    nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) {
				(void) printf(dgettext(TEXT_DOMAIN, "This "
				    "pool uses the following feature(s) not "
				    "supported by this system:\n"));
				zpool_print_unsup_feat(nv);
				if (nvlist_exists(nvinfo,
				    ZPOOL_CONFIG_CAN_RDONLY)) {
					(void) printf(dgettext(TEXT_DOMAIN,
					    "All unsupported features are only "
					    "required for writing to the pool."
					    "\nThe pool can be imported using "
					    "'-o readonly=on'.\n"));
				}
			}
			/*
			 * Unsupported version.
			 */
			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
			break;

		case EINVAL:
			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
			break;

		case EROFS:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more devices is read only"));
			(void) zfs_error(hdl, EZFS_BADDEV, desc);
			break;

		case ENXIO:
			/*
			 * Devices are missing; print them when the kernel
			 * reported which, so '-m' can be suggested.
			 */
			if (nv && nvlist_lookup_nvlist(nv,
			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
			    nvlist_lookup_nvlist(nvinfo,
			    ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
				(void) printf(dgettext(TEXT_DOMAIN,
				    "The devices below are missing, use "
				    "'-m' to import the pool anyway:\n"));
				print_vdev_tree(hdl, NULL, missing, 2);
				(void) printf("\n");
			}
			(void) zpool_standard_error(hdl, error, desc);
			break;

		case EEXIST:
			(void) zpool_standard_error(hdl, error, desc);
			break;

		default:
			(void) zpool_standard_error(hdl, error, desc);
			/* Suggest rewind-based recovery where applicable. */
			zpool_explain_recover(hdl,
			    newname ? origname : thename, -error, nv);
			break;
		}

		nvlist_free(nv);
		ret = -1;
	} else {
		zpool_handle_t *zhp;

		/*
		 * This should never fail, but play it safe anyway.
		 */
		if (zpool_open_silent(hdl, thename, &zhp) != 0)
			ret = -1;
		else if (zhp != NULL)
			zpool_close(zhp);
		/* Report what a rewind (or rewind dry-run) did or would do. */
		if (policy.zrp_request &
		    (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
			zpool_rewind_exclaim(hdl, newname ? origname : thename,
			    ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), nv);
		}
		nvlist_free(nv);
		return (0);
	}

	return (ret);
}
1886
1887 /*
1888  * Scan the pool.
1889  */
int
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
{
	zfs_cmd_t zc = { 0 };
	char msg[1024];
	libzfs_handle_t *hdl = zhp->zpool_hdl;

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	zc.zc_cookie = func;

	/*
	 * ENOENT while *starting* a scan is treated as success (nothing to
	 * do); ENOENT while *cancelling* (POOL_SCAN_NONE) falls through to
	 * the error reporting below.
	 */
	if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 ||
	    (errno == ENOENT && func != POOL_SCAN_NONE))
		return (0);

	/*
	 * NOTE(review): the errno checks below assume snprintf() here
	 * leaves errno untouched on success -- confirm.
	 */
	if (func == POOL_SCAN_SCRUB) {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
	} else if (func == POOL_SCAN_NONE) {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
		    zc.zc_name);
	} else {
		assert(!"unexpected result");
	}

	if (errno == EBUSY) {
		nvlist_t *nvroot;
		pool_scan_stat_t *ps = NULL;
		uint_t psc;

		/*
		 * EBUSY means a scan is already in progress; look at the
		 * scan stats to say whether it is a scrub or a resilver.
		 */
		verify(nvlist_lookup_nvlist(zhp->zpool_config,
		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
		(void) nvlist_lookup_uint64_array(nvroot,
		    ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
		if (ps && ps->pss_func == POOL_SCAN_SCRUB)
			return (zfs_error(hdl, EZFS_SCRUBBING, msg));
		else
			return (zfs_error(hdl, EZFS_RESILVERING, msg));
	} else if (errno == ENOENT) {
		/* Cancellation was requested but no scrub is running. */
		return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
	} else {
		return (zpool_standard_error(hdl, errno, msg));
	}
}
1934
#ifdef illumos
/*
 * This provides a very minimal check whether a given string is likely a
 * c#t#d# style string.  Users of this are expected to do their own
 * verification of the s# part.
 * NOTE(review): isdigit() is given a plain char; on platforms where char
 * is signed, non-ASCII bytes would be UB -- presumably device names are
 * always ASCII here, but confirm.
 */
#define CTD_CHECK(str)  (str && str[0] == 'c' && isdigit(str[1]))

/*
 * More elaborate version for ones which may start with "/dev/dsk/"
 * and the like.
 */
static int
ctd_check_path(char *str) {
	/*
	 * If it starts with a slash, check the last component.
	 */
	if (str && str[0] == '/') {
		char *tmp = strrchr(str, '/');

		/*
		 * If it ends in "/old", check the second-to-last
		 * component of the string instead.
		 */
		if (tmp != str && strcmp(tmp, "/old") == 0) {
			/*
			 * Walk back to the previous '/'; the leading '/'
			 * at str[0] guarantees the loop terminates.
			 */
			for (tmp--; *tmp != '/'; tmp--)
				;
		}
		str = tmp + 1;
	}
	return (CTD_CHECK(str));
}
#endif
1968
1969 /*
1970  * Find a vdev that matches the search criteria specified. We use the
1971  * the nvpair name to determine how we should look for the device.
1972  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
1973  * spare; but FALSE if its an INUSE spare.
1974  */
static nvlist_t *
vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
    boolean_t *l2cache, boolean_t *log)
{
	uint_t c, children;
	nvlist_t **child;
	nvlist_t *ret;
	uint64_t is_log;
	char *srchkey;
	nvpair_t *pair = nvlist_next_nvpair(search, NULL);

	/* Nothing to look for */
	if (search == NULL || pair == NULL)
		return (NULL);

	/* Obtain the key we will use to search */
	srchkey = nvpair_name(pair);

	/* First test whether this vdev itself matches the search criteria. */
	switch (nvpair_type(pair)) {
	case DATA_TYPE_UINT64:
		/* A uint64 search key is only meaningful for the guid. */
		if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
			uint64_t srchval, theguid;

			verify(nvpair_value_uint64(pair, &srchval) == 0);
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
			    &theguid) == 0);
			if (theguid == srchval)
				return (nv);
		}
		break;

	case DATA_TYPE_STRING: {
		char *srchval, *val;

		verify(nvpair_value_string(pair, &srchval) == 0);
		if (nvlist_lookup_string(nv, srchkey, &val) != 0)
			break;

		/*
		 * Search for the requested value. Special cases:
		 *
		 * - ZPOOL_CONFIG_PATH for whole disk entries.  These end in
		 *   "s0" or "s0/old".  The "s0" part is hidden from the user,
		 *   but included in the string, so this matches around it.
		 * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
		 *
		 * Otherwise, all other searches are simple string compares.
		 */
#ifdef illumos
		if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 &&
		    ctd_check_path(val)) {
			uint64_t wholedisk = 0;

			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
			    &wholedisk);
			if (wholedisk) {
				int slen = strlen(srchval);
				int vlen = strlen(val);

				/* "s0" suffix means val is 2 chars longer. */
				if (slen != vlen - 2)
					break;

				/*
				 * make_leaf_vdev() should only set
				 * wholedisk for ZPOOL_CONFIG_PATHs which
				 * will include "/dev/dsk/", giving plenty of
				 * room for the indices used next.
				 */
				ASSERT(vlen >= 6);

				/*
				 * strings identical except trailing "s0"
				 */
				if (strcmp(&val[vlen - 2], "s0") == 0 &&
				    strncmp(srchval, val, slen) == 0)
					return (nv);

				/*
				 * strings identical except trailing "s0/old"
				 */
				if (strcmp(&val[vlen - 6], "s0/old") == 0 &&
				    strcmp(&srchval[slen - 4], "/old") == 0 &&
				    strncmp(srchval, val, slen - 4) == 0)
					return (nv);

				break;
			}
		} else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
#else
		if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
#endif
			char *type, *idx, *end, *p;
			uint64_t id, vdev_id;

			/*
			 * Determine our vdev type, keeping in mind
			 * that the srchval is composed of a type and
			 * vdev id pair (i.e. mirror-4).
			 */
			if ((type = strdup(srchval)) == NULL)
				return (NULL);

			if ((p = strrchr(type, '-')) == NULL) {
				free(type);
				break;
			}
			idx = p + 1;
			*p = '\0';

			/*
			 * If the types don't match then keep looking.
			 */
			if (strncmp(val, type, strlen(val)) != 0) {
				free(type);
				break;
			}

			/* Only interior vdevs carry a "type-id" name. */
			verify(strncmp(type, VDEV_TYPE_RAIDZ,
			    strlen(VDEV_TYPE_RAIDZ)) == 0 ||
			    strncmp(type, VDEV_TYPE_MIRROR,
			    strlen(VDEV_TYPE_MIRROR)) == 0);
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
			    &id) == 0);

			/* Clear errno so ERANGE from strtoull is detectable. */
			errno = 0;
			vdev_id = strtoull(idx, &end, 10);

			free(type);
			if (errno != 0)
				return (NULL);

			/*
			 * Now verify that we have the correct vdev id.
			 */
			if (vdev_id == id)
				return (nv);
		}

		/*
		 * Common case
		 */
		if (strcmp(srchval, val) == 0)
			return (nv);
		break;
	}

	default:
		break;
	}

	/* No match here; recurse into regular children, spares, and L2ARC. */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		return (NULL);

	for (c = 0; c < children; c++) {
		if ((ret = vdev_to_nvlist_iter(child[c], search,
		    avail_spare, l2cache, NULL)) != NULL) {
			/*
			 * The 'is_log' value is only set for the toplevel
			 * vdev, not the leaf vdevs.  So we always lookup the
			 * log device from the root of the vdev tree (where
			 * 'log' is non-NULL).
			 */
			if (log != NULL &&
			    nvlist_lookup_uint64(child[c],
			    ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
			    is_log) {
				*log = B_TRUE;
			}
			return (ret);
		}
	}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if ((ret = vdev_to_nvlist_iter(child[c], search,
			    avail_spare, l2cache, NULL)) != NULL) {
				/* Found among the spares. */
				*avail_spare = B_TRUE;
				return (ret);
			}
		}
	}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if ((ret = vdev_to_nvlist_iter(child[c], search,
			    avail_spare, l2cache, NULL)) != NULL) {
				/* Found among the cache devices. */
				*l2cache = B_TRUE;
				return (ret);
			}
		}
	}

	return (NULL);
}
2172
2173 /*
2174  * Given a physical path (minus the "/devices" prefix), find the
2175  * associated vdev.
2176  */
2177 nvlist_t *
2178 zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
2179     boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
2180 {
2181         nvlist_t *search, *nvroot, *ret;
2182
2183         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2184         verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
2185
2186         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2187             &nvroot) == 0);
2188
2189         *avail_spare = B_FALSE;
2190         *l2cache = B_FALSE;
2191         if (log != NULL)
2192                 *log = B_FALSE;
2193         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2194         nvlist_free(search);
2195
2196         return (ret);
2197 }
2198
2199 /*
2200  * Determine if we have an "interior" top-level vdev (i.e mirror/raidz).
2201  */
2202 boolean_t
2203 zpool_vdev_is_interior(const char *name)
2204 {
2205         if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
2206             strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
2207                 return (B_TRUE);
2208         return (B_FALSE);
2209 }
2210
2211 nvlist_t *
2212 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
2213     boolean_t *l2cache, boolean_t *log)
2214 {
2215         char buf[MAXPATHLEN];
2216         char *end;
2217         nvlist_t *nvroot, *search, *ret;
2218         uint64_t guid;
2219
2220         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2221
2222         guid = strtoull(path, &end, 10);
2223         if (guid != 0 && *end == '\0') {
2224                 verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
2225         } else if (zpool_vdev_is_interior(path)) {
2226                 verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
2227         } else if (path[0] != '/') {
2228                 (void) snprintf(buf, sizeof (buf), "%s%s", _PATH_DEV, path);
2229                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0);
2230         } else {
2231                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
2232         }
2233
2234         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2235             &nvroot) == 0);
2236
2237         *avail_spare = B_FALSE;
2238         *l2cache = B_FALSE;
2239         if (log != NULL)
2240                 *log = B_FALSE;
2241         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2242         nvlist_free(search);
2243
2244         return (ret);
2245 }
2246
2247 static int
2248 vdev_online(nvlist_t *nv)
2249 {
2250         uint64_t ival;
2251
2252         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
2253             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
2254             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
2255                 return (0);
2256
2257         return (1);
2258 }
2259
2260 /*
2261  * Helper function for zpool_get_physpaths().
2262  */
static int
vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
    size_t *bytes_written)
{
	size_t bytes_left, pos, rsz;
	char *tmppath;
	const char *format;

	/* Vdevs without a physical path (e.g. files) cannot contribute. */
	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
	    &tmppath) != 0)
		return (EZFS_NODEVICE);

	pos = *bytes_written;
	bytes_left = physpath_size - pos;
	/* Separate successive paths with a single space. */
	format = (pos == 0) ? "%s" : " %s";

	rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
	*bytes_written += rsz;

	/*
	 * snprintf() returns the would-be length, so rsz >= bytes_left
	 * means the path was truncated and the buffer is too small.
	 * NOTE(review): a negative snprintf() return (encoding error)
	 * would wrap to a huge size_t and also take this branch, but
	 * *bytes_written would already be corrupted -- confirm callers
	 * discard the buffer on any non-zero return.
	 */
	if (rsz >= bytes_left) {
		/* if physpath was not copied properly, clear it */
		if (bytes_left != 0) {
			physpath[pos] = 0;
		}
		return (EZFS_NOSPC);
	}
	return (0);
}
2291
static int
vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
    size_t *rsz, boolean_t is_spare)
{
	char *type;
	int ret;

	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
		return (EZFS_INVALCONFIG);

	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
		/*
		 * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
		 * For a spare vdev, we only want to boot from the active
		 * spare device.
		 */
		if (is_spare) {
			uint64_t spare = 0;
			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
			    &spare);
			if (!spare)
				return (EZFS_INVALCONFIG);
		}

		/* Only online disks contribute a path. */
		if (vdev_online(nv)) {
			if ((ret = vdev_get_one_physpath(nv, physpath,
			    phypath_size, rsz)) != 0)
				return (ret);
		}
	} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
	    strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
	    (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
		/* Recurse into mirror/replacing/spare children. */
		nvlist_t **child;
		uint_t count;
		int i, ret;

		if (nvlist_lookup_nvlist_array(nv,
		    ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
			return (EZFS_INVALCONFIG);

		/* Only a buffer-space failure aborts the walk early. */
		for (i = 0; i < count; i++) {
			ret = vdev_get_physpaths(child[i], physpath,
			    phypath_size, rsz, is_spare);
			if (ret == EZFS_NOSPC)
				return (ret);
		}
	}

	/*
	 * NOTE(review): EZFS_POOL_INVALARG is returned even after paths
	 * were appended successfully; the caller casts the return to void
	 * and judges success by *rsz instead -- confirm before relying on
	 * this return value elsewhere.
	 */
	return (EZFS_POOL_INVALARG);
}
2342
2343 /*
2344  * Get phys_path for a root pool config.
2345  * Return 0 on success; non-zero on failure.
2346  */
2347 static int
2348 zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
2349 {
2350         size_t rsz;
2351         nvlist_t *vdev_root;
2352         nvlist_t **child;
2353         uint_t count;
2354         char *type;
2355
2356         rsz = 0;
2357
2358         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2359             &vdev_root) != 0)
2360                 return (EZFS_INVALCONFIG);
2361
2362         if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
2363             nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
2364             &child, &count) != 0)
2365                 return (EZFS_INVALCONFIG);
2366
2367         /*
2368          * root pool can not have EFI labeled disks and can only have
2369          * a single top-level vdev.
2370          */
2371         if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1 ||
2372             pool_uses_efi(vdev_root))
2373                 return (EZFS_POOL_INVALARG);
2374
2375         (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
2376             B_FALSE);
2377
2378         /* No online devices */
2379         if (rsz == 0)
2380                 return (EZFS_NODEVICE);
2381
2382         return (0);
2383 }
2384
2385 /*
2386  * Get phys_path for a root pool
2387  * Return 0 on success; non-zero on failure.
2388  */
2389 int
2390 zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
2391 {
2392         return (zpool_get_config_physpath(zhp->zpool_config, physpath,
2393             phypath_size));
2394 }
2395
2396 /*
2397  * If the device has being dynamically expanded then we need to relabel
2398  * the disk to use the new unallocated space.
2399  */
2400 static int
2401 zpool_relabel_disk(libzfs_handle_t *hdl, const char *name)
2402 {
2403 #ifdef sun
2404         char path[MAXPATHLEN];
2405         char errbuf[1024];
2406         int fd, error;
2407         int (*_efi_use_whole_disk)(int);
2408
2409         if ((_efi_use_whole_disk = (int (*)(int))dlsym(RTLD_DEFAULT,
2410             "efi_use_whole_disk")) == NULL)
2411                 return (-1);
2412
2413         (void) snprintf(path, sizeof (path), "%s/%s", RDISK_ROOT, name);
2414
2415         if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
2416                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2417                     "relabel '%s': unable to open device"), name);
2418                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
2419         }
2420
2421         /*
2422          * It's possible that we might encounter an error if the device
2423          * does not have any unallocated space left. If so, we simply
2424          * ignore that error and continue on.
2425          */
2426         error = _efi_use_whole_disk(fd);
2427         (void) close(fd);
2428         if (error && error != VT_ENOSPC) {
2429                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2430                     "relabel '%s': unable to read disk capacity"), name);
2431                 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
2432         }
2433 #endif  /* sun */
2434         return (0);
2435 }
2436
2437 /*
2438  * Bring the specified vdev online.   The 'flags' parameter is a set of the
2439  * ZFS_ONLINE_* flags.
2440  */
int
zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
    vdev_state_t *newstate)
{
        zfs_cmd_t zc = { 0 };
        char msg[1024];
        nvlist_t *tgt;
        boolean_t avail_spare, l2cache, islog;
        libzfs_handle_t *hdl = zhp->zpool_hdl;

        /* Pick the error-message prefix based on the requested operation. */
        if (flags & ZFS_ONLINE_EXPAND) {
                (void) snprintf(msg, sizeof (msg),
                    dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
        } else {
                (void) snprintf(msg, sizeof (msg),
                    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
        }

        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
        if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
            &islog)) == NULL)
                return (zfs_error(hdl, EZFS_NODEVICE, msg));

        verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);

        /* Hot spares cannot be onlined/expanded through this path. */
        if (avail_spare)
                return (zfs_error(hdl, EZFS_ISSPARE, msg));

        /* Relabel whole disks before onlining when expansion is requested. */
        if (flags & ZFS_ONLINE_EXPAND ||
            zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
                char *pathname = NULL;
                uint64_t wholedisk = 0;

                (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
                    &wholedisk);
                verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
                    &pathname) == 0);

                /*
                 * XXX - L2ARC 1.0 devices can't support expansion.
                 */
                if (l2cache) {
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "cannot expand cache devices"));
                        return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
                }

                if (wholedisk) {
                        /*
                         * NOTE(review): assumes pathname begins with
                         * DISK_ROOT ("/dev/dsk") -- skips that prefix plus
                         * the '/' to get the bare device name; verify for
                         * paths outside /dev/dsk.
                         */
                        pathname += strlen(DISK_ROOT) + 1;
                        (void) zpool_relabel_disk(hdl, pathname);
                }
        }

        zc.zc_cookie = VDEV_STATE_ONLINE;
        zc.zc_obj = flags;

        if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
                /* EINVAL here indicates the device belongs to a split twin. */
                if (errno == EINVAL) {
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
                            "from this pool into a new one.  Use '%s' "
                            "instead"), "zpool detach");
                        return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
                }
                return (zpool_standard_error(hdl, errno, msg));
        }

        /* The kernel returns the resulting vdev state in zc_cookie. */
        *newstate = zc.zc_cookie;
        return (0);
}
2510
2511 /*
2512  * Take the specified vdev offline
2513  */
2514 int
2515 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
2516 {
2517         zfs_cmd_t zc = { 0 };
2518         char msg[1024];
2519         nvlist_t *tgt;
2520         boolean_t avail_spare, l2cache;
2521         libzfs_handle_t *hdl = zhp->zpool_hdl;
2522
2523         (void) snprintf(msg, sizeof (msg),
2524             dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
2525
2526         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2527         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2528             NULL)) == NULL)
2529                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2530
2531         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2532
2533         if (avail_spare)
2534                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2535
2536         zc.zc_cookie = VDEV_STATE_OFFLINE;
2537         zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
2538
2539         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2540                 return (0);
2541
2542         switch (errno) {
2543         case EBUSY:
2544
2545                 /*
2546                  * There are no other replicas of this device.
2547                  */
2548                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2549
2550         case EEXIST:
2551                 /*
2552                  * The log device has unplayed logs
2553                  */
2554                 return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
2555
2556         default:
2557                 return (zpool_standard_error(hdl, errno, msg));
2558         }
2559 }
2560
2561 /*
2562  * Mark the given vdev faulted.
2563  */
2564 int
2565 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2566 {
2567         zfs_cmd_t zc = { 0 };
2568         char msg[1024];
2569         libzfs_handle_t *hdl = zhp->zpool_hdl;
2570
2571         (void) snprintf(msg, sizeof (msg),
2572             dgettext(TEXT_DOMAIN, "cannot fault %llu"), guid);
2573
2574         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2575         zc.zc_guid = guid;
2576         zc.zc_cookie = VDEV_STATE_FAULTED;
2577         zc.zc_obj = aux;
2578
2579         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2580                 return (0);
2581
2582         switch (errno) {
2583         case EBUSY:
2584
2585                 /*
2586                  * There are no other replicas of this device.
2587                  */
2588                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2589
2590         default:
2591                 return (zpool_standard_error(hdl, errno, msg));
2592         }
2593
2594 }
2595
2596 /*
2597  * Mark the given vdev degraded.
2598  */
2599 int
2600 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2601 {
2602         zfs_cmd_t zc = { 0 };
2603         char msg[1024];
2604         libzfs_handle_t *hdl = zhp->zpool_hdl;
2605
2606         (void) snprintf(msg, sizeof (msg),
2607             dgettext(TEXT_DOMAIN, "cannot degrade %llu"), guid);
2608
2609         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2610         zc.zc_guid = guid;
2611         zc.zc_cookie = VDEV_STATE_DEGRADED;
2612         zc.zc_obj = aux;
2613
2614         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2615                 return (0);
2616
2617         return (zpool_standard_error(hdl, errno, msg));
2618 }
2619
2620 /*
2621  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
2622  * a hot spare.
2623  */
2624 static boolean_t
2625 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
2626 {
2627         nvlist_t **child;
2628         uint_t c, children;
2629         char *type;
2630
2631         if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
2632             &children) == 0) {
2633                 verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
2634                     &type) == 0);
2635
2636                 if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
2637                     children == 2 && child[which] == tgt)
2638                         return (B_TRUE);
2639
2640                 for (c = 0; c < children; c++)
2641                         if (is_replacing_spare(child[c], tgt, which))
2642                                 return (B_TRUE);
2643         }
2644
2645         return (B_FALSE);
2646 }
2647
2648 /*
2649  * Attach new_disk (fully described by nvroot) to old_disk.
2650  * If 'replacing' is specified, the new disk will replace the old one.
2651  */
int
zpool_vdev_attach(zpool_handle_t *zhp,
    const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
{
        zfs_cmd_t zc = { 0 };
        char msg[1024];
        int ret;
        nvlist_t *tgt;
        boolean_t avail_spare, l2cache, islog;
        uint64_t val;
        char *newname;
        nvlist_t **child;
        uint_t children;
        nvlist_t *config_root;
        libzfs_handle_t *hdl = zhp->zpool_hdl;
        boolean_t rootpool = zpool_is_bootable(zhp);

        /* Error-message prefix differs between replace and attach. */
        if (replacing)
                (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
                    "cannot replace %s with %s"), old_disk, new_disk);
        else
                (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
                    "cannot attach %s to %s"), new_disk, old_disk);

        /*
         * If this is a root pool, make sure that we're not attaching an
         * EFI labeled device.
         */
        if (rootpool && pool_uses_efi(nvroot)) {
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                    "EFI labeled devices are not supported on root pools."));
                return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
        }

        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
        if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
            &islog)) == 0)
                return (zfs_error(hdl, EZFS_NODEVICE, msg));

        /* Cannot attach to spares or cache devices. */
        if (avail_spare)
                return (zfs_error(hdl, EZFS_ISSPARE, msg));

        if (l2cache)
                return (zfs_error(hdl, EZFS_ISL2CACHE, msg));

        verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
        zc.zc_cookie = replacing;

        /* The caller-supplied nvroot must describe exactly one disk. */
        if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
            &child, &children) != 0 || children != 1) {
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                    "new device must be a single disk"));
                return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
        }

        verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
            ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);

        /* newname is heap-allocated; freed on every path below. */
        if ((newname = zpool_vdev_name(NULL, NULL, child[0], B_FALSE)) == NULL)
                return (-1);

        /*
         * If the target is a hot spare that has been swapped in, we can only
         * replace it with another hot spare.
         */
        if (replacing &&
            nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
            (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
            NULL) == NULL || !avail_spare) &&
            is_replacing_spare(config_root, tgt, 1)) {
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                    "can only be replaced by another hot spare"));
                free(newname);
                return (zfs_error(hdl, EZFS_BADTARGET, msg));
        }

        free(newname);

        if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
                return (-1);

        ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);

        zcmd_free_nvlists(&zc);

        if (ret == 0) {
                if (rootpool) {
                        /*
                         * XXX need a better way to prevent user from
                         * booting up a half-baked vdev.
                         */
                        (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
                            "sure to wait until resilver is done "
                            "before rebooting.\n"));
                        (void) fprintf(stderr, "\n");
                        (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "If "
                            "you boot from pool '%s', you may need to update\n"
                            "boot code on newly attached disk '%s'.\n\n"
                            "Assuming you use GPT partitioning and 'da0' is "
                            "your new boot disk\n"
                            "you may use the following command:\n\n"
                            "\tgpart bootcode -b /boot/pmbr -p "
                            "/boot/gptzfsboot -i 1 da0\n\n"),
                            zhp->zpool_name, new_disk);
                }
                return (0);
        }

        /* Translate the ioctl errno into a user-facing diagnostic. */
        switch (errno) {
        case ENOTSUP:
                /*
                 * Can't attach to or replace this type of vdev.
                 */
                if (replacing) {
                        uint64_t version = zpool_get_prop_int(zhp,
                            ZPOOL_PROP_VERSION, NULL);

                        if (islog)
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "cannot replace a log with a spare"));
                        else if (version >= SPA_VERSION_MULTI_REPLACE)
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "already in replacing/spare config; wait "
                                    "for completion or use 'zpool detach'"));
                        else
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "cannot replace a replacing device"));
                } else {
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "can only attach to mirrors and top-level "
                            "disks"));
                }
                (void) zfs_error(hdl, EZFS_BADTARGET, msg);
                break;

        case EINVAL:
                /*
                 * The new device must be a single disk.
                 */
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                    "new device must be a single disk"));
                (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
                break;

        case EBUSY:
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
                    new_disk);
                (void) zfs_error(hdl, EZFS_BADDEV, msg);
                break;

        case EOVERFLOW:
                /*
                 * The new device is too small.
                 */
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                    "device is too small"));
                (void) zfs_error(hdl, EZFS_BADDEV, msg);
                break;

        case EDOM:
                /*
                 * The new device has a different alignment requirement.
                 */
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                    "devices have different sector alignment"));
                (void) zfs_error(hdl, EZFS_BADDEV, msg);
                break;

        case ENAMETOOLONG:
                /*
                 * The resulting top-level vdev spec won't fit in the label.
                 */
                (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
                break;

        default:
                (void) zpool_standard_error(hdl, errno, msg);
        }

        return (-1);
}
2833
2834 /*
2835  * Detach the specified device.
2836  */
2837 int
2838 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
2839 {
2840         zfs_cmd_t zc = { 0 };
2841         char msg[1024];
2842         nvlist_t *tgt;
2843         boolean_t avail_spare, l2cache;
2844         libzfs_handle_t *hdl = zhp->zpool_hdl;
2845
2846         (void) snprintf(msg, sizeof (msg),
2847             dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
2848
2849         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2850         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2851             NULL)) == 0)
2852                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2853
2854         if (avail_spare)
2855                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2856
2857         if (l2cache)
2858                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
2859
2860         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2861
2862         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
2863                 return (0);
2864
2865         switch (errno) {
2866
2867         case ENOTSUP:
2868                 /*
2869                  * Can't detach from this type of vdev.
2870                  */
2871                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
2872                     "applicable to mirror and replacing vdevs"));
2873                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
2874                 break;
2875
2876         case EBUSY:
2877                 /*
2878                  * There are no other replicas of this device.
2879                  */
2880                 (void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
2881                 break;
2882
2883         default:
2884                 (void) zpool_standard_error(hdl, errno, msg);
2885         }
2886
2887         return (-1);
2888 }
2889
2890 /*
2891  * Find a mirror vdev in the source nvlist.
2892  *
2893  * The mchild array contains a list of disks in one of the top-level mirrors
2894  * of the source pool.  The schild array contains a list of disks that the
2895  * user specified on the command line.  We loop over the mchild array to
2896  * see if any entry in the schild array matches.
2897  *
2898  * If a disk in the mchild array is found in the schild array, we return
2899  * the index of that entry.  Otherwise we return -1.
2900  */
2901 static int
2902 find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
2903     nvlist_t **schild, uint_t schildren)
2904 {
2905         uint_t mc;
2906
2907         for (mc = 0; mc < mchildren; mc++) {
2908                 uint_t sc;
2909                 char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2910                     mchild[mc], B_FALSE);
2911
2912                 for (sc = 0; sc < schildren; sc++) {
2913                         char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
2914                             schild[sc], B_FALSE);
2915                         boolean_t result = (strcmp(mpath, spath) == 0);
2916
2917                         free(spath);
2918                         if (result) {
2919                                 free(mpath);
2920                                 return (mc);
2921                         }
2922                 }
2923
2924                 free(mpath);
2925         }
2926
2927         return (-1);
2928 }
2929
2930 /*
2931  * Split a mirror pool.  If newroot points to null, then a new nvlist
2932  * is generated and it is the responsibility of the caller to free it.
2933  */
int
zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
    nvlist_t *props, splitflags_t flags)
{
        zfs_cmd_t zc = { 0 };
        char msg[1024];
        nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
        nvlist_t **varray = NULL, *zc_props = NULL;
        uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
        libzfs_handle_t *hdl = zhp->zpool_hdl;
        uint64_t vers;
        /* freelist: free *newroot on exit; memory_err: report no_memory(). */
        boolean_t freelist = B_FALSE, memory_err = B_TRUE;
        int retval = 0;

        (void) snprintf(msg, sizeof (msg),
            dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);

        if (!zpool_name_valid(hdl, B_FALSE, newname))
                return (zfs_error(hdl, EZFS_INVALIDNAME, msg));

        if ((config = zpool_get_config(zhp, NULL)) == NULL) {
                (void) fprintf(stderr, gettext("Internal error: unable to "
                    "retrieve pool configuration\n"));
                return (-1);
        }

        verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
            == 0);
        verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);

        /* Validate any pool properties destined for the new pool. */
        if (props) {
                prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
                if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
                    props, vers, flags, msg)) == NULL)
                        return (-1);
        }

        if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
            &children) != 0) {
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                    "Source pool is missing vdev tree"));
                if (zc_props)
                        nvlist_free(zc_props);
                return (-1);
        }

        varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
        vcount = 0;

        /* If caller supplied a vdev tree, collect the disks it names. */
        if (*newroot == NULL ||
            nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
            &newchild, &newchildren) != 0)
                newchildren = 0;

        for (c = 0; c < children; c++) {
                uint64_t is_log = B_FALSE, is_hole = B_FALSE;
                char *type;
                nvlist_t **mchild, *vdev;
                uint_t mchildren;
                int entry;

                /*
                 * Unlike cache & spares, slogs are stored in the
                 * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
                 */
                (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
                    &is_log);
                (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
                    &is_hole);
                if (is_log || is_hole) {
                        /*
                         * Create a hole vdev and put it in the config.
                         */
                        if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
                                goto out;
                        if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
                            VDEV_TYPE_HOLE) != 0)
                                goto out;
                        if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
                            1) != 0)
                                goto out;
                        /* Remember where a trailing run of holes begins. */
                        if (lastlog == 0)
                                lastlog = vcount;
                        varray[vcount++] = vdev;
                        continue;
                }
                lastlog = 0;
                verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
                    == 0);
                if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "Source pool must be composed only of mirrors\n"));
                        retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
                        goto out;
                }

                verify(nvlist_lookup_nvlist_array(child[c],
                    ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);

                /* find or add an entry for this top-level vdev */
                if (newchildren > 0 &&
                    (entry = find_vdev_entry(zhp, mchild, mchildren,
                    newchild, newchildren)) >= 0) {
                        /* We found a disk that the user specified. */
                        vdev = mchild[entry];
                        ++found;
                } else {
                        /* User didn't specify a disk for this vdev. */
                        vdev = mchild[mchildren - 1];
                }

                if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
                        goto out;
        }

        /* did we find every disk the user specified? */
        if (found != newchildren) {
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
                    "include at most one disk from each mirror"));
                retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
                goto out;
        }

        /* Prepare the nvlist for populating. */
        if (*newroot == NULL) {
                if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
                        goto out;
                freelist = B_TRUE;
                if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
                    VDEV_TYPE_ROOT) != 0)
                        goto out;
        } else {
                verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
        }

        /* Add all the children we found */
        if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
            lastlog == 0 ? vcount : lastlog) != 0)
                goto out;

        /*
         * If we're just doing a dry run, exit now with success.
         */
        if (flags.dryrun) {
                memory_err = B_FALSE;
                freelist = B_FALSE;
                goto out;
        }

        /* now build up the config list & call the ioctl */
        if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
                goto out;

        if (nvlist_add_nvlist(newconfig,
            ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
            nvlist_add_string(newconfig,
            ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
            nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
                goto out;

        /*
         * The new pool is automatically part of the namespace unless we
         * explicitly export it.
         */
        if (!flags.import)
                zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
        (void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
        if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
                goto out;
        if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
                goto out;

        if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
                retval = zpool_standard_error(hdl, errno, msg);
                goto out;
        }

        /* Success: suppress cleanup of *newroot and the no-memory report. */
        freelist = B_FALSE;
        memory_err = B_FALSE;

out:
        if (varray != NULL) {
                int v;

                for (v = 0; v < vcount; v++)
                        nvlist_free(varray[v]);
                free(varray);
        }
        zcmd_free_nvlists(&zc);
        if (zc_props)
                nvlist_free(zc_props);
        if (newconfig)
                nvlist_free(newconfig);
        if (freelist) {
                nvlist_free(*newroot);
                *newroot = NULL;
        }

        if (retval != 0)
                return (retval);

        if (memory_err)
                return (no_memory(hdl));

        return (0);
}
3141
3142 /*
3143  * Remove the given device.  Currently, this is supported only for hot spares
3144  * and level 2 cache devices.
3145  */
3146 int
3147 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
3148 {
3149         zfs_cmd_t zc = { 0 };
3150         char msg[1024];
3151         nvlist_t *tgt;
3152         boolean_t avail_spare, l2cache, islog;
3153         libzfs_handle_t *hdl = zhp->zpool_hdl;
3154         uint64_t version;
3155
3156         (void) snprintf(msg, sizeof (msg),
3157             dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
3158
3159         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3160         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3161             &islog)) == 0)
3162                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3163         /*
3164          * XXX - this should just go away.
3165          */
3166         if (!avail_spare && !l2cache && !islog) {
3167                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3168                     "only inactive hot spares, cache, top-level, "
3169                     "or log devices can be removed"));
3170                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3171         }
3172
3173         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
3174         if (islog && version < SPA_VERSION_HOLES) {
3175                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3176                     "pool must be upgrade to support log removal"));
3177                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
3178         }
3179
3180         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
3181
3182         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3183                 return (0);
3184
3185         return (zpool_standard_error(hdl, errno, msg));
3186 }
3187
3188 /*
3189  * Clear the errors for the pool, or the particular device if specified.
3190  */
3191 int
3192 zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
3193 {
3194         zfs_cmd_t zc = { 0 };
3195         char msg[1024];
3196         nvlist_t *tgt;
3197         zpool_rewind_policy_t policy;
3198         boolean_t avail_spare, l2cache;
3199         libzfs_handle_t *hdl = zhp->zpool_hdl;
3200         nvlist_t *nvi = NULL;
3201         int error;
3202
3203         if (path)
3204                 (void) snprintf(msg, sizeof (msg),
3205                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3206                     path);
3207         else
3208                 (void) snprintf(msg, sizeof (msg),
3209                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3210                     zhp->zpool_name);
3211
3212         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3213         if (path) {
3214                 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
3215                     &l2cache, NULL)) == 0)
3216                         return (zfs_error(hdl, EZFS_NODEVICE, msg));
3217
3218                 /*
3219                  * Don't allow error clearing for hot spares.  Do allow
3220                  * error clearing for l2cache devices.
3221                  */
3222                 if (avail_spare)
3223                         return (zfs_error(hdl, EZFS_ISSPARE, msg));
3224
3225                 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
3226                     &zc.zc_guid) == 0);
3227         }
3228
3229         zpool_get_rewind_policy(rewindnvl, &policy);
3230         zc.zc_cookie = policy.zrp_request;
3231
3232         if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
3233                 return (-1);
3234
3235         if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
3236                 return (-1);
3237
3238         while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
3239             errno == ENOMEM) {
3240                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3241                         zcmd_free_nvlists(&zc);
3242                         return (-1);
3243                 }
3244         }
3245
3246         if (!error || ((policy.zrp_request & ZPOOL_TRY_REWIND) &&
3247             errno != EPERM && errno != EACCES)) {
3248                 if (policy.zrp_request &
3249                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
3250                         (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
3251                         zpool_rewind_exclaim(hdl, zc.zc_name,
3252                             ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0),
3253                             nvi);
3254                         nvlist_free(nvi);
3255                 }
3256                 zcmd_free_nvlists(&zc);
3257                 return (0);
3258         }
3259
3260         zcmd_free_nvlists(&zc);
3261         return (zpool_standard_error(hdl, errno, msg));
3262 }
3263
3264 /*
3265  * Similar to zpool_clear(), but takes a GUID (used by fmd).
3266  */
3267 int
3268 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
3269 {
3270         zfs_cmd_t zc = { 0 };
3271         char msg[1024];
3272         libzfs_handle_t *hdl = zhp->zpool_hdl;
3273
3274         (void) snprintf(msg, sizeof (msg),
3275             dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
3276             guid);
3277
3278         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3279         zc.zc_guid = guid;
3280         zc.zc_cookie = ZPOOL_NO_REWIND;
3281
3282         if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
3283                 return (0);
3284
3285         return (zpool_standard_error(hdl, errno, msg));
3286 }
3287
3288 /*
3289  * Change the GUID for a pool.
3290  */
3291 int
3292 zpool_reguid(zpool_handle_t *zhp)
3293 {
3294         char msg[1024];
3295         libzfs_handle_t *hdl = zhp->zpool_hdl;
3296         zfs_cmd_t zc = { 0 };
3297
3298         (void) snprintf(msg, sizeof (msg),
3299             dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name);
3300
3301         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3302         if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0)
3303                 return (0);
3304
3305         return (zpool_standard_error(hdl, errno, msg));
3306 }
3307
3308 /*
3309  * Reopen the pool.
3310  */
3311 int
3312 zpool_reopen(zpool_handle_t *zhp)
3313 {
3314         zfs_cmd_t zc = { 0 };
3315         char msg[1024];
3316         libzfs_handle_t *hdl = zhp->zpool_hdl;
3317
3318         (void) snprintf(msg, sizeof (msg),
3319             dgettext(TEXT_DOMAIN, "cannot reopen '%s'"),
3320             zhp->zpool_name);
3321
3322         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3323         if (zfs_ioctl(hdl, ZFS_IOC_POOL_REOPEN, &zc) == 0)
3324                 return (0);
3325         return (zpool_standard_error(hdl, errno, msg));
3326 }
3327
3328 /*
3329  * Convert from a devid string to a path.
3330  */
3331 static char *
3332 devid_to_path(char *devid_str)
3333 {
3334         ddi_devid_t devid;
3335         char *minor;
3336         char *path;
3337         devid_nmlist_t *list = NULL;
3338         int ret;
3339
3340         if (devid_str_decode(devid_str, &devid, &minor) != 0)
3341                 return (NULL);
3342
3343         ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
3344
3345         devid_str_free(minor);
3346         devid_free(devid);
3347
3348         if (ret != 0)
3349                 return (NULL);
3350
3351         /*
3352          * In a case the strdup() fails, we will just return NULL below.
3353          */
3354         path = strdup(list[0].devname);
3355
3356         devid_free_nmlist(list);
3357
3358         return (path);
3359 }
3360
3361 /*
3362  * Convert from a path to a devid string.
3363  */
3364 static char *
3365 path_to_devid(const char *path)
3366 {
3367 #ifdef have_devid
3368         int fd;
3369         ddi_devid_t devid;
3370         char *minor, *ret;
3371
3372         if ((fd = open(path, O_RDONLY)) < 0)
3373                 return (NULL);
3374
3375         minor = NULL;
3376         ret = NULL;
3377         if (devid_get(fd, &devid) == 0) {
3378                 if (devid_get_minor_name(fd, &minor) == 0)
3379                         ret = devid_str_encode(devid, minor);
3380                 if (minor != NULL)
3381                         devid_str_free(minor);
3382                 devid_free(devid);
3383         }
3384         (void) close(fd);
3385
3386         return (ret);
3387 #else
3388         return (NULL);
3389 #endif
3390 }
3391
3392 /*
3393  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
3394  * ignore any failure here, since a common case is for an unprivileged user to
3395  * type 'zpool status', and we'll display the correct information anyway.
3396  */
3397 static void
3398 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
3399 {
3400         zfs_cmd_t zc = { 0 };
3401
3402         (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3403         (void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
3404         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3405             &zc.zc_guid) == 0);
3406
3407         (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
3408 }
3409
3410 /*
3411  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
3412  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
3413  * We also check if this is a whole disk, in which case we strip off the
3414  * trailing 's0' slice name.
3415  *
3416  * This routine is also responsible for identifying when disks have been
3417  * reconfigured in a new location.  The kernel will have opened the device by
3418  * devid, but the path will still refer to the old location.  To catch this, we
3419  * first do a path -> devid translation (which is fast for the common case).  If
3420  * the devid matches, we're done.  If not, we do a reverse devid -> path
3421  * translation and issue the appropriate ioctl() to update the path of the vdev.
3422  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
3423  * of these checks.
3424  */
char *
zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
    boolean_t verbose)
{
	char *path, *devid;
	uint64_t value;
	char buf[64];		/* scratch for GUID/raidz/<type-id> names */
	vdev_stat_t *vs;
	uint_t vsc;
	int have_stats;
	int have_path;

	have_stats = nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&vs, &vsc) == 0;
	have_path = nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0;

	/*
	 * If the device is not currently present, assume it will not
	 * come back at the same device path.  Display the device by GUID.
	 * (Precedence note: '&&' binds tighter than '||', so the GUID form
	 * is used when NOT_PRESENT is set, or when a device with a path
	 * and stats is in a can't-open state.)
	 */
	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &value) == 0 ||
	    have_path && have_stats && vs->vs_state <= VDEV_STATE_CANT_OPEN) {
		verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
		    &value) == 0);
		(void) snprintf(buf, sizeof (buf), "%llu",
		    (u_longlong_t)value);
		path = buf;
	} else if (have_path) {

		/*
		 * If the device is dead (faulted, offline, etc) then don't
		 * bother opening it.  Otherwise we may be forcing the user to
		 * open a misbehaving device, which can have undesirable
		 * effects.
		 */
		if ((have_stats == 0 ||
		    vs->vs_state >= VDEV_STATE_DEGRADED) &&
		    zhp != NULL &&
		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
			/*
			 * Determine if the current path is correct.
			 */
			char *newdevid = path_to_devid(path);

			if (newdevid == NULL ||
			    strcmp(devid, newdevid) != 0) {
				char *newpath;

				if ((newpath = devid_to_path(devid)) != NULL) {
					/*
					 * Update the path appropriately.
					 * set_path() is best-effort; the
					 * in-memory config is patched too so
					 * the corrected path is shown now.
					 */
					set_path(zhp, nv, newpath);
					if (nvlist_add_string(nv,
					    ZPOOL_CONFIG_PATH, newpath) == 0)
						verify(nvlist_lookup_string(nv,
						    ZPOOL_CONFIG_PATH,
						    &path) == 0);
					free(newpath);
				}
			}

			if (newdevid)
				devid_str_free(newdevid);
		}

#ifdef sun
		if (strncmp(path, "/dev/dsk/", 9) == 0)
			path += 9;

		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
		    &value) == 0 && value) {
			int pathlen = strlen(path);
			char *tmp = zfs_strdup(hdl, path);

			/*
			 * If it starts with c#, and ends with "s0", chop
			 * the "s0" off, or if it ends with "s0/old", remove
			 * the "s0" from the middle.
			 */
			if (CTD_CHECK(tmp)) {
				if (strcmp(&tmp[pathlen - 2], "s0") == 0) {
					tmp[pathlen - 2] = '\0';
				} else if (pathlen > 6 &&
				    strcmp(&tmp[pathlen - 6], "s0/old") == 0) {
					(void) strcpy(&tmp[pathlen - 6],
					    "/old");
				}
			}
			return (tmp);
		}
#else	/* !sun */
		/* On FreeBSD, just strip the leading /dev/ prefix. */
		if (strncmp(path, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
			path += sizeof(_PATH_DEV) - 1;
#endif	/* !sun */
	} else {
		/* No path at all: fall back to the vdev type name. */
		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);

		/*
		 * If it's a raidz device, we need to stick in the parity level.
		 */
		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
			    &value) == 0);
			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
			    (u_longlong_t)value);
			path = buf;
		}

		/*
		 * We identify each top-level vdev by using a <type-id>
		 * naming convention.
		 */
		if (verbose) {
			uint64_t id;

			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
			    &id) == 0);
			(void) snprintf(buf, sizeof (buf), "%s-%llu", path,
			    (u_longlong_t)id);
			path = buf;
		}
	}

	/*
	 * 'path' may point into the local 'buf' here; zfs_strdup() copies
	 * it before return, so the caller always receives heap memory.
	 */
	return (zfs_strdup(hdl, path));
}
3551
3552 static int
3553 zbookmark_mem_compare(const void *a, const void *b)
3554 {
3555         return (memcmp(a, b, sizeof (zbookmark_phys_t)));
3556 }
3557
3558 /*
3559  * Retrieve the persistent error log, uniquify the members, and return to the
3560  * caller.
3561  */
int
zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
{
	zfs_cmd_t zc = { 0 };
	uint64_t count;
	zbookmark_phys_t *zb = NULL;
	int i;

	/*
	 * Retrieve the raw error list from the kernel.  If the number of errors
	 * has increased, allocate more space and continue until we get the
	 * entire list.
	 */
	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
	    &count) == 0);
	if (count == 0)
		return (0);
	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
	    count * sizeof (zbookmark_phys_t))) == (uintptr_t)NULL)
		return (-1);
	zc.zc_nvlist_dst_size = count;
	(void) strcpy(zc.zc_name, zhp->zpool_name);
	for (;;) {
		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
		    &zc) != 0) {
			/* Free the now-stale buffer on any failure. */
			free((void *)(uintptr_t)zc.zc_nvlist_dst);
			if (errno == ENOMEM) {
				void *dst;

				/* Kernel reported a larger count; retry. */
				count = zc.zc_nvlist_dst_size;
				dst = zfs_alloc(zhp->zpool_hdl, count *
				    sizeof (zbookmark_phys_t));
				if (dst == NULL)
					return (-1);
				zc.zc_nvlist_dst = (uintptr_t)dst;
			} else {
				return (-1);
			}
		} else {
			break;
		}
	}

	/*
	 * Sort the resulting bookmarks.  This is a little confusing due to the
	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
	 * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks
	 * _not_ copied as part of the process.  So we point the start of our
	 * array appropriate and decrement the total number of elements.
	 */
	zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) +
	    zc.zc_nvlist_dst_size;
	count -= zc.zc_nvlist_dst_size;

	qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare);

	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);

	/*
	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
	 * Sorting above lets consecutive duplicates be skipped cheaply.
	 */
	for (i = 0; i < count; i++) {
		nvlist_t *nv;

		/* ignoring zb_blkid and zb_level for now */
		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
		    zb[i-1].zb_object == zb[i].zb_object)
			continue;

		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
			goto nomem;
		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
		    zb[i].zb_objset) != 0) {
			nvlist_free(nv);
			goto nomem;
		}
		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
		    zb[i].zb_object) != 0) {
			nvlist_free(nv);
			goto nomem;
		}
		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
			nvlist_free(nv);
			goto nomem;
		}
		nvlist_free(nv);
	}

	free((void *)(uintptr_t)zc.zc_nvlist_dst);
	return (0);

nomem:
	free((void *)(uintptr_t)zc.zc_nvlist_dst);
	return (no_memory(zhp->zpool_hdl));
}
3657
3658 /*
3659  * Upgrade a ZFS pool to the latest on-disk version.
3660  */
3661 int
3662 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
3663 {
3664         zfs_cmd_t zc = { 0 };
3665         libzfs_handle_t *hdl = zhp->zpool_hdl;
3666
3667         (void) strcpy(zc.zc_name, zhp->zpool_name);
3668         zc.zc_cookie = new_version;
3669
3670         if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
3671                 return (zpool_standard_error_fmt(hdl, errno,
3672                     dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
3673                     zhp->zpool_name));
3674         return (0);
3675 }
3676
3677 void
3678 zfs_save_arguments(int argc, char **argv, char *string, int len)
3679 {
3680         (void) strlcpy(string, basename(argv[0]), len);
3681         for (int i = 1; i < argc; i++) {
3682                 (void) strlcat(string, " ", len);
3683                 (void) strlcat(string, argv[i], len);
3684         }
3685 }
3686
/*
 * Append 'message' to the pool-wide command history log.
 * Returns 0 on success, nonzero on failure (from nvlist packing or
 * the ioctl itself).
 */
int
zpool_log_history(libzfs_handle_t *hdl, const char *message)
{
	zfs_cmd_t zc = { 0 };
	nvlist_t *args;
	int err;

	/* Package the message in the nvlist form the ioctl expects. */
	args = fnvlist_alloc();
	fnvlist_add_string(args, "message", message);
	err = zcmd_write_src_nvlist(hdl, &zc, args);
	if (err == 0)
		err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc);
	/* Cleanup runs on both the success and failure paths. */
	nvlist_free(args);
	zcmd_free_nvlists(&zc);
	return (err);
}
3703
3704 /*
3705  * Perform ioctl to get some command history of a pool.
3706  *
3707  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
3708  * logical offset of the history buffer to start reading from.
3709  *
3710  * Upon return, 'off' is the next logical offset to read from and
3711  * 'len' is the actual amount of bytes read into 'buf'.
3712  */
static int
get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
{
	zfs_cmd_t zc = { 0 };
	libzfs_handle_t *hdl = zhp->zpool_hdl;

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

	zc.zc_history = (uint64_t)(uintptr_t)buf;
	zc.zc_history_len = *len;
	zc.zc_history_offset = *off;

	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
		/* Map the interesting errno values to specific messages. */
		switch (errno) {
		case EPERM:
			return (zfs_error_fmt(hdl, EZFS_PERM,
			    dgettext(TEXT_DOMAIN,
			    "cannot show history for pool '%s'"),
			    zhp->zpool_name));
		case ENOENT:
			return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
			    "'%s'"), zhp->zpool_name));
		case ENOTSUP:
			return (zfs_error_fmt(hdl, EZFS_BADVERSION,
			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
			    "'%s', pool must be upgraded"), zhp->zpool_name));
		default:
			return (zpool_standard_error_fmt(hdl, errno,
			    dgettext(TEXT_DOMAIN,
			    "cannot get history for '%s'"), zhp->zpool_name));
		}
	}

	/* Report back the actual bytes read and the next logical offset. */
	*len = zc.zc_history_len;
	*off = zc.zc_history_offset;

	return (0);
}
3752
3753 /*
3754  * Process the buffer of nvlists, unpacking and storing each nvlist record
3755  * into 'records'.  'leftover' is set to the number of bytes that weren't
3756  * processed as there wasn't a complete record.
3757  */
3758 int
3759 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
3760     nvlist_t ***records, uint_t *numrecords)
3761 {
3762         uint64_t reclen;
3763         nvlist_t *nv;
3764         int i;
3765
3766         while (bytes_read > sizeof (reclen)) {
3767
3768                 /* get length of packed record (stored as little endian) */
3769                 for (i = 0, reclen = 0; i < sizeof (reclen); i++)
3770                         reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
3771
3772                 if (bytes_read < sizeof (reclen) + reclen)
3773                         break;
3774
3775                 /* unpack record */
3776                 if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
3777                         return (ENOMEM);
3778                 bytes_read -= sizeof (reclen) + reclen;
3779                 buf += sizeof (reclen) + reclen;
3780
3781                 /* add record to nvlist array */
3782                 (*numrecords)++;
3783                 if (ISP2(*numrecords + 1)) {
3784                         *records = realloc(*records,
3785                             *numrecords * 2 * sizeof (nvlist_t *));
3786                 }
3787                 (*records)[*numrecords - 1] = nv;
3788         }
3789
3790         *leftover = bytes_read;
3791         return (0);
3792 }
3793
3794 /* from spa_history.c: spa_history_create_obj() */
3795 #define HIS_BUF_LEN_DEF (128 << 10)
3796 #define HIS_BUF_LEN_MAX (1 << 30)
3797
3798 /*
3799  * Retrieve the command history of a pool.
3800  */
int
zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
{
	char *buf = NULL;
	uint64_t bufsize = HIS_BUF_LEN_DEF;
	uint64_t off = 0;
	nvlist_t **records = NULL;
	uint_t numrecords = 0;
	int err, i;

	if ((buf = malloc(bufsize)) == NULL)
		return (ENOMEM);
	do {
		uint64_t bytes_read = bufsize;
		uint64_t leftover;

		if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
			break;

		/* if nothing else was read in, we're at EOF, just return */
		if (bytes_read == 0)
			break;

		if ((err = zpool_history_unpack(buf, bytes_read,
		    &leftover, &records, &numrecords)) != 0)
			break;
		/* Rewind past any partial record left at the buffer's end. */
		off -= leftover;

		/*
		 * If the history block is too big, double the buffer
		 * size and try again.  (leftover == bytes_read means not
		 * even one complete record fit in the current buffer.)
		 */
		if (leftover == bytes_read) {
			free(buf);
			buf = NULL;

			bufsize <<= 1;
			if ((bufsize >= HIS_BUF_LEN_MAX) ||
			    ((buf = malloc(bufsize)) == NULL)) {
				err = ENOMEM;
				break;
			}
		}

		/* CONSTCOND */
	} while (1);
	free(buf);

	if (!err) {
		/* Hand the caller a single nvlist wrapping all records. */
		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
		    records, numrecords) == 0);
	}
	/* The array's contents were copied into *nvhisp above; free ours. */
	for (i = 0; i < numrecords; i++)
		nvlist_free(records[i]);
	free(records);

	return (err);
}
3860
3861 void
3862 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
3863     char *pathname, size_t len)
3864 {
3865         zfs_cmd_t zc = { 0 };
3866         boolean_t mounted = B_FALSE;
3867         char *mntpnt = NULL;
3868         char dsname[MAXNAMELEN];
3869
3870         if (dsobj == 0) {
3871                 /* special case for the MOS */
3872                 (void) snprintf(pathname, len, "<metadata>:<0x%llx>", obj);
3873                 return;
3874         }
3875
3876         /* get the dataset's name */
3877         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3878         zc.zc_obj = dsobj;
3879         if (ioctl(zhp->zpool_hdl->libzfs_fd,
3880             ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
3881                 /* just write out a path of two object numbers */
3882                 (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
3883                     dsobj, obj);
3884                 return;
3885         }
3886         (void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
3887
3888         /* find out if the dataset is mounted */
3889         mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
3890
3891         /* get the corrupted object's path */
3892         (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
3893         zc.zc_obj = obj;
3894         if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
3895             &zc) == 0) {
3896                 if (mounted) {
3897                         (void) snprintf(pathname, len, "%s%s", mntpnt,
3898                             zc.zc_value);
3899                 } else {
3900                         (void) snprintf(pathname, len, "%s:%s",
3901                             dsname, zc.zc_value);
3902                 }
3903         } else {
3904                 (void) snprintf(pathname, len, "%s:<0x%llx>", dsname, obj);
3905         }
3906         free(mntpnt);
3907 }
3908
3909 #ifdef sun
3910 /*
3911  * Read the EFI label from the config, if a label does not exist then
3912  * pass back the error to the caller. If the caller has passed a non-NULL
3913  * diskaddr argument then we set it to the starting address of the EFI
3914  * partition.
3915  */
static int
read_efi_label(nvlist_t *config, diskaddr_t *sb)
{
	char *path;
	int fd;
	char diskname[MAXPATHLEN];
	int err = -1;

	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
		return (err);

	/*
	 * Build the raw-device name from the last path component.
	 * NOTE(review): assumes 'path' contains a '/' -- the strrchr()
	 * result is not checked for NULL here; confirm against callers.
	 */
	(void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT,
	    strrchr(path, '/'));
	if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) {
		struct dk_gpt *vtoc;

		if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
			/* Optionally hand back the first partition's start. */
			if (sb != NULL)
				*sb = vtoc->efi_parts[0].p_start;
			efi_free(vtoc);
		}
		(void) close(fd);
	}
	return (err);
}
3941
3942 /*
3943  * determine where a partition starts on a disk in the current
3944  * configuration
3945  */
3946 static diskaddr_t
3947 find_start_block(nvlist_t *config)
3948 {
3949         nvlist_t **child;
3950         uint_t c, children;
3951         diskaddr_t sb = MAXOFFSET_T;
3952         uint64_t wholedisk;
3953
3954         if (nvlist_lookup_nvlist_array(config,
3955             ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
3956                 if (nvlist_lookup_uint64(config,
3957                     ZPOOL_CONFIG_WHOLE_DISK,
3958                     &wholedisk) != 0 || !wholedisk) {
3959                         return (MAXOFFSET_T);
3960                 }
3961                 if (read_efi_label(config, &sb) < 0)
3962                         sb = MAXOFFSET_T;
3963                 return (sb);
3964         }
3965
3966         for (c = 0; c < children; c++) {
3967                 sb = find_start_block(child[c]);
3968                 if (sb != MAXOFFSET_T) {
3969                         return (sb);
3970                 }
3971         }
3972         return (MAXOFFSET_T);
3973 }
3974 #endif /* sun */
3975
3976 /*
3977  * Label an individual disk.  The name provided is the short name,
3978  * stripped of any leading /dev path.
3979  */
3980 int
3981 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name)
3982 {
3983 #ifdef sun
3984         char path[MAXPATHLEN];
3985         struct dk_gpt *vtoc;
3986         int fd;
3987         size_t resv = EFI_MIN_RESV_SIZE;
3988         uint64_t slice_size;
3989         diskaddr_t start_block;
3990         char errbuf[1024];
3991
3992         /* prepare an error message just in case */
3993         (void) snprintf(errbuf, sizeof (errbuf),
3994             dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
3995
3996         if (zhp) {
3997                 nvlist_t *nvroot;
3998
3999                 if (zpool_is_bootable(zhp)) {
4000                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4001                             "EFI labeled devices are not supported on root "
4002                             "pools."));
4003                         return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf));
4004                 }
4005
4006                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
4007                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
4008
4009                 if (zhp->zpool_start_block == 0)
4010                         start_block = find_start_block(nvroot);
4011                 else
4012                         start_block = zhp->zpool_start_block;
4013                 zhp->zpool_start_block = start_block;
4014         } else {
4015                 /* new pool */
4016                 start_block = NEW_START_BLOCK;
4017         }
4018
4019         (void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
4020             BACKUP_SLICE);
4021
4022         if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
4023                 /*
4024                  * This shouldn't happen.  We've long since verified that this
4025                  * is a valid device.
4026                  */
4027                 zfs_error_aux(hdl,
4028                     dgettext(TEXT_DOMAIN, "unable to open device"));
4029                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
4030         }
4031
4032         if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
4033                 /*
4034                  * The only way this can fail is if we run out of memory, or we
4035                  * were unable to read the disk's capacity
4036                  */
4037                 if (errno == ENOMEM)
4038                         (void) no_memory(hdl);
4039
4040                 (void) close(fd);
4041                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4042                     "unable to read disk capacity"), name);
4043
4044                 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
4045         }
4046
4047         slice_size = vtoc->efi_last_u_lba + 1;
4048         slice_size -= EFI_MIN_RESV_SIZE;
4049         if (start_block == MAXOFFSET_T)
4050                 start_block = NEW_START_BLOCK;
4051         slice_size -= start_block;
4052
4053         vtoc->efi_parts[0].p_start = start_block;
4054         vtoc->efi_parts[0].p_size = slice_size;
4055
4056         /*
4057          * Why we use V_USR: V_BACKUP confuses users, and is considered
4058          * disposable by some EFI utilities (since EFI doesn't have a backup
4059          * slice).  V_UNASSIGNED is supposed to be used only for zero size
4060          * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
4061          * etc. were all pretty specific.  V_USR is as close to reality as we
4062          * can get, in the absence of V_OTHER.
4063          */
4064         vtoc->efi_parts[0].p_tag = V_USR;
4065         (void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
4066
4067         vtoc->efi_parts[8].p_start = slice_size + start_block;
4068         vtoc->efi_parts[8].p_size = resv;
4069         vtoc->efi_parts[8].p_tag = V_RESERVED;
4070
4071         if (efi_write(fd, vtoc) != 0) {
4072                 /*
4073                  * Some block drivers (like pcata) may not support EFI
4074                  * GPT labels.  Print out a helpful error message dir-
4075                  * ecting the user to manually label the disk and give
4076                  * a specific slice.
4077                  */
4078                 (void) close(fd);
4079                 efi_free(vtoc);
4080
4081                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4082                     "try using fdisk(1M) and then provide a specific slice"));
4083                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4084         }
4085
4086         (void) close(fd);
4087         efi_free(vtoc);
4088 #endif /* sun */
4089         return (0);
4090 }
4091
4092 static boolean_t
4093 supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf)
4094 {
4095         char *type;
4096         nvlist_t **child;
4097         uint_t children, c;
4098
4099         verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0);
4100         if (strcmp(type, VDEV_TYPE_FILE) == 0 ||
4101             strcmp(type, VDEV_TYPE_HOLE) == 0 ||
4102             strcmp(type, VDEV_TYPE_MISSING) == 0) {
4103                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4104                     "vdev type '%s' is not supported"), type);
4105                 (void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf);
4106                 return (B_FALSE);
4107         }
4108         if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
4109             &child, &children) == 0) {
4110                 for (c = 0; c < children; c++) {
4111                         if (!supported_dump_vdev_type(hdl, child[c], errbuf))
4112                                 return (B_FALSE);
4113                 }
4114         }
4115         return (B_TRUE);
4116 }
4117
4118 /*
4119  * Check if this zvol is allowable for use as a dump device; zero if
4120  * it is, > 0 if it isn't, < 0 if it isn't a zvol.
4121  *
4122  * Allowable storage configurations include mirrors, all raidz variants, and
4123  * pools with log, cache, and spare devices.  Pools which are backed by files or
4124  * have missing/hole vdevs are not suitable.
4125  */
4126 int
4127 zvol_check_dump_config(char *arg)
4128 {
4129         zpool_handle_t *zhp = NULL;
4130         nvlist_t *config, *nvroot;
4131         char *p, *volname;
4132         nvlist_t **top;
4133         uint_t toplevels;
4134         libzfs_handle_t *hdl;
4135         char errbuf[1024];
4136         char poolname[ZPOOL_MAXNAMELEN];
4137         int pathlen = strlen(ZVOL_FULL_DEV_DIR);
4138         int ret = 1;
4139
4140         if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) {
4141                 return (-1);
4142         }
4143
4144         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4145             "dump is not supported on device '%s'"), arg);
4146
4147         if ((hdl = libzfs_init()) == NULL)
4148                 return (1);
4149         libzfs_print_on_error(hdl, B_TRUE);
4150
4151         volname = arg + pathlen;
4152
4153         /* check the configuration of the pool */
4154         if ((p = strchr(volname, '/')) == NULL) {
4155                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4156                     "malformed dataset name"));
4157                 (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4158                 return (1);
4159         } else if (p - volname >= ZFS_MAXNAMELEN) {
4160                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4161                     "dataset name is too long"));
4162                 (void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf);
4163                 return (1);
4164         } else {
4165                 (void) strncpy(poolname, volname, p - volname);
4166                 poolname[p - volname] = '\0';
4167         }
4168
4169         if ((zhp = zpool_open(hdl, poolname)) == NULL) {
4170                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4171                     "could not open pool '%s'"), poolname);
4172                 (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
4173                 goto out;
4174         }
4175         config = zpool_get_config(zhp, NULL);
4176         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
4177             &nvroot) != 0) {
4178                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4179                     "could not obtain vdev configuration for  '%s'"), poolname);
4180                 (void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
4181                 goto out;
4182         }
4183
4184         verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
4185             &top, &toplevels) == 0);
4186
4187         if (!supported_dump_vdev_type(hdl, top[0], errbuf)) {
4188                 goto out;
4189         }
4190         ret = 0;
4191
4192 out:
4193         if (zhp)
4194                 zpool_close(zhp);
4195         libzfs_fini(hdl);
4196         return (ret);
4197 }