]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - lib/libzfs/libzfs_pool.c
OpenZFS 9166 - zfs storage pool checkpoint
[FreeBSD/FreeBSD.git] / lib / libzfs / libzfs_pool.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
26  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
27  * Copyright (c) 2018 Datto Inc.
28  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
29  */
30
31 #include <ctype.h>
32 #include <errno.h>
33 #include <devid.h>
34 #include <fcntl.h>
35 #include <libintl.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <strings.h>
39 #include <unistd.h>
40 #include <libgen.h>
41 #include <zone.h>
42 #include <sys/stat.h>
43 #include <sys/efi_partition.h>
44 #include <sys/systeminfo.h>
45 #include <sys/vtoc.h>
46 #include <sys/zfs_ioctl.h>
47 #include <sys/vdev_disk.h>
48 #include <dlfcn.h>
49
50 #include "zfs_namecheck.h"
51 #include "zfs_prop.h"
52 #include "libzfs_impl.h"
53 #include "zfs_comutil.h"
54 #include "zfeature_common.h"
55
56 static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
57 static boolean_t zpool_vdev_is_interior(const char *name);
58
59 typedef struct prop_flags {
60         int create:1;   /* Validate property on creation */
61         int import:1;   /* Validate property on import */
62 } prop_flags_t;
63
64 /*
65  * ====================================================================
66  *   zpool property functions
67  * ====================================================================
68  */
69
70 static int
71 zpool_get_all_props(zpool_handle_t *zhp)
72 {
73         zfs_cmd_t zc = {"\0"};
74         libzfs_handle_t *hdl = zhp->zpool_hdl;
75
76         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
77
78         if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
79                 return (-1);
80
81         while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
82                 if (errno == ENOMEM) {
83                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
84                                 zcmd_free_nvlists(&zc);
85                                 return (-1);
86                         }
87                 } else {
88                         zcmd_free_nvlists(&zc);
89                         return (-1);
90                 }
91         }
92
93         if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
94                 zcmd_free_nvlists(&zc);
95                 return (-1);
96         }
97
98         zcmd_free_nvlists(&zc);
99
100         return (0);
101 }
102
103 static int
104 zpool_props_refresh(zpool_handle_t *zhp)
105 {
106         nvlist_t *old_props;
107
108         old_props = zhp->zpool_props;
109
110         if (zpool_get_all_props(zhp) != 0)
111                 return (-1);
112
113         nvlist_free(old_props);
114         return (0);
115 }
116
117 static const char *
118 zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
119     zprop_source_t *src)
120 {
121         nvlist_t *nv, *nvl;
122         uint64_t ival;
123         char *value;
124         zprop_source_t source;
125
126         nvl = zhp->zpool_props;
127         if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
128                 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
129                 source = ival;
130                 verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
131         } else {
132                 source = ZPROP_SRC_DEFAULT;
133                 if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
134                         value = "-";
135         }
136
137         if (src)
138                 *src = source;
139
140         return (value);
141 }
142
143 uint64_t
144 zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
145 {
146         nvlist_t *nv, *nvl;
147         uint64_t value;
148         zprop_source_t source;
149
150         if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
151                 /*
152                  * zpool_get_all_props() has most likely failed because
153                  * the pool is faulted, but if all we need is the top level
154                  * vdev's guid then get it from the zhp config nvlist.
155                  */
156                 if ((prop == ZPOOL_PROP_GUID) &&
157                     (nvlist_lookup_nvlist(zhp->zpool_config,
158                     ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
159                     (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
160                     == 0)) {
161                         return (value);
162                 }
163                 return (zpool_prop_default_numeric(prop));
164         }
165
166         nvl = zhp->zpool_props;
167         if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
168                 verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
169                 source = value;
170                 verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
171         } else {
172                 source = ZPROP_SRC_DEFAULT;
173                 value = zpool_prop_default_numeric(prop);
174         }
175
176         if (src)
177                 *src = source;
178
179         return (value);
180 }
181
182 /*
183  * Map VDEV STATE to printed strings.
184  */
185 const char *
186 zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
187 {
188         switch (state) {
189         case VDEV_STATE_CLOSED:
190         case VDEV_STATE_OFFLINE:
191                 return (gettext("OFFLINE"));
192         case VDEV_STATE_REMOVED:
193                 return (gettext("REMOVED"));
194         case VDEV_STATE_CANT_OPEN:
195                 if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
196                         return (gettext("FAULTED"));
197                 else if (aux == VDEV_AUX_SPLIT_POOL)
198                         return (gettext("SPLIT"));
199                 else
200                         return (gettext("UNAVAIL"));
201         case VDEV_STATE_FAULTED:
202                 return (gettext("FAULTED"));
203         case VDEV_STATE_DEGRADED:
204                 return (gettext("DEGRADED"));
205         case VDEV_STATE_HEALTHY:
206                 return (gettext("ONLINE"));
207
208         default:
209                 break;
210         }
211
212         return (gettext("UNKNOWN"));
213 }
214
215 /*
216  * Map POOL STATE to printed strings.
217  */
218 const char *
219 zpool_pool_state_to_name(pool_state_t state)
220 {
221         switch (state) {
222         default:
223                 break;
224         case POOL_STATE_ACTIVE:
225                 return (gettext("ACTIVE"));
226         case POOL_STATE_EXPORTED:
227                 return (gettext("EXPORTED"));
228         case POOL_STATE_DESTROYED:
229                 return (gettext("DESTROYED"));
230         case POOL_STATE_SPARE:
231                 return (gettext("SPARE"));
232         case POOL_STATE_L2CACHE:
233                 return (gettext("L2CACHE"));
234         case POOL_STATE_UNINITIALIZED:
235                 return (gettext("UNINITIALIZED"));
236         case POOL_STATE_UNAVAIL:
237                 return (gettext("UNAVAIL"));
238         case POOL_STATE_POTENTIALLY_ACTIVE:
239                 return (gettext("POTENTIALLY_ACTIVE"));
240         }
241
242         return (gettext("UNKNOWN"));
243 }
244
245 /*
246  * Given a pool handle, return the pool health string ("ONLINE", "DEGRADED",
247  * "SUSPENDED", etc).
248  */
249 const char *
250 zpool_get_state_str(zpool_handle_t *zhp)
251 {
252         zpool_errata_t errata;
253         zpool_status_t status;
254         nvlist_t *nvroot;
255         vdev_stat_t *vs;
256         uint_t vsc;
257         const char *str;
258
259         status = zpool_get_status(zhp, NULL, &errata);
260
261         if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
262                 str = gettext("FAULTED");
263         } else if (status == ZPOOL_STATUS_IO_FAILURE_WAIT ||
264             status == ZPOOL_STATUS_IO_FAILURE_MMP) {
265                 str = gettext("SUSPENDED");
266         } else {
267                 verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
268                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
269                 verify(nvlist_lookup_uint64_array(nvroot,
270                     ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
271                     == 0);
272                 str = zpool_state_to_name(vs->vs_state, vs->vs_aux);
273         }
274         return (str);
275 }
276
/*
 * Get a zpool property value for 'prop' and return the value in
 * a pre-allocated buffer.
 *
 * 'buf'/'len' receive the formatted value.  If 'srctype' is non-NULL it
 * receives the source of the value.  'literal' selects raw numeric
 * output over human-readable formatting (nicenum, percentages, ratios).
 * Returns 0 on success; -1 if the property cache cannot be populated or
 * an index value cannot be mapped to a string.
 */
int
zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
    size_t len, zprop_source_t *srctype, boolean_t literal)
{
	uint64_t intval;
	const char *strval;
	zprop_source_t src = ZPROP_SRC_NONE;

	/*
	 * For an unavailable pool only a handful of properties can be
	 * reported; everything else is rendered as "-".
	 */
	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
		switch (prop) {
		case ZPOOL_PROP_NAME:
			(void) strlcpy(buf, zpool_get_name(zhp), len);
			break;

		case ZPOOL_PROP_HEALTH:
			(void) strlcpy(buf, zpool_get_state_str(zhp), len);
			break;

		case ZPOOL_PROP_GUID:
			/*
			 * zpool_get_prop_int() can recover the guid from
			 * the config nvlist even without cached props.
			 */
			intval = zpool_get_prop_int(zhp, prop, &src);
			(void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
			break;

		case ZPOOL_PROP_ALTROOT:
		case ZPOOL_PROP_CACHEFILE:
		case ZPOOL_PROP_COMMENT:
			/* these may still be cached; otherwise show "-" */
			if (zhp->zpool_props != NULL ||
			    zpool_get_all_props(zhp) == 0) {
				(void) strlcpy(buf,
				    zpool_get_prop_string(zhp, prop, &src),
				    len);
				break;
			}
			/* FALLTHROUGH */
		default:
			(void) strlcpy(buf, "-", len);
			break;
		}

		if (srctype != NULL)
			*srctype = src;
		return (0);
	}

	/*
	 * Populate the property cache on demand; the pool name is still
	 * retrievable even when that fails.
	 */
	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
	    prop != ZPOOL_PROP_NAME)
		return (-1);

	switch (zpool_prop_get_type(prop)) {
	case PROP_TYPE_STRING:
		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
		    len);
		break;

	case PROP_TYPE_NUMBER:
		intval = zpool_get_prop_int(zhp, prop, &src);

		switch (prop) {
		case ZPOOL_PROP_SIZE:
		case ZPOOL_PROP_ALLOCATED:
		case ZPOOL_PROP_FREE:
		case ZPOOL_PROP_FREEING:
		case ZPOOL_PROP_LEAKED:
		case ZPOOL_PROP_ASHIFT:
			if (literal)
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			else
				(void) zfs_nicenum(intval, buf, len);
			break;

		case ZPOOL_PROP_EXPANDSZ:
		case ZPOOL_PROP_CHECKPOINT:
			/* a zero value is displayed as "-" */
			if (intval == 0) {
				(void) strlcpy(buf, "-", len);
			} else if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				(void) zfs_nicebytes(intval, buf, len);
			}
			break;

		case ZPOOL_PROP_CAPACITY:
			/* shown as a percentage unless 'literal' */
			if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				(void) snprintf(buf, len, "%llu%%",
				    (u_longlong_t)intval);
			}
			break;

		case ZPOOL_PROP_FRAGMENTATION:
			/* UINT64_MAX means fragmentation is unknown */
			if (intval == UINT64_MAX) {
				(void) strlcpy(buf, "-", len);
			} else if (literal) {
				(void) snprintf(buf, len, "%llu",
				    (u_longlong_t)intval);
			} else {
				(void) snprintf(buf, len, "%llu%%",
				    (u_longlong_t)intval);
			}
			break;

		case ZPOOL_PROP_DEDUPRATIO:
			/* stored as a fixed-point ratio scaled by 100 */
			if (literal)
				(void) snprintf(buf, len, "%llu.%02llu",
				    (u_longlong_t)(intval / 100),
				    (u_longlong_t)(intval % 100));
			else
				(void) snprintf(buf, len, "%llu.%02llux",
				    (u_longlong_t)(intval / 100),
				    (u_longlong_t)(intval % 100));
			break;

		case ZPOOL_PROP_HEALTH:
			(void) strlcpy(buf, zpool_get_state_str(zhp), len);
			break;
		case ZPOOL_PROP_VERSION:
			/* feature-flag pools print "-" for version */
			if (intval >= SPA_VERSION_FEATURES) {
				(void) snprintf(buf, len, "-");
				break;
			}
			/* FALLTHROUGH */
		default:
			(void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
		}
		break;

	case PROP_TYPE_INDEX:
		intval = zpool_get_prop_int(zhp, prop, &src);
		if (zpool_prop_index_to_string(prop, intval, &strval)
		    != 0)
			return (-1);
		(void) strlcpy(buf, strval, len);
		break;

	default:
		abort();
	}

	if (srctype)
		*srctype = src;

	return (0);
}
428
429 /*
430  * Check if the bootfs name has the same pool name as it is set to.
431  * Assuming bootfs is a valid dataset name.
432  */
433 static boolean_t
434 bootfs_name_valid(const char *pool, char *bootfs)
435 {
436         int len = strlen(pool);
437         if (bootfs[0] == '\0')
438                 return (B_TRUE);
439
440         if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
441                 return (B_FALSE);
442
443         if (strncmp(pool, bootfs, len) == 0 &&
444             (bootfs[len] == '/' || bootfs[len] == '\0'))
445                 return (B_TRUE);
446
447         return (B_FALSE);
448 }
449
450 boolean_t
451 zpool_is_bootable(zpool_handle_t *zhp)
452 {
453         char bootfs[ZFS_MAX_DATASET_NAME_LEN];
454
455         return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
456             sizeof (bootfs), NULL, B_FALSE) == 0 && strncmp(bootfs, "-",
457             sizeof (bootfs)) != 0);
458 }
459
460
/*
 * Given an nvlist of zpool properties to be set, validate that they are
 * correct, and parse any numeric properties (index, boolean, etc) if they are
 * specified as strings.
 *
 * Returns a newly allocated nvlist of parsed properties that the caller
 * must free, or NULL on failure (the error is reported through 'hdl'
 * using 'errbuf' as the prefix).  'flags' records whether validation is
 * happening at pool creation or import time, which permits or forbids
 * certain properties.
 */
static nvlist_t *
zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
    nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
{
	nvpair_t *elem;
	nvlist_t *retprops;
	zpool_prop_t prop;
	char *strval;
	uint64_t intval;
	char *slash, *check;
	struct stat64 statbuf;
	zpool_handle_t *zhp;

	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
		(void) no_memory(hdl);
		return (NULL);
	}

	elem = NULL;
	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
		const char *propname = nvpair_name(elem);

		prop = zpool_name_to_prop(propname);
		if (prop == ZPOOL_PROP_INVAL && zpool_prop_feature(propname)) {
			/*
			 * "feature@<name>" properties: the feature must
			 * exist, the value must be the string "enabled",
			 * or "disabled" at creation time only.
			 */
			int err;
			char *fname = strchr(propname, '@') + 1;

			err = zfeature_lookup_name(fname, NULL);
			if (err != 0) {
				ASSERT3U(err, ==, ENOENT);
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "invalid feature '%s'"), fname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (nvpair_type(elem) != DATA_TYPE_STRING) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' must be a string"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			(void) nvpair_value_string(elem, &strval);
			if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0 &&
			    strcmp(strval, ZFS_FEATURE_DISABLED) != 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set to "
				    "'enabled' or 'disabled'"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (!flags.create &&
			    strcmp(strval, ZFS_FEATURE_DISABLED) == 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set to "
				    "'disabled' at creation time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			/* features are recorded as a uint64 of value 0 */
			if (nvlist_add_uint64(retprops, propname, 0) != 0) {
				(void) no_memory(hdl);
				goto error;
			}
			continue;
		}

		/*
		 * Make sure this property is valid and applies to this type.
		 */
		if (prop == ZPOOL_PROP_INVAL) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "invalid property '%s'"), propname);
			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
			goto error;
		}

		if (zpool_prop_readonly(prop)) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
			    "is readonly"), propname);
			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
			goto error;
		}

		/* parse string forms of numeric/index values into retprops */
		if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
		    &strval, &intval, errbuf) != 0)
			goto error;

		/*
		 * Perform additional checking for specific properties.
		 */
		switch (prop) {
		case ZPOOL_PROP_VERSION:
			/* the version may only move forward */
			if (intval < version ||
			    !SPA_VERSION_IS_SUPPORTED(intval)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' number %d is invalid."),
				    propname, intval);
				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
				goto error;
			}
			break;

		case ZPOOL_PROP_ASHIFT:
			/* 0 means "auto-detect"; otherwise range-check */
			if (intval != 0 &&
			    (intval < ASHIFT_MIN || intval > ASHIFT_MAX)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "invalid '%s=%d' property: only values "
				    "between %" PRId32 " and %" PRId32 " "
				    "are allowed.\n"),
				    propname, intval, ASHIFT_MIN, ASHIFT_MAX);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;

		case ZPOOL_PROP_BOOTFS:
			if (flags.create || flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' cannot be set at creation "
				    "or import time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (version < SPA_VERSION_BOOTFS) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "pool must be upgraded to support "
				    "'%s' property"), propname);
				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
				goto error;
			}

			/*
			 * bootfs property value has to be a dataset name and
			 * the dataset has to be in the same pool as it sets to.
			 */
			if (!bootfs_name_valid(poolname, strval)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
				    "is an invalid name"), strval);
				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
				goto error;
			}

			/* verify that the pool itself can be opened */
			if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "could not open pool '%s'"), poolname);
				(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
				goto error;
			}
			zpool_close(zhp);
			break;

		case ZPOOL_PROP_ALTROOT:
			if (!flags.create && !flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set during pool "
				    "creation or import"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (strval[0] != '/') {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "bad alternate root '%s'"), strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}
			break;

		case ZPOOL_PROP_CACHEFILE:
			/* empty string means "use the default cachefile" */
			if (strval[0] == '\0')
				break;

			/* "none" disables the cachefile entirely */
			if (strcmp(strval, "none") == 0)
				break;

			if (strval[0] != '/') {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' must be empty, an "
				    "absolute path, or 'none'"), propname);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			slash = strrchr(strval, '/');

			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
			    strcmp(slash, "/..") == 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' is not a valid file"), strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			/*
			 * Temporarily truncate at the final slash so the
			 * parent directory can be stat'ed; the character
			 * is restored below.
			 */
			*slash = '\0';

			if (strval[0] != '\0' &&
			    (stat64(strval, &statbuf) != 0 ||
			    !S_ISDIR(statbuf.st_mode))) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' is not a valid directory"),
				    strval);
				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
				goto error;
			}

			*slash = '/';
			break;

		case ZPOOL_PROP_COMMENT:
			/* comments must be printable and length-bounded */
			for (check = strval; *check != '\0'; check++) {
				if (!isprint(*check)) {
					zfs_error_aux(hdl,
					    dgettext(TEXT_DOMAIN,
					    "comment may only have printable "
					    "characters"));
					(void) zfs_error(hdl, EZFS_BADPROP,
					    errbuf);
					goto error;
				}
			}
			if (strlen(strval) > ZPROP_MAX_COMMENT) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "comment must not exceed %d characters"),
				    ZPROP_MAX_COMMENT);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		case ZPOOL_PROP_READONLY:
			if (!flags.import) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set at "
				    "import time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		case ZPOOL_PROP_TNAME:
			if (!flags.create) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property '%s' can only be set at "
				    "creation time"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		case ZPOOL_PROP_MULTIHOST:
			/* multihost relies on the hostid to detect conflicts */
			if (get_system_hostid() == 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "requires a non-zero system hostid"));
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		default:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "property '%s'(%d) not defined"), propname, prop);
			break;
		}
	}

	return (retprops);
error:
	nvlist_free(retprops);
	return (NULL);
}
736
737 /*
738  * Set zpool property : propname=propval.
739  */
740 int
741 zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
742 {
743         zfs_cmd_t zc = {"\0"};
744         int ret = -1;
745         char errbuf[1024];
746         nvlist_t *nvl = NULL;
747         nvlist_t *realprops;
748         uint64_t version;
749         prop_flags_t flags = { 0 };
750
751         (void) snprintf(errbuf, sizeof (errbuf),
752             dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
753             zhp->zpool_name);
754
755         if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
756                 return (no_memory(zhp->zpool_hdl));
757
758         if (nvlist_add_string(nvl, propname, propval) != 0) {
759                 nvlist_free(nvl);
760                 return (no_memory(zhp->zpool_hdl));
761         }
762
763         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
764         if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
765             zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
766                 nvlist_free(nvl);
767                 return (-1);
768         }
769
770         nvlist_free(nvl);
771         nvl = realprops;
772
773         /*
774          * Execute the corresponding ioctl() to set this property.
775          */
776         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
777
778         if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
779                 nvlist_free(nvl);
780                 return (-1);
781         }
782
783         ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
784
785         zcmd_free_nvlists(&zc);
786         nvlist_free(nvl);
787
788         if (ret)
789                 (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
790         else
791                 (void) zpool_props_refresh(zhp);
792
793         return (ret);
794 }
795
796 int
797 zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
798 {
799         libzfs_handle_t *hdl = zhp->zpool_hdl;
800         zprop_list_t *entry;
801         char buf[ZFS_MAXPROPLEN];
802         nvlist_t *features = NULL;
803         nvpair_t *nvp;
804         zprop_list_t **last;
805         boolean_t firstexpand = (NULL == *plp);
806         int i;
807
808         if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
809                 return (-1);
810
811         last = plp;
812         while (*last != NULL)
813                 last = &(*last)->pl_next;
814
815         if ((*plp)->pl_all)
816                 features = zpool_get_features(zhp);
817
818         if ((*plp)->pl_all && firstexpand) {
819                 for (i = 0; i < SPA_FEATURES; i++) {
820                         zprop_list_t *entry = zfs_alloc(hdl,
821                             sizeof (zprop_list_t));
822                         entry->pl_prop = ZPROP_INVAL;
823                         entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
824                             spa_feature_table[i].fi_uname);
825                         entry->pl_width = strlen(entry->pl_user_prop);
826                         entry->pl_all = B_TRUE;
827
828                         *last = entry;
829                         last = &entry->pl_next;
830                 }
831         }
832
833         /* add any unsupported features */
834         for (nvp = nvlist_next_nvpair(features, NULL);
835             nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) {
836                 char *propname;
837                 boolean_t found;
838                 zprop_list_t *entry;
839
840                 if (zfeature_is_supported(nvpair_name(nvp)))
841                         continue;
842
843                 propname = zfs_asprintf(hdl, "unsupported@%s",
844                     nvpair_name(nvp));
845
846                 /*
847                  * Before adding the property to the list make sure that no
848                  * other pool already added the same property.
849                  */
850                 found = B_FALSE;
851                 entry = *plp;
852                 while (entry != NULL) {
853                         if (entry->pl_user_prop != NULL &&
854                             strcmp(propname, entry->pl_user_prop) == 0) {
855                                 found = B_TRUE;
856                                 break;
857                         }
858                         entry = entry->pl_next;
859                 }
860                 if (found) {
861                         free(propname);
862                         continue;
863                 }
864
865                 entry = zfs_alloc(hdl, sizeof (zprop_list_t));
866                 entry->pl_prop = ZPROP_INVAL;
867                 entry->pl_user_prop = propname;
868                 entry->pl_width = strlen(entry->pl_user_prop);
869                 entry->pl_all = B_TRUE;
870
871                 *last = entry;
872                 last = &entry->pl_next;
873         }
874
875         for (entry = *plp; entry != NULL; entry = entry->pl_next) {
876
877                 if (entry->pl_fixed)
878                         continue;
879
880                 if (entry->pl_prop != ZPROP_INVAL &&
881                     zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
882                     NULL, B_FALSE) == 0) {
883                         if (strlen(buf) > entry->pl_width)
884                                 entry->pl_width = strlen(buf);
885                 }
886         }
887
888         return (0);
889 }
890
891 /*
892  * Get the state for the given feature on the given ZFS pool.
893  */
894 int
895 zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
896     size_t len)
897 {
898         uint64_t refcount;
899         boolean_t found = B_FALSE;
900         nvlist_t *features = zpool_get_features(zhp);
901         boolean_t supported;
902         const char *feature = strchr(propname, '@') + 1;
903
904         supported = zpool_prop_feature(propname);
905         ASSERT(supported || zpool_prop_unsupported(propname));
906
907         /*
908          * Convert from feature name to feature guid. This conversion is
909          * unnecessary for unsupported@... properties because they already
910          * use guids.
911          */
912         if (supported) {
913                 int ret;
914                 spa_feature_t fid;
915
916                 ret = zfeature_lookup_name(feature, &fid);
917                 if (ret != 0) {
918                         (void) strlcpy(buf, "-", len);
919                         return (ENOTSUP);
920                 }
921                 feature = spa_feature_table[fid].fi_guid;
922         }
923
924         if (nvlist_lookup_uint64(features, feature, &refcount) == 0)
925                 found = B_TRUE;
926
927         if (supported) {
928                 if (!found) {
929                         (void) strlcpy(buf, ZFS_FEATURE_DISABLED, len);
930                 } else  {
931                         if (refcount == 0)
932                                 (void) strlcpy(buf, ZFS_FEATURE_ENABLED, len);
933                         else
934                                 (void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len);
935                 }
936         } else {
937                 if (found) {
938                         if (refcount == 0) {
939                                 (void) strcpy(buf, ZFS_UNSUPPORTED_INACTIVE);
940                         } else {
941                                 (void) strcpy(buf, ZFS_UNSUPPORTED_READONLY);
942                         }
943                 } else {
944                         (void) strlcpy(buf, "-", len);
945                         return (ENOTSUP);
946                 }
947         }
948
949         return (0);
950 }
951
952 /*
953  * Validate the given pool name, optionally putting an extended error message in
954  * 'buf'.
955  */
956 boolean_t
957 zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
958 {
959         namecheck_err_t why;
960         char what;
961         int ret;
962
963         ret = pool_namecheck(pool, &why, &what);
964
965         /*
966          * The rules for reserved pool names were extended at a later point.
967          * But we need to support users with existing pools that may now be
968          * invalid.  So we only check for this expanded set of names during a
969          * create (or import), and only in userland.
970          */
971         if (ret == 0 && !isopen &&
972             (strncmp(pool, "mirror", 6) == 0 ||
973             strncmp(pool, "raidz", 5) == 0 ||
974             strncmp(pool, "spare", 5) == 0 ||
975             strcmp(pool, "log") == 0)) {
976                 if (hdl != NULL)
977                         zfs_error_aux(hdl,
978                             dgettext(TEXT_DOMAIN, "name is reserved"));
979                 return (B_FALSE);
980         }
981
982
983         if (ret != 0) {
984                 if (hdl != NULL) {
985                         switch (why) {
986                         case NAME_ERR_TOOLONG:
987                                 zfs_error_aux(hdl,
988                                     dgettext(TEXT_DOMAIN, "name is too long"));
989                                 break;
990
991                         case NAME_ERR_INVALCHAR:
992                                 zfs_error_aux(hdl,
993                                     dgettext(TEXT_DOMAIN, "invalid character "
994                                     "'%c' in pool name"), what);
995                                 break;
996
997                         case NAME_ERR_NOLETTER:
998                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
999                                     "name must begin with a letter"));
1000                                 break;
1001
1002                         case NAME_ERR_RESERVED:
1003                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1004                                     "name is reserved"));
1005                                 break;
1006
1007                         case NAME_ERR_DISKLIKE:
1008                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1009                                     "pool name is reserved"));
1010                                 break;
1011
1012                         case NAME_ERR_LEADING_SLASH:
1013                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1014                                     "leading slash in name"));
1015                                 break;
1016
1017                         case NAME_ERR_EMPTY_COMPONENT:
1018                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1019                                     "empty component in name"));
1020                                 break;
1021
1022                         case NAME_ERR_TRAILING_SLASH:
1023                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1024                                     "trailing slash in name"));
1025                                 break;
1026
1027                         case NAME_ERR_MULTIPLE_DELIMITERS:
1028                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1029                                     "multiple '@' and/or '#' delimiters in "
1030                                     "name"));
1031                                 break;
1032
1033                         case NAME_ERR_NO_AT:
1034                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1035                                     "permission set is missing '@'"));
1036                                 break;
1037
1038                         default:
1039                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1040                                     "(%d) not defined"), why);
1041                                 break;
1042                         }
1043                 }
1044                 return (B_FALSE);
1045         }
1046
1047         return (B_TRUE);
1048 }
1049
1050 /*
1051  * Open a handle to the given pool, even if the pool is currently in the FAULTED
1052  * state.
1053  */
1054 zpool_handle_t *
1055 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
1056 {
1057         zpool_handle_t *zhp;
1058         boolean_t missing;
1059
1060         /*
1061          * Make sure the pool name is valid.
1062          */
1063         if (!zpool_name_valid(hdl, B_TRUE, pool)) {
1064                 (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1065                     dgettext(TEXT_DOMAIN, "cannot open '%s'"),
1066                     pool);
1067                 return (NULL);
1068         }
1069
1070         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1071                 return (NULL);
1072
1073         zhp->zpool_hdl = hdl;
1074         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1075
1076         if (zpool_refresh_stats(zhp, &missing) != 0) {
1077                 zpool_close(zhp);
1078                 return (NULL);
1079         }
1080
1081         if (missing) {
1082                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
1083                 (void) zfs_error_fmt(hdl, EZFS_NOENT,
1084                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
1085                 zpool_close(zhp);
1086                 return (NULL);
1087         }
1088
1089         return (zhp);
1090 }
1091
1092 /*
1093  * Like the above, but silent on error.  Used when iterating over pools (because
1094  * the configuration cache may be out of date).
1095  */
1096 int
1097 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
1098 {
1099         zpool_handle_t *zhp;
1100         boolean_t missing;
1101
1102         if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
1103                 return (-1);
1104
1105         zhp->zpool_hdl = hdl;
1106         (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
1107
1108         if (zpool_refresh_stats(zhp, &missing) != 0) {
1109                 zpool_close(zhp);
1110                 return (-1);
1111         }
1112
1113         if (missing) {
1114                 zpool_close(zhp);
1115                 *ret = NULL;
1116                 return (0);
1117         }
1118
1119         *ret = zhp;
1120         return (0);
1121 }
1122
1123 /*
1124  * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
1125  * state.
1126  */
1127 zpool_handle_t *
1128 zpool_open(libzfs_handle_t *hdl, const char *pool)
1129 {
1130         zpool_handle_t *zhp;
1131
1132         if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
1133                 return (NULL);
1134
1135         if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
1136                 (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
1137                     dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
1138                 zpool_close(zhp);
1139                 return (NULL);
1140         }
1141
1142         return (zhp);
1143 }
1144
/*
 * Close the handle.  Simply frees the memory associated with the handle.
 * Does not alter the pool itself.  The cached config/old-config/props
 * nvlists may be NULL; nvlist_free() accepts NULL.
 */
void
zpool_close(zpool_handle_t *zhp)
{
	nvlist_free(zhp->zpool_config);
	nvlist_free(zhp->zpool_old_config);
	nvlist_free(zhp->zpool_props);
	free(zhp);
}
1156
/*
 * Return the name of the pool.  The string is owned by the handle (it is
 * stored inline in the handle) and is valid until zpool_close().
 */
const char *
zpool_get_name(zpool_handle_t *zhp)
{
	return (zhp->zpool_name);
}
1165
1166
/*
 * Return the state of the pool (ACTIVE or UNAVAILABLE)
 */
int
zpool_get_state(zpool_handle_t *zhp)
{
	/*
	 * NOTE(review): zpool_state is presumably populated by
	 * zpool_refresh_stats() when the handle is opened — confirm.
	 */
	return (zhp->zpool_state);
}
1175
/*
 * Create the named pool, using the provided vdev list.  It is assumed
 * that the consumer has already validated the contents of the nvlist, so we
 * don't have to worry about error semantics.
 *
 * 'props' (pool properties) and 'fsprops' (root dataset properties) are both
 * optional and may be NULL.  Returns 0 on success; otherwise an error has
 * been reported through the libzfs handle and a nonzero value is returned.
 */
int
zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
    nvlist_t *props, nvlist_t *fsprops)
{
	zfs_cmd_t zc = {"\0"};
	nvlist_t *zc_fsprops = NULL;	/* validated root-dataset props */
	nvlist_t *zc_props = NULL;	/* validated pool props */
	nvlist_t *hidden_args = NULL;	/* carries raw key material */
	uint8_t *wkeydata = NULL;	/* wrapping key from zfs_crypto_create() */
	uint_t wkeylen = 0;
	char msg[1024];
	int ret = -1;

	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
	    "cannot create '%s'"), pool);

	if (!zpool_name_valid(hdl, B_FALSE, pool))
		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));

	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);

	if (props) {
		prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };

		if ((zc_props = zpool_valid_proplist(hdl, pool, props,
		    SPA_VERSION_1, flags, msg)) == NULL) {
			goto create_failed;
		}
	}

	if (fsprops) {
		uint64_t zoned;
		char *zonestr;

		/* The root dataset is zoned iff the property is "on". */
		zoned = ((nvlist_lookup_string(fsprops,
		    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
		    strcmp(zonestr, "on") == 0);

		if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM,
		    fsprops, zoned, NULL, NULL, B_TRUE, msg)) == NULL) {
			goto create_failed;
		}
		if (!zc_props &&
		    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
			goto create_failed;
		}
		/* Derive any encryption wrapping key for the root dataset. */
		if (zfs_crypto_create(hdl, NULL, zc_fsprops, props,
		    &wkeydata, &wkeylen) != 0) {
			zfs_error(hdl, EZFS_CRYPTOFAILED, msg);
			goto create_failed;
		}
		if (nvlist_add_nvlist(zc_props,
		    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
			goto create_failed;
		}
		/* Key material travels under ZPOOL_HIDDEN_ARGS. */
		if (wkeydata != NULL) {
			if (nvlist_alloc(&hidden_args, NV_UNIQUE_NAME, 0) != 0)
				goto create_failed;

			if (nvlist_add_uint8_array(hidden_args, "wkeydata",
			    wkeydata, wkeylen) != 0)
				goto create_failed;

			if (nvlist_add_nvlist(zc_props, ZPOOL_HIDDEN_ARGS,
			    hidden_args) != 0)
				goto create_failed;
		}
	}

	if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
		goto create_failed;

	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));

	if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {

		zcmd_free_nvlists(&zc);
		nvlist_free(zc_props);
		nvlist_free(zc_fsprops);
		nvlist_free(hidden_args);
		if (wkeydata != NULL)
			free(wkeydata);

		switch (errno) {
		case EBUSY:
			/*
			 * This can happen if the user has specified the same
			 * device multiple times.  We can't reliably detect this
			 * until we try to add it and see we already have a
			 * label.  This can also happen if the device is
			 * part of an active md or lvm device.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more vdevs refer to the same device, or "
			    "one of\nthe devices is part of an active md or "
			    "lvm device"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ERANGE:
			/*
			 * This happens if the record size is smaller or larger
			 * than the allowed size range, or not a power of 2.
			 *
			 * NOTE: although zfs_valid_proplist is called earlier,
			 * this case may have slipped through since the
			 * pool does not exist yet and it is therefore
			 * impossible to read properties e.g. max blocksize
			 * from the pool.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "record size invalid"));
			return (zfs_error(hdl, EZFS_BADPROP, msg));

		case EOVERFLOW:
			/*
			 * This occurs when one of the devices is below
			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
			 * device was the problem device since there's no
			 * reliable way to determine device size from userland.
			 */
			{
				char buf[64];

				zfs_nicebytes(SPA_MINDEVSIZE, buf,
				    sizeof (buf));

				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "one or more devices is less than the "
				    "minimum size (%s)"), buf);
			}
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ENOSPC:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more devices is out of space"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		case ENOTBLK:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cache device must be a disk or disk slice"));
			return (zfs_error(hdl, EZFS_BADDEV, msg));

		default:
			return (zpool_standard_error(hdl, errno, msg));
		}
	}

	/* Success falls through: ret == 0 and cleanup below is common. */
create_failed:
	zcmd_free_nvlists(&zc);
	nvlist_free(zc_props);
	nvlist_free(zc_fsprops);
	nvlist_free(hidden_args);
	if (wkeydata != NULL)
		free(wkeydata);
	return (ret);
}
1338
1339 /*
1340  * Destroy the given pool.  It is up to the caller to ensure that there are no
1341  * datasets left in the pool.
1342  */
1343 int
1344 zpool_destroy(zpool_handle_t *zhp, const char *log_str)
1345 {
1346         zfs_cmd_t zc = {"\0"};
1347         zfs_handle_t *zfp = NULL;
1348         libzfs_handle_t *hdl = zhp->zpool_hdl;
1349         char msg[1024];
1350
1351         if (zhp->zpool_state == POOL_STATE_ACTIVE &&
1352             (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
1353                 return (-1);
1354
1355         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1356         zc.zc_history = (uint64_t)(uintptr_t)log_str;
1357
1358         if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
1359                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1360                     "cannot destroy '%s'"), zhp->zpool_name);
1361
1362                 if (errno == EROFS) {
1363                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1364                             "one or more devices is read only"));
1365                         (void) zfs_error(hdl, EZFS_BADDEV, msg);
1366                 } else {
1367                         (void) zpool_standard_error(hdl, errno, msg);
1368                 }
1369
1370                 if (zfp)
1371                         zfs_close(zfp);
1372                 return (-1);
1373         }
1374
1375         if (zfp) {
1376                 remove_mountpoint(zfp);
1377                 zfs_close(zfp);
1378         }
1379
1380         return (0);
1381 }
1382
1383 /*
1384  * Create a checkpoint in the given pool.
1385  */
1386 int
1387 zpool_checkpoint(zpool_handle_t *zhp)
1388 {
1389         libzfs_handle_t *hdl = zhp->zpool_hdl;
1390         char msg[1024];
1391         int error;
1392
1393         error = lzc_pool_checkpoint(zhp->zpool_name);
1394         if (error != 0) {
1395                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1396                     "cannot checkpoint '%s'"), zhp->zpool_name);
1397                 (void) zpool_standard_error(hdl, error, msg);
1398                 return (-1);
1399         }
1400
1401         return (0);
1402 }
1403
1404 /*
1405  * Discard the checkpoint from the given pool.
1406  */
1407 int
1408 zpool_discard_checkpoint(zpool_handle_t *zhp)
1409 {
1410         libzfs_handle_t *hdl = zhp->zpool_hdl;
1411         char msg[1024];
1412         int error;
1413
1414         error = lzc_pool_checkpoint_discard(zhp->zpool_name);
1415         if (error != 0) {
1416                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1417                     "cannot discard checkpoint in '%s'"), zhp->zpool_name);
1418                 (void) zpool_standard_error(hdl, error, msg);
1419                 return (-1);
1420         }
1421
1422         return (0);
1423 }
1424
/*
 * Add the given vdevs to the pool.  The caller must have already performed the
 * necessary verification to ensure that the vdev specification is well-formed.
 *
 * Returns 0 on success; otherwise an error has been reported through the
 * libzfs handle and -1 is returned.
 */
int
zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
{
	zfs_cmd_t zc = {"\0"};
	int ret;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	char msg[1024];
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
	    "cannot add to '%s'"), zhp->zpool_name);

	/* Hot spares require at least SPA_VERSION_SPARES on-disk. */
	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
	    SPA_VERSION_SPARES &&
	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
		    "upgraded to add hot spares"));
		return (zfs_error(hdl, EZFS_BADVERSION, msg));
	}

	/* Cache devices require at least SPA_VERSION_L2CACHE on-disk. */
	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
	    SPA_VERSION_L2CACHE &&
	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
		    "upgraded to add cache devices"));
		return (zfs_error(hdl, EZFS_BADVERSION, msg));
	}

	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);
	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
		switch (errno) {
		case EBUSY:
			/*
			 * This can happen if the user has specified the same
			 * device multiple times.  We can't reliably detect this
			 * until we try to add it and see we already have a
			 * label.
			 */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "one or more vdevs refer to the same device"));
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		case EINVAL:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "invalid config; a pool with removing/removed "
			    "vdevs does not support adding raidz vdevs"));
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		case EOVERFLOW:
			/*
			 * This occurs when one of the devices is below
			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
			 * device was the problem device since there's no
			 * reliable way to determine device size from userland.
			 */
			{
				char buf[64];

				zfs_nicebytes(SPA_MINDEVSIZE, buf,
				    sizeof (buf));

				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "device is less than the minimum "
				    "size (%s)"), buf);
			}
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		case ENOTSUP:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "pool must be upgraded to add these vdevs"));
			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
			break;

		case ENOTBLK:
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cache device must be a disk or disk slice"));
			(void) zfs_error(hdl, EZFS_BADDEV, msg);
			break;

		default:
			(void) zpool_standard_error(hdl, errno, msg);
		}

		ret = -1;
	} else {
		ret = 0;
	}

	zcmd_free_nvlists(&zc);

	return (ret);
}
1530
1531 /*
1532  * Exports the pool from the system.  The caller must ensure that there are no
1533  * mounted datasets in the pool.
1534  */
1535 static int
1536 zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce,
1537     const char *log_str)
1538 {
1539         zfs_cmd_t zc = {"\0"};
1540         char msg[1024];
1541
1542         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1543             "cannot export '%s'"), zhp->zpool_name);
1544
1545         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1546         zc.zc_cookie = force;
1547         zc.zc_guid = hardforce;
1548         zc.zc_history = (uint64_t)(uintptr_t)log_str;
1549
1550         if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
1551                 switch (errno) {
1552                 case EXDEV:
1553                         zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
1554                             "use '-f' to override the following errors:\n"
1555                             "'%s' has an active shared spare which could be"
1556                             " used by other pools once '%s' is exported."),
1557                             zhp->zpool_name, zhp->zpool_name);
1558                         return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
1559                             msg));
1560                 default:
1561                         return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1562                             msg));
1563                 }
1564         }
1565
1566         return (0);
1567 }
1568
/*
 * Export the pool: thin wrapper around zpool_export_common() with
 * hardforce == B_FALSE.
 */
int
zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)
{
	return (zpool_export_common(zhp, force, B_FALSE, log_str));
}
1574
/*
 * Forcibly export the pool: thin wrapper around zpool_export_common() with
 * both 'force' and 'hardforce' set to B_TRUE.
 */
int
zpool_export_force(zpool_handle_t *zhp, const char *log_str)
{
	return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str));
}
1580
/*
 * Print a message describing the outcome (or, on a dry run, the predicted
 * outcome) of a pool rewind: the time the pool was (or would be) returned
 * to, and approximately how much transaction history is discarded.
 * Silently returns if error printing is disabled, or if 'config' carries
 * no rewind information.
 */
static void
zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
    nvlist_t *config)
{
	nvlist_t *nv = NULL;
	uint64_t rewindto;
	int64_t loss = -1;	/* seconds of transactions lost; -1 = unknown */
	struct tm t;
	char timestr[128];

	if (!hdl->libzfs_printerr || config == NULL)
		return;

	/* Drill down to the rewind information, if any. */
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
	    nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) {
		return;
	}

	/* Without a load time there is nothing meaningful to report. */
	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
		return;
	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);

	/*
	 * NOTE(review): the cast assumes time_t is layout-compatible with
	 * the uint64_t slot on this platform — confirm for 32-bit time_t.
	 */
	if (localtime_r((time_t *)&rewindto, &t) != NULL &&
	    strftime(timestr, 128, "%c", &t) != 0) {
		if (dryrun) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "Would be able to return %s "
			    "to its state as of %s.\n"),
			    name, timestr);
		} else {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "Pool %s returned to its state as of %s.\n"),
			    name, timestr);
		}
		/*
		 * More than two minutes of loss is reported in minutes,
		 * rounded to the nearest minute; otherwise in seconds.
		 */
		if (loss > 120) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "%s approximately %lld "),
			    dryrun ? "Would discard" : "Discarded",
			    ((longlong_t)loss + 30) / 60);
			(void) printf(dgettext(TEXT_DOMAIN,
			    "minutes of transactions.\n"));
		} else if (loss > 0) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "%s approximately %lld "),
			    dryrun ? "Would discard" : "Discarded",
			    (longlong_t)loss);
			(void) printf(dgettext(TEXT_DOMAIN,
			    "seconds of transactions.\n"));
		}
	}
}
1632
/*
 * Print a message explaining whether (and how) a damaged pool can be
 * recovered by rewinding, based on the load info the kernel returned in
 * 'config'.  A non-negative 'reason' prefixes the output with "action: "
 * (status-style output); a negative one indents it instead.  If no rewind
 * succeeded (no LOAD_TIME present), advises restoring from backup.
 */
void
zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
    nvlist_t *config)
{
	nvlist_t *nv = NULL;
	int64_t loss = -1;	/* seconds of transactions lost; -1 = unknown */
	uint64_t edata = UINT64_MAX;	/* persistent data errors after rewind */
	uint64_t rewindto;
	struct tm t;
	char timestr[128];

	if (!hdl->libzfs_printerr)
		return;

	if (reason >= 0)
		(void) printf(dgettext(TEXT_DOMAIN, "action: "));
	else
		(void) printf(dgettext(TEXT_DOMAIN, "\t"));

	/* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
	    nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 ||
	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
		goto no_info;

	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
	    &edata);

	(void) printf(dgettext(TEXT_DOMAIN,
	    "Recovery is possible, but will result in some data loss.\n"));

	/*
	 * NOTE(review): the cast assumes time_t is layout-compatible with
	 * the uint64_t slot on this platform — confirm for 32-bit time_t.
	 */
	if (localtime_r((time_t *)&rewindto, &t) != NULL &&
	    strftime(timestr, 128, "%c", &t) != 0) {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "\tReturning the pool to its state as of %s\n"
		    "\tshould correct the problem.  "),
		    timestr);
	} else {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "\tReverting the pool to an earlier state "
		    "should correct the problem.\n\t"));
	}

	/*
	 * More than two minutes of loss is reported in minutes, rounded
	 * to the nearest minute; otherwise in seconds.
	 */
	if (loss > 120) {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "Approximately %lld minutes of data\n"
		    "\tmust be discarded, irreversibly.  "),
		    ((longlong_t)loss + 30) / 60);
	} else if (loss > 0) {
		(void) printf(dgettext(TEXT_DOMAIN,
		    "Approximately %lld seconds of data\n"
		    "\tmust be discarded, irreversibly.  "),
		    (longlong_t)loss);
	}
	/* edata == UINT64_MAX means the error count was not reported. */
	if (edata != 0 && edata != UINT64_MAX) {
		if (edata == 1) {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "After rewind, at least\n"
			    "\tone persistent user-data error will remain.  "));
		} else {
			(void) printf(dgettext(TEXT_DOMAIN,
			    "After rewind, several\n"
			    "\tpersistent user-data errors will remain.  "));
		}
	}
	/* 'clear' recovers an imported pool; 'import' a failed import. */
	(void) printf(dgettext(TEXT_DOMAIN,
	    "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
	    reason >= 0 ? "clear" : "import", name);

	(void) printf(dgettext(TEXT_DOMAIN,
	    "A scrub of the pool\n"
	    "\tis strongly recommended after recovery.\n"));
	return;

no_info:
	(void) printf(dgettext(TEXT_DOMAIN,
	    "Destroy and re-create the pool from\n\ta backup source.\n"));
}
1712
1713 /*
1714  * zpool_import() is a contracted interface. Should be kept the same
1715  * if possible.
1716  *
1717  * Applications should use zpool_import_props() to import a pool with
1718  * new properties value to be set.
1719  */
1720 int
1721 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1722     char *altroot)
1723 {
1724         nvlist_t *props = NULL;
1725         int ret;
1726
1727         if (altroot != NULL) {
1728                 if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1729                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1730                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1731                             newname));
1732                 }
1733
1734                 if (nvlist_add_string(props,
1735                     zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
1736                     nvlist_add_string(props,
1737                     zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
1738                         nvlist_free(props);
1739                         return (zfs_error_fmt(hdl, EZFS_NOMEM,
1740                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1741                             newname));
1742                 }
1743         }
1744
1745         ret = zpool_import_props(hdl, config, newname, props,
1746             ZFS_IMPORT_NORMAL);
1747         nvlist_free(props);
1748         return (ret);
1749 }
1750
1751 static void
1752 print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
1753     int indent)
1754 {
1755         nvlist_t **child;
1756         uint_t c, children;
1757         char *vname;
1758         uint64_t is_log = 0;
1759
1760         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
1761             &is_log);
1762
1763         if (name != NULL)
1764                 (void) printf("\t%*s%s%s\n", indent, "", name,
1765                     is_log ? " [log]" : "");
1766
1767         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1768             &child, &children) != 0)
1769                 return;
1770
1771         for (c = 0; c < children; c++) {
1772                 vname = zpool_vdev_name(hdl, NULL, child[c], VDEV_NAME_TYPE_ID);
1773                 print_vdev_tree(hdl, vname, child[c], indent + 2);
1774                 free(vname);
1775         }
1776 }
1777
1778 void
1779 zpool_print_unsup_feat(nvlist_t *config)
1780 {
1781         nvlist_t *nvinfo, *unsup_feat;
1782         nvpair_t *nvp;
1783
1784         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) ==
1785             0);
1786         verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT,
1787             &unsup_feat) == 0);
1788
1789         for (nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL;
1790             nvp = nvlist_next_nvpair(unsup_feat, nvp)) {
1791                 char *desc;
1792
1793                 verify(nvpair_type(nvp) == DATA_TYPE_STRING);
1794                 verify(nvpair_value_string(nvp, &desc) == 0);
1795
1796                 if (strlen(desc) > 0)
1797                         (void) printf("\t%s (%s)\n", nvpair_name(nvp), desc);
1798                 else
1799                         (void) printf("\t%s\n", nvpair_name(nvp));
1800         }
1801 }
1802
1803 /*
1804  * Import the given pool using the known configuration and a list of
1805  * properties to be set. The configuration should have come from
1806  * zpool_find_import(). The 'newname' parameters control whether the pool
1807  * is imported with a different name.
1808  */
1809 int
1810 zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1811     nvlist_t *props, int flags)
1812 {
1813         zfs_cmd_t zc = {"\0"};
1814         zpool_load_policy_t policy;
1815         nvlist_t *nv = NULL;
1816         nvlist_t *nvinfo = NULL;
1817         nvlist_t *missing = NULL;
1818         char *thename;
1819         char *origname;
1820         int ret;
1821         int error = 0;
1822         char errbuf[1024];
1823
1824         verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1825             &origname) == 0);
1826
1827         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1828             "cannot import pool '%s'"), origname);
1829
1830         if (newname != NULL) {
1831                 if (!zpool_name_valid(hdl, B_FALSE, newname))
1832                         return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1833                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1834                             newname));
1835                 thename = (char *)newname;
1836         } else {
1837                 thename = origname;
1838         }
1839
1840         if (props != NULL) {
1841                 uint64_t version;
1842                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
1843
1844                 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1845                     &version) == 0);
1846
1847                 if ((props = zpool_valid_proplist(hdl, origname,
1848                     props, version, flags, errbuf)) == NULL)
1849                         return (-1);
1850                 if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1851                         nvlist_free(props);
1852                         return (-1);
1853                 }
1854                 nvlist_free(props);
1855         }
1856
1857         (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1858
1859         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1860             &zc.zc_guid) == 0);
1861
1862         if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1863                 zcmd_free_nvlists(&zc);
1864                 return (-1);
1865         }
1866         if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
1867                 zcmd_free_nvlists(&zc);
1868                 return (-1);
1869         }
1870
1871         zc.zc_cookie = flags;
1872         while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
1873             errno == ENOMEM) {
1874                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
1875                         zcmd_free_nvlists(&zc);
1876                         return (-1);
1877                 }
1878         }
1879         if (ret != 0)
1880                 error = errno;
1881
1882         (void) zcmd_read_dst_nvlist(hdl, &zc, &nv);
1883
1884         zcmd_free_nvlists(&zc);
1885
1886         zpool_get_load_policy(config, &policy);
1887
1888         if (error) {
1889                 char desc[1024];
1890                 char aux[256];
1891
1892                 /*
1893                  * Dry-run failed, but we print out what success
1894                  * looks like if we found a best txg
1895                  */
1896                 if (policy.zlp_rewind & ZPOOL_TRY_REWIND) {
1897                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
1898                             B_TRUE, nv);
1899                         nvlist_free(nv);
1900                         return (-1);
1901                 }
1902
1903                 if (newname == NULL)
1904                         (void) snprintf(desc, sizeof (desc),
1905                             dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1906                             thename);
1907                 else
1908                         (void) snprintf(desc, sizeof (desc),
1909                             dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1910                             origname, thename);
1911
1912                 switch (error) {
1913                 case ENOTSUP:
1914                         if (nv != NULL && nvlist_lookup_nvlist(nv,
1915                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1916                             nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) {
1917                                 (void) printf(dgettext(TEXT_DOMAIN, "This "
1918                                     "pool uses the following feature(s) not "
1919                                     "supported by this system:\n"));
1920                                 zpool_print_unsup_feat(nv);
1921                                 if (nvlist_exists(nvinfo,
1922                                     ZPOOL_CONFIG_CAN_RDONLY)) {
1923                                         (void) printf(dgettext(TEXT_DOMAIN,
1924                                             "All unsupported features are only "
1925                                             "required for writing to the pool."
1926                                             "\nThe pool can be imported using "
1927                                             "'-o readonly=on'.\n"));
1928                                 }
1929                         }
1930                         /*
1931                          * Unsupported version.
1932                          */
1933                         (void) zfs_error(hdl, EZFS_BADVERSION, desc);
1934                         break;
1935
1936                 case EREMOTEIO:
1937                         if (nv != NULL && nvlist_lookup_nvlist(nv,
1938                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0) {
1939                                 char *hostname = "<unknown>";
1940                                 uint64_t hostid = 0;
1941                                 mmp_state_t mmp_state;
1942
1943                                 mmp_state = fnvlist_lookup_uint64(nvinfo,
1944                                     ZPOOL_CONFIG_MMP_STATE);
1945
1946                                 if (nvlist_exists(nvinfo,
1947                                     ZPOOL_CONFIG_MMP_HOSTNAME))
1948                                         hostname = fnvlist_lookup_string(nvinfo,
1949                                             ZPOOL_CONFIG_MMP_HOSTNAME);
1950
1951                                 if (nvlist_exists(nvinfo,
1952                                     ZPOOL_CONFIG_MMP_HOSTID))
1953                                         hostid = fnvlist_lookup_uint64(nvinfo,
1954                                             ZPOOL_CONFIG_MMP_HOSTID);
1955
1956                                 if (mmp_state == MMP_STATE_ACTIVE) {
1957                                         (void) snprintf(aux, sizeof (aux),
1958                                             dgettext(TEXT_DOMAIN, "pool is imp"
1959                                             "orted on host '%s' (hostid=%lx).\n"
1960                                             "Export the pool on the other "
1961                                             "system, then run 'zpool import'."),
1962                                             hostname, (unsigned long) hostid);
1963                                 } else if (mmp_state == MMP_STATE_NO_HOSTID) {
1964                                         (void) snprintf(aux, sizeof (aux),
1965                                             dgettext(TEXT_DOMAIN, "pool has "
1966                                             "the multihost property on and "
1967                                             "the\nsystem's hostid is not set. "
1968                                             "Set a unique system hostid with "
1969                                             "the zgenhostid(8) command.\n"));
1970                                 }
1971
1972                                 (void) zfs_error_aux(hdl, aux);
1973                         }
1974                         (void) zfs_error(hdl, EZFS_ACTIVE_POOL, desc);
1975                         break;
1976
1977                 case EINVAL:
1978                         (void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1979                         break;
1980
1981                 case EROFS:
1982                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1983                             "one or more devices is read only"));
1984                         (void) zfs_error(hdl, EZFS_BADDEV, desc);
1985                         break;
1986
1987                 case ENXIO:
1988                         if (nv && nvlist_lookup_nvlist(nv,
1989                             ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
1990                             nvlist_lookup_nvlist(nvinfo,
1991                             ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
1992                                 (void) printf(dgettext(TEXT_DOMAIN,
1993                                     "The devices below are missing or "
1994                                     "corrupted, use '-m' to import the pool "
1995                                     "anyway:\n"));
1996                                 print_vdev_tree(hdl, NULL, missing, 2);
1997                                 (void) printf("\n");
1998                         }
1999                         (void) zpool_standard_error(hdl, error, desc);
2000                         break;
2001
2002                 case EEXIST:
2003                         (void) zpool_standard_error(hdl, error, desc);
2004                         break;
2005
2006                 case EBUSY:
2007                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2008                             "one or more devices are already in use\n"));
2009                         (void) zfs_error(hdl, EZFS_BADDEV, desc);
2010                         break;
2011                 case ENAMETOOLONG:
2012                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2013                             "new name of at least one dataset is longer than "
2014                             "the maximum allowable length"));
2015                         (void) zfs_error(hdl, EZFS_NAMETOOLONG, desc);
2016                         break;
2017                 default:
2018                         (void) zpool_standard_error(hdl, error, desc);
2019                         zpool_explain_recover(hdl,
2020                             newname ? origname : thename, -error, nv);
2021                         break;
2022                 }
2023
2024                 nvlist_free(nv);
2025                 ret = -1;
2026         } else {
2027                 zpool_handle_t *zhp;
2028
2029                 /*
2030                  * This should never fail, but play it safe anyway.
2031                  */
2032                 if (zpool_open_silent(hdl, thename, &zhp) != 0)
2033                         ret = -1;
2034                 else if (zhp != NULL)
2035                         zpool_close(zhp);
2036                 if (policy.zlp_rewind &
2037                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
2038                         zpool_rewind_exclaim(hdl, newname ? origname : thename,
2039                             ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0), nv);
2040                 }
2041                 nvlist_free(nv);
2042                 return (0);
2043         }
2044
2045         return (ret);
2046 }
2047
2048 /*
2049  * Scan the pool.
2050  */
2051 int
2052 zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
2053 {
2054         zfs_cmd_t zc = {"\0"};
2055         char msg[1024];
2056         int err;
2057         libzfs_handle_t *hdl = zhp->zpool_hdl;
2058
2059         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2060         zc.zc_cookie = func;
2061         zc.zc_flags = cmd;
2062
2063         if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0)
2064                 return (0);
2065
2066         err = errno;
2067
2068         /* ECANCELED on a scrub means we resumed a paused scrub */
2069         if (err == ECANCELED && func == POOL_SCAN_SCRUB &&
2070             cmd == POOL_SCRUB_NORMAL)
2071                 return (0);
2072
2073         if (err == ENOENT && func != POOL_SCAN_NONE && cmd == POOL_SCRUB_NORMAL)
2074                 return (0);
2075
2076         if (func == POOL_SCAN_SCRUB) {
2077                 if (cmd == POOL_SCRUB_PAUSE) {
2078                         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2079                             "cannot pause scrubbing %s"), zc.zc_name);
2080                 } else {
2081                         assert(cmd == POOL_SCRUB_NORMAL);
2082                         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
2083                             "cannot scrub %s"), zc.zc_name);
2084                 }
2085         } else if (func == POOL_SCAN_NONE) {
2086                 (void) snprintf(msg, sizeof (msg),
2087                     dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
2088                     zc.zc_name);
2089         } else {
2090                 assert(!"unexpected result");
2091         }
2092
2093         if (err == EBUSY) {
2094                 nvlist_t *nvroot;
2095                 pool_scan_stat_t *ps = NULL;
2096                 uint_t psc;
2097
2098                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
2099                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
2100                 (void) nvlist_lookup_uint64_array(nvroot,
2101                     ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
2102                 if (ps && ps->pss_func == POOL_SCAN_SCRUB) {
2103                         if (cmd == POOL_SCRUB_PAUSE)
2104                                 return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
2105                         else
2106                                 return (zfs_error(hdl, EZFS_SCRUBBING, msg));
2107                 } else {
2108                         return (zfs_error(hdl, EZFS_RESILVERING, msg));
2109                 }
2110         } else if (err == ENOENT) {
2111                 return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
2112         } else {
2113                 return (zpool_standard_error(hdl, err, msg));
2114         }
2115 }
2116
2117 /*
2118  * Find a vdev that matches the search criteria specified. We use the
2119  * the nvpair name to determine how we should look for the device.
2120  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
2121  * spare; but FALSE if its an INUSE spare.
2122  */
2123 static nvlist_t *
2124 vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
2125     boolean_t *l2cache, boolean_t *log)
2126 {
2127         uint_t c, children;
2128         nvlist_t **child;
2129         nvlist_t *ret;
2130         uint64_t is_log;
2131         char *srchkey;
2132         nvpair_t *pair = nvlist_next_nvpair(search, NULL);
2133
2134         /* Nothing to look for */
2135         if (search == NULL || pair == NULL)
2136                 return (NULL);
2137
2138         /* Obtain the key we will use to search */
2139         srchkey = nvpair_name(pair);
2140
2141         switch (nvpair_type(pair)) {
2142         case DATA_TYPE_UINT64:
2143                 if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
2144                         uint64_t srchval, theguid;
2145
2146                         verify(nvpair_value_uint64(pair, &srchval) == 0);
2147                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2148                             &theguid) == 0);
2149                         if (theguid == srchval)
2150                                 return (nv);
2151                 }
2152                 break;
2153
2154         case DATA_TYPE_STRING: {
2155                 char *srchval, *val;
2156
2157                 verify(nvpair_value_string(pair, &srchval) == 0);
2158                 if (nvlist_lookup_string(nv, srchkey, &val) != 0)
2159                         break;
2160
2161                 /*
2162                  * Search for the requested value. Special cases:
2163                  *
2164                  * - ZPOOL_CONFIG_PATH for whole disk entries.  These end in
2165                  *   "-part1", or "p1".  The suffix is hidden from the user,
2166                  *   but included in the string, so this matches around it.
2167                  * - ZPOOL_CONFIG_PATH for short names zfs_strcmp_shortname()
2168                  *   is used to check all possible expanded paths.
2169                  * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
2170                  *
2171                  * Otherwise, all other searches are simple string compares.
2172                  */
2173                 if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0) {
2174                         uint64_t wholedisk = 0;
2175
2176                         (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
2177                             &wholedisk);
2178                         if (zfs_strcmp_pathname(srchval, val, wholedisk) == 0)
2179                                 return (nv);
2180
2181                 } else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
2182                         char *type, *idx, *end, *p;
2183                         uint64_t id, vdev_id;
2184
2185                         /*
2186                          * Determine our vdev type, keeping in mind
2187                          * that the srchval is composed of a type and
2188                          * vdev id pair (i.e. mirror-4).
2189                          */
2190                         if ((type = strdup(srchval)) == NULL)
2191                                 return (NULL);
2192
2193                         if ((p = strrchr(type, '-')) == NULL) {
2194                                 free(type);
2195                                 break;
2196                         }
2197                         idx = p + 1;
2198                         *p = '\0';
2199
2200                         /*
2201                          * If the types don't match then keep looking.
2202                          */
2203                         if (strncmp(val, type, strlen(val)) != 0) {
2204                                 free(type);
2205                                 break;
2206                         }
2207
2208                         verify(zpool_vdev_is_interior(type));
2209                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
2210                             &id) == 0);
2211
2212                         errno = 0;
2213                         vdev_id = strtoull(idx, &end, 10);
2214
2215                         free(type);
2216                         if (errno != 0)
2217                                 return (NULL);
2218
2219                         /*
2220                          * Now verify that we have the correct vdev id.
2221                          */
2222                         if (vdev_id == id)
2223                                 return (nv);
2224                 }
2225
2226                 /*
2227                  * Common case
2228                  */
2229                 if (strcmp(srchval, val) == 0)
2230                         return (nv);
2231                 break;
2232         }
2233
2234         default:
2235                 break;
2236         }
2237
2238         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
2239             &child, &children) != 0)
2240                 return (NULL);
2241
2242         for (c = 0; c < children; c++) {
2243                 if ((ret = vdev_to_nvlist_iter(child[c], search,
2244                     avail_spare, l2cache, NULL)) != NULL) {
2245                         /*
2246                          * The 'is_log' value is only set for the toplevel
2247                          * vdev, not the leaf vdevs.  So we always lookup the
2248                          * log device from the root of the vdev tree (where
2249                          * 'log' is non-NULL).
2250                          */
2251                         if (log != NULL &&
2252                             nvlist_lookup_uint64(child[c],
2253                             ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
2254                             is_log) {
2255                                 *log = B_TRUE;
2256                         }
2257                         return (ret);
2258                 }
2259         }
2260
2261         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
2262             &child, &children) == 0) {
2263                 for (c = 0; c < children; c++) {
2264                         if ((ret = vdev_to_nvlist_iter(child[c], search,
2265                             avail_spare, l2cache, NULL)) != NULL) {
2266                                 *avail_spare = B_TRUE;
2267                                 return (ret);
2268                         }
2269                 }
2270         }
2271
2272         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
2273             &child, &children) == 0) {
2274                 for (c = 0; c < children; c++) {
2275                         if ((ret = vdev_to_nvlist_iter(child[c], search,
2276                             avail_spare, l2cache, NULL)) != NULL) {
2277                                 *l2cache = B_TRUE;
2278                                 return (ret);
2279                         }
2280                 }
2281         }
2282
2283         return (NULL);
2284 }
2285
2286 /*
2287  * Given a physical path (minus the "/devices" prefix), find the
2288  * associated vdev.
2289  */
2290 nvlist_t *
2291 zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
2292     boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
2293 {
2294         nvlist_t *search, *nvroot, *ret;
2295
2296         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2297         verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
2298
2299         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2300             &nvroot) == 0);
2301
2302         *avail_spare = B_FALSE;
2303         *l2cache = B_FALSE;
2304         if (log != NULL)
2305                 *log = B_FALSE;
2306         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2307         nvlist_free(search);
2308
2309         return (ret);
2310 }
2311
2312 /*
2313  * Determine if we have an "interior" top-level vdev (i.e mirror/raidz).
2314  */
2315 static boolean_t
2316 zpool_vdev_is_interior(const char *name)
2317 {
2318         if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
2319             strncmp(name, VDEV_TYPE_SPARE, strlen(VDEV_TYPE_SPARE)) == 0 ||
2320             strncmp(name,
2321             VDEV_TYPE_REPLACING, strlen(VDEV_TYPE_REPLACING)) == 0 ||
2322             strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0)
2323                 return (B_TRUE);
2324         return (B_FALSE);
2325 }
2326
2327 nvlist_t *
2328 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
2329     boolean_t *l2cache, boolean_t *log)
2330 {
2331         char *end;
2332         nvlist_t *nvroot, *search, *ret;
2333         uint64_t guid;
2334
2335         verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2336
2337         guid = strtoull(path, &end, 0);
2338         if (guid != 0 && *end == '\0') {
2339                 verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
2340         } else if (zpool_vdev_is_interior(path)) {
2341                 verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
2342         } else {
2343                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
2344         }
2345
2346         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
2347             &nvroot) == 0);
2348
2349         *avail_spare = B_FALSE;
2350         *l2cache = B_FALSE;
2351         if (log != NULL)
2352                 *log = B_FALSE;
2353         ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
2354         nvlist_free(search);
2355
2356         return (ret);
2357 }
2358
2359 static int
2360 vdev_is_online(nvlist_t *nv)
2361 {
2362         uint64_t ival;
2363
2364         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
2365             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
2366             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
2367                 return (0);
2368
2369         return (1);
2370 }
2371
/*
 * Helper function for zpool_get_physpaths().
 *
 * Append this vdev's ZPOOL_CONFIG_PHYS_PATH to the caller's buffer,
 * space-separated from any path already written.  *bytes_written is the
 * running count of characters used and is advanced by snprintf()'s
 * reported length even on truncation (the caller aborts on EZFS_NOSPC,
 * so the overshoot is never reused).  Returns 0 on success,
 * EZFS_NODEVICE if the vdev has no physical path, or EZFS_NOSPC if the
 * buffer is too small.
 */
static int
vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
    size_t *bytes_written)
{
	size_t bytes_left, pos, rsz;
	char *tmppath;
	const char *format;

	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
	    &tmppath) != 0)
		return (EZFS_NODEVICE);

	pos = *bytes_written;
	bytes_left = physpath_size - pos;
	/* Separate subsequent paths from the previous one with a space. */
	format = (pos == 0) ? "%s" : " %s";

	rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
	*bytes_written += rsz;

	/* snprintf() returning >= the space given means truncation. */
	if (rsz >= bytes_left) {
		/* if physpath was not copied properly, clear it */
		if (bytes_left != 0) {
			physpath[pos] = 0;
		}
		return (EZFS_NOSPC);
	}
	return (0);
}
2403
/*
 * Recursively collect the physical paths of online disks under 'nv'
 * into 'physpath' (space-separated), advancing *rsz as bytes are
 * written.  Disk leaves contribute directly; mirror, raidz, replacing,
 * and spare vdevs are descended into.  Under a spare, only the
 * currently active spare leaf contributes a path.
 *
 * NOTE(review): this returns EZFS_POOL_INVALARG even after paths were
 * appended successfully; callers appear to rely on *rsz rather than the
 * return value, and the recursion only propagates EZFS_NOSPC upward —
 * confirm before changing the return semantics.
 */
static int
vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
    size_t *rsz, boolean_t is_spare)
{
	char *type;
	int ret;

	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
		return (EZFS_INVALCONFIG);

	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
		/*
		 * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
		 * For a spare vdev, we only want to boot from the active
		 * spare device.
		 */
		if (is_spare) {
			uint64_t spare = 0;
			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
			    &spare);
			if (!spare)
				return (EZFS_INVALCONFIG);
		}

		/* Only online disks contribute a path. */
		if (vdev_is_online(nv)) {
			if ((ret = vdev_get_one_physpath(nv, physpath,
			    phypath_size, rsz)) != 0)
				return (ret);
		}
	} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
	    strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
	    strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
	    (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
		/* Note: the spare comparison deliberately updates is_spare. */
		nvlist_t **child;
		uint_t count;
		int i, ret;

		if (nvlist_lookup_nvlist_array(nv,
		    ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
			return (EZFS_INVALCONFIG);

		/* Recurse; only a buffer-space failure aborts the walk. */
		for (i = 0; i < count; i++) {
			ret = vdev_get_physpaths(child[i], physpath,
			    phypath_size, rsz, is_spare);
			if (ret == EZFS_NOSPC)
				return (ret);
		}
	}

	return (EZFS_POOL_INVALARG);
}
2455
2456 /*
2457  * Get phys_path for a root pool config.
2458  * Return 0 on success; non-zero on failure.
2459  */
2460 static int
2461 zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
2462 {
2463         size_t rsz;
2464         nvlist_t *vdev_root;
2465         nvlist_t **child;
2466         uint_t count;
2467         char *type;
2468
2469         rsz = 0;
2470
2471         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2472             &vdev_root) != 0)
2473                 return (EZFS_INVALCONFIG);
2474
2475         if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
2476             nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
2477             &child, &count) != 0)
2478                 return (EZFS_INVALCONFIG);
2479
2480         /*
2481          * root pool can only have a single top-level vdev.
2482          */
2483         if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1)
2484                 return (EZFS_POOL_INVALARG);
2485
2486         (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
2487             B_FALSE);
2488
2489         /* No online devices */
2490         if (rsz == 0)
2491                 return (EZFS_NODEVICE);
2492
2493         return (0);
2494 }
2495
/*
 * Get phys_path for a root pool
 * Return 0 on success; non-zero on failure.
 *
 * Thin wrapper: extracts the paths from this handle's cached pool
 * configuration.
 */
int
zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
{
	return (zpool_get_config_physpath(zhp->zpool_config, physpath,
	    phypath_size));
}
2506
/*
 * If the device has being dynamically expanded then we need to relabel
 * the disk to use the new unallocated space.
 *
 * Returns 0 on success; on failure reports through zfs_error() using
 * 'msg' as the message prefix and returns the matching EZFS_* code.
 */
static int
zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
{
	int fd, error;

	/* O_DIRECT so label I/O bypasses the page cache. */
	if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
		    "relabel '%s': unable to open device: %d"), path, errno);
		return (zfs_error(hdl, EZFS_OPENFAILED, msg));
	}

	/*
	 * It's possible that we might encounter an error if the device
	 * does not have any unallocated space left. If so, we simply
	 * ignore that error and continue on.
	 *
	 * Also, we don't call efi_rescan() - that would just return EBUSY.
	 * The module will do it for us in vdev_disk_open().
	 */
	error = efi_use_whole_disk(fd);

	/* Flush the buffers to disk and invalidate the page cache. */
	(void) fsync(fd);
	(void) ioctl(fd, BLKFLSBUF);

	(void) close(fd);
	/* VT_ENOSPC (no space left in the label) is the tolerated case. */
	if (error && error != VT_ENOSPC) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
		    "relabel '%s': unable to read disk capacity"), path);
		return (zfs_error(hdl, EZFS_NOCAP, msg));
	}

	return (0);
}
2545
2546 /*
2547  * Convert a vdev path to a GUID.  Returns GUID or 0 on error.
2548  *
2549  * If is_spare, is_l2cache, or is_log is non-NULL, then store within it
2550  * if the VDEV is a spare, l2cache, or log device.  If they're NULL then
2551  * ignore them.
2552  */
2553 static uint64_t
2554 zpool_vdev_path_to_guid_impl(zpool_handle_t *zhp, const char *path,
2555     boolean_t *is_spare, boolean_t *is_l2cache, boolean_t *is_log)
2556 {
2557         uint64_t guid;
2558         boolean_t spare = B_FALSE, l2cache = B_FALSE, log = B_FALSE;
2559         nvlist_t *tgt;
2560
2561         if ((tgt = zpool_find_vdev(zhp, path, &spare, &l2cache,
2562             &log)) == NULL)
2563                 return (0);
2564
2565         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &guid) == 0);
2566         if (is_spare != NULL)
2567                 *is_spare = spare;
2568         if (is_l2cache != NULL)
2569                 *is_l2cache = l2cache;
2570         if (is_log != NULL)
2571                 *is_log = log;
2572
2573         return (guid);
2574 }
2575
/* Convert a vdev path to a GUID.  Returns GUID or 0 on error. */
uint64_t
zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path)
{
	/* Caller doesn't care whether it's a spare, l2cache, or log device. */
	return (zpool_vdev_path_to_guid_impl(zhp, path, NULL, NULL, NULL));
}
2582
/*
 * Bring the specified vdev online.   The 'flags' parameter is a set of the
 * ZFS_ONLINE_* flags.
 *
 * If expansion is requested (or the pool has autoexpand on) and the
 * vdev is a whole disk, the disk is relabeled first so the new capacity
 * becomes visible.  On success, *newstate receives the resulting vdev
 * state.  Returns 0 on success, or an error reported via zfs_error().
 */
int
zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
    vdev_state_t *newstate)
{
	zfs_cmd_t zc = {"\0"};
	char msg[1024];
	char *pathname;
	nvlist_t *tgt;
	boolean_t avail_spare, l2cache, islog;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	int error;

	/* Pick the error-message prefix matching the operation. */
	if (flags & ZFS_ONLINE_EXPAND) {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
	} else {
		(void) snprintf(msg, sizeof (msg),
		    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
	}

	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
	    &islog)) == NULL)
		return (zfs_error(hdl, EZFS_NODEVICE, msg));

	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);

	/* Available hot spares cannot be onlined. */
	if (avail_spare)
		return (zfs_error(hdl, EZFS_ISSPARE, msg));

	/*
	 * Relabel only when expansion was requested explicitly or via
	 * the autoexpand property, and the vdev has a usable path.
	 */
	if ((flags & ZFS_ONLINE_EXPAND ||
	    zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) &&
	    nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, &pathname) == 0) {
		uint64_t wholedisk = 0;

		(void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
		    &wholedisk);

		/*
		 * XXX - L2ARC 1.0 devices can't support expansion.
		 */
		if (l2cache) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "cannot expand cache devices"));
			return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
		}

		if (wholedisk) {
			const char *fullpath = path;
			char buf[MAXPATHLEN];

			/* Expand a bare device name to its full path. */
			if (path[0] != '/') {
				error = zfs_resolve_shortname(path, buf,
				    sizeof (buf));
				if (error != 0)
					return (zfs_error(hdl, EZFS_NODEVICE,
					    msg));

				fullpath = buf;
			}

			error = zpool_relabel_disk(hdl, fullpath, msg);
			if (error != 0)
				return (error);
		}
	}

	zc.zc_cookie = VDEV_STATE_ONLINE;
	zc.zc_obj = flags;

	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
		if (errno == EINVAL) {
			/* The device now belongs to a pool created by split. */
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
			    "from this pool into a new one.  Use '%s' "
			    "instead"), "zpool detach");
			return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
		}
		return (zpool_standard_error(hdl, errno, msg));
	}

	/* The kernel passed the resulting state back in zc_cookie. */
	*newstate = zc.zc_cookie;
	return (0);
}
2670
2671 /*
2672  * Take the specified vdev offline
2673  */
2674 int
2675 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
2676 {
2677         zfs_cmd_t zc = {"\0"};
2678         char msg[1024];
2679         nvlist_t *tgt;
2680         boolean_t avail_spare, l2cache;
2681         libzfs_handle_t *hdl = zhp->zpool_hdl;
2682
2683         (void) snprintf(msg, sizeof (msg),
2684             dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
2685
2686         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2687         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2688             NULL)) == NULL)
2689                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2690
2691         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
2692
2693         if (avail_spare)
2694                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2695
2696         zc.zc_cookie = VDEV_STATE_OFFLINE;
2697         zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
2698
2699         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2700                 return (0);
2701
2702         switch (errno) {
2703         case EBUSY:
2704
2705                 /*
2706                  * There are no other replicas of this device.
2707                  */
2708                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2709
2710         case EEXIST:
2711                 /*
2712                  * The log device has unplayed logs
2713                  */
2714                 return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
2715
2716         default:
2717                 return (zpool_standard_error(hdl, errno, msg));
2718         }
2719 }
2720
2721 /*
2722  * Mark the given vdev faulted.
2723  */
2724 int
2725 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2726 {
2727         zfs_cmd_t zc = {"\0"};
2728         char msg[1024];
2729         libzfs_handle_t *hdl = zhp->zpool_hdl;
2730
2731         (void) snprintf(msg, sizeof (msg),
2732             dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
2733
2734         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2735         zc.zc_guid = guid;
2736         zc.zc_cookie = VDEV_STATE_FAULTED;
2737         zc.zc_obj = aux;
2738
2739         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2740                 return (0);
2741
2742         switch (errno) {
2743         case EBUSY:
2744
2745                 /*
2746                  * There are no other replicas of this device.
2747                  */
2748                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
2749
2750         default:
2751                 return (zpool_standard_error(hdl, errno, msg));
2752         }
2753
2754 }
2755
2756 /*
2757  * Mark the given vdev degraded.
2758  */
2759 int
2760 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
2761 {
2762         zfs_cmd_t zc = {"\0"};
2763         char msg[1024];
2764         libzfs_handle_t *hdl = zhp->zpool_hdl;
2765
2766         (void) snprintf(msg, sizeof (msg),
2767             dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
2768
2769         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2770         zc.zc_guid = guid;
2771         zc.zc_cookie = VDEV_STATE_DEGRADED;
2772         zc.zc_obj = aux;
2773
2774         if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
2775                 return (0);
2776
2777         return (zpool_standard_error(hdl, errno, msg));
2778 }
2779
2780 /*
2781  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
2782  * a hot spare.
2783  */
2784 static boolean_t
2785 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
2786 {
2787         nvlist_t **child;
2788         uint_t c, children;
2789         char *type;
2790
2791         if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
2792             &children) == 0) {
2793                 verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
2794                     &type) == 0);
2795
2796                 if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
2797                     children == 2 && child[which] == tgt)
2798                         return (B_TRUE);
2799
2800                 for (c = 0; c < children; c++)
2801                         if (is_replacing_spare(child[c], tgt, which))
2802                                 return (B_TRUE);
2803         }
2804
2805         return (B_FALSE);
2806 }
2807
/*
 * Attach new_disk (fully described by nvroot) to old_disk.
 * If 'replacing' is specified, the new disk will replace the old one.
 *
 * Returns 0 on success; on failure, reports through zfs_error() and
 * returns -1 (or the zfs_error() return value).
 */
int
zpool_vdev_attach(zpool_handle_t *zhp,
    const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
{
	zfs_cmd_t zc = {"\0"};
	char msg[1024];
	int ret;
	nvlist_t *tgt;
	boolean_t avail_spare, l2cache, islog;
	uint64_t val;
	char *newname;
	nvlist_t **child;
	uint_t children;
	nvlist_t *config_root;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	boolean_t rootpool = zpool_is_bootable(zhp);

	if (replacing)
		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
		    "cannot replace %s with %s"), old_disk, new_disk);
	else
		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
		    "cannot attach %s to %s"), new_disk, old_disk);

	/* The target must exist and be neither a spare nor a cache device. */
	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
	    &islog)) == NULL)
		return (zfs_error(hdl, EZFS_NODEVICE, msg));

	if (avail_spare)
		return (zfs_error(hdl, EZFS_ISSPARE, msg));

	if (l2cache)
		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));

	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
	zc.zc_cookie = replacing;

	/* The caller-provided vdev tree must describe exactly one disk. */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0 || children != 1) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "new device must be a single disk"));
		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
	}

	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);

	if ((newname = zpool_vdev_name(NULL, NULL, child[0], 0)) == NULL)
		return (-1);

	/*
	 * If the target is a hot spare that has been swapped in, we can only
	 * replace it with another hot spare.
	 */
	if (replacing &&
	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
	    (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
	    NULL) == NULL || !avail_spare) &&
	    is_replacing_spare(config_root, tgt, 1)) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "can only be replaced by another hot spare"));
		free(newname);
		return (zfs_error(hdl, EZFS_BADTARGET, msg));
	}

	free(newname);

	/* Hand the new vdev description to the kernel. */
	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
		return (-1);

	ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);

	zcmd_free_nvlists(&zc);

	if (ret == 0) {
		if (rootpool) {
			/*
			 * XXX need a better way to prevent user from
			 * booting up a half-baked vdev.
			 */
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
			    "sure to wait until resilver is done "
			    "before rebooting.\n"));
		}
		return (0);
	}

	/* Map the kernel's errno to a specific user-facing diagnostic. */
	switch (errno) {
	case ENOTSUP:
		/*
		 * Can't attach to or replace this type of vdev.
		 */
		if (replacing) {
			uint64_t version = zpool_get_prop_int(zhp,
			    ZPOOL_PROP_VERSION, NULL);

			if (islog)
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "cannot replace a log with a spare"));
			else if (version >= SPA_VERSION_MULTI_REPLACE)
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "already in replacing/spare config; wait "
				    "for completion or use 'zpool detach'"));
			else
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "cannot replace a replacing device"));
		} else {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "can only attach to mirrors and top-level "
			    "disks"));
		}
		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
		break;

	case EINVAL:
		/*
		 * The new device must be a single disk.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "new device must be a single disk"));
		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
		break;

	case EBUSY:
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy, "
		    "or device removal is in progress"),
		    new_disk);
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case EOVERFLOW:
		/*
		 * The new device is too small.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "device is too small"));
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case EDOM:
		/*
		 * The new device has a different optimal sector size.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "new device has a different optimal sector size; use the "
		    "option '-o ashift=N' to override the optimal size"));
		(void) zfs_error(hdl, EZFS_BADDEV, msg);
		break;

	case ENAMETOOLONG:
		/*
		 * The resulting top-level vdev spec won't fit in the label.
		 */
		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
		break;

	default:
		(void) zpool_standard_error(hdl, errno, msg);
	}

	return (-1);
}
2975
2976 /*
2977  * Detach the specified device.
2978  */
2979 int
2980 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
2981 {
2982         zfs_cmd_t zc = {"\0"};
2983         char msg[1024];
2984         nvlist_t *tgt;
2985         boolean_t avail_spare, l2cache;
2986         libzfs_handle_t *hdl = zhp->zpool_hdl;
2987
2988         (void) snprintf(msg, sizeof (msg),
2989             dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
2990
2991         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2992         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
2993             NULL)) == NULL)
2994                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
2995
2996         if (avail_spare)
2997                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
2998
2999         if (l2cache)
3000                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
3001
3002         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
3003
3004         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
3005                 return (0);
3006
3007         switch (errno) {
3008
3009         case ENOTSUP:
3010                 /*
3011                  * Can't detach from this type of vdev.
3012                  */
3013                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
3014                     "applicable to mirror and replacing vdevs"));
3015                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
3016                 break;
3017
3018         case EBUSY:
3019                 /*
3020                  * There are no other replicas of this device.
3021                  */
3022                 (void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
3023                 break;
3024
3025         default:
3026                 (void) zpool_standard_error(hdl, errno, msg);
3027         }
3028
3029         return (-1);
3030 }
3031
3032 /*
3033  * Find a mirror vdev in the source nvlist.
3034  *
3035  * The mchild array contains a list of disks in one of the top-level mirrors
3036  * of the source pool.  The schild array contains a list of disks that the
3037  * user specified on the command line.  We loop over the mchild array to
3038  * see if any entry in the schild array matches.
3039  *
3040  * If a disk in the mchild array is found in the schild array, we return
3041  * the index of that entry.  Otherwise we return -1.
3042  */
3043 static int
3044 find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
3045     nvlist_t **schild, uint_t schildren)
3046 {
3047         uint_t mc;
3048
3049         for (mc = 0; mc < mchildren; mc++) {
3050                 uint_t sc;
3051                 char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp,
3052                     mchild[mc], 0);
3053
3054                 for (sc = 0; sc < schildren; sc++) {
3055                         char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp,
3056                             schild[sc], 0);
3057                         boolean_t result = (strcmp(mpath, spath) == 0);
3058
3059                         free(spath);
3060                         if (result) {
3061                                 free(mpath);
3062                                 return (mc);
3063                         }
3064                 }
3065
3066                 free(mpath);
3067         }
3068
3069         return (-1);
3070 }
3071
3072 /*
3073  * Split a mirror pool.  If newroot points to null, then a new nvlist
3074  * is generated and it is the responsibility of the caller to free it.
3075  */
3076 int
3077 zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
3078     nvlist_t *props, splitflags_t flags)
3079 {
3080         zfs_cmd_t zc = {"\0"};
3081         char msg[1024];
3082         nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
3083         nvlist_t **varray = NULL, *zc_props = NULL;
3084         uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
3085         libzfs_handle_t *hdl = zhp->zpool_hdl;
3086         uint64_t vers, readonly = B_FALSE;
3087         boolean_t freelist = B_FALSE, memory_err = B_TRUE;
3088         int retval = 0;
3089
3090         (void) snprintf(msg, sizeof (msg),
3091             dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
3092
3093         if (!zpool_name_valid(hdl, B_FALSE, newname))
3094                 return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
3095
3096         if ((config = zpool_get_config(zhp, NULL)) == NULL) {
3097                 (void) fprintf(stderr, gettext("Internal error: unable to "
3098                     "retrieve pool configuration\n"));
3099                 return (-1);
3100         }
3101
3102         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
3103             == 0);
3104         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);
3105
3106         if (props) {
3107                 prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
3108                 if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
3109                     props, vers, flags, msg)) == NULL)
3110                         return (-1);
3111                 (void) nvlist_lookup_uint64(zc_props,
3112                     zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly);
3113                 if (readonly) {
3114                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3115                             "property %s can only be set at import time"),
3116                             zpool_prop_to_name(ZPOOL_PROP_READONLY));
3117                         return (-1);
3118                 }
3119         }
3120
3121         if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
3122             &children) != 0) {
3123                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3124                     "Source pool is missing vdev tree"));
3125                 nvlist_free(zc_props);
3126                 return (-1);
3127         }
3128
3129         varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
3130         vcount = 0;
3131
3132         if (*newroot == NULL ||
3133             nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
3134             &newchild, &newchildren) != 0)
3135                 newchildren = 0;
3136
3137         for (c = 0; c < children; c++) {
3138                 uint64_t is_log = B_FALSE, is_hole = B_FALSE;
3139                 char *type;
3140                 nvlist_t **mchild, *vdev;
3141                 uint_t mchildren;
3142                 int entry;
3143
3144                 /*
3145                  * Unlike cache & spares, slogs are stored in the
3146                  * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
3147                  */
3148                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
3149                     &is_log);
3150                 (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
3151                     &is_hole);
3152                 if (is_log || is_hole) {
3153                         /*
3154                          * Create a hole vdev and put it in the config.
3155                          */
3156                         if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
3157                                 goto out;
3158                         if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
3159                             VDEV_TYPE_HOLE) != 0)
3160                                 goto out;
3161                         if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
3162                             1) != 0)
3163                                 goto out;
3164                         if (lastlog == 0)
3165                                 lastlog = vcount;
3166                         varray[vcount++] = vdev;
3167                         continue;
3168                 }
3169                 lastlog = 0;
3170                 verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
3171                     == 0);
3172                 if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
3173                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3174                             "Source pool must be composed only of mirrors\n"));
3175                         retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
3176                         goto out;
3177                 }
3178
3179                 verify(nvlist_lookup_nvlist_array(child[c],
3180                     ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
3181
3182                 /* find or add an entry for this top-level vdev */
3183                 if (newchildren > 0 &&
3184                     (entry = find_vdev_entry(zhp, mchild, mchildren,
3185                     newchild, newchildren)) >= 0) {
3186                         /* We found a disk that the user specified. */
3187                         vdev = mchild[entry];
3188                         ++found;
3189                 } else {
3190                         /* User didn't specify a disk for this vdev. */
3191                         vdev = mchild[mchildren - 1];
3192                 }
3193
3194                 if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
3195                         goto out;
3196         }
3197
3198         /* did we find every disk the user specified? */
3199         if (found != newchildren) {
3200                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
3201                     "include at most one disk from each mirror"));
3202                 retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
3203                 goto out;
3204         }
3205
3206         /* Prepare the nvlist for populating. */
3207         if (*newroot == NULL) {
3208                 if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
3209                         goto out;
3210                 freelist = B_TRUE;
3211                 if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
3212                     VDEV_TYPE_ROOT) != 0)
3213                         goto out;
3214         } else {
3215                 verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
3216         }
3217
3218         /* Add all the children we found */
3219         if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
3220             lastlog == 0 ? vcount : lastlog) != 0)
3221                 goto out;
3222
3223         /*
3224          * If we're just doing a dry run, exit now with success.
3225          */
3226         if (flags.dryrun) {
3227                 memory_err = B_FALSE;
3228                 freelist = B_FALSE;
3229                 goto out;
3230         }
3231
3232         /* now build up the config list & call the ioctl */
3233         if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
3234                 goto out;
3235
3236         if (nvlist_add_nvlist(newconfig,
3237             ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
3238             nvlist_add_string(newconfig,
3239             ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
3240             nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
3241                 goto out;
3242
3243         /*
3244          * The new pool is automatically part of the namespace unless we
3245          * explicitly export it.
3246          */
3247         if (!flags.import)
3248                 zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
3249         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3250         (void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
3251         if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
3252                 goto out;
3253         if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
3254                 goto out;
3255
3256         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
3257                 retval = zpool_standard_error(hdl, errno, msg);
3258                 goto out;
3259         }
3260
3261         freelist = B_FALSE;
3262         memory_err = B_FALSE;
3263
3264 out:
3265         if (varray != NULL) {
3266                 int v;
3267
3268                 for (v = 0; v < vcount; v++)
3269                         nvlist_free(varray[v]);
3270                 free(varray);
3271         }
3272         zcmd_free_nvlists(&zc);
3273         nvlist_free(zc_props);
3274         nvlist_free(newconfig);
3275         if (freelist) {
3276                 nvlist_free(*newroot);
3277                 *newroot = NULL;
3278         }
3279
3280         if (retval != 0)
3281                 return (retval);
3282
3283         if (memory_err)
3284                 return (no_memory(hdl));
3285
3286         return (0);
3287 }
3288
3289 /*
3290  * Remove the given device.
3291  */
3292 int
3293 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
3294 {
3295         zfs_cmd_t zc = {"\0"};
3296         char msg[1024];
3297         nvlist_t *tgt;
3298         boolean_t avail_spare, l2cache, islog;
3299         libzfs_handle_t *hdl = zhp->zpool_hdl;
3300         uint64_t version;
3301
3302         (void) snprintf(msg, sizeof (msg),
3303             dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
3304
3305         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3306         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3307             &islog)) == NULL)
3308                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3309
3310         version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
3311         if (islog && version < SPA_VERSION_HOLES) {
3312                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3313                     "pool must be upgraded to support log removal"));
3314                 return (zfs_error(hdl, EZFS_BADVERSION, msg));
3315         }
3316
3317         if (!islog && !avail_spare && !l2cache && zpool_is_bootable(zhp)) {
3318                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3319                     "root pool can not have removed devices, "
3320                     "because GRUB does not understand them"));
3321                 return (zfs_error(hdl, EINVAL, msg));
3322         }
3323
3324         zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
3325
3326         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3327                 return (0);
3328
3329         switch (errno) {
3330
3331         case EINVAL:
3332                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3333                     "invalid config; all top-level vdevs must "
3334                     "have the same sector size and not be raidz."));
3335                 (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
3336                 break;
3337
3338         case EBUSY:
3339                 if (islog) {
3340                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3341                             "Mount encrypted datasets to replay logs."));
3342                 } else {
3343                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3344                             "Pool busy; removal may already be in progress"));
3345                 }
3346                 (void) zfs_error(hdl, EZFS_BUSY, msg);
3347                 break;
3348
3349         case EACCES:
3350                 if (islog) {
3351                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3352                             "Mount encrypted datasets to replay logs."));
3353                         (void) zfs_error(hdl, EZFS_BUSY, msg);
3354                 } else {
3355                         (void) zpool_standard_error(hdl, errno, msg);
3356                 }
3357                 break;
3358
3359         default:
3360                 (void) zpool_standard_error(hdl, errno, msg);
3361         }
3362         return (-1);
3363 }
3364
3365 int
3366 zpool_vdev_remove_cancel(zpool_handle_t *zhp)
3367 {
3368         zfs_cmd_t zc;
3369         char msg[1024];
3370         libzfs_handle_t *hdl = zhp->zpool_hdl;
3371
3372         (void) snprintf(msg, sizeof (msg),
3373             dgettext(TEXT_DOMAIN, "cannot cancel removal"));
3374
3375         bzero(&zc, sizeof (zc));
3376         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3377         zc.zc_cookie = 1;
3378
3379         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
3380                 return (0);
3381
3382         return (zpool_standard_error(hdl, errno, msg));
3383 }
3384
3385 int
3386 zpool_vdev_indirect_size(zpool_handle_t *zhp, const char *path,
3387     uint64_t *sizep)
3388 {
3389         char msg[1024];
3390         nvlist_t *tgt;
3391         boolean_t avail_spare, l2cache, islog;
3392         libzfs_handle_t *hdl = zhp->zpool_hdl;
3393
3394         (void) snprintf(msg, sizeof (msg),
3395             dgettext(TEXT_DOMAIN, "cannot determine indirect size of %s"),
3396             path);
3397
3398         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
3399             &islog)) == NULL)
3400                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
3401
3402         if (avail_spare || l2cache || islog) {
3403                 *sizep = 0;
3404                 return (0);
3405         }
3406
3407         if (nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_INDIRECT_SIZE, sizep) != 0) {
3408                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3409                     "indirect size not available"));
3410                 return (zfs_error(hdl, EINVAL, msg));
3411         }
3412         return (0);
3413 }
3414
3415 /*
3416  * Clear the errors for the pool, or the particular device if specified.
3417  */
3418 int
3419 zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
3420 {
3421         zfs_cmd_t zc = {"\0"};
3422         char msg[1024];
3423         nvlist_t *tgt;
3424         zpool_load_policy_t policy;
3425         boolean_t avail_spare, l2cache;
3426         libzfs_handle_t *hdl = zhp->zpool_hdl;
3427         nvlist_t *nvi = NULL;
3428         int error;
3429
3430         if (path)
3431                 (void) snprintf(msg, sizeof (msg),
3432                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3433                     path);
3434         else
3435                 (void) snprintf(msg, sizeof (msg),
3436                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
3437                     zhp->zpool_name);
3438
3439         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3440         if (path) {
3441                 if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
3442                     &l2cache, NULL)) == NULL)
3443                         return (zfs_error(hdl, EZFS_NODEVICE, msg));
3444
3445                 /*
3446                  * Don't allow error clearing for hot spares.  Do allow
3447                  * error clearing for l2cache devices.
3448                  */
3449                 if (avail_spare)
3450                         return (zfs_error(hdl, EZFS_ISSPARE, msg));
3451
3452                 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
3453                     &zc.zc_guid) == 0);
3454         }
3455
3456         zpool_get_load_policy(rewindnvl, &policy);
3457         zc.zc_cookie = policy.zlp_rewind;
3458
3459         if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
3460                 return (-1);
3461
3462         if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
3463                 return (-1);
3464
3465         while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
3466             errno == ENOMEM) {
3467                 if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
3468                         zcmd_free_nvlists(&zc);
3469                         return (-1);
3470                 }
3471         }
3472
3473         if (!error || ((policy.zlp_rewind & ZPOOL_TRY_REWIND) &&
3474             errno != EPERM && errno != EACCES)) {
3475                 if (policy.zlp_rewind &
3476                     (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
3477                         (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
3478                         zpool_rewind_exclaim(hdl, zc.zc_name,
3479                             ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0),
3480                             nvi);
3481                         nvlist_free(nvi);
3482                 }
3483                 zcmd_free_nvlists(&zc);
3484                 return (0);
3485         }
3486
3487         zcmd_free_nvlists(&zc);
3488         return (zpool_standard_error(hdl, errno, msg));
3489 }
3490
3491 /*
3492  * Similar to zpool_clear(), but takes a GUID (used by fmd).
3493  */
3494 int
3495 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
3496 {
3497         zfs_cmd_t zc = {"\0"};
3498         char msg[1024];
3499         libzfs_handle_t *hdl = zhp->zpool_hdl;
3500
3501         (void) snprintf(msg, sizeof (msg),
3502             dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
3503             (u_longlong_t)guid);
3504
3505         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3506         zc.zc_guid = guid;
3507         zc.zc_cookie = ZPOOL_NO_REWIND;
3508
3509         if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
3510                 return (0);
3511
3512         return (zpool_standard_error(hdl, errno, msg));
3513 }
3514
3515 /*
3516  * Change the GUID for a pool.
3517  */
3518 int
3519 zpool_reguid(zpool_handle_t *zhp)
3520 {
3521         char msg[1024];
3522         libzfs_handle_t *hdl = zhp->zpool_hdl;
3523         zfs_cmd_t zc = {"\0"};
3524
3525         (void) snprintf(msg, sizeof (msg),
3526             dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name);
3527
3528         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3529         if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0)
3530                 return (0);
3531
3532         return (zpool_standard_error(hdl, errno, msg));
3533 }
3534
3535 /*
3536  * Reopen the pool.
3537  */
3538 int
3539 zpool_reopen_one(zpool_handle_t *zhp, void *data)
3540 {
3541         libzfs_handle_t *hdl = zpool_get_handle(zhp);
3542         const char *pool_name = zpool_get_name(zhp);
3543         boolean_t *scrub_restart = data;
3544         int error;
3545
3546         error = lzc_reopen(pool_name, *scrub_restart);
3547         if (error) {
3548                 return (zpool_standard_error_fmt(hdl, error,
3549                     dgettext(TEXT_DOMAIN, "cannot reopen '%s'"), pool_name));
3550         }
3551
3552         return (0);
3553 }
3554
3555 /* call into libzfs_core to execute the sync IOCTL per pool */
3556 int
3557 zpool_sync_one(zpool_handle_t *zhp, void *data)
3558 {
3559         int ret;
3560         libzfs_handle_t *hdl = zpool_get_handle(zhp);
3561         const char *pool_name = zpool_get_name(zhp);
3562         boolean_t *force = data;
3563         nvlist_t *innvl = fnvlist_alloc();
3564
3565         fnvlist_add_boolean_value(innvl, "force", *force);
3566         if ((ret = lzc_sync(pool_name, innvl, NULL)) != 0) {
3567                 nvlist_free(innvl);
3568                 return (zpool_standard_error_fmt(hdl, ret,
3569                     dgettext(TEXT_DOMAIN, "sync '%s' failed"), pool_name));
3570         }
3571         nvlist_free(innvl);
3572
3573         return (0);
3574 }
3575
3576 #if defined(__sun__) || defined(__sun)
3577 /*
3578  * Convert from a devid string to a path.
3579  */
3580 static char *
3581 devid_to_path(char *devid_str)
3582 {
3583         ddi_devid_t devid;
3584         char *minor;
3585         char *path;
3586         devid_nmlist_t *list = NULL;
3587         int ret;
3588
3589         if (devid_str_decode(devid_str, &devid, &minor) != 0)
3590                 return (NULL);
3591
3592         ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
3593
3594         devid_str_free(minor);
3595         devid_free(devid);
3596
3597         if (ret != 0)
3598                 return (NULL);
3599
3600         /*
3601          * In a case the strdup() fails, we will just return NULL below.
3602          */
3603         path = strdup(list[0].devname);
3604
3605         devid_free_nmlist(list);
3606
3607         return (path);
3608 }
3609
3610 /*
3611  * Convert from a path to a devid string.
3612  */
3613 static char *
3614 path_to_devid(const char *path)
3615 {
3616         int fd;
3617         ddi_devid_t devid;
3618         char *minor, *ret;
3619
3620         if ((fd = open(path, O_RDONLY)) < 0)
3621                 return (NULL);
3622
3623         minor = NULL;
3624         ret = NULL;
3625         if (devid_get(fd, &devid) == 0) {
3626                 if (devid_get_minor_name(fd, &minor) == 0)
3627                         ret = devid_str_encode(devid, minor);
3628                 if (minor != NULL)
3629                         devid_str_free(minor);
3630                 devid_free(devid);
3631         }
3632         (void) close(fd);
3633
3634         return (ret);
3635 }
3636
3637 /*
3638  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
3639  * ignore any failure here, since a common case is for an unprivileged user to
3640  * type 'zpool status', and we'll display the correct information anyway.
3641  */
3642 static void
3643 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
3644 {
3645         zfs_cmd_t zc = {"\0"};
3646
3647         (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
3648         (void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
3649         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
3650             &zc.zc_guid) == 0);
3651
3652         (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
3653 }
3654 #endif /* sun */
3655
3656 /*
3657  * Remove partition suffix from a vdev path.  Partition suffixes may take three
3658  * forms: "-partX", "pX", or "X", where X is a string of digits.  The second
 * case only occurs when the suffix is preceded by a digit, i.e. "md0p0". The
3660  * third case only occurs when preceded by a string matching the regular
3661  * expression "^([hsv]|xv)d[a-z]+", i.e. a scsi, ide, virtio or xen disk.
3662  *
3663  * caller must free the returned string
3664  */
char *
zfs_strip_partition(char *path)
{
	size_t pathlen = strlen(path);
	char *copy = malloc(pathlen + 1);
	char *suffix = NULL;	/* where the partition suffix begins */
	char *digits = NULL;	/* first candidate digit of the suffix */

	if (copy == NULL)
		return (NULL);
	(void) memcpy(copy, path, pathlen + 1);

	if ((suffix = strstr(copy, "-part")) != NULL && suffix != copy) {
		/* "-partX" form */
		digits = suffix + 5;
	} else if ((suffix = strrchr(copy, 'p')) != NULL &&
	    suffix > copy + 1 && isdigit(*(suffix - 1))) {
		/* "pX" form preceded by a digit, e.g. "md0p0" */
		digits = suffix + 1;
	} else if ((copy[0] == 'h' || copy[0] == 's' || copy[0] == 'v') &&
	    copy[1] == 'd') {
		/* scsi/ide/virtio disk: skip the alphabetic device letters */
		for (digits = &copy[2]; isalpha(*digits); suffix = ++digits)
			;
	} else if (strncmp("xvd", copy, 3) == 0) {
		/* xen disk: skip the alphabetic device letters */
		for (digits = &copy[3]; isalpha(*digits); suffix = ++digits)
			;
	}

	/* Strip only if the remainder is all digits. */
	if (suffix != NULL && digits != NULL && *digits != '\0') {
		while (isdigit(*digits))
			digits++;
		if (*digits == '\0')
			*suffix = '\0';
	}

	return (copy);
}
3692
3693 /*
3694  * Same as zfs_strip_partition, but allows "/dev/" to be in the pathname
3695  *
3696  * path:        /dev/sda1
3697  * returns:     /dev/sda
3698  *
3699  * Returned string must be freed.
3700  */
3701 char *
3702 zfs_strip_partition_path(char *path)
3703 {
3704         char *newpath = strdup(path);
3705         char *sd_offset;
3706         char *new_sd;
3707
3708         if (!newpath)
3709                 return (NULL);
3710
3711         /* Point to "sda1" part of "/dev/sda1" */
3712         sd_offset = strrchr(newpath, '/') + 1;
3713
3714         /* Get our new name "sda" */
3715         new_sd = zfs_strip_partition(sd_offset);
3716         if (!new_sd) {
3717                 free(newpath);
3718                 return (NULL);
3719         }
3720
3721         /* Paste the "sda" where "sda1" was */
3722         strlcpy(sd_offset, new_sd, strlen(sd_offset) + 1);
3723
3724         /* Free temporary "sda" */
3725         free(new_sd);
3726
3727         return (newpath);
3728 }
3729
3730 #define PATH_BUF_LEN    64
3731
3732 /*
3733  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
3734  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
3735  * We also check if this is a whole disk, in which case we strip off the
3736  * trailing 's0' slice name.
3737  *
3738  * This routine is also responsible for identifying when disks have been
3739  * reconfigured in a new location.  The kernel will have opened the device by
3740  * devid, but the path will still refer to the old location.  To catch this, we
3741  * first do a path -> devid translation (which is fast for the common case).  If
3742  * the devid matches, we're done.  If not, we do a reverse devid -> path
3743  * translation and issue the appropriate ioctl() to update the path of the vdev.
3744  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
3745  * of these checks.
3746  */
3747 char *
3748 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
3749     int name_flags)
3750 {
3751         char *path, *type, *env;
3752         uint64_t value;
3753         char buf[PATH_BUF_LEN];
3754         char tmpbuf[PATH_BUF_LEN];
3755
3756         /*
3757          * vdev_name will be "root"/"root-0" for the root vdev, but it is the
3758          * zpool name that will be displayed to the user.
3759          */
3760         verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
3761         if (zhp != NULL && strcmp(type, "root") == 0)
3762                 return (zfs_strdup(hdl, zpool_get_name(zhp)));
3763
3764         env = getenv("ZPOOL_VDEV_NAME_PATH");
3765         if (env && (strtoul(env, NULL, 0) > 0 ||
3766             !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3767                 name_flags |= VDEV_NAME_PATH;
3768
3769         env = getenv("ZPOOL_VDEV_NAME_GUID");
3770         if (env && (strtoul(env, NULL, 0) > 0 ||
3771             !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3772                 name_flags |= VDEV_NAME_GUID;
3773
3774         env = getenv("ZPOOL_VDEV_NAME_FOLLOW_LINKS");
3775         if (env && (strtoul(env, NULL, 0) > 0 ||
3776             !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
3777                 name_flags |= VDEV_NAME_FOLLOW_LINKS;
3778
3779         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &value) == 0 ||
3780             name_flags & VDEV_NAME_GUID) {
3781                 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value);
3782                 (void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)value);
3783                 path = buf;
3784         } else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
3785 #if defined(__sun__) || defined(__sun)
3786                 /*
3787                  * Live VDEV path updates to a kernel VDEV during a
3788                  * zpool_vdev_name lookup are not supported on Linux.
3789                  */
3790                 char *devid;
3791                 vdev_stat_t *vs;
3792                 uint_t vsc;
3793
3794                 /*
3795                  * If the device is dead (faulted, offline, etc) then don't
3796                  * bother opening it.  Otherwise we may be forcing the user to
3797                  * open a misbehaving device, which can have undesirable
3798                  * effects.
3799                  */
3800                 if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
3801                     (uint64_t **)&vs, &vsc) != 0 ||
3802                     vs->vs_state >= VDEV_STATE_DEGRADED) &&
3803                     zhp != NULL &&
3804                     nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
3805                         /*
3806                          * Determine if the current path is correct.
3807                          */
3808                         char *newdevid = path_to_devid(path);
3809
3810                         if (newdevid == NULL ||
3811                             strcmp(devid, newdevid) != 0) {
3812                                 char *newpath;
3813
3814                                 if ((newpath = devid_to_path(devid)) != NULL) {
3815                                         /*
3816                                          * Update the path appropriately.
3817                                          */
3818                                         set_path(zhp, nv, newpath);
3819                                         if (nvlist_add_string(nv,
3820                                             ZPOOL_CONFIG_PATH, newpath) == 0)
3821                                                 verify(nvlist_lookup_string(nv,
3822                                                     ZPOOL_CONFIG_PATH,
3823                                                     &path) == 0);
3824                                         free(newpath);
3825                                 }
3826                         }
3827
3828                         if (newdevid)
3829                                 devid_str_free(newdevid);
3830                 }
3831 #endif /* sun */
3832
3833                 if (name_flags & VDEV_NAME_FOLLOW_LINKS) {
3834                         char *rp = realpath(path, NULL);
3835                         if (rp) {
3836                                 strlcpy(buf, rp, sizeof (buf));
3837                                 path = buf;
3838                                 free(rp);
3839                         }
3840                 }
3841
3842                 /*
3843                  * For a block device only use the name.
3844                  */
3845                 if ((strcmp(type, VDEV_TYPE_DISK) == 0) &&
3846                     !(name_flags & VDEV_NAME_PATH)) {
3847                         path = strrchr(path, '/');
3848                         path++;
3849                 }
3850
3851                 /*
3852                  * Remove the partition from the path it this is a whole disk.
3853                  */
3854                 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value)
3855                     == 0 && value && !(name_flags & VDEV_NAME_PATH)) {
3856                         return (zfs_strip_partition(path));
3857                 }
3858         } else {
3859                 path = type;
3860
3861                 /*
3862                  * If it's a raidz device, we need to stick in the parity level.
3863                  */
3864                 if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
3865                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
3866                             &value) == 0);
3867                         (void) snprintf(buf, sizeof (buf), "%s%llu", path,
3868                             (u_longlong_t)value);
3869                         path = buf;
3870                 }
3871
3872                 /*
3873                  * We identify each top-level vdev by using a <type-id>
3874                  * naming convention.
3875                  */
3876                 if (name_flags & VDEV_NAME_TYPE_ID) {
3877                         uint64_t id;
3878                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
3879                             &id) == 0);
3880                         (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s-%llu",
3881                             path, (u_longlong_t)id);
3882                         path = tmpbuf;
3883                 }
3884         }
3885
3886         return (zfs_strdup(hdl, path));
3887 }
3888
3889 static int
3890 zbookmark_mem_compare(const void *a, const void *b)
3891 {
3892         return (memcmp(a, b, sizeof (zbookmark_phys_t)));
3893 }
3894
3895 /*
3896  * Retrieve the persistent error log, uniquify the members, and return to the
3897  * caller.
3898  */
3899 int
3900 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
3901 {
3902         zfs_cmd_t zc = {"\0"};
3903         libzfs_handle_t *hdl = zhp->zpool_hdl;
3904         uint64_t count;
3905         zbookmark_phys_t *zb = NULL;
3906         int i;
3907
3908         /*
3909          * Retrieve the raw error list from the kernel.  If the number of errors
3910          * has increased, allocate more space and continue until we get the
3911          * entire list.
3912          */
3913         verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
3914             &count) == 0);
3915         if (count == 0)
3916                 return (0);
3917         zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
3918             count * sizeof (zbookmark_phys_t));
3919         zc.zc_nvlist_dst_size = count;
3920         (void) strcpy(zc.zc_name, zhp->zpool_name);
3921         for (;;) {
3922                 if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
3923                     &zc) != 0) {
3924                         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3925                         if (errno == ENOMEM) {
3926                                 void *dst;
3927
3928                                 count = zc.zc_nvlist_dst_size;
3929                                 dst = zfs_alloc(zhp->zpool_hdl, count *
3930                                     sizeof (zbookmark_phys_t));
3931                                 zc.zc_nvlist_dst = (uintptr_t)dst;
3932                         } else {
3933                                 return (zpool_standard_error_fmt(hdl, errno,
3934                                     dgettext(TEXT_DOMAIN, "errors: List of "
3935                                     "errors unavailable")));
3936                         }
3937                 } else {
3938                         break;
3939                 }
3940         }
3941
3942         /*
3943          * Sort the resulting bookmarks.  This is a little confusing due to the
3944          * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
3945          * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks
3946          * _not_ copied as part of the process.  So we point the start of our
3947          * array appropriate and decrement the total number of elements.
3948          */
3949         zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) +
3950             zc.zc_nvlist_dst_size;
3951         count -= zc.zc_nvlist_dst_size;
3952
3953         qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare);
3954
3955         verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
3956
3957         /*
3958          * Fill in the nverrlistp with nvlist's of dataset and object numbers.
3959          */
3960         for (i = 0; i < count; i++) {
3961                 nvlist_t *nv;
3962
3963                 /* ignoring zb_blkid and zb_level for now */
3964                 if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
3965                     zb[i-1].zb_object == zb[i].zb_object)
3966                         continue;
3967
3968                 if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
3969                         goto nomem;
3970                 if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
3971                     zb[i].zb_objset) != 0) {
3972                         nvlist_free(nv);
3973                         goto nomem;
3974                 }
3975                 if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
3976                     zb[i].zb_object) != 0) {
3977                         nvlist_free(nv);
3978                         goto nomem;
3979                 }
3980                 if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
3981                         nvlist_free(nv);
3982                         goto nomem;
3983                 }
3984                 nvlist_free(nv);
3985         }
3986
3987         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3988         return (0);
3989
3990 nomem:
3991         free((void *)(uintptr_t)zc.zc_nvlist_dst);
3992         return (no_memory(zhp->zpool_hdl));
3993 }
3994
3995 /*
3996  * Upgrade a ZFS pool to the latest on-disk version.
3997  */
3998 int
3999 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
4000 {
4001         zfs_cmd_t zc = {"\0"};
4002         libzfs_handle_t *hdl = zhp->zpool_hdl;
4003
4004         (void) strcpy(zc.zc_name, zhp->zpool_name);
4005         zc.zc_cookie = new_version;
4006
4007         if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
4008                 return (zpool_standard_error_fmt(hdl, errno,
4009                     dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
4010                     zhp->zpool_name));
4011         return (0);
4012 }
4013
/*
 * Flatten the command line into 'string' (at most 'len' bytes including
 * the terminating NUL): the bare program name followed by each argument,
 * separated by single spaces.  Output is silently truncated if it does
 * not fit.
 */
void
zfs_save_arguments(int argc, char **argv, char *string, int len)
{
        int idx;

        /* Start with the program name, stripped of any leading path. */
        (void) strlcpy(string, basename(argv[0]), len);

        /* Append the remaining arguments, space separated. */
        for (idx = 1; idx < argc; idx++) {
                (void) strlcat(string, " ", len);
                (void) strlcat(string, argv[idx], len);
        }
}
4025
4026 int
4027 zpool_log_history(libzfs_handle_t *hdl, const char *message)
4028 {
4029         zfs_cmd_t zc = {"\0"};
4030         nvlist_t *args;
4031         int err;
4032
4033         args = fnvlist_alloc();
4034         fnvlist_add_string(args, "message", message);
4035         err = zcmd_write_src_nvlist(hdl, &zc, args);
4036         if (err == 0)
4037                 err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc);
4038         nvlist_free(args);
4039         zcmd_free_nvlists(&zc);
4040         return (err);
4041 }
4042
4043 /*
4044  * Perform ioctl to get some command history of a pool.
4045  *
4046  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
4047  * logical offset of the history buffer to start reading from.
4048  *
4049  * Upon return, 'off' is the next logical offset to read from and
4050  * 'len' is the actual amount of bytes read into 'buf'.
4051  */
4052 static int
4053 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
4054 {
4055         zfs_cmd_t zc = {"\0"};
4056         libzfs_handle_t *hdl = zhp->zpool_hdl;
4057
4058         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
4059
4060         zc.zc_history = (uint64_t)(uintptr_t)buf;
4061         zc.zc_history_len = *len;
4062         zc.zc_history_offset = *off;
4063
4064         if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
4065                 switch (errno) {
4066                 case EPERM:
4067                         return (zfs_error_fmt(hdl, EZFS_PERM,
4068                             dgettext(TEXT_DOMAIN,
4069                             "cannot show history for pool '%s'"),
4070                             zhp->zpool_name));
4071                 case ENOENT:
4072                         return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
4073                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
4074                             "'%s'"), zhp->zpool_name));
4075                 case ENOTSUP:
4076                         return (zfs_error_fmt(hdl, EZFS_BADVERSION,
4077                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
4078                             "'%s', pool must be upgraded"), zhp->zpool_name));
4079                 default:
4080                         return (zpool_standard_error_fmt(hdl, errno,
4081                             dgettext(TEXT_DOMAIN,
4082                             "cannot get history for '%s'"), zhp->zpool_name));
4083                 }
4084         }
4085
4086         *len = zc.zc_history_len;
4087         *off = zc.zc_history_offset;
4088
4089         return (0);
4090 }
4091
4092 /*
4093  * Process the buffer of nvlists, unpacking and storing each nvlist record
4094  * into 'records'.  'leftover' is set to the number of bytes that weren't
4095  * processed as there wasn't a complete record.
4096  */
4097 int
4098 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
4099     nvlist_t ***records, uint_t *numrecords)
4100 {
4101         uint64_t reclen;
4102         nvlist_t *nv;
4103         int i;
4104         void *tmp;
4105
4106         while (bytes_read > sizeof (reclen)) {
4107
4108                 /* get length of packed record (stored as little endian) */
4109                 for (i = 0, reclen = 0; i < sizeof (reclen); i++)
4110                         reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
4111
4112                 if (bytes_read < sizeof (reclen) + reclen)
4113                         break;
4114
4115                 /* unpack record */
4116                 if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
4117                         return (ENOMEM);
4118                 bytes_read -= sizeof (reclen) + reclen;
4119                 buf += sizeof (reclen) + reclen;
4120
4121                 /* add record to nvlist array */
4122                 (*numrecords)++;
4123                 if (ISP2(*numrecords + 1)) {
4124                         tmp = realloc(*records,
4125                             *numrecords * 2 * sizeof (nvlist_t *));
4126                         if (tmp == NULL) {
4127                                 nvlist_free(nv);
4128                                 (*numrecords)--;
4129                                 return (ENOMEM);
4130                         }
4131                         *records = tmp;
4132                 }
4133                 (*records)[*numrecords - 1] = nv;
4134         }
4135
4136         *leftover = bytes_read;
4137         return (0);
4138 }
4139
4140 /*
4141  * Retrieve the command history of a pool.
4142  */
4143 int
4144 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
4145 {
4146         char *buf;
4147         int buflen = 128 * 1024;
4148         uint64_t off = 0;
4149         nvlist_t **records = NULL;
4150         uint_t numrecords = 0;
4151         int err, i;
4152
4153         buf = malloc(buflen);
4154         if (buf == NULL)
4155                 return (ENOMEM);
4156         do {
4157                 uint64_t bytes_read = buflen;
4158                 uint64_t leftover;
4159
4160                 if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
4161                         break;
4162
4163                 /* if nothing else was read in, we're at EOF, just return */
4164                 if (!bytes_read)
4165                         break;
4166
4167                 if ((err = zpool_history_unpack(buf, bytes_read,
4168                     &leftover, &records, &numrecords)) != 0)
4169                         break;
4170                 off -= leftover;
4171                 if (leftover == bytes_read) {
4172                         /*
4173                          * no progress made, because buffer is not big enough
4174                          * to hold this record; resize and retry.
4175                          */
4176                         buflen *= 2;
4177                         free(buf);
4178                         buf = malloc(buflen);
4179                         if (buf == NULL)
4180                                 return (ENOMEM);
4181                 }
4182
4183                 /* CONSTCOND */
4184         } while (1);
4185
4186         free(buf);
4187
4188         if (!err) {
4189                 verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
4190                 verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
4191                     records, numrecords) == 0);
4192         }
4193         for (i = 0; i < numrecords; i++)
4194                 nvlist_free(records[i]);
4195         free(records);
4196
4197         return (err);
4198 }
4199
4200 /*
4201  * Retrieve the next event given the passed 'zevent_fd' file descriptor.
4202  * If there is a new event available 'nvp' will contain a newly allocated
4203  * nvlist and 'dropped' will be set to the number of missed events since
4204  * the last call to this function.  When 'nvp' is set to NULL it indicates
4205  * no new events are available.  In either case the function returns 0 and
4206  * it is up to the caller to free 'nvp'.  In the case of a fatal error the
4207  * function will return a non-zero value.  When the function is called in
4208  * blocking mode (the default, unless the ZEVENT_NONBLOCK flag is passed),
4209  * it will not return until a new event is available.
4210  */
4211 int
4212 zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp,
4213     int *dropped, unsigned flags, int zevent_fd)
4214 {
4215         zfs_cmd_t zc = {"\0"};
4216         int error = 0;
4217
4218         *nvp = NULL;
4219         *dropped = 0;
4220         zc.zc_cleanup_fd = zevent_fd;
4221
4222         if (flags & ZEVENT_NONBLOCK)
4223                 zc.zc_guid = ZEVENT_NONBLOCK;
4224
4225         if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0)
4226                 return (-1);
4227
4228 retry:
4229         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) {
4230                 switch (errno) {
4231                 case ESHUTDOWN:
4232                         error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
4233                             dgettext(TEXT_DOMAIN, "zfs shutdown"));
4234                         goto out;
4235                 case ENOENT:
4236                         /* Blocking error case should not occur */
4237                         if (!(flags & ZEVENT_NONBLOCK))
4238                                 error = zpool_standard_error_fmt(hdl, errno,
4239                                     dgettext(TEXT_DOMAIN, "cannot get event"));
4240
4241                         goto out;
4242                 case ENOMEM:
4243                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
4244                                 error = zfs_error_fmt(hdl, EZFS_NOMEM,
4245                                     dgettext(TEXT_DOMAIN, "cannot get event"));
4246                                 goto out;
4247                         } else {
4248                                 goto retry;
4249                         }
4250                 default:
4251                         error = zpool_standard_error_fmt(hdl, errno,
4252                             dgettext(TEXT_DOMAIN, "cannot get event"));
4253                         goto out;
4254                 }
4255         }
4256
4257         error = zcmd_read_dst_nvlist(hdl, &zc, nvp);
4258         if (error != 0)
4259                 goto out;
4260
4261         *dropped = (int)zc.zc_cookie;
4262 out:
4263         zcmd_free_nvlists(&zc);
4264
4265         return (error);
4266 }
4267
4268 /*
4269  * Clear all events.
4270  */
4271 int
4272 zpool_events_clear(libzfs_handle_t *hdl, int *count)
4273 {
4274         zfs_cmd_t zc = {"\0"};
4275         char msg[1024];
4276
4277         (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
4278             "cannot clear events"));
4279
4280         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0)
4281                 return (zpool_standard_error_fmt(hdl, errno, msg));
4282
4283         if (count != NULL)
4284                 *count = (int)zc.zc_cookie; /* # of events cleared */
4285
4286         return (0);
4287 }
4288
4289 /*
4290  * Seek to a specific EID, ZEVENT_SEEK_START, or ZEVENT_SEEK_END for
4291  * the passed zevent_fd file handle.  On success zero is returned,
4292  * otherwise -1 is returned and hdl->libzfs_error is set to the errno.
4293  */
4294 int
4295 zpool_events_seek(libzfs_handle_t *hdl, uint64_t eid, int zevent_fd)
4296 {
4297         zfs_cmd_t zc = {"\0"};
4298         int error = 0;
4299
4300         zc.zc_guid = eid;
4301         zc.zc_cleanup_fd = zevent_fd;
4302
4303         if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_SEEK, &zc) != 0) {
4304                 switch (errno) {
4305                 case ENOENT:
4306                         error = zfs_error_fmt(hdl, EZFS_NOENT,
4307                             dgettext(TEXT_DOMAIN, "cannot get event"));
4308                         break;
4309
4310                 case ENOMEM:
4311                         error = zfs_error_fmt(hdl, EZFS_NOMEM,
4312                             dgettext(TEXT_DOMAIN, "cannot get event"));
4313                         break;
4314
4315                 default:
4316                         error = zpool_standard_error_fmt(hdl, errno,
4317                             dgettext(TEXT_DOMAIN, "cannot get event"));
4318                         break;
4319                 }
4320         }
4321
4322         return (error);
4323 }
4324
/*
 * Render a human-readable path for object 'obj' in dataset 'dsobj' into
 * 'pathname' (at most 'len' bytes).  Falls back to "<dataset>:<object>"
 * style output whenever the names cannot be resolved.  Used to describe
 * corrupted objects, e.g. for 'zpool status -v'.
 */
void
zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
    char *pathname, size_t len)
{
        zfs_cmd_t zc = {"\0"};
        boolean_t mounted = B_FALSE;
        char *mntpnt = NULL;
        char dsname[ZFS_MAX_DATASET_NAME_LEN];

        if (dsobj == 0) {
                /* special case for the MOS */
                (void) snprintf(pathname, len, "<metadata>:<0x%llx>",
                    (longlong_t)obj);
                return;
        }

        /* get the dataset's name */
        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
        zc.zc_obj = dsobj;
        if (ioctl(zhp->zpool_hdl->libzfs_fd,
            ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
                /* just write out a path of two object numbers */
                (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
                    (longlong_t)dsobj, (longlong_t)obj);
                return;
        }
        /* Dataset name comes back in zc_value. */
        (void) strlcpy(dsname, zc.zc_value, sizeof (dsname));

        /* find out if the dataset is mounted */
        mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);

        /* get the corrupted object's path */
        (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
        zc.zc_obj = obj;
        if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
            &zc) == 0) {
                if (mounted) {
                        /* Absolute path: mountpoint + in-dataset path. */
                        (void) snprintf(pathname, len, "%s%s", mntpnt,
                            zc.zc_value);
                } else {
                        /* Not mounted: qualify with the dataset name. */
                        (void) snprintf(pathname, len, "%s:%s",
                            dsname, zc.zc_value);
                }
        } else {
                /* Path lookup failed: fall back to the raw object number. */
                (void) snprintf(pathname, len, "%s:<0x%llx>", dsname,
                    (longlong_t)obj);
        }
        /* is_mounted() allocates mntpnt; free(NULL) is a no-op. */
        free(mntpnt);
}
4374
4375 /*
4376  * Read the EFI label from the config, if a label does not exist then
4377  * pass back the error to the caller. If the caller has passed a non-NULL
4378  * diskaddr argument then we set it to the starting address of the EFI
4379  * partition.
4380  */
4381 static int
4382 read_efi_label(nvlist_t *config, diskaddr_t *sb)
4383 {
4384         char *path;
4385         int fd;
4386         char diskname[MAXPATHLEN];
4387         int err = -1;
4388
4389         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
4390                 return (err);
4391
4392         (void) snprintf(diskname, sizeof (diskname), "%s%s", DISK_ROOT,
4393             strrchr(path, '/'));
4394         if ((fd = open(diskname, O_RDONLY|O_DIRECT)) >= 0) {
4395                 struct dk_gpt *vtoc;
4396
4397                 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
4398                         if (sb != NULL)
4399                                 *sb = vtoc->efi_parts[0].p_start;
4400                         efi_free(vtoc);
4401                 }
4402                 (void) close(fd);
4403         }
4404         return (err);
4405 }
4406
4407 /*
4408  * determine where a partition starts on a disk in the current
4409  * configuration
4410  */
4411 static diskaddr_t
4412 find_start_block(nvlist_t *config)
4413 {
4414         nvlist_t **child;
4415         uint_t c, children;
4416         diskaddr_t sb = MAXOFFSET_T;
4417         uint64_t wholedisk;
4418
4419         if (nvlist_lookup_nvlist_array(config,
4420             ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
4421                 if (nvlist_lookup_uint64(config,
4422                     ZPOOL_CONFIG_WHOLE_DISK,
4423                     &wholedisk) != 0 || !wholedisk) {
4424                         return (MAXOFFSET_T);
4425                 }
4426                 if (read_efi_label(config, &sb) < 0)
4427                         sb = MAXOFFSET_T;
4428                 return (sb);
4429         }
4430
4431         for (c = 0; c < children; c++) {
4432                 sb = find_start_block(child[c]);
4433                 if (sb != MAXOFFSET_T) {
4434                         return (sb);
4435                 }
4436         }
4437         return (MAXOFFSET_T);
4438 }
4439
4440 static int
4441 zpool_label_disk_check(char *path)
4442 {
4443         struct dk_gpt *vtoc;
4444         int fd, err;
4445
4446         if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
4447                 return (errno);
4448
4449         if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
4450                 (void) close(fd);
4451                 return (err);
4452         }
4453
4454         if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
4455                 efi_free(vtoc);
4456                 (void) close(fd);
4457                 return (EIDRM);
4458         }
4459
4460         efi_free(vtoc);
4461         (void) close(fd);
4462         return (0);
4463 }
4464
4465 /*
4466  * Generate a unique partition name for the ZFS member.  Partitions must
4467  * have unique names to ensure udev will be able to create symlinks under
4468  * /dev/disk/by-partlabel/ for all pool members.  The partition names are
4469  * of the form <pool>-<unique-id>.
4470  */
4471 static void
4472 zpool_label_name(char *label_name, int label_size)
4473 {
4474         uint64_t id = 0;
4475         int fd;
4476
4477         fd = open("/dev/urandom", O_RDONLY);
4478         if (fd >= 0) {
4479                 if (read(fd, &id, sizeof (id)) != sizeof (id))
4480                         id = 0;
4481
4482                 close(fd);
4483         }
4484
4485         if (id == 0)
4486                 id = (((uint64_t)rand()) << 32) | (uint64_t)rand();
4487
4488         snprintf(label_name, label_size, "zfs-%016llx", (u_longlong_t)id);
4489 }
4490
4491 /*
4492  * Label an individual disk.  The name provided is the short name,
4493  * stripped of any leading /dev path.
4494  */
4495 int
4496 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
4497 {
4498         char path[MAXPATHLEN];
4499         struct dk_gpt *vtoc;
4500         int rval, fd;
4501         size_t resv = EFI_MIN_RESV_SIZE;
4502         uint64_t slice_size;
4503         diskaddr_t start_block;
4504         char errbuf[1024];
4505
4506         /* prepare an error message just in case */
4507         (void) snprintf(errbuf, sizeof (errbuf),
4508             dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
4509
4510         if (zhp) {
4511                 nvlist_t *nvroot;
4512
4513                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
4514                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
4515
4516                 if (zhp->zpool_start_block == 0)
4517                         start_block = find_start_block(nvroot);
4518                 else
4519                         start_block = zhp->zpool_start_block;
4520                 zhp->zpool_start_block = start_block;
4521         } else {
4522                 /* new pool */
4523                 start_block = NEW_START_BLOCK;
4524         }
4525
4526         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4527
4528         if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) {
4529                 /*
4530                  * This shouldn't happen.  We've long since verified that this
4531                  * is a valid device.
4532                  */
4533                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
4534                     "label '%s': unable to open device: %d"), path, errno);
4535                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
4536         }
4537
4538         if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
4539                 /*
4540                  * The only way this can fail is if we run out of memory, or we
4541                  * were unable to read the disk's capacity
4542                  */
4543                 if (errno == ENOMEM)
4544                         (void) no_memory(hdl);
4545
4546                 (void) close(fd);
4547                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
4548                     "label '%s': unable to read disk capacity"), path);
4549
4550                 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
4551         }
4552
4553         slice_size = vtoc->efi_last_u_lba + 1;
4554         slice_size -= EFI_MIN_RESV_SIZE;
4555         if (start_block == MAXOFFSET_T)
4556                 start_block = NEW_START_BLOCK;
4557         slice_size -= start_block;
4558         slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT);
4559
4560         vtoc->efi_parts[0].p_start = start_block;
4561         vtoc->efi_parts[0].p_size = slice_size;
4562
4563         /*
4564          * Why we use V_USR: V_BACKUP confuses users, and is considered
4565          * disposable by some EFI utilities (since EFI doesn't have a backup
4566          * slice).  V_UNASSIGNED is supposed to be used only for zero size
4567          * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
4568          * etc. were all pretty specific.  V_USR is as close to reality as we
4569          * can get, in the absence of V_OTHER.
4570          */
4571         vtoc->efi_parts[0].p_tag = V_USR;
4572         zpool_label_name(vtoc->efi_parts[0].p_name, EFI_PART_NAME_LEN);
4573
4574         vtoc->efi_parts[8].p_start = slice_size + start_block;
4575         vtoc->efi_parts[8].p_size = resv;
4576         vtoc->efi_parts[8].p_tag = V_RESERVED;
4577
4578         rval = efi_write(fd, vtoc);
4579
4580         /* Flush the buffers to disk and invalidate the page cache. */
4581         (void) fsync(fd);
4582         (void) ioctl(fd, BLKFLSBUF);
4583
4584         if (rval == 0)
4585                 rval = efi_rescan(fd);
4586
4587         /*
4588          * Some block drivers (like pcata) may not support EFI GPT labels.
4589          * Print out a helpful error message directing the user to manually
4590          * label the disk and give a specific slice.
4591          */
4592         if (rval != 0) {
4593                 (void) close(fd);
4594                 efi_free(vtoc);
4595
4596                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
4597                     "parted(8) and then provide a specific slice: %d"), rval);
4598                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4599         }
4600
4601         (void) close(fd);
4602         efi_free(vtoc);
4603
4604         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4605         (void) zfs_append_partition(path, MAXPATHLEN);
4606
4607         /* Wait to udev to signal use the device has settled. */
4608         rval = zpool_label_disk_wait(path, DISK_LABEL_WAIT);
4609         if (rval) {
4610                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
4611                     "detect device partitions on '%s': %d"), path, rval);
4612                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4613         }
4614
4615         /* We can't be to paranoid.  Read the label back and verify it. */
4616         (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
4617         rval = zpool_label_disk_check(path);
4618         if (rval) {
4619                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
4620                     "EFI label on '%s' is damaged.  Ensure\nthis device "
4621                     "is not in in use, and is functioning properly: %d"),
4622                     path, rval);
4623                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
4624         }
4625
4626         return (0);
4627 }
4628
4629 /*
4630  * Allocate and return the underlying device name for a device mapper device.
4631  * If a device mapper device maps to multiple devices, return the first device.
4632  *
4633  * For example, dm_name = "/dev/dm-0" could return "/dev/sda". Symlinks to a
4634  * DM device (like /dev/disk/by-vdev/A0) are also allowed.
4635  *
4636  * Returns device name, or NULL on error or no match.  If dm_name is not a DM
4637  * device then return NULL.
4638  *
4639  * NOTE: The returned name string must be *freed*.
4640  */
4641 char *
4642 dm_get_underlying_path(char *dm_name)
4643 {
4644         DIR *dp = NULL;
4645         struct dirent *ep;
4646         char *realp;
4647         char *tmp = NULL;
4648         char *path = NULL;
4649         char *dev_str;
4650         int size;
4651
4652         if (dm_name == NULL)
4653                 return (NULL);
4654
4655         /* dm name may be a symlink (like /dev/disk/by-vdev/A0) */
4656         realp = realpath(dm_name, NULL);
4657         if (realp == NULL)
4658                 return (NULL);
4659
4660         /*
4661          * If they preface 'dev' with a path (like "/dev") then strip it off.
4662          * We just want the 'dm-N' part.
4663          */
4664         tmp = strrchr(realp, '/');
4665         if (tmp != NULL)
4666                 dev_str = tmp + 1;    /* +1 since we want the chr after '/' */
4667         else
4668                 dev_str = tmp;
4669
4670         size = asprintf(&tmp, "/sys/block/%s/slaves/", dev_str);
4671         if (size == -1 || !tmp)
4672                 goto end;
4673
4674         dp = opendir(tmp);
4675         if (dp == NULL)
4676                 goto end;
4677
4678         /* Return first sd* entry in /sys/block/dm-N/slaves/ */
4679         while ((ep = readdir(dp))) {
4680                 if (ep->d_type != DT_DIR) {     /* skip "." and ".." dirs */
4681                         size = asprintf(&path, "/dev/%s", ep->d_name);
4682                         break;
4683                 }
4684         }
4685
4686 end:
4687         if (dp != NULL)
4688                 closedir(dp);
4689         free(tmp);
4690         free(realp);
4691         return (path);
4692 }
4693
4694 /*
4695  * Return 1 if device is a device mapper or multipath device.
4696  * Return 0 if not.
4697  */
4698 int
4699 zfs_dev_is_dm(char *dev_name)
4700 {
4701
4702         char *tmp;
4703         tmp = dm_get_underlying_path(dev_name);
4704         if (tmp == NULL)
4705                 return (0);
4706
4707         free(tmp);
4708         return (1);
4709 }
4710
4711 /*
4712  * By "whole disk" we mean an entire physical disk (something we can
4713  * label, toggle the write cache on, etc.) as opposed to the full
4714  * capacity of a pseudo-device such as lofi or did.  We act as if we
4715  * are labeling the disk, which should be a pretty good test of whether
4716  * it's a viable device or not.  Returns B_TRUE if it is and B_FALSE if
4717  * it isn't.
4718  */
4719 int
4720 zfs_dev_is_whole_disk(char *dev_name)
4721 {
4722         struct dk_gpt *label;
4723         int fd;
4724
4725         if ((fd = open(dev_name, O_RDONLY | O_DIRECT)) < 0)
4726                 return (0);
4727
4728         if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
4729                 (void) close(fd);
4730                 return (0);
4731         }
4732
4733         efi_free(label);
4734         (void) close(fd);
4735
4736         return (1);
4737 }
4738
4739 /*
4740  * Lookup the underlying device for a device name
4741  *
4742  * Often you'll have a symlink to a device, a partition device,
4743  * or a multipath device, and want to look up the underlying device.
4744  * This function returns the underlying device name.  If the device
4745  * name is already the underlying device, then just return the same
4746  * name.  If the device is a DM device with multiple underlying devices
4747  * then return the first one.
4748  *
4749  * For example:
4750  *
4751  * 1. /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 -> ../../sda
4752  * dev_name:    /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001
4753  * returns:     /dev/sda
4754  *
4755  * 2. /dev/mapper/mpatha (made up of /dev/sda and /dev/sdb)
4756  * dev_name:    /dev/mapper/mpatha
4757  * returns:     /dev/sda (first device)
4758  *
4759  * 3. /dev/sda (already the underlying device)
4760  * dev_name:    /dev/sda
4761  * returns:     /dev/sda
4762  *
4763  * 4. /dev/dm-3 (mapped to /dev/sda)
4764  * dev_name:    /dev/dm-3
4765  * returns:     /dev/sda
4766  *
4767  * 5. /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 -> ../../sdb9
4768  * dev_name:    /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9
4769  * returns:     /dev/sdb
4770  *
4771  * 6. /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a -> ../dev/sda2
4772  * dev_name:    /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a
4773  * returns:     /dev/sda
4774  *
4775  * Returns underlying device name, or NULL on error or no match.
4776  *
4777  * NOTE: The returned name string must be *freed*.
4778  */
4779 char *
4780 zfs_get_underlying_path(char *dev_name)
4781 {
4782         char *name = NULL;
4783         char *tmp;
4784
4785         if (dev_name == NULL)
4786                 return (NULL);
4787
4788         tmp = dm_get_underlying_path(dev_name);
4789
4790         /* dev_name not a DM device, so just un-symlinkize it */
4791         if (tmp == NULL)
4792                 tmp = realpath(dev_name, NULL);
4793
4794         if (tmp != NULL) {
4795                 name = zfs_strip_partition_path(tmp);
4796                 free(tmp);
4797         }
4798
4799         return (name);
4800 }
4801
4802 /*
4803  * Given a dev name like "sda", return the full enclosure sysfs path to
4804  * the disk.  You can also pass in the name with "/dev" prepended
4805  * to it (like /dev/sda).
4806  *
4807  * For example, disk "sda" in enclosure slot 1:
4808  *     dev:            "sda"
4809  *     returns:        "/sys/class/enclosure/1:0:3:0/Slot 1"
4810  *
4811  * 'dev' must be a non-devicemapper device.
4812  *
4813  * Returned string must be freed.
4814  */
4815 char *
4816 zfs_get_enclosure_sysfs_path(char *dev_name)
4817 {
4818         DIR *dp = NULL;
4819         struct dirent *ep;
4820         char buf[MAXPATHLEN];
4821         char *tmp1 = NULL;
4822         char *tmp2 = NULL;
4823         char *tmp3 = NULL;
4824         char *path = NULL;
4825         size_t size;
4826         int tmpsize;
4827
4828         if (dev_name == NULL)
4829                 return (NULL);
4830
4831         /* If they preface 'dev' with a path (like "/dev") then strip it off */
4832         tmp1 = strrchr(dev_name, '/');
4833         if (tmp1 != NULL)
4834                 dev_name = tmp1 + 1;    /* +1 since we want the chr after '/' */
4835
4836         tmpsize = asprintf(&tmp1, "/sys/block/%s/device", dev_name);
4837         if (tmpsize == -1 || tmp1 == NULL) {
4838                 tmp1 = NULL;
4839                 goto end;
4840         }
4841
4842         dp = opendir(tmp1);
4843         if (dp == NULL) {
4844                 tmp1 = NULL;    /* To make free() at the end a NOP */
4845                 goto end;
4846         }
4847
4848         /*
4849          * Look though all sysfs entries in /sys/block/<dev>/device for
4850          * the enclosure symlink.
4851          */
4852         while ((ep = readdir(dp))) {
4853                 /* Ignore everything that's not our enclosure_device link */
4854                 if (strstr(ep->d_name, "enclosure_device") == NULL)
4855                         continue;
4856
4857                 if (asprintf(&tmp2, "%s/%s", tmp1, ep->d_name) == -1 ||
4858                     tmp2 == NULL)
4859                         break;
4860
4861                 size = readlink(tmp2, buf, sizeof (buf));
4862
4863                 /* Did readlink fail or crop the link name? */
4864                 if (size == -1 || size >= sizeof (buf)) {
4865                         free(tmp2);
4866                         tmp2 = NULL;    /* To make free() at the end a NOP */
4867                         break;
4868                 }
4869
4870                 /*
4871                  * We got a valid link.  readlink() doesn't terminate strings
4872                  * so we have to do it.
4873                  */
4874                 buf[size] = '\0';
4875
4876                 /*
4877                  * Our link will look like:
4878                  *
4879                  * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1"
4880                  *
4881                  * We want to grab the "enclosure/1:0:3:0/SLOT 1" part
4882                  */
4883                 tmp3 = strstr(buf, "enclosure");
4884                 if (tmp3 == NULL)
4885                         break;
4886
4887                 if (asprintf(&path, "/sys/class/%s", tmp3) == -1) {
4888                         /* If asprintf() fails, 'path' is undefined */
4889                         path = NULL;
4890                         break;
4891                 }
4892
4893                 if (path == NULL)
4894                         break;
4895         }
4896
4897 end:
4898         free(tmp2);
4899         free(tmp1);
4900
4901         if (dp != NULL)
4902                 closedir(dp);
4903
4904         return (path);
4905 }