/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/stat.h>
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <devid.h>
#include <dirent.h>
#include <fcntl.h>
#include <libintl.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
#include <zone.h>
#include <sys/zfs_ioctl.h>
#include <sys/zio.h>
#include <umem.h>

#include "zfs_namecheck.h"
#include "zfs_prop.h"
#include "libzfs_impl.h"

static int read_efi_label(nvlist_t *config, diskaddr_t *sb);

/*
 * ====================================================================
 *   zpool property functions
 * ====================================================================
 */

static int
zpool_get_all_props(zpool_handle_t *zhp)
{
        zfs_cmd_t zc = { 0 };
        libzfs_handle_t *hdl = zhp->zpool_hdl;

        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

        if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
                return (-1);

        while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
                if (errno == ENOMEM) {
                        if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
                                zcmd_free_nvlists(&zc);
                                return (-1);
                        }
                } else {
                        zcmd_free_nvlists(&zc);
                        return (-1);
                }
        }

        if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
                zcmd_free_nvlists(&zc);
                return (-1);
        }

        zcmd_free_nvlists(&zc);

        return (0);
}

static int
zpool_props_refresh(zpool_handle_t *zhp)
{
        nvlist_t *old_props;

        old_props = zhp->zpool_props;

        if (zpool_get_all_props(zhp) != 0)
                return (-1);

        nvlist_free(old_props);
        return (0);
}

static char *
zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
    zprop_source_t *src)
{
        nvlist_t *nv, *nvl;
        uint64_t ival;
        char *value;
        zprop_source_t source;

        nvl = zhp->zpool_props;
        if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
                verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
                source = ival;
                verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
        } else {
                source = ZPROP_SRC_DEFAULT;
                if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
                        value = "-";
        }

        if (src)
                *src = source;

        return (value);
}

uint64_t
zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
{
        nvlist_t *nv, *nvl;
        uint64_t value;
        zprop_source_t source;

        if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
                /*
                 * zpool_get_all_props() has most likely failed because
                 * the pool is faulted, but if all we need is the top level
                 * vdev's guid then get it from the zhp config nvlist.
                 */
                if ((prop == ZPOOL_PROP_GUID) &&
                    (nvlist_lookup_nvlist(zhp->zpool_config,
                    ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
                    (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
                    == 0)) {
                        return (value);
                }
                return (zpool_prop_default_numeric(prop));
        }

        nvl = zhp->zpool_props;
        if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
                verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
                source = value;
                verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
        } else {
                source = ZPROP_SRC_DEFAULT;
                value = zpool_prop_default_numeric(prop);
        }

        if (src)
                *src = source;

        return (value);
}

/*
 * Map VDEV STATE to printed strings.
 */
char *
zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
{
        switch (state) {
        case VDEV_STATE_CLOSED:
        case VDEV_STATE_OFFLINE:
                return (gettext("OFFLINE"));
        case VDEV_STATE_REMOVED:
                return (gettext("REMOVED"));
        case VDEV_STATE_CANT_OPEN:
                if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
                        return (gettext("FAULTED"));
                else
                        return (gettext("UNAVAIL"));
        case VDEV_STATE_FAULTED:
                return (gettext("FAULTED"));
        case VDEV_STATE_DEGRADED:
                return (gettext("DEGRADED"));
        case VDEV_STATE_HEALTHY:
                return (gettext("ONLINE"));
        }

        return (gettext("UNKNOWN"));
}

/*
 * Get a zpool property value for 'prop' and return the value in
 * a pre-allocated buffer.
 */
int
zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
    zprop_source_t *srctype)
{
        uint64_t intval;
        const char *strval;
        zprop_source_t src = ZPROP_SRC_NONE;
        nvlist_t *nvroot;
        vdev_stat_t *vs;
        uint_t vsc;

        if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
                if (prop == ZPOOL_PROP_NAME)
                        (void) strlcpy(buf, zpool_get_name(zhp), len);
                else if (prop == ZPOOL_PROP_HEALTH)
                        (void) strlcpy(buf, "FAULTED", len);
                else
                        (void) strlcpy(buf, "-", len);
                return (0);
        }

        if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
            prop != ZPOOL_PROP_NAME)
                return (-1);

        switch (zpool_prop_get_type(prop)) {
        case PROP_TYPE_STRING:
                (void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
                    len);
                break;

        case PROP_TYPE_NUMBER:
                intval = zpool_get_prop_int(zhp, prop, &src);

                switch (prop) {
                case ZPOOL_PROP_SIZE:
                case ZPOOL_PROP_USED:
                case ZPOOL_PROP_AVAILABLE:
                        (void) zfs_nicenum(intval, buf, len);
                        break;

                case ZPOOL_PROP_CAPACITY:
                        (void) snprintf(buf, len, "%llu%%",
                            (u_longlong_t)intval);
                        break;

                case ZPOOL_PROP_HEALTH:
                        verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
                            ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
                        verify(nvlist_lookup_uint64_array(nvroot,
                            ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);

                        (void) strlcpy(buf, zpool_state_to_name(intval,
                            vs->vs_aux), len);
                        break;
                default:
                        (void) snprintf(buf, len, "%llu", intval);
                }
                break;

        case PROP_TYPE_INDEX:
                intval = zpool_get_prop_int(zhp, prop, &src);
                if (zpool_prop_index_to_string(prop, intval, &strval)
                    != 0)
                        return (-1);
                (void) strlcpy(buf, strval, len);
                break;

        default:
                abort();
        }

        if (srctype)
                *srctype = src;

        return (0);
}
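
/*
 * Illustrative usage (editor's sketch, not part of the original source):
 * a minimal consumer of zpool_get_prop(), using only functions defined in
 * this file.  The pool name "tank" is an assumption for illustration.
 * The block is excluded from compilation.
 */
#if 0
static void
example_print_props(libzfs_handle_t *hdl)
{
        zpool_handle_t *zhp;
        char health[64], cap[64];

        if ((zhp = zpool_open(hdl, "tank")) == NULL)
                return;
        if (zpool_get_prop(zhp, ZPOOL_PROP_HEALTH, health,
            sizeof (health), NULL) == 0 &&
            zpool_get_prop(zhp, ZPOOL_PROP_CAPACITY, cap,
            sizeof (cap), NULL) == 0)
                (void) printf("tank: %s, %s full\n", health, cap);
        zpool_close(zhp);
}
#endif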

/*
 * Check that the bootfs name carries the same pool name as the pool it is
 * being set on; bootfs is assumed to already be a valid dataset name.
 */
static boolean_t
bootfs_name_valid(const char *pool, char *bootfs)
{
        int len = strlen(pool);

        if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
                return (B_FALSE);

        if (strncmp(pool, bootfs, len) == 0 &&
            (bootfs[len] == '/' || bootfs[len] == '\0'))
                return (B_TRUE);

        return (B_FALSE);
}
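
/*
 * Worked example (editor's note): for pool "tank", bootfs_name_valid()
 * accepts "tank" and "tank/ROOT/default" (the pool name followed by '\0'
 * or '/'), but rejects "tank2/root" and "other/root".
 */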

#if defined(sun)
/*
 * Inspect the configuration to determine if any of the devices contain
 * an EFI label.
 */
static boolean_t
pool_uses_efi(nvlist_t *config)
{
        nvlist_t **child;
        uint_t c, children;

        if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
            &child, &children) != 0)
                return (read_efi_label(config, NULL) >= 0);

        for (c = 0; c < children; c++) {
                if (pool_uses_efi(child[c]))
                        return (B_TRUE);
        }
        return (B_FALSE);
}
#endif

/*
 * Given an nvlist of zpool properties to be set, validate that they are
 * correct, and parse any numeric properties (index, boolean, etc) if they are
 * specified as strings.
 */
static nvlist_t *
zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
    nvlist_t *props, uint64_t version, boolean_t create_or_import, char *errbuf)
{
        nvpair_t *elem;
        nvlist_t *retprops;
        zpool_prop_t prop;
        char *strval;
        uint64_t intval;
        char *slash;
        struct stat64 statbuf;
        zpool_handle_t *zhp;
        nvlist_t *nvroot;

        if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
                (void) no_memory(hdl);
                return (NULL);
        }

        elem = NULL;
        while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
                const char *propname = nvpair_name(elem);

                /*
                 * Make sure this property is valid and applies to this type.
                 */
                if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "invalid property '%s'"), propname);
                        (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
                        goto error;
                }

                if (zpool_prop_readonly(prop)) {
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
                            "is readonly"), propname);
                        (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
                        goto error;
                }

                if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
                    &strval, &intval, errbuf) != 0)
                        goto error;

                /*
                 * Perform additional checking for specific properties.
                 */
                switch (prop) {
                case ZPOOL_PROP_VERSION:
                        if (intval < version || intval > SPA_VERSION) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' number %llu is invalid"),
                                    propname, (u_longlong_t)intval);
                                (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
                                goto error;
                        }
                        break;

                case ZPOOL_PROP_BOOTFS:
                        if (create_or_import) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' cannot be set at creation "
                                    "or import time"), propname);
                                (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
                                goto error;
                        }

                        if (version < SPA_VERSION_BOOTFS) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "pool must be upgraded to support "
                                    "'%s' property"), propname);
                                (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
                                goto error;
                        }

                        /*
                         * The bootfs property value must be a dataset name,
                         * and that dataset must reside in the pool on which
                         * the property is being set.
                         */
                        if (strval[0] != '\0' && !bootfs_name_valid(poolname,
                            strval)) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
                                    "is an invalid name"), strval);
                                (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
                                goto error;
                        }

                        if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "could not open pool '%s'"), poolname);
                                (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
                                goto error;
                        }
                        verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
                            ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);

#if defined(sun)
                        /*
                         * bootfs property cannot be set on a disk which has
                         * been EFI labeled.
                         */
                        if (pool_uses_efi(nvroot)) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' not supported on "
                                    "EFI labeled devices"), propname);
                                (void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf);
                                zpool_close(zhp);
                                goto error;
                        }
#endif
                        zpool_close(zhp);
                        break;

                case ZPOOL_PROP_ALTROOT:
                        if (!create_or_import) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' can only be set during pool "
                                    "creation or import"), propname);
                                (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
                                goto error;
                        }

                        if (strval[0] != '/') {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "bad alternate root '%s'"), strval);
                                (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
                                goto error;
                        }
                        break;

                case ZPOOL_PROP_CACHEFILE:
                        if (strval[0] == '\0')
                                break;

                        if (strcmp(strval, "none") == 0)
                                break;

                        if (strval[0] != '/') {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "property '%s' must be empty, an "
                                    "absolute path, or 'none'"), propname);
                                (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
                                goto error;
                        }

                        slash = strrchr(strval, '/');

                        if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
                            strcmp(slash, "/..") == 0) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "'%s' is not a valid file"), strval);
                                (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
                                goto error;
                        }

                        *slash = '\0';

                        if (strval[0] != '\0' &&
                            (stat64(strval, &statbuf) != 0 ||
                            !S_ISDIR(statbuf.st_mode))) {
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "'%s' is not a valid directory"),
                                    strval);
                                (void) zfs_error(hdl, EZFS_BADPATH, errbuf);
                                goto error;
                        }

                        *slash = '/';
                        break;
                }
        }

        return (retprops);
error:
        nvlist_free(retprops);
        return (NULL);
}

/*
 * Set zpool property: propname=propval.
 */
int
zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
{
        zfs_cmd_t zc = { 0 };
        int ret = -1;
        char errbuf[1024];
        nvlist_t *nvl = NULL;
        nvlist_t *realprops;
        uint64_t version;

        (void) snprintf(errbuf, sizeof (errbuf),
            dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
            zhp->zpool_name);

        if (zhp->zpool_props == NULL && zpool_get_all_props(zhp))
                return (zfs_error(zhp->zpool_hdl, EZFS_POOLPROPS, errbuf));

        if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
                return (no_memory(zhp->zpool_hdl));

        if (nvlist_add_string(nvl, propname, propval) != 0) {
                nvlist_free(nvl);
                return (no_memory(zhp->zpool_hdl));
        }

        version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
        if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
            zhp->zpool_name, nvl, version, B_FALSE, errbuf)) == NULL) {
                nvlist_free(nvl);
                return (-1);
        }

        nvlist_free(nvl);
        nvl = realprops;

        /*
         * Execute the corresponding ioctl() to set this property.
         */
        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

        if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
                nvlist_free(nvl);
                return (-1);
        }

        ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);

        zcmd_free_nvlists(&zc);
        nvlist_free(nvl);

        if (ret)
                (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
        else
                (void) zpool_props_refresh(zhp);

        return (ret);
}
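
/*
 * Illustrative usage (editor's sketch, not part of the original source):
 * setting a pool property by name.  Property names are the same strings
 * accepted by zpool(1M), e.g. "cachefile"; the pool name is an assumption.
 * The block is excluded from compilation.
 */
#if 0
static int
example_disable_cachefile(libzfs_handle_t *hdl)
{
        zpool_handle_t *zhp;
        int err;

        if ((zhp = zpool_open(hdl, "tank")) == NULL)
                return (-1);
        /* Validated by zpool_valid_proplist() before the ioctl is issued. */
        err = zpool_set_prop(zhp, "cachefile", "none");
        zpool_close(zhp);
        return (err);
}
#endif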

int
zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
{
        libzfs_handle_t *hdl = zhp->zpool_hdl;
        zprop_list_t *entry;
        char buf[ZFS_MAXPROPLEN];

        if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
                return (-1);

        for (entry = *plp; entry != NULL; entry = entry->pl_next) {

                if (entry->pl_fixed)
                        continue;

                if (entry->pl_prop != ZPROP_INVAL &&
                    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
                    NULL) == 0) {
                        if (strlen(buf) > entry->pl_width)
                                entry->pl_width = strlen(buf);
                }
        }

        return (0);
}

/*
 * Validate the given pool name, optionally reporting an extended error
 * message through 'hdl'.
 */
boolean_t
zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
{
        namecheck_err_t why;
        char what;
        int ret;

        ret = pool_namecheck(pool, &why, &what);

        /*
         * The rules for reserved pool names were extended at a later point.
         * But we need to support users with existing pools that may now be
         * invalid.  So we only check for this expanded set of names during a
         * create (or import), and only in userland.
         */
        if (ret == 0 && !isopen &&
            (strncmp(pool, "mirror", 6) == 0 ||
            strncmp(pool, "raidz", 5) == 0 ||
            strncmp(pool, "spare", 5) == 0 ||
            strcmp(pool, "log") == 0)) {
                if (hdl != NULL)
                        zfs_error_aux(hdl,
                            dgettext(TEXT_DOMAIN, "name is reserved"));
                return (B_FALSE);
        }

        if (ret != 0) {
                if (hdl != NULL) {
                        switch (why) {
                        case NAME_ERR_TOOLONG:
                                zfs_error_aux(hdl,
                                    dgettext(TEXT_DOMAIN, "name is too long"));
                                break;

                        case NAME_ERR_INVALCHAR:
                                zfs_error_aux(hdl,
                                    dgettext(TEXT_DOMAIN, "invalid character "
                                    "'%c' in pool name"), what);
                                break;

                        case NAME_ERR_NOLETTER:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "name must begin with a letter"));
                                break;

                        case NAME_ERR_RESERVED:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "name is reserved"));
                                break;

                        case NAME_ERR_DISKLIKE:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "pool name is reserved"));
                                break;

                        case NAME_ERR_LEADING_SLASH:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "leading slash in name"));
                                break;

                        case NAME_ERR_EMPTY_COMPONENT:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "empty component in name"));
                                break;

                        case NAME_ERR_TRAILING_SLASH:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "trailing slash in name"));
                                break;

                        case NAME_ERR_MULTIPLE_AT:
                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "multiple '@' delimiters in name"));
                                break;
                        }
                }
                return (B_FALSE);
        }

        return (B_TRUE);
}
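
/*
 * Worked example (editor's note): with isopen == B_FALSE, names such as
 * "mirror", "mirror2", "raidz1", "spare0", and "log" are rejected as
 * reserved (the first three by prefix match), while an existing pool that
 * already carries such a name can still be opened, since isopen == B_TRUE
 * skips the expanded check.
 */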

/*
 * Open a handle to the given pool, even if the pool is currently in the FAULTED
 * state.
 */
zpool_handle_t *
zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
{
        zpool_handle_t *zhp;
        boolean_t missing;

        /*
         * Make sure the pool name is valid.
         */
        if (!zpool_name_valid(hdl, B_TRUE, pool)) {
                (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
                    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
                    pool);
                return (NULL);
        }

        if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
                return (NULL);

        zhp->zpool_hdl = hdl;
        (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));

        if (zpool_refresh_stats(zhp, &missing) != 0) {
                zpool_close(zhp);
                return (NULL);
        }

        if (missing) {
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
                (void) zfs_error_fmt(hdl, EZFS_NOENT,
                    dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
                zpool_close(zhp);
                return (NULL);
        }

        return (zhp);
}

/*
 * Like the above, but silent on error.  Used when iterating over pools (because
 * the configuration cache may be out of date).
 */
int
zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
{
        zpool_handle_t *zhp;
        boolean_t missing;

        if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
                return (-1);

        zhp->zpool_hdl = hdl;
        (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));

        if (zpool_refresh_stats(zhp, &missing) != 0) {
                zpool_close(zhp);
                return (-1);
        }

        if (missing) {
                zpool_close(zhp);
                *ret = NULL;
                return (0);
        }

        *ret = zhp;
        return (0);
}

/*
 * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
 * state.
 */
zpool_handle_t *
zpool_open(libzfs_handle_t *hdl, const char *pool)
{
        zpool_handle_t *zhp;

        if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
                return (NULL);

        if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
                (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
                    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
                zpool_close(zhp);
                return (NULL);
        }

        return (zhp);
}

/*
 * Close the handle.  Simply frees the memory associated with the handle.
 */
void
zpool_close(zpool_handle_t *zhp)
{
        if (zhp->zpool_config)
                nvlist_free(zhp->zpool_config);
        if (zhp->zpool_old_config)
                nvlist_free(zhp->zpool_old_config);
        if (zhp->zpool_props)
                nvlist_free(zhp->zpool_props);
        free(zhp);
}
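
/*
 * Illustrative usage (editor's sketch, not part of the original source):
 * the open/close lifecycle.  zpool_open() refuses FAULTED pools, so a
 * diagnostic tool that must inspect a faulted pool would use
 * zpool_open_canfail() instead.  The pool name is an assumption; the
 * block is excluded from compilation.
 */
#if 0
static void
example_lifecycle(libzfs_handle_t *hdl)
{
        zpool_handle_t *zhp;

        if ((zhp = zpool_open_canfail(hdl, "tank")) == NULL)
                return;         /* invalid name or no such pool */
        (void) printf("state: %d\n", zpool_get_state(zhp));
        zpool_close(zhp);       /* frees the handle and cached nvlists */
}
#endif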

/*
 * Return the name of the pool.
 */
const char *
zpool_get_name(zpool_handle_t *zhp)
{
        return (zhp->zpool_name);
}

/*
 * Return the state of the pool (ACTIVE or UNAVAILABLE)
 */
int
zpool_get_state(zpool_handle_t *zhp)
{
        return (zhp->zpool_state);
}

/*
 * Create the named pool, using the provided vdev list.  It is assumed
 * that the consumer has already validated the contents of the nvlist, so we
 * don't have to worry about error semantics.
 */
int
zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
    nvlist_t *props, nvlist_t *fsprops)
{
        zfs_cmd_t zc = { 0 };
        nvlist_t *zc_fsprops = NULL;
        nvlist_t *zc_props = NULL;
        char msg[1024];
        char *altroot;
        int ret = -1;

        (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
            "cannot create '%s'"), pool);

        if (!zpool_name_valid(hdl, B_FALSE, pool))
                return (zfs_error(hdl, EZFS_INVALIDNAME, msg));

        if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
                return (-1);

        if (props) {
                if ((zc_props = zpool_valid_proplist(hdl, pool, props,
                    SPA_VERSION_1, B_TRUE, msg)) == NULL) {
                        goto create_failed;
                }
        }

        if (fsprops) {
                uint64_t zoned;
                char *zonestr;

                zoned = ((nvlist_lookup_string(fsprops,
                    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
                    strcmp(zonestr, "on") == 0);

                if ((zc_fsprops = zfs_valid_proplist(hdl,
                    ZFS_TYPE_FILESYSTEM, fsprops, zoned, NULL, msg)) == NULL) {
                        goto create_failed;
                }
                if (!zc_props &&
                    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
                        goto create_failed;
                }
                if (nvlist_add_nvlist(zc_props,
                    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
                        goto create_failed;
                }
        }

        if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
                goto create_failed;

        (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));

        if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {

                zcmd_free_nvlists(&zc);
                nvlist_free(zc_props);
                nvlist_free(zc_fsprops);

                switch (errno) {
                case EBUSY:
                        /*
                         * This can happen if the user has specified the same
                         * device multiple times.  We can't reliably detect this
                         * until we try to add it and see we already have a
                         * label.
                         */
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "one or more vdevs refer to the same device"));
                        return (zfs_error(hdl, EZFS_BADDEV, msg));

                case EOVERFLOW:
                        /*
                         * This occurs when one of the devices is below
                         * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
                         * device was the problem device since there's no
                         * reliable way to determine device size from userland.
                         */
                        {
                                char buf[64];

                                zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));

                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "one or more devices is less than the "
                                    "minimum size (%s)"), buf);
                        }
                        return (zfs_error(hdl, EZFS_BADDEV, msg));

                case ENOSPC:
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "one or more devices is out of space"));
                        return (zfs_error(hdl, EZFS_BADDEV, msg));

                case ENOTBLK:
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "cache device must be a disk or disk slice"));
                        return (zfs_error(hdl, EZFS_BADDEV, msg));

                default:
                        return (zpool_standard_error(hdl, errno, msg));
                }
        }

        /*
         * If this is an alternate root pool, then we automatically set the
         * mountpoint of the root dataset to be '/'.
         */
        if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
            &altroot) == 0) {
                zfs_handle_t *zhp;

                verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_DATASET)) != NULL);
                verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
                    "/") == 0);

                zfs_close(zhp);
        }

create_failed:
        zcmd_free_nvlists(&zc);
        nvlist_free(zc_props);
        nvlist_free(zc_fsprops);
        return (ret);
}
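
/*
 * Illustrative usage (editor's sketch, not part of the original source):
 * building a minimal root vdev nvlist for a single-disk pool and calling
 * zpool_create().  Real consumers (e.g. zpool(1M)) construct this tree
 * with considerably more validation; the device path and pool name are
 * assumptions.  The block is excluded from compilation.
 */
#if 0
static int
example_create_single_disk(libzfs_handle_t *hdl)
{
        nvlist_t *nvroot, *disk;
        int err;

        if (nvlist_alloc(&disk, NV_UNIQUE_NAME, 0) != 0 ||
            nvlist_add_string(disk, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
            nvlist_add_string(disk, ZPOOL_CONFIG_PATH, "/dev/da0") != 0)
                return (-1);

        if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 ||
            nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
            nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
            &disk, 1) != 0)
                return (-1);

        err = zpool_create(hdl, "tank", nvroot, NULL, NULL);
        nvlist_free(disk);      /* the array add copies its elements */
        nvlist_free(nvroot);
        return (err);
}
#endif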

/*
 * Destroy the given pool.  It is up to the caller to ensure that there are no
 * datasets left in the pool.
 */
int
zpool_destroy(zpool_handle_t *zhp)
{
        zfs_cmd_t zc = { 0 };
        zfs_handle_t *zfp = NULL;
        libzfs_handle_t *hdl = zhp->zpool_hdl;
        char msg[1024];

        if (zhp->zpool_state == POOL_STATE_ACTIVE &&
            (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
            ZFS_TYPE_FILESYSTEM)) == NULL)
                return (-1);

        if (zpool_remove_zvol_links(zhp) != 0)
                return (-1);

        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

        if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
                (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
                    "cannot destroy '%s'"), zhp->zpool_name);

                if (errno == EROFS) {
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "one or more devices is read only"));
                        (void) zfs_error(hdl, EZFS_BADDEV, msg);
                } else {
                        (void) zpool_standard_error(hdl, errno, msg);
                }

                if (zfp)
                        zfs_close(zfp);
                return (-1);
        }

        if (zfp) {
                remove_mountpoint(zfp);
                zfs_close(zfp);
        }

        return (0);
}

/*
 * Add the given vdevs to the pool.  The caller must have already performed the
 * necessary verification to ensure that the vdev specification is well-formed.
 */
int
zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
{
        zfs_cmd_t zc = { 0 };
        int ret;
        libzfs_handle_t *hdl = zhp->zpool_hdl;
        char msg[1024];
        nvlist_t **spares, **l2cache;
        uint_t nspares, nl2cache;

        (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
            "cannot add to '%s'"), zhp->zpool_name);

        if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
            SPA_VERSION_SPARES &&
            nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
            &spares, &nspares) == 0) {
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
                    "upgraded to add hot spares"));
                return (zfs_error(hdl, EZFS_BADVERSION, msg));
        }

        if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
            SPA_VERSION_L2CACHE &&
            nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
            &l2cache, &nl2cache) == 0) {
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
                    "upgraded to add cache devices"));
                return (zfs_error(hdl, EZFS_BADVERSION, msg));
        }

        if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
                return (-1);
        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));

        if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
                switch (errno) {
                case EBUSY:
                        /*
                         * This can happen if the user has specified the same
                         * device multiple times.  We can't reliably detect this
                         * until we try to add it and see we already have a
                         * label.
                         */
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "one or more vdevs refer to the same device"));
                        (void) zfs_error(hdl, EZFS_BADDEV, msg);
                        break;

                case EOVERFLOW:
                        /*
                         * This occurs when one of the devices is below
                         * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
                         * device was the problem device since there's no
                         * reliable way to determine device size from userland.
                         */
                        {
                                char buf[64];

                                zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));

                                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                                    "device is less than the minimum "
                                    "size (%s)"), buf);
                        }
                        (void) zfs_error(hdl, EZFS_BADDEV, msg);
                        break;

                case ENOTSUP:
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "pool must be upgraded to add these vdevs"));
                        (void) zfs_error(hdl, EZFS_BADVERSION, msg);
                        break;

                case EDOM:
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "root pool can not have multiple vdevs"
                            " or separate logs"));
                        (void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
                        break;

                case ENOTBLK:
                        zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                            "cache device must be a disk or disk slice"));
                        (void) zfs_error(hdl, EZFS_BADDEV, msg);
                        break;

                default:
                        (void) zpool_standard_error(hdl, errno, msg);
                }

                ret = -1;
        } else {
                ret = 0;
        }

        zcmd_free_nvlists(&zc);

        return (ret);
}
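
/*
 * Illustrative usage (editor's sketch, not part of the original source):
 * adding a cache (L2ARC) device to an existing pool.  The vdev nvlist is
 * built the same way as in the zpool_create() sketch above, except the
 * root carries a ZPOOL_CONFIG_L2CACHE array; the device path is an
 * assumption.  The block is excluded from compilation.
 */
#if 0
static int
example_add_cache_device(zpool_handle_t *zhp)
{
        nvlist_t *nvroot, *cache;
        int err;

        if (nvlist_alloc(&cache, NV_UNIQUE_NAME, 0) != 0 ||
            nvlist_add_string(cache, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
            nvlist_add_string(cache, ZPOOL_CONFIG_PATH, "/dev/da1") != 0)
                return (-1);

        if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 ||
            nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
            nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
            &cache, 1) != 0)
                return (-1);

        err = zpool_add(zhp, nvroot);
        nvlist_free(cache);
        nvlist_free(nvroot);
        return (err);
}
#endif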

/*
 * Exports the pool from the system.  The caller must ensure that there are no
 * mounted datasets in the pool.
 */
int
zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce)
{
        zfs_cmd_t zc = { 0 };
        char msg[1024];

        if (zpool_remove_zvol_links(zhp) != 0)
                return (-1);

        (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
            "cannot export '%s'"), zhp->zpool_name);

        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
        zc.zc_cookie = force;
        zc.zc_guid = hardforce;

        if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
                switch (errno) {
                case EXDEV:
                        zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
                            "use '-f' to override the following errors:\n"
                            "'%s' has an active shared spare which could be"
                            " used by other pools once '%s' is exported."),
                            zhp->zpool_name, zhp->zpool_name);
                        return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
                            msg));
                default:
                        return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
                            msg));
                }
        }

        return (0);
}

int
zpool_export(zpool_handle_t *zhp, boolean_t force)
{
        return (zpool_export_common(zhp, force, B_FALSE));
}

int
zpool_export_force(zpool_handle_t *zhp)
{
        return (zpool_export_common(zhp, B_TRUE, B_TRUE));
}
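
/*
 * Usage note (editor's note, not part of the original source): a plain
 * export maps to zpool_export(zhp, B_FALSE); passing B_TRUE corresponds
 * to 'zpool export -f', and zpool_export_force() additionally sets the
 * hard-force flag carried to the kernel in zc_guid.
 */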

/*
 * zpool_import() is a contracted interface.  Should be kept the same
 * if possible.
 *
 * Applications should use zpool_import_props() to import a pool with
 * new property values to be set.
 */
int
zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
    char *altroot)
{
        nvlist_t *props = NULL;
        int ret;

        if (altroot != NULL) {
                if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
                        return (zfs_error_fmt(hdl, EZFS_NOMEM,
                            dgettext(TEXT_DOMAIN, "cannot import '%s'"),
                            newname));
                }

                if (nvlist_add_string(props,
                    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0) {
                        nvlist_free(props);
                        return (zfs_error_fmt(hdl, EZFS_NOMEM,
                            dgettext(TEXT_DOMAIN, "cannot import '%s'"),
                            newname));
                }
        }

        ret = zpool_import_props(hdl, config, newname, props, B_FALSE);
        if (props)
                nvlist_free(props);
        return (ret);
}
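
/*
 * Illustrative usage (editor's sketch, not part of the original source):
 * importing a pool from a configuration obtained through pool discovery
 * (e.g. zpool_find_import()).  The config parameter is assumed to be one
 * pool's nvlist from that search; the alternate root is an assumption.
 * The block is excluded from compilation.
 */
#if 0
static int
example_import(libzfs_handle_t *hdl, nvlist_t *config)
{
        /* Import under the original name, relocated beneath /mnt. */
        return (zpool_import(hdl, config, NULL, "/mnt"));
}
#endif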

/*
 * Import the given pool using the known configuration and a list of
 * properties to be set.  The configuration should have come from
 * zpool_find_import().  The 'newname' parameter controls whether the pool
 * is imported with a different name.
 */
int
zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
    nvlist_t *props, boolean_t importfaulted)
{
        zfs_cmd_t zc = { 0 };
        char *thename;
        char *origname;
        int ret;
        char errbuf[1024];

        verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
            &origname) == 0);

        (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
            "cannot import pool '%s'"), origname);

        if (newname != NULL) {
                if (!zpool_name_valid(hdl, B_FALSE, newname))
                        return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
                            dgettext(TEXT_DOMAIN, "cannot import '%s'"),
                            newname));
                thename = (char *)newname;
        } else {
                thename = origname;
        }

        if (props) {
                uint64_t version;

                verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
                    &version) == 0);

                if ((props = zpool_valid_proplist(hdl, origname,
                    props, version, B_TRUE, errbuf)) == NULL) {
                        return (-1);
                } else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
                        nvlist_free(props);
                        return (-1);
                }
        }

        (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));

        verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
            &zc.zc_guid) == 0);

        if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
                nvlist_free(props);
                return (-1);
        }

        zc.zc_cookie = (uint64_t)importfaulted;
        ret = 0;
        if (zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
                char desc[1024];
                if (newname == NULL)
                        (void) snprintf(desc, sizeof (desc),
                            dgettext(TEXT_DOMAIN, "cannot import '%s'"),
                            thename);
                else
                        (void) snprintf(desc, sizeof (desc),
                            dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
                            origname, thename);

                switch (errno) {
                case ENOTSUP:
                        /*
                         * Unsupported version.
                         */
                        (void) zfs_error(hdl, EZFS_BADVERSION, desc);
                        break;

                case EINVAL:
                        (void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
                        break;

                default:
                        (void) zpool_standard_error(hdl, errno, desc);
                }

                ret = -1;
        } else {
                zpool_handle_t *zhp;

                /*
                 * This should never fail, but play it safe anyway.
                 */
                if (zpool_open_silent(hdl, thename, &zhp) != 0) {
                        ret = -1;
                } else if (zhp != NULL) {
                        ret = zpool_create_zvol_links(zhp);
                        zpool_close(zhp);
                }
        }

        zcmd_free_nvlists(&zc);
        nvlist_free(props);

        return (ret);
}

/*
 * Scrub the pool.
 */
int
zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
{
        zfs_cmd_t zc = { 0 };
        char msg[1024];
        libzfs_handle_t *hdl = zhp->zpool_hdl;

        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
        zc.zc_cookie = type;

        if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SCRUB, &zc) == 0)
                return (0);

        (void) snprintf(msg, sizeof (msg),
            dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);

        if (errno == EBUSY)
                return (zfs_error(hdl, EZFS_RESILVERING, msg));
        else
                return (zpool_standard_error(hdl, errno, msg));
}
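
/*
 * Illustrative usage (editor's sketch, not part of the original source):
 * starting and cancelling a scrub with the pool_scrub_type_t values from
 * sys/fs/zfs.h.  The block is excluded from compilation.
 */
#if 0
static int
example_scrub(zpool_handle_t *zhp, boolean_t start)
{
        /* POOL_SCRUB_NONE cancels an in-progress scrub. */
        return (zpool_scrub(zhp, start ?
            POOL_SCRUB_EVERYTHING : POOL_SCRUB_NONE));
}
#endif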

/*
 * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
 * spare, but FALSE if it is an INUSE spare.
 */
static nvlist_t *
vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
    boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
{
        uint_t c, children;
        nvlist_t **child;
        uint64_t theguid, present;
        char *path;
        uint64_t wholedisk = 0;
        nvlist_t *ret;
        uint64_t is_log;

        verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);

        if (search == NULL &&
            nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
                /*
                 * If the device has never been present since import, the only
                 * reliable way to match the vdev is by GUID.
                 */
                if (theguid == guid)
                        return (nv);
        } else if (search != NULL &&
            nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
                (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
                    &wholedisk);
                if (wholedisk) {
                        /*
                         * For whole disks, the internal path has 's0', but the
                         * path passed in by the user doesn't.
                         */
                        if (strlen(search) == strlen(path) - 2 &&
                            strncmp(search, path, strlen(search)) == 0)
                                return (nv);
                } else if (strcmp(search, path) == 0) {
                        return (nv);
                }
        }

        if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
            &child, &children) != 0)
                return (NULL);

        for (c = 0; c < children; c++) {
                if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
                    avail_spare, l2cache, NULL)) != NULL) {
                        /*
                         * The 'is_log' value is only set for the toplevel
                         * vdev, not the leaf vdevs.  So we always lookup the
                         * log device from the root of the vdev tree (where
                         * 'log' is non-NULL).
                         */
                        if (log != NULL &&
                            nvlist_lookup_uint64(child[c],
                            ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
                            is_log) {
                                *log = B_TRUE;
                        }
                        return (ret);
                }
        }

        if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
            &child, &children) == 0) {
                for (c = 0; c < children; c++) {
                        if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
                            avail_spare, l2cache, NULL)) != NULL) {
                                *avail_spare = B_TRUE;
                                return (ret);
                        }
                }
        }

        if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
            &child, &children) == 0) {
                for (c = 0; c < children; c++) {
                        if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
                            avail_spare, l2cache, NULL)) != NULL) {
                                *l2cache = B_TRUE;
                                return (ret);
                        }
                }
        }

        return (NULL);
}

nvlist_t *
zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
    boolean_t *l2cache, boolean_t *log)
{
        char buf[MAXPATHLEN];
        const char *search;
        char *end;
        nvlist_t *nvroot;
        uint64_t guid;

        guid = strtoull(path, &end, 10);
        if (guid != 0 && *end == '\0') {
                search = NULL;
        } else if (path[0] != '/') {
                (void) snprintf(buf, sizeof (buf), "%s%s", _PATH_DEV, path);
                search = buf;
        } else {
                search = path;
        }

        verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
            &nvroot) == 0);

        *avail_spare = B_FALSE;
        *l2cache = B_FALSE;
        if (log != NULL)
                *log = B_FALSE;
        return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare,
            l2cache, log));
}
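
/*
 * Illustrative usage (editor's sketch, not part of the original source):
 * locating a vdev by short device name; a name like "da0" is resolved
 * against _PATH_DEV before the tree walk, and a decimal string is treated
 * as a GUID.  The device name is an assumption; the block is excluded
 * from compilation.
 */
#if 0
static void
example_find(zpool_handle_t *zhp)
{
        boolean_t spare, l2cache, islog;
        nvlist_t *tgt;

        if ((tgt = zpool_find_vdev(zhp, "da0", &spare, &l2cache,
            &islog)) != NULL)
                (void) printf("da0: spare=%d l2cache=%d log=%d\n",
                    spare, l2cache, islog);
}
#endif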
1435
1436 static int
1437 vdev_online(nvlist_t *nv)
1438 {
1439         uint64_t ival;
1440
1441         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
1442             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
1443             nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
1444                 return (0);
1445
1446         return (1);
1447 }
1448
1449 /*
1450  * Get phys_path for a root pool
1451  * Return 0 on success; non-zeron on failure.
1452  */
1453 int
1454 zpool_get_physpath(zpool_handle_t *zhp, char *physpath)
1455 {
1456         char bootfs[ZPOOL_MAXNAMELEN];
1457         nvlist_t *vdev_root;
1458         nvlist_t **child;
1459         uint_t count;
1460         int i;
1461
1462         /*
1463          * Make sure this is a root pool, as phys_path doesn't mean
1464          * anything to a non-root pool.
1465          */
1466         if (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
1467             sizeof (bootfs), NULL) != 0)
1468                 return (-1);
1469
1470         verify(nvlist_lookup_nvlist(zhp->zpool_config,
1471             ZPOOL_CONFIG_VDEV_TREE, &vdev_root) == 0);
1472
1473         if (nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
1474             &child, &count) != 0)
1475                 return (-2);
1476
1477         for (i = 0; i < count; i++) {
1478                 nvlist_t **child2;
1479                 uint_t count2;
1480                 char *type;
1481                 char *tmppath;
1482                 int j;
1483
1484                 if (nvlist_lookup_string(child[i], ZPOOL_CONFIG_TYPE, &type)
1485                     != 0)
1486                         return (-3);
1487
1488                 if (strcmp(type, VDEV_TYPE_DISK) == 0) {
1489                         if (!vdev_online(child[i]))
1490                                 return (-8);
1491                         verify(nvlist_lookup_string(child[i],
1492                             ZPOOL_CONFIG_PHYS_PATH, &tmppath) == 0);
1493                         (void) strncpy(physpath, tmppath, strlen(tmppath));
1494                 } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) {
1495                         if (nvlist_lookup_nvlist_array(child[i],
1496                             ZPOOL_CONFIG_CHILDREN, &child2, &count2) != 0)
1497                                 return (-4);
1498
1499                         for (j = 0; j < count2; j++) {
1500                                 if (!vdev_online(child2[j]))
1501                                         return (-8);
1502                                 if (nvlist_lookup_string(child2[j],
1503                                     ZPOOL_CONFIG_PHYS_PATH, &tmppath) != 0)
1504                                         return (-5);
1505
                                /* include the separating blank */
                                if ((strlen(physpath) + 1 + strlen(tmppath)) >
                                    MAXNAMELEN)
                                        return (-6);
1509
                                if (strlen(physpath) == 0) {
                                        (void) strncpy(physpath, tmppath,
                                            strlen(tmppath));
                                        physpath[strlen(tmppath)] = '\0';
1513                                 } else {
1514                                         (void) strcat(physpath, " ");
1515                                         (void) strcat(physpath, tmppath);
1516                                 }
1517                         }
1518                 } else {
1519                         return (-7);
1520                 }
1521         }
1522
1523         return (0);
1524 }
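
/*
 * Usage sketch (illustrative).  The interface takes no buffer size, so
 * the caller is assumed to supply a large, zero-initialized buffer; the
 * zero fill matters because the mirror case above appends to whatever
 * string is already present:
 *
 *        char physpath[MAXPATHLEN] = { 0 };
 *
 *        if (zpool_get_physpath(zhp, physpath) == 0)
 *                (void) printf("root pool device(s): %s\n", physpath);
 */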
1525
1526 /*
1527  * Returns TRUE if the given guid corresponds to the given type.
1528  * This is used to check for hot spares (INUSE or not), and level 2 cache
1529  * devices.
1530  */
1531 static boolean_t
1532 is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type)
1533 {
1534         uint64_t target_guid;
1535         nvlist_t *nvroot;
1536         nvlist_t **list;
1537         uint_t count;
1538         int i;
1539
1540         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1541             &nvroot) == 0);
1542         if (nvlist_lookup_nvlist_array(nvroot, type, &list, &count) == 0) {
1543                 for (i = 0; i < count; i++) {
1544                         verify(nvlist_lookup_uint64(list[i], ZPOOL_CONFIG_GUID,
1545                             &target_guid) == 0);
1546                         if (guid == target_guid)
1547                                 return (B_TRUE);
1548                 }
1549         }
1550
1551         return (B_FALSE);
1552 }
1553
1554 /*
 * Bring the specified vdev online.  The 'flags' parameter is a set of the
1556  * ZFS_ONLINE_* flags.
1557  */
1558 int
1559 zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
1560     vdev_state_t *newstate)
1561 {
1562         zfs_cmd_t zc = { 0 };
1563         char msg[1024];
1564         nvlist_t *tgt;
1565         boolean_t avail_spare, l2cache;
1566         libzfs_handle_t *hdl = zhp->zpool_hdl;
1567
1568         (void) snprintf(msg, sizeof (msg),
1569             dgettext(TEXT_DOMAIN, "cannot online %s"), path);
1570
1571         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1572         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
1573             NULL)) == NULL)
1574                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
1575
1576         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1577
        if (avail_spare ||
            is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES))
                return (zfs_error(hdl, EZFS_ISSPARE, msg));
1581
1582         zc.zc_cookie = VDEV_STATE_ONLINE;
1583         zc.zc_obj = flags;
1584
1585         if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0)
1586                 return (zpool_standard_error(hdl, errno, msg));
1587
1588         *newstate = zc.zc_cookie;
1589         return (0);
1590 }
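
/*
 * Usage sketch (illustrative; "da0" is hypothetical and a flags value of
 * 0 requests a plain online with no ZFS_ONLINE_* modifiers):
 *
 *        vdev_state_t newstate;
 *
 *        if (zpool_vdev_online(zhp, "da0", 0, &newstate) == 0 &&
 *            newstate != VDEV_STATE_HEALTHY)
 *                (void) printf("device onlined, but not healthy\n");
 */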
1591
1592 /*
1593  * Take the specified vdev offline
1594  */
1595 int
1596 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
1597 {
1598         zfs_cmd_t zc = { 0 };
1599         char msg[1024];
1600         nvlist_t *tgt;
1601         boolean_t avail_spare, l2cache;
1602         libzfs_handle_t *hdl = zhp->zpool_hdl;
1603
1604         (void) snprintf(msg, sizeof (msg),
1605             dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
1606
1607         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1608         if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
1609             NULL)) == NULL)
1610                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
1611
1612         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1613
        if (avail_spare ||
            is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES))
                return (zfs_error(hdl, EZFS_ISSPARE, msg));
1617
1618         zc.zc_cookie = VDEV_STATE_OFFLINE;
1619         zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
1620
1621         if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1622                 return (0);
1623
1624         switch (errno) {
1625         case EBUSY:
1626
1627                 /*
1628                  * There are no other replicas of this device.
1629                  */
1630                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
1631
1632         default:
1633                 return (zpool_standard_error(hdl, errno, msg));
1634         }
1635 }
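
/*
 * Usage sketch (illustrative; "da0" is hypothetical).  Passing B_TRUE
 * requests a temporary offline, which does not persist across an
 * export/import cycle or a reboot:
 *
 *        if (zpool_vdev_offline(zhp, "da0", B_TRUE) != 0)
 *                (void) fprintf(stderr, "%s\n",
 *                    libzfs_error_description(hdl));
 */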
1636
1637 /*
1638  * Mark the given vdev faulted.
1639  */
1640 int
1641 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid)
1642 {
1643         zfs_cmd_t zc = { 0 };
1644         char msg[1024];
1645         libzfs_handle_t *hdl = zhp->zpool_hdl;
1646
        (void) snprintf(msg, sizeof (msg),
            dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
1649
1650         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1651         zc.zc_guid = guid;
1652         zc.zc_cookie = VDEV_STATE_FAULTED;
1653
1654         if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1655                 return (0);
1656
1657         switch (errno) {
1658         case EBUSY:
1659
1660                 /*
1661                  * There are no other replicas of this device.
1662                  */
1663                 return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
1664
1665         default:
1666                 return (zpool_standard_error(hdl, errno, msg));
1667         }
}
1670
1671 /*
1672  * Mark the given vdev degraded.
1673  */
1674 int
1675 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid)
1676 {
1677         zfs_cmd_t zc = { 0 };
1678         char msg[1024];
1679         libzfs_handle_t *hdl = zhp->zpool_hdl;
1680
        (void) snprintf(msg, sizeof (msg),
            dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
1683
1684         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1685         zc.zc_guid = guid;
1686         zc.zc_cookie = VDEV_STATE_DEGRADED;
1687
1688         if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1689                 return (0);
1690
1691         return (zpool_standard_error(hdl, errno, msg));
1692 }
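
/*
 * Usage sketch (illustrative): unlike the online/offline calls above,
 * the fault and degrade interfaces take a vdev GUID, typically one that
 * a diagnosis engine such as fmd pulled out of the config ('tgt' below
 * is assumed to be a vdev nvlist):
 *
 *        uint64_t guid;
 *
 *        verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &guid) == 0);
 *        (void) zpool_vdev_fault(zhp, guid);
 */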
1693
/*
 * Returns TRUE if the given vdev is the indicated child of a spare vdev.
 * With 'which' == 1 this identifies a device that was swapped in as a hot
 * spare; with 'which' == 0, the original device that the spare replaced.
 */
1698 static boolean_t
1699 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
1700 {
1701         nvlist_t **child;
1702         uint_t c, children;
1703         char *type;
1704
1705         if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
1706             &children) == 0) {
1707                 verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
1708                     &type) == 0);
1709
1710                 if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
1711                     children == 2 && child[which] == tgt)
1712                         return (B_TRUE);
1713
1714                 for (c = 0; c < children; c++)
1715                         if (is_replacing_spare(child[c], tgt, which))
1716                                 return (B_TRUE);
1717         }
1718
1719         return (B_FALSE);
1720 }
1721
1722 /*
1723  * Attach new_disk (fully described by nvroot) to old_disk.
1724  * If 'replacing' is specified, the new disk will replace the old one.
1725  */
1726 int
1727 zpool_vdev_attach(zpool_handle_t *zhp,
1728     const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
1729 {
1730         zfs_cmd_t zc = { 0 };
1731         char msg[1024];
1732         int ret;
1733         nvlist_t *tgt;
1734         boolean_t avail_spare, l2cache, islog;
1735         uint64_t val;
1736         char *path, *newname;
1737         nvlist_t **child;
1738         uint_t children;
1739         nvlist_t *config_root;
1740         libzfs_handle_t *hdl = zhp->zpool_hdl;
1741
1742         if (replacing)
1743                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1744                     "cannot replace %s with %s"), old_disk, new_disk);
1745         else
1746                 (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1747                     "cannot attach %s to %s"), new_disk, old_disk);
1748
1749         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
        if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
            &islog)) == NULL)
1752                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
1753
1754         if (avail_spare)
1755                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
1756
1757         if (l2cache)
1758                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
1759
1760         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1761         zc.zc_cookie = replacing;
1762
1763         if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1764             &child, &children) != 0 || children != 1) {
1765                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1766                     "new device must be a single disk"));
1767                 return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
1768         }
1769
1770         verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
1771             ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
1772
1773         if ((newname = zpool_vdev_name(NULL, NULL, child[0])) == NULL)
1774                 return (-1);
1775
1776         /*
1777          * If the target is a hot spare that has been swapped in, we can only
1778          * replace it with another hot spare.
1779          */
1780         if (replacing &&
1781             nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
1782             (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
1783             NULL) == NULL || !avail_spare) &&
1784             is_replacing_spare(config_root, tgt, 1)) {
1785                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1786                     "can only be replaced by another hot spare"));
1787                 free(newname);
1788                 return (zfs_error(hdl, EZFS_BADTARGET, msg));
1789         }
1790
        /*
         * If we are attempting to replace a device with a spare, it
         * cannot be applied to an already-spared device.
         */
1795         if (replacing &&
1796             nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
1797             zpool_find_vdev(zhp, newname, &avail_spare,
1798             &l2cache, NULL) != NULL && avail_spare &&
1799             is_replacing_spare(config_root, tgt, 0)) {
1800                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1801                     "device has already been replaced with a spare"));
1802                 free(newname);
1803                 return (zfs_error(hdl, EZFS_BADTARGET, msg));
1804         }
1805
1806         free(newname);
1807
1808         if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1809                 return (-1);
1810
1811         ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ATTACH, &zc);
1812
1813         zcmd_free_nvlists(&zc);
1814
1815         if (ret == 0)
1816                 return (0);
1817
1818         switch (errno) {
1819         case ENOTSUP:
1820                 /*
1821                  * Can't attach to or replace this type of vdev.
1822                  */
1823                 if (replacing) {
1824                         if (islog)
1825                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1826                                     "cannot replace a log with a spare"));
1827                         else
1828                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1829                                     "cannot replace a replacing device"));
1830                 } else {
1831                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1832                             "can only attach to mirrors and top-level "
1833                             "disks"));
1834                 }
1835                 (void) zfs_error(hdl, EZFS_BADTARGET, msg);
1836                 break;
1837
1838         case EINVAL:
1839                 /*
1840                  * The new device must be a single disk.
1841                  */
1842                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1843                     "new device must be a single disk"));
1844                 (void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
1845                 break;
1846
1847         case EBUSY:
1848                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
1849                     new_disk);
1850                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
1851                 break;
1852
1853         case EOVERFLOW:
1854                 /*
1855                  * The new device is too small.
1856                  */
1857                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1858                     "device is too small"));
1859                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
1860                 break;
1861
1862         case EDOM:
1863                 /*
1864                  * The new device has a different alignment requirement.
1865                  */
1866                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1867                     "devices have different sector alignment"));
1868                 (void) zfs_error(hdl, EZFS_BADDEV, msg);
1869                 break;
1870
1871         case ENAMETOOLONG:
1872                 /*
1873                  * The resulting top-level vdev spec won't fit in the label.
1874                  */
1875                 (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
1876                 break;
1877
1878         default:
1879                 (void) zpool_standard_error(hdl, errno, msg);
1880         }
1881
1882         return (-1);
1883 }
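
/*
 * Usage sketch (illustrative): nvroot must be a root vdev containing a
 * single leaf child that describes the new device.  A hand-built minimal
 * tree might look like the following ("/dev/da1" is hypothetical, and
 * error handling plus nvlist_free() calls are omitted; zpool(1M) builds
 * the real tree, including labeling, in make_root_vdev()):
 *
 *        nvlist_t *root, *leaf;
 *
 *        verify(nvlist_alloc(&leaf, NV_UNIQUE_NAME, 0) == 0);
 *        verify(nvlist_add_string(leaf, ZPOOL_CONFIG_TYPE,
 *            VDEV_TYPE_DISK) == 0);
 *        verify(nvlist_add_string(leaf, ZPOOL_CONFIG_PATH,
 *            "/dev/da1") == 0);
 *        verify(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
 *        verify(nvlist_add_string(root, ZPOOL_CONFIG_TYPE,
 *            VDEV_TYPE_ROOT) == 0);
 *        verify(nvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
 *            &leaf, 1) == 0);
 *
 *        ret = zpool_vdev_attach(zhp, "da0", "/dev/da1", root, B_TRUE);
 */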
1884
1885 /*
1886  * Detach the specified device.
1887  */
1888 int
1889 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
1890 {
1891         zfs_cmd_t zc = { 0 };
1892         char msg[1024];
1893         nvlist_t *tgt;
1894         boolean_t avail_spare, l2cache;
1895         libzfs_handle_t *hdl = zhp->zpool_hdl;
1896
1897         (void) snprintf(msg, sizeof (msg),
1898             dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
1899
1900         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
        if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
            NULL)) == NULL)
1903                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
1904
1905         if (avail_spare)
1906                 return (zfs_error(hdl, EZFS_ISSPARE, msg));
1907
1908         if (l2cache)
1909                 return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
1910
1911         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1912
1913         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
1914                 return (0);
1915
1916         switch (errno) {
1917
1918         case ENOTSUP:
1919                 /*
1920                  * Can't detach from this type of vdev.
1921                  */
1922                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
1923                     "applicable to mirror and replacing vdevs"));
1924                 (void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, msg);
1925                 break;
1926
1927         case EBUSY:
1928                 /*
1929                  * There are no other replicas of this device.
1930                  */
1931                 (void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
1932                 break;
1933
1934         default:
1935                 (void) zpool_standard_error(hdl, errno, msg);
1936         }
1937
1938         return (-1);
1939 }
1940
1941 /*
1942  * Remove the given device.  Currently, this is supported only for hot spares
1943  * and level 2 cache devices.
1944  */
1945 int
1946 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
1947 {
1948         zfs_cmd_t zc = { 0 };
1949         char msg[1024];
1950         nvlist_t *tgt;
1951         boolean_t avail_spare, l2cache;
1952         libzfs_handle_t *hdl = zhp->zpool_hdl;
1953
1954         (void) snprintf(msg, sizeof (msg),
1955             dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
1956
1957         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
        if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
            NULL)) == NULL)
1960                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
1961
1962         if (!avail_spare && !l2cache) {
1963                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1964                     "only inactive hot spares or cache devices "
1965                     "can be removed"));
1966                 return (zfs_error(hdl, EZFS_NODEVICE, msg));
1967         }
1968
1969         verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1970
1971         if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
1972                 return (0);
1973
1974         return (zpool_standard_error(hdl, errno, msg));
1975 }
1976
1977 /*
1978  * Clear the errors for the pool, or the particular device if specified.
1979  */
1980 int
1981 zpool_clear(zpool_handle_t *zhp, const char *path)
1982 {
1983         zfs_cmd_t zc = { 0 };
1984         char msg[1024];
1985         nvlist_t *tgt;
1986         boolean_t avail_spare, l2cache;
1987         libzfs_handle_t *hdl = zhp->zpool_hdl;
1988
1989         if (path)
1990                 (void) snprintf(msg, sizeof (msg),
1991                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1992                     path);
1993         else
1994                 (void) snprintf(msg, sizeof (msg),
1995                     dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1996                     zhp->zpool_name);
1997
1998         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1999         if (path) {
                if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
                    &l2cache, NULL)) == NULL)
2002                         return (zfs_error(hdl, EZFS_NODEVICE, msg));
2003
2004                 /*
2005                  * Don't allow error clearing for hot spares.  Do allow
2006                  * error clearing for l2cache devices.
2007                  */
2008                 if (avail_spare)
2009                         return (zfs_error(hdl, EZFS_ISSPARE, msg));
2010
2011                 verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
2012                     &zc.zc_guid) == 0);
2013         }
2014
2015         if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0)
2016                 return (0);
2017
2018         return (zpool_standard_error(hdl, errno, msg));
2019 }
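
/*
 * Usage sketch (illustrative): a NULL path clears the error counts of
 * every device in the pool, which is what "zpool clear <pool>" does:
 *
 *        if (zpool_clear(zhp, NULL) != 0)
 *                (void) fprintf(stderr, "%s\n",
 *                    libzfs_error_description(hdl));
 */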
2020
2021 /*
2022  * Similar to zpool_clear(), but takes a GUID (used by fmd).
2023  */
2024 int
2025 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
2026 {
2027         zfs_cmd_t zc = { 0 };
2028         char msg[1024];
2029         libzfs_handle_t *hdl = zhp->zpool_hdl;
2030
        (void) snprintf(msg, sizeof (msg),
            dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
            (u_longlong_t)guid);
2034
2035         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2036         zc.zc_guid = guid;
2037
2038         if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
2039                 return (0);
2040
2041         return (zpool_standard_error(hdl, errno, msg));
2042 }
2043
/*
 * Iterate over all zvols in a given pool by walking the
 * ZVOL_FULL_DEV_DIR/<pool> hierarchy.
 */
2048 int
2049 zpool_iter_zvol(zpool_handle_t *zhp, int (*cb)(const char *, void *),
2050     void *data)
2051 {
2052         libzfs_handle_t *hdl = zhp->zpool_hdl;
2053         char (*paths)[MAXPATHLEN];
2054         char path[MAXPATHLEN];
2055         size_t size = 4;
2056         int curr, fd, base, ret = 0;
2057         DIR *dirp;
2058         struct dirent *dp;
2059         struct stat st;
2060
2061         if ((base = open(ZVOL_FULL_DEV_DIR, O_RDONLY)) < 0)
2062                 return (errno == ENOENT ? 0 : -1);
2063
2064         snprintf(path, sizeof(path), "%s/%s", ZVOL_FULL_DEV_DIR,
2065             zhp->zpool_name);
2066         if (stat(path, &st) != 0) {
2067                 int err = errno;
2068                 (void) close(base);
2069                 return (err == ENOENT ? 0 : -1);
2070         }
2071
        /*
         * Oddly this wasn't a directory -- ignore that failure since we
         * know there are no links lower in the (non-existent) hierarchy.
         */
2076         if (!S_ISDIR(st.st_mode)) {
2077                 (void) close(base);
2078                 return (0);
2079         }
2080
2081         if ((paths = zfs_alloc(hdl, size * sizeof (paths[0]))) == NULL) {
2082                 (void) close(base);
2083                 return (-1);
2084         }
2085
2086         (void) strlcpy(paths[0], zhp->zpool_name, sizeof (paths[0]));
2087         curr = 0;
2088
2089         while (curr >= 0) {
2090                 snprintf(path, sizeof(path), "%s/%s", ZVOL_FULL_DEV_DIR,
2091                     paths[curr]);
2092                 if (lstat(path, &st) != 0)
2093                         goto err;
2094
2095                 if (S_ISDIR(st.st_mode)) {
2096                         if ((dirp = opendir(path)) == NULL) {
2097                                 goto err;
2098                         }
2099
2100                         while ((dp = readdir(dirp)) != NULL) {
2101                                 if (dp->d_name[0] == '.')
2102                                         continue;
2103
2104                                 if (curr + 1 == size) {
2105                                         paths = zfs_realloc(hdl, paths,
2106                                             size * sizeof (paths[0]),
2107                                             size * 2 * sizeof (paths[0]));
2108                                         if (paths == NULL) {
2109                                                 (void) closedir(dirp);
2110                                                 goto err;
2111                                         }
2112
2113                                         size *= 2;
2114                                 }
2115
2116                                 (void) strlcpy(paths[curr + 1], paths[curr],
2117                                     sizeof (paths[curr + 1]));
2118                                 (void) strlcat(paths[curr], "/",
2119                                     sizeof (paths[curr]));
2120                                 (void) strlcat(paths[curr], dp->d_name,
2121                                     sizeof (paths[curr]));
2122                                 curr++;
2123                         }
2124
2125                         (void) closedir(dirp);
2126
2127                 } else {
2128                         if ((ret = cb(paths[curr], data)) != 0)
2129                                 break;
2130                 }
2131
2132                 curr--;
2133         }
2134
2135         free(paths);
2136         (void) close(base);
2137
2138         return (ret);
2139
2140 err:
2141         free(paths);
2142         (void) close(base);
2143         return (-1);
2144 }
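
/*
 * Usage sketch (illustrative): the callback is invoked once per zvol
 * with the dataset name and the opaque cookie.  A hypothetical counting
 * callback:
 *
 *        static int
 *        count_zvol(const char *dataset, void *data)
 *        {
 *                (*(int *)data)++;
 *                return (0);
 *        }
 *
 *        int nzvols = 0;
 *        (void) zpool_iter_zvol(zhp, count_zvol, &nzvols);
 */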
2145
2146 typedef struct zvol_cb {
2147         zpool_handle_t *zcb_pool;
2148         boolean_t zcb_create;
2149 } zvol_cb_t;
2150
2151 /*ARGSUSED*/
2152 static int
2153 do_zvol_create(zfs_handle_t *zhp, void *data)
2154 {
2155         int ret = 0;
2156
2157         if (ZFS_IS_VOLUME(zhp)) {
2158                 (void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
2159                 ret = zfs_iter_snapshots(zhp, do_zvol_create, NULL);
2160         }
2161
2162         if (ret == 0)
2163                 ret = zfs_iter_filesystems(zhp, do_zvol_create, NULL);
2164
2165         zfs_close(zhp);
2166
2167         return (ret);
2168 }
2169
2170 /*
2171  * Iterate over all zvols in the pool and make any necessary minor nodes.
2172  */
2173 int
2174 zpool_create_zvol_links(zpool_handle_t *zhp)
2175 {
2176         zfs_handle_t *zfp;
2177         int ret;
2178
2179         /*
2180          * If the pool is unavailable, just return success.
2181          */
2182         if ((zfp = make_dataset_handle(zhp->zpool_hdl,
2183             zhp->zpool_name)) == NULL)
2184                 return (0);
2185
2186         ret = zfs_iter_filesystems(zfp, do_zvol_create, NULL);
2187
2188         zfs_close(zfp);
2189         return (ret);
2190 }
2191
2192 static int
2193 do_zvol_remove(const char *dataset, void *data)
2194 {
2195         zpool_handle_t *zhp = data;
2196
2197         return (zvol_remove_link(zhp->zpool_hdl, dataset));
2198 }
2199
2200 /*
2201  * Iterate over all zvols in the pool and remove any minor nodes.  We iterate
2202  * by examining the /dev links so that a corrupted pool doesn't impede this
2203  * operation.
2204  */
2205 int
2206 zpool_remove_zvol_links(zpool_handle_t *zhp)
2207 {
2208         return (zpool_iter_zvol(zhp, do_zvol_remove, zhp));
2209 }
2210
2211 /*
2212  * Convert from a devid string to a path.
2213  */
2214 static char *
2215 devid_to_path(char *devid_str)
2216 {
2217         ddi_devid_t devid;
2218         char *minor;
2219         char *path;
2220         devid_nmlist_t *list = NULL;
2221         int ret;
2222
2223         if (devid_str_decode(devid_str, &devid, &minor) != 0)
2224                 return (NULL);
2225
2226         ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
2227
2228         devid_str_free(minor);
2229         devid_free(devid);
2230
2231         if (ret != 0)
2232                 return (NULL);
2233
2234         if ((path = strdup(list[0].devname)) == NULL)
2235                 return (NULL);
2236
2237         devid_free_nmlist(list);
2238
2239         return (path);
2240 }
2241
2242 /*
2243  * Convert from a path to a devid string.
2244  */
2245 static char *
2246 path_to_devid(const char *path)
2247 {
2248         int fd;
2249         ddi_devid_t devid;
2250         char *minor, *ret;
2251
2252         if ((fd = open(path, O_RDONLY)) < 0)
2253                 return (NULL);
2254
2255         minor = NULL;
2256         ret = NULL;
2257         if (devid_get(fd, &devid) == 0) {
2258                 if (devid_get_minor_name(fd, &minor) == 0)
2259                         ret = devid_str_encode(devid, minor);
2260                 if (minor != NULL)
2261                         devid_str_free(minor);
2262                 devid_free(devid);
2263         }
2264         (void) close(fd);
2265
2266         return (ret);
2267 }
2268
2269 /*
2270  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
2271  * ignore any failure here, since a common case is for an unprivileged user to
2272  * type 'zpool status', and we'll display the correct information anyway.
2273  */
2274 static void
2275 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
2276 {
2277         zfs_cmd_t zc = { 0 };
2278
        /* strlcpy(), unlike strncpy(), guarantees NUL termination */
        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
        (void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value));
2281         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2282             &zc.zc_guid) == 0);
2283
2284         (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
2285 }
2286
2287 /*
2288  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
 * we use that, stripping off any leading _PATH_DEV prefix; if not, we use the
 * type.
2290  * We also check if this is a whole disk, in which case we strip off the
2291  * trailing 's0' slice name.
2292  *
2293  * This routine is also responsible for identifying when disks have been
2294  * reconfigured in a new location.  The kernel will have opened the device by
2295  * devid, but the path will still refer to the old location.  To catch this, we
2296  * first do a path -> devid translation (which is fast for the common case).  If
2297  * the devid matches, we're done.  If not, we do a reverse devid -> path
2298  * translation and issue the appropriate ioctl() to update the path of the vdev.
2299  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
2300  * of these checks.
2301  */
2302 char *
2303 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
2304 {
2305         char *path, *devid;
2306         uint64_t value;
2307         char buf[64];
2308         vdev_stat_t *vs;
2309         uint_t vsc;
2310
2311         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
2312             &value) == 0) {
2313                 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2314                     &value) == 0);
2315                 (void) snprintf(buf, sizeof (buf), "%llu",
2316                     (u_longlong_t)value);
2317                 path = buf;
2318         } else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
2319
2320                 /*
2321                  * If the device is dead (faulted, offline, etc) then don't
2322                  * bother opening it.  Otherwise we may be forcing the user to
2323                  * open a misbehaving device, which can have undesirable
2324                  * effects.
2325                  */
2326                 if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
2327                     (uint64_t **)&vs, &vsc) != 0 ||
2328                     vs->vs_state >= VDEV_STATE_DEGRADED) &&
2329                     zhp != NULL &&
2330                     nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
2331                         /*
2332                          * Determine if the current path is correct.
2333                          */
2334                         char *newdevid = path_to_devid(path);
2335
2336                         if (newdevid == NULL ||
2337                             strcmp(devid, newdevid) != 0) {
2338                                 char *newpath;
2339
2340                                 if ((newpath = devid_to_path(devid)) != NULL) {
2341                                         /*
2342                                          * Update the path appropriately.
2343                                          */
2344                                         set_path(zhp, nv, newpath);
2345                                         if (nvlist_add_string(nv,
2346                                             ZPOOL_CONFIG_PATH, newpath) == 0)
2347                                                 verify(nvlist_lookup_string(nv,
2348                                                     ZPOOL_CONFIG_PATH,
2349                                                     &path) == 0);
2350                                         free(newpath);
2351                                 }
2352                         }
2353
2354                         if (newdevid)
2355                                 devid_str_free(newdevid);
2356                 }
2357
2358                 if (strncmp(path, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0)
2359                         path += sizeof(_PATH_DEV) - 1;
2360
2361                 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
2362                     &value) == 0 && value) {
2363                         char *tmp = zfs_strdup(hdl, path);
2364                         if (tmp == NULL)
2365                                 return (NULL);
2366                         tmp[strlen(path) - 2] = '\0';
2367                         return (tmp);
2368                 }
2369         } else {
2370                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
2371
2372                 /*
2373                  * If it's a raidz device, we need to stick in the parity level.
2374                  */
2375                 if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
2376                         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
2377                             &value) == 0);
2378                         (void) snprintf(buf, sizeof (buf), "%s%llu", path,
2379                             (u_longlong_t)value);
2380                         path = buf;
2381                 }
2382         }
2383
2384         return (zfs_strdup(hdl, path));
2385 }
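
/*
 * Usage sketch (illustrative): printing a display name for each
 * top-level vdev.  The returned string is allocated with zfs_strdup()
 * and must be freed by the caller:
 *
 *        nvlist_t **child;
 *        uint_t c, children;
 *        char *name;
 *
 *        verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
 *            &child, &children) == 0);
 *        for (c = 0; c < children; c++) {
 *                name = zpool_vdev_name(hdl, zhp, child[c]);
 *                (void) printf("\t%s\n", name);
 *                free(name);
 *        }
 */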
2386
2387 static int
2388 zbookmark_compare(const void *a, const void *b)
2389 {
2390         return (memcmp(a, b, sizeof (zbookmark_t)));
2391 }
2392
2393 /*
2394  * Retrieve the persistent error log, uniquify the members, and return to the
2395  * caller.
2396  */
2397 int
2398 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
2399 {
2400         zfs_cmd_t zc = { 0 };
2401         uint64_t count;
2402         zbookmark_t *zb = NULL;
2403         int i;
2404
2405         /*
2406          * Retrieve the raw error list from the kernel.  If the number of errors
2407          * has increased, allocate more space and continue until we get the
2408          * entire list.
2409          */
2410         verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
2411             &count) == 0);
2412         if (count == 0)
2413                 return (0);
2414         if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
2415             count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
2416                 return (-1);
2417         zc.zc_nvlist_dst_size = count;
        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2419         for (;;) {
2420                 if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
2421                     &zc) != 0) {
2422                         free((void *)(uintptr_t)zc.zc_nvlist_dst);
2423                         if (errno == ENOMEM) {
2424                                 count = zc.zc_nvlist_dst_size;
2425                                 if ((zc.zc_nvlist_dst = (uintptr_t)
2426                                     zfs_alloc(zhp->zpool_hdl, count *
2427                                     sizeof (zbookmark_t))) == (uintptr_t)NULL)
2428                                         return (-1);
2429                         } else {
2430                                 return (-1);
2431                         }
2432                 } else {
2433                         break;
2434                 }
2435         }
2436
        /*
         * Sort the resulting bookmarks.  This is a little confusing due to the
         * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
         * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks
         * _not_ copied as part of the process.  So we point the start of our
         * array appropriately and decrement the total number of elements.
         */
2444         zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
2445             zc.zc_nvlist_dst_size;
2446         count -= zc.zc_nvlist_dst_size;
2447
2448         qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
2449
2450         verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
2451
2452         /*
2453          * Fill in the nverrlistp with nvlist's of dataset and object numbers.
2454          */
2455         for (i = 0; i < count; i++) {
2456                 nvlist_t *nv;
2457
2458                 /* ignoring zb_blkid and zb_level for now */
2459                 if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
2460                     zb[i-1].zb_object == zb[i].zb_object)
2461                         continue;
2462
2463                 if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
2464                         goto nomem;
2465                 if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
2466                     zb[i].zb_objset) != 0) {
2467                         nvlist_free(nv);
2468                         goto nomem;
2469                 }
2470                 if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
2471                     zb[i].zb_object) != 0) {
2472                         nvlist_free(nv);
2473                         goto nomem;
2474                 }
2475                 if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
2476                         nvlist_free(nv);
2477                         goto nomem;
2478                 }
2479                 nvlist_free(nv);
2480         }
2481
2482         free((void *)(uintptr_t)zc.zc_nvlist_dst);
2483         return (0);
2484
2485 nomem:
2486         free((void *)(uintptr_t)zc.zc_nvlist_dst);
2487         return (no_memory(zhp->zpool_hdl));
2488 }
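
/*
 * Usage sketch (illustrative): each element of the returned list is an
 * nvlist carrying ZPOOL_ERR_DATASET and ZPOOL_ERR_OBJECT, and can be
 * visited with the generic nvpair iterator:
 *
 *        nvlist_t *nverrlist = NULL;
 *        nvpair_t *elem = NULL;
 *        nvlist_t *nv;
 *        uint64_t dsobj, obj;
 *
 *        if (zpool_get_errlog(zhp, &nverrlist) == 0 && nverrlist != NULL) {
 *                while ((elem = nvlist_next_nvpair(nverrlist,
 *                    elem)) != NULL) {
 *                        verify(nvpair_value_nvlist(elem, &nv) == 0);
 *                        verify(nvlist_lookup_uint64(nv,
 *                            ZPOOL_ERR_DATASET, &dsobj) == 0);
 *                        verify(nvlist_lookup_uint64(nv,
 *                            ZPOOL_ERR_OBJECT, &obj) == 0);
 *                }
 *                nvlist_free(nverrlist);
 *        }
 */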
2489
2490 /*
2491  * Upgrade a ZFS pool to the latest on-disk version.
2492  */
2493 int
2494 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
2495 {
2496         zfs_cmd_t zc = { 0 };
2497         libzfs_handle_t *hdl = zhp->zpool_hdl;
2498
        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2500         zc.zc_cookie = new_version;
2501
2502         if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
2503                 return (zpool_standard_error_fmt(hdl, errno,
2504                     dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
2505                     zhp->zpool_name));
2506         return (0);
2507 }
2508
2509 void
2510 zpool_set_history_str(const char *subcommand, int argc, char **argv,
2511     char *history_str)
2512 {
2513         int i;
2514
2515         (void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN);
2516         for (i = 1; i < argc; i++) {
2517                 if (strlen(history_str) + 1 + strlen(argv[i]) >
2518                     HIS_MAX_RECORD_LEN)
2519                         break;
2520                 (void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN);
2521                 (void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN);
2522         }
2523 }
2524
2525 /*
2526  * Stage command history for logging.
2527  */
2528 int
2529 zpool_stage_history(libzfs_handle_t *hdl, const char *history_str)
2530 {
2531         if (history_str == NULL)
2532                 return (EINVAL);
2533
2534         if (strlen(history_str) > HIS_MAX_RECORD_LEN)
2535                 return (EINVAL);
2536
2537         if (hdl->libzfs_log_str != NULL)
2538                 free(hdl->libzfs_log_str);
2539
2540         if ((hdl->libzfs_log_str = strdup(history_str)) == NULL)
2541                 return (no_memory(hdl));
2542
2543         return (0);
2544 }
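
/*
 * Usage sketch (illustrative): a command front end flattens its argv
 * into one string and stages it before issuing pool ioctls so that the
 * kernel can log the invocation:
 *
 *        char history_str[HIS_MAX_RECORD_LEN];
 *
 *        zpool_set_history_str("zpool", argc, argv, history_str);
 *        (void) zpool_stage_history(hdl, history_str);
 */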
2545
2546 /*
2547  * Perform ioctl to get some command history of a pool.
2548  *
2549  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
2550  * logical offset of the history buffer to start reading from.
2551  *
2552  * Upon return, 'off' is the next logical offset to read from and
 * 'len' is the actual number of bytes read into 'buf'.
2554  */
2555 static int
2556 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
2557 {
2558         zfs_cmd_t zc = { 0 };
2559         libzfs_handle_t *hdl = zhp->zpool_hdl;
2560
2561         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2562
2563         zc.zc_history = (uint64_t)(uintptr_t)buf;
2564         zc.zc_history_len = *len;
2565         zc.zc_history_offset = *off;
2566
2567         if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
2568                 switch (errno) {
2569                 case EPERM:
2570                         return (zfs_error_fmt(hdl, EZFS_PERM,
2571                             dgettext(TEXT_DOMAIN,
2572                             "cannot show history for pool '%s'"),
2573                             zhp->zpool_name));
2574                 case ENOENT:
2575                         return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
2576                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
2577                             "'%s'"), zhp->zpool_name));
2578                 case ENOTSUP:
2579                         return (zfs_error_fmt(hdl, EZFS_BADVERSION,
2580                             dgettext(TEXT_DOMAIN, "cannot get history for pool "
2581                             "'%s', pool must be upgraded"), zhp->zpool_name));
2582                 default:
2583                         return (zpool_standard_error_fmt(hdl, errno,
2584                             dgettext(TEXT_DOMAIN,
2585                             "cannot get history for '%s'"), zhp->zpool_name));
2586                 }
2587         }
2588
2589         *len = zc.zc_history_len;
2590         *off = zc.zc_history_offset;
2591
2592         return (0);
2593 }
2594
2595 /*
2596  * Process the buffer of nvlists, unpacking and storing each nvlist record
2597  * into 'records'.  'leftover' is set to the number of bytes that weren't
2598  * processed as there wasn't a complete record.
2599  */
2600 static int
2601 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
2602     nvlist_t ***records, uint_t *numrecords)
2603 {
2604         uint64_t reclen;
2605         nvlist_t *nv;
2606         int i;
2607
2608         while (bytes_read > sizeof (reclen)) {
2609
2610                 /* get length of packed record (stored as little endian) */
2611                 for (i = 0, reclen = 0; i < sizeof (reclen); i++)
2612                         reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
2613
2614                 if (bytes_read < sizeof (reclen) + reclen)
2615                         break;
2616
2617                 /* unpack record */
2618                 if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
2619                         return (ENOMEM);
2620                 bytes_read -= sizeof (reclen) + reclen;
2621                 buf += sizeof (reclen) + reclen;
2622
                /* add record to nvlist array */
                (*numrecords)++;
                if (ISP2(*numrecords + 1)) {
                        nvlist_t **tmp;

                        /* don't lose (and leak) the array on failure */
                        tmp = realloc(*records,
                            *numrecords * 2 * sizeof (nvlist_t *));
                        if (tmp == NULL) {
                                nvlist_free(nv);
                                return (ENOMEM);
                        }
                        *records = tmp;
                }
                (*records)[*numrecords - 1] = nv;
2630         }
2631
2632         *leftover = bytes_read;
2633         return (0);
2634 }
2635
2636 #define HIS_BUF_LEN     (128*1024)
2637
2638 /*
2639  * Retrieve the command history of a pool.
2640  */
2641 int
2642 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
2643 {
2644         char buf[HIS_BUF_LEN];
2645         uint64_t off = 0;
2646         nvlist_t **records = NULL;
2647         uint_t numrecords = 0;
2648         int err, i;
2649
2650         do {
2651                 uint64_t bytes_read = sizeof (buf);
2652                 uint64_t leftover;
2653
2654                 if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
2655                         break;
2656
2657                 /* if nothing else was read in, we're at EOF, just return */
2658                 if (!bytes_read)
2659                         break;
2660
2661                 if ((err = zpool_history_unpack(buf, bytes_read,
2662                     &leftover, &records, &numrecords)) != 0)
2663                         break;
2664                 off -= leftover;
2665
2666                 /* CONSTCOND */
2667         } while (1);
2668
2669         if (!err) {
2670                 verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
2671                 verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
2672                     records, numrecords) == 0);
2673         }
2674         for (i = 0; i < numrecords; i++)
2675                 nvlist_free(records[i]);
2676         free(records);
2677
2678         return (err);
2679 }
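
/*
 * Usage sketch (illustrative): rendering the records, roughly as
 * "zpool history" does.  ZPOOL_HIST_TIME and ZPOOL_HIST_CMD may be
 * missing from internally logged records, hence the tolerant lookups:
 *
 *        nvlist_t *nvhis, **records;
 *        uint_t numrecords, i;
 *        uint64_t tsec;
 *        char *cmd;
 *
 *        if (zpool_get_history(zhp, &nvhis) == 0) {
 *                verify(nvlist_lookup_nvlist_array(nvhis,
 *                    ZPOOL_HIST_RECORD, &records, &numrecords) == 0);
 *                for (i = 0; i < numrecords; i++) {
 *                        if (nvlist_lookup_uint64(records[i],
 *                            ZPOOL_HIST_TIME, &tsec) == 0 &&
 *                            nvlist_lookup_string(records[i],
 *                            ZPOOL_HIST_CMD, &cmd) == 0)
 *                                (void) printf("%llu %s\n",
 *                                    (u_longlong_t)tsec, cmd);
 *                }
 *                nvlist_free(nvhis);
 *        }
 */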
2680
2681 void
2682 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
2683     char *pathname, size_t len)
2684 {
2685         zfs_cmd_t zc = { 0 };
2686         boolean_t mounted = B_FALSE;
2687         char *mntpnt = NULL;
2688         char dsname[MAXNAMELEN];
2689
2690         if (dsobj == 0) {
2691                 /* special case for the MOS */
                (void) snprintf(pathname, len, "<metadata>:<0x%llx>",
                    (u_longlong_t)obj);
2693                 return;
2694         }
2695
2696         /* get the dataset's name */
2697         (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2698         zc.zc_obj = dsobj;
2699         if (ioctl(zhp->zpool_hdl->libzfs_fd,
2700             ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
2701                 /* just write out a path of two object numbers */
                (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
                    (u_longlong_t)dsobj, (u_longlong_t)obj);
2704                 return;
2705         }
2706         (void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
2707
2708         /* find out if the dataset is mounted */
2709         mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
2710
2711         /* get the corrupted object's path */
2712         (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
2713         zc.zc_obj = obj;
2714         if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
2715             &zc) == 0) {
2716                 if (mounted) {
2717                         (void) snprintf(pathname, len, "%s%s", mntpnt,
2718                             zc.zc_value);
2719                 } else {
2720                         (void) snprintf(pathname, len, "%s:%s",
2721                             dsname, zc.zc_value);
2722                 }
2723         } else {
                (void) snprintf(pathname, len, "%s:<0x%llx>", dsname,
                    (u_longlong_t)obj);
2725         }
2726         free(mntpnt);
2727 }
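
/*
 * Usage sketch (illustrative): paired with zpool_get_errlog(), this is
 * how "zpool status -v" turns a <dataset, object> pair into something
 * readable:
 *
 *        char pathname[MAXPATHLEN * 2];
 *
 *        zpool_obj_to_path(zhp, dsobj, obj, pathname, sizeof (pathname));
 *        (void) printf("%s\n", pathname);
 */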
2728
2729 #define RDISK_ROOT      "/dev/rdsk"
2730 #define BACKUP_SLICE    "s2"
2731 /*
2732  * Don't start the slice at the default block of 34; many storage
2733  * devices will use a stripe width of 128k, so start there instead.
2734  */
2735 #define NEW_START_BLOCK 256
2736
2737 #if defined(sun)
2738 /*
2739  * Read the EFI label from the config, if a label does not exist then
2740  * pass back the error to the caller. If the caller has passed a non-NULL
2741  * diskaddr argument then we set it to the starting address of the EFI
2742  * partition.
2743  */
2744 static int
2745 read_efi_label(nvlist_t *config, diskaddr_t *sb)
2746 {
2747         char *path;
2748         int fd;
2749         char diskname[MAXPATHLEN];
2750         int err = -1;
2751
2752         if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
2753                 return (err);
2754
2755         (void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT,
2756             strrchr(path, '/'));
2757         if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) {
2758                 struct dk_gpt *vtoc;
2759
2760                 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
2761                         if (sb != NULL)
2762                                 *sb = vtoc->efi_parts[0].p_start;
2763                         efi_free(vtoc);
2764                 }
2765                 (void) close(fd);
2766         }
2767         return (err);
2768 }
2769
2770 /*
2771  * determine where a partition starts on a disk in the current
2772  * configuration
2773  */
2774 static diskaddr_t
2775 find_start_block(nvlist_t *config)
2776 {
2777         nvlist_t **child;
2778         uint_t c, children;
2779         diskaddr_t sb = MAXOFFSET_T;
2780         uint64_t wholedisk;
2781
2782         if (nvlist_lookup_nvlist_array(config,
2783             ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
2784                 if (nvlist_lookup_uint64(config,
2785                     ZPOOL_CONFIG_WHOLE_DISK,
2786                     &wholedisk) != 0 || !wholedisk) {
2787                         return (MAXOFFSET_T);
2788                 }
2789                 if (read_efi_label(config, &sb) < 0)
2790                         sb = MAXOFFSET_T;
2791                 return (sb);
2792         }
2793
2794         for (c = 0; c < children; c++) {
2795                 sb = find_start_block(child[c]);
2796                 if (sb != MAXOFFSET_T) {
2797                         return (sb);
2798                 }
2799         }
2800         return (MAXOFFSET_T);
2801 }
2802 #endif /* sun */
2803
2804 /*
2805  * Label an individual disk.  The name provided is the short name,
2806  * stripped of any leading /dev path.
2807  */
2808 int
2809 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
2810 {
2811 #if defined(sun)
2812         char path[MAXPATHLEN];
2813         struct dk_gpt *vtoc;
2814         int fd;
2815         size_t resv = EFI_MIN_RESV_SIZE;
2816         uint64_t slice_size;
2817         diskaddr_t start_block;
2818         char errbuf[1024];
2819
2820         /* prepare an error message just in case */
2821         (void) snprintf(errbuf, sizeof (errbuf),
2822             dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
2823
2824         if (zhp) {
2825                 nvlist_t *nvroot;
2826
2827                 verify(nvlist_lookup_nvlist(zhp->zpool_config,
2828                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
2829
2830                 if (zhp->zpool_start_block == 0)
2831                         start_block = find_start_block(nvroot);
2832                 else
2833                         start_block = zhp->zpool_start_block;
2834                 zhp->zpool_start_block = start_block;
2835         } else {
2836                 /* new pool */
2837                 start_block = NEW_START_BLOCK;
2838         }
2839
2840         (void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
2841             BACKUP_SLICE);
2842
2843         if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
2844                 /*
2845                  * This shouldn't happen.  We've long since verified that this
2846                  * is a valid device.
2847                  */
2848                 zfs_error_aux(hdl,
2849                     dgettext(TEXT_DOMAIN, "unable to open device"));
2850                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
2851         }
2852
2853         if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
2854                 /*
2855                  * The only way this can fail is if we run out of memory, or we
2856                  * were unable to read the disk's capacity
2857                  */
2858                 if (errno == ENOMEM)
2859                         (void) no_memory(hdl);
2860
2861                 (void) close(fd);
                zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                    "unable to read disk capacity"));
2864
2865                 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
2866         }
2867
2868         slice_size = vtoc->efi_last_u_lba + 1;
2869         slice_size -= EFI_MIN_RESV_SIZE;
2870         if (start_block == MAXOFFSET_T)
2871                 start_block = NEW_START_BLOCK;
2872         slice_size -= start_block;
2873
2874         vtoc->efi_parts[0].p_start = start_block;
2875         vtoc->efi_parts[0].p_size = slice_size;
2876
2877         /*
2878          * Why we use V_USR: V_BACKUP confuses users, and is considered
2879          * disposable by some EFI utilities (since EFI doesn't have a backup
2880          * slice).  V_UNASSIGNED is supposed to be used only for zero size
2881          * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
2882          * etc. were all pretty specific.  V_USR is as close to reality as we
2883          * can get, in the absence of V_OTHER.
2884          */
2885         vtoc->efi_parts[0].p_tag = V_USR;
2886         (void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
2887
2888         vtoc->efi_parts[8].p_start = slice_size + start_block;
2889         vtoc->efi_parts[8].p_size = resv;
2890         vtoc->efi_parts[8].p_tag = V_RESERVED;
2891
2892         if (efi_write(fd, vtoc) != 0) {
                /*
                 * Some block drivers (like pcata) may not support EFI
                 * GPT labels.  Print out a helpful error message
                 * directing the user to manually label the disk and
                 * give a specific slice.
                 */
2899                 (void) close(fd);
2900                 efi_free(vtoc);
2901
2902                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2903                     "try using fdisk(1M) and then provide a specific slice"));
2904                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
2905         }
2906
2907         (void) close(fd);
2908         efi_free(vtoc);
2909 #endif /* sun */
2910         return (0);
2911 }
2912
2913 static boolean_t
2914 supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf)
2915 {
2916         char *type;
2917         nvlist_t **child;
2918         uint_t children, c;
2919
2920         verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0);
2921         if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
2922             strcmp(type, VDEV_TYPE_FILE) == 0 ||
2923             strcmp(type, VDEV_TYPE_LOG) == 0 ||
2924             strcmp(type, VDEV_TYPE_MISSING) == 0) {
2925                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2926                     "vdev type '%s' is not supported"), type);
2927                 (void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf);
2928                 return (B_FALSE);
2929         }
2930         if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
2931             &child, &children) == 0) {
2932                 for (c = 0; c < children; c++) {
2933                         if (!supported_dump_vdev_type(hdl, child[c], errbuf))
2934                                 return (B_FALSE);
2935                 }
2936         }
2937         return (B_TRUE);
2938 }
2939
2940 /*
2941  * check if this zvol is allowable for use as a dump device; zero if
2942  * it is, > 0 if it isn't, < 0 if it isn't a zvol
2943  */
2944 int
2945 zvol_check_dump_config(char *arg)
2946 {
2947         zpool_handle_t *zhp = NULL;
2948         nvlist_t *config, *nvroot;
2949         char *p, *volname;
2950         nvlist_t **top;
2951         uint_t toplevels;
2952         libzfs_handle_t *hdl;
2953         char errbuf[1024];
2954         char poolname[ZPOOL_MAXNAMELEN];
2955         int pathlen = strlen(ZVOL_FULL_DEV_DIR);
2956         int ret = 1;
2957
2958         if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) {
2959                 return (-1);
2960         }
2961
2962         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2963             "dump is not supported on device '%s'"), arg);
2964
2965         if ((hdl = libzfs_init()) == NULL)
2966                 return (1);
2967         libzfs_print_on_error(hdl, B_TRUE);
2968
2969         volname = arg + pathlen;
2970
2971         /* check the configuration of the pool */
2972         if ((p = strchr(volname, '/')) == NULL) {
2973                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2974                     "malformed dataset name"));
2975                 (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
                goto out;       /* don't leak the libzfs handle */
2977         } else if (p - volname >= ZFS_MAXNAMELEN) {
2978                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2979                     "dataset name is too long"));
2980                 (void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf);
                goto out;       /* don't leak the libzfs handle */
2982         } else {
2983                 (void) strncpy(poolname, volname, p - volname);
2984                 poolname[p - volname] = '\0';
2985         }
2986
2987         if ((zhp = zpool_open(hdl, poolname)) == NULL) {
2988                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2989                     "could not open pool '%s'"), poolname);
2990                 (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
2991                 goto out;
2992         }
2993         config = zpool_get_config(zhp, NULL);
2994         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2995             &nvroot) != 0) {
2996                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                    "could not obtain vdev configuration for '%s'"), poolname);
2998                 (void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
2999                 goto out;
3000         }
3001
3002         verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
3003             &top, &toplevels) == 0);
3004         if (toplevels != 1) {
3005                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3006                     "'%s' has multiple top level vdevs"), poolname);
3007                 (void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf);
3008                 goto out;
3009         }
3010
3011         if (!supported_dump_vdev_type(hdl, top[0], errbuf)) {
3012                 goto out;
3013         }
3014         ret = 0;
3015
3016 out:
3017         if (zhp)
3018                 zpool_close(zhp);
3019         libzfs_fini(hdl);
3020         return (ret);
3021 }
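
/*
 * Usage sketch (illustrative; the zvol path is hypothetical and must
 * begin with ZVOL_FULL_DEV_DIR):
 *
 *        if (zvol_check_dump_config("/dev/zvol/dsk/rpool/dump") == 0)
 *                (void) printf("zvol may be configured as a dump device\n");
 */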