1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25
26 #pragma ident   "%Z%%M% %I%     %E% SMI"
27
28 /*
29  * Pool import support functions.
30  *
31  * To import a pool, we rely on reading the configuration information from the
32  * ZFS label of each device.  If we successfully read the label, then we
33  * organize the configuration information in the following hierarchy:
34  *
35  *      pool guid -> toplevel vdev guid -> label txg
36  *
37  * Duplicate entries matching this same tuple will be discarded.  Once we have
38  * examined every device, we pick the best label txg config for each toplevel
39  * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
40  * update any paths that have changed.  Finally, we attempt to import the pool
41  * using our derived config, and record the results.
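 *
 * For example, if two devices hold labels for the same pool and the same
 * toplevel vdev, one written at txg 10 and one at txg 12, both are recorded
 * under that (pool guid, toplevel vdev guid) pair and only the txg 12 config
 * is used when the pool configuration is assembled.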
42  */
43
44 #include <devid.h>
45 #include <dirent.h>
46 #include <errno.h>
47 #include <libintl.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <sys/stat.h>
51 #include <unistd.h>
52 #include <fcntl.h>
53 #include <libgeom.h>
54
55 #include <sys/vdev_impl.h>
56
57 #include "libzfs.h"
58 #include "libzfs_impl.h"
59
60 /*
61  * Intermediate structures used to gather configuration information.
62  */
63 typedef struct config_entry {
64         uint64_t                ce_txg;
65         nvlist_t                *ce_config;
66         struct config_entry     *ce_next;
67 } config_entry_t;
68
69 typedef struct vdev_entry {
70         uint64_t                ve_guid;
71         config_entry_t          *ve_configs;
72         struct vdev_entry       *ve_next;
73 } vdev_entry_t;
74
75 typedef struct pool_entry {
76         uint64_t                pe_guid;
77         vdev_entry_t            *pe_vdevs;
78         struct pool_entry       *pe_next;
79 } pool_entry_t;
80
81 typedef struct name_entry {
82         char                    *ne_name;
83         uint64_t                ne_guid;
84         struct name_entry       *ne_next;
85 } name_entry_t;
86
87 typedef struct pool_list {
88         pool_entry_t            *pools;
89         name_entry_t            *names;
90 } pool_list_t;
91
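/*
 * While scanning, these structures form nested linked lists that mirror the
 * hierarchy described above, roughly:
 *
 *        pool_list_t
 *            pools -> pool_entry_t (pool guid)
 *                         pe_vdevs -> vdev_entry_t (toplevel vdev guid)
 *                                         ve_configs -> config_entry_t (txg, config)
 *            names -> name_entry_t (vdev guid -> device path)
 */
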
92 static char *
93 get_devid(const char *path)
94 {
95         int fd;
96         ddi_devid_t devid;
97         char *minor, *ret;
98
99         if ((fd = open(path, O_RDONLY)) < 0)
100                 return (NULL);
101
102         minor = NULL;
103         ret = NULL;
104         if (devid_get(fd, &devid) == 0) {
105                 if (devid_get_minor_name(fd, &minor) == 0)
106                         ret = devid_str_encode(devid, minor);
107                 if (minor != NULL)
108                         devid_str_free(minor);
109                 devid_free(devid);
110         }
111         (void) close(fd);
112
113         return (ret);
114 }
115
116 /*
117  * Go through and fix up any path and/or devid information for the given vdev
118  * configuration.
119  */
120 static int
121 fix_paths(nvlist_t *nv, name_entry_t *names)
122 {
123         nvlist_t **child;
124         uint_t c, children;
125         uint64_t guid;
126         name_entry_t *ne, *best;
127         char *path, *devid;
128         int matched;
129
130         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
131             &child, &children) == 0) {
132                 for (c = 0; c < children; c++)
133                         if (fix_paths(child[c], names) != 0)
134                                 return (-1);
135                 return (0);
136         }
137
138         /*
139          * This is a leaf (file or disk) vdev.  In either case, go through
140          * the name list and see if we find a matching guid.  If so, replace
141          * the path and see if we can calculate a new devid.
142          *
143          * There may be multiple names associated with a particular guid, in
144          * which case we have overlapping slices or multiple paths to the same
145          * disk.  If this is the case, then we want to pick the path that is
146          * the most similar to the original, where "most similar" is the number
147          * of matching characters starting from the end of the path.  This will
148          * preserve slice numbers even if the disks have been reorganized, and
149          * will also catch preferred disk names if multiple paths exist.
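         *
         * For example, if the label was written when the device was
         * /dev/da0s1a and the same guid is now seen at both /dev/da1s1a and
         * /dev/da1s1d (hypothetical paths), the first candidate is chosen
         * because three trailing characters ("s1a") match, versus none for
         * the second.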
150          */
151         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
152         if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
153                 path = NULL;
154
155         matched = 0;
156         best = NULL;
157         for (ne = names; ne != NULL; ne = ne->ne_next) {
158                 if (ne->ne_guid == guid) {
159                         const char *src, *dst;
160                         int count;
161
162                         if (path == NULL) {
163                                 best = ne;
164                                 break;
165                         }
166
167                         src = ne->ne_name + strlen(ne->ne_name) - 1;
168                         dst = path + strlen(path) - 1;
169                         for (count = 0; src >= ne->ne_name && dst >= path;
170                             src--, dst--, count++)
171                                 if (*src != *dst)
172                                         break;
173
174                         /*
175                          * At this point, 'count' is the number of characters
176                          * matched from the end.
177                          */
178                         if (count > matched || best == NULL) {
179                                 best = ne;
180                                 matched = count;
181                         }
182                 }
183         }
184
185         if (best == NULL)
186                 return (0);
187
188         if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
189                 return (-1);
190
191         if ((devid = get_devid(best->ne_name)) == NULL) {
192                 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
193         } else {
194                 if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0)
195                         return (-1);
196                 devid_str_free(devid);
197         }
198
199         return (0);
200 }
201
202 /*
203  * Add the given configuration to the list of known devices.
204  */
205 static int
206 add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
207     nvlist_t *config)
208 {
209         uint64_t pool_guid, vdev_guid, top_guid, txg, state;
210         pool_entry_t *pe;
211         vdev_entry_t *ve;
212         config_entry_t *ce;
213         name_entry_t *ne;
214
215         /*
216          * If this is a hot spare not currently in use, add it to the list of
217          * names to translate, but don't do anything else.
218          */
219         if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
220             &state) == 0 && state == POOL_STATE_SPARE &&
221             nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
222                 if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
223                         return (-1);
224
225                 if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
226                         free(ne);
227                         return (-1);
228                 }
229                 ne->ne_guid = vdev_guid;
230                 ne->ne_next = pl->names;
231                 pl->names = ne;
232                 return (0);
233         }
234
235         /*
236          * If we have a valid config but cannot read any of these fields, then
237          * it means we have a half-initialized label.  In vdev_label_init()
238          * we write a label with txg == 0 so that we can identify the device
239          * in case the user refers to the same disk later on.  If we fail to
240          * create the pool, we'll be left with a label in this state
241          * which should not be considered part of a valid pool.
242          */
243         if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
244             &pool_guid) != 0 ||
245             nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
246             &vdev_guid) != 0 ||
247             nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID,
248             &top_guid) != 0 ||
249             nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
250             &txg) != 0 || txg == 0) {
251                 nvlist_free(config);
252                 return (0);
253         }
254
255         /*
256          * First, see if we know about this pool.  If not, then add it to the
257          * list of known pools.
258          */
259         for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
260                 if (pe->pe_guid == pool_guid)
261                         break;
262         }
263
264         if (pe == NULL) {
265                 if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
266                         nvlist_free(config);
267                         return (-1);
268                 }
269                 pe->pe_guid = pool_guid;
270                 pe->pe_next = pl->pools;
271                 pl->pools = pe;
272         }
273
274         /*
275          * Second, see if we know about this toplevel vdev.  Add it if it's
276          * missing.
277          */
278         for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
279                 if (ve->ve_guid == top_guid)
280                         break;
281         }
282
283         if (ve == NULL) {
284                 if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
285                         nvlist_free(config);
286                         return (-1);
287                 }
288                 ve->ve_guid = top_guid;
289                 ve->ve_next = pe->pe_vdevs;
290                 pe->pe_vdevs = ve;
291         }
292
293         /*
294          * Third, see if we have a config with a matching transaction group.  If
295          * so, then we do nothing.  Otherwise, add it to the list of known
296          * configs.
297          */
298         for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) {
299                 if (ce->ce_txg == txg)
300                         break;
301         }
302
303         if (ce == NULL) {
304                 if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
305                         nvlist_free(config);
306                         return (-1);
307                 }
308                 ce->ce_txg = txg;
309                 ce->ce_config = config;
310                 ce->ce_next = ve->ve_configs;
311                 ve->ve_configs = ce;
312         } else {
313                 nvlist_free(config);
314         }
315
316         /*
317          * At this point we've successfully added our config to the list of
318          * known configs.  The last thing to do is add the vdev guid -> path
319          * mappings so that we can fix up the configuration as necessary before
320          * doing the import.
321          */
322         if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
323                 return (-1);
324
325         if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
326                 free(ne);
327                 return (-1);
328         }
329
330         ne->ne_guid = vdev_guid;
331         ne->ne_next = pl->names;
332         pl->names = ne;
333
334         return (0);
335 }
336
337 /*
338  * Determine if the named pool is active and matches the given GUID.
339  */
340 static int
341 pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
342     boolean_t *isactive)
343 {
344         zpool_handle_t *zhp;
345         uint64_t theguid;
346
347         if (zpool_open_silent(hdl, name, &zhp) != 0)
348                 return (-1);
349
350         if (zhp == NULL) {
351                 *isactive = B_FALSE;
352                 return (0);
353         }
354
355         verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
356             &theguid) == 0);
357
358         zpool_close(zhp);
359
360         *isactive = (theguid == guid);
361         return (0);
362 }
363
364 /*
365  * Convert our list of pools into the definitive set of configurations.  We
366  * start by picking the best config for each toplevel vdev.  Once that's done,
367  * we assemble the toplevel vdevs into a full config for the pool.  We make a
368  * pass to fix up any incorrect paths, and then add it to the main list to
369  * return to the user.
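 *
 * The nvlist handed back to the caller maps each discovered pool name to its
 * assembled configuration; informally (with "tank" as a stand-in pool name):
 *
 *        "tank" -> { version, pool guid, name, pool state, hostid/hostname
 *                    when present, and a vdev tree rooted at a "root" vdev
 *                    whose children are the toplevel vdevs }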
370  */
371 static nvlist_t *
372 get_configs(libzfs_handle_t *hdl, pool_list_t *pl)
373 {
374         pool_entry_t *pe;
375         vdev_entry_t *ve;
376         config_entry_t *ce;
377         nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
378         nvlist_t **spares;
379         uint_t i, nspares;
380         boolean_t config_seen;
381         uint64_t best_txg;
382         char *name, *hostname;
383         zfs_cmd_t zc = { 0 };
384         uint64_t version, guid;
385         size_t len;
386         int err;
387         uint_t children = 0;
388         nvlist_t **child = NULL;
389         uint_t c;
390         boolean_t isactive;
391         uint64_t hostid;
392
393         if (nvlist_alloc(&ret, 0, 0) != 0)
394                 goto nomem;
395
396         for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
397                 uint64_t id;
398
399                 if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
400                         goto nomem;
401                 config_seen = B_FALSE;
402
403                 /*
404                  * Iterate over all toplevel vdevs.  Grab the pool configuration
405                  * from the first one we find, and then go through the rest and
406                  * add them as necessary to the 'vdevs' member of the config.
407                  */
408                 for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
409
410                         /*
411                          * Determine the best configuration for this vdev by
412                          * selecting the config with the latest transaction
413                          * group.
414                          */
415                         best_txg = 0;
416                         for (ce = ve->ve_configs; ce != NULL;
417                             ce = ce->ce_next) {
418
419                                 if (ce->ce_txg > best_txg) {
420                                         tmp = ce->ce_config;
421                                         best_txg = ce->ce_txg;
422                                 }
423                         }
424
425                         if (!config_seen) {
426                                 /*
427                                  * Copy the relevant pieces of data to the pool
428                                  * configuration:
429                                  *
430                                  *      version
431                                  *      pool guid
432                                  *      name
433                                  *      pool state
434                                  *      hostid (if available)
435                                  *      hostname (if available)
436                                  */
437                                 uint64_t state;
438
439                                 verify(nvlist_lookup_uint64(tmp,
440                                     ZPOOL_CONFIG_VERSION, &version) == 0);
441                                 if (nvlist_add_uint64(config,
442                                     ZPOOL_CONFIG_VERSION, version) != 0)
443                                         goto nomem;
444                                 verify(nvlist_lookup_uint64(tmp,
445                                     ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
446                                 if (nvlist_add_uint64(config,
447                                     ZPOOL_CONFIG_POOL_GUID, guid) != 0)
448                                         goto nomem;
449                                 verify(nvlist_lookup_string(tmp,
450                                     ZPOOL_CONFIG_POOL_NAME, &name) == 0);
451                                 if (nvlist_add_string(config,
452                                     ZPOOL_CONFIG_POOL_NAME, name) != 0)
453                                         goto nomem;
454                                 verify(nvlist_lookup_uint64(tmp,
455                                     ZPOOL_CONFIG_POOL_STATE, &state) == 0);
456                                 if (nvlist_add_uint64(config,
457                                     ZPOOL_CONFIG_POOL_STATE, state) != 0)
458                                         goto nomem;
459                                 hostid = 0;
460                                 if (nvlist_lookup_uint64(tmp,
461                                     ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
462                                         if (nvlist_add_uint64(config,
463                                             ZPOOL_CONFIG_HOSTID, hostid) != 0)
464                                                 goto nomem;
465                                         verify(nvlist_lookup_string(tmp,
466                                             ZPOOL_CONFIG_HOSTNAME,
467                                             &hostname) == 0);
468                                         if (nvlist_add_string(config,
469                                             ZPOOL_CONFIG_HOSTNAME,
470                                             hostname) != 0)
471                                                 goto nomem;
472                                 }
473
474                                 config_seen = B_TRUE;
475                         }
476
477                         /*
478                          * Add this top-level vdev to the child array.
479                          */
480                         verify(nvlist_lookup_nvlist(tmp,
481                             ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
482                         verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
483                             &id) == 0);
484                         if (id >= children) {
485                                 nvlist_t **newchild;
486
487                                 newchild = zfs_alloc(hdl, (id + 1) *
488                                     sizeof (nvlist_t *));
489                                 if (newchild == NULL)
490                                         goto nomem;
491
492                                 for (c = 0; c < children; c++)
493                                         newchild[c] = child[c];
494
495                                 free(child);
496                                 child = newchild;
497                                 children = id + 1;
498                         }
499                         if (nvlist_dup(nvtop, &child[id], 0) != 0)
500                                 goto nomem;
501
502                 }
503
504                 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
505                     &guid) == 0);
506
507                 /*
508                  * Look for any missing top-level vdevs.  For each one found,
509                  * create a faked-up 'missing' vdev as a placeholder.  We cannot
510                  * simply compress the child array, because the kernel performs
511                  * certain checks to make sure the vdev IDs match their location
512                  * in the configuration.
513                  */
514                 for (c = 0; c < children; c++)
515                         if (child[c] == NULL) {
516                                 nvlist_t *missing;
517                                 if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
518                                     0) != 0)
519                                         goto nomem;
520                                 if (nvlist_add_string(missing,
521                                     ZPOOL_CONFIG_TYPE,
522                                     VDEV_TYPE_MISSING) != 0 ||
523                                     nvlist_add_uint64(missing,
524                                     ZPOOL_CONFIG_ID, c) != 0 ||
525                                     nvlist_add_uint64(missing,
526                                     ZPOOL_CONFIG_GUID, 0ULL) != 0) {
527                                         nvlist_free(missing);
528                                         goto nomem;
529                                 }
530                                 child[c] = missing;
531                         }
532
533                 /*
534                  * Put all of this pool's top-level vdevs into a root vdev.
535                  */
536                 if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
537                         goto nomem;
538                 if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
539                     VDEV_TYPE_ROOT) != 0 ||
540                     nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
541                     nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
542                     nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
543                     child, children) != 0) {
544                         nvlist_free(nvroot);
545                         goto nomem;
546                 }
547
548                 for (c = 0; c < children; c++)
549                         nvlist_free(child[c]);
550                 free(child);
551                 children = 0;
552                 child = NULL;
553
554                 /*
555                  * Go through and fix up any paths and/or devids based on our
556                  * known list of vdev GUID -> path mappings.
557                  */
558                 if (fix_paths(nvroot, pl->names) != 0) {
559                         nvlist_free(nvroot);
560                         goto nomem;
561                 }
562
563                 /*
564                  * Add the root vdev to this pool's configuration.
565                  */
566                 if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
567                     nvroot) != 0) {
568                         nvlist_free(nvroot);
569                         goto nomem;
570                 }
571                 nvlist_free(nvroot);
572
573                 /*
574                  * Determine if this pool is currently active, in which case we
575                  * can't actually import it.
576                  */
577                 verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
578                     &name) == 0);
579                 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
580                     &guid) == 0);
581
582                 if (pool_active(hdl, name, guid, &isactive) != 0)
583                         goto error;
584
585                 if (isactive) {
586                         nvlist_free(config);
587                         config = NULL;
588                         continue;
589                 }
590
591                 /*
592                  * Try to do the import in order to get vdev state.
593                  */
594                 if (zcmd_write_src_nvlist(hdl, &zc, config, &len) != 0)
595                         goto error;
596
597                 nvlist_free(config);
598                 config = NULL;
599
600                 if (zcmd_alloc_dst_nvlist(hdl, &zc, len * 2) != 0) {
601                         zcmd_free_nvlists(&zc);
602                         goto error;
603                 }
604
605                 while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
606                     &zc)) != 0 && errno == ENOMEM) {
607                         if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
608                                 zcmd_free_nvlists(&zc);
609                                 goto error;
610                         }
611                 }
612
613                 if (err) {
614                         (void) zpool_standard_error(hdl, errno,
615                             dgettext(TEXT_DOMAIN, "cannot discover pools"));
616                         zcmd_free_nvlists(&zc);
617                         goto error;
618                 }
619
620                 if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) {
621                         zcmd_free_nvlists(&zc);
622                         goto error;
623                 }
624
625                 zcmd_free_nvlists(&zc);
626
627                 /*
628                  * Go through and update the paths for spares, now that we have
629                  * them.
630                  */
631                 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
632                     &nvroot) == 0);
633                 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
634                     &spares, &nspares) == 0) {
635                         for (i = 0; i < nspares; i++) {
636                                 if (fix_paths(spares[i], pl->names) != 0)
637                                         goto nomem;
638                         }
639                 }
640
641                 /*
642                  * Restore the original information read from the actual label.
643                  */
644                 (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
645                     DATA_TYPE_UINT64);
646                 (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
647                     DATA_TYPE_STRING);
648                 if (hostid != 0) {
649                         verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
650                             hostid) == 0);
651                         verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
652                             hostname) == 0);
653                 }
654
655                 /*
656                  * Add this pool to the list of configs.
657                  */
658                 verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
659                     &name) == 0);
660                 if (nvlist_add_nvlist(ret, name, config) != 0)
661                         goto nomem;
662
663                 nvlist_free(config);
664                 config = NULL;
665         }
666
667         return (ret);
668
669 nomem:
670         (void) no_memory(hdl);
671 error:
672         nvlist_free(config);
673         nvlist_free(ret);
674         for (c = 0; c < children; c++)
675                 nvlist_free(child[c]);
676         free(child);
677
678         return (NULL);
679 }
680
681 /*
682  * Return the offset of the given label.
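 *
 * With VDEV_LABELS == 4, labels 0 and 1 sit at the front of the device
 * (offsets 0 and sizeof (vdev_label_t)) and labels 2 and 3 at the tail
 * (size - 2 * sizeof (vdev_label_t) and size - sizeof (vdev_label_t)).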
683  */
684 static uint64_t
685 label_offset(size_t size, int l)
686 {
687         return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
688             0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
689 }
690
691 /*
692  * Given a file descriptor, read the label information and return an nvlist
693  * describing the configuration, if there is one.
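 *
 * A minimal usage sketch (illustrative only; "/dev/da0" stands in for any
 * device node and error handling is abbreviated):
 *
 *        nvlist_t *config;
 *        int fd;
 *
 *        if ((fd = open("/dev/da0", O_RDONLY)) >= 0) {
 *                if (zpool_read_label(fd, &config) == 0 && config != NULL) {
 *                        ... inspect ZPOOL_CONFIG_POOL_NAME, etc. ...
 *                        nvlist_free(config);
 *                }
 *                (void) close(fd);
 *        }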
694  */
695 int
696 zpool_read_label(int fd, nvlist_t **config)
697 {
698         struct stat64 statbuf;
699         int l;
700         vdev_label_t *label;
701         uint64_t state, txg;
702
703         *config = NULL;
704
705         if (fstat64(fd, &statbuf) == -1)
706                 return (0);
707
708         if ((label = malloc(sizeof (vdev_label_t))) == NULL)
709                 return (-1);
710
711         for (l = 0; l < VDEV_LABELS; l++) {
712                 if (pread(fd, label, sizeof (vdev_label_t),
713                     label_offset(statbuf.st_size, l)) != sizeof (vdev_label_t))
714                         continue;
715
716                 if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
717                     sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
718                         continue;
719
720                 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
721                     &state) != 0 || state > POOL_STATE_SPARE) {
722                         nvlist_free(*config);
723                         continue;
724                 }
725
726                 if (state != POOL_STATE_SPARE &&
727                     (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
728                     &txg) != 0 || txg == 0)) {
729                         nvlist_free(*config);
730                         continue;
731                 }
732
733                 free(label);
734                 return (0);
735         }
736
737         free(label);
738         *config = NULL;
739         return (0);
740 }
741
742 /*
743  * Find all pools stored on disk, including partial pools which are not
744  * available to import.  On FreeBSD the supplied directory list is ignored;
745  * every GEOM provider under /dev is examined instead.
746  */
747 nvlist_t *
748 zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
749 {
750         int i;
751         char path[MAXPATHLEN];
752         nvlist_t *ret = NULL, *config;
753         int fd;
754         pool_list_t pools = { 0 };
755         pool_entry_t *pe, *penext;
756         vdev_entry_t *ve, *venext;
757         config_entry_t *ce, *cenext;
758         name_entry_t *ne, *nenext;
759         struct gmesh mesh;
760         struct gclass *mp;
761         struct ggeom *gp;
762         struct gprovider *pp;
763
764         /*
765          * Go through and read the label configuration information from every
766          * possible device, organizing the information according to pool GUID
767          * and toplevel GUID.
768          */
769
770         fd = geom_gettree(&mesh);
771         assert(fd == 0);
772
773         LIST_FOREACH(mp, &mesh.lg_class, lg_class) {
774                 LIST_FOREACH(gp, &mp->lg_geom, lg_geom) {
775                         LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
776
777                                 (void) snprintf(path, sizeof (path), "%s%s",
778                                     _PATH_DEV, pp->lg_name);
779
780                                 if ((fd = open64(path, O_RDONLY)) < 0)
781                                         continue;
782
783                                 if ((zpool_read_label(fd, &config)) != 0) {
784                                         (void) no_memory(hdl);
785                                         goto error;
786                                 }
787
788                                 (void) close(fd);
789
790                                 if (config == NULL)
791                                         continue;
792
793                                 if (add_config(hdl, &pools, path, config) != 0)
794                                         goto error;
795                         }
796                 }
797         }
798
799         geom_deletetree(&mesh);
800
801         ret = get_configs(hdl, &pools);
802
803 error:
804         for (pe = pools.pools; pe != NULL; pe = penext) {
805                 penext = pe->pe_next;
806                 for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
807                         venext = ve->ve_next;
808                         for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
809                                 cenext = ce->ce_next;
810                                 if (ce->ce_config)
811                                         nvlist_free(ce->ce_config);
812                                 free(ce);
813                         }
814                         free(ve);
815                 }
816                 free(pe);
817         }
818
819         for (ne = pools.names; ne != NULL; ne = nenext) {
820                 nenext = ne->ne_next;
821                 if (ne->ne_name)
822                         free(ne->ne_name);
823                 free(ne);
824         }
825
826         return (ret);
827 }
828
829 boolean_t
830 find_guid(nvlist_t *nv, uint64_t guid)
831 {
832         uint64_t tmp;
833         nvlist_t **child;
834         uint_t c, children;
835
836         verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0);
837         if (tmp == guid)
838                 return (B_TRUE);
839
840         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
841             &child, &children) == 0) {
842                 for (c = 0; c < children; c++)
843                         if (find_guid(child[c], guid))
844                                 return (B_TRUE);
845         }
846
847         return (B_FALSE);
848 }
849
850 typedef struct spare_cbdata {
851         uint64_t        cb_guid;
852         zpool_handle_t  *cb_zhp;
853 } spare_cbdata_t;
854
855 static int
856 find_spare(zpool_handle_t *zhp, void *data)
857 {
858         spare_cbdata_t *cbp = data;
859         nvlist_t **spares;
860         uint_t i, nspares;
861         uint64_t guid;
862         nvlist_t *nvroot;
863
864         verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
865             &nvroot) == 0);
866
867         if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
868             &spares, &nspares) == 0) {
869                 for (i = 0; i < nspares; i++) {
870                         verify(nvlist_lookup_uint64(spares[i],
871                             ZPOOL_CONFIG_GUID, &guid) == 0);
872                         if (guid == cbp->cb_guid) {
873                                 cbp->cb_zhp = zhp;
874                                 return (1);
875                         }
876                 }
877         }
878
879         zpool_close(zhp);
880         return (0);
881 }
882
883 /*
884  * Determines if the pool is in use.  If so, it returns true and the state of
885  * the pool as well as the name of the pool.  The name string is allocated and
886  * must be freed by the caller.
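 *
 * A minimal usage sketch (illustrative only; assumes an open libzfs handle
 * 'hdl' and a descriptor 'fd' for the device being examined):
 *
 *        pool_state_t state;
 *        boolean_t inuse;
 *        char *name;
 *
 *        if (zpool_in_use(hdl, fd, &state, &name, &inuse) == 0 && inuse) {
 *                ... the device belongs to pool 'name', which is in 'state' ...
 *                free(name);
 *        }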
887  */
888 int
889 zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
890     boolean_t *inuse)
891 {
892         nvlist_t *config;
893         char *name;
894         boolean_t ret;
895         uint64_t guid, vdev_guid;
896         zpool_handle_t *zhp;
897         nvlist_t *pool_config;
898         uint64_t stateval, isspare;
899         spare_cbdata_t cb = { 0 };
900         boolean_t isactive;
901
902         *inuse = B_FALSE;
903
904         if (zpool_read_label(fd, &config) != 0) {
905                 (void) no_memory(hdl);
906                 return (-1);
907         }
908
909         if (config == NULL)
910                 return (0);
911
912         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
913             &stateval) == 0);
914         verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
915             &vdev_guid) == 0);
916
917         if (stateval != POOL_STATE_SPARE) {
918                 verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
919                     &name) == 0);
920                 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
921                     &guid) == 0);
922         }
923
924         switch (stateval) {
925         case POOL_STATE_EXPORTED:
926                 ret = B_TRUE;
927                 break;
928
929         case POOL_STATE_ACTIVE:
930                 /*
931                  * For an active pool, we have to determine if it's really part
932                  * of a currently active pool (in which case the pool will exist
933                  * and the guid will be the same), or whether it's part of an
934                  * active pool that was disconnected without being explicitly
935                  * exported.
936                  */
937                 if (pool_active(hdl, name, guid, &isactive) != 0) {
938                         nvlist_free(config);
939                         return (-1);
940                 }
941
942                 if (isactive) {
943                         /*
944                          * Because the device may have been removed while
945                          * offlined, we only report it as active if the vdev is
946                          * still present in the config.  Otherwise, pretend like
947                          * it's not in use.
948                          */
949                         if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
950                             (pool_config = zpool_get_config(zhp, NULL))
951                             != NULL) {
952                                 nvlist_t *nvroot;
953
954                                 verify(nvlist_lookup_nvlist(pool_config,
955                                     ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
956                                 ret = find_guid(nvroot, vdev_guid);
957                         } else {
958                                 ret = B_FALSE;
959                         }
960
961                         /*
962                          * If this is an active spare within another pool, we
963                          * treat it like an unused hot spare.  This allows the
964                          * user to create a pool with a hot spare that is
965                          * in use within another pool.  Since we return B_TRUE,
966                          * libdiskmgt will continue to prevent generic consumers
967                          * from using the device.
968                          */
969                         if (ret && nvlist_lookup_uint64(config,
970                             ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
971                                 stateval = POOL_STATE_SPARE;
972
973                         if (zhp != NULL)
974                                 zpool_close(zhp);
975                 } else {
976                         stateval = POOL_STATE_POTENTIALLY_ACTIVE;
977                         ret = B_TRUE;
978                 }
979                 break;
980
981         case POOL_STATE_SPARE:
982                 /*
983                  * For a hot spare, it can be either definitively in use, or
984                  * potentially active.  To determine if it's in use, we iterate
985                  * over all pools in the system and search for one with a spare
986                  * with a matching guid.
987                  *
988                  * Due to the shared nature of spares, we don't actually report
989                  * the potentially active case as in use.  This means the user
990                  * can freely create pools on the hot spares of exported pools,
991                  * but to do otherwise makes the resulting code complicated, and
992                  * we end up having to deal with this case anyway.
993                  */
994                 cb.cb_zhp = NULL;
995                 cb.cb_guid = vdev_guid;
996                 if (zpool_iter(hdl, find_spare, &cb) == 1) {
997                         name = (char *)zpool_get_name(cb.cb_zhp);
998                         ret = B_TRUE;
999                 } else {
1000                         ret = B_FALSE;
1001                 }
1002                 break;
1003
1004         default:
1005                 ret = B_FALSE;
1006         }
1007
1008
1009         if (ret) {
1010                 if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
1011                         nvlist_free(config);
1012                         return (-1);
1013                 }
1014                 *state = (pool_state_t)stateval;
1015         }
1016
1017         if (cb.cb_zhp)
1018                 zpool_close(cb.cb_zhp);
1019
1020         nvlist_free(config);
1021         *inuse = ret;
1022         return (0);
1023 }