]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
MFC r305456 (by avg): fix zfs pool creation accidentally broken by r305331
[FreeBSD/stable/10.git] / sys / cddl / contrib / opensolaris / uts / common / fs / zfs / vdev_geom.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
23  * All rights reserved.
24  *
25  * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
26  */
27
28 #include <sys/zfs_context.h>
29 #include <sys/param.h>
30 #include <sys/kernel.h>
31 #include <sys/bio.h>
32 #include <sys/disk.h>
33 #include <sys/spa.h>
34 #include <sys/spa_impl.h>
35 #include <sys/vdev_impl.h>
36 #include <sys/fs/zfs.h>
37 #include <sys/zio.h>
38 #include <geom/geom.h>
39 #include <geom/geom_int.h>
40
41 /*
42  * Virtual device vector for GEOM.
43  */
44
/* GEOM consumer attribute-change callback, defined below. */
static g_attrchanged_t vdev_geom_attrchanged;
/* GEOM class under which all ZFS vdev geoms/consumers are grouped. */
struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
	.attrchanged = vdev_geom_attrchanged,
};

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
65
66 static void
67 vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
68
69         int error;
70         uint16_t rate;
71
72         error = g_getattr("GEOM::rotation_rate", cp, &rate);
73         if (error == 0)
74                 vd->vdev_rotation_rate = rate;
75         else
76                 vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
77 }
78
/*
 * GEOM attribute-change callback for vdev consumers.  Mirrors updates
 * of the provider's rotation rate and physical path into the attached
 * vdev; a physical-path change additionally triggers an async SPA
 * config update so the new path is recorded in the pool config.
 */
static void
vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
{
	vdev_t *vd;
	spa_t *spa;
	char *physpath;
	int error, physpath_len;

	vd = cp->private;
	if (vd == NULL)		/* consumer not (or no longer) bound to a vdev */
		return;

	if (strcmp(attr, "GEOM::rotation_rate") == 0) {
		vdev_geom_set_rotation_rate(vd, cp);
		return;
	}

	if (strcmp(attr, "GEOM::physpath") != 0)
		return;

	/* Need a temporary read reference to issue the getattr I/O. */
	if (g_access(cp, 1, 0, 0) != 0)
		return;

	/*
	 * Record/Update physical path information for this device.
	 */
	spa = vd->vdev_spa;
	physpath_len = MAXPATHLEN;
	physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
	error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
	g_access(cp, -1, 0, 0);
	if (error == 0) {
		char *old_physpath;

		/* g_topology lock ensures that vdev has not been closed */
		g_topology_assert();
		old_physpath = vd->vdev_physpath;
		vd->vdev_physpath = spa_strdup(physpath);
		/* Ask the SPA to sync the updated path into the config. */
		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);

		if (old_physpath != NULL)
			spa_strfree(old_physpath);
	}
	g_free(physpath);
}
124
125 static void
126 vdev_geom_orphan(struct g_consumer *cp)
127 {
128         vdev_t *vd;
129
130         g_topology_assert();
131
132         vd = cp->private;
133         if (vd == NULL) {
134                 /* Vdev close in progress.  Ignore the event. */
135                 return;
136         }
137
138         /*
139          * Orphan callbacks occur from the GEOM event thread.
140          * Concurrent with this call, new I/O requests may be
141          * working their way through GEOM about to find out
142          * (only once executed by the g_down thread) that we've
143          * been orphaned from our disk provider.  These I/Os
144          * must be retired before we can detach our consumer.
145          * This is most easily achieved by acquiring the
146          * SPA ZIO configuration lock as a writer, but doing
147          * so with the GEOM topology lock held would cause
148          * a lock order reversal.  Instead, rely on the SPA's
149          * async removal support to invoke a close on this
150          * vdev once it is safe to do so.
151          */
152         vd->vdev_remove_wanted = B_TRUE;
153         spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
154 }
155
/*
 * Attach a consumer to provider "pp" on behalf of vdev "vd", creating
 * the shared "zfs::vdev" geom on first use.  The consumer is opened
 * with one read and one exclusive reference (g_access(cp, 1, 0, 1)).
 * Returns the consumer on success, NULL on attach/open failure.
 * Must be called with the GEOM topology lock held.
 */
static struct g_consumer *
vdev_geom_attach(struct g_provider *pp, vdev_t *vd)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	int error;

	g_topology_assert();

	ZFS_LOG(1, "Attaching to %s.", pp->name);
	/* Do we have geom already? No? Create one. */
	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
		if (gp->flags & G_GEOM_WITHER)
			continue;
		if (strcmp(gp->name, "zfs::vdev") != 0)
			continue;
		break;
	}
	if (gp == NULL) {
		/* First vdev: create the shared geom and its consumer. */
		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
		gp->orphan = vdev_geom_orphan;
		gp->attrchanged = vdev_geom_attrchanged;
		cp = g_new_consumer(gp);
		error = g_attach(cp, pp);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__,
			    __LINE__, error);
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		error = g_access(cp, 1, 0, 1);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_access failed: %d\n", __func__,
			       __LINE__, error);
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
	} else {
		/* Check if we are already connected to this provider. */
		LIST_FOREACH(cp, &gp->consumer, consumer) {
			if (cp->provider == pp) {
				ZFS_LOG(1, "Found consumer for %s.", pp->name);
				break;
			}
		}
		if (cp == NULL) {
			/* New provider: fresh consumer on the shared geom. */
			cp = g_new_consumer(gp);
			error = g_attach(cp, pp);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_attach failed: %d\n",
				    __func__, __LINE__, error);
				g_destroy_consumer(cp);
				return (NULL);
			}
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				g_detach(cp);
				g_destroy_consumer(cp);
				return (NULL);
			}
			ZFS_LOG(1, "Created consumer for %s.", pp->name);
		} else {
			/* Existing consumer: take an additional r1e1 ref. */
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				return (NULL);
			}
			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
		}
	}

	/*
	 * BUG: cp may already belong to a vdev.  This could happen if:
	 * 1) That vdev is a shared spare, or
	 * 2) We are trying to reopen a missing vdev and we are scanning by
	 *    guid.  In that case, we'll ultimately fail to open this consumer,
	 *    but not until after setting the private field.
	 * The solution is to:
	 * 1) Don't set the private field until after the open succeeds, and
	 * 2) Set it to a linked list of vdevs, not just a single vdev
	 */
	cp->private = vd;
	vd->vdev_tsd = cp;

	/* Opt in to GEOM direct dispatch for this consumer's I/O. */
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	return (cp);
}
247
/*
 * Drop this vdev's references on its consumer and, once the consumer
 * is fully unreferenced, detach and destroy it; the shared geom is
 * withered when its last consumer goes away.  Must be called with the
 * GEOM topology lock held.  Safe to call on an already-closed vdev.
 */
static void
vdev_geom_close_locked(vdev_t *vd)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();

	cp = vd->vdev_tsd;
	if (cp == NULL)		/* already closed */
		return;

	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
	KASSERT(vd->vdev_tsd == cp, ("%s: vdev_tsd is not cp", __func__));
	vd->vdev_tsd = NULL;
	vd->vdev_delayed_close = B_FALSE;
	/* Unbind the vdev so vdev_geom_orphan() ignores later events. */
	cp->private = NULL;

	gp = cp->geom;
	/* Drop the r1e1 reference taken in vdev_geom_attach(). */
	g_access(cp, -1, 0, -1);
	/* Destroy consumer on last close. */
	if (cp->acr == 0 && cp->ace == 0) {
		/* Also relinquish any write access still held. */
		if (cp->acw > 0)
			g_access(cp, 0, -cp->acw, 0);
		if (cp->provider != NULL) {
			ZFS_LOG(1, "Destroyed consumer to %s.",
			    cp->provider->name);
			g_detach(cp);
		}
		g_destroy_consumer(cp);
	}
	/* Destroy geom if there are no consumers left. */
	if (LIST_EMPTY(&gp->consumer)) {
		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
		g_wither_geom(gp, ENXIO);
	}
}
285
286 static void
287 nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid)
288 {
289
290         (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid);
291         (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid);
292 }
293
294 static int
295 vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
296 {
297         struct bio *bp;
298         u_char *p;
299         off_t off, maxio;
300         int error;
301
302         ASSERT((offset % cp->provider->sectorsize) == 0);
303         ASSERT((size % cp->provider->sectorsize) == 0);
304
305         bp = g_alloc_bio();
306         off = offset;
307         offset += size;
308         p = data;
309         maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
310         error = 0;
311
312         for (; off < offset; off += maxio, p += maxio, size -= maxio) {
313                 bzero(bp, sizeof(*bp));
314                 bp->bio_cmd = cmd;
315                 bp->bio_done = NULL;
316                 bp->bio_offset = off;
317                 bp->bio_length = MIN(size, maxio);
318                 bp->bio_data = p;
319                 g_io_request(bp, cp);
320                 error = biowait(bp, "vdev_geom_io");
321                 if (error != 0)
322                         break;
323         }
324
325         g_destroy_bio(bp);
326         return (error);
327 }
328
/*
 * Orphan callback for the temporary "taste" geom.  Taste consumers
 * are attached and detached under the topology lock, so this callback
 * should never fire; assert if it does.
 */
static void
vdev_geom_taste_orphan(struct g_consumer *cp)
{

	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
	    cp->provider->name));
}
336
/*
 * Read the first usable vdev label config from the provider behind
 * "cp".  Each of the VDEV_LABELS label copies is tried in turn until
 * one unpacks and passes the state/txg sanity checks.  On success
 * *config holds the unpacked nvlist (caller frees); ENOENT is
 * returned when no usable label exists.  Performs synchronous I/O,
 * so it must run without the GEOM topology lock.
 */
static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
{
	struct g_provider *pp;
	vdev_label_t *label;
	char *p, *buf;
	size_t buflen;
	uint64_t psize;
	off_t offset, size;
	uint64_t state, txg;
	int error, l, len;

	g_topology_assert_not();

	pp = cp->provider;
	ZFS_LOG(1, "Reading config from %s...", pp->name);

	/* Label offsets are computed from the label-aligned media size. */
	psize = pp->mediasize;
	psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));

	/* Read size: sizeof(*label) rounded up to a whole sector. */
	size = sizeof(*label) + pp->sectorsize -
	    ((sizeof(*label) - 1) % pp->sectorsize) - 1;

	label = kmem_alloc(size, KM_SLEEP);
	buflen = sizeof(label->vl_vdev_phys.vp_nvlist);

	*config = NULL;
	for (l = 0; l < VDEV_LABELS; l++) {

		offset = vdev_label_offset(psize, l, 0);
		/* Skip labels we cannot read with sector-aligned I/O. */
		if ((offset % pp->sectorsize) != 0)
			continue;

		if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
			continue;
		buf = label->vl_vdev_phys.vp_nvlist;

		if (nvlist_unpack(buf, buflen, config, 0) != 0)
			continue;

		/* Reject labels with a missing or out-of-range pool state. */
		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		/*
		 * Everything except inactive spares and L2ARC devices
		 * must carry a valid (non-zero) txg.
		 */
		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		break;
	}

	kmem_free(label, size);
	return (*config == NULL ? ENOENT : 0);
}
398
399 static void
400 resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
401 {
402         nvlist_t **new_configs;
403         uint64_t i;
404
405         if (id < *count)
406                 return;
407         new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
408             KM_SLEEP);
409         for (i = 0; i < *count; i++)
410                 new_configs[i] = (*configs)[i];
411         if (*configs != NULL)
412                 kmem_free(*configs, *count * sizeof(void *));
413         *configs = new_configs;
414         *count = id + 1;
415 }
416
/*
 * Merge one freshly read vdev label config ("cfg") into the per-pool
 * array of top-level vdev configs.  The cfg nvlist is consumed: it is
 * either stored in (*configs)[id] or freed.  Entries are keyed by the
 * top-level vdev id; when two labels describe the same id, the one
 * with the higher txg wins.  *known_pool_guid latches the guid of the
 * first accepted pool so labels from other pools sharing the same
 * name are ignored.
 */
static void
process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
    const char *name, uint64_t* known_pool_guid)
{
	nvlist_t *vdev_tree;
	uint64_t pool_guid;
	uint64_t vdev_guid, known_guid;	/* NOTE(review): known_guid unused */
	uint64_t id, txg, known_txg;
	char *pname;
	int i;				/* NOTE(review): i unused */

	/* Ignore labels belonging to a differently named pool. */
	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
	    strcmp(pname, name) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
		goto ignore;

	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
		goto ignore;

	/* Labels that pass the checks above must carry a txg. */
	VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	if (*known_pool_guid != 0) {
		if (pool_guid != *known_pool_guid)
			goto ignore;
	} else
		*known_pool_guid = pool_guid;

	resize_configs(configs, count, id);

	/* Keep only the newest (highest-txg) config for each vdev id. */
	if ((*configs)[id] != NULL) {
		VERIFY(nvlist_lookup_uint64((*configs)[id],
		    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
		if (txg <= known_txg)
			goto ignore;
		nvlist_free((*configs)[id]);
	}

	(*configs)[id] = cfg;
	return;

ignore:
	nvlist_free(cfg);
}
468
469 static int
470 vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
471 {
472         int error;
473
474         if (pp->flags & G_PF_WITHER)
475                 return (EINVAL);
476         g_attach(cp, pp);
477         error = g_access(cp, 1, 0, 0);
478         if (error == 0) {
479                 if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
480                         error = EINVAL;
481                 else if (pp->mediasize < SPA_MINDEVSIZE)
482                         error = EINVAL;
483                 if (error != 0)
484                         g_access(cp, -1, 0, 0);
485         }
486         if (error != 0)
487                 g_detach(cp);
488         return (error);
489 }
490
/*
 * Undo vdev_geom_attach_taster(): drop the read reference, then
 * detach the tasting consumer from its provider.
 */
static void
vdev_geom_detach_taster(struct g_consumer *cp)
{
	g_access(cp, -1, 0, 0);
	g_detach(cp);
}
497
/*
 * Scan every GEOM provider in the system for vdev labels belonging to
 * the pool named "name" and collect its top-level vdev configs in
 * *configs — an array of *count entries indexed by vdev id, built via
 * process_vdev_config().  Returns 0 when at least one config was
 * found, ENOENT otherwise.  Caller frees the array and its nvlists.
 */
int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
	struct g_class *mp;
	struct g_geom *gp, *zgp;
	struct g_provider *pp;
	struct g_consumer *zcp;
	nvlist_t *vdev_cfg;
	uint64_t pool_guid;
	int error;

	DROP_GIANT();
	g_topology_lock();

	/* Temporary geom/consumer used only for tasting labels. */
	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
	/* This orphan function should be never called. */
	zgp->orphan = vdev_geom_taste_orphan;
	zcp = g_new_consumer(zgp);

	*configs = NULL;
	*count = 0;
	pool_guid = 0;
	LIST_FOREACH(mp, &g_classes, class) {
		/* Do not taste providers of our own class. */
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (pp->flags & G_PF_WITHER)
					continue;
				if (vdev_geom_attach_taster(zcp, pp) != 0)
					continue;
				/* Label I/O must run unlocked. */
				g_topology_unlock();
				error = vdev_geom_read_config(zcp, &vdev_cfg);
				g_topology_lock();
				vdev_geom_detach_taster(zcp);
				if (error)
					continue;
				ZFS_LOG(1, "successfully read vdev config");

				/* Consumes vdev_cfg (stored or freed). */
				process_vdev_config(configs, count,
				    vdev_cfg, name, &pool_guid);
			}
		}
	}

	g_destroy_consumer(zcp);
	g_destroy_geom(zgp);
	g_topology_unlock();
	PICKUP_GIANT();

	return (*count > 0 ? 0 : ENOENT);
}
553
554 static void
555 vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid)
556 {
557         nvlist_t *config;
558
559         g_topology_assert_not();
560
561         *pguid = 0;
562         *vguid = 0;
563         if (vdev_geom_read_config(cp, &config) == 0) {
564                 nvlist_get_guids(config, pguid, vguid);
565                 nvlist_free(config);
566         }
567 }
568
569 static struct g_consumer *
570 vdev_geom_attach_by_guids(vdev_t *vd)
571 {
572         struct g_class *mp;
573         struct g_geom *gp, *zgp;
574         struct g_provider *pp;
575         struct g_consumer *cp, *zcp;
576         uint64_t pguid, vguid;
577
578         g_topology_assert();
579
580         zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
581         /* This orphan function should be never called. */
582         zgp->orphan = vdev_geom_taste_orphan;
583         zcp = g_new_consumer(zgp);
584
585         cp = NULL;
586         LIST_FOREACH(mp, &g_classes, class) {
587                 if (mp == &zfs_vdev_class)
588                         continue;
589                 LIST_FOREACH(gp, &mp->geom, geom) {
590                         if (gp->flags & G_GEOM_WITHER)
591                                 continue;
592                         LIST_FOREACH(pp, &gp->provider, provider) {
593                                 if (vdev_geom_attach_taster(zcp, pp) != 0)
594                                         continue;
595                                 g_topology_unlock();
596                                 vdev_geom_read_guids(zcp, &pguid, &vguid);
597                                 g_topology_lock();
598                                 vdev_geom_detach_taster(zcp);
599                                 /* 
600                                  * Check that the label's vdev guid matches the
601                                  * desired guid.  If the label has a pool guid,
602                                  * check that it matches too. (Inactive spares
603                                  * and L2ARCs do not have any pool guid in the
604                                  * label.)
605                                 */
606                                 if ((pguid != 0 &&
607                                      pguid != spa_guid(vd->vdev_spa)) ||
608                                     vguid != vd->vdev_guid)
609                                         continue;
610                                 cp = vdev_geom_attach(pp, vd);
611                                 if (cp == NULL) {
612                                         printf("ZFS WARNING: Unable to "
613                                             "attach to %s.\n", pp->name);
614                                         continue;
615                                 }
616                                 break;
617                         }
618                         if (cp != NULL)
619                                 break;
620                 }
621                 if (cp != NULL)
622                         break;
623         }
624 end:
625         g_destroy_consumer(zcp);
626         g_destroy_geom(zgp);
627         return (cp);
628 }
629
630 static struct g_consumer *
631 vdev_geom_open_by_guids(vdev_t *vd)
632 {
633         struct g_consumer *cp;
634         char *buf;
635         size_t len;
636
637         g_topology_assert();
638
639         ZFS_LOG(1, "Searching by guids [%ju:%ju].",
640                 (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid);
641         cp = vdev_geom_attach_by_guids(vd);
642         if (cp != NULL) {
643                 len = strlen(cp->provider->name) + strlen("/dev/") + 1;
644                 buf = kmem_alloc(len, KM_SLEEP);
645
646                 snprintf(buf, len, "/dev/%s", cp->provider->name);
647                 spa_strfree(vd->vdev_path);
648                 vd->vdev_path = buf;
649
650                 ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
651                     (uintmax_t)spa_guid(vd->vdev_spa),
652                     (uintmax_t)vd->vdev_guid, vd->vdev_path);
653         } else {
654                 ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
655                     (uintmax_t)spa_guid(vd->vdev_spa),
656                     (uintmax_t)vd->vdev_guid);
657         }
658
659         return (cp);
660 }
661
/*
 * Attach to the provider named by vd->vdev_path.  When check_guid is
 * non-zero (and the sector size permits label I/O), the label's guids
 * must match the vdev's; on a mismatch the consumer is closed again
 * and NULL is returned.  Must be called with the topology lock held.
 */
static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	uint64_t pguid, vguid;

	g_topology_assert();

	cp = NULL;
	/* Strip the leading "/dev/" from the vdev path. */
	pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
	if (pp != NULL) {
		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
		cp = vdev_geom_attach(pp, vd);
		if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
		    pp->sectorsize <= VDEV_PAD_SIZE) {
			/* Label I/O must run unlocked. */
			g_topology_unlock();
			vdev_geom_read_guids(cp, &pguid, &vguid);
			g_topology_lock();
			/*
			 * Check that the label's vdev guid matches the
			 * desired guid.  If the label has a pool guid,
			 * check that it matches too. (Inactive spares
			 * and L2ARCs do not have any pool guid in the
			 * label.)
			 */
			if ((pguid != 0 &&
			    pguid != spa_guid(vd->vdev_spa)) ||
			    vguid != vd->vdev_guid) {
				vdev_geom_close_locked(vd);
				cp = NULL;
				ZFS_LOG(1, "guid mismatch for provider %s: "
				    "%ju:%ju != %ju:%ju.", vd->vdev_path,
				    (uintmax_t)spa_guid(vd->vdev_spa),
				    (uintmax_t)vd->vdev_guid,
				    (uintmax_t)pguid, (uintmax_t)vguid);
			} else {
				ZFS_LOG(1, "guid match for provider %s.",
				    vd->vdev_path);
			}
		}
	}

	return (cp);
}
707
708 static int
709 vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
710     uint64_t *logical_ashift, uint64_t *physical_ashift)
711 {
712         struct g_provider *pp;
713         struct g_consumer *cp;
714         size_t bufsize;
715         int error;
716
717         /*
718          * We must have a pathname, and it must be absolute.
719          */
720         if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
721                 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
722                 return (EINVAL);
723         }
724
725         vd->vdev_tsd = NULL;
726
727         DROP_GIANT();
728         g_topology_lock();
729         error = 0;
730
731         if (vd->vdev_spa->spa_splitting_newspa ||
732             (vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
733              vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
734              vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)) {
735                 /*
736                  * We are dealing with a vdev that hasn't been previously
737                  * opened (since boot), and we are not loading an
738                  * existing pool configuration.  This looks like a
739                  * vdev add operation to a new or existing pool.
740                  * Assume the user knows what he/she is doing and find
741                  * GEOM provider by its name, ignoring GUID mismatches.
742                  *
743                  * XXPOLICY: It would be safer to only allow a device
744                  *           that is unlabeled or labeled but missing
745                  *           GUID information to be opened in this fashion,
746                  *           unless we are doing a split, in which case we
747                  *           should allow any guid.
748                  */
749                 cp = vdev_geom_open_by_path(vd, 0);
750         } else {
751                 /*
752                  * Try using the recorded path for this device, but only
753                  * accept it if its label data contains the expected GUIDs.
754                  */
755                 cp = vdev_geom_open_by_path(vd, 1);
756                 if (cp == NULL) {
757                         /*
758                          * The device at vd->vdev_path doesn't have the
759                          * expected GUIDs. The disks might have merely
760                          * moved around so try all other GEOM providers
761                          * to find one with the right GUIDs.
762                          */
763                         cp = vdev_geom_open_by_guids(vd);
764                 }
765         }
766
767         if (cp == NULL) {
768                 ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
769                 error = ENOENT;
770         } else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
771             !ISP2(cp->provider->sectorsize)) {
772                 ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
773                     vd->vdev_path);
774
775                 vdev_geom_close_locked(vd);
776                 error = EINVAL;
777                 cp = NULL;
778         } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
779                 int i;
780
781                 for (i = 0; i < 5; i++) {
782                         error = g_access(cp, 0, 1, 0);
783                         if (error == 0)
784                                 break;
785                         g_topology_unlock();
786                         tsleep(vd, 0, "vdev", hz / 2);
787                         g_topology_lock();
788                 }
789                 if (error != 0) {
790                         printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
791                             vd->vdev_path, error);
792                         vdev_geom_close_locked(vd);
793                         cp = NULL;
794                 }
795         }
796
797         /* Fetch initial physical path information for this device. */
798         if (cp != NULL)
799                 vdev_geom_attrchanged(cp, "GEOM::physpath");
800         
801         g_topology_unlock();
802         PICKUP_GIANT();
803         if (cp == NULL) {
804                 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
805                 return (error);
806         }
807         pp = cp->provider;
808
809         /*
810          * Determine the actual size of the device.
811          */
812         *max_psize = *psize = pp->mediasize;
813
814         /*
815          * Determine the device's minimum transfer size and preferred
816          * transfer size.
817          */
818         *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
819         *physical_ashift = 0;
820         if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) &&
821             pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0)
822                 *physical_ashift = highbit(pp->stripesize) - 1;
823
824         /*
825          * Clear the nowritecache settings, so that on a vdev_reopen()
826          * we will try again.
827          */
828         vd->vdev_nowritecache = B_FALSE;
829
830         /*
831          * Determine the device's rotation rate.
832          */
833         vdev_geom_set_rotation_rate(vd, cp);
834
835         return (0);
836 }
837
/*
 * Close the GEOM consumer backing this vdev.
 *
 * The GEOM topology lock must be held for the actual teardown, and the
 * usual FreeBSD ordering applies: drop Giant before taking the topology
 * lock, re-acquire it after releasing the lock.  The real work is done
 * by vdev_geom_close_locked().
 */
static void
vdev_geom_close(vdev_t *vd)
{

        DROP_GIANT();
        g_topology_lock();
        vdev_geom_close_locked(vd);
        g_topology_unlock();
        PICKUP_GIANT();
}
848
849 static void
850 vdev_geom_io_intr(struct bio *bp)
851 {
852         vdev_t *vd;
853         zio_t *zio;
854
855         zio = bp->bio_caller1;
856         vd = zio->io_vd;
857         zio->io_error = bp->bio_error;
858         if (zio->io_error == 0 && bp->bio_resid != 0)
859                 zio->io_error = SET_ERROR(EIO);
860
861         switch(zio->io_error) {
862         case ENOTSUP:
863                 /*
864                  * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
865                  * that future attempts will never succeed. In this case
866                  * we set a persistent flag so that we don't bother with
867                  * requests in the future.
868                  */
869                 switch(bp->bio_cmd) {
870                 case BIO_FLUSH:
871                         vd->vdev_nowritecache = B_TRUE;
872                         break;
873                 case BIO_DELETE:
874                         vd->vdev_notrim = B_TRUE;
875                         break;
876                 }
877                 break;
878         case ENXIO:
879                 if (!vd->vdev_remove_wanted) {
880                         /*
881                          * If provider's error is set we assume it is being
882                          * removed.
883                          */
884                         if (bp->bio_to->error != 0) {
885                                 vd->vdev_remove_wanted = B_TRUE;
886                                 spa_async_request(zio->io_spa,
887                                     SPA_ASYNC_REMOVE);
888                         } else if (!vd->vdev_delayed_close) {
889                                 vd->vdev_delayed_close = B_TRUE;
890                         }
891                 }
892                 break;
893         }
894         g_destroy_bio(bp);
895         zio_delay_interrupt(zio);
896 }
897
898 static void
899 vdev_geom_io_start(zio_t *zio)
900 {
901         vdev_t *vd;
902         struct g_consumer *cp;
903         struct bio *bp;
904         int error;
905
906         vd = zio->io_vd;
907
908         switch (zio->io_type) {
909         case ZIO_TYPE_IOCTL:
910                 /* XXPOLICY */
911                 if (!vdev_readable(vd)) {
912                         zio->io_error = SET_ERROR(ENXIO);
913                         zio_interrupt(zio);
914                         return;
915                 } else {
916                         switch (zio->io_cmd) {
917                         case DKIOCFLUSHWRITECACHE:
918                                 if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
919                                         break;
920                                 if (vd->vdev_nowritecache) {
921                                         zio->io_error = SET_ERROR(ENOTSUP);
922                                         break;
923                                 }
924                                 goto sendreq;
925                         default:
926                                 zio->io_error = SET_ERROR(ENOTSUP);
927                         }
928                 }
929
930                 zio_execute(zio);
931                 return;
932         case ZIO_TYPE_FREE:
933                 if (vd->vdev_notrim) {
934                         zio->io_error = SET_ERROR(ENOTSUP);
935                 } else if (!vdev_geom_bio_delete_disable) {
936                         goto sendreq;
937                 }
938                 zio_execute(zio);
939                 return;
940         }
941 sendreq:
942         ASSERT(zio->io_type == ZIO_TYPE_READ ||
943             zio->io_type == ZIO_TYPE_WRITE ||
944             zio->io_type == ZIO_TYPE_FREE ||
945             zio->io_type == ZIO_TYPE_IOCTL);
946
947         cp = vd->vdev_tsd;
948         if (cp == NULL) {
949                 zio->io_error = SET_ERROR(ENXIO);
950                 zio_interrupt(zio);
951                 return;
952         }
953         bp = g_alloc_bio();
954         bp->bio_caller1 = zio;
955         switch (zio->io_type) {
956         case ZIO_TYPE_READ:
957         case ZIO_TYPE_WRITE:
958                 zio->io_target_timestamp = zio_handle_io_delay(zio);
959                 bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
960                 bp->bio_data = zio->io_data;
961                 bp->bio_offset = zio->io_offset;
962                 bp->bio_length = zio->io_size;
963                 break;
964         case ZIO_TYPE_FREE:
965                 bp->bio_cmd = BIO_DELETE;
966                 bp->bio_data = NULL;
967                 bp->bio_offset = zio->io_offset;
968                 bp->bio_length = zio->io_size;
969                 break;
970         case ZIO_TYPE_IOCTL:
971                 bp->bio_cmd = BIO_FLUSH;
972                 bp->bio_flags |= BIO_ORDERED;
973                 bp->bio_data = NULL;
974                 bp->bio_offset = cp->provider->mediasize;
975                 bp->bio_length = 0;
976                 break;
977         }
978         bp->bio_done = vdev_geom_io_intr;
979
980         g_io_request(bp, cp);
981 }
982
/*
 * Pipeline done-stage hook.  Intentionally empty: all completion
 * handling for GEOM-backed I/O happens in vdev_geom_io_intr(), which
 * is installed as the bio's bio_done callback.
 */
static void
vdev_geom_io_done(zio_t *zio)
{
}
987
/*
 * Hold hook — intentionally a no-op for GEOM-backed vdevs.
 */
static void
vdev_geom_hold(vdev_t *vd)
{
}
992
/*
 * Release hook — intentionally a no-op, matching vdev_geom_hold().
 */
static void
vdev_geom_rele(vdev_t *vd)
{
}
997
/*
 * Operations vector wiring this file's GEOM-backed implementation into
 * the generic vdev layer.  Entries are positional; their order is
 * dictated by the vdev_ops_t definition.
 */
vdev_ops_t vdev_geom_ops = {
        vdev_geom_open,         /* open */
        vdev_geom_close,        /* close */
        vdev_default_asize,     /* asize: generic default, no override */
        vdev_geom_io_start,     /* issue I/O to the GEOM provider */
        vdev_geom_io_done,      /* done stage (no-op here) */
        NULL,                   /* NOTE(review): presumably the state-change
                                   callback slot — confirm against vdev_ops_t */
        vdev_geom_hold,         /* hold (no-op) */
        vdev_geom_rele,         /* rele (no-op) */
        VDEV_TYPE_DISK,         /* name of this vdev type */
        B_TRUE                  /* leaf vdev */
};