]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
MFC r297986, r298017 to vdev_geom.c
[FreeBSD/stable/10.git] / sys / cddl / contrib / opensolaris / uts / common / fs / zfs / vdev_geom.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
23  * All rights reserved.
24  *
25  * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
26  */
27
28 #include <sys/zfs_context.h>
29 #include <sys/param.h>
30 #include <sys/kernel.h>
31 #include <sys/bio.h>
32 #include <sys/disk.h>
33 #include <sys/spa.h>
34 #include <sys/spa_impl.h>
35 #include <sys/vdev_impl.h>
36 #include <sys/fs/zfs.h>
37 #include <sys/zio.h>
38 #include <geom/geom.h>
39 #include <geom/geom_int.h>
40
41 /*
42  * Virtual device vector for GEOM.
43  */
44
45 static g_attrchanged_t vdev_geom_attrchanged;
46 struct g_class zfs_vdev_class = {
47         .name = "ZFS::VDEV",
48         .version = G_VERSION,
49         .attrchanged = vdev_geom_attrchanged,
50 };
51
52 DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
53
54 SYSCTL_DECL(_vfs_zfs_vdev);
55 /* Don't send BIO_FLUSH. */
56 static int vdev_geom_bio_flush_disable = 0;
57 TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
58 SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
59     &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
60 /* Don't send BIO_DELETE. */
61 static int vdev_geom_bio_delete_disable = 0;
62 TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
63 SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
64     &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
65
66 static void
67 vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
68
69         int error;
70         uint16_t rate;
71
72         error = g_getattr("GEOM::rotation_rate", cp, &rate);
73         if (error == 0)
74                 vd->vdev_rotation_rate = rate;
75         else
76                 vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
77 }
78
79 static void
80 vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
81 {
82         vdev_t *vd;
83         spa_t *spa;
84         char *physpath;
85         int error, physpath_len;
86
87         vd = cp->private;
88         if (vd == NULL)
89                 return;
90
91         if (strcmp(attr, "GEOM::rotation_rate") == 0) {
92                 vdev_geom_set_rotation_rate(vd, cp);
93                 return;
94         }
95
96         if (strcmp(attr, "GEOM::physpath") != 0)
97                 return;
98
99         if (g_access(cp, 1, 0, 0) != 0)
100                 return;
101
102         /*
103          * Record/Update physical path information for this device.
104          */
105         spa = vd->vdev_spa;
106         physpath_len = MAXPATHLEN;
107         physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
108         error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
109         g_access(cp, -1, 0, 0);
110         if (error == 0) {
111                 char *old_physpath;
112
113                 /* g_topology lock ensures that vdev has not been closed */
114                 g_topology_assert();
115                 old_physpath = vd->vdev_physpath;
116                 vd->vdev_physpath = spa_strdup(physpath);
117                 spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
118
119                 if (old_physpath != NULL)
120                         spa_strfree(old_physpath);
121         }
122         g_free(physpath);
123 }
124
125 static void
126 vdev_geom_orphan(struct g_consumer *cp)
127 {
128         vdev_t *vd;
129
130         g_topology_assert();
131
132         vd = cp->private;
133         if (vd == NULL) {
134                 /* Vdev close in progress.  Ignore the event. */
135                 return;
136         }
137
138         /*
139          * Orphan callbacks occur from the GEOM event thread.
140          * Concurrent with this call, new I/O requests may be
141          * working their way through GEOM about to find out
142          * (only once executed by the g_down thread) that we've
143          * been orphaned from our disk provider.  These I/Os
144          * must be retired before we can detach our consumer.
145          * This is most easily achieved by acquiring the
146          * SPA ZIO configuration lock as a writer, but doing
147          * so with the GEOM topology lock held would cause
148          * a lock order reversal.  Instead, rely on the SPA's
149          * async removal support to invoke a close on this
150          * vdev once it is safe to do so.
151          */
152         vd->vdev_remove_wanted = B_TRUE;
153         spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
154 }
155
156 static struct g_consumer *
157 vdev_geom_attach(struct g_provider *pp, vdev_t *vd)
158 {
159         struct g_geom *gp;
160         struct g_consumer *cp;
161         int error;
162
163         g_topology_assert();
164
165         ZFS_LOG(1, "Attaching to %s.", pp->name);
166         /* Do we have geom already? No? Create one. */
167         LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
168                 if (gp->flags & G_GEOM_WITHER)
169                         continue;
170                 if (strcmp(gp->name, "zfs::vdev") != 0)
171                         continue;
172                 break;
173         }
174         if (gp == NULL) {
175                 gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
176                 gp->orphan = vdev_geom_orphan;
177                 gp->attrchanged = vdev_geom_attrchanged;
178                 cp = g_new_consumer(gp);
179                 error = g_attach(cp, pp);
180                 if (error != 0) {
181                         ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__,
182                             __LINE__, error);
183                         g_wither_geom(gp, ENXIO);
184                         return (NULL);
185                 }
186                 error = g_access(cp, 1, 0, 1);
187                 if (error != 0) {
188                         ZFS_LOG(1, "%s(%d): g_access failed: %d\n", __func__,
189                                __LINE__, error);
190                         g_wither_geom(gp, ENXIO);
191                         return (NULL);
192                 }
193                 ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
194         } else {
195                 /* Check if we are already connected to this provider. */
196                 LIST_FOREACH(cp, &gp->consumer, consumer) {
197                         if (cp->provider == pp) {
198                                 ZFS_LOG(1, "Found consumer for %s.", pp->name);
199                                 break;
200                         }
201                 }
202                 if (cp == NULL) {
203                         cp = g_new_consumer(gp);
204                         error = g_attach(cp, pp);
205                         if (error != 0) {
206                                 ZFS_LOG(1, "%s(%d): g_attach failed: %d\n",
207                                     __func__, __LINE__, error);
208                                 g_destroy_consumer(cp);
209                                 return (NULL);
210                         }
211                         error = g_access(cp, 1, 0, 1);
212                         if (error != 0) {
213                                 ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
214                                     __func__, __LINE__, error);
215                                 g_detach(cp);
216                                 g_destroy_consumer(cp);
217                                 return (NULL);
218                         }
219                         ZFS_LOG(1, "Created consumer for %s.", pp->name);
220                 } else {
221                         error = g_access(cp, 1, 0, 1);
222                         if (error != 0) {
223                                 ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
224                                     __func__, __LINE__, error);
225                                 return (NULL);
226                         }
227                         ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
228                 }
229         }
230
231         /* 
232          * BUG: cp may already belong to a vdev.  This could happen if:
233          * 1) That vdev is a shared spare, or
234          * 2) We are trying to reopen a missing vdev and we are scanning by
235          *    guid.  In that case, we'll ultimately fail to open this consumer,
236          *    but not until after setting the private field.
237          * The solution is to:
238          * 1) Don't set the private field until after the open succeeds, and
239          * 2) Set it to a linked list of vdevs, not just a single vdev
240          */
241         cp->private = vd;
242         vd->vdev_tsd = cp;
243
244         /* Fetch initial physical path information for this device. */
245         vdev_geom_attrchanged(cp, "GEOM::physpath");
246         
247         cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
248         return (cp);
249 }
250
251 static void
252 vdev_geom_close_locked(vdev_t *vd)
253 {
254         struct g_geom *gp;
255         struct g_consumer *cp;
256
257         g_topology_assert();
258
259         cp = vd->vdev_tsd;
260         if (cp == NULL)
261                 return;
262
263         ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
264         KASSERT(vd->vdev_tsd == cp, ("%s: vdev_tsd is not cp", __func__));
265         vd->vdev_tsd = NULL;
266         vd->vdev_delayed_close = B_FALSE;
267         cp->private = NULL;
268
269         gp = cp->geom;
270         g_access(cp, -1, 0, -1);
271         /* Destroy consumer on last close. */
272         if (cp->acr == 0 && cp->ace == 0) {
273                 if (cp->acw > 0)
274                         g_access(cp, 0, -cp->acw, 0);
275                 if (cp->provider != NULL) {
276                         ZFS_LOG(1, "Destroyed consumer to %s.",
277                             cp->provider->name);
278                         g_detach(cp);
279                 }
280                 g_destroy_consumer(cp);
281         }
282         /* Destroy geom if there are no consumers left. */
283         if (LIST_EMPTY(&gp->consumer)) {
284                 ZFS_LOG(1, "Destroyed geom %s.", gp->name);
285                 g_wither_geom(gp, ENXIO);
286         }
287 }
288
289 static void
290 nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid)
291 {
292
293         (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid);
294         (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid);
295 }
296
297 static int
298 vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
299 {
300         struct bio *bp;
301         u_char *p;
302         off_t off, maxio;
303         int error;
304
305         ASSERT((offset % cp->provider->sectorsize) == 0);
306         ASSERT((size % cp->provider->sectorsize) == 0);
307
308         bp = g_alloc_bio();
309         off = offset;
310         offset += size;
311         p = data;
312         maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
313         error = 0;
314
315         for (; off < offset; off += maxio, p += maxio, size -= maxio) {
316                 bzero(bp, sizeof(*bp));
317                 bp->bio_cmd = cmd;
318                 bp->bio_done = NULL;
319                 bp->bio_offset = off;
320                 bp->bio_length = MIN(size, maxio);
321                 bp->bio_data = p;
322                 g_io_request(bp, cp);
323                 error = biowait(bp, "vdev_geom_io");
324                 if (error != 0)
325                         break;
326         }
327
328         g_destroy_bio(bp);
329         return (error);
330 }
331
332 static void
333 vdev_geom_taste_orphan(struct g_consumer *cp)
334 {
335
336         KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
337             cp->provider->name));
338 }
339
340 static int
341 vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
342 {
343         struct g_provider *pp;
344         vdev_label_t *label;
345         char *p, *buf;
346         size_t buflen;
347         uint64_t psize;
348         off_t offset, size;
349         uint64_t state, txg;
350         int error, l, len;
351
352         g_topology_assert_not();
353
354         pp = cp->provider;
355         ZFS_LOG(1, "Reading config from %s...", pp->name);
356
357         psize = pp->mediasize;
358         psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));
359
360         size = sizeof(*label) + pp->sectorsize -
361             ((sizeof(*label) - 1) % pp->sectorsize) - 1;
362
363         label = kmem_alloc(size, KM_SLEEP);
364         buflen = sizeof(label->vl_vdev_phys.vp_nvlist);
365
366         *config = NULL;
367         for (l = 0; l < VDEV_LABELS; l++) {
368
369                 offset = vdev_label_offset(psize, l, 0);
370                 if ((offset % pp->sectorsize) != 0)
371                         continue;
372
373                 if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
374                         continue;
375                 buf = label->vl_vdev_phys.vp_nvlist;
376
377                 if (nvlist_unpack(buf, buflen, config, 0) != 0)
378                         continue;
379
380                 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
381                     &state) != 0 || state > POOL_STATE_L2CACHE) {
382                         nvlist_free(*config);
383                         *config = NULL;
384                         continue;
385                 }
386
387                 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
388                     (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
389                     &txg) != 0 || txg == 0)) {
390                         nvlist_free(*config);
391                         *config = NULL;
392                         continue;
393                 }
394
395                 break;
396         }
397
398         kmem_free(label, size);
399         return (*config == NULL ? ENOENT : 0);
400 }
401
402 static void
403 resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
404 {
405         nvlist_t **new_configs;
406         uint64_t i;
407
408         if (id < *count)
409                 return;
410         new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
411             KM_SLEEP);
412         for (i = 0; i < *count; i++)
413                 new_configs[i] = (*configs)[i];
414         if (*configs != NULL)
415                 kmem_free(*configs, *count * sizeof(void *));
416         *configs = new_configs;
417         *count = id + 1;
418 }
419
420 static void
421 process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
422     const char *name, uint64_t* known_pool_guid)
423 {
424         nvlist_t *vdev_tree;
425         uint64_t pool_guid;
426         uint64_t vdev_guid, known_guid;
427         uint64_t id, txg, known_txg;
428         char *pname;
429         int i;
430
431         if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
432             strcmp(pname, name) != 0)
433                 goto ignore;
434
435         if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
436                 goto ignore;
437
438         if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
439                 goto ignore;
440
441         if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
442                 goto ignore;
443
444         if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
445                 goto ignore;
446
447         VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);
448
449         if (*known_pool_guid != 0) {
450                 if (pool_guid != *known_pool_guid)
451                         goto ignore;
452         } else
453                 *known_pool_guid = pool_guid;
454
455         resize_configs(configs, count, id);
456
457         if ((*configs)[id] != NULL) {
458                 VERIFY(nvlist_lookup_uint64((*configs)[id],
459                     ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
460                 if (txg <= known_txg)
461                         goto ignore;
462                 nvlist_free((*configs)[id]);
463         }
464
465         (*configs)[id] = cfg;
466         return;
467
468 ignore:
469         nvlist_free(cfg);
470 }
471
472 static int
473 vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
474 {
475         int error;
476
477         if (pp->flags & G_PF_WITHER)
478                 return (EINVAL);
479         g_attach(cp, pp);
480         error = g_access(cp, 1, 0, 0);
481         if (error == 0) {
482                 if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
483                         error = EINVAL;
484                 else if (pp->mediasize < SPA_MINDEVSIZE)
485                         error = EINVAL;
486                 if (error != 0)
487                         g_access(cp, -1, 0, 0);
488         }
489         if (error != 0)
490                 g_detach(cp);
491         return (error);
492 }
493
/*
 * Undo a successful vdev_geom_attach_taster(): release the read
 * reference and detach cp from its provider.
 */
static void
vdev_geom_detach_taster(struct g_consumer *cp)
{

	g_access(cp, -1, 0, 0);
	g_detach(cp);
}
500
501 int
502 vdev_geom_read_pool_label(const char *name,
503     nvlist_t ***configs, uint64_t *count)
504 {
505         struct g_class *mp;
506         struct g_geom *gp, *zgp;
507         struct g_provider *pp;
508         struct g_consumer *zcp;
509         nvlist_t *vdev_cfg;
510         uint64_t pool_guid;
511         int error;
512
513         DROP_GIANT();
514         g_topology_lock();
515
516         zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
517         /* This orphan function should be never called. */
518         zgp->orphan = vdev_geom_taste_orphan;
519         zcp = g_new_consumer(zgp);
520
521         *configs = NULL;
522         *count = 0;
523         pool_guid = 0;
524         LIST_FOREACH(mp, &g_classes, class) {
525                 if (mp == &zfs_vdev_class)
526                         continue;
527                 LIST_FOREACH(gp, &mp->geom, geom) {
528                         if (gp->flags & G_GEOM_WITHER)
529                                 continue;
530                         LIST_FOREACH(pp, &gp->provider, provider) {
531                                 if (pp->flags & G_PF_WITHER)
532                                         continue;
533                                 if (vdev_geom_attach_taster(zcp, pp) != 0)
534                                         continue;
535                                 g_topology_unlock();
536                                 error = vdev_geom_read_config(zcp, &vdev_cfg);
537                                 g_topology_lock();
538                                 vdev_geom_detach_taster(zcp);
539                                 if (error)
540                                         continue;
541                                 ZFS_LOG(1, "successfully read vdev config");
542
543                                 process_vdev_config(configs, count,
544                                     vdev_cfg, name, &pool_guid);
545                         }
546                 }
547         }
548
549         g_destroy_consumer(zcp);
550         g_destroy_geom(zgp);
551         g_topology_unlock();
552         PICKUP_GIANT();
553
554         return (*count > 0 ? 0 : ENOENT);
555 }
556
557 static void
558 vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid)
559 {
560         nvlist_t *config;
561
562         g_topology_assert_not();
563
564         *pguid = 0;
565         *vguid = 0;
566         if (vdev_geom_read_config(cp, &config) == 0) {
567                 nvlist_get_guids(config, pguid, vguid);
568                 nvlist_free(config);
569         }
570 }
571
572 static struct g_consumer *
573 vdev_geom_attach_by_guids(vdev_t *vd)
574 {
575         struct g_class *mp;
576         struct g_geom *gp, *zgp;
577         struct g_provider *pp;
578         struct g_consumer *cp, *zcp;
579         uint64_t pguid, vguid;
580
581         g_topology_assert();
582
583         zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
584         /* This orphan function should be never called. */
585         zgp->orphan = vdev_geom_taste_orphan;
586         zcp = g_new_consumer(zgp);
587
588         cp = NULL;
589         LIST_FOREACH(mp, &g_classes, class) {
590                 if (mp == &zfs_vdev_class)
591                         continue;
592                 LIST_FOREACH(gp, &mp->geom, geom) {
593                         if (gp->flags & G_GEOM_WITHER)
594                                 continue;
595                         LIST_FOREACH(pp, &gp->provider, provider) {
596                                 if (vdev_geom_attach_taster(zcp, pp) != 0)
597                                         continue;
598                                 g_topology_unlock();
599                                 vdev_geom_read_guids(zcp, &pguid, &vguid);
600                                 g_topology_lock();
601                                 vdev_geom_detach_taster(zcp);
602                                 /* 
603                                  * Check that the label's vdev guid matches the
604                                  * desired guid.  If the label has a pool guid,
605                                  * check that it matches too. (Inactive spares
606                                  * and L2ARCs do not have any pool guid in the
607                                  * label.)
608                                 */
609                                 if ((pguid != 0 &&
610                                      pguid != spa_guid(vd->vdev_spa)) ||
611                                     vguid != vd->vdev_guid)
612                                         continue;
613                                 cp = vdev_geom_attach(pp, vd);
614                                 if (cp == NULL) {
615                                         printf("ZFS WARNING: Unable to "
616                                             "attach to %s.\n", pp->name);
617                                         continue;
618                                 }
619                                 break;
620                         }
621                         if (cp != NULL)
622                                 break;
623                 }
624                 if (cp != NULL)
625                         break;
626         }
627 end:
628         g_destroy_consumer(zcp);
629         g_destroy_geom(zgp);
630         return (cp);
631 }
632
633 static struct g_consumer *
634 vdev_geom_open_by_guids(vdev_t *vd)
635 {
636         struct g_consumer *cp;
637         char *buf;
638         size_t len;
639
640         g_topology_assert();
641
642         ZFS_LOG(1, "Searching by guids [%ju:%ju].",
643                 (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid);
644         cp = vdev_geom_attach_by_guids(vd);
645         if (cp != NULL) {
646                 len = strlen(cp->provider->name) + strlen("/dev/") + 1;
647                 buf = kmem_alloc(len, KM_SLEEP);
648
649                 snprintf(buf, len, "/dev/%s", cp->provider->name);
650                 spa_strfree(vd->vdev_path);
651                 vd->vdev_path = buf;
652
653                 ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
654                     (uintmax_t)spa_guid(vd->vdev_spa),
655                     (uintmax_t)vd->vdev_guid, vd->vdev_path);
656         } else {
657                 ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
658                     (uintmax_t)spa_guid(vd->vdev_spa),
659                     (uintmax_t)vd->vdev_guid);
660         }
661
662         return (cp);
663 }
664
665 static struct g_consumer *
666 vdev_geom_open_by_path(vdev_t *vd, int check_guid)
667 {
668         struct g_provider *pp;
669         struct g_consumer *cp;
670         uint64_t pguid, vguid;
671
672         g_topology_assert();
673
674         cp = NULL;
675         pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
676         if (pp != NULL) {
677                 ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
678                 cp = vdev_geom_attach(pp, vd);
679                 if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
680                     pp->sectorsize <= VDEV_PAD_SIZE) {
681                         g_topology_unlock();
682                         vdev_geom_read_guids(cp, &pguid, &vguid);
683                         g_topology_lock();
684                         /*
685                          * Check that the label's vdev guid matches the
686                          * desired guid.  If the label has a pool guid,
687                          * check that it matches too. (Inactive spares
688                          * and L2ARCs do not have any pool guid in the
689                          * label.)
690                          */
691                         if ((pguid != 0 &&
692                             pguid != spa_guid(vd->vdev_spa)) ||
693                             vguid != vd->vdev_guid) {
694                                 vdev_geom_close_locked(vd);
695                                 cp = NULL;
696                                 ZFS_LOG(1, "guid mismatch for provider %s: "
697                                     "%ju:%ju != %ju:%ju.", vd->vdev_path,
698                                     (uintmax_t)spa_guid(vd->vdev_spa),
699                                     (uintmax_t)vd->vdev_guid,
700                                     (uintmax_t)pguid, (uintmax_t)vguid);
701                         } else {
702                                 ZFS_LOG(1, "guid match for provider %s.",
703                                     vd->vdev_path);
704                         }
705                 }
706         }
707
708         return (cp);
709 }
710
711 static int
712 vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
713     uint64_t *logical_ashift, uint64_t *physical_ashift)
714 {
715         struct g_provider *pp;
716         struct g_consumer *cp;
717         size_t bufsize;
718         int error;
719
720         /*
721          * We must have a pathname, and it must be absolute.
722          */
723         if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
724                 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
725                 return (EINVAL);
726         }
727
728         vd->vdev_tsd = NULL;
729
730         DROP_GIANT();
731         g_topology_lock();
732         error = 0;
733
734         if (vd->vdev_spa->spa_splitting_newspa ||
735             (vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
736              vd->vdev_spa->spa_load_state == SPA_LOAD_NONE)) {
737                 /*
738                  * We are dealing with a vdev that hasn't been previously
739                  * opened (since boot), and we are not loading an
740                  * existing pool configuration.  This looks like a
741                  * vdev add operation to a new or existing pool.
742                  * Assume the user knows what he/she is doing and find
743                  * GEOM provider by its name, ignoring GUID mismatches.
744                  *
745                  * XXPOLICY: It would be safer to only allow a device
746                  *           that is unlabeled or labeled but missing
747                  *           GUID information to be opened in this fashion,
748                  *           unless we are doing a split, in which case we
749                  *           should allow any guid.
750                  */
751                 cp = vdev_geom_open_by_path(vd, 0);
752         } else {
753                 /*
754                  * Try using the recorded path for this device, but only
755                  * accept it if its label data contains the expected GUIDs.
756                  */
757                 cp = vdev_geom_open_by_path(vd, 1);
758                 if (cp == NULL) {
759                         /*
760                          * The device at vd->vdev_path doesn't have the
761                          * expected GUIDs. The disks might have merely
762                          * moved around so try all other GEOM providers
763                          * to find one with the right GUIDs.
764                          */
765                         cp = vdev_geom_open_by_guids(vd);
766                 }
767         }
768
769         if (cp == NULL) {
770                 ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
771                 error = ENOENT;
772         } else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
773             !ISP2(cp->provider->sectorsize)) {
774                 ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
775                     vd->vdev_path);
776
777                 vdev_geom_close_locked(vd);
778                 error = EINVAL;
779                 cp = NULL;
780         } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
781                 int i;
782
783                 for (i = 0; i < 5; i++) {
784                         error = g_access(cp, 0, 1, 0);
785                         if (error == 0)
786                                 break;
787                         g_topology_unlock();
788                         tsleep(vd, 0, "vdev", hz / 2);
789                         g_topology_lock();
790                 }
791                 if (error != 0) {
792                         printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
793                             vd->vdev_path, error);
794                         vdev_geom_close_locked(vd);
795                         cp = NULL;
796                 }
797         }
798
799         g_topology_unlock();
800         PICKUP_GIANT();
801         if (cp == NULL) {
802                 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
803                 return (error);
804         }
805         pp = cp->provider;
806
807         /*
808          * Determine the actual size of the device.
809          */
810         *max_psize = *psize = pp->mediasize;
811
812         /*
813          * Determine the device's minimum transfer size and preferred
814          * transfer size.
815          */
816         *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
817         *physical_ashift = 0;
818         if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) &&
819             pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0)
820                 *physical_ashift = highbit(pp->stripesize) - 1;
821
822         /*
823          * Clear the nowritecache settings, so that on a vdev_reopen()
824          * we will try again.
825          */
826         vd->vdev_nowritecache = B_FALSE;
827
828         /*
829          * Determine the device's rotation rate.
830          */
831         vdev_geom_set_rotation_rate(vd, cp);
832
833         return (0);
834 }
835
836 static void
837 vdev_geom_close(vdev_t *vd)
838 {
839
840         DROP_GIANT();
841         g_topology_lock();
842         vdev_geom_close_locked(vd);
843         g_topology_unlock();
844         PICKUP_GIANT();
845 }
846
847 static void
848 vdev_geom_io_intr(struct bio *bp)
849 {
850         vdev_t *vd;
851         zio_t *zio;
852
853         zio = bp->bio_caller1;
854         vd = zio->io_vd;
855         zio->io_error = bp->bio_error;
856         if (zio->io_error == 0 && bp->bio_resid != 0)
857                 zio->io_error = SET_ERROR(EIO);
858
859         switch(zio->io_error) {
860         case ENOTSUP:
861                 /*
862                  * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
863                  * that future attempts will never succeed. In this case
864                  * we set a persistent flag so that we don't bother with
865                  * requests in the future.
866                  */
867                 switch(bp->bio_cmd) {
868                 case BIO_FLUSH:
869                         vd->vdev_nowritecache = B_TRUE;
870                         break;
871                 case BIO_DELETE:
872                         vd->vdev_notrim = B_TRUE;
873                         break;
874                 }
875                 break;
876         case ENXIO:
877                 if (!vd->vdev_remove_wanted) {
878                         /*
879                          * If provider's error is set we assume it is being
880                          * removed.
881                          */
882                         if (bp->bio_to->error != 0) {
883                                 vd->vdev_remove_wanted = B_TRUE;
884                                 spa_async_request(zio->io_spa,
885                                     SPA_ASYNC_REMOVE);
886                         } else if (!vd->vdev_delayed_close) {
887                                 vd->vdev_delayed_close = B_TRUE;
888                         }
889                 }
890                 break;
891         }
892         g_destroy_bio(bp);
893         zio_delay_interrupt(zio);
894 }
895
896 static void
897 vdev_geom_io_start(zio_t *zio)
898 {
899         vdev_t *vd;
900         struct g_consumer *cp;
901         struct bio *bp;
902         int error;
903
904         vd = zio->io_vd;
905
906         switch (zio->io_type) {
907         case ZIO_TYPE_IOCTL:
908                 /* XXPOLICY */
909                 if (!vdev_readable(vd)) {
910                         zio->io_error = SET_ERROR(ENXIO);
911                         zio_interrupt(zio);
912                         return;
913                 } else {
914                         switch (zio->io_cmd) {
915                         case DKIOCFLUSHWRITECACHE:
916                                 if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
917                                         break;
918                                 if (vd->vdev_nowritecache) {
919                                         zio->io_error = SET_ERROR(ENOTSUP);
920                                         break;
921                                 }
922                                 goto sendreq;
923                         default:
924                                 zio->io_error = SET_ERROR(ENOTSUP);
925                         }
926                 }
927
928                 zio_execute(zio);
929                 return;
930         case ZIO_TYPE_FREE:
931                 if (vd->vdev_notrim) {
932                         zio->io_error = SET_ERROR(ENOTSUP);
933                 } else if (!vdev_geom_bio_delete_disable) {
934                         goto sendreq;
935                 }
936                 zio_execute(zio);
937                 return;
938         }
939 sendreq:
940         ASSERT(zio->io_type == ZIO_TYPE_READ ||
941             zio->io_type == ZIO_TYPE_WRITE ||
942             zio->io_type == ZIO_TYPE_FREE ||
943             zio->io_type == ZIO_TYPE_IOCTL);
944
945         cp = vd->vdev_tsd;
946         if (cp == NULL) {
947                 zio->io_error = SET_ERROR(ENXIO);
948                 zio_interrupt(zio);
949                 return;
950         }
951         bp = g_alloc_bio();
952         bp->bio_caller1 = zio;
953         switch (zio->io_type) {
954         case ZIO_TYPE_READ:
955         case ZIO_TYPE_WRITE:
956                 zio->io_target_timestamp = zio_handle_io_delay(zio);
957                 bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
958                 bp->bio_data = zio->io_data;
959                 bp->bio_offset = zio->io_offset;
960                 bp->bio_length = zio->io_size;
961                 break;
962         case ZIO_TYPE_FREE:
963                 bp->bio_cmd = BIO_DELETE;
964                 bp->bio_data = NULL;
965                 bp->bio_offset = zio->io_offset;
966                 bp->bio_length = zio->io_size;
967                 break;
968         case ZIO_TYPE_IOCTL:
969                 bp->bio_cmd = BIO_FLUSH;
970                 bp->bio_flags |= BIO_ORDERED;
971                 bp->bio_data = NULL;
972                 bp->bio_offset = cp->provider->mediasize;
973                 bp->bio_length = 0;
974                 break;
975         }
976         bp->bio_done = vdev_geom_io_intr;
977
978         g_io_request(bp, cp);
979 }
980
981 static void
982 vdev_geom_io_done(zio_t *zio)
983 {
984 }
985
986 static void
987 vdev_geom_hold(vdev_t *vd)
988 {
989 }
990
991 static void
992 vdev_geom_rele(vdev_t *vd)
993 {
994 }
995
996 vdev_ops_t vdev_geom_ops = {
997         vdev_geom_open,
998         vdev_geom_close,
999         vdev_default_asize,
1000         vdev_geom_io_start,
1001         vdev_geom_io_done,
1002         NULL,
1003         vdev_geom_hold,
1004         vdev_geom_rele,
1005         VDEV_TYPE_DISK,         /* name of this vdev type */
1006         B_TRUE                  /* leaf vdev */
1007 };