/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/disk.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <geom/geom.h>
#include <geom/geom_int.h>

/*
 * Virtual device vector for GEOM.
 */

static g_attrchanged_t vdev_geom_attrchanged;
struct g_class zfs_vdev_class = {
        .name = "ZFS::VDEV",
        .version = G_VERSION,
        .attrchanged = vdev_geom_attrchanged,
};

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable;
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RWTUN,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable;
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RWTUN,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
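
/*
 * Because both knobs are declared CTLFLAG_RWTUN, they can be changed at
 * runtime via sysctl(8) or preset at boot via loader.conf(5).  For example
 * (illustrative commands, run from userland):
 *
 *	# sysctl vfs.zfs.vdev.bio_flush_disable=1
 *	# echo 'vfs.zfs.vdev.bio_delete_disable="1"' >> /boot/loader.conf
 */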

/*
 * Thread local storage used to indicate when a thread is probing geoms
 * for their guids.  If NULL, this thread is not tasting geoms.  If non-NULL,
 * it is looking for a replacement for the vdev_t* that is its value.
 */
uint_t zfs_geom_probe_vdev_key;

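/*
 * A sketch (not part of this file) of how a downstack GEOM provider such
 * as zvol can consult this key to refuse being tasted as a vdev while
 * vdev_geom is probing; the error value shown is hypothetical:
 *
 *	if (tsd_get(zfs_geom_probe_vdev_key) != NULL)
 *		return (EOPNOTSUPP);
 */
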
static void
vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
{
        int error;
        uint16_t rate;

        error = g_getattr("GEOM::rotation_rate", cp, &rate);
        if (error == 0)
                vd->vdev_rotation_rate = rate;
        else
                vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
}
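
/*
 * By disk(9) convention, the rate reported via GEOM::rotation_rate is 0
 * for "unknown", 1 for a non-rotating (solid state) device, and the
 * medium's RPM otherwise; ZFS stores the value verbatim.  For example, a
 * SATA SSD typically reports 1 and a 7200 RPM drive reports 7200.
 */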

static void
vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
{
        vdev_t *vd;
        spa_t *spa;
        char *physpath;
        int error, physpath_len;

        vd = cp->private;
        if (vd == NULL)
                return;

        if (strcmp(attr, "GEOM::rotation_rate") == 0) {
                vdev_geom_set_rotation_rate(vd, cp);
                return;
        }

        if (strcmp(attr, "GEOM::physpath") != 0)
                return;

        if (g_access(cp, 1, 0, 0) != 0)
                return;

        /*
         * Record/Update physical path information for this device.
         */
        spa = vd->vdev_spa;
        physpath_len = MAXPATHLEN;
        physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
        error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
        g_access(cp, -1, 0, 0);
        if (error == 0) {
                char *old_physpath;

                old_physpath = vd->vdev_physpath;
                vd->vdev_physpath = spa_strdup(physpath);
                spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);

                if (old_physpath != NULL) {
                        int held_lock;

                        held_lock = spa_config_held(spa, SCL_STATE, RW_WRITER);
                        if (held_lock == 0) {
                                g_topology_unlock();
                                spa_config_enter(spa, SCL_STATE, FTAG,
                                    RW_WRITER);
                        }

                        spa_strfree(old_physpath);

                        if (held_lock == 0) {
                                spa_config_exit(spa, SCL_STATE, FTAG);
                                g_topology_lock();
                        }
                }
        }
        g_free(physpath);
}

static void
vdev_geom_orphan(struct g_consumer *cp)
{
        vdev_t *vd;

        g_topology_assert();

        vd = cp->private;
        if (vd == NULL) {
                /* Vdev close in progress.  Ignore the event. */
                return;
        }

        /*
         * Orphan callbacks occur from the GEOM event thread.
         * Concurrent with this call, new I/O requests may be
         * working their way through GEOM about to find out
         * (only once executed by the g_down thread) that we've
         * been orphaned from our disk provider.  These I/Os
         * must be retired before we can detach our consumer.
         * This is most easily achieved by acquiring the
         * SPA ZIO configuration lock as a writer, but doing
         * so with the GEOM topology lock held would cause
         * a lock order reversal.  Instead, rely on the SPA's
         * async removal support to invoke a close on this
         * vdev once it is safe to do so.
         */
        vd->vdev_remove_wanted = B_TRUE;
        spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
}

static struct g_consumer *
vdev_geom_attach(struct g_provider *pp, vdev_t *vd)
{
        struct g_geom *gp;
        struct g_consumer *cp;

        g_topology_assert();

        ZFS_LOG(1, "Attaching to %s.", pp->name);
        /* Do we have geom already? No? Create one. */
        LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
                if (gp->flags & G_GEOM_WITHER)
                        continue;
                if (strcmp(gp->name, "zfs::vdev") != 0)
                        continue;
                break;
        }
        if (gp == NULL) {
                gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
                gp->orphan = vdev_geom_orphan;
                gp->attrchanged = vdev_geom_attrchanged;
                cp = g_new_consumer(gp);
                if (g_attach(cp, pp) != 0) {
                        g_wither_geom(gp, ENXIO);
                        return (NULL);
                }
                if (g_access(cp, 1, 0, 1) != 0) {
                        g_wither_geom(gp, ENXIO);
                        return (NULL);
                }
                ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
        } else {
                /* Check if we are already connected to this provider. */
                LIST_FOREACH(cp, &gp->consumer, consumer) {
                        if (cp->provider == pp) {
                                ZFS_LOG(1, "Found consumer for %s.", pp->name);
                                break;
                        }
                }
                if (cp == NULL) {
                        cp = g_new_consumer(gp);
                        if (g_attach(cp, pp) != 0) {
                                g_destroy_consumer(cp);
                                return (NULL);
                        }
                        if (g_access(cp, 1, 0, 1) != 0) {
                                g_detach(cp);
                                g_destroy_consumer(cp);
                                return (NULL);
                        }
                        ZFS_LOG(1, "Created consumer for %s.", pp->name);
                } else {
                        if (g_access(cp, 1, 0, 1) != 0)
                                return (NULL);
                        ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
                }
        }

        /*
         * BUG: cp may already belong to a vdev.  This could happen if:
         * 1) That vdev is a shared spare, or
         * 2) We are trying to reopen a missing vdev and we are scanning by
         *    guid.  In that case, we'll ultimately fail to open this consumer,
         *    but not until after setting the private field.
         * The solution is to:
         * 1) Don't set the private field until after the open succeeds, and
         * 2) Set it to a linked list of vdevs, not just a single vdev.
         * A sketch of such a scheme appears below this function.
         */
        cp->private = vd;
        vd->vdev_tsd = cp;

        /* Fetch initial physical path information for this device. */
        vdev_geom_attrchanged(cp, "GEOM::physpath");

        cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
        return (cp);
}
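
/*
 * A sketch (hypothetical, compiled out, names illustrative) of the fix
 * suggested in the BUG comment above: hang a list of vdevs off the
 * consumer instead of a single pointer, and populate it only after the
 * open succeeds.
 */
#if 0
SLIST_HEAD(consumer_priv_t, consumer_vdev_elem);
struct consumer_vdev_elem {
        SLIST_ENTRY(consumer_vdev_elem) elems;
        vdev_t                          *vd;
};
#endif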

static void
vdev_geom_close_locked(vdev_t *vd)
{
        struct g_geom *gp;
        struct g_consumer *cp;

        g_topology_assert();

        cp = vd->vdev_tsd;
        if (cp == NULL)
                return;

        ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
        KASSERT(vd->vdev_tsd == cp, ("%s: vdev_tsd is not cp", __func__));
        vd->vdev_tsd = NULL;
        vd->vdev_delayed_close = B_FALSE;
        cp->private = NULL;

        gp = cp->geom;
        g_access(cp, -1, 0, -1);
        /* Destroy consumer on last close. */
        if (cp->acr == 0 && cp->ace == 0) {
                if (cp->acw > 0)
                        g_access(cp, 0, -cp->acw, 0);
                if (cp->provider != NULL) {
                        ZFS_LOG(1, "Destroyed consumer to %s.",
                            cp->provider->name);
                        g_detach(cp);
                }
                g_destroy_consumer(cp);
        }
        /* Destroy geom if there are no consumers left. */
        if (LIST_EMPTY(&gp->consumer)) {
                ZFS_LOG(1, "Destroyed geom %s.", gp->name);
                g_wither_geom(gp, ENXIO);
        }
}

static void
nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid)
{

        (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid);
        (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid);
}

static int
vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset,
    off_t size)
{
        struct bio *bp;
        u_char *p;
        off_t off, maxio;
        int error;

        ASSERT((offset % cp->provider->sectorsize) == 0);
        ASSERT((size % cp->provider->sectorsize) == 0);

        bp = g_alloc_bio();
        off = offset;
        offset += size;
        p = data;
        maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
        error = 0;

        for (; off < offset; off += maxio, p += maxio, size -= maxio) {
                g_reset_bio(bp);
                bp->bio_cmd = cmd;
                bp->bio_done = NULL;
                bp->bio_offset = off;
                bp->bio_length = MIN(size, maxio);
                bp->bio_data = p;
                g_io_request(bp, cp);
                error = biowait(bp, "vdev_geom_io");
                if (error != 0)
                        break;
        }

        g_destroy_bio(bp);
        return (error);
}
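
/*
 * Worked example of the chunking above (assuming MAXPHYS = 128 KiB and a
 * 512-byte sector provider): maxio = 131072, so a 300 KiB (307200-byte)
 * transfer is issued as three synchronous bios of 131072, 131072, and
 * 45056 bytes, reusing the same struct bio via g_reset_bio() each pass.
 */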

static void
vdev_geom_taste_orphan(struct g_consumer *cp)
{
        ZFS_LOG(0, "WARNING: Orphan %s while tasting its VDev GUID.",
            cp->provider->name);
}

static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
{
        struct g_provider *pp;
        vdev_label_t *label;
        char *p, *buf;
        size_t buflen;
        uint64_t psize;
        off_t offset, size;
        uint64_t state, txg;
        int error, l, len;

        g_topology_assert_not();

        pp = cp->provider;
        ZFS_LOG(1, "Reading config from %s...", pp->name);

        psize = pp->mediasize;
        psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));

        size = sizeof(*label) + pp->sectorsize -
            ((sizeof(*label) - 1) % pp->sectorsize) - 1;

        label = kmem_alloc(size, KM_SLEEP);
        buflen = sizeof(label->vl_vdev_phys.vp_nvlist);

        *config = NULL;
        for (l = 0; l < VDEV_LABELS; l++) {

                offset = vdev_label_offset(psize, l, 0);
                if ((offset % pp->sectorsize) != 0)
                        continue;

                if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
                        continue;
                buf = label->vl_vdev_phys.vp_nvlist;

                if (nvlist_unpack(buf, buflen, config, 0) != 0)
                        continue;

                if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
                    &state) != 0 || state > POOL_STATE_L2CACHE) {
                        nvlist_free(*config);
                        *config = NULL;
                        continue;
                }

                if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
                    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
                    &txg) != 0 || txg == 0)) {
                        nvlist_free(*config);
                        *config = NULL;
                        continue;
                }

                break;
        }

        kmem_free(label, size);
        return (*config == NULL ? ENOENT : 0);
}
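
/*
 * For orientation (standard ZFS on-disk layout, not specific to this file):
 * each of the VDEV_LABELS (4) labels is sizeof(vdev_label_t) = 256 KiB, so
 * with psize aligned down to a 256 KiB multiple, vdev_label_offset() yields
 * 0 and 256 KiB for labels 0-1 (front of the device) and psize - 512 KiB
 * and psize - 256 KiB for labels 2-3 (back of the device).
 */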

static void
resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
{
        nvlist_t **new_configs;
        uint64_t i;

        if (id < *count)
                return;
        new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
            KM_SLEEP);
        for (i = 0; i < *count; i++)
                new_configs[i] = (*configs)[i];
        if (*configs != NULL)
                kmem_free(*configs, *count * sizeof(void *));
        *configs = new_configs;
        *count = id + 1;
}
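
/*
 * E.g., starting from configs == NULL and count == 0, a call with id == 2
 * allocates a zeroed three-slot array and sets count to 3; a later call
 * with id == 1 is a no-op because the array is already large enough.
 */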

static void
process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
    const char *name, uint64_t *known_pool_guid)
{
        nvlist_t *vdev_tree;
        uint64_t pool_guid;
        uint64_t vdev_guid, known_guid;
        uint64_t id, txg, known_txg;
        char *pname;
        int i;

        if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
            strcmp(pname, name) != 0)
                goto ignore;

        if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
                goto ignore;

        if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
                goto ignore;

        if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
                goto ignore;

        if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
                goto ignore;

        VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

        if (*known_pool_guid != 0) {
                if (pool_guid != *known_pool_guid)
                        goto ignore;
        } else
                *known_pool_guid = pool_guid;

        resize_configs(configs, count, id);

        if ((*configs)[id] != NULL) {
                VERIFY(nvlist_lookup_uint64((*configs)[id],
                    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
                if (txg <= known_txg)
                        goto ignore;
                nvlist_free((*configs)[id]);
        }

        (*configs)[id] = cfg;
        return;

ignore:
        nvlist_free(cfg);
}

static int
vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
{
        int error;

        if (pp->flags & G_PF_WITHER)
                return (EINVAL);
        g_attach(cp, pp);
        error = g_access(cp, 1, 0, 0);
        if (error == 0) {
                if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
                        error = EINVAL;
                else if (pp->mediasize < SPA_MINDEVSIZE)
                        error = EINVAL;
                if (error != 0)
                        g_access(cp, -1, 0, 0);
        }
        if (error != 0)
                g_detach(cp);
        return (error);
}

static void
vdev_geom_detach_taster(struct g_consumer *cp)
{
        g_access(cp, -1, 0, 0);
        g_detach(cp);
}

int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
        struct g_class *mp;
        struct g_geom *gp, *zgp;
        struct g_provider *pp;
        struct g_consumer *zcp;
        nvlist_t *vdev_cfg;
        uint64_t pool_guid;
        int error;

        DROP_GIANT();
        g_topology_lock();

        zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
        /* This orphan function should never be called. */
        zgp->orphan = vdev_geom_taste_orphan;
        zcp = g_new_consumer(zgp);

        *configs = NULL;
        *count = 0;
        pool_guid = 0;
        LIST_FOREACH(mp, &g_classes, class) {
                if (mp == &zfs_vdev_class)
                        continue;
                LIST_FOREACH(gp, &mp->geom, geom) {
                        if (gp->flags & G_GEOM_WITHER)
                                continue;
                        LIST_FOREACH(pp, &gp->provider, provider) {
                                if (pp->flags & G_PF_WITHER)
                                        continue;
                                if (vdev_geom_attach_taster(zcp, pp) != 0)
                                        continue;
                                g_topology_unlock();
                                error = vdev_geom_read_config(zcp, &vdev_cfg);
                                g_topology_lock();
                                vdev_geom_detach_taster(zcp);
                                if (error)
                                        continue;
                                ZFS_LOG(1, "successfully read vdev config");

                                process_vdev_config(configs, count,
                                    vdev_cfg, name, &pool_guid);
                        }
                }
        }

        g_destroy_consumer(zcp);
        g_destroy_geom(zgp);
        g_topology_unlock();
        PICKUP_GIANT();

        return (*count > 0 ? 0 : ENOENT);
}
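
/*
 * A minimal caller sketch (hypothetical, compiled out).  Note that the
 * caller owns both the returned nvlists and the array that holds them:
 */
#if 0
static void
example_read_pool_label(void)
{
        nvlist_t **configs;
        uint64_t count, i;

        if (vdev_geom_read_pool_label("tank", &configs, &count) == 0) {
                for (i = 0; i < count; i++) {
                        if (configs[i] != NULL)
                                nvlist_free(configs[i]);
                }
                kmem_free(configs, count * sizeof(nvlist_t *));
        }
}
#endif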

static void
vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid)
{
        nvlist_t *config;

        g_topology_assert_not();

        *pguid = 0;
        *vguid = 0;
        if (vdev_geom_read_config(cp, &config) == 0) {
                nvlist_get_guids(config, pguid, vguid);
                nvlist_free(config);
        }
}

static struct g_consumer *
vdev_geom_attach_by_guids(vdev_t *vd)
{
        struct g_class *mp;
        struct g_geom *gp, *zgp;
        struct g_provider *pp;
        struct g_consumer *cp, *zcp;
        uint64_t pguid, vguid;

        g_topology_assert();

        zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
        zgp->orphan = vdev_geom_taste_orphan;
        zcp = g_new_consumer(zgp);

        cp = NULL;
        LIST_FOREACH(mp, &g_classes, class) {
                if (mp == &zfs_vdev_class)
                        continue;
                LIST_FOREACH(gp, &mp->geom, geom) {
                        if (gp->flags & G_GEOM_WITHER)
                                continue;
                        LIST_FOREACH(pp, &gp->provider, provider) {
                                if (vdev_geom_attach_taster(zcp, pp) != 0)
                                        continue;
                                g_topology_unlock();
                                vdev_geom_read_guids(zcp, &pguid, &vguid);
                                g_topology_lock();
                                vdev_geom_detach_taster(zcp);
                                /*
                                 * Check that the label's vdev guid matches the
                                 * desired guid.  If the label has a pool guid,
                                 * check that it matches too.  (Inactive spares
                                 * and L2ARCs do not have any pool guid in the
                                 * label.)
                                 */
                                if ((pguid != 0 &&
                                     pguid != spa_guid(vd->vdev_spa)) ||
                                    vguid != vd->vdev_guid)
                                        continue;
                                cp = vdev_geom_attach(pp, vd);
                                if (cp == NULL) {
                                        printf("ZFS WARNING: Unable to "
                                            "attach to %s.\n", pp->name);
                                        continue;
                                }
                                break;
                        }
                        if (cp != NULL)
                                break;
                }
                if (cp != NULL)
                        break;
        }
        g_destroy_consumer(zcp);
        g_destroy_geom(zgp);
        return (cp);
}

static struct g_consumer *
vdev_geom_open_by_guids(vdev_t *vd)
{
        struct g_consumer *cp;
        char *buf;
        size_t len;

        g_topology_assert();

        ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid);
        cp = vdev_geom_attach_by_guids(vd);
        if (cp != NULL) {
                len = strlen(cp->provider->name) + strlen("/dev/") + 1;
                buf = kmem_alloc(len, KM_SLEEP);

                snprintf(buf, len, "/dev/%s", cp->provider->name);
                spa_strfree(vd->vdev_path);
                vd->vdev_path = buf;

                ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
                    (uintmax_t)spa_guid(vd->vdev_spa),
                    (uintmax_t)vd->vdev_guid, vd->vdev_path);
        } else {
                ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
                    (uintmax_t)spa_guid(vd->vdev_spa),
                    (uintmax_t)vd->vdev_guid);
        }

        return (cp);
}

static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
        struct g_provider *pp;
        struct g_consumer *cp;
        uint64_t pguid, vguid;

        g_topology_assert();

        cp = NULL;
        pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
        if (pp != NULL) {
                ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
                cp = vdev_geom_attach(pp, vd);
                if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
                    pp->sectorsize <= VDEV_PAD_SIZE) {
                        g_topology_unlock();
                        vdev_geom_read_guids(cp, &pguid, &vguid);
                        g_topology_lock();
                        if (pguid != spa_guid(vd->vdev_spa) ||
                            vguid != vd->vdev_guid) {
                                vdev_geom_close_locked(vd);
                                cp = NULL;
                                ZFS_LOG(1, "guid mismatch for provider %s: "
                                    "%ju:%ju != %ju:%ju.", vd->vdev_path,
                                    (uintmax_t)spa_guid(vd->vdev_spa),
                                    (uintmax_t)vd->vdev_guid,
                                    (uintmax_t)pguid, (uintmax_t)vguid);
                        } else {
                                ZFS_LOG(1, "guid match for provider %s.",
                                    vd->vdev_path);
                        }
                }
        }

        return (cp);
}

static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
        struct g_provider *pp;
        struct g_consumer *cp;
        size_t bufsize;
        int error;

        /*
         * We must have a pathname, and it must be absolute.
         */
        if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
                vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
                return (EINVAL);
        }

        /* Set the TLS to indicate downstack that we should not access zvols. */
        VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0);

        vd->vdev_tsd = NULL;

        DROP_GIANT();
        g_topology_lock();
        error = 0;

        if (vd->vdev_spa->spa_splitting_newspa ||
            (vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
             vd->vdev_spa->spa_load_state == SPA_LOAD_NONE)) {
                /*
                 * We are dealing with a vdev that hasn't been previously
                 * opened (since boot), and we are not loading an
                 * existing pool configuration.  This looks like a
                 * vdev add operation to a new or existing pool.
                 * Assume the user knows what he/she is doing and find
                 * GEOM provider by its name, ignoring GUID mismatches.
                 *
                 * XXPOLICY: It would be safer to only allow a device
                 *           that is unlabeled or labeled but missing
                 *           GUID information to be opened in this fashion,
                 *           unless we are doing a split, in which case we
                 *           should allow any guid.
                 */
                cp = vdev_geom_open_by_path(vd, 0);
        } else {
                /*
                 * Try using the recorded path for this device, but only
                 * accept it if its label data contains the expected GUIDs.
                 */
                cp = vdev_geom_open_by_path(vd, 1);
                if (cp == NULL) {
                        /*
                         * The device at vd->vdev_path doesn't have the
                         * expected GUIDs.  The disks might have merely
                         * moved around so try all other GEOM providers
                         * to find one with the right GUIDs.
                         */
                        cp = vdev_geom_open_by_guids(vd);
                }
        }

        /* Clear the TLS now that tasting is done. */
        VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0);

        if (cp == NULL) {
                ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
                error = ENOENT;
        } else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
            !ISP2(cp->provider->sectorsize)) {
                ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
                    vd->vdev_path);

                vdev_geom_close_locked(vd);
                error = EINVAL;
                cp = NULL;
        } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
                int i;

                for (i = 0; i < 5; i++) {
                        error = g_access(cp, 0, 1, 0);
                        if (error == 0)
                                break;
                        g_topology_unlock();
                        tsleep(vd, 0, "vdev", hz / 2);
                        g_topology_lock();
                }
                if (error != 0) {
                        printf("ZFS WARNING: Unable to open %s for writing "
                            "(error=%d).\n", vd->vdev_path, error);
                        vdev_geom_close_locked(vd);
                        cp = NULL;
                }
        }

        g_topology_unlock();
        PICKUP_GIANT();
        if (cp == NULL) {
                vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
                return (error);
        }
        pp = cp->provider;

        /*
         * Determine the actual size of the device.
         */
        *max_psize = *psize = pp->mediasize;

        /*
         * Determine the device's minimum transfer size and preferred
         * transfer size.
         */
        *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
        *physical_ashift = 0;
        if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) &&
            pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0)
                *physical_ashift = highbit(pp->stripesize) - 1;

        /*
         * Clear the nowritecache settings, so that on a vdev_reopen()
         * we will try again.
         */
        vd->vdev_nowritecache = B_FALSE;

        /*
         * Determine the device's rotation rate.
         */
        vdev_geom_set_rotation_rate(vd, cp);

        return (0);
}
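
/*
 * Example of the ashift derivation above: a provider with a 512-byte
 * sectorsize yields *logical_ashift = 9, while a 4 KiB stripesize with
 * stripeoffset 0 (e.g. an "Advanced Format" disk) yields
 * *physical_ashift = 12, since highbit(4096) - 1 == 12.
 */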

static void
vdev_geom_close(vdev_t *vd)
{

        DROP_GIANT();
        g_topology_lock();
        vdev_geom_close_locked(vd);
        g_topology_unlock();
        PICKUP_GIANT();
}

static void
vdev_geom_io_intr(struct bio *bp)
{
        vdev_t *vd;
        zio_t *zio;

        zio = bp->bio_caller1;
        vd = zio->io_vd;
        zio->io_error = bp->bio_error;
        if (zio->io_error == 0 && bp->bio_resid != 0)
                zio->io_error = SET_ERROR(EIO);

        switch (zio->io_error) {
        case ENOTSUP:
                /*
                 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
                 * that future attempts will never succeed.  In this case
                 * we set a persistent flag so that we don't bother with
                 * requests in the future.
                 */
                switch (bp->bio_cmd) {
                case BIO_FLUSH:
                        vd->vdev_nowritecache = B_TRUE;
                        break;
                case BIO_DELETE:
                        vd->vdev_notrim = B_TRUE;
                        break;
                }
                break;
        case ENXIO:
                if (!vd->vdev_remove_wanted) {
                        /*
                         * If the provider's error is set we assume it is
                         * being removed.
                         */
                        if (bp->bio_to->error != 0) {
                                vd->vdev_remove_wanted = B_TRUE;
                                spa_async_request(zio->io_spa,
                                    SPA_ASYNC_REMOVE);
                        } else if (!vd->vdev_delayed_close) {
                                vd->vdev_delayed_close = B_TRUE;
                        }
                }
                break;
        }
        g_destroy_bio(bp);
        zio_delay_interrupt(zio);
}

static void
vdev_geom_io_start(zio_t *zio)
{
        vdev_t *vd;
        struct g_consumer *cp;
        struct bio *bp;
        int error;

        vd = zio->io_vd;

        switch (zio->io_type) {
        case ZIO_TYPE_IOCTL:
                /* XXPOLICY */
                if (!vdev_readable(vd)) {
                        zio->io_error = SET_ERROR(ENXIO);
                        zio_interrupt(zio);
                        return;
                } else {
                        switch (zio->io_cmd) {
                        case DKIOCFLUSHWRITECACHE:
                                if (zfs_nocacheflush ||
                                    vdev_geom_bio_flush_disable)
                                        break;
                                if (vd->vdev_nowritecache) {
                                        zio->io_error = SET_ERROR(ENOTSUP);
                                        break;
                                }
                                goto sendreq;
                        default:
                                zio->io_error = SET_ERROR(ENOTSUP);
                        }
                }

                zio_execute(zio);
                return;
        case ZIO_TYPE_FREE:
                if (vd->vdev_notrim) {
                        zio->io_error = SET_ERROR(ENOTSUP);
                } else if (!vdev_geom_bio_delete_disable) {
                        goto sendreq;
                }
                zio_execute(zio);
                return;
        }
sendreq:
        ASSERT(zio->io_type == ZIO_TYPE_READ ||
            zio->io_type == ZIO_TYPE_WRITE ||
            zio->io_type == ZIO_TYPE_FREE ||
            zio->io_type == ZIO_TYPE_IOCTL);

        cp = vd->vdev_tsd;
        if (cp == NULL) {
                zio->io_error = SET_ERROR(ENXIO);
                zio_interrupt(zio);
                return;
        }
        bp = g_alloc_bio();
        bp->bio_caller1 = zio;
        switch (zio->io_type) {
        case ZIO_TYPE_READ:
        case ZIO_TYPE_WRITE:
                zio->io_target_timestamp = zio_handle_io_delay(zio);
                bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ?
                    BIO_READ : BIO_WRITE;
                bp->bio_data = zio->io_data;
                bp->bio_offset = zio->io_offset;
                bp->bio_length = zio->io_size;
                break;
        case ZIO_TYPE_FREE:
                bp->bio_cmd = BIO_DELETE;
                bp->bio_data = NULL;
                bp->bio_offset = zio->io_offset;
                bp->bio_length = zio->io_size;
                break;
        case ZIO_TYPE_IOCTL:
                bp->bio_cmd = BIO_FLUSH;
                bp->bio_flags |= BIO_ORDERED;
                bp->bio_data = NULL;
                bp->bio_offset = cp->provider->mediasize;
                bp->bio_length = 0;
                break;
        }
        bp->bio_done = vdev_geom_io_intr;

        g_io_request(bp, cp);
}

static void
vdev_geom_io_done(zio_t *zio)
{
}

static void
vdev_geom_hold(vdev_t *vd)
{
}

static void
vdev_geom_rele(vdev_t *vd)
{
}

vdev_ops_t vdev_geom_ops = {
        vdev_geom_open,
        vdev_geom_close,
        vdev_default_asize,
        vdev_geom_io_start,
        vdev_geom_io_done,
        NULL,
        vdev_geom_hold,
        vdev_geom_rele,
        VDEV_TYPE_DISK,         /* name of this vdev type */
        B_TRUE                  /* leaf vdev */
};