/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/disk.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <geom/geom.h>
#include <geom/geom_int.h>

/*
 * Virtual device vector for GEOM.
 */

static g_attrchanged_t vdev_geom_attrchanged;
struct g_class zfs_vdev_class = {
        .name = "ZFS::VDEV",
        .version = G_VERSION,
        .attrchanged = vdev_geom_attrchanged,
};

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");

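/*
 * Cache the provider's reported rotation rate in the vdev.  If the
 * provider does not implement the GEOM::rotation_rate attribute, the
 * rate is recorded as unknown.
 */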
static void
vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
{
        int error;
        uint16_t rate;

        error = g_getattr("GEOM::rotation_rate", cp, &rate);
        if (error == 0)
                vd->vdev_rotation_rate = rate;
        else
                vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
}

static void
vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
{
        vdev_t *vd;
        spa_t *spa;
        char *physpath;
        int error, physpath_len;

        vd = cp->private;
        if (vd == NULL)
                return;

        if (strcmp(attr, "GEOM::rotation_rate") == 0) {
                vdev_geom_set_rotation_rate(vd, cp);
                return;
        }

        if (strcmp(attr, "GEOM::physpath") != 0)
                return;

        if (g_access(cp, 1, 0, 0) != 0)
                return;

        /*
         * Record/Update physical path information for this device.
         */
        spa = vd->vdev_spa;
        physpath_len = MAXPATHLEN;
        physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
        error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
        g_access(cp, -1, 0, 0);
        if (error == 0) {
                char *old_physpath;

                old_physpath = vd->vdev_physpath;
                vd->vdev_physpath = spa_strdup(physpath);
                spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);

                if (old_physpath != NULL) {
                        int held_lock;

                        held_lock = spa_config_held(spa, SCL_STATE, RW_WRITER);
                        if (held_lock == 0) {
                                g_topology_unlock();
                                spa_config_enter(spa, SCL_STATE, FTAG,
                                    RW_WRITER);
                        }

                        spa_strfree(old_physpath);

                        if (held_lock == 0) {
                                spa_config_exit(spa, SCL_STATE, FTAG);
                                g_topology_lock();
                        }
                }
        }
        g_free(physpath);
}

static void
vdev_geom_orphan(struct g_consumer *cp)
{
        vdev_t *vd;

        g_topology_assert();

        vd = cp->private;
        if (vd == NULL) {
                /* Vdev close in progress.  Ignore the event. */
                return;
        }

        /*
         * Orphan callbacks occur from the GEOM event thread.
         * Concurrent with this call, new I/O requests may be
         * working their way through GEOM about to find out
         * (only once executed by the g_down thread) that we've
         * been orphaned from our disk provider.  These I/Os
         * must be retired before we can detach our consumer.
         * This is most easily achieved by acquiring the
         * SPA ZIO configuration lock as a writer, but doing
         * so with the GEOM topology lock held would cause
         * a lock order reversal.  Instead, rely on the SPA's
         * async removal support to invoke a close on this
         * vdev once it is safe to do so.
         */
        vd->vdev_remove_wanted = B_TRUE;
        spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
}

static struct g_consumer *
vdev_geom_attach(struct g_provider *pp, vdev_t *vd)
{
        struct g_geom *gp;
        struct g_consumer *cp;

        g_topology_assert();

        ZFS_LOG(1, "Attaching to %s.", pp->name);
        /* Do we already have a geom?  If not, create one. */
        LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
                if (gp->flags & G_GEOM_WITHER)
                        continue;
                if (strcmp(gp->name, "zfs::vdev") != 0)
                        continue;
                break;
        }
        if (gp == NULL) {
                gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
                gp->orphan = vdev_geom_orphan;
                gp->attrchanged = vdev_geom_attrchanged;
                cp = g_new_consumer(gp);
                if (g_attach(cp, pp) != 0) {
                        g_wither_geom(gp, ENXIO);
                        return (NULL);
                }
                if (g_access(cp, 1, 0, 1) != 0) {
                        g_wither_geom(gp, ENXIO);
                        return (NULL);
                }
                ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
        } else {
                /* Check if we are already connected to this provider. */
                LIST_FOREACH(cp, &gp->consumer, consumer) {
                        if (cp->provider == pp) {
                                ZFS_LOG(1, "Found consumer for %s.", pp->name);
                                break;
                        }
                }
                if (cp == NULL) {
                        cp = g_new_consumer(gp);
                        if (g_attach(cp, pp) != 0) {
                                g_destroy_consumer(cp);
                                return (NULL);
                        }
                        if (g_access(cp, 1, 0, 1) != 0) {
                                g_detach(cp);
                                g_destroy_consumer(cp);
                                return (NULL);
                        }
                        ZFS_LOG(1, "Created consumer for %s.", pp->name);
                } else {
                        if (g_access(cp, 1, 0, 1) != 0)
                                return (NULL);
                        ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
                }
        }

        /*
         * BUG: cp may already belong to a vdev.  This could happen if:
         * 1) That vdev is a shared spare, or
         * 2) We are trying to reopen a missing vdev and we are scanning by
         *    guid.  In that case, we'll ultimately fail to open this consumer,
         *    but not until after setting the private field.
         * The solution is to:
         * 1) Don't set the private field until after the open succeeds, and
         * 2) Set it to a linked list of vdevs, not just a single vdev
         */
        cp->private = vd;
        vd->vdev_tsd = cp;

        /* Fetch initial physical path information for this device. */
        vdev_geom_attrchanged(cp, "GEOM::physpath");

        cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
        return (cp);
}

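/*
 * Drop the read and exclusive access references taken at attach time and
 * detach this vdev from its consumer.  The consumer is destroyed on the
 * last close, and the enclosing geom is withered once it has no consumers
 * left.  The caller must hold the GEOM topology lock.
 */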
static void
vdev_geom_close_locked(vdev_t *vd)
{
        struct g_geom *gp;
        struct g_consumer *cp;

        g_topology_assert();

        cp = vd->vdev_tsd;
        if (cp == NULL)
                return;

        ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
        KASSERT(vd->vdev_tsd == cp, ("%s: vdev_tsd is not cp", __func__));
        vd->vdev_tsd = NULL;
        vd->vdev_delayed_close = B_FALSE;
        cp->private = NULL;

        gp = cp->geom;
        g_access(cp, -1, 0, -1);
        /* Destroy consumer on last close. */
        if (cp->acr == 0 && cp->ace == 0) {
                if (cp->acw > 0)
                        g_access(cp, 0, -cp->acw, 0);
                if (cp->provider != NULL) {
                        ZFS_LOG(1, "Destroyed consumer to %s.",
                            cp->provider->name);
                        g_detach(cp);
                }
                g_destroy_consumer(cp);
        }
        /* Destroy geom if there are no consumers left. */
        if (LIST_EMPTY(&gp->consumer)) {
                ZFS_LOG(1, "Destroyed geom %s.", gp->name);
                g_wither_geom(gp, ENXIO);
        }
}

static void
nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid)
{

        (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid);
        (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid);
}

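/*
 * Synchronously issue a command to the consumer, splitting the transfer
 * into sector-aligned chunks of at most MAXPHYS bytes and waiting for
 * each chunk to complete before issuing the next.  Returns the error of
 * the first failed chunk, or 0 on success.
 */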
static int
vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
{
        struct bio *bp;
        u_char *p;
        off_t off, maxio;
        int error;

        ASSERT((offset % cp->provider->sectorsize) == 0);
        ASSERT((size % cp->provider->sectorsize) == 0);

        bp = g_alloc_bio();
        off = offset;
        offset += size;
        p = data;
        maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
        error = 0;

        for (; off < offset; off += maxio, p += maxio, size -= maxio) {
                bzero(bp, sizeof(*bp));
                bp->bio_cmd = cmd;
                bp->bio_done = NULL;
                bp->bio_offset = off;
                bp->bio_length = MIN(size, maxio);
                bp->bio_data = p;
                g_io_request(bp, cp);
                error = biowait(bp, "vdev_geom_io");
                if (error != 0)
                        break;
        }

        g_destroy_bio(bp);
        return (error);
}

static void
vdev_geom_taste_orphan(struct g_consumer *cp)
{

        KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
            cp->provider->name));
}

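/*
 * Read the vdev configuration from one of the device's labels.  The label
 * locations are tried in turn; a label is accepted only if its nvlist
 * unpacks cleanly, its pool state is valid, and (unless it belongs to a
 * spare or L2ARC device) it records a non-zero txg.  Returns ENOENT if no
 * usable label was found.
 */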
static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
{
        struct g_provider *pp;
        vdev_label_t *label;
        char *p, *buf;
        size_t buflen;
        uint64_t psize;
        off_t offset, size;
        uint64_t state, txg;
        int error, l, len;

        g_topology_assert_not();

        pp = cp->provider;
        ZFS_LOG(1, "Reading config from %s...", pp->name);

        psize = pp->mediasize;
        psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));

        size = sizeof(*label) + pp->sectorsize -
            ((sizeof(*label) - 1) % pp->sectorsize) - 1;

        label = kmem_alloc(size, KM_SLEEP);
        buflen = sizeof(label->vl_vdev_phys.vp_nvlist);

        *config = NULL;
        for (l = 0; l < VDEV_LABELS; l++) {

                offset = vdev_label_offset(psize, l, 0);
                if ((offset % pp->sectorsize) != 0)
                        continue;

                if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
                        continue;
                buf = label->vl_vdev_phys.vp_nvlist;

                if (nvlist_unpack(buf, buflen, config, 0) != 0)
                        continue;

                if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
                    &state) != 0 || state > POOL_STATE_L2CACHE) {
                        nvlist_free(*config);
                        *config = NULL;
                        continue;
                }

                if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
                    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
                    &txg) != 0 || txg == 0)) {
                        nvlist_free(*config);
                        *config = NULL;
                        continue;
                }

                break;
        }

        kmem_free(label, size);
        return (*config == NULL ? ENOENT : 0);
}

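/*
 * Grow the configs array, if necessary, so that it can hold an entry at
 * index "id".  Existing entries are preserved; new slots are zeroed.
 */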
static void
resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
{
        nvlist_t **new_configs;
        uint64_t i;

        if (id < *count)
                return;
        new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
            KM_SLEEP);
        for (i = 0; i < *count; i++)
                new_configs[i] = (*configs)[i];
        if (*configs != NULL)
                kmem_free(*configs, *count * sizeof(void *));
        *configs = new_configs;
        *count = id + 1;
}

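/*
 * Merge one vdev label's config into the per-pool configs array.  Configs
 * for other pools, or with missing fields, are discarded, and when two
 * labels describe the same top-level vdev id, the one with the newer txg
 * wins.  This function takes ownership of "cfg" in all cases.
 */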
static void
process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
    const char *name, uint64_t *known_pool_guid)
{
        nvlist_t *vdev_tree;
        uint64_t pool_guid;
        uint64_t vdev_guid, known_guid;
        uint64_t id, txg, known_txg;
        char *pname;
        int i;

        if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
            strcmp(pname, name) != 0)
                goto ignore;

        if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
                goto ignore;

        if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
                goto ignore;

        if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
                goto ignore;

        if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
                goto ignore;

        VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

        if (*known_pool_guid != 0) {
                if (pool_guid != *known_pool_guid)
                        goto ignore;
        } else
                *known_pool_guid = pool_guid;

        resize_configs(configs, count, id);

        if ((*configs)[id] != NULL) {
                VERIFY(nvlist_lookup_uint64((*configs)[id],
                    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
                if (txg <= known_txg)
                        goto ignore;
                nvlist_free((*configs)[id]);
        }

        (*configs)[id] = cfg;
        return;

ignore:
        nvlist_free(cfg);
}

static int
vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
{
        int error;

        if (pp->flags & G_PF_WITHER)
                return (EINVAL);
        g_attach(cp, pp);
        error = g_access(cp, 1, 0, 0);
        if (error == 0) {
                if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
                        error = EINVAL;
                else if (pp->mediasize < SPA_MINDEVSIZE)
                        error = EINVAL;
                if (error != 0)
                        g_access(cp, -1, 0, 0);
        }
        if (error != 0)
                g_detach(cp);
        return (error);
}

static void
vdev_geom_detach_taster(struct g_consumer *cp)
{
        g_access(cp, -1, 0, 0);
        g_detach(cp);
}

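/*
 * Taste every eligible GEOM provider in the system, collecting the most
 * recent label config for each top-level vdev of the named pool.  Returns
 * ENOENT if no matching labels were found.
 */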
int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
        struct g_class *mp;
        struct g_geom *gp, *zgp;
        struct g_provider *pp;
        struct g_consumer *zcp;
        nvlist_t *vdev_cfg;
        uint64_t pool_guid;
        int error;

        DROP_GIANT();
        g_topology_lock();

        zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
        /* This orphan function should never be called. */
        zgp->orphan = vdev_geom_taste_orphan;
        zcp = g_new_consumer(zgp);

        *configs = NULL;
        *count = 0;
        pool_guid = 0;
        LIST_FOREACH(mp, &g_classes, class) {
                if (mp == &zfs_vdev_class)
                        continue;
                LIST_FOREACH(gp, &mp->geom, geom) {
                        if (gp->flags & G_GEOM_WITHER)
                                continue;
                        LIST_FOREACH(pp, &gp->provider, provider) {
                                if (pp->flags & G_PF_WITHER)
                                        continue;
                                if (vdev_geom_attach_taster(zcp, pp) != 0)
                                        continue;
                                g_topology_unlock();
                                error = vdev_geom_read_config(zcp, &vdev_cfg);
                                g_topology_lock();
                                vdev_geom_detach_taster(zcp);
                                if (error)
                                        continue;
                                ZFS_LOG(1, "successfully read vdev config");

                                process_vdev_config(configs, count,
                                    vdev_cfg, name, &pool_guid);
                        }
                }
        }

        g_destroy_consumer(zcp);
        g_destroy_geom(zgp);
        g_topology_unlock();
        PICKUP_GIANT();

        return (*count > 0 ? 0 : ENOENT);
}

static void
vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid)
{
        nvlist_t *config;

        g_topology_assert_not();

        *pguid = 0;
        *vguid = 0;
        if (vdev_geom_read_config(cp, &config) == 0) {
                nvlist_get_guids(config, pguid, vguid);
                nvlist_free(config);
        }
}

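/*
 * Search every GEOM provider for a label whose vdev guid matches this
 * vdev, and attach to the first provider that qualifies.  The pool guid
 * is compared only when the label records one, since inactive spares and
 * L2ARC devices carry no pool guid in their labels.
 */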
static struct g_consumer *
vdev_geom_attach_by_guids(vdev_t *vd)
{
        struct g_class *mp;
        struct g_geom *gp, *zgp;
        struct g_provider *pp;
        struct g_consumer *cp, *zcp;
        uint64_t pguid, vguid;

        g_topology_assert();

        zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
        /* This orphan function should never be called. */
        zgp->orphan = vdev_geom_taste_orphan;
        zcp = g_new_consumer(zgp);

        cp = NULL;
        LIST_FOREACH(mp, &g_classes, class) {
                if (mp == &zfs_vdev_class)
                        continue;
                LIST_FOREACH(gp, &mp->geom, geom) {
                        if (gp->flags & G_GEOM_WITHER)
                                continue;
                        LIST_FOREACH(pp, &gp->provider, provider) {
                                if (vdev_geom_attach_taster(zcp, pp) != 0)
                                        continue;
                                g_topology_unlock();
                                vdev_geom_read_guids(zcp, &pguid, &vguid);
                                g_topology_lock();
                                vdev_geom_detach_taster(zcp);
                                /*
                                 * Check that the label's vdev guid matches the
                                 * desired guid.  If the label has a pool guid,
                                 * check that it matches too.  (Inactive spares
                                 * and L2ARCs do not have any pool guid in the
                                 * label.)
                                 */
                                if ((pguid != 0 &&
                                     pguid != spa_guid(vd->vdev_spa)) ||
                                    vguid != vd->vdev_guid)
                                        continue;
                                cp = vdev_geom_attach(pp, vd);
                                if (cp == NULL) {
                                        printf("ZFS WARNING: Unable to "
                                            "attach to %s.\n", pp->name);
                                        continue;
                                }
                                break;
                        }
                        if (cp != NULL)
                                break;
                }
                if (cp != NULL)
                        break;
        }
        g_destroy_consumer(zcp);
        g_destroy_geom(zgp);
        return (cp);
}

static struct g_consumer *
vdev_geom_open_by_guids(vdev_t *vd)
{
        struct g_consumer *cp;
        char *buf;
        size_t len;

        g_topology_assert();

        ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid);
        cp = vdev_geom_attach_by_guids(vd);
        if (cp != NULL) {
                len = strlen(cp->provider->name) + strlen("/dev/") + 1;
                buf = kmem_alloc(len, KM_SLEEP);

                snprintf(buf, len, "/dev/%s", cp->provider->name);
                spa_strfree(vd->vdev_path);
                vd->vdev_path = buf;

                ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
                    (uintmax_t)spa_guid(vd->vdev_spa),
                    (uintmax_t)vd->vdev_guid, vd->vdev_path);
        } else {
                ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
                    (uintmax_t)spa_guid(vd->vdev_spa),
                    (uintmax_t)vd->vdev_guid);
        }

        return (cp);
}

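/*
 * Attach to the provider named by vd->vdev_path.  When check_guid is set,
 * the attachment is verified against the label's pool and vdev guids and
 * is rolled back on a mismatch.
 */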
static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
        struct g_provider *pp;
        struct g_consumer *cp;
        uint64_t pguid, vguid;

        g_topology_assert();

        cp = NULL;
        pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
        if (pp != NULL) {
                ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
                cp = vdev_geom_attach(pp, vd);
                if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
                    pp->sectorsize <= VDEV_PAD_SIZE) {
                        g_topology_unlock();
                        vdev_geom_read_guids(cp, &pguid, &vguid);
                        g_topology_lock();
                        if (pguid != spa_guid(vd->vdev_spa) ||
                            vguid != vd->vdev_guid) {
                                vdev_geom_close_locked(vd);
                                cp = NULL;
                                ZFS_LOG(1, "guid mismatch for provider %s: "
                                    "%ju:%ju != %ju:%ju.", vd->vdev_path,
                                    (uintmax_t)spa_guid(vd->vdev_spa),
                                    (uintmax_t)vd->vdev_guid,
                                    (uintmax_t)pguid, (uintmax_t)vguid);
                        } else {
                                ZFS_LOG(1, "guid match for provider %s.",
                                    vd->vdev_path);
                        }
                }
        }

        return (cp);
}

static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
        struct g_provider *pp;
        struct g_consumer *cp;
        size_t bufsize;
        int error;

        /*
         * We must have a pathname, and it must be absolute.
         */
        if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
                vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
                return (EINVAL);
        }

        vd->vdev_tsd = NULL;

        DROP_GIANT();
        g_topology_lock();
        error = 0;

        if (vd->vdev_spa->spa_splitting_newspa ||
            (vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
             vd->vdev_spa->spa_load_state == SPA_LOAD_NONE)) {
                /*
                 * We are dealing with a vdev that hasn't been previously
                 * opened (since boot), and we are not loading an
                 * existing pool configuration.  This looks like a
                 * vdev add operation to a new or existing pool.
                 * Assume the user knows what he/she is doing and find
                 * the GEOM provider by its name, ignoring GUID mismatches.
                 *
                 * XXPOLICY: It would be safer to only allow a device
                 *           that is unlabeled or labeled but missing
                 *           GUID information to be opened in this fashion,
                 *           unless we are doing a split, in which case we
                 *           should allow any guid.
                 */
                cp = vdev_geom_open_by_path(vd, 0);
        } else {
                /*
                 * Try using the recorded path for this device, but only
                 * accept it if its label data contains the expected GUIDs.
                 */
                cp = vdev_geom_open_by_path(vd, 1);
                if (cp == NULL) {
                        /*
                         * The device at vd->vdev_path doesn't have the
                         * expected GUIDs.  The disks might have merely
                         * moved around so try all other GEOM providers
                         * to find one with the right GUIDs.
                         */
                        cp = vdev_geom_open_by_guids(vd);
                }
        }

        if (cp == NULL) {
                ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
                error = ENOENT;
        } else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
            !ISP2(cp->provider->sectorsize)) {
                ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
                    vd->vdev_path);

                vdev_geom_close_locked(vd);
                error = EINVAL;
                cp = NULL;
        } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
                int i;

                for (i = 0; i < 5; i++) {
                        error = g_access(cp, 0, 1, 0);
                        if (error == 0)
                                break;
                        g_topology_unlock();
                        tsleep(vd, 0, "vdev", hz / 2);
                        g_topology_lock();
                }
                if (error != 0) {
                        printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
                            vd->vdev_path, error);
                        vdev_geom_close_locked(vd);
                        cp = NULL;
                }
        }

        g_topology_unlock();
        PICKUP_GIANT();
        if (cp == NULL) {
                vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
                return (error);
        }
        pp = cp->provider;

        /*
         * Determine the actual size of the device.
         */
        *max_psize = *psize = pp->mediasize;

        /*
         * Determine the device's minimum transfer size and preferred
         * transfer size.
         */
        *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
        *physical_ashift = 0;
        if (pp->stripesize)
                *physical_ashift = highbit(pp->stripesize) - 1;

        /*
         * Clear the nowritecache settings, so that on a vdev_reopen()
         * we will try again.
         */
        vd->vdev_nowritecache = B_FALSE;

        /*
         * Determine the device's rotation rate.
         */
        vdev_geom_set_rotation_rate(vd, cp);

        return (0);
}

static void
vdev_geom_close(vdev_t *vd)
{

        DROP_GIANT();
        g_topology_lock();
        vdev_geom_close_locked(vd);
        g_topology_unlock();
        PICKUP_GIANT();
}

static void
vdev_geom_io_intr(struct bio *bp)
{
        vdev_t *vd;
        zio_t *zio;

        zio = bp->bio_caller1;
        vd = zio->io_vd;
        zio->io_error = bp->bio_error;
        if (zio->io_error == 0 && bp->bio_resid != 0)
                zio->io_error = SET_ERROR(EIO);

        switch (zio->io_error) {
        case ENOTSUP:
                /*
                 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
                 * that future attempts will never succeed.  In this case
                 * we set a persistent flag so that we don't bother with
                 * requests in the future.
                 */
                switch (bp->bio_cmd) {
                case BIO_FLUSH:
                        vd->vdev_nowritecache = B_TRUE;
                        break;
                case BIO_DELETE:
                        vd->vdev_notrim = B_TRUE;
                        break;
                }
                break;
        case ENXIO:
                if (!vd->vdev_remove_wanted) {
                        /*
                         * If the provider's error is set we assume it is
                         * being removed.
                         */
                        if (bp->bio_to->error != 0) {
                                vd->vdev_remove_wanted = B_TRUE;
                                spa_async_request(zio->io_spa,
                                    SPA_ASYNC_REMOVE);
                        } else if (!vd->vdev_delayed_close) {
                                vd->vdev_delayed_close = B_TRUE;
                        }
                }
                break;
        }
        g_destroy_bio(bp);
        zio_delay_interrupt(zio);
}

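/*
 * Dispatch a ZIO to the underlying provider.  Reads and writes map
 * directly to BIO_READ/BIO_WRITE, frees become BIO_DELETE (TRIM), and the
 * only supported ioctl, DKIOCFLUSHWRITECACHE, becomes an ordered
 * BIO_FLUSH.  Completion is reported asynchronously via
 * vdev_geom_io_intr().
 */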
static void
vdev_geom_io_start(zio_t *zio)
{
        vdev_t *vd;
        struct g_consumer *cp;
        struct bio *bp;
        int error;

        vd = zio->io_vd;

        switch (zio->io_type) {
        case ZIO_TYPE_IOCTL:
                /* XXPOLICY */
                if (!vdev_readable(vd)) {
                        zio->io_error = SET_ERROR(ENXIO);
                        zio_interrupt(zio);
                        return;
                } else {
                        switch (zio->io_cmd) {
                        case DKIOCFLUSHWRITECACHE:
                                if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
                                        break;
                                if (vd->vdev_nowritecache) {
                                        zio->io_error = SET_ERROR(ENOTSUP);
                                        break;
                                }
                                goto sendreq;
                        default:
                                zio->io_error = SET_ERROR(ENOTSUP);
                        }
                }

                zio_execute(zio);
                return;
        case ZIO_TYPE_FREE:
                if (vd->vdev_notrim) {
                        zio->io_error = SET_ERROR(ENOTSUP);
                } else if (!vdev_geom_bio_delete_disable) {
                        goto sendreq;
                }
                zio_execute(zio);
                return;
        }
sendreq:
        ASSERT(zio->io_type == ZIO_TYPE_READ ||
            zio->io_type == ZIO_TYPE_WRITE ||
            zio->io_type == ZIO_TYPE_FREE ||
            zio->io_type == ZIO_TYPE_IOCTL);

        cp = vd->vdev_tsd;
        if (cp == NULL) {
                zio->io_error = SET_ERROR(ENXIO);
                zio_interrupt(zio);
                return;
        }
        bp = g_alloc_bio();
        bp->bio_caller1 = zio;
        switch (zio->io_type) {
        case ZIO_TYPE_READ:
        case ZIO_TYPE_WRITE:
                zio->io_target_timestamp = zio_handle_io_delay(zio);
                bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
                bp->bio_data = zio->io_data;
                bp->bio_offset = zio->io_offset;
                bp->bio_length = zio->io_size;
                break;
        case ZIO_TYPE_FREE:
                bp->bio_cmd = BIO_DELETE;
                bp->bio_data = NULL;
                bp->bio_offset = zio->io_offset;
                bp->bio_length = zio->io_size;
                break;
        case ZIO_TYPE_IOCTL:
                bp->bio_cmd = BIO_FLUSH;
                bp->bio_flags |= BIO_ORDERED;
                bp->bio_data = NULL;
                bp->bio_offset = cp->provider->mediasize;
                bp->bio_length = 0;
                break;
        }
        bp->bio_done = vdev_geom_io_intr;

        g_io_request(bp, cp);
}

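/*
 * Nothing to do here; all completion processing happens in
 * vdev_geom_io_intr().
 */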
static void
vdev_geom_io_done(zio_t *zio)
{
}

static void
vdev_geom_hold(vdev_t *vd)
{
}

static void
vdev_geom_rele(vdev_t *vd)
{
}

vdev_ops_t vdev_geom_ops = {
        vdev_geom_open,
        vdev_geom_close,
        vdev_default_asize,
        vdev_geom_io_start,
        vdev_geom_io_done,
        NULL,                   /* vdev_op_state_change */
        vdev_geom_hold,
        vdev_geom_rele,
        VDEV_TYPE_DISK,         /* name of this vdev type */
        B_TRUE                  /* leaf vdev */
};