]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
MFC r294329 (by asomers): Disallow zvol-backed ZFS pools
[FreeBSD/stable/10.git] / sys / cddl / contrib / opensolaris / uts / common / fs / zfs / vdev_geom.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
23  * All rights reserved.
24  *
25  * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
26  */
27
28 #include <sys/zfs_context.h>
29 #include <sys/param.h>
30 #include <sys/kernel.h>
31 #include <sys/bio.h>
32 #include <sys/disk.h>
33 #include <sys/spa.h>
34 #include <sys/spa_impl.h>
35 #include <sys/vdev_impl.h>
36 #include <sys/fs/zfs.h>
37 #include <sys/zio.h>
38 #include <geom/geom.h>
39 #include <geom/geom_int.h>
40
41 /*
42  * Virtual device vector for GEOM.
43  */
44
static g_attrchanged_t vdev_geom_attrchanged;

/* GEOM class shared by every ZFS vdev consumer in the system. */
struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
	.attrchanged = vdev_geom_attrchanged,
};
51
DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");

/*
 * Thread local storage used to indicate when a thread is probing geoms
 * for their guids.  If NULL, this thread is not tasting geoms.  If non NULL,
 * it is looking for a replacement for the vdev_t* that is its value.
 */
uint_t zfs_geom_probe_vdev_key;
72
73 static void
74 vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
75
76         int error;
77         uint16_t rate;
78
79         error = g_getattr("GEOM::rotation_rate", cp, &rate);
80         if (error == 0)
81                 vd->vdev_rotation_rate = rate;
82         else
83                 vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
84 }
85
/*
 * GEOM attribute-change callback.  Dispatches rotation-rate updates to
 * vdev_geom_set_rotation_rate() and records/updates the physical path
 * reported by the provider, requesting a config update on change.
 */
static void
vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
{
	vdev_t *vd;
	spa_t *spa;
	char *physpath;
	int error, physpath_len;

	/* cp->private is cleared while a vdev close is in progress. */
	vd = cp->private;
	if (vd == NULL)
		return;

	if (strcmp(attr, "GEOM::rotation_rate") == 0) {
		vdev_geom_set_rotation_rate(vd, cp);
		return;
	}

	if (strcmp(attr, "GEOM::physpath") != 0)
		return;

	/* Take a temporary read opening so we can issue the BIO_GETATTR. */
	if (g_access(cp, 1, 0, 0) != 0)
		return;

	/*
	 * Record/Update physical path information for this device.
	 */
	spa = vd->vdev_spa;
	physpath_len = MAXPATHLEN;
	physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
	error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
	g_access(cp, -1, 0, 0);
	if (error == 0) {
		char *old_physpath;

		/* g_topology lock ensures that vdev has not been closed */
		g_topology_assert();
		old_physpath = vd->vdev_physpath;
		vd->vdev_physpath = spa_strdup(physpath);
		/* Ask the SPA to push the updated config out to disk. */
		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);

		if (old_physpath != NULL)
			spa_strfree(old_physpath);
	}
	g_free(physpath);
}
131
132 static void
133 vdev_geom_orphan(struct g_consumer *cp)
134 {
135         vdev_t *vd;
136
137         g_topology_assert();
138
139         vd = cp->private;
140         if (vd == NULL) {
141                 /* Vdev close in progress.  Ignore the event. */
142                 return;
143         }
144
145         /*
146          * Orphan callbacks occur from the GEOM event thread.
147          * Concurrent with this call, new I/O requests may be
148          * working their way through GEOM about to find out
149          * (only once executed by the g_down thread) that we've
150          * been orphaned from our disk provider.  These I/Os
151          * must be retired before we can detach our consumer.
152          * This is most easily achieved by acquiring the
153          * SPA ZIO configuration lock as a writer, but doing
154          * so with the GEOM topology lock held would cause
155          * a lock order reversal.  Instead, rely on the SPA's
156          * async removal support to invoke a close on this
157          * vdev once it is safe to do so.
158          */
159         vd->vdev_remove_wanted = B_TRUE;
160         spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
161 }
162
/*
 * Attach a consumer for provider pp on behalf of vdev vd.  All vdev
 * consumers hang off a single shared "zfs::vdev" geom; both the geom
 * and per-provider consumers are created on demand, and repeat opens
 * of the same provider are reference-counted through g_access().
 * Returns the consumer, or NULL on failure.  Caller holds the
 * topology lock.
 */
static struct g_consumer *
vdev_geom_attach(struct g_provider *pp, vdev_t *vd)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	int error;

	g_topology_assert();

	ZFS_LOG(1, "Attaching to %s.", pp->name);
	/* Do we have geom already? No? Create one. */
	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
		if (gp->flags & G_GEOM_WITHER)
			continue;
		if (strcmp(gp->name, "zfs::vdev") != 0)
			continue;
		break;
	}
	if (gp == NULL) {
		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
		gp->orphan = vdev_geom_orphan;
		gp->attrchanged = vdev_geom_attrchanged;
		cp = g_new_consumer(gp);
		error = g_attach(cp, pp);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__,
			    __LINE__, error);
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		/* Open with one read and one exclusive count; no write yet. */
		error = g_access(cp, 1, 0, 1);
		if (error != 0) {
			ZFS_LOG(1, "%s(%d): g_access failed: %d\n", __func__,
			       __LINE__, error);
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
	} else {
		/* Check if we are already connected to this provider. */
		LIST_FOREACH(cp, &gp->consumer, consumer) {
			if (cp->provider == pp) {
				ZFS_LOG(1, "Found consumer for %s.", pp->name);
				break;
			}
		}
		if (cp == NULL) {
			cp = g_new_consumer(gp);
			error = g_attach(cp, pp);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_attach failed: %d\n",
				    __func__, __LINE__, error);
				g_destroy_consumer(cp);
				return (NULL);
			}
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				g_detach(cp);
				g_destroy_consumer(cp);
				return (NULL);
			}
			ZFS_LOG(1, "Created consumer for %s.", pp->name);
		} else {
			/* Existing consumer: just bump the access counts. */
			error = g_access(cp, 1, 0, 1);
			if (error != 0) {
				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
				    __func__, __LINE__, error);
				return (NULL);
			}
			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
		}
	}

	/* 
	 * BUG: cp may already belong to a vdev.  This could happen if:
	 * 1) That vdev is a shared spare, or
	 * 2) We are trying to reopen a missing vdev and we are scanning by
	 *    guid.  In that case, we'll ultimately fail to open this consumer,
	 *    but not until after setting the private field.
	 * The solution is to:
	 * 1) Don't set the private field until after the open succeeds, and
	 * 2) Set it to a linked list of vdevs, not just a single vdev
	 */
	cp->private = vd;
	vd->vdev_tsd = cp;

	/* Let GEOM bypass the request queues for this consumer. */
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	return (cp);
}
254
/*
 * Detach vd from its consumer and drop the access counts taken in
 * vdev_geom_attach().  The consumer is destroyed on last close, and
 * the shared geom is withered once it has no consumers left.  Caller
 * holds the topology lock.
 */
static void
vdev_geom_close_locked(vdev_t *vd)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();

	cp = vd->vdev_tsd;
	if (cp == NULL)
		return;

	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
	KASSERT(vd->vdev_tsd == cp, ("%s: vdev_tsd is not cp", __func__));
	vd->vdev_tsd = NULL;
	vd->vdev_delayed_close = B_FALSE;
	/* Clearing private tells GEOM callbacks this vdev is gone. */
	cp->private = NULL;

	gp = cp->geom;
	/* Drop the read and exclusive counts taken at attach time. */
	g_access(cp, -1, 0, -1);
	/* Destroy consumer on last close. */
	if (cp->acr == 0 && cp->ace == 0) {
		if (cp->acw > 0)
			g_access(cp, 0, -cp->acw, 0);
		if (cp->provider != NULL) {
			ZFS_LOG(1, "Destroyed consumer to %s.",
			    cp->provider->name);
			g_detach(cp);
		}
		g_destroy_consumer(cp);
	}
	/* Destroy geom if there are no consumers left. */
	if (LIST_EMPTY(&gp->consumer)) {
		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
		g_wither_geom(gp, ENXIO);
	}
}
292
293 static void
294 nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid)
295 {
296
297         (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid);
298         (void) nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid);
299 }
300
/*
 * Synchronously issue cmd (e.g. BIO_READ) against cp, splitting the
 * transfer into sector-aligned chunks no larger than MAXPHYS.  offset
 * and size must be multiples of the provider's sector size.  Returns 0
 * on success or the first bio error encountered.
 */
static int
vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
{
	struct bio *bp;
	u_char *p;
	off_t off, maxio;
	int error;

	ASSERT((offset % cp->provider->sectorsize) == 0);
	ASSERT((size % cp->provider->sectorsize) == 0);

	bp = g_alloc_bio();
	off = offset;
	offset += size;		/* offset now marks the end of the range */
	p = data;
	/* Largest per-request size: MAXPHYS rounded down to a sector. */
	maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
	error = 0;

	for (; off < offset; off += maxio, p += maxio, size -= maxio) {
		/* The single bio is reused per chunk, so clear it fully. */
		bzero(bp, sizeof(*bp));
		bp->bio_cmd = cmd;
		bp->bio_done = NULL;
		bp->bio_offset = off;
		/* The final chunk may be shorter than maxio. */
		bp->bio_length = MIN(size, maxio);
		bp->bio_data = p;
		g_io_request(bp, cp);
		error = biowait(bp, "vdev_geom_io");
		if (error != 0)
			break;
	}

	g_destroy_bio(bp);
	return (error);
}
335
/*
 * Orphan callback for the temporary "taste" consumer.  Tasting holds
 * no long-lived state, so there is nothing to tear down; just log it.
 */
static void
vdev_geom_taste_orphan(struct g_consumer *cp)
{
	ZFS_LOG(0, "WARNING: Orphan %s while tasting its VDev GUID.",
	    cp->provider->name);
}
342
343 static int
344 vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
345 {
346         struct g_provider *pp;
347         vdev_label_t *label;
348         char *p, *buf;
349         size_t buflen;
350         uint64_t psize;
351         off_t offset, size;
352         uint64_t state, txg;
353         int error, l, len;
354
355         g_topology_assert_not();
356
357         pp = cp->provider;
358         ZFS_LOG(1, "Reading config from %s...", pp->name);
359
360         psize = pp->mediasize;
361         psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));
362
363         size = sizeof(*label) + pp->sectorsize -
364             ((sizeof(*label) - 1) % pp->sectorsize) - 1;
365
366         label = kmem_alloc(size, KM_SLEEP);
367         buflen = sizeof(label->vl_vdev_phys.vp_nvlist);
368
369         *config = NULL;
370         for (l = 0; l < VDEV_LABELS; l++) {
371
372                 offset = vdev_label_offset(psize, l, 0);
373                 if ((offset % pp->sectorsize) != 0)
374                         continue;
375
376                 if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
377                         continue;
378                 buf = label->vl_vdev_phys.vp_nvlist;
379
380                 if (nvlist_unpack(buf, buflen, config, 0) != 0)
381                         continue;
382
383                 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
384                     &state) != 0 || state > POOL_STATE_L2CACHE) {
385                         nvlist_free(*config);
386                         *config = NULL;
387                         continue;
388                 }
389
390                 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
391                     (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
392                     &txg) != 0 || txg == 0)) {
393                         nvlist_free(*config);
394                         *config = NULL;
395                         continue;
396                 }
397
398                 break;
399         }
400
401         kmem_free(label, size);
402         return (*config == NULL ? ENOENT : 0);
403 }
404
405 static void
406 resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
407 {
408         nvlist_t **new_configs;
409         uint64_t i;
410
411         if (id < *count)
412                 return;
413         new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
414             KM_SLEEP);
415         for (i = 0; i < *count; i++)
416                 new_configs[i] = (*configs)[i];
417         if (*configs != NULL)
418                 kmem_free(*configs, *count * sizeof(void *));
419         *configs = new_configs;
420         *count = id + 1;
421 }
422
423 static void
424 process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
425     const char *name, uint64_t* known_pool_guid)
426 {
427         nvlist_t *vdev_tree;
428         uint64_t pool_guid;
429         uint64_t vdev_guid, known_guid;
430         uint64_t id, txg, known_txg;
431         char *pname;
432         int i;
433
434         if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
435             strcmp(pname, name) != 0)
436                 goto ignore;
437
438         if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
439                 goto ignore;
440
441         if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
442                 goto ignore;
443
444         if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
445                 goto ignore;
446
447         if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
448                 goto ignore;
449
450         VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);
451
452         if (*known_pool_guid != 0) {
453                 if (pool_guid != *known_pool_guid)
454                         goto ignore;
455         } else
456                 *known_pool_guid = pool_guid;
457
458         resize_configs(configs, count, id);
459
460         if ((*configs)[id] != NULL) {
461                 VERIFY(nvlist_lookup_uint64((*configs)[id],
462                     ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
463                 if (txg <= known_txg)
464                         goto ignore;
465                 nvlist_free((*configs)[id]);
466         }
467
468         (*configs)[id] = cfg;
469         return;
470
471 ignore:
472         nvlist_free(cfg);
473 }
474
475 static int
476 vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
477 {
478         int error;
479
480         if (pp->flags & G_PF_WITHER)
481                 return (EINVAL);
482         g_attach(cp, pp);
483         error = g_access(cp, 1, 0, 0);
484         if (error == 0) {
485                 if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
486                         error = EINVAL;
487                 else if (pp->mediasize < SPA_MINDEVSIZE)
488                         error = EINVAL;
489                 if (error != 0)
490                         g_access(cp, -1, 0, 0);
491         }
492         if (error != 0)
493                 g_detach(cp);
494         return (error);
495 }
496
/*
 * Undo vdev_geom_attach_taster(): drop the read opening, then detach
 * the taste consumer from its provider (in that order).
 */
static void
vdev_geom_detach_taster(struct g_consumer *cp)
{
	g_access(cp, -1, 0, 0);
	g_detach(cp);
}
503
/*
 * Scan every provider in the system (excluding our own vdev class) for
 * labels belonging to pool `name`, collecting the newest config per
 * top-level vdev into *configs / *count.  Returns 0 if at least one
 * config was found, otherwise ENOENT.  Caller owns the returned array
 * and the nvlists it contains.
 */
int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
	struct g_class *mp;
	struct g_geom *gp, *zgp;
	struct g_provider *pp;
	struct g_consumer *zcp;
	nvlist_t *vdev_cfg;
	uint64_t pool_guid;
	int error;

	DROP_GIANT();
	g_topology_lock();

	/* Throwaway geom/consumer used only for tasting providers. */
	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
	/* This orphan function should be never called. */
	zgp->orphan = vdev_geom_taste_orphan;
	zcp = g_new_consumer(zgp);

	*configs = NULL;
	*count = 0;
	pool_guid = 0;
	LIST_FOREACH(mp, &g_classes, class) {
		/* Skip our own class to avoid tasting ZFS vdevs. */
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (pp->flags & G_PF_WITHER)
					continue;
				if (vdev_geom_attach_taster(zcp, pp) != 0)
					continue;
				/* Label reads sleep; drop the lock. */
				g_topology_unlock();
				error = vdev_geom_read_config(zcp, &vdev_cfg);
				g_topology_lock();
				vdev_geom_detach_taster(zcp);
				if (error)
					continue;
				ZFS_LOG(1, "successfully read vdev config");

				/* process_vdev_config() consumes vdev_cfg. */
				process_vdev_config(configs, count,
				    vdev_cfg, name, &pool_guid);
			}
		}
	}

	g_destroy_consumer(zcp);
	g_destroy_geom(zgp);
	g_topology_unlock();
	PICKUP_GIANT();

	return (*count > 0 ? 0 : ENOENT);
}
559
560 static void
561 vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid)
562 {
563         nvlist_t *config;
564
565         g_topology_assert_not();
566
567         *pguid = 0;
568         *vguid = 0;
569         if (vdev_geom_read_config(cp, &config) == 0) {
570                 nvlist_get_guids(config, pguid, vguid);
571                 nvlist_free(config);
572         }
573 }
574
575 static struct g_consumer *
576 vdev_geom_attach_by_guids(vdev_t *vd)
577 {
578         struct g_class *mp;
579         struct g_geom *gp, *zgp;
580         struct g_provider *pp;
581         struct g_consumer *cp, *zcp;
582         uint64_t pguid, vguid;
583
584         g_topology_assert();
585
586         zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
587         zgp->orphan = vdev_geom_taste_orphan;
588         zcp = g_new_consumer(zgp);
589
590         cp = NULL;
591         LIST_FOREACH(mp, &g_classes, class) {
592                 if (mp == &zfs_vdev_class)
593                         continue;
594                 LIST_FOREACH(gp, &mp->geom, geom) {
595                         if (gp->flags & G_GEOM_WITHER)
596                                 continue;
597                         LIST_FOREACH(pp, &gp->provider, provider) {
598                                 if (vdev_geom_attach_taster(zcp, pp) != 0)
599                                         continue;
600                                 g_topology_unlock();
601                                 vdev_geom_read_guids(zcp, &pguid, &vguid);
602                                 g_topology_lock();
603                                 vdev_geom_detach_taster(zcp);
604                                 /* 
605                                  * Check that the label's vdev guid matches the
606                                  * desired guid.  If the label has a pool guid,
607                                  * check that it matches too. (Inactive spares
608                                  * and L2ARCs do not have any pool guid in the
609                                  * label.)
610                                 */
611                                 if ((pguid != 0 &&
612                                      pguid != spa_guid(vd->vdev_spa)) ||
613                                     vguid != vd->vdev_guid)
614                                         continue;
615                                 cp = vdev_geom_attach(pp, vd);
616                                 if (cp == NULL) {
617                                         printf("ZFS WARNING: Unable to "
618                                             "attach to %s.\n", pp->name);
619                                         continue;
620                                 }
621                                 break;
622                         }
623                         if (cp != NULL)
624                                 break;
625                 }
626                 if (cp != NULL)
627                         break;
628         }
629 end:
630         g_destroy_consumer(zcp);
631         g_destroy_geom(zgp);
632         return (cp);
633 }
634
/*
 * Open vd by searching all providers for its guids.  On success,
 * vd->vdev_path is replaced with the matching provider's /dev node.
 * Returns the consumer or NULL.  Caller holds the topology lock.
 */
static struct g_consumer *
vdev_geom_open_by_guids(vdev_t *vd)
{
	struct g_consumer *cp;
	char *buf;
	size_t len;

	g_topology_assert();

	ZFS_LOG(1, "Searching by guids [%ju:%ju].",
		(uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid);
	cp = vdev_geom_attach_by_guids(vd);
	if (cp != NULL) {
		len = strlen(cp->provider->name) + strlen("/dev/") + 1;
		buf = kmem_alloc(len, KM_SLEEP);

		snprintf(buf, len, "/dev/%s", cp->provider->name);
		/*
		 * Replace the stale path.  buf is sized so that
		 * strlen(buf) + 1 == len, matching what spa_strfree()
		 * will later pass to kmem_free().
		 */
		spa_strfree(vd->vdev_path);
		vd->vdev_path = buf;

		ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid, vd->vdev_path);
	} else {
		ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
		    (uintmax_t)spa_guid(vd->vdev_spa),
		    (uintmax_t)vd->vdev_guid);
	}

	return (cp);
}
666
/*
 * Open vd via the provider named in vd->vdev_path.  When check_guid is
 * set (and the sector size permits label reads), the label's guids
 * must match the vdev's or the open is undone.  Returns the consumer
 * or NULL.  Caller holds the topology lock.
 */
static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	uint64_t pguid, vguid;

	g_topology_assert();

	cp = NULL;
	/* Strip the "/dev/" prefix to get the provider name. */
	pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
	if (pp != NULL) {
		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
		cp = vdev_geom_attach(pp, vd);
		if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
		    pp->sectorsize <= VDEV_PAD_SIZE) {
			/* Label reads sleep; drop the lock. */
			g_topology_unlock();
			vdev_geom_read_guids(cp, &pguid, &vguid);
			g_topology_lock();
			/*
			 * Check that the label's vdev guid matches the
			 * desired guid.  If the label has a pool guid,
			 * check that it matches too. (Inactive spares
			 * and L2ARCs do not have any pool guid in the
			 * label.)
			 */
			if ((pguid != 0 &&
			    pguid != spa_guid(vd->vdev_spa)) ||
			    vguid != vd->vdev_guid) {
				vdev_geom_close_locked(vd);
				cp = NULL;
				ZFS_LOG(1, "guid mismatch for provider %s: "
				    "%ju:%ju != %ju:%ju.", vd->vdev_path,
				    (uintmax_t)spa_guid(vd->vdev_spa),
				    (uintmax_t)vd->vdev_guid,
				    (uintmax_t)pguid, (uintmax_t)vguid);
			} else {
				ZFS_LOG(1, "guid match for provider %s.",
				    vd->vdev_path);
			}
		}
	}

	return (cp);
}
712
713 static int
714 vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
715     uint64_t *logical_ashift, uint64_t *physical_ashift)
716 {
717         struct g_provider *pp;
718         struct g_consumer *cp;
719         size_t bufsize;
720         int error;
721
722         /* Set the TLS to indicate downstack that we should not access zvols*/
723         VERIFY(tsd_set(zfs_geom_probe_vdev_key, vd) == 0);
724
725         /*
726          * We must have a pathname, and it must be absolute.
727          */
728         if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
729                 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
730                 return (EINVAL);
731         }
732
733         vd->vdev_tsd = NULL;
734
735         DROP_GIANT();
736         g_topology_lock();
737         error = 0;
738
739         if (vd->vdev_spa->spa_splitting_newspa ||
740             (vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
741              vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
742              vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)) {
743                 /*
744                  * We are dealing with a vdev that hasn't been previously
745                  * opened (since boot), and we are not loading an
746                  * existing pool configuration.  This looks like a
747                  * vdev add operation to a new or existing pool.
748                  * Assume the user knows what he/she is doing and find
749                  * GEOM provider by its name, ignoring GUID mismatches.
750                  *
751                  * XXPOLICY: It would be safer to only allow a device
752                  *           that is unlabeled or labeled but missing
753                  *           GUID information to be opened in this fashion,
754                  *           unless we are doing a split, in which case we
755                  *           should allow any guid.
756                  */
757                 cp = vdev_geom_open_by_path(vd, 0);
758         } else {
759                 /*
760                  * Try using the recorded path for this device, but only
761                  * accept it if its label data contains the expected GUIDs.
762                  */
763                 cp = vdev_geom_open_by_path(vd, 1);
764                 if (cp == NULL) {
765                         /*
766                          * The device at vd->vdev_path doesn't have the
767                          * expected GUIDs. The disks might have merely
768                          * moved around so try all other GEOM providers
769                          * to find one with the right GUIDs.
770                          */
771                         cp = vdev_geom_open_by_guids(vd);
772                 }
773         }
774
775         /* Clear the TLS now that tasting is done */
776         VERIFY(tsd_set(zfs_geom_probe_vdev_key, NULL) == 0);
777
778         if (cp == NULL) {
779                 ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
780                 error = ENOENT;
781         } else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
782             !ISP2(cp->provider->sectorsize)) {
783                 ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
784                     vd->vdev_path);
785
786                 vdev_geom_close_locked(vd);
787                 error = EINVAL;
788                 cp = NULL;
789         } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
790                 int i;
791
792                 for (i = 0; i < 5; i++) {
793                         error = g_access(cp, 0, 1, 0);
794                         if (error == 0)
795                                 break;
796                         g_topology_unlock();
797                         tsleep(vd, 0, "vdev", hz / 2);
798                         g_topology_lock();
799                 }
800                 if (error != 0) {
801                         printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
802                             vd->vdev_path, error);
803                         vdev_geom_close_locked(vd);
804                         cp = NULL;
805                 }
806         }
807
808         /* Fetch initial physical path information for this device. */
809         if (cp != NULL)
810                 vdev_geom_attrchanged(cp, "GEOM::physpath");
811         
812         g_topology_unlock();
813         PICKUP_GIANT();
814         if (cp == NULL) {
815                 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
816                 return (error);
817         }
818         pp = cp->provider;
819
820         /*
821          * Determine the actual size of the device.
822          */
823         *max_psize = *psize = pp->mediasize;
824
825         /*
826          * Determine the device's minimum transfer size and preferred
827          * transfer size.
828          */
829         *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
830         *physical_ashift = 0;
831         if (pp->stripesize > (1 << *logical_ashift) && ISP2(pp->stripesize) &&
832             pp->stripesize <= (1 << SPA_MAXASHIFT) && pp->stripeoffset == 0)
833                 *physical_ashift = highbit(pp->stripesize) - 1;
834
835         /*
836          * Clear the nowritecache settings, so that on a vdev_reopen()
837          * we will try again.
838          */
839         vd->vdev_nowritecache = B_FALSE;
840
841         /*
842          * Determine the device's rotation rate.
843          */
844         vdev_geom_set_rotation_rate(vd, cp);
845
846         return (0);
847 }
848
/*
 * Close a GEOM-backed vdev.  GEOM's topology lock may not be acquired
 * while Giant is held, so we drop Giant around the locked region and
 * delegate the actual consumer teardown to vdev_geom_close_locked().
 */
static void
vdev_geom_close(vdev_t *vd)
{

        DROP_GIANT();
        g_topology_lock();
        vdev_geom_close_locked(vd);
        g_topology_unlock();
        PICKUP_GIANT();
}
859
/*
 * GEOM bio completion callback.  Translates the bio's completion status
 * back into the originating zio, records persistent "not supported"
 * conditions (flush/TRIM), flags device departure, then releases the bio
 * and completes the zio asynchronously.
 */
static void
vdev_geom_io_intr(struct bio *bp)
{
        vdev_t *vd;
        zio_t *zio;

        zio = bp->bio_caller1;
        vd = zio->io_vd;
        zio->io_error = bp->bio_error;
        /* A short transfer with no explicit error is still a failure. */
        if (zio->io_error == 0 && bp->bio_resid != 0)
                zio->io_error = SET_ERROR(EIO);

        switch(zio->io_error) {
        case ENOTSUP:
                /*
                 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
                 * that future attempts will never succeed. In this case
                 * we set a persistent flag so that we don't bother with
                 * requests in the future.
                 */
                switch(bp->bio_cmd) {
                case BIO_FLUSH:
                        vd->vdev_nowritecache = B_TRUE;
                        break;
                case BIO_DELETE:
                        vd->vdev_notrim = B_TRUE;
                        break;
                }
                break;
        case ENXIO:
                if (!vd->vdev_remove_wanted) {
                        /*
                         * If provider's error is set we assume it is being
                         * removed.
                         */
                        if (bp->bio_to->error != 0) {
                                vd->vdev_remove_wanted = B_TRUE;
                                spa_async_request(zio->io_spa,
                                    SPA_ASYNC_REMOVE);
                        } else if (!vd->vdev_delayed_close) {
                                vd->vdev_delayed_close = B_TRUE;
                        }
                }
                break;
        }
        /* The bio is ours to free; the zio completes via the delay path. */
        g_destroy_bio(bp);
        zio_delay_interrupt(zio);
}
908
909 static void
910 vdev_geom_io_start(zio_t *zio)
911 {
912         vdev_t *vd;
913         struct g_consumer *cp;
914         struct bio *bp;
915         int error;
916
917         vd = zio->io_vd;
918
919         switch (zio->io_type) {
920         case ZIO_TYPE_IOCTL:
921                 /* XXPOLICY */
922                 if (!vdev_readable(vd)) {
923                         zio->io_error = SET_ERROR(ENXIO);
924                         zio_interrupt(zio);
925                         return;
926                 } else {
927                         switch (zio->io_cmd) {
928                         case DKIOCFLUSHWRITECACHE:
929                                 if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
930                                         break;
931                                 if (vd->vdev_nowritecache) {
932                                         zio->io_error = SET_ERROR(ENOTSUP);
933                                         break;
934                                 }
935                                 goto sendreq;
936                         default:
937                                 zio->io_error = SET_ERROR(ENOTSUP);
938                         }
939                 }
940
941                 zio_execute(zio);
942                 return;
943         case ZIO_TYPE_FREE:
944                 if (vd->vdev_notrim) {
945                         zio->io_error = SET_ERROR(ENOTSUP);
946                 } else if (!vdev_geom_bio_delete_disable) {
947                         goto sendreq;
948                 }
949                 zio_execute(zio);
950                 return;
951         }
952 sendreq:
953         ASSERT(zio->io_type == ZIO_TYPE_READ ||
954             zio->io_type == ZIO_TYPE_WRITE ||
955             zio->io_type == ZIO_TYPE_FREE ||
956             zio->io_type == ZIO_TYPE_IOCTL);
957
958         cp = vd->vdev_tsd;
959         if (cp == NULL) {
960                 zio->io_error = SET_ERROR(ENXIO);
961                 zio_interrupt(zio);
962                 return;
963         }
964         bp = g_alloc_bio();
965         bp->bio_caller1 = zio;
966         switch (zio->io_type) {
967         case ZIO_TYPE_READ:
968         case ZIO_TYPE_WRITE:
969                 zio->io_target_timestamp = zio_handle_io_delay(zio);
970                 bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
971                 bp->bio_data = zio->io_data;
972                 bp->bio_offset = zio->io_offset;
973                 bp->bio_length = zio->io_size;
974                 break;
975         case ZIO_TYPE_FREE:
976                 bp->bio_cmd = BIO_DELETE;
977                 bp->bio_data = NULL;
978                 bp->bio_offset = zio->io_offset;
979                 bp->bio_length = zio->io_size;
980                 break;
981         case ZIO_TYPE_IOCTL:
982                 bp->bio_cmd = BIO_FLUSH;
983                 bp->bio_flags |= BIO_ORDERED;
984                 bp->bio_data = NULL;
985                 bp->bio_offset = cp->provider->mediasize;
986                 bp->bio_length = 0;
987                 break;
988         }
989         bp->bio_done = vdev_geom_io_intr;
990
991         g_io_request(bp, cp);
992 }
993
/*
 * Per-zio completion hook for the vdev_ops_t interface.  Intentionally a
 * no-op: all GEOM-side completion work is done in vdev_geom_io_intr().
 */
static void
vdev_geom_io_done(zio_t *zio)
{
}
998
/*
 * Hold hook required by the vdev_ops_t table; GEOM vdevs keep no such
 * reference accounting, so this is intentionally a no-op.
 */
static void
vdev_geom_hold(vdev_t *vd)
{
}
1003
/*
 * Release hook paired with vdev_geom_hold(); intentionally a no-op for
 * the same reason.
 */
static void
vdev_geom_rele(vdev_t *vd)
{
}
1008
/*
 * Operations table wiring this GEOM backend into the generic vdev layer.
 * Entries are positional initializers of vdev_ops_t; confirm the field
 * order against the vdev_ops_t declaration in vdev_impl.h when changing.
 */
vdev_ops_t vdev_geom_ops = {
        vdev_geom_open,
        vdev_geom_close,
        vdev_default_asize,     /* generic size calculation; no override */
        vdev_geom_io_start,
        vdev_geom_io_done,
        NULL,                   /* presumably the state-change hook — confirm */
        vdev_geom_hold,
        vdev_geom_rele,
        VDEV_TYPE_DISK,         /* name of this vdev type */
        B_TRUE                  /* leaf vdev */
};