/*
 * FreeBSD stable/9: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
 * (MFC r259168)
 */
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
23  * All rights reserved.
24  *
25  * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
26  */
27
28 #include <sys/zfs_context.h>
29 #include <sys/param.h>
30 #include <sys/kernel.h>
31 #include <sys/bio.h>
32 #include <sys/disk.h>
33 #include <sys/spa.h>
34 #include <sys/spa_impl.h>
35 #include <sys/vdev_impl.h>
36 #include <sys/fs/zfs.h>
37 #include <sys/zio.h>
38 #include <geom/geom.h>
39 #include <geom/geom_int.h>
40
/*
 * Virtual device vector for GEOM.
 */

/*
 * GEOM class under which all ZFS vdev geoms and tasting geoms are
 * created; registered with GEOM via DECLARE_GEOM_CLASS() below.
 */
struct g_class zfs_vdev_class = {
        .name = "ZFS::VDEV",
        .version = G_VERSION,
};

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
51
SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH.  Loader tunable and sysctl: vfs.zfs.vdev.bio_flush_disable. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE.  Loader tunable and sysctl: vfs.zfs.vdev.bio_delete_disable. */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
63
/*
 * GEOM orphan callback: the provider backing this consumer has gone
 * away.  Runs on the GEOM event thread with the topology lock held.
 * Flags the vdev for asynchronous removal rather than closing it here
 * (see the comment below for why).
 */
static void
vdev_geom_orphan(struct g_consumer *cp)
{
        vdev_t *vd;

        g_topology_assert();

        /* cp->private is cleared by vdev_geom_close(); nothing to do then. */
        vd = cp->private;
        if (vd == NULL)
                return;

        /*
         * Orphan callbacks occur from the GEOM event thread.
         * Concurrent with this call, new I/O requests may be
         * working their way through GEOM about to find out
         * (only once executed by the g_down thread) that we've
         * been orphaned from our disk provider.  These I/Os
         * must be retired before we can detach our consumer.
         * This is most easily achieved by acquiring the
         * SPA ZIO configuration lock as a writer, but doing
         * so with the GEOM topology lock held would cause
         * a lock order reversal.  Instead, rely on the SPA's
         * async removal support to invoke a close on this
         * vdev once it is safe to do so.
         */
        zfs_post_remove(vd->vdev_spa, vd);
        vd->vdev_remove_wanted = B_TRUE;
        spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
}
93
/*
 * Attach a ZFS consumer to the given provider, opening it with read
 * and exclusive access (g_access(cp, 1, 0, 1)).  The shared
 * "zfs::vdev" geom is reused if one exists, as is an existing
 * consumer already connected to the same provider (its access counts
 * are simply bumped).  Returns the consumer, or NULL on failure.
 * Called with the GEOM topology lock held.
 */
static struct g_consumer *
vdev_geom_attach(struct g_provider *pp)
{
        struct g_geom *gp;
        struct g_consumer *cp;

        g_topology_assert();

        ZFS_LOG(1, "Attaching to %s.", pp->name);
        /* Do we have geom already? No? Create one. */
        LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
                if (gp->flags & G_GEOM_WITHER)
                        continue;
                if (strcmp(gp->name, "zfs::vdev") != 0)
                        continue;
                break;
        }
        if (gp == NULL) {
                gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
                gp->orphan = vdev_geom_orphan;
                cp = g_new_consumer(gp);
                /* On failure, withering the fresh geom reclaims the consumer too. */
                if (g_attach(cp, pp) != 0) {
                        g_wither_geom(gp, ENXIO);
                        return (NULL);
                }
                if (g_access(cp, 1, 0, 1) != 0) {
                        g_wither_geom(gp, ENXIO);
                        return (NULL);
                }
                ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
        } else {
                /* Check if we are already connected to this provider. */
                LIST_FOREACH(cp, &gp->consumer, consumer) {
                        if (cp->provider == pp) {
                                ZFS_LOG(1, "Found consumer for %s.", pp->name);
                                break;
                        }
                }
                if (cp == NULL) {
                        cp = g_new_consumer(gp);
                        if (g_attach(cp, pp) != 0) {
                                g_destroy_consumer(cp);
                                return (NULL);
                        }
                        if (g_access(cp, 1, 0, 1) != 0) {
                                g_detach(cp);
                                g_destroy_consumer(cp);
                                return (NULL);
                        }
                        ZFS_LOG(1, "Created consumer for %s.", pp->name);
                } else {
                        /* Existing consumer: just take another r/e reference. */
                        if (g_access(cp, 1, 0, 1) != 0)
                                return (NULL);
                        ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
                }
        }
        return (cp);
}
152
/*
 * Release one read/exclusive reference taken by vdev_geom_attach().
 * On the last close the consumer is detached and destroyed, and the
 * geom is withered once it has no consumers left.  Called with the
 * topology lock held, either directly or as a GEOM event (see
 * vdev_geom_close()); 'flag' is the unused event-callback argument.
 */
static void
vdev_geom_detach(void *arg, int flag __unused)
{
        struct g_geom *gp;
        struct g_consumer *cp;

        g_topology_assert();
        cp = arg;
        gp = cp->geom;

        ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
        g_access(cp, -1, 0, -1);
        /* Destroy consumer on last close. */
        if (cp->acr == 0 && cp->ace == 0) {
                ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
                /* Drop any remaining write references before detaching. */
                if (cp->acw > 0)
                        g_access(cp, 0, -cp->acw, 0);
                g_detach(cp);
                g_destroy_consumer(cp);
        }
        /* Destroy geom if there are no consumers left. */
        if (LIST_EMPTY(&gp->consumer)) {
                ZFS_LOG(1, "Destroyed geom %s.", gp->name);
                g_wither_geom(gp, ENXIO);
        }
}
179
180 static uint64_t
181 nvlist_get_guid(nvlist_t *list)
182 {
183         uint64_t value;
184
185         value = 0;
186         nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, &value);
187         return (value);
188 }
189
/*
 * Perform synchronous I/O ('cmd' is BIO_READ or BIO_WRITE as used by
 * callers here) of 'size' bytes at 'offset' on the consumer's
 * provider, splitting the transfer into MAXPHYS-bounded, sector-sized
 * chunks.  Both offset and size must be sector multiples.  Returns 0
 * on success or the first bio error.  Sleeps in biowait(), so must
 * not be called with the topology lock held.
 */
static int
vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
{
        struct bio *bp;
        u_char *p;
        off_t off, maxio;
        int error;

        ASSERT((offset % cp->provider->sectorsize) == 0);
        ASSERT((size % cp->provider->sectorsize) == 0);

        bp = g_alloc_bio();
        off = offset;
        offset += size;         /* 'offset' now marks the end of the range */
        p = data;
        /* Largest per-request length: MAXPHYS rounded down to a sector multiple. */
        maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
        error = 0;

        for (; off < offset; off += maxio, p += maxio, size -= maxio) {
                /* The same bio is reused for every chunk; clear it each pass. */
                bzero(bp, sizeof(*bp));
                bp->bio_cmd = cmd;
                bp->bio_done = NULL;    /* synchronous: completion via biowait() */
                bp->bio_offset = off;
                bp->bio_length = MIN(size, maxio);
                bp->bio_data = p;
                g_io_request(bp, cp);
                error = biowait(bp, "vdev_geom_io");
                if (error != 0)
                        break;
        }

        g_destroy_bio(bp);
        return (error);
}
224
/*
 * Orphan callback for the short-lived tasting geoms created in
 * vdev_geom_read_pool_label() and vdev_geom_attach_by_guid().
 * Tasting consumers are attached and detached under the topology
 * lock, so this should never fire; panic via KASSERT if it does.
 */
static void
vdev_geom_taste_orphan(struct g_consumer *cp)
{

        KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
            cp->provider->name));
}
232
233 static int
234 vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
235 {
236         struct g_provider *pp;
237         vdev_label_t *label;
238         char *p, *buf;
239         size_t buflen;
240         uint64_t psize;
241         off_t offset, size;
242         uint64_t guid, state, txg;
243         int error, l, len;
244
245         g_topology_assert_not();
246
247         pp = cp->provider;
248         ZFS_LOG(1, "Reading config from %s...", pp->name);
249
250         psize = pp->mediasize;
251         psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));
252
253         size = sizeof(*label) + pp->sectorsize -
254             ((sizeof(*label) - 1) % pp->sectorsize) - 1;
255
256         guid = 0;
257         label = kmem_alloc(size, KM_SLEEP);
258         buflen = sizeof(label->vl_vdev_phys.vp_nvlist);
259
260         *config = NULL;
261         for (l = 0; l < VDEV_LABELS; l++) {
262
263                 offset = vdev_label_offset(psize, l, 0);
264                 if ((offset % pp->sectorsize) != 0)
265                         continue;
266
267                 if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
268                         continue;
269                 buf = label->vl_vdev_phys.vp_nvlist;
270
271                 if (nvlist_unpack(buf, buflen, config, 0) != 0)
272                         continue;
273
274                 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
275                     &state) != 0 || state > POOL_STATE_L2CACHE) {
276                         nvlist_free(*config);
277                         *config = NULL;
278                         continue;
279                 }
280
281                 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
282                     (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
283                     &txg) != 0 || txg == 0)) {
284                         nvlist_free(*config);
285                         *config = NULL;
286                         continue;
287                 }
288
289                 break;
290         }
291
292         kmem_free(label, size);
293         return (*config == NULL ? ENOENT : 0);
294 }
295
296 static void
297 resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
298 {
299         nvlist_t **new_configs;
300         uint64_t i;
301
302         if (id < *count)
303                 return;
304         new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
305             KM_SLEEP);
306         for (i = 0; i < *count; i++)
307                 new_configs[i] = (*configs)[i];
308         if (*configs != NULL)
309                 kmem_free(*configs, *count * sizeof(void *));
310         *configs = new_configs;
311         *count = id + 1;
312 }
313
314 static void
315 process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
316     const char *name, uint64_t* known_pool_guid)
317 {
318         nvlist_t *vdev_tree;
319         uint64_t pool_guid;
320         uint64_t vdev_guid, known_guid;
321         uint64_t id, txg, known_txg;
322         char *pname;
323         int i;
324
325         if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
326             strcmp(pname, name) != 0)
327                 goto ignore;
328
329         if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
330                 goto ignore;
331
332         if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
333                 goto ignore;
334
335         if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
336                 goto ignore;
337
338         if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
339                 goto ignore;
340
341         VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);
342
343         if (*known_pool_guid != 0) {
344                 if (pool_guid != *known_pool_guid)
345                         goto ignore;
346         } else
347                 *known_pool_guid = pool_guid;
348
349         resize_configs(configs, count, id);
350
351         if ((*configs)[id] != NULL) {
352                 VERIFY(nvlist_lookup_uint64((*configs)[id],
353                     ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
354                 if (txg <= known_txg)
355                         goto ignore;
356                 nvlist_free((*configs)[id]);
357         }
358
359         (*configs)[id] = cfg;
360         return;
361
362 ignore:
363         nvlist_free(cfg);
364 }
365
/*
 * Temporarily attach the tasting consumer 'cp' to provider 'pp' and
 * open it read-only.  Rejects withering providers, providers with a
 * sector size that is not a power of 2 or exceeds VDEV_PAD_SIZE, and
 * media smaller than SPA_MINDEVSIZE.  Returns 0 on success or an
 * errno; on failure the consumer is left detached.
 */
static int
vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
{
        int error;

        if (pp->flags & G_PF_WITHER)
                return (EINVAL);
        g_attach(cp, pp);
        error = g_access(cp, 1, 0, 0);
        if (error == 0) {
                if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
                        error = EINVAL;
                else if (pp->mediasize < SPA_MINDEVSIZE)
                        error = EINVAL;
                /* Unsuitable provider: give the read reference back. */
                if (error != 0)
                        g_access(cp, -1, 0, 0);
        }
        if (error != 0)
                g_detach(cp);
        return (error);
}
387
/*
 * Undo a successful vdev_geom_attach_taster(): drop the read
 * reference and detach the tasting consumer from its provider.
 */
static void
vdev_geom_detach_taster(struct g_consumer *cp)
{
        g_access(cp, -1, 0, 0);
        g_detach(cp);
}
394
/*
 * Taste every provider of every non-ZFS GEOM class, collecting the
 * best label per top-level vdev for the pool named 'name' into a
 * newly allocated *configs array of *count entries (see
 * process_vdev_config()).  Returns 0 if at least one config was
 * found, ENOENT otherwise.  The topology lock is dropped around each
 * label read because vdev_geom_read_config() sleeps.
 */
int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
        struct g_class *mp;
        struct g_geom *gp, *zgp;
        struct g_provider *pp;
        struct g_consumer *zcp;
        nvlist_t *vdev_cfg;
        uint64_t pool_guid;
        int error;

        DROP_GIANT();
        g_topology_lock();

        zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
        /* This orphan function should be never called. */
        zgp->orphan = vdev_geom_taste_orphan;
        zcp = g_new_consumer(zgp);

        *configs = NULL;
        *count = 0;
        pool_guid = 0;
        LIST_FOREACH(mp, &g_classes, class) {
                /* Skip our own class: don't taste ZFS's own providers. */
                if (mp == &zfs_vdev_class)
                        continue;
                LIST_FOREACH(gp, &mp->geom, geom) {
                        if (gp->flags & G_GEOM_WITHER)
                                continue;
                        LIST_FOREACH(pp, &gp->provider, provider) {
                                if (pp->flags & G_PF_WITHER)
                                        continue;
                                if (vdev_geom_attach_taster(zcp, pp) != 0)
                                        continue;
                                g_topology_unlock();
                                error = vdev_geom_read_config(zcp, &vdev_cfg);
                                g_topology_lock();
                                vdev_geom_detach_taster(zcp);
                                if (error)
                                        continue;
                                ZFS_LOG(1, "successfully read vdev config");

                                /* process_vdev_config() consumes vdev_cfg. */
                                process_vdev_config(configs, count,
                                    vdev_cfg, name, &pool_guid);
                        }
                }
        }

        g_destroy_consumer(zcp);
        g_destroy_geom(zgp);
        g_topology_unlock();
        PICKUP_GIANT();

        return (*count > 0 ? 0 : ENOENT);
}
450
451 static uint64_t
452 vdev_geom_read_guid(struct g_consumer *cp)
453 {
454         nvlist_t *config;
455         uint64_t guid;
456
457         g_topology_assert_not();
458
459         guid = 0;
460         if (vdev_geom_read_config(cp, &config) == 0) {
461                 guid = nvlist_get_guid(config);
462                 nvlist_free(config);
463         }
464         return (guid);
465 }
466
467 static struct g_consumer *
468 vdev_geom_attach_by_guid(uint64_t guid)
469 {
470         struct g_class *mp;
471         struct g_geom *gp, *zgp;
472         struct g_provider *pp;
473         struct g_consumer *cp, *zcp;
474         uint64_t pguid;
475
476         g_topology_assert();
477
478         zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
479         /* This orphan function should be never called. */
480         zgp->orphan = vdev_geom_taste_orphan;
481         zcp = g_new_consumer(zgp);
482
483         cp = NULL;
484         LIST_FOREACH(mp, &g_classes, class) {
485                 if (mp == &zfs_vdev_class)
486                         continue;
487                 LIST_FOREACH(gp, &mp->geom, geom) {
488                         if (gp->flags & G_GEOM_WITHER)
489                                 continue;
490                         LIST_FOREACH(pp, &gp->provider, provider) {
491                                 if (vdev_geom_attach_taster(zcp, pp) != 0)
492                                         continue;
493                                 g_topology_unlock();
494                                 pguid = vdev_geom_read_guid(zcp);
495                                 g_topology_lock();
496                                 vdev_geom_detach_taster(zcp);
497                                 if (pguid != guid)
498                                         continue;
499                                 cp = vdev_geom_attach(pp);
500                                 if (cp == NULL) {
501                                         printf("ZFS WARNING: Unable to attach to %s.\n",
502                                             pp->name);
503                                         continue;
504                                 }
505                                 break;
506                         }
507                         if (cp != NULL)
508                                 break;
509                 }
510                 if (cp != NULL)
511                         break;
512         }
513 end:
514         g_destroy_consumer(zcp);
515         g_destroy_geom(zgp);
516         return (cp);
517 }
518
/*
 * Open a vdev by searching all providers for its guid (used when the
 * device at vd->vdev_path moved).  On success, vd->vdev_path is
 * replaced with the "/dev/<provider>" path of the device actually
 * found; the new buffer is kmem_alloc'd with strlen(buf) + 1 == len,
 * matching what spa_strfree() will later free.  Returns the attached
 * consumer or NULL.  Called with the topology lock held.
 */
static struct g_consumer *
vdev_geom_open_by_guid(vdev_t *vd)
{
        struct g_consumer *cp;
        char *buf;
        size_t len;

        g_topology_assert();

        ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid);
        cp = vdev_geom_attach_by_guid(vd->vdev_guid);
        if (cp != NULL) {
                len = strlen(cp->provider->name) + strlen("/dev/") + 1;
                buf = kmem_alloc(len, KM_SLEEP);

                snprintf(buf, len, "/dev/%s", cp->provider->name);
                spa_strfree(vd->vdev_path);
                vd->vdev_path = buf;

                ZFS_LOG(1, "Attach by guid [%ju] succeeded, provider %s.",
                    (uintmax_t)vd->vdev_guid, vd->vdev_path);
        } else {
                ZFS_LOG(1, "Search by guid [%ju] failed.",
                    (uintmax_t)vd->vdev_guid);
        }

        return (cp);
}
547
/*
 * Open a vdev by looking up its provider by name (vd->vdev_path with
 * the "/dev/" prefix stripped).  When check_guid is set and the
 * provider's sector size allows reading a label, verify that the
 * on-disk guid matches vd->vdev_guid and detach on mismatch.  Returns
 * the attached consumer or NULL.  Called with the topology lock held.
 */
static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
        struct g_provider *pp;
        struct g_consumer *cp;
        uint64_t guid;

        g_topology_assert();

        cp = NULL;
        /* sizeof("/dev/") - 1 skips the "/dev/" prefix of vdev_path. */
        pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
        if (pp != NULL) {
                ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
                cp = vdev_geom_attach(pp);
                if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
                    pp->sectorsize <= VDEV_PAD_SIZE) {
                        /* Label reads sleep; drop the topology lock around them. */
                        g_topology_unlock();
                        guid = vdev_geom_read_guid(cp);
                        g_topology_lock();
                        if (guid != vd->vdev_guid) {
                                vdev_geom_detach(cp, 0);
                                cp = NULL;
                                ZFS_LOG(1, "guid mismatch for provider %s: "
                                    "%ju != %ju.", vd->vdev_path,
                                    (uintmax_t)vd->vdev_guid, (uintmax_t)guid);
                        } else {
                                ZFS_LOG(1, "guid match for provider %s.",
                                    vd->vdev_path);
                        }
                }
        }

        return (cp);
}
582
/*
 * vdev_ops_t open entry point.  Locates the GEOM provider backing
 * this vdev (by path; on import, falling back to a guid search across
 * all providers), opens it for writing when the pool is writable, and
 * reports the device's size and alignment through the out parameters:
 *   *psize/*max_psize    - provider media size
 *   *logical_ashift      - log2 of the sector size (>= SPA_MINBLOCKSIZE)
 *   *physical_ashift     - log2 of the stripe size, if any
 * Returns 0 on success or an errno; on failure vs_aux is set to hint
 * at the cause.
 */
static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
        struct g_provider *pp;
        struct g_consumer *cp;
        size_t bufsize;
        int error;

        /*
         * We must have a pathname, and it must be absolute.
         */
        if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
                vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
                return (EINVAL);
        }

        vd->vdev_tsd = NULL;

        DROP_GIANT();
        g_topology_lock();
        error = 0;

        /*
         * If we're creating or splitting a pool, just find the GEOM provider
         * by its name and ignore GUID mismatches.
         */
        if (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
            vd->vdev_spa->spa_splitting_newspa == B_TRUE)
                cp = vdev_geom_open_by_path(vd, 0);
        else {
                cp = vdev_geom_open_by_path(vd, 1);
                if (cp == NULL) {
                        /*
                         * The device at vd->vdev_path doesn't have the
                         * expected guid. The disks might have merely
                         * moved around so try all other GEOM providers
                         * to find one with the right guid.
                         */
                        cp = vdev_geom_open_by_guid(vd);
                }
        }

        if (cp == NULL) {
                ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
                error = ENOENT;
        } else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
            !ISP2(cp->provider->sectorsize)) {
                ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
                    vd->vdev_path);
                vdev_geom_detach(cp, 0);
                error = EINVAL;
                cp = NULL;
        } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
                int i;

                /*
                 * Need write access but the consumer has none yet; retry
                 * a few times with a short sleep in between in case some
                 * other consumer transiently holds the provider open.
                 * NOTE(review): the 5 x hz/2 retry budget looks empirical.
                 */
                for (i = 0; i < 5; i++) {
                        error = g_access(cp, 0, 1, 0);
                        if (error == 0)
                                break;
                        g_topology_unlock();
                        tsleep(vd, 0, "vdev", hz / 2);
                        g_topology_lock();
                }
                if (error != 0) {
                        printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
                            vd->vdev_path, error);
                        vdev_geom_detach(cp, 0);
                        cp = NULL;
                }
        }
        g_topology_unlock();
        PICKUP_GIANT();
        if (cp == NULL) {
                vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
                return (error);
        }

        /* Link consumer and vdev both ways for I/O and orphan handling. */
        cp->private = vd;
        vd->vdev_tsd = cp;
        pp = cp->provider;

        /*
         * Determine the actual size of the device.
         */
        *max_psize = *psize = pp->mediasize;

        /*
         * Determine the device's minimum transfer size and preferred
         * transfer size.
         */
        *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
        *physical_ashift = 0;
        if (pp->stripesize)
                *physical_ashift = highbit(pp->stripesize) - 1;

        /*
         * Clear the nowritecache settings, so that on a vdev_reopen()
         * we will try again.
         */
        vd->vdev_nowritecache = B_FALSE;

        /* Record the physical path ("/dev/<provider>") for userland tools. */
        if (vd->vdev_physpath != NULL)
                spa_strfree(vd->vdev_physpath);
        bufsize = sizeof("/dev/") + strlen(pp->name);
        vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP);
        snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name);

        return (0);
}
693
/*
 * vdev_ops_t close entry point.  Unlinks the consumer from the vdev
 * and schedules the actual GEOM detach as an event, since the
 * topology lock is not held here.
 */
static void
vdev_geom_close(vdev_t *vd)
{
        struct g_consumer *cp;

        cp = vd->vdev_tsd;
        if (cp == NULL)
                return;
        vd->vdev_tsd = NULL;
        vd->vdev_delayed_close = B_FALSE;
        cp->private = NULL;     /* XXX locking */
        /* vdev_geom_detach() runs later on the GEOM event thread. */
        g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL);
}
707
/*
 * bio completion callback for zio-driven I/O: translate the bio
 * status into the zio, latch "never again" flags for unsupported
 * flush/trim, detect provider removal, and hand the zio back to the
 * ZFS interrupt pipeline.
 */
static void
vdev_geom_io_intr(struct bio *bp)
{
        vdev_t *vd;
        zio_t *zio;

        zio = bp->bio_caller1;
        vd = zio->io_vd;
        zio->io_error = bp->bio_error;
        /* A short transfer with no explicit error is still an I/O failure. */
        if (zio->io_error == 0 && bp->bio_resid != 0)
                zio->io_error = EIO;
        if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) {
                /*
                 * If we get ENOTSUP, we know that no future
                 * attempts will ever succeed.  In this case we
                 * set a persistent bit so that we don't bother
                 * with the ioctl in the future.
                 */
                vd->vdev_nowritecache = B_TRUE;
        }
        if (bp->bio_cmd == BIO_DELETE && bp->bio_error == ENOTSUP) {
                /*
                 * If we get ENOTSUP, we know that no future
                 * attempts will ever succeed.  In this case we
                 * set a persistent bit so that we don't bother
                 * with the ioctl in the future.
                 */
                vd->vdev_notrim = B_TRUE;
        }
        if (zio->io_error == ENXIO && !vd->vdev_remove_wanted) {
                /*
                 * If provider's error is set we assume it is being
                 * removed.
                 */
                if (bp->bio_to->error != 0) {
                        vd->vdev_remove_wanted = B_TRUE;
                        spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
                } else if (!vd->vdev_delayed_close) {
                        vd->vdev_delayed_close = B_TRUE;
                }
        }
        g_destroy_bio(bp);
        zio_interrupt(zio);
}
752
/*
 * vdev_ops_t I/O start entry point.  Translates a zio into a bio and
 * issues it asynchronously; completion is handled in
 * vdev_geom_io_intr().  IOCTL zios (cache flush / TRIM) may be
 * short-circuited here when disabled by tunables or previously found
 * unsupported.  Returns ZIO_PIPELINE_STOP when a bio was issued,
 * ZIO_PIPELINE_CONTINUE when the zio was completed in place.
 */
static int
vdev_geom_io_start(zio_t *zio)
{
        vdev_t *vd;
        struct g_consumer *cp;
        struct bio *bp;
        int error;

        vd = zio->io_vd;

        if (zio->io_type == ZIO_TYPE_IOCTL) {
                /* XXPOLICY */
                if (!vdev_readable(vd)) {
                        zio->io_error = ENXIO;
                        return (ZIO_PIPELINE_CONTINUE);
                }

                switch (zio->io_cmd) {
                case DKIOCFLUSHWRITECACHE:
                        /* Skip the flush when globally or per-vdev disabled. */
                        if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
                                break;
                        if (vd->vdev_nowritecache) {
                                zio->io_error = ENOTSUP;
                                break;
                        }
                        goto sendreq;
                case DKIOCTRIM:
                        if (vdev_geom_bio_delete_disable)
                                break;
                        if (vd->vdev_notrim) {
                                zio->io_error = ENOTSUP;
                                break;
                        }
                        goto sendreq;
                default:
                        zio->io_error = ENOTSUP;
                }

                return (ZIO_PIPELINE_CONTINUE);
        }
        /* Non-IOCTL zios (read/write) fall through to here directly. */
sendreq:
        cp = vd->vdev_tsd;
        if (cp == NULL) {
                zio->io_error = ENXIO;
                return (ZIO_PIPELINE_CONTINUE);
        }
        bp = g_alloc_bio();
        bp->bio_caller1 = zio;
        switch (zio->io_type) {
        case ZIO_TYPE_READ:
        case ZIO_TYPE_WRITE:
                bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
                bp->bio_data = zio->io_data;
                bp->bio_offset = zio->io_offset;
                bp->bio_length = zio->io_size;
                break;
        case ZIO_TYPE_IOCTL:
                switch (zio->io_cmd) {
                case DKIOCFLUSHWRITECACHE:
                        bp->bio_cmd = BIO_FLUSH;
                        bp->bio_flags |= BIO_ORDERED;
                        bp->bio_data = NULL;
                        bp->bio_offset = cp->provider->mediasize;
                        bp->bio_length = 0;
                        break;
                case DKIOCTRIM:
                        bp->bio_cmd = BIO_DELETE;
                        bp->bio_data = NULL;
                        bp->bio_offset = zio->io_offset;
                        bp->bio_length = zio->io_size;
                        break;
                }
                break;
        }
        bp->bio_done = vdev_geom_io_intr;

        g_io_request(bp, cp);

        return (ZIO_PIPELINE_STOP);
}
833
/* I/O-done hook: nothing to do beyond what vdev_geom_io_intr() handled. */
static void
vdev_geom_io_done(zio_t *zio)
{
}
838
/* Hold hook: a no-op for GEOM-backed vdevs. */
static void
vdev_geom_hold(vdev_t *vd)
{
}
843
/* Release hook: a no-op for GEOM-backed vdevs (pairs with vdev_geom_hold). */
static void
vdev_geom_rele(vdev_t *vd)
{
}
848
/* Operations vector for GEOM-backed leaf vdevs (positional init;
 * member order must match vdev_ops_t — see vdev_impl.h for the slots). */
vdev_ops_t vdev_geom_ops = {
        vdev_geom_open,
        vdev_geom_close,
        vdev_default_asize,
        vdev_geom_io_start,
        vdev_geom_io_done,
        NULL,                   /* unused slot — presumably state-change cb; confirm in vdev_impl.h */
        vdev_geom_hold,
        vdev_geom_rele,
        VDEV_TYPE_DISK,         /* name of this vdev type */
        B_TRUE                  /* leaf vdev */
};