/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/disk.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <geom/geom.h>
#include <geom/geom_int.h>

/*
 * Virtual device vector for GEOM.
 */

struct g_class zfs_vdev_class = {
        .name = "ZFS::VDEV",
        .version = G_VERSION,
};

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
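/*
 * Both knobs are read/write sysctls as well as loader tunables, e.g.
 * "sysctl vfs.zfs.vdev.bio_flush_disable=1" at runtime, or
 * vfs.zfs.vdev.bio_delete_disable="1" in /boot/loader.conf.
 */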

static void
vdev_geom_orphan(struct g_consumer *cp)
{
        vdev_t *vd;

        g_topology_assert();

        vd = cp->private;
        if (vd == NULL)
                return;

        /*
         * Orphan callbacks occur from the GEOM event thread.
         * Concurrent with this call, new I/O requests may be
         * working their way through GEOM about to find out
         * (only once executed by the g_down thread) that we've
         * been orphaned from our disk provider.  These I/Os
         * must be retired before we can detach our consumer.
         * This is most easily achieved by acquiring the
         * SPA ZIO configuration lock as a writer, but doing
         * so with the GEOM topology lock held would cause
         * a lock order reversal.  Instead, rely on the SPA's
         * async removal support to invoke a close on this
         * vdev once it is safe to do so.
         */
        zfs_post_remove(vd->vdev_spa, vd);
        vd->vdev_remove_wanted = B_TRUE;
        spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
}

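/*
 * Attach a consumer to the given provider, reusing the "zfs::vdev" geom
 * and any existing consumer for the same provider where possible.  The
 * provider is opened with one read and one exclusive reference (r1w0e1)
 * and the consumer is marked for direct dispatch.  Returns NULL if the
 * provider cannot be attached or opened.
 */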
static struct g_consumer *
vdev_geom_attach(struct g_provider *pp)
{
        struct g_geom *gp;
        struct g_consumer *cp;

        g_topology_assert();

        ZFS_LOG(1, "Attaching to %s.", pp->name);
        /* Do we have the geom already?  If not, create one. */
        LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
                if (gp->flags & G_GEOM_WITHER)
                        continue;
                if (strcmp(gp->name, "zfs::vdev") != 0)
                        continue;
                break;
        }
        if (gp == NULL) {
                gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
                gp->orphan = vdev_geom_orphan;
                cp = g_new_consumer(gp);
                if (g_attach(cp, pp) != 0) {
                        g_wither_geom(gp, ENXIO);
                        return (NULL);
                }
                if (g_access(cp, 1, 0, 1) != 0) {
                        g_wither_geom(gp, ENXIO);
                        return (NULL);
                }
                ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
        } else {
                /* Check if we are already connected to this provider. */
                LIST_FOREACH(cp, &gp->consumer, consumer) {
                        if (cp->provider == pp) {
                                ZFS_LOG(1, "Found consumer for %s.", pp->name);
                                break;
                        }
                }
                if (cp == NULL) {
                        cp = g_new_consumer(gp);
                        if (g_attach(cp, pp) != 0) {
                                g_destroy_consumer(cp);
                                return (NULL);
                        }
                        if (g_access(cp, 1, 0, 1) != 0) {
                                g_detach(cp);
                                g_destroy_consumer(cp);
                                return (NULL);
                        }
                        ZFS_LOG(1, "Created consumer for %s.", pp->name);
                } else {
                        if (g_access(cp, 1, 0, 1) != 0)
                                return (NULL);
                        ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
                }
        }
        cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
        return (cp);
}

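/*
 * Release one read and one exclusive reference on the consumer.  On the
 * last close the consumer is detached and destroyed, and the geom itself
 * is withered once no consumers remain.  Called with the topology lock
 * held, either directly or as a GEOM event.
 */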
static void
vdev_geom_detach(void *arg, int flag __unused)
{
        struct g_geom *gp;
        struct g_consumer *cp;

        g_topology_assert();
        cp = arg;
        gp = cp->geom;

        ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
        g_access(cp, -1, 0, -1);
        /* Destroy consumer on last close. */
        if (cp->acr == 0 && cp->ace == 0) {
                ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
                if (cp->acw > 0)
                        g_access(cp, 0, -cp->acw, 0);
                g_detach(cp);
                g_destroy_consumer(cp);
        }
        /* Destroy geom if there are no consumers left. */
        if (LIST_EMPTY(&gp->consumer)) {
                ZFS_LOG(1, "Destroyed geom %s.", gp->name);
                g_wither_geom(gp, ENXIO);
        }
}

static uint64_t
nvlist_get_guid(nvlist_t *list)
{
        uint64_t value;

        value = 0;
        nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, &value);
        return (value);
}

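/*
 * Perform synchronous I/O on the consumer, splitting the request into
 * sector-aligned chunks of at most MAXPHYS bytes and waiting for each
 * chunk to complete before issuing the next.  Returns the first error
 * encountered, or 0 on success.
 */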
static int
vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
{
        struct bio *bp;
        u_char *p;
        off_t off, maxio;
        int error;

        ASSERT((offset % cp->provider->sectorsize) == 0);
        ASSERT((size % cp->provider->sectorsize) == 0);

        bp = g_alloc_bio();
        off = offset;
        offset += size;
        p = data;
        maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
        error = 0;

        for (; off < offset; off += maxio, p += maxio, size -= maxio) {
                bzero(bp, sizeof(*bp));
                bp->bio_cmd = cmd;
                bp->bio_done = NULL;
                bp->bio_offset = off;
                bp->bio_length = MIN(size, maxio);
                bp->bio_data = p;
                g_io_request(bp, cp);
                error = biowait(bp, "vdev_geom_io");
                if (error != 0)
                        break;
        }

        g_destroy_bio(bp);
        return (error);
}

static void
vdev_geom_taste_orphan(struct g_consumer *cp)
{

        KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
            cp->provider->name));
}

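/*
 * Read the vdev configuration nvlist from the first valid ZFS label.
 * A label is accepted only if its nvlist unpacks cleanly, its pool
 * state is recognized, and (except for spares and L2ARC devices) it
 * carries a nonzero txg.  Returns ENOENT if no usable label is found.
 */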
static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
{
        struct g_provider *pp;
        vdev_label_t *label;
        char *p, *buf;
        size_t buflen;
        uint64_t psize;
        off_t offset, size;
        uint64_t guid, state, txg;
        int error, l, len;

        g_topology_assert_not();

        pp = cp->provider;
        ZFS_LOG(1, "Reading config from %s...", pp->name);

        psize = pp->mediasize;
        psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));

        size = sizeof(*label) + pp->sectorsize -
            ((sizeof(*label) - 1) % pp->sectorsize) - 1;

        guid = 0;
        label = kmem_alloc(size, KM_SLEEP);
        buflen = sizeof(label->vl_vdev_phys.vp_nvlist);

        *config = NULL;
        for (l = 0; l < VDEV_LABELS; l++) {

                offset = vdev_label_offset(psize, l, 0);
                if ((offset % pp->sectorsize) != 0)
                        continue;

                if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
                        continue;
                buf = label->vl_vdev_phys.vp_nvlist;

                if (nvlist_unpack(buf, buflen, config, 0) != 0)
                        continue;

                if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
                    &state) != 0 || state > POOL_STATE_L2CACHE) {
                        nvlist_free(*config);
                        *config = NULL;
                        continue;
                }

                if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
                    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
                    &txg) != 0 || txg == 0)) {
                        nvlist_free(*config);
                        *config = NULL;
                        continue;
                }

                break;
        }

        kmem_free(label, size);
        return (*config == NULL ? ENOENT : 0);
}

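/*
 * Grow the configs array so that index "id" is valid, preserving the
 * existing entries and zero-filling the new slots.
 */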
static void
resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
{
        nvlist_t **new_configs;
        uint64_t i;

        if (id < *count)
                return;
        new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
            KM_SLEEP);
        for (i = 0; i < *count; i++)
                new_configs[i] = (*configs)[i];
        if (*configs != NULL)
                kmem_free(*configs, *count * sizeof(void *));
        *configs = new_configs;
        *count = id + 1;
}

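/*
 * File the given config into the configs array, indexed by its top-level
 * vdev id.  Configs that belong to another pool (by name or guid) or
 * that are older than the config already held for the same slot are
 * freed and ignored; otherwise the array takes ownership of "cfg".
 */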
static void
process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
    const char *name, uint64_t *known_pool_guid)
{
        nvlist_t *vdev_tree;
        uint64_t pool_guid;
        uint64_t vdev_guid, known_guid;
        uint64_t id, txg, known_txg;
        char *pname;
        int i;

        if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
            strcmp(pname, name) != 0)
                goto ignore;

        if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
                goto ignore;

        if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
                goto ignore;

        if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
                goto ignore;

        if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
                goto ignore;

        VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

        if (*known_pool_guid != 0) {
                if (pool_guid != *known_pool_guid)
                        goto ignore;
        } else
                *known_pool_guid = pool_guid;

        resize_configs(configs, count, id);

        if ((*configs)[id] != NULL) {
                VERIFY(nvlist_lookup_uint64((*configs)[id],
                    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
                if (txg <= known_txg)
                        goto ignore;
                nvlist_free((*configs)[id]);
        }

        (*configs)[id] = cfg;
        return;

ignore:
        nvlist_free(cfg);
}

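/*
 * Temporarily attach the tasting consumer to the given provider with
 * read-only access, rejecting withering providers and providers whose
 * sector size or media size ZFS cannot use.
 */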
static int
vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
{
        int error;

        if (pp->flags & G_PF_WITHER)
                return (EINVAL);
        g_attach(cp, pp);
        error = g_access(cp, 1, 0, 0);
        if (error == 0) {
                if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
                        error = EINVAL;
                else if (pp->mediasize < SPA_MINDEVSIZE)
                        error = EINVAL;
                if (error != 0)
                        g_access(cp, -1, 0, 0);
        }
        if (error != 0)
                g_detach(cp);
        return (error);
}

static void
vdev_geom_detach_taster(struct g_consumer *cp)
{
        g_access(cp, -1, 0, 0);
        g_detach(cp);
}

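/*
 * Taste all eligible GEOM providers in the system and collect the newest
 * label config for each top-level vdev of the named pool.  On success
 * *configs holds *count entries indexed by vdev id (entries for missing
 * vdevs are NULL); returns ENOENT if nothing was found.
 */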
int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
        struct g_class *mp;
        struct g_geom *gp, *zgp;
        struct g_provider *pp;
        struct g_consumer *zcp;
        nvlist_t *vdev_cfg;
        uint64_t pool_guid;
        int error;

        DROP_GIANT();
        g_topology_lock();

        zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
        /* This orphan function should never be called. */
        zgp->orphan = vdev_geom_taste_orphan;
        zcp = g_new_consumer(zgp);

        *configs = NULL;
        *count = 0;
        pool_guid = 0;
        LIST_FOREACH(mp, &g_classes, class) {
                if (mp == &zfs_vdev_class)
                        continue;
                LIST_FOREACH(gp, &mp->geom, geom) {
                        if (gp->flags & G_GEOM_WITHER)
                                continue;
                        LIST_FOREACH(pp, &gp->provider, provider) {
                                if (pp->flags & G_PF_WITHER)
                                        continue;
                                if (vdev_geom_attach_taster(zcp, pp) != 0)
                                        continue;
                                g_topology_unlock();
                                error = vdev_geom_read_config(zcp, &vdev_cfg);
                                g_topology_lock();
                                vdev_geom_detach_taster(zcp);
                                if (error)
                                        continue;
                                ZFS_LOG(1, "successfully read vdev config");

                                process_vdev_config(configs, count,
                                    vdev_cfg, name, &pool_guid);
                        }
                }
        }

        g_destroy_consumer(zcp);
        g_destroy_geom(zgp);
        g_topology_unlock();
        PICKUP_GIANT();

        return (*count > 0 ? 0 : ENOENT);
}

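/*
 * Return the vdev guid stored in the device's label, or 0 if no valid
 * config can be read.
 */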
static uint64_t
vdev_geom_read_guid(struct g_consumer *cp)
{
        nvlist_t *config;
        uint64_t guid;

        g_topology_assert_not();

        guid = 0;
        if (vdev_geom_read_config(cp, &config) == 0) {
                guid = nvlist_get_guid(config);
                nvlist_free(config);
        }
        return (guid);
}

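/*
 * Taste all providers in the system, looking for one whose label carries
 * the given vdev guid, and attach to it.  Returns NULL if no matching
 * provider is found or the attach fails.
 */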
static struct g_consumer *
vdev_geom_attach_by_guid(uint64_t guid)
{
        struct g_class *mp;
        struct g_geom *gp, *zgp;
        struct g_provider *pp;
        struct g_consumer *cp, *zcp;
        uint64_t pguid;

        g_topology_assert();

        zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
        /* This orphan function should never be called. */
        zgp->orphan = vdev_geom_taste_orphan;
        zcp = g_new_consumer(zgp);

        cp = NULL;
        LIST_FOREACH(mp, &g_classes, class) {
                if (mp == &zfs_vdev_class)
                        continue;
                LIST_FOREACH(gp, &mp->geom, geom) {
                        if (gp->flags & G_GEOM_WITHER)
                                continue;
                        LIST_FOREACH(pp, &gp->provider, provider) {
                                if (vdev_geom_attach_taster(zcp, pp) != 0)
                                        continue;
                                g_topology_unlock();
                                pguid = vdev_geom_read_guid(zcp);
                                g_topology_lock();
                                vdev_geom_detach_taster(zcp);
                                if (pguid != guid)
                                        continue;
                                cp = vdev_geom_attach(pp);
                                if (cp == NULL) {
                                        printf("ZFS WARNING: Unable to attach to %s.\n",
                                            pp->name);
                                        continue;
                                }
                                break;
                        }
                        if (cp != NULL)
                                break;
                }
                if (cp != NULL)
                        break;
        }

        g_destroy_consumer(zcp);
        g_destroy_geom(zgp);
        return (cp);
}

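/*
 * Open the vdev by searching all providers for its guid.  On success
 * vd->vdev_path is updated to point at the provider actually found.
 */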
static struct g_consumer *
vdev_geom_open_by_guid(vdev_t *vd)
{
        struct g_consumer *cp;
        char *buf;
        size_t len;

        g_topology_assert();

        ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid);
        cp = vdev_geom_attach_by_guid(vd->vdev_guid);
        if (cp != NULL) {
                len = strlen(cp->provider->name) + strlen("/dev/") + 1;
                buf = kmem_alloc(len, KM_SLEEP);

                snprintf(buf, len, "/dev/%s", cp->provider->name);
                spa_strfree(vd->vdev_path);
                vd->vdev_path = buf;

                ZFS_LOG(1, "Attach by guid [%ju] succeeded, provider %s.",
                    (uintmax_t)vd->vdev_guid, vd->vdev_path);
        } else {
                ZFS_LOG(1, "Search by guid [%ju] failed.",
                    (uintmax_t)vd->vdev_guid);
        }

        return (cp);
}

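/*
 * Open the vdev using its configured path.  If check_guid is set and the
 * label's guid does not match vd->vdev_guid, the consumer is detached
 * again and NULL is returned.
 */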
static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
        struct g_provider *pp;
        struct g_consumer *cp;
        uint64_t guid;

        g_topology_assert();

        cp = NULL;
        pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
        if (pp != NULL) {
                ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
                cp = vdev_geom_attach(pp);
                if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
                    pp->sectorsize <= VDEV_PAD_SIZE) {
                        g_topology_unlock();
                        guid = vdev_geom_read_guid(cp);
                        g_topology_lock();
                        if (guid != vd->vdev_guid) {
                                vdev_geom_detach(cp, 0);
                                cp = NULL;
                                ZFS_LOG(1, "guid mismatch for provider %s: "
                                    "%ju != %ju.", vd->vdev_path,
                                    (uintmax_t)vd->vdev_guid, (uintmax_t)guid);
                        } else {
                                ZFS_LOG(1, "guid match for provider %s.",
                                    vd->vdev_path);
                        }
                }
        }

        return (cp);
}

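/*
 * Open the vdev: locate and attach the backing provider (by path first,
 * falling back to a guid search unless the pool is being created or
 * split), acquire write access if the pool is opened for writing, and
 * report the device's size and alignment back to ZFS.
 */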
static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
        struct g_provider *pp;
        struct g_consumer *cp;
        size_t bufsize;
        int error;

        /*
         * We must have a pathname, and it must be absolute.
         */
        if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
                vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
                return (EINVAL);
        }

        vd->vdev_tsd = NULL;

        DROP_GIANT();
        g_topology_lock();
        error = 0;

        /*
         * If we're creating or splitting a pool, just find the GEOM provider
         * by its name and ignore GUID mismatches.
         */
        if (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
            vd->vdev_spa->spa_splitting_newspa == B_TRUE)
                cp = vdev_geom_open_by_path(vd, 0);
        else {
                cp = vdev_geom_open_by_path(vd, 1);
                if (cp == NULL) {
                        /*
                         * The device at vd->vdev_path doesn't have the
                         * expected guid. The disks might have merely
                         * moved around so try all other GEOM providers
                         * to find one with the right guid.
                         */
                        cp = vdev_geom_open_by_guid(vd);
                }
        }

        if (cp == NULL) {
                ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
                error = ENOENT;
        } else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
            !ISP2(cp->provider->sectorsize)) {
                ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
                    vd->vdev_path);
                vdev_geom_detach(cp, 0);
                error = EINVAL;
                cp = NULL;
        } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
                int i;

                for (i = 0; i < 5; i++) {
                        error = g_access(cp, 0, 1, 0);
                        if (error == 0)
                                break;
                        g_topology_unlock();
                        tsleep(vd, 0, "vdev", hz / 2);
                        g_topology_lock();
                }
                if (error != 0) {
                        printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
                            vd->vdev_path, error);
                        vdev_geom_detach(cp, 0);
                        cp = NULL;
                }
        }
        g_topology_unlock();
        PICKUP_GIANT();
        if (cp == NULL) {
                vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
                return (error);
        }

        cp->private = vd;
        vd->vdev_tsd = cp;
        pp = cp->provider;

        /*
         * Determine the actual size of the device.
         */
        *max_psize = *psize = pp->mediasize;

        /*
         * Determine the device's minimum transfer size and preferred
         * transfer size.
         */
        *logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
        *physical_ashift = 0;
        if (pp->stripesize)
                *physical_ashift = highbit(pp->stripesize) - 1;

        /*
         * Clear the nowritecache settings, so that on a vdev_reopen()
         * we will try again.
         */
        vd->vdev_nowritecache = B_FALSE;

        if (vd->vdev_physpath != NULL)
                spa_strfree(vd->vdev_physpath);
        bufsize = sizeof("/dev/") + strlen(pp->name);
        vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP);
        snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name);

        return (0);
}

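/*
 * Close the vdev.  The actual detach is posted as a GEOM event, since
 * the topology lock is not held here.
 */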
static void
vdev_geom_close(vdev_t *vd)
{
        struct g_consumer *cp;

        cp = vd->vdev_tsd;
        if (cp == NULL)
                return;
        vd->vdev_tsd = NULL;
        vd->vdev_delayed_close = B_FALSE;
        cp->private = NULL;     /* XXX locking */
        g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL);
}

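/*
 * Completion callback for bios issued by vdev_geom_io_start().  Maps the
 * bio status onto the zio, latches "not supported" responses for cache
 * flushes and TRIM so they are not retried, and requests asynchronous
 * removal of the vdev when the provider itself reports an error.
 */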
static void
vdev_geom_io_intr(struct bio *bp)
{
        vdev_t *vd;
        zio_t *zio;

        zio = bp->bio_caller1;
        vd = zio->io_vd;
        zio->io_error = bp->bio_error;
        if (zio->io_error == 0 && bp->bio_resid != 0)
                zio->io_error = EIO;
        if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) {
                /*
                 * If we get ENOTSUP, we know that no future
                 * attempts will ever succeed.  In this case we
                 * set a persistent bit so that we don't bother
                 * with the ioctl in the future.
                 */
                vd->vdev_nowritecache = B_TRUE;
        }
        if (bp->bio_cmd == BIO_DELETE && bp->bio_error == ENOTSUP) {
                /*
                 * If we get ENOTSUP, we know that no future
                 * attempts will ever succeed.  In this case we
                 * set a persistent bit so that we don't bother
                 * with the ioctl in the future.
                 */
                vd->vdev_notrim = B_TRUE;
        }
        if (zio->io_error == EIO && !vd->vdev_remove_wanted) {
                /*
                 * If the provider's error is set, we assume it is being
                 * removed.
                 */
                if (bp->bio_to->error != 0) {
                        /*
                         * We post the resource as soon as possible, instead of
                         * when the async removal actually happens, because the
                         * DE is using this information to discard previous I/O
                         * errors.
                         */
                        /* XXX: zfs_post_remove() can sleep. */
                        zfs_post_remove(zio->io_spa, vd);
                        vd->vdev_remove_wanted = B_TRUE;
                        spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
                } else if (!vd->vdev_delayed_close) {
                        vd->vdev_delayed_close = B_TRUE;
                }
        }
        g_destroy_bio(bp);
        zio_interrupt(zio);
}

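/*
 * Start asynchronous I/O on the vdev: reads and writes map directly to
 * BIO_READ/BIO_WRITE, while the supported ioctls become BIO_FLUSH and
 * BIO_DELETE.  Unsupported or disabled ioctls complete immediately and
 * the pipeline continues; issued bios complete later through
 * vdev_geom_io_intr().
 */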
static int
vdev_geom_io_start(zio_t *zio)
{
        vdev_t *vd;
        struct g_consumer *cp;
        struct bio *bp;
        int error;

        vd = zio->io_vd;

        if (zio->io_type == ZIO_TYPE_IOCTL) {
                /* XXPOLICY */
                if (!vdev_readable(vd)) {
                        zio->io_error = ENXIO;
                        return (ZIO_PIPELINE_CONTINUE);
                }

                switch (zio->io_cmd) {
                case DKIOCFLUSHWRITECACHE:
                        if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
                                break;
                        if (vd->vdev_nowritecache) {
                                zio->io_error = ENOTSUP;
                                break;
                        }
                        goto sendreq;
                case DKIOCTRIM:
                        if (vdev_geom_bio_delete_disable)
                                break;
                        if (vd->vdev_notrim) {
                                zio->io_error = ENOTSUP;
                                break;
                        }
                        goto sendreq;
                default:
                        zio->io_error = ENOTSUP;
                }

                return (ZIO_PIPELINE_CONTINUE);
        }
sendreq:
        cp = vd->vdev_tsd;
        if (cp == NULL) {
                zio->io_error = ENXIO;
                return (ZIO_PIPELINE_CONTINUE);
        }
        bp = g_alloc_bio();
        bp->bio_caller1 = zio;
        switch (zio->io_type) {
        case ZIO_TYPE_READ:
        case ZIO_TYPE_WRITE:
                bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
                bp->bio_data = zio->io_data;
                bp->bio_offset = zio->io_offset;
                bp->bio_length = zio->io_size;
                break;
        case ZIO_TYPE_IOCTL:
                switch (zio->io_cmd) {
                case DKIOCFLUSHWRITECACHE:
                        bp->bio_cmd = BIO_FLUSH;
                        bp->bio_flags |= BIO_ORDERED;
                        bp->bio_data = NULL;
                        bp->bio_offset = cp->provider->mediasize;
                        bp->bio_length = 0;
                        break;
                case DKIOCTRIM:
                        bp->bio_cmd = BIO_DELETE;
                        bp->bio_data = NULL;
                        bp->bio_offset = zio->io_offset;
                        bp->bio_length = zio->io_size;
                        break;
                }
                break;
        }
        bp->bio_done = vdev_geom_io_intr;

        g_io_request(bp, cp);

        return (ZIO_PIPELINE_STOP);
}

static void
vdev_geom_io_done(zio_t *zio)
{
}

static void
vdev_geom_hold(vdev_t *vd)
{
}

static void
vdev_geom_rele(vdev_t *vd)
{
}

vdev_ops_t vdev_geom_ops = {
        vdev_geom_open,
        vdev_geom_close,
        vdev_default_asize,
        vdev_geom_io_start,
        vdev_geom_io_done,
        NULL,                   /* vdev_op_state_change */
        vdev_geom_hold,
        vdev_geom_rele,
        VDEV_TYPE_DISK,         /* name of this vdev type */
        B_TRUE                  /* leaf vdev */
};