]> CyberLeo.Net >> Repos - FreeBSD/stable/9.git/blob - sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
MFC r252056:
[FreeBSD/stable/9.git] / sys / cddl / contrib / opensolaris / uts / common / fs / zfs / vdev_geom.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
23  * All rights reserved.
24  *
25  * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
26  */
27
28 #include <sys/zfs_context.h>
29 #include <sys/param.h>
30 #include <sys/kernel.h>
31 #include <sys/bio.h>
32 #include <sys/disk.h>
33 #include <sys/spa.h>
34 #include <sys/spa_impl.h>
35 #include <sys/vdev_impl.h>
36 #include <sys/fs/zfs.h>
37 #include <sys/zio.h>
38 #include <geom/geom.h>
39 #include <geom/geom_int.h>
40
41 /*
42  * Virtual device vector for GEOM.
43  */
44
/*
 * GEOM class for ZFS vdev consumers.  Registered with GEOM at module
 * load via DECLARE_GEOM_CLASS.  No taste method is set, so geoms of
 * this class are only created explicitly (see vdev_geom_attach() and
 * the temporary "zfs::vdev::taste" geoms).
 */
struct g_class zfs_vdev_class = {
        .name = "ZFS::VDEV",
        .version = G_VERSION,
};

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
51
SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH.  Loader tunable / sysctl: vfs.zfs.vdev.bio_flush_disable. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE.  Loader tunable / sysctl: vfs.zfs.vdev.bio_delete_disable. */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
63
/*
 * GEOM orphan method: our disk provider has gone away.  Runs on the
 * GEOM event thread with the topology lock held; the actual close is
 * deferred to the SPA async thread (see the comment below for why a
 * direct close here would invert lock order).
 */
static void
vdev_geom_orphan(struct g_consumer *cp)
{
        vdev_t *vd;

        g_topology_assert();

        vd = cp->private;

        /*
         * Orphan callbacks occur from the GEOM event thread.
         * Concurrent with this call, new I/O requests may be
         * working their way through GEOM about to find out
         * (only once executed by the g_down thread) that we've
         * been orphaned from our disk provider.  These I/Os
         * must be retired before we can detach our consumer.
         * This is most easily achieved by acquiring the
         * SPA ZIO configuration lock as a writer, but doing
         * so with the GEOM topology lock held would cause
         * a lock order reversal.  Instead, rely on the SPA's
         * async removal support to invoke a close on this
         * vdev once it is safe to do so.
         */
        zfs_post_remove(vd->vdev_spa, vd);
        vd->vdev_remove_wanted = B_TRUE;
        spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
}
91
/*
 * Attach a GEOM consumer to provider "pp", reusing the shared
 * "zfs::vdev" geom (created on first use) and any consumer already
 * connected to this provider.  Access is opened r1w0e1; write access
 * is requested separately in vdev_geom_open().  Returns the consumer,
 * or NULL on failure.  Called with the topology lock held.
 */
static struct g_consumer *
vdev_geom_attach(struct g_provider *pp)
{
        struct g_geom *gp;
        struct g_consumer *cp;

        g_topology_assert();

        ZFS_LOG(1, "Attaching to %s.", pp->name);
        /* Do we have geom already? No? Create one. */
        LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
                if (gp->flags & G_GEOM_WITHER)
                        continue;
                if (strcmp(gp->name, "zfs::vdev") != 0)
                        continue;
                break;
        }
        if (gp == NULL) {
                gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
                gp->orphan = vdev_geom_orphan;
                cp = g_new_consumer(gp);
                if (g_attach(cp, pp) != 0) {
                        /* Withering the fresh geom also reaps the consumer. */
                        g_wither_geom(gp, ENXIO);
                        return (NULL);
                }
                if (g_access(cp, 1, 0, 1) != 0) {
                        g_wither_geom(gp, ENXIO);
                        return (NULL);
                }
                ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
        } else {
                /* Check if we are already connected to this provider. */
                LIST_FOREACH(cp, &gp->consumer, consumer) {
                        if (cp->provider == pp) {
                                ZFS_LOG(1, "Found consumer for %s.", pp->name);
                                break;
                        }
                }
                if (cp == NULL) {
                        cp = g_new_consumer(gp);
                        if (g_attach(cp, pp) != 0) {
                                g_destroy_consumer(cp);
                                return (NULL);
                        }
                        if (g_access(cp, 1, 0, 1) != 0) {
                                g_detach(cp);
                                g_destroy_consumer(cp);
                                return (NULL);
                        }
                        ZFS_LOG(1, "Created consumer for %s.", pp->name);
                } else {
                        /* Stack another r1e1 reference on the shared consumer. */
                        if (g_access(cp, 1, 0, 1) != 0)
                                return (NULL);
                        ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
                }
        }
        return (cp);
}
150
/*
 * Release one r1e1 reference on consumer "cp" (the counterpart of
 * vdev_geom_attach()).  On the last read/exclusive close, any write
 * access still held is dropped and the consumer is destroyed; the geom
 * is withered once no consumers remain.  Called with the topology lock
 * held; the (void *, int) signature also makes it usable as a
 * g_post_event() callback (see vdev_geom_close()).
 */
static void
vdev_geom_detach(void *arg, int flag __unused)
{
        struct g_geom *gp;
        struct g_consumer *cp;

        g_topology_assert();
        cp = arg;
        gp = cp->geom;

        ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
        g_access(cp, -1, 0, -1);
        /* Destroy consumer on last close. */
        if (cp->acr == 0 && cp->ace == 0) {
                ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
                if (cp->acw > 0)
                        g_access(cp, 0, -cp->acw, 0);
                g_detach(cp);
                g_destroy_consumer(cp);
        }
        /* Destroy geom if there are no consumers left. */
        if (LIST_EMPTY(&gp->consumer)) {
                ZFS_LOG(1, "Destroyed geom %s.", gp->name);
                g_wither_geom(gp, ENXIO);
        }
}
177
178 static uint64_t
179 nvlist_get_guid(nvlist_t *list)
180 {
181         uint64_t value;
182
183         value = 0;
184         nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, &value);
185         return (value);
186 }
187
/*
 * Synchronous I/O on a consumer: issue "cmd" covering
 * [offset, offset + size) in chunks of at most MAXPHYS bytes (rounded
 * down to a sector-size multiple), waiting for each chunk to complete
 * before issuing the next.  Offset and size must be sector aligned.
 * Returns 0, or the error of the first failing chunk.  Sleeps in
 * biowait(), so callers must not hold the topology lock.
 */
static int
vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
{
        struct bio *bp;
        u_char *p;
        off_t off, maxio;
        int error;

        ASSERT((offset % cp->provider->sectorsize) == 0);
        ASSERT((size % cp->provider->sectorsize) == 0);

        bp = g_alloc_bio();
        off = offset;
        offset += size;         /* "offset" now marks the end of the transfer. */
        p = data;
        maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
        error = 0;

        for (; off < offset; off += maxio, p += maxio, size -= maxio) {
                /* The single bio is recycled (bzero + refill) per chunk. */
                bzero(bp, sizeof(*bp));
                bp->bio_cmd = cmd;
                bp->bio_done = NULL;
                bp->bio_offset = off;
                bp->bio_length = MIN(size, maxio);
                bp->bio_data = p;
                g_io_request(bp, cp);
                error = biowait(bp, "vdev_geom_io");
                if (error != 0)
                        break;
        }

        g_destroy_bio(bp);
        return (error);
}
222
/*
 * Orphan method for the temporary tasting geoms.  Taster consumers are
 * attached and detached under the topology lock within a single scan,
 * so this callback must never fire; assert (under INVARIANTS) if it
 * does.
 */
static void
vdev_geom_taste_orphan(struct g_consumer *cp)
{

        KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
            cp->provider->name));
}
230
231 static int
232 vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
233 {
234         struct g_provider *pp;
235         vdev_label_t *label;
236         char *p, *buf;
237         size_t buflen;
238         uint64_t psize;
239         off_t offset, size;
240         uint64_t guid, state, txg;
241         int error, l, len;
242
243         g_topology_assert_not();
244
245         pp = cp->provider;
246         ZFS_LOG(1, "Reading config from %s...", pp->name);
247
248         psize = pp->mediasize;
249         psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));
250
251         size = sizeof(*label) + pp->sectorsize -
252             ((sizeof(*label) - 1) % pp->sectorsize) - 1;
253
254         guid = 0;
255         label = kmem_alloc(size, KM_SLEEP);
256         buflen = sizeof(label->vl_vdev_phys.vp_nvlist);
257
258         *config = NULL;
259         for (l = 0; l < VDEV_LABELS; l++) {
260
261                 offset = vdev_label_offset(psize, l, 0);
262                 if ((offset % pp->sectorsize) != 0)
263                         continue;
264
265                 if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
266                         continue;
267                 buf = label->vl_vdev_phys.vp_nvlist;
268
269                 if (nvlist_unpack(buf, buflen, config, 0) != 0)
270                         continue;
271
272                 if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
273                     &state) != 0 || state > POOL_STATE_L2CACHE) {
274                         nvlist_free(*config);
275                         *config = NULL;
276                         continue;
277                 }
278
279                 if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
280                     (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
281                     &txg) != 0 || txg == 0)) {
282                         nvlist_free(*config);
283                         *config = NULL;
284                         continue;
285                 }
286
287                 break;
288         }
289
290         kmem_free(label, size);
291         return (*config == NULL ? ENOENT : 0);
292 }
293
294 static void
295 resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
296 {
297         nvlist_t **new_configs;
298         uint64_t i;
299
300         if (id < *count)
301                 return;
302         new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
303             KM_SLEEP);
304         for (i = 0; i < *count; i++)
305                 new_configs[i] = (*configs)[i];
306         if (*configs != NULL)
307                 kmem_free(*configs, *count * sizeof(void *));
308         *configs = new_configs;
309         *count = id + 1;
310 }
311
312 static void
313 process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
314     const char *name, uint64_t* known_pool_guid)
315 {
316         nvlist_t *vdev_tree;
317         uint64_t pool_guid;
318         uint64_t vdev_guid, known_guid;
319         uint64_t id, txg, known_txg;
320         char *pname;
321         int i;
322
323         if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
324             strcmp(pname, name) != 0)
325                 goto ignore;
326
327         if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
328                 goto ignore;
329
330         if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
331                 goto ignore;
332
333         if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
334                 goto ignore;
335
336         if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
337                 goto ignore;
338
339         VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);
340
341         if (*known_pool_guid != 0) {
342                 if (pool_guid != *known_pool_guid)
343                         goto ignore;
344         } else
345                 *known_pool_guid = pool_guid;
346
347         resize_configs(configs, count, id);
348
349         if ((*configs)[id] != NULL) {
350                 VERIFY(nvlist_lookup_uint64((*configs)[id],
351                     ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
352                 if (txg <= known_txg)
353                         goto ignore;
354                 nvlist_free((*configs)[id]);
355         }
356
357         (*configs)[id] = cfg;
358         return;
359
360 ignore:
361         nvlist_free(cfg);
362 }
363
/*
 * Temporarily attach the tasting consumer "cp" to provider "pp" with
 * read-only access so its labels can be read.  Rejects withering
 * providers and sector sizes ZFS cannot label (> VDEV_PAD_SIZE or not
 * a power of two).  Returns 0 on success, else an errno.
 */
static int
vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
{
        int error;

        if (pp->flags & G_PF_WITHER)
                return (EINVAL);
        if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
                return (EINVAL);
        g_attach(cp, pp);
        error = g_access(cp, 1, 0, 0);
        if (error != 0)
                g_detach(cp);
        return (error);
}
379
/*
 * Undo vdev_geom_attach_taster(): drop the read reference and detach
 * the tasting consumer from its provider.
 */
static void
vdev_geom_detach_taster(struct g_consumer *cp)
{
        g_access(cp, -1, 0, 0);
        g_detach(cp);
}
386
/*
 * Scan every provider of every GEOM class except our own, taste each
 * one for a vdev label belonging to pool "name", and collect the
 * newest config per top-level vdev into *configs / *count (caller
 * frees).  Returns 0 when at least one config was found, else ENOENT.
 */
int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
        struct g_class *mp;
        struct g_geom *gp, *zgp;
        struct g_provider *pp;
        struct g_consumer *zcp;
        nvlist_t *vdev_cfg;
        uint64_t pool_guid;
        int error;

        DROP_GIANT();
        g_topology_lock();

        zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
        /* This orphan function should be never called. */
        zgp->orphan = vdev_geom_taste_orphan;
        zcp = g_new_consumer(zgp);

        *configs = NULL;
        *count = 0;
        pool_guid = 0;
        LIST_FOREACH(mp, &g_classes, class) {
                /* Skip our own class: don't taste ZFS's own consumers. */
                if (mp == &zfs_vdev_class)
                        continue;
                LIST_FOREACH(gp, &mp->geom, geom) {
                        if (gp->flags & G_GEOM_WITHER)
                                continue;
                        LIST_FOREACH(pp, &gp->provider, provider) {
                                if (pp->flags & G_PF_WITHER)
                                        continue;
                                if (vdev_geom_attach_taster(zcp, pp) != 0)
                                        continue;
                                /* Label reads sleep; drop the topology lock. */
                                g_topology_unlock();
                                error = vdev_geom_read_config(zcp, &vdev_cfg);
                                g_topology_lock();
                                vdev_geom_detach_taster(zcp);
                                if (error)
                                        continue;
                                ZFS_LOG(1, "successfully read vdev config");

                                /* process_vdev_config() consumes vdev_cfg. */
                                process_vdev_config(configs, count,
                                    vdev_cfg, name, &pool_guid);
                        }
                }
        }

        g_destroy_consumer(zcp);
        g_destroy_geom(zgp);
        g_topology_unlock();
        PICKUP_GIANT();

        return (*count > 0 ? 0 : ENOENT);
}
442
443 static uint64_t
444 vdev_geom_read_guid(struct g_consumer *cp)
445 {
446         nvlist_t *config;
447         uint64_t guid;
448
449         g_topology_assert_not();
450
451         guid = 0;
452         if (vdev_geom_read_config(cp, &config) == 0) {
453                 guid = nvlist_get_guid(config);
454                 nvlist_free(config);
455         }
456         return (guid);
457 }
458
459 static struct g_consumer *
460 vdev_geom_attach_by_guid(uint64_t guid)
461 {
462         struct g_class *mp;
463         struct g_geom *gp, *zgp;
464         struct g_provider *pp;
465         struct g_consumer *cp, *zcp;
466         uint64_t pguid;
467
468         g_topology_assert();
469
470         zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
471         /* This orphan function should be never called. */
472         zgp->orphan = vdev_geom_taste_orphan;
473         zcp = g_new_consumer(zgp);
474
475         cp = NULL;
476         LIST_FOREACH(mp, &g_classes, class) {
477                 if (mp == &zfs_vdev_class)
478                         continue;
479                 LIST_FOREACH(gp, &mp->geom, geom) {
480                         if (gp->flags & G_GEOM_WITHER)
481                                 continue;
482                         LIST_FOREACH(pp, &gp->provider, provider) {
483                                 if (vdev_geom_attach_taster(zcp, pp) != 0)
484                                         continue;
485                                 g_topology_unlock();
486                                 pguid = vdev_geom_read_guid(zcp);
487                                 g_topology_lock();
488                                 vdev_geom_detach_taster(zcp);
489                                 if (pguid != guid)
490                                         continue;
491                                 cp = vdev_geom_attach(pp);
492                                 if (cp == NULL) {
493                                         printf("ZFS WARNING: Unable to attach to %s.\n",
494                                             pp->name);
495                                         continue;
496                                 }
497                                 break;
498                         }
499                         if (cp != NULL)
500                                 break;
501                 }
502                 if (cp != NULL)
503                         break;
504         }
505 end:
506         g_destroy_consumer(zcp);
507         g_destroy_geom(zgp);
508         return (cp);
509 }
510
/*
 * Locate and attach to the provider whose label carries vd->vdev_guid,
 * replacing vd->vdev_path with the found provider's /dev path on
 * success.  Returns the consumer or NULL.  Topology lock held.
 */
static struct g_consumer *
vdev_geom_open_by_guid(vdev_t *vd)
{
        struct g_consumer *cp;
        char *buf;
        size_t len;

        g_topology_assert();

        ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid);
        cp = vdev_geom_attach_by_guid(vd->vdev_guid);
        if (cp != NULL) {
                /*
                 * NOTE(review): buf is kmem_alloc'd here but the old path is
                 * freed with spa_strfree(); presumably the allocation sizes
                 * are compatible (len == strlen(buf) + 1) -- confirm.
                 */
                len = strlen(cp->provider->name) + strlen("/dev/") + 1;
                buf = kmem_alloc(len, KM_SLEEP);

                snprintf(buf, len, "/dev/%s", cp->provider->name);
                spa_strfree(vd->vdev_path);
                vd->vdev_path = buf;

                ZFS_LOG(1, "Attach by guid [%ju] succeeded, provider %s.",
                    (uintmax_t)vd->vdev_guid, vd->vdev_path);
        } else {
                ZFS_LOG(1, "Search by guid [%ju] failed.",
                    (uintmax_t)vd->vdev_guid);
        }

        return (cp);
}
539
/*
 * Attach to the provider named by vd->vdev_path (with the "/dev/"
 * prefix stripped).  When check_guid is set and the sector size is one
 * we can label, the on-disk guid must match vd->vdev_guid or the
 * attach is undone.  Returns the consumer or NULL.  Topology lock held
 * (dropped around the sleeping label read).
 */
static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
        struct g_provider *pp;
        struct g_consumer *cp;
        uint64_t guid;

        g_topology_assert();

        cp = NULL;
        /* sizeof("/dev/") - 1 skips the "/dev/" prefix of vdev_path. */
        pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
        if (pp != NULL) {
                ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
                cp = vdev_geom_attach(pp);
                if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
                    pp->sectorsize <= VDEV_PAD_SIZE) {
                        g_topology_unlock();
                        guid = vdev_geom_read_guid(cp);
                        g_topology_lock();
                        if (guid != vd->vdev_guid) {
                                vdev_geom_detach(cp, 0);
                                cp = NULL;
                                ZFS_LOG(1, "guid mismatch for provider %s: "
                                    "%ju != %ju.", vd->vdev_path,
                                    (uintmax_t)vd->vdev_guid, (uintmax_t)guid);
                        } else {
                                ZFS_LOG(1, "guid match for provider %s.",
                                    vd->vdev_path);
                        }
                }
        }

        return (cp);
}
574
/*
 * vdev_op_open: open the GEOM provider backing "vd".
 * On success returns 0 and reports the device size (*psize,
 * *max_psize) and the minimum transfer size as a power-of-two exponent
 * (*ashift); the consumer is stored in vd->vdev_tsd and the /dev path
 * in vd->vdev_physpath.  On failure, vd->vdev_stat.vs_aux is set and
 * an errno is returned.
 */
static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *ashift)
{
        struct g_provider *pp;
        struct g_consumer *cp;
        size_t bufsize;
        int error;

        /*
         * We must have a pathname, and it must be absolute.
         */
        if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
                vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
                return (EINVAL);
        }

        vd->vdev_tsd = NULL;

        DROP_GIANT();
        g_topology_lock();
        error = 0;

        /*
         * If we're creating or splitting a pool, just find the GEOM provider
         * by its name and ignore GUID mismatches.
         */
        if (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
            vd->vdev_spa->spa_splitting_newspa == B_TRUE)
                cp = vdev_geom_open_by_path(vd, 0);
        else {
                cp = vdev_geom_open_by_path(vd, 1);
                if (cp == NULL) {
                        /*
                         * The device at vd->vdev_path doesn't have the
                         * expected guid. The disks might have merely
                         * moved around so try all other GEOM providers
                         * to find one with the right guid.
                         */
                        cp = vdev_geom_open_by_guid(vd);
                }
        }

        if (cp == NULL) {
                ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
                error = ENOENT;
        } else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
            !ISP2(cp->provider->sectorsize)) {
                ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
                    vd->vdev_path);
                vdev_geom_detach(cp, 0);
                error = EINVAL;
                cp = NULL;
        } else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
                int i;

                /* Retry briefly (5 attempts, hz/2 apart) if write access
                 * is temporarily denied. */
                for (i = 0; i < 5; i++) {
                        error = g_access(cp, 0, 1, 0);
                        if (error == 0)
                                break;
                        g_topology_unlock();
                        tsleep(vd, 0, "vdev", hz / 2);
                        g_topology_lock();
                }
                if (error != 0) {
                        printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
                            vd->vdev_path, error);
                        vdev_geom_detach(cp, 0);
                        cp = NULL;
                }
        }
        g_topology_unlock();
        PICKUP_GIANT();
        if (cp == NULL) {
                vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
                return (error);
        }

        /* Link the consumer and the vdev to each other. */
        cp->private = vd;
        vd->vdev_tsd = cp;
        pp = cp->provider;

        /*
         * Determine the actual size of the device.
         */
        *max_psize = *psize = pp->mediasize;

        /*
         * Determine the device's minimum transfer size.
         */
        *ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;

        /*
         * Clear the nowritecache settings, so that on a vdev_reopen()
         * we will try again.
         */
        vd->vdev_nowritecache = B_FALSE;

        if (vd->vdev_physpath != NULL)
                spa_strfree(vd->vdev_physpath);
        bufsize = sizeof("/dev/") + strlen(pp->name);
        vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP);
        snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name);

        return (0);
}
681
/*
 * vdev_op_close: disconnect from the provider.  The detach needs the
 * topology lock, so it is deferred to the GEOM event thread via
 * g_post_event() rather than done inline.
 */
static void
vdev_geom_close(vdev_t *vd)
{
        struct g_consumer *cp;

        cp = vd->vdev_tsd;
        if (cp == NULL)
                return;
        vd->vdev_tsd = NULL;
        vd->vdev_delayed_close = B_FALSE;
        g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL);
}
694
/*
 * bio completion callback for I/O issued by vdev_geom_io_start().
 * Propagates the bio status into the zio, latches "write cache not
 * supported" / "TRIM not supported" on ENOTSUP so those ioctls are
 * not retried, flags provider removal on EIO, and completes the zio.
 */
static void
vdev_geom_io_intr(struct bio *bp)
{
        vdev_t *vd;
        zio_t *zio;

        zio = bp->bio_caller1;
        vd = zio->io_vd;
        zio->io_error = bp->bio_error;
        /* A short transfer without an explicit error is still an error. */
        if (zio->io_error == 0 && bp->bio_resid != 0)
                zio->io_error = EIO;
        if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) {
                /*
                 * If we get ENOTSUP, we know that no future
                 * attempts will ever succeed.  In this case we
                 * set a persistent bit so that we don't bother
                 * with the ioctl in the future.
                 */
                vd->vdev_nowritecache = B_TRUE;
        }
        if (bp->bio_cmd == BIO_DELETE && bp->bio_error == ENOTSUP) {
                /*
                 * If we get ENOTSUP, we know that no future
                 * attempts will ever succeed.  In this case we
                 * set a persistent bit so that we don't bother
                 * with the ioctl in the future.
                 */
                vd->vdev_notrim = B_TRUE;
        }
        if (zio->io_error == EIO && !vd->vdev_remove_wanted) {
                /*
                 * If provider's error is set we assume it is being
                 * removed.
                 */
                if (bp->bio_to->error != 0) {
                        /*
                         * We post the resource as soon as possible, instead of
                         * when the async removal actually happens, because the
                         * DE is using this information to discard previous I/O
                         * errors.
                         */
                        /* XXX: zfs_post_remove() can sleep. */
                        zfs_post_remove(zio->io_spa, vd);
                        vd->vdev_remove_wanted = B_TRUE;
                        spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
                } else if (!vd->vdev_delayed_close) {
                        vd->vdev_delayed_close = B_TRUE;
                }
        }
        g_destroy_bio(bp);
        zio_interrupt(zio);
}
747
/*
 * vdev_op_io_start: translate a zio into a GEOM bio and issue it
 * asynchronously (completion via vdev_geom_io_intr()).  Ioctl zios
 * (cache flush, TRIM) may be suppressed by tunables or latched
 * capability flags, in which case the zio completes immediately and
 * ZIO_PIPELINE_CONTINUE is returned; when a bio is dispatched,
 * ZIO_PIPELINE_STOP is returned.
 */
static int
vdev_geom_io_start(zio_t *zio)
{
        vdev_t *vd;
        struct g_consumer *cp;
        struct bio *bp;
        int error;

        vd = zio->io_vd;

        if (zio->io_type == ZIO_TYPE_IOCTL) {
                /* XXPOLICY */
                if (!vdev_readable(vd)) {
                        zio->io_error = ENXIO;
                        return (ZIO_PIPELINE_CONTINUE);
                }

                switch (zio->io_cmd) {
                case DKIOCFLUSHWRITECACHE:
                        if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
                                break;
                        if (vd->vdev_nowritecache) {
                                zio->io_error = ENOTSUP;
                                break;
                        }
                        goto sendreq;
                case DKIOCTRIM:
                        if (vdev_geom_bio_delete_disable)
                                break;
                        if (vd->vdev_notrim) {
                                zio->io_error = ENOTSUP;
                                break;
                        }
                        goto sendreq;
                default:
                        zio->io_error = ENOTSUP;
                }

                return (ZIO_PIPELINE_CONTINUE);
        }
/* Reached via goto for flush/TRIM ioctls, by fallthrough for read/write. */
sendreq:
        cp = vd->vdev_tsd;
        if (cp == NULL) {
                zio->io_error = ENXIO;
                return (ZIO_PIPELINE_CONTINUE);
        }
        bp = g_alloc_bio();
        bp->bio_caller1 = zio;
        switch (zio->io_type) {
        case ZIO_TYPE_READ:
        case ZIO_TYPE_WRITE:
                bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
                bp->bio_data = zio->io_data;
                bp->bio_offset = zio->io_offset;
                bp->bio_length = zio->io_size;
                break;
        case ZIO_TYPE_IOCTL:
                switch (zio->io_cmd) {
                case DKIOCFLUSHWRITECACHE:
                        bp->bio_cmd = BIO_FLUSH;
                        bp->bio_flags |= BIO_ORDERED;
                        bp->bio_data = NULL;
                        bp->bio_offset = cp->provider->mediasize;
                        bp->bio_length = 0;
                        break;
                case DKIOCTRIM:
                        bp->bio_cmd = BIO_DELETE;
                        bp->bio_data = NULL;
                        bp->bio_offset = zio->io_offset;
                        bp->bio_length = zio->io_size;
                        break;
                }
                break;
        }
        bp->bio_done = vdev_geom_io_intr;

        g_io_request(bp, cp);

        return (ZIO_PIPELINE_STOP);
}
828
/*
 * vdev_op_io_done: intentionally empty; all completion work happens in
 * the bio callback vdev_geom_io_intr().
 */
static void
vdev_geom_io_done(zio_t *zio)
{
}
833
/*
 * vdev_op_hold: intentionally a no-op for GEOM-backed vdevs.
 */
static void
vdev_geom_hold(vdev_t *vd)
{
}
838
/*
 * vdev_op_rele: intentionally a no-op for GEOM-backed vdevs.
 */
static void
vdev_geom_rele(vdev_t *vd)
{
}
843
/*
 * Operations vector for GEOM-backed disk vdevs.  Field order follows
 * vdev_ops_t; the NULL slot is presumably vdev_op_state_change --
 * confirm against the struct definition in vdev_impl.h.
 */
vdev_ops_t vdev_geom_ops = {
        vdev_geom_open,
        vdev_geom_close,
        vdev_default_asize,
        vdev_geom_io_start,
        vdev_geom_io_done,
        NULL,
        vdev_geom_hold,
        vdev_geom_rele,
        VDEV_TYPE_DISK,         /* name of this vdev type */
        B_TRUE                  /* leaf vdev */
};