/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/disk.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <geom/geom.h>
#include <geom/geom_int.h>

/*
 * Virtual device vector for GEOM.
 */

struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
};

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

SYSCTL_DECL(_vfs_zfs_vdev);
/* Don't send BIO_FLUSH. */
static int vdev_geom_bio_flush_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_flush_disable", &vdev_geom_bio_flush_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RW,
    &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
/* Don't send BIO_DELETE. */
static int vdev_geom_bio_delete_disable = 0;
TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");

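/*
 * Both knobs are registered as loader tunables and as read/write sysctls,
 * so they may be set at boot via /boot/loader.conf, e.g.:
 *
 *	vfs.zfs.vdev.bio_flush_disable=1
 *
 * or adjusted at runtime, e.g.:
 *
 *	# sysctl vfs.zfs.vdev.bio_delete_disable=1
 */
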
static void
vdev_geom_orphan(struct g_consumer *cp)
{
	vdev_t *vd;

	g_topology_assert();

	vd = cp->private;
	if (vd == NULL)
		return;

	/*
	 * Orphan callbacks occur from the GEOM event thread.
	 * Concurrent with this call, new I/O requests may be
	 * working their way through GEOM about to find out
	 * (only once executed by the g_down thread) that we've
	 * been orphaned from our disk provider.  These I/Os
	 * must be retired before we can detach our consumer.
	 * This is most easily achieved by acquiring the
	 * SPA ZIO configuration lock as a writer, but doing
	 * so with the GEOM topology lock held would cause
	 * a lock order reversal.  Instead, rely on the SPA's
	 * async removal support to invoke a close on this
	 * vdev once it is safe to do so.
	 */
	zfs_post_remove(vd->vdev_spa, vd);
	vd->vdev_remove_wanted = B_TRUE;
	spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
}

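/*
 * Attach a consumer to the given provider, reusing the shared "zfs::vdev"
 * geom (and an existing consumer, if one is already attached to this
 * provider) where possible.  Every successful path acquires one read and
 * one exclusive access reference (g_access(cp, 1, 0, 1)); write access is
 * only requested later, in vdev_geom_open(), when the pool is opened for
 * writing.  Returns NULL if the provider cannot be attached or opened.
 */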
static struct g_consumer *
vdev_geom_attach(struct g_provider *pp)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();

	ZFS_LOG(1, "Attaching to %s.", pp->name);
	/* Do we already have a geom?  If not, create one. */
	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
		if (gp->flags & G_GEOM_WITHER)
			continue;
		if (strcmp(gp->name, "zfs::vdev") != 0)
			continue;
		break;
	}
	if (gp == NULL) {
		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
		gp->orphan = vdev_geom_orphan;
		cp = g_new_consumer(gp);
		if (g_attach(cp, pp) != 0) {
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		if (g_access(cp, 1, 0, 1) != 0) {
			g_wither_geom(gp, ENXIO);
			return (NULL);
		}
		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
	} else {
		/* Check if we are already connected to this provider. */
		LIST_FOREACH(cp, &gp->consumer, consumer) {
			if (cp->provider == pp) {
				ZFS_LOG(1, "Found consumer for %s.", pp->name);
				break;
			}
		}
		if (cp == NULL) {
			cp = g_new_consumer(gp);
			if (g_attach(cp, pp) != 0) {
				g_destroy_consumer(cp);
				return (NULL);
			}
			if (g_access(cp, 1, 0, 1) != 0) {
				g_detach(cp);
				g_destroy_consumer(cp);
				return (NULL);
			}
			ZFS_LOG(1, "Created consumer for %s.", pp->name);
		} else {
			if (g_access(cp, 1, 0, 1) != 0)
				return (NULL);
			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
		}
	}
	return (cp);
}

static void
vdev_geom_detach(void *arg, int flag __unused)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();
	cp = arg;
	gp = cp->geom;

	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
	g_access(cp, -1, 0, -1);
	/* Destroy consumer on last close. */
	if (cp->acr == 0 && cp->ace == 0) {
		ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
		if (cp->acw > 0)
			g_access(cp, 0, -cp->acw, 0);
		g_detach(cp);
		g_destroy_consumer(cp);
	}
	/* Destroy geom if there are no consumers left. */
	if (LIST_EMPTY(&gp->consumer)) {
		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
		g_wither_geom(gp, ENXIO);
	}
}

static uint64_t
nvlist_get_guid(nvlist_t *list)
{
	uint64_t value;

	value = 0;
	nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, &value);
	return (value);
}

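/*
 * Synchronous I/O helper used for the label reads below.  The request is
 * issued in chunks of at most MAXPHYS bytes (rounded down to a multiple
 * of the provider's sector size), and each chunk is waited on with
 * biowait() before the next one is sent.
 */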
static int
vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
{
	struct bio *bp;
	u_char *p;
	off_t off, maxio;
	int error;

	ASSERT((offset % cp->provider->sectorsize) == 0);
	ASSERT((size % cp->provider->sectorsize) == 0);

	bp = g_alloc_bio();
	off = offset;
	offset += size;
	p = data;
	maxio = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
	error = 0;

	for (; off < offset; off += maxio, p += maxio, size -= maxio) {
		bzero(bp, sizeof(*bp));
		bp->bio_cmd = cmd;
		bp->bio_done = NULL;
		bp->bio_offset = off;
		bp->bio_length = MIN(size, maxio);
		bp->bio_data = p;
		g_io_request(bp, cp);
		error = biowait(bp, "vdev_geom_io");
		if (error != 0)
			break;
	}

	g_destroy_bio(bp);
	return (error);
}

static void
vdev_geom_taste_orphan(struct g_consumer *cp)
{

	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
	    cp->provider->name));
}

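/*
 * Read the vdev label from each of the VDEV_LABELS (four) label
 * locations on the provider and unpack the first config nvlist that
 * looks sane: its pool state must be no greater than POOL_STATE_L2CACHE
 * and, unless the device is a spare or an L2ARC device, it must carry a
 * non-zero transaction group.  Returns ENOENT if no usable label is
 * found.
 */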
static int
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
{
	struct g_provider *pp;
	vdev_label_t *label;
	char *buf;
	size_t buflen;
	uint64_t psize;
	off_t offset, size;
	uint64_t state, txg;
	int l;

	g_topology_assert_not();

	pp = cp->provider;
	ZFS_LOG(1, "Reading config from %s...", pp->name);

	psize = pp->mediasize;
	psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));

	size = sizeof(*label) + pp->sectorsize -
	    ((sizeof(*label) - 1) % pp->sectorsize) - 1;

	label = kmem_alloc(size, KM_SLEEP);
	buflen = sizeof(label->vl_vdev_phys.vp_nvlist);

	*config = NULL;
	for (l = 0; l < VDEV_LABELS; l++) {

		offset = vdev_label_offset(psize, l, 0);
		if ((offset % pp->sectorsize) != 0)
			continue;

		if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
			continue;
		buf = label->vl_vdev_phys.vp_nvlist;

		if (nvlist_unpack(buf, buflen, config, 0) != 0)
			continue;

		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		break;
	}

	kmem_free(label, size);
	return (*config == NULL ? ENOENT : 0);
}

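/*
 * Grow the configs array, if necessary, so that index "id" is valid,
 * preserving any entries already collected.
 */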
static void
resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
{
	nvlist_t **new_configs;
	uint64_t i;

	if (id < *count)
		return;
	new_configs = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
	    KM_SLEEP);
	for (i = 0; i < *count; i++)
		new_configs[i] = (*configs)[i];
	if (*configs != NULL)
		kmem_free(*configs, *count * sizeof(void *));
	*configs = new_configs;
	*count = id + 1;
}

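/*
 * Merge one candidate label into the per-pool array of top-level vdev
 * configs: the label must match the requested pool name (and the pool
 * guid, once one has been seen), and for each top-level vdev id only
 * the config with the newest transaction group is kept.  The caller
 * donates "cfg"; it is freed here unless it is stored in the array.
 */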
static void
process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
    const char *name, uint64_t *known_pool_guid)
{
	nvlist_t *vdev_tree;
	uint64_t pool_guid;
	uint64_t vdev_guid;
	uint64_t id, txg, known_txg;
	char *pname;

	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
	    strcmp(pname, name) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
		goto ignore;

	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
		goto ignore;

	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
		goto ignore;

	VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	if (*known_pool_guid != 0) {
		if (pool_guid != *known_pool_guid)
			goto ignore;
	} else
		*known_pool_guid = pool_guid;

	resize_configs(configs, count, id);

	if ((*configs)[id] != NULL) {
		VERIFY(nvlist_lookup_uint64((*configs)[id],
		    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
		if (txg <= known_txg)
			goto ignore;
		nvlist_free((*configs)[id]);
	}

	(*configs)[id] = cfg;
	return;

ignore:
	nvlist_free(cfg);
}

static int
vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
{
	int error;

	if (pp->flags & G_PF_WITHER)
		return (EINVAL);
	if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
		return (EINVAL);
	g_attach(cp, pp);
	error = g_access(cp, 1, 0, 0);
	if (error != 0)
		g_detach(cp);
	return (error);
}

static void
vdev_geom_detach_taster(struct g_consumer *cp)
{
	g_access(cp, -1, 0, 0);
	g_detach(cp);
}

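/*
 * Walk every GEOM provider in the system (skipping our own class) and
 * taste each one for a label belonging to the named pool, accumulating
 * the best config per top-level vdev id.  This is used to assemble a
 * pool configuration from on-disk labels, e.g. when importing the root
 * pool at boot.
 */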
int
vdev_geom_read_pool_label(const char *name,
    nvlist_t ***configs, uint64_t *count)
{
	struct g_class *mp;
	struct g_geom *gp, *zgp;
	struct g_provider *pp;
	struct g_consumer *zcp;
	nvlist_t *vdev_cfg;
	uint64_t pool_guid;
	int error;

	DROP_GIANT();
	g_topology_lock();

	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
	/* This orphan function should never be called. */
	zgp->orphan = vdev_geom_taste_orphan;
	zcp = g_new_consumer(zgp);

	*configs = NULL;
	*count = 0;
	pool_guid = 0;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (pp->flags & G_PF_WITHER)
					continue;
				if (vdev_geom_attach_taster(zcp, pp) != 0)
					continue;
				g_topology_unlock();
				error = vdev_geom_read_config(zcp, &vdev_cfg);
				g_topology_lock();
				vdev_geom_detach_taster(zcp);
				if (error)
					continue;
				ZFS_LOG(1, "successfully read vdev config");

				process_vdev_config(configs, count,
				    vdev_cfg, name, &pool_guid);
			}
		}
	}

	g_destroy_consumer(zcp);
	g_destroy_geom(zgp);
	g_topology_unlock();
	PICKUP_GIANT();

	return (*count > 0 ? 0 : ENOENT);
}

static uint64_t
vdev_geom_read_guid(struct g_consumer *cp)
{
	nvlist_t *config;
	uint64_t guid;

	g_topology_assert_not();

	guid = 0;
	if (vdev_geom_read_config(cp, &config) == 0) {
		guid = nvlist_get_guid(config);
		nvlist_free(config);
	}
	return (guid);
}

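/*
 * Exhaustively taste every provider in the system and attach to the
 * first one whose label carries the requested vdev guid.  This is the
 * fallback for when the cached device path no longer points at the
 * right disk, e.g. after devices have been renumbered.
 */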
static struct g_consumer *
vdev_geom_attach_by_guid(uint64_t guid)
{
	struct g_class *mp;
	struct g_geom *gp, *zgp;
	struct g_provider *pp;
	struct g_consumer *cp, *zcp;
	uint64_t pguid;

	g_topology_assert();

	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
	/* This orphan function should never be called. */
	zgp->orphan = vdev_geom_taste_orphan;
	zcp = g_new_consumer(zgp);

	cp = NULL;
	LIST_FOREACH(mp, &g_classes, class) {
		if (mp == &zfs_vdev_class)
			continue;
		LIST_FOREACH(gp, &mp->geom, geom) {
			if (gp->flags & G_GEOM_WITHER)
				continue;
			LIST_FOREACH(pp, &gp->provider, provider) {
				if (vdev_geom_attach_taster(zcp, pp) != 0)
					continue;
				g_topology_unlock();
				pguid = vdev_geom_read_guid(zcp);
				g_topology_lock();
				vdev_geom_detach_taster(zcp);
				if (pguid != guid)
					continue;
				cp = vdev_geom_attach(pp);
				if (cp == NULL) {
					printf("ZFS WARNING: Unable to attach to %s.\n",
					    pp->name);
					continue;
				}
				break;
			}
			if (cp != NULL)
				break;
		}
		if (cp != NULL)
			break;
	}
	g_destroy_consumer(zcp);
	g_destroy_geom(zgp);
	return (cp);
}

static struct g_consumer *
vdev_geom_open_by_guid(vdev_t *vd)
{
	struct g_consumer *cp;
	char *buf;
	size_t len;

	g_topology_assert();

	ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid);
	cp = vdev_geom_attach_by_guid(vd->vdev_guid);
	if (cp != NULL) {
		len = strlen(cp->provider->name) + strlen("/dev/") + 1;
		buf = kmem_alloc(len, KM_SLEEP);

		snprintf(buf, len, "/dev/%s", cp->provider->name);
		spa_strfree(vd->vdev_path);
		vd->vdev_path = buf;

		ZFS_LOG(1, "Attach by guid [%ju] succeeded, provider %s.",
		    (uintmax_t)vd->vdev_guid, vd->vdev_path);
	} else {
		ZFS_LOG(1, "Search by guid [%ju] failed.",
		    (uintmax_t)vd->vdev_guid);
	}

	return (cp);
}

static struct g_consumer *
vdev_geom_open_by_path(vdev_t *vd, int check_guid)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	uint64_t guid;

	g_topology_assert();

	cp = NULL;
	pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
	if (pp != NULL) {
		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
		cp = vdev_geom_attach(pp);
		if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
		    pp->sectorsize <= VDEV_PAD_SIZE) {
			g_topology_unlock();
			guid = vdev_geom_read_guid(cp);
			g_topology_lock();
			if (guid != vd->vdev_guid) {
				vdev_geom_detach(cp, 0);
				cp = NULL;
				ZFS_LOG(1, "guid mismatch for provider %s: "
				    "%ju != %ju.", vd->vdev_path,
				    (uintmax_t)vd->vdev_guid, (uintmax_t)guid);
			} else {
				ZFS_LOG(1, "guid match for provider %s.",
				    vd->vdev_path);
			}
		}
	}

	return (cp);
}

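/*
 * Open the vdev's backing provider.  When creating or splitting a pool
 * the provider is looked up by path alone; otherwise the label guid is
 * verified and, on a mismatch, every provider is searched for the right
 * guid, since the disk may simply have moved.  Write access is retried
 * a few times with a short sleep to let any transient exclusive hold on
 * the provider (e.g. by a tasting GEOM class) drain.
 */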
static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
	struct g_provider *pp;
	struct g_consumer *cp;
	size_t bufsize;
	int error;

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	vd->vdev_tsd = NULL;

	DROP_GIANT();
	g_topology_lock();
	error = 0;

	/*
	 * If we're creating or splitting a pool, just find the GEOM provider
	 * by its name and ignore GUID mismatches.
	 */
	if (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
	    vd->vdev_spa->spa_splitting_newspa == B_TRUE)
		cp = vdev_geom_open_by_path(vd, 0);
	else {
		cp = vdev_geom_open_by_path(vd, 1);
		if (cp == NULL) {
			/*
			 * The device at vd->vdev_path doesn't have the
			 * expected guid.  The disks might have merely
			 * moved around, so try all other GEOM providers
			 * to find one with the right guid.
			 */
			cp = vdev_geom_open_by_guid(vd);
		}
	}

	if (cp == NULL) {
		ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
		error = ENOENT;
	} else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
	    !ISP2(cp->provider->sectorsize)) {
		ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
		    vd->vdev_path);
		vdev_geom_detach(cp, 0);
		error = EINVAL;
		cp = NULL;
	} else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
		int i;

		for (i = 0; i < 5; i++) {
			error = g_access(cp, 0, 1, 0);
			if (error == 0)
				break;
			g_topology_unlock();
			tsleep(vd, 0, "vdev", hz / 2);
			g_topology_lock();
		}
		if (error != 0) {
			printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
			    vd->vdev_path, error);
			vdev_geom_detach(cp, 0);
			cp = NULL;
		}
	}
	g_topology_unlock();
	PICKUP_GIANT();
	if (cp == NULL) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}

	cp->private = vd;
	vd->vdev_tsd = cp;
	pp = cp->provider;

	/*
	 * Determine the actual size of the device.
	 */
	*max_psize = *psize = pp->mediasize;

	/*
	 * Determine the device's minimum transfer size and preferred
	 * transfer size.
	 */
	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
	*physical_ashift = 0;
	if (pp->stripesize)
		*physical_ashift = highbit(pp->stripesize) - 1;

	/*
	 * Clear the nowritecache settings, so that on a vdev_reopen()
	 * we will try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	if (vd->vdev_physpath != NULL)
		spa_strfree(vd->vdev_physpath);
	bufsize = sizeof("/dev/") + strlen(pp->name);
	vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP);
	snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name);

	return (0);
}

static void
vdev_geom_close(vdev_t *vd)
{
	struct g_consumer *cp;

	cp = vd->vdev_tsd;
	if (cp == NULL)
		return;
	vd->vdev_tsd = NULL;
	vd->vdev_delayed_close = B_FALSE;
	cp->private = NULL;	/* XXX locking */
	g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL);
}

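/*
 * Completion callback, invoked by GEOM when a bio issued by
 * vdev_geom_io_start() finishes.  It translates the bio status into the
 * zio's error, latches ENOTSUP results so unsupported flush and trim
 * requests are not retried, and hands the zio back to the ZFS pipeline
 * via zio_interrupt().
 */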
static void
vdev_geom_io_intr(struct bio *bp)
{
	vdev_t *vd;
	zio_t *zio;

	zio = bp->bio_caller1;
	vd = zio->io_vd;
	zio->io_error = bp->bio_error;
	if (zio->io_error == 0 && bp->bio_resid != 0)
		zio->io_error = EIO;
	if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) {
		/*
		 * If we get ENOTSUP, we know that no future
		 * attempts will ever succeed.  In this case we
		 * set a persistent bit so that we don't bother
		 * with the ioctl in the future.
		 */
		vd->vdev_nowritecache = B_TRUE;
	}
	if (bp->bio_cmd == BIO_DELETE && bp->bio_error == ENOTSUP) {
		/*
		 * If we get ENOTSUP, we know that no future
		 * attempts will ever succeed.  In this case we
		 * set a persistent bit so that we don't bother
		 * with the ioctl in the future.
		 */
		vd->vdev_notrim = B_TRUE;
	}
	if (zio->io_error == EIO && !vd->vdev_remove_wanted) {
		/*
		 * If the provider's error is set we assume it is being
		 * removed.
		 */
		if (bp->bio_to->error != 0) {
			/*
			 * We post the resource as soon as possible, instead of
			 * when the async removal actually happens, because the
			 * DE is using this information to discard previous I/O
			 * errors.
			 */
			/* XXX: zfs_post_remove() can sleep. */
			zfs_post_remove(zio->io_spa, vd);
			vd->vdev_remove_wanted = B_TRUE;
			spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
		} else if (!vd->vdev_delayed_close) {
			vd->vdev_delayed_close = B_TRUE;
		}
	}
	g_destroy_bio(bp);
	zio_interrupt(zio);
}

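/*
 * Start an I/O on behalf of the ZIO pipeline.  Reads and writes map
 * directly onto BIO_READ/BIO_WRITE; the two supported ioctls become
 * BIO_FLUSH and BIO_DELETE.  Returning ZIO_PIPELINE_STOP tells the
 * pipeline that completion will arrive asynchronously through
 * vdev_geom_io_intr(), while ZIO_PIPELINE_CONTINUE reports an
 * immediate, synchronous result.
 */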
static int
vdev_geom_io_start(zio_t *zio)
{
	vdev_t *vd;
	struct g_consumer *cp;
	struct bio *bp;

	vd = zio->io_vd;

	if (zio->io_type == ZIO_TYPE_IOCTL) {
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = ENXIO;
			return (ZIO_PIPELINE_CONTINUE);
		}

		switch (zio->io_cmd) {
		case DKIOCFLUSHWRITECACHE:
			if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
				break;
			if (vd->vdev_nowritecache) {
				zio->io_error = ENOTSUP;
				break;
			}
			goto sendreq;
		case DKIOCTRIM:
			if (vdev_geom_bio_delete_disable)
				break;
			if (vd->vdev_notrim) {
				zio->io_error = ENOTSUP;
				break;
			}
			goto sendreq;
		default:
			zio->io_error = ENOTSUP;
		}

		return (ZIO_PIPELINE_CONTINUE);
	}
sendreq:
	cp = vd->vdev_tsd;
	if (cp == NULL) {
		zio->io_error = ENXIO;
		return (ZIO_PIPELINE_CONTINUE);
	}
	bp = g_alloc_bio();
	bp->bio_caller1 = zio;
	switch (zio->io_type) {
	case ZIO_TYPE_READ:
	case ZIO_TYPE_WRITE:
		bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
		bp->bio_data = zio->io_data;
		bp->bio_offset = zio->io_offset;
		bp->bio_length = zio->io_size;
		break;
	case ZIO_TYPE_IOCTL:
		switch (zio->io_cmd) {
		case DKIOCFLUSHWRITECACHE:
			bp->bio_cmd = BIO_FLUSH;
			bp->bio_flags |= BIO_ORDERED;
			bp->bio_data = NULL;
			bp->bio_offset = cp->provider->mediasize;
			bp->bio_length = 0;
			break;
		case DKIOCTRIM:
			bp->bio_cmd = BIO_DELETE;
			bp->bio_data = NULL;
			bp->bio_offset = zio->io_offset;
			bp->bio_length = zio->io_size;
			break;
		}
		break;
	}
	bp->bio_done = vdev_geom_io_intr;

	g_io_request(bp, cp);

	return (ZIO_PIPELINE_STOP);
}

static void
vdev_geom_io_done(zio_t *zio)
{
}

static void
vdev_geom_hold(vdev_t *vd)
{
}

static void
vdev_geom_rele(vdev_t *vd)
{
}

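/*
 * The slots below follow the vdev_ops_t layout: open, close, asize,
 * io_start, io_done, state_change (unused here, hence NULL), hold and
 * rele, followed by the vdev type name and the leaf flag.
 */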
vdev_ops_t vdev_geom_ops = {
	vdev_geom_open,
	vdev_geom_close,
	vdev_default_asize,
	vdev_geom_io_start,
	vdev_geom_io_done,
	NULL,
	vdev_geom_hold,
	vdev_geom_rele,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};