4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
23 * All rights reserved.
26 #include <sys/zfs_context.h>
27 #include <sys/param.h>
28 #include <sys/kernel.h>
32 #include <sys/vdev_impl.h>
33 #include <sys/fs/zfs.h>
35 #include <geom/geom.h>
36 #include <geom/geom_int.h>
39 * Virtual device vector for GEOM.
/*
 * GEOM class under which all ZFS vdev geoms/consumers live; registered
 * with GEOM at module load by DECLARE_GEOM_CLASS below.
 * NOTE(review): initializer members (e.g. .name/.version) are not visible
 * in this excerpt -- confirm against the full source.
 */
42 struct g_class zfs_vdev_class = {
47 DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
/*
 * Per-vdev state (stored in vd->vdev_tsd): the GEOM consumer used for
 * I/O plus a bio completion queue drained by a dedicated worker thread
 * (vdev_geom_worker).
 * NOTE(review): excerpt omits members such as the gc_state field used by
 * vdev_geom_release()/vdev_geom_worker() -- confirm in full source.
 */
49 typedef struct vdev_geom_ctx {
50 struct g_consumer *gc_consumer;	/* consumer this vdev does I/O through */
52 struct bio_queue_head gc_queue;	/* completed bios awaiting the worker */
53 struct mtx gc_queue_mtx;	/* protects gc_queue (and gc_state) */
/*
 * Stop the vdev's worker thread and free its context.
 * Wakes the worker, then sleeps until the worker acknowledges shutdown
 * (gc_state == 2, presumably set by the worker; the line requesting
 * shutdown is missing from this excerpt -- TODO confirm), after which
 * the queue mutex is destroyed and the context freed.
 */
57 vdev_geom_release(vdev_t *vd)
64 mtx_lock(&ctx->gc_queue_mtx);
/* Kick the worker out of its msleep on gc_queue. */
66 wakeup_one(&ctx->gc_queue);
/* Wait for the worker to signal it has exited its loop. */
67 while (ctx->gc_state != 2)
68 msleep(&ctx->gc_state, &ctx->gc_queue_mtx, 0, "vgeom:w", 0);
69 mtx_unlock(&ctx->gc_queue_mtx);
70 mtx_destroy(&ctx->gc_queue_mtx);
71 kmem_free(ctx, sizeof(*ctx));
/*
 * GEOM orphan callback: the provider backing this consumer is going
 * away.  Drops all remaining access, destroys the consumer, withers the
 * geom when it has no consumers left, releases the per-vdev worker
 * context, and marks the vdev so ZFS notices the device is gone.
 * NOTE(review): excerpt is missing lines (function head, locals,
 * assignment of gp/vd) -- confirm against full source.
 */
75 vdev_geom_orphan(struct g_consumer *cp)
85 error = cp->provider->error;
87 ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
/* Release whatever read/write/exclusive counts we still hold. */
88 if (cp->acr + cp->acw + cp->ace > 0)
89 g_access(cp, -cp->acr, -cp->acw, -cp->ace);
90 ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
92 g_destroy_consumer(cp);
93 /* Destroy geom if there are no consumers left. */
94 if (LIST_EMPTY(&gp->consumer)) {
95 ZFS_LOG(1, "Destroyed geom %s.", gp->name);
96 g_wither_geom(gp, error);
/* Tear down the worker thread/queue for this vdev. */
98 vdev_geom_release(vd);
99 /* Both methods below work, but in slightly different ways. */
101 vd->vdev_reopen_wanted = 1;	/* ask ZFS to try reopening the vdev */
103 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
104 vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, vd->vdev_stat.vs_aux);
/*
 * Attach a consumer to provider 'pp' under the shared "zfs::vdev" geom,
 * acquiring access r=1, w='write', e=1.  Creates the geom on first use;
 * reuses an existing consumer (bumping its access counts) if this
 * provider is already attached.  Returns the consumer, or (presumably)
 * NULL on failure -- the return statements are missing from this
 * excerpt, TODO confirm.
 */
108 static struct g_consumer *
109 vdev_geom_attach(struct g_provider *pp, int write)
112 struct g_consumer *cp;
116 ZFS_LOG(1, "Attaching to %s.", pp->name);
117 /* Do we have geom already? No? Create one. */
118 LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
119 if (gp->flags & G_GEOM_WITHER)
121 if (strcmp(gp->name, "zfs::vdev") != 0)
/* No usable geom found: create geom + first consumer. */
126 gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
127 gp->orphan = vdev_geom_orphan;
128 cp = g_new_consumer(gp);
129 if (g_attach(cp, pp) != 0) {
130 g_wither_geom(gp, ENXIO);
133 if (g_access(cp, 1, write, 1) != 0) {
134 g_wither_geom(gp, ENXIO);
137 ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
139 /* Check if we are already connected to this provider. */
140 LIST_FOREACH(cp, &gp->consumer, consumer) {
141 if (cp->provider == pp) {
142 ZFS_LOG(1, "Found consumer for %s.", pp->name);
/* Geom exists but provider is new: add a consumer for it. */
147 cp = g_new_consumer(gp)
148 if (g_attach(cp, pp) != 0) {
149 g_destroy_consumer(cp);
152 if (g_access(cp, 1, write, 1) != 0) {
154 g_destroy_consumer(cp);
157 ZFS_LOG(1, "Created consumer for %s.", pp->name);
/* Existing consumer: add read+excl; add write only if not held yet. */
159 if (g_access(cp, 1, cp->acw > 0 ? 0 : write, 1) != 0)
161 ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
/*
 * Event-queue counterpart to vdev_geom_attach(): drop one read and one
 * exclusive access count; on the last close also drop any write access,
 * destroy the consumer, and wither the geom if no consumers remain.
 * 'arg' is the consumer (posted via g_post_event in vdev_geom_close).
 */
168 vdev_geom_detach(void *arg, int flag __unused)
171 struct g_consumer *cp;
177 ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
178 g_access(cp, -1, 0, -1);
179 /* Destroy consumer on last close. */
180 if (cp->acr == 0 && cp->ace == 0) {
181 ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
/* Release any remaining write access before destroying. */
183 g_access(cp, 0, -cp->acw, 0);
185 g_destroy_consumer(cp);
187 /* Destroy geom if there are no consumers left. */
188 if (LIST_EMPTY(&gp->consumer)) {
189 ZFS_LOG(1, "Destroyed geom %s.", gp->name);
190 g_wither_geom(gp, ENXIO);
/*
 * Per-vdev kernel thread: drains completed bios queued by
 * vdev_geom_io_intr(), propagates each bio's error into its zio, and
 * advances the zio pipeline.  Exits when gc_state == 1 is requested
 * (acknowledged via wakeup on &gc_state; the line setting gc_state = 2
 * is missing from this excerpt -- TODO confirm).
 */
195 vdev_geom_worker(void *arg)
197 vdev_geom_ctx_t *ctx;
203 mtx_lock(&ctx->gc_queue_mtx);
204 bp = bioq_takefirst(&ctx->gc_queue);
/* Shutdown requested by vdev_geom_release(). */
206 if (ctx->gc_state == 1) {
208 wakeup_one(&ctx->gc_state);
209 mtx_unlock(&ctx->gc_queue_mtx);
/* Queue empty: sleep until io_intr queues a bio (PDROP drops mtx). */
212 msleep(&ctx->gc_queue, &ctx->gc_queue_mtx,
213 PRIBIO | PDROP, "vgeom:io", 0);
216 mtx_unlock(&ctx->gc_queue_mtx);
217 zio = bp->bio_caller1;
218 zio->io_error = bp->bio_error;
219 if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) {
223 * If we get ENOTSUP, we know that no future
224 * attempts will ever succeed. In this case we
225 * set a persistent bit so that we don't bother
226 * with the ioctl in the future.
229 vd->vdev_nowritecache = B_TRUE;
232 zio_next_stage_async(zio);
/*
 * Fetch the provider's disk ident ("GEOM::ident" attribute) into a
 * freshly kmem_zalloc'd DISK_IDENT_SIZE buffer.  On getattr failure the
 * buffer is freed (and presumably NULL is returned -- return lines are
 * missing from this excerpt).  Caller frees via vdev_geom_free_id().
 * Must not be called with the GEOM topology lock held.
 */
237 vdev_geom_get_id(struct g_consumer *cp)
242 g_topology_assert_not();
243 len = DISK_IDENT_SIZE;
244 id = kmem_zalloc(len, KM_SLEEP);
245 if (g_io_getattr("GEOM::ident", cp, &len, id) != 0) {
246 kmem_free(id, DISK_IDENT_SIZE);
/*
 * Free an ident string obtained from vdev_geom_get_id().
 * NOTE(review): a NULL guard line appears to be elided from this
 * excerpt; callers do pass NULL -- confirm in full source.
 */
253 vdev_geom_free_id(char *id)
257 kmem_free(id, DISK_IDENT_SIZE);
/*
 * Argument/result bundle for vdev_geom_attach_by_id_event(): the ident
 * to search for and desired write access go in; the attached consumer
 * (or NULL) comes back in 'cp'.
 * NOTE(review): the 'id'/'write' members are elided from this excerpt.
 */
260 struct vdev_geom_find {
263 struct g_consumer *cp;
/*
 * Orphan method for the short-lived "zfs::vdev::taste" geom.  Tasting
 * consumers are attached and detached within a single GEOM event, so
 * this must never fire; panic loudly if it does.
 */
267 vdev_geom_taste_orphan(struct g_consumer *cp)
270 KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
271 cp->provider->name));
/*
 * GEOM event handler (run via g_waitfor_event): walk every provider of
 * every class except our own, read its "GEOM::ident" through a
 * temporary tasting consumer, and attach to the first provider whose
 * ident matches ap->id.  Result is returned in ap->cp (NULL if no
 * match or the attach failed).
 * Fix: kernel warning misspelled "writting" -> "writing".
 */
275 vdev_geom_attach_by_id_event(void *arg, int flags __unused)
277 struct vdev_geom_find *ap;
279 struct g_geom *gp, *zgp;
280 struct g_provider *pp;
281 struct g_consumer *zcp;
/* Throw-away geom/consumer used only to read idents while tasting. */
288 zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
289 /* This orphan function should never be called. */
290 zgp->orphan = vdev_geom_taste_orphan;
291 zcp = g_new_consumer(zgp);
293 LIST_FOREACH(mp, &g_classes, class) {
/* Skip our own class: never taste ZFS's own providers. */
294 if (mp == &zfs_vdev_class)
296 LIST_FOREACH(gp, &mp->geom, geom) {
297 if (gp->flags & G_GEOM_WITHER)
299 LIST_FOREACH(pp, &gp->provider, provider) {
300 if (pp->flags & G_PF_WITHER)
/* Need read access on the tasting consumer to issue getattr. */
303 if (g_access(zcp, 1, 0, 0) != 0) {
308 id = vdev_geom_get_id(zcp);
310 g_access(zcp, -1, 0, 0);
312 if (id == NULL || strcmp(id, ap->id) != 0) {
313 vdev_geom_free_id(id);
316 vdev_geom_free_id(id);
/* Ident matches: attach for real with the requested write access. */
317 ap->cp = vdev_geom_attach(pp, ap->write);
318 if (ap->cp == NULL) {
319 printf("ZFS WARNING: Cannot open %s "
320 "for writing.\n", pp->name);
329 g_destroy_consumer(zcp);
/*
 * Synchronous wrapper around vdev_geom_attach_by_id_event(): packages
 * the (id, write) request, runs the search in the GEOM event thread via
 * g_waitfor_event(), and returns the attached consumer (NULL if the
 * ident was not found).
 * NOTE(review): lines assigning ap->id/ap->write and cp = ap->cp are
 * elided from this excerpt -- confirm in full source.
 */
333 static struct g_consumer *
334 vdev_geom_attach_by_id(const char *id, int write)
336 struct vdev_geom_find *ap;
337 struct g_consumer *cp;
339 ap = kmem_zalloc(sizeof(*ap), KM_SLEEP);
342 g_waitfor_event(vdev_geom_attach_by_id_event, ap, M_WAITOK, NULL);
344 kmem_free(ap, sizeof(*ap));
/*
 * vdev_ops open method: locate and attach the GEOM provider backing
 * this vdev, first by /dev path (cross-checked against vdev_devid when
 * present), then by searching all providers for a matching disk ident.
 * On success reports media size and ashift, clears the nowritecache
 * bit, and starts the per-vdev worker thread.
 * NOTE(review): many lines (returns, Giant handling, error paths) are
 * elided from this excerpt -- confirm against full source.
 */
349 vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
351 vdev_geom_ctx_t *ctx;
352 struct g_provider *pp;
353 struct g_consumer *cp;
358 * We must have a pathname, and it must be absolute.
360 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
361 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
/* Drop Giant if held; presumably re-acquired before returning. */
365 if ((owned = mtx_owned(&Giant)))
/* Strip the "/dev/" prefix when looking the provider up by name. */
369 pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
371 ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
372 cp = vdev_geom_attach(pp, !!(spa_mode & FWRITE));
/* Path matched -- verify the device ident if the label recorded one. */
373 if (cp != NULL && vd->vdev_devid != NULL) {
375 id = vdev_geom_get_id(cp);
377 if (id == NULL || strcmp(id, vd->vdev_devid) != 0) {
/* Wrong disk behind this path: detach and fall through to ID search. */
378 vdev_geom_detach(cp, 0);
380 ZFS_LOG(1, "ID mismatch for provider %s: "
381 "[%s]!=[%s].", vd->vdev_path,
385 ZFS_LOG(1, "ID match for provider %s.", vd->vdev_path);
390 vdev_geom_free_id(id);
/* Path lookup failed (or mismatched): search all providers by ident. */
391 if (cp == NULL && vd->vdev_devid != NULL) {
392 ZFS_LOG(1, "Searching by ID [%s].", vd->vdev_devid);
393 cp = vdev_geom_attach_by_id(vd->vdev_devid,
394 !!(spa_mode & FWRITE));
/* Device moved: rewrite vdev_path to the provider's current name. */
396 size_t len = strlen(cp->provider->name) + 6; /* 6 == strlen("/dev/") + 1 */
397 char *buf = kmem_alloc(len, KM_SLEEP);
399 snprintf(buf, len, "/dev/%s", cp->provider->name);
400 spa_strfree(vd->vdev_path);
403 ZFS_LOG(1, "Attach by ID [%s] succeeded, provider %s.",
404 vd->vdev_devid, vd->vdev_path);
410 ZFS_LOG(1, "Provider %s (id=[%s]) not found.", vd->vdev_path,
412 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
418 * Determine the actual size of the device.
420 *psize = pp->mediasize;
423 * Determine the device's minimum transfer size.
425 *ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
428 * Clear the nowritecache bit, so that on a vdev_reopen() we will
431 vd->vdev_nowritecache = B_FALSE;
/* Set up the completion queue and start the worker thread. */
435 ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP);
436 bioq_init(&ctx->gc_queue);
437 mtx_init(&ctx->gc_queue_mtx, "zfs:vdev:geom:queue", NULL, MTX_DEF);
438 ctx->gc_consumer = cp;
443 kthread_create(vdev_geom_worker, ctx, NULL, 0, 0, "vdev:worker %s",
/*
 * vdev_ops close method: stop the worker thread (vdev_geom_release) and
 * queue the consumer teardown to run in the GEOM event thread, since
 * g_access/g_destroy_consumer need the topology lock.
 */
450 vdev_geom_close(vdev_t *vd)
452 vdev_geom_ctx_t *ctx;
453 struct g_consumer *cp;
/* Nothing to do if the vdev was never successfully opened. */
455 if ((ctx = vd->vdev_tsd) == NULL)
457 if ((cp = ctx->gc_consumer) == NULL)
459 vdev_geom_release(vd);
460 g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL);
/*
 * bio_done callback, called from GEOM interrupt/completion context.
 * Do no real work here: just queue the finished bio for the vdev's
 * worker thread and wake it.
 */
464 vdev_geom_io_intr(struct bio *bp)
466 vdev_geom_ctx_t *ctx;
469 zio = bp->bio_caller1;
470 ctx = zio->io_vd->vdev_tsd;
472 mtx_lock(&ctx->gc_queue_mtx);
473 bioq_insert_tail(&ctx->gc_queue, bp);
474 wakeup_one(&ctx->gc_queue);
475 mtx_unlock(&ctx->gc_queue_mtx);
/*
 * vdev_ops I/O start method: translate a zio into a GEOM bio and issue
 * it on the vdev's consumer.  Ioctls are handled inline (only
 * DKIOCFLUSHWRITECACHE is supported, mapped to BIO_FLUSH); reads may be
 * satisfied from the vdev cache; all I/O goes through the vdev queue.
 * Completion is delivered via vdev_geom_io_intr -> worker thread.
 * NOTE(review): excerpt elides returns, bp allocation (g_alloc_bio,
 * presumably) and some branch bodies -- confirm in full source.
 */
479 vdev_geom_io_start(zio_t *zio)
482 vdev_geom_ctx_t *ctx;
483 struct g_consumer *cp;
492 cp = ctx->gc_consumer;
494 if (zio->io_type == ZIO_TYPE_IOCTL) {
/* Ioctls don't go through the normal vdev I/O pipeline stages. */
495 zio_vdev_io_bypass(zio);
498 if (vdev_is_dead(vd)) {
499 zio->io_error = ENXIO;
500 zio_next_stage_async(zio);
504 switch (zio->io_cmd) {
506 case DKIOCFLUSHWRITECACHE:
/* Skip the flush if a prior ENOTSUP marked the cache unusable. */
507 if (vd->vdev_nowritecache) {
508 zio->io_error = ENOTSUP;
/* Any other ioctl command is unsupported. */
514 zio->io_error = ENOTSUP;
517 zio_next_stage_async(zio);
/* Reads may be served entirely from the vdev cache. */
521 if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
/* The queue may defer this zio; nothing more to do now. */
524 if ((zio = vdev_queue_io(zio)) == NULL)
529 error = vdev_is_dead(vd) ? ENXIO : vdev_error_inject(vd, zio);
530 if (error == 0 && cp == NULL)
533 zio->io_error = error;
534 zio_next_stage_async(zio);
539 bp->bio_caller1 = zio;
540 switch (zio->io_type) {
543 bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
544 bp->bio_data = zio->io_data;
545 bp->bio_offset = zio->io_offset;
546 bp->bio_length = zio->io_size;
549 bp->bio_cmd = BIO_FLUSH;
/* Flush carries no data; offset at mediasize marks whole device. */
551 bp->bio_offset = cp->provider->mediasize;
555 bp->bio_done = vdev_geom_io_intr;
557 g_io_request(bp, cp);
/*
 * vdev_ops I/O done method: notify the vdev queue, feed completed
 * writes into the vdev cache, and apply fault-injection errors when
 * enabled.
 */
561 vdev_geom_io_done(zio_t *zio)
563 vdev_queue_io_done(zio);
565 if (zio->io_type == ZIO_TYPE_WRITE)
566 vdev_cache_write(zio);
568 if (zio_injection_enabled && zio->io_error == 0)
569 zio->io_error = zio_handle_device_injection(zio->io_vd, EIO);
/*
 * Vdev operations vector wiring the GEOM open/close/io methods above
 * into ZFS as the "disk" leaf vdev type.
 * NOTE(review): the function-pointer members are elided from this
 * excerpt -- confirm against full source.
 */
574 vdev_ops_t vdev_geom_ops = {
581 VDEV_TYPE_DISK, /* name of this vdev type */
582 B_TRUE /* leaf vdev */