/* FreeBSD releng/7.2: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/disk.h>
#include <sys/spa.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <geom/geom.h>
#include <geom/geom_int.h>

/*
 * Virtual device vector for GEOM.
 */

struct g_class zfs_vdev_class = {
        .name = "ZFS::VDEV",
        .version = G_VERSION,
};

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);

typedef struct vdev_geom_ctx {
        struct g_consumer *gc_consumer;
        int gc_state;
        struct bio_queue_head gc_queue;
        struct mtx gc_queue_mtx;
} vdev_geom_ctx_t;

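/*
 * Tear down the per-vdev context: ask the worker thread to exit
 * (gc_state 1), wait until it acknowledges (gc_state 2), then destroy
 * the queue mutex and free the context.
 */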
static void
vdev_geom_release(vdev_t *vd)
{
        vdev_geom_ctx_t *ctx;

        ctx = vd->vdev_tsd;
        vd->vdev_tsd = NULL;

        mtx_lock(&ctx->gc_queue_mtx);
        ctx->gc_state = 1;
        wakeup_one(&ctx->gc_queue);
        while (ctx->gc_state != 2)
                msleep(&ctx->gc_state, &ctx->gc_queue_mtx, 0, "vgeom:w", 0);
        mtx_unlock(&ctx->gc_queue_mtx);
        mtx_destroy(&ctx->gc_queue_mtx);
        kmem_free(ctx, sizeof(*ctx));
}

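/*
 * GEOM orphan callback, invoked when the underlying provider goes away.
 * Drop our access counts, destroy the consumer (and the geom if it was
 * the last consumer), release the per-vdev context and mark the vdev as
 * unable to open so ZFS notices the loss.
 */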
static void
vdev_geom_orphan(struct g_consumer *cp)
{
        struct g_geom *gp;
        vdev_t *vd;
        int error;

        g_topology_assert();

        vd = cp->private;
        gp = cp->geom;
        error = cp->provider->error;

        ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
        if (cp->acr + cp->acw + cp->ace > 0)
                g_access(cp, -cp->acr, -cp->acw, -cp->ace);
        ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
        g_detach(cp);
        g_destroy_consumer(cp);
        /* Destroy geom if there are no consumers left. */
        if (LIST_EMPTY(&gp->consumer)) {
                ZFS_LOG(1, "Destroyed geom %s.", gp->name);
                g_wither_geom(gp, error);
        }
        vdev_geom_release(vd);
        /* Both methods below work, but in slightly different ways. */
#if 0
        vd->vdev_reopen_wanted = 1;
#else
        vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
        vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, vd->vdev_stat.vs_aux);
#endif
}

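/*
 * Attach a consumer to the given provider.  All vdevs share a single
 * "zfs::vdev" geom, which is created on first use; an existing consumer
 * for the same provider is reused when possible.  Access is requested
 * as r1w<write>e1.  Returns the consumer, or NULL on failure.  Must be
 * called with the GEOM topology lock held.
 */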
static struct g_consumer *
vdev_geom_attach(struct g_provider *pp, int write)
{
        struct g_geom *gp;
        struct g_consumer *cp;

        g_topology_assert();

        ZFS_LOG(1, "Attaching to %s.", pp->name);
        /* Do we already have a geom?  If not, create one. */
        LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
                if (gp->flags & G_GEOM_WITHER)
                        continue;
                if (strcmp(gp->name, "zfs::vdev") != 0)
                        continue;
                break;
        }
        if (gp == NULL) {
                gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
                gp->orphan = vdev_geom_orphan;
                cp = g_new_consumer(gp);
                if (g_attach(cp, pp) != 0) {
                        g_wither_geom(gp, ENXIO);
                        return (NULL);
                }
                if (g_access(cp, 1, write, 1) != 0) {
                        g_wither_geom(gp, ENXIO);
                        return (NULL);
                }
                ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
        } else {
                /* Check if we are already connected to this provider. */
                LIST_FOREACH(cp, &gp->consumer, consumer) {
                        if (cp->provider == pp) {
                                ZFS_LOG(1, "Found consumer for %s.", pp->name);
                                break;
                        }
                }
                if (cp == NULL) {
                        cp = g_new_consumer(gp);
                        if (g_attach(cp, pp) != 0) {
                                g_destroy_consumer(cp);
                                return (NULL);
                        }
                        if (g_access(cp, 1, write, 1) != 0) {
                                g_detach(cp);
                                g_destroy_consumer(cp);
                                return (NULL);
                        }
                        ZFS_LOG(1, "Created consumer for %s.", pp->name);
                } else {
                        if (g_access(cp, 1, cp->acw > 0 ? 0 : write, 1) != 0)
                                return (NULL);
                        ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
                }
        }
        return (cp);
}

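/*
 * Drop one r/e reference on the consumer; on the last close release any
 * remaining write references, detach and destroy the consumer, and
 * wither the geom once no consumers are left.  Called with the topology
 * lock held, either directly or as a GEOM event from vdev_geom_close().
 */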
static void
vdev_geom_detach(void *arg, int flag __unused)
{
        struct g_geom *gp;
        struct g_consumer *cp;

        g_topology_assert();
        cp = arg;
        gp = cp->geom;

        ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
        g_access(cp, -1, 0, -1);
        /* Destroy consumer on last close. */
        if (cp->acr == 0 && cp->ace == 0) {
                ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
                if (cp->acw > 0)
                        g_access(cp, 0, -cp->acw, 0);
                g_detach(cp);
                g_destroy_consumer(cp);
        }
        /* Destroy geom if there are no consumers left. */
        if (LIST_EMPTY(&gp->consumer)) {
                ZFS_LOG(1, "Destroyed geom %s.", gp->name);
                g_wither_geom(gp, ENXIO);
        }
}

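/*
 * Per-vdev worker thread.  Completed bios are queued here by
 * vdev_geom_io_intr() so that zio completion runs in thread context
 * rather than in the GEOM up-thread.  A BIO_FLUSH that fails with
 * ENOTSUP marks the vdev as not supporting cache flushes, so the ioctl
 * is not retried.  The thread exits when vdev_geom_release() sets
 * gc_state to 1.
 */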
static void
vdev_geom_worker(void *arg)
{
        vdev_geom_ctx_t *ctx;
        zio_t *zio;
        struct bio *bp;

        ctx = arg;
        for (;;) {
                mtx_lock(&ctx->gc_queue_mtx);
                bp = bioq_takefirst(&ctx->gc_queue);
                if (bp == NULL) {
                        if (ctx->gc_state == 1) {
                                ctx->gc_state = 2;
                                wakeup_one(&ctx->gc_state);
                                mtx_unlock(&ctx->gc_queue_mtx);
                                kthread_exit(0);
                        }
                        msleep(&ctx->gc_queue, &ctx->gc_queue_mtx,
                            PRIBIO | PDROP, "vgeom:io", 0);
                        continue;
                }
                mtx_unlock(&ctx->gc_queue_mtx);
                zio = bp->bio_caller1;
                zio->io_error = bp->bio_error;
                if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) {
                        vdev_t *vd;

                        /*
                         * If we get ENOTSUP, we know that no future
                         * attempts will ever succeed.  In this case we
                         * set a persistent bit so that we don't bother
                         * with the ioctl in the future.
                         */
                        vd = zio->io_vd;
                        vd->vdev_nowritecache = B_TRUE;
                }
                g_destroy_bio(bp);
                zio_next_stage_async(zio);
        }
}

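/*
 * Read the provider's GEOM::ident attribute (e.g. a disk serial number),
 * which is used here as the vdev devid.  Returns a DISK_IDENT_SIZE
 * buffer that must be freed with vdev_geom_free_id(), or NULL if the
 * attribute is unavailable.
 */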
static char *
vdev_geom_get_id(struct g_consumer *cp)
{
        char *id;
        int len;

        g_topology_assert_not();
        len = DISK_IDENT_SIZE;
        id = kmem_zalloc(len, KM_SLEEP);
        if (g_io_getattr("GEOM::ident", cp, &len, id) != 0) {
                kmem_free(id, DISK_IDENT_SIZE);
                return (NULL);
        }
        return (id);
}

static void
vdev_geom_free_id(char *id)
{

        if (id != NULL)
                kmem_free(id, DISK_IDENT_SIZE);
}

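/* Request/result block for the attach-by-ident search below. */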
struct vdev_geom_find {
        const char *id;
        int write;
        struct g_consumer *cp;
};

static void
vdev_geom_taste_orphan(struct g_consumer *cp)
{

        KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
            cp->provider->name));
}

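/*
 * GEOM event handler: walk every provider of every class (except our
 * own), read its GEOM::ident through a temporary "zfs::vdev::taste"
 * consumer, and attach to the first provider whose ident matches
 * ap->id.  The resulting consumer, if any, is returned in ap->cp.
 */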
static void
vdev_geom_attach_by_id_event(void *arg, int flags __unused)
{
        struct vdev_geom_find *ap;
        struct g_class *mp;
        struct g_geom *gp, *zgp;
        struct g_provider *pp;
        struct g_consumer *zcp;
        char *id;

        g_topology_assert();

        ap = arg;

        zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
        /* This orphan function should never be called. */
        zgp->orphan = vdev_geom_taste_orphan;
        zcp = g_new_consumer(zgp);

        LIST_FOREACH(mp, &g_classes, class) {
                if (mp == &zfs_vdev_class)
                        continue;
                LIST_FOREACH(gp, &mp->geom, geom) {
                        if (gp->flags & G_GEOM_WITHER)
                                continue;
                        LIST_FOREACH(pp, &gp->provider, provider) {
                                if (pp->flags & G_PF_WITHER)
                                        continue;
                                g_attach(zcp, pp);
                                if (g_access(zcp, 1, 0, 0) != 0) {
                                        g_detach(zcp);
                                        continue;
                                }
                                g_topology_unlock();
                                id = vdev_geom_get_id(zcp);
                                g_topology_lock();
                                g_access(zcp, -1, 0, 0);
                                g_detach(zcp);
                                if (id == NULL || strcmp(id, ap->id) != 0) {
                                        vdev_geom_free_id(id);
                                        continue;
                                }
                                vdev_geom_free_id(id);
                                ap->cp = vdev_geom_attach(pp, ap->write);
                                if (ap->cp == NULL) {
                                        printf("ZFS WARNING: Cannot open %s "
                                            "for writing.\n", pp->name);
                                        continue;
                                }
                                goto end;
                        }
                }
        }
        ap->cp = NULL;
end:
        g_destroy_consumer(zcp);
        g_destroy_geom(zgp);
}

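/*
 * Find and attach to a provider by its GEOM::ident.  The search must
 * run with the topology lock held, so it is dispatched to the GEOM
 * event thread via g_waitfor_event() and waited for here.
 */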
static struct g_consumer *
vdev_geom_attach_by_id(const char *id, int write)
{
        struct vdev_geom_find *ap;
        struct g_consumer *cp;

        ap = kmem_zalloc(sizeof(*ap), KM_SLEEP);
        ap->id = id;
        ap->write = write;
        g_waitfor_event(vdev_geom_attach_by_id_event, ap, M_WAITOK, NULL);
        cp = ap->cp;
        kmem_free(ap, sizeof(*ap));
        return (cp);
}

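/*
 * Open the vdev: first try to look the provider up by pathname and, if
 * a devid is recorded, verify that its GEOM::ident matches; otherwise
 * fall back to searching all providers by ident and rewrite vdev_path
 * to the provider actually found.  On success report the media size and
 * the device's minimum transfer size (ashift) and start the per-vdev
 * worker thread.
 */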
static int
vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
{
        vdev_geom_ctx_t *ctx;
        struct g_provider *pp;
        struct g_consumer *cp;
        char *id = NULL;
        int owned;

        /*
         * We must have a pathname, and it must be absolute.
         */
        if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
                vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
                return (EINVAL);
        }

        if ((owned = mtx_owned(&Giant)))
                mtx_unlock(&Giant);
        cp = NULL;
        g_topology_lock();
        pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
        if (pp != NULL) {
                ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
                cp = vdev_geom_attach(pp, !!(spa_mode & FWRITE));
                if (cp != NULL && vd->vdev_devid != NULL) {
                        g_topology_unlock();
                        id = vdev_geom_get_id(cp);
                        g_topology_lock();
                        if (id == NULL || strcmp(id, vd->vdev_devid) != 0) {
                                vdev_geom_detach(cp, 0);
                                cp = NULL;
                                ZFS_LOG(1, "ID mismatch for provider %s: "
                                    "[%s]!=[%s].", vd->vdev_path,
                                    vd->vdev_devid, id);
                                goto next;
                        }
                        ZFS_LOG(1, "ID match for provider %s.", vd->vdev_path);
                }
        }
next:
        g_topology_unlock();
        vdev_geom_free_id(id);
        if (cp == NULL && vd->vdev_devid != NULL) {
                ZFS_LOG(1, "Searching by ID [%s].", vd->vdev_devid);
                cp = vdev_geom_attach_by_id(vd->vdev_devid,
                    !!(spa_mode & FWRITE));
                if (cp != NULL) {
                        size_t len = strlen(cp->provider->name) + 6; /* 6 == strlen("/dev/") + 1 */
                        char *buf = kmem_alloc(len, KM_SLEEP);

                        snprintf(buf, len, "/dev/%s", cp->provider->name);
                        spa_strfree(vd->vdev_path);
                        vd->vdev_path = buf;

                        ZFS_LOG(1, "Attach by ID [%s] succeeded, provider %s.",
                            vd->vdev_devid, vd->vdev_path);
                }
        }
        if (owned)
                mtx_lock(&Giant);
        if (cp == NULL) {
                ZFS_LOG(1, "Provider %s (id=[%s]) not found.", vd->vdev_path,
                    vd->vdev_devid);
                vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
                return (EACCES);
        }
        pp = cp->provider;

        /*
         * Determine the actual size of the device.
         */
        *psize = pp->mediasize;

        /*
         * Determine the device's minimum transfer size.
         */
        *ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;

        /*
         * Clear the nowritecache bit, so that on a vdev_reopen() we will
         * try again.
         */
        vd->vdev_nowritecache = B_FALSE;

        cp->private = vd;

        ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP);
        bioq_init(&ctx->gc_queue);
        mtx_init(&ctx->gc_queue_mtx, "zfs:vdev:geom:queue", NULL, MTX_DEF);
        ctx->gc_consumer = cp;
        ctx->gc_state = 0;

        vd->vdev_tsd = ctx;

        kthread_create(vdev_geom_worker, ctx, NULL, 0, 0, "vdev:worker %s",
            pp->name);

        return (0);
}

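/*
 * Close the vdev: stop the worker thread and free the context, then
 * queue a GEOM event to drop our access to the consumer.
 */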
static void
vdev_geom_close(vdev_t *vd)
{
        vdev_geom_ctx_t *ctx;
        struct g_consumer *cp;

        if ((ctx = vd->vdev_tsd) == NULL)
                return;
        if ((cp = ctx->gc_consumer) == NULL)
                return;
        vdev_geom_release(vd);
        g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL);
}

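/*
 * bio completion callback.  Runs in GEOM completion context, so it only
 * queues the bio for the worker thread and wakes it up.
 */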
static void
vdev_geom_io_intr(struct bio *bp)
{
        vdev_geom_ctx_t *ctx;
        zio_t *zio;

        zio = bp->bio_caller1;
        ctx = zio->io_vd->vdev_tsd;

        mtx_lock(&ctx->gc_queue_mtx);
        bioq_insert_tail(&ctx->gc_queue, bp);
        wakeup_one(&ctx->gc_queue);
        mtx_unlock(&ctx->gc_queue_mtx);
}

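/*
 * Start a ZFS I/O: flush requests (DKIOCFLUSHWRITECACHE) are translated
 * into BIO_FLUSH, reads and writes into BIO_READ/BIO_WRITE, and the
 * resulting bio is handed to GEOM with vdev_geom_io_intr() as its
 * completion callback.  Reads are first tried against the vdev cache,
 * and reads and writes go through the vdev queue.
 */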
static void
vdev_geom_io_start(zio_t *zio)
{
        vdev_t *vd;
        vdev_geom_ctx_t *ctx;
        struct g_consumer *cp;
        struct bio *bp;
        int error;

        cp = NULL;

        vd = zio->io_vd;
        ctx = vd->vdev_tsd;
        if (ctx != NULL)
                cp = ctx->gc_consumer;

        if (zio->io_type == ZIO_TYPE_IOCTL) {
                zio_vdev_io_bypass(zio);

                /* XXPOLICY */
                if (vdev_is_dead(vd)) {
                        zio->io_error = ENXIO;
                        zio_next_stage_async(zio);
                        return;
                }

                switch (zio->io_cmd) {

                case DKIOCFLUSHWRITECACHE:
                        if (vd->vdev_nowritecache) {
                                zio->io_error = ENOTSUP;
                                break;
                        }

                        goto sendreq;
                default:
                        zio->io_error = ENOTSUP;
                }

                zio_next_stage_async(zio);
                return;
        }

        if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
                return;

        if ((zio = vdev_queue_io(zio)) == NULL)
                return;

sendreq:

        error = vdev_is_dead(vd) ? ENXIO : vdev_error_inject(vd, zio);
        if (error == 0 && cp == NULL)
                error = ENXIO;
        if (error) {
                zio->io_error = error;
                zio_next_stage_async(zio);
                return;
        }

        bp = g_alloc_bio();
        bp->bio_caller1 = zio;
        switch (zio->io_type) {
        case ZIO_TYPE_READ:
        case ZIO_TYPE_WRITE:
                bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
                bp->bio_data = zio->io_data;
                bp->bio_offset = zio->io_offset;
                bp->bio_length = zio->io_size;
                break;
        case ZIO_TYPE_IOCTL:
                bp->bio_cmd = BIO_FLUSH;
                bp->bio_data = NULL;
                bp->bio_offset = cp->provider->mediasize;
                bp->bio_length = 0;
                break;
        }
        bp->bio_done = vdev_geom_io_intr;

        g_io_request(bp, cp);
}

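/*
 * Completion stage: update the vdev queue and cache, apply any fault
 * injection, and advance the zio pipeline.
 */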
static void
vdev_geom_io_done(zio_t *zio)
{
        vdev_queue_io_done(zio);

        if (zio->io_type == ZIO_TYPE_WRITE)
                vdev_cache_write(zio);

        if (zio_injection_enabled && zio->io_error == 0)
                zio->io_error = zio_handle_device_injection(zio->io_vd, EIO);

        zio_next_stage(zio);
}

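/*
 * Operation vector exported to the generic vdev code for GEOM-backed
 * (disk) vdevs.
 */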
vdev_ops_t vdev_geom_ops = {
        vdev_geom_open,
        vdev_geom_close,
        vdev_default_asize,
        vdev_geom_io_start,
        vdev_geom_io_done,
        NULL,
        VDEV_TYPE_DISK,         /* name of this vdev type */
        B_TRUE                  /* leaf vdev */
};