CyberLeo.Net >> Repos - FreeBSD/releng/7.2.git/blob - sys/geom/vinum/geom_vinum_drive.c
Create releng/7.2 from stable/7 in preparation for 7.2-RELEASE.
[FreeBSD/releng/7.2.git] / sys / geom / vinum / geom_vinum_drive.c
1 /*-
2  * Copyright (c) 2004, 2005 Lukas Ertl
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29
30 #include <sys/param.h>
31 #include <sys/bio.h>
32 #include <sys/errno.h>
33 #include <sys/conf.h>
34 #include <sys/kernel.h>
35 #include <sys/kthread.h>
36 #include <sys/libkern.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/module.h>
40 #include <sys/mutex.h>
41 #include <sys/sbuf.h>
42 #include <sys/systm.h>
43 #include <sys/time.h>
44
45 #include <geom/geom.h>
46 #include <geom/vinum/geom_vinum_var.h>
47 #include <geom/vinum/geom_vinum.h>
48 #include <geom/vinum/geom_vinum_share.h>
49
/* Forward declarations for routines that are referenced before they are defined. */
static void	gv_drive_worker(void *arg);
static void	gv_drive_dead(void *arg, int flag);
52
53 void
54 gv_config_new_drive(struct gv_drive *d)
55 {
56         struct gv_hdr *vhdr;
57         struct gv_freelist *fl;
58
59         KASSERT(d != NULL, ("config_new_drive: NULL d"));
60
61         vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO);
62         vhdr->magic = GV_MAGIC;
63         vhdr->config_length = GV_CFG_LEN;
64
65         bcopy(hostname, vhdr->label.sysname, GV_HOSTNAME_LEN);
66         strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME);
67         microtime(&vhdr->label.date_of_birth);
68
69         d->hdr = vhdr;
70
71         LIST_INIT(&d->subdisks);
72         LIST_INIT(&d->freelist);
73
74         fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO);
75         fl->offset = GV_DATA_START;
76         fl->size = d->avail;
77         LIST_INSERT_HEAD(&d->freelist, fl, freelist);
78         d->freelist_entries = 1;
79
80         d->bqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO);
81         bioq_init(d->bqueue);
82         mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
83         kthread_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name);
84         d->flags |= GV_DRIVE_THREAD_ACTIVE;
85 }
86
87 void
88 gv_save_config_all(struct gv_softc *sc)
89 {
90         struct gv_drive *d;
91
92         g_topology_assert();
93
94         LIST_FOREACH(d, &sc->drives, drive) {
95                 if (d->geom == NULL)
96                         continue;
97                 gv_save_config(NULL, d, sc);
98         }
99 }
100
101 /* Save the vinum configuration back to disk. */
102 void
103 gv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc)
104 {
105         struct g_geom *gp;
106         struct g_consumer *cp2;
107         struct gv_hdr *vhdr, *hdr;
108         struct sbuf *sb;
109         int error;
110
111         g_topology_assert();
112
113         KASSERT(d != NULL, ("gv_save_config: null d"));
114         KASSERT(sc != NULL, ("gv_save_config: null sc"));
115
116         /*
117          * We can't save the config on a drive that isn't up, but drives that
118          * were just created aren't officially up yet, so we check a special
119          * flag.
120          */
121         if ((d->state != GV_DRIVE_UP) && !(d->flags && GV_DRIVE_NEWBORN))
122                 return;
123
124         if (cp == NULL) {
125                 gp = d->geom;
126                 KASSERT(gp != NULL, ("gv_save_config: null gp"));
127                 cp2 = LIST_FIRST(&gp->consumer);
128                 KASSERT(cp2 != NULL, ("gv_save_config: null cp2"));
129         } else
130                 cp2 = cp;
131
132         vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO);
133         vhdr->magic = GV_MAGIC;
134         vhdr->config_length = GV_CFG_LEN;
135
136         hdr = d->hdr;
137         if (hdr == NULL) {
138                 printf("GEOM_VINUM: drive %s has NULL hdr\n", d->name);
139                 g_free(vhdr);
140                 return;
141         }
142         microtime(&hdr->label.last_update);
143         bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label));
144
145         sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN);
146         gv_format_config(sc, sb, 1, NULL);
147         sbuf_finish(sb);
148
149         error = g_access(cp2, 0, 1, 0);
150         if (error) {
151                 printf("GEOM_VINUM: g_access failed on drive %s, errno %d\n",
152                     d->name, error);
153                 sbuf_delete(sb);
154                 g_free(vhdr);
155                 return;
156         }
157         g_topology_unlock();
158
159         do {
160                 error = g_write_data(cp2, GV_HDR_OFFSET, vhdr, GV_HDR_LEN);
161                 if (error) {
162                         printf("GEOM_VINUM: writing vhdr failed on drive %s, "
163                             "errno %d", d->name, error);
164                         break;
165                 }
166
167                 error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb),
168                     GV_CFG_LEN);
169                 if (error) {
170                         printf("GEOM_VINUM: writing first config copy failed "
171                             "on drive %s, errno %d", d->name, error);
172                         break;
173                 }
174                 
175                 error = g_write_data(cp2, GV_CFG_OFFSET + GV_CFG_LEN,
176                     sbuf_data(sb), GV_CFG_LEN);
177                 if (error)
178                         printf("GEOM_VINUM: writing second config copy failed "
179                             "on drive %s, errno %d", d->name, error);
180         } while (0);
181
182         g_topology_lock();
183         g_access(cp2, 0, -1, 0);
184         sbuf_delete(sb);
185         g_free(vhdr);
186
187         if (d->geom != NULL)
188                 gv_drive_modify(d);
189 }
190
191 /* This resembles g_slice_access(). */
192 static int
193 gv_drive_access(struct g_provider *pp, int dr, int dw, int de)
194 {
195         struct g_geom *gp;
196         struct g_consumer *cp;
197         struct g_provider *pp2;
198         struct gv_drive *d;
199         struct gv_sd *s, *s2;
200         int error;
201
202         gp = pp->geom;
203         cp = LIST_FIRST(&gp->consumer);
204         if (cp == NULL)
205                 return (0);
206
207         d = gp->softc;
208         if (d == NULL)
209                 return (0);
210
211         s = pp->private;
212         KASSERT(s != NULL, ("gv_drive_access: NULL s"));
213
214         LIST_FOREACH(s2, &d->subdisks, from_drive) {
215                 if (s == s2)
216                         continue;
217                 if (s->drive_offset + s->size <= s2->drive_offset)
218                         continue;
219                 if (s2->drive_offset + s2->size <= s->drive_offset)
220                         continue;
221
222                 /* Overlap. */
223                 pp2 = s2->provider;
224                 KASSERT(s2 != NULL, ("gv_drive_access: NULL s2"));
225                 if ((pp->acw + dw) > 0 && pp2->ace > 0)
226                         return (EPERM);
227                 if ((pp->ace + de) > 0 && pp2->acw > 0)
228                         return (EPERM);
229         }
230
231         error = g_access(cp, dr, dw, de);
232         return (error);
233 }
234
235 static void
236 gv_drive_done(struct bio *bp)
237 {
238         struct gv_drive *d;
239
240         /* Put the BIO on the worker queue again. */
241         d = bp->bio_from->geom->softc;
242         bp->bio_cflags |= GV_BIO_DONE;
243         mtx_lock(&d->bqueue_mtx);
244         bioq_insert_tail(d->bqueue, bp);
245         wakeup(d);
246         mtx_unlock(&d->bqueue_mtx);
247 }
248
249
250 static void
251 gv_drive_start(struct bio *bp)
252 {
253         struct gv_drive *d;
254         struct gv_sd *s;
255
256         switch (bp->bio_cmd) {
257         case BIO_READ:
258         case BIO_WRITE:
259         case BIO_DELETE:
260                 break;
261         case BIO_GETATTR:
262         default:
263                 g_io_deliver(bp, EOPNOTSUPP);
264                 return;
265         }
266
267         s = bp->bio_to->private;
268         if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) {
269                 g_io_deliver(bp, ENXIO);
270                 return;
271         }
272
273         d = bp->bio_to->geom->softc;
274
275         /*
276          * Put the BIO on the worker queue, where the worker thread will pick
277          * it up.
278          */
279         mtx_lock(&d->bqueue_mtx);
280         bioq_disksort(d->bqueue, bp);
281         wakeup(d);
282         mtx_unlock(&d->bqueue_mtx);
283
284 }
285
286 static void
287 gv_drive_worker(void *arg)
288 {
289         struct bio *bp, *cbp;
290         struct g_geom *gp;
291         struct g_provider *pp;
292         struct gv_drive *d;
293         struct gv_sd *s;
294         int error;
295
296         d = arg;
297
298         mtx_lock(&d->bqueue_mtx);
299         for (;;) {
300                 /* We were signaled to exit. */
301                 if (d->flags & GV_DRIVE_THREAD_DIE)
302                         break;
303
304                 /* Take the first BIO from out queue. */
305                 bp = bioq_takefirst(d->bqueue);
306                 if (bp == NULL) {
307                         msleep(d, &d->bqueue_mtx, PRIBIO, "-", hz/10);
308                         continue;
309                 }
310                 mtx_unlock(&d->bqueue_mtx);
311  
312                 pp = bp->bio_to;
313                 gp = pp->geom;
314
315                 /* Completed request. */
316                 if (bp->bio_cflags & GV_BIO_DONE) {
317                         error = bp->bio_error;
318
319                         /* Deliver the original request. */
320                         g_std_done(bp);
321
322                         /* The request had an error, we need to clean up. */
323                         if (error != 0) {
324                                 g_topology_lock();
325                                 gv_set_drive_state(d, GV_DRIVE_DOWN,
326                                     GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
327                                 g_topology_unlock();
328                                 g_post_event(gv_drive_dead, d, M_WAITOK, d,
329                                     NULL);
330                         }
331
332                 /* New request, needs to be sent downwards. */
333                 } else {
334                         s = pp->private;
335
336                         if ((s->state == GV_SD_DOWN) ||
337                             (s->state == GV_SD_STALE)) {
338                                 g_io_deliver(bp, ENXIO);
339                                 mtx_lock(&d->bqueue_mtx);
340                                 continue;
341                         }
342                         if (bp->bio_offset > s->size) {
343                                 g_io_deliver(bp, EINVAL);
344                                 mtx_lock(&d->bqueue_mtx);
345                                 continue;
346                         }
347
348                         cbp = g_clone_bio(bp);
349                         if (cbp == NULL) {
350                                 g_io_deliver(bp, ENOMEM);
351                                 mtx_lock(&d->bqueue_mtx);
352                                 continue;
353                         }
354                         if (cbp->bio_offset + cbp->bio_length > s->size)
355                                 cbp->bio_length = s->size -
356                                     cbp->bio_offset;
357                         cbp->bio_done = gv_drive_done;
358                         cbp->bio_offset += s->drive_offset;
359                         g_io_request(cbp, LIST_FIRST(&gp->consumer));
360                 }
361
362                 mtx_lock(&d->bqueue_mtx);
363         }
364
365         while ((bp = bioq_takefirst(d->bqueue)) != NULL) {
366                 mtx_unlock(&d->bqueue_mtx);
367                 if (bp->bio_cflags & GV_BIO_DONE) 
368                         g_std_done(bp);
369                 else
370                         g_io_deliver(bp, ENXIO);
371                 mtx_lock(&d->bqueue_mtx);
372         }
373         mtx_unlock(&d->bqueue_mtx);
374         d->flags |= GV_DRIVE_THREAD_DEAD;
375
376         kthread_exit(ENXIO);
377 }
378
379
380 static void
381 gv_drive_orphan(struct g_consumer *cp)
382 {
383         struct g_geom *gp;
384         struct gv_drive *d;
385
386         g_topology_assert();
387         gp = cp->geom;
388         g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", gp->name);
389         d = gp->softc;
390         if (d != NULL) {
391                 gv_set_drive_state(d, GV_DRIVE_DOWN,
392                     GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
393                 g_post_event(gv_drive_dead, d, M_WAITOK, d, NULL);
394         } else
395                 g_wither_geom(gp, ENXIO);
396 }
397
398 static struct g_geom *
399 gv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
400 {
401         struct g_geom *gp, *gp2;
402         struct g_consumer *cp;
403         struct gv_drive *d;
404         struct gv_sd *s;
405         struct gv_softc *sc;
406         struct gv_freelist *fl;
407         struct gv_hdr *vhdr;
408         int error;
409         char *buf, errstr[ERRBUFSIZ];
410
411         vhdr = NULL;
412         d = NULL;
413
414         g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name);
415         g_topology_assert();
416
417         /* Find the VINUM class and its associated geom. */
418         gp2 = find_vinum_geom();
419         if (gp2 == NULL)
420                 return (NULL);
421         sc = gp2->softc;
422
423         gp = g_new_geomf(mp, "%s.vinumdrive", pp->name);
424         gp->start = gv_drive_start;
425         gp->orphan = gv_drive_orphan;
426         gp->access = gv_drive_access;
427         gp->start = gv_drive_start;
428
429         cp = g_new_consumer(gp);
430         g_attach(cp, pp);
431         error = g_access(cp, 1, 0, 0);
432         if (error) {
433                 g_detach(cp);
434                 g_destroy_consumer(cp);
435                 g_destroy_geom(gp);
436                 return (NULL);
437         }
438
439         g_topology_unlock();
440
441         /* Now check if the provided slice is a valid vinum drive. */
442         do {
443                 vhdr = g_read_data(cp, GV_HDR_OFFSET, pp->sectorsize, NULL);
444                 if (vhdr == NULL)
445                         break;
446                 if (vhdr->magic != GV_MAGIC) {
447                         g_free(vhdr);
448                         break;
449                 }
450
451                 /* A valid vinum drive, let's parse the on-disk information. */
452                 buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, NULL);
453                 if (buf == NULL) {
454                         g_free(vhdr);
455                         break;
456                 }
457                 g_topology_lock();
458                 gv_parse_config(sc, buf, 1);
459                 g_free(buf);
460
461                 /*
462                  * Let's see if this drive is already known in the
463                  * configuration.
464                  */
465                 d = gv_find_drive(sc, vhdr->label.name);
466
467                 /* We already know about this drive. */
468                 if (d != NULL) {
469                         /* Check if this drive already has a geom. */
470                         if (d->geom != NULL) {
471                                 g_topology_unlock();
472                                 g_free(vhdr);
473                                 break;
474                         }
475                         bcopy(vhdr, d->hdr, sizeof(*vhdr));
476                         g_free(vhdr);
477
478                 /* This is a new drive. */
479                 } else {
480                         d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO);
481
482                         /* Initialize all needed variables. */
483                         d->size = pp->mediasize - GV_DATA_START;
484                         d->avail = d->size;
485                         d->hdr = vhdr;
486                         strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME);
487                         LIST_INIT(&d->subdisks);
488                         LIST_INIT(&d->freelist);
489
490                         /* We also need a freelist entry. */
491                         fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
492                         fl->offset = GV_DATA_START;
493                         fl->size = d->avail;
494                         LIST_INSERT_HEAD(&d->freelist, fl, freelist);
495                         d->freelist_entries = 1;
496
497                         /* Save it into the main configuration. */
498                         LIST_INSERT_HEAD(&sc->drives, d, drive);
499                 }
500
501                 /*
502                  * Create bio queue, queue mutex and a worker thread, if
503                  * necessary.
504                  */
505                 if (d->bqueue == NULL) {
506                         d->bqueue = g_malloc(sizeof(struct bio_queue_head),
507                             M_WAITOK | M_ZERO);
508                         bioq_init(d->bqueue);
509                 }
510                 if (mtx_initialized(&d->bqueue_mtx) == 0)
511                         mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
512
513                 if (!(d->flags & GV_DRIVE_THREAD_ACTIVE)) {
514                         kthread_create(gv_drive_worker, d, NULL, 0, 0,
515                             "gv_d %s", d->name);
516                         d->flags |= GV_DRIVE_THREAD_ACTIVE;
517                 }
518
519                 g_access(cp, -1, 0, 0);
520
521                 gp->softc = d;
522                 d->geom = gp;
523                 d->vinumconf = sc;
524                 strncpy(d->device, pp->name, GV_MAXDRIVENAME);
525
526                 /*
527                  * Find out which subdisks belong to this drive and crosslink
528                  * them.
529                  */
530                 LIST_FOREACH(s, &sc->subdisks, sd) {
531                         if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME))
532                                 /* XXX: errors ignored */
533                                 gv_sd_to_drive(sc, d, s, errstr,
534                                     sizeof(errstr));
535                 }
536
537                 /* This drive is now up for sure. */
538                 gv_set_drive_state(d, GV_DRIVE_UP, 0);
539
540                 /*
541                  * If there are subdisks on this drive, we need to create
542                  * providers for them.
543                  */ 
544                 if (d->sdcount)
545                         gv_drive_modify(d);
546
547                 return (gp);
548
549         } while (0);
550
551         g_topology_lock();
552         g_access(cp, -1, 0, 0);
553
554         g_detach(cp);
555         g_destroy_consumer(cp);
556         g_destroy_geom(gp);
557         return (NULL);
558 }
559
560 /*
561  * Modify the providers for the given drive 'd'.  It is assumed that the
562  * subdisk list of 'd' is already correctly set up.
563  */
564 void
565 gv_drive_modify(struct gv_drive *d)
566 {
567         struct g_geom *gp;
568         struct g_consumer *cp;
569         struct g_provider *pp, *pp2;
570         struct gv_sd *s;
571
572         KASSERT(d != NULL, ("gv_drive_modify: null d"));
573         gp = d->geom;
574         KASSERT(gp != NULL, ("gv_drive_modify: null gp"));
575         cp = LIST_FIRST(&gp->consumer);
576         KASSERT(cp != NULL, ("gv_drive_modify: null cp"));
577         pp = cp->provider;
578         KASSERT(pp != NULL, ("gv_drive_modify: null pp"));
579
580         g_topology_assert();
581
582         LIST_FOREACH(s, &d->subdisks, from_drive) {
583                 /* This subdisk already has a provider. */
584                 if (s->provider != NULL)
585                         continue;
586                 pp2 = g_new_providerf(gp, "gvinum/sd/%s", s->name);
587                 pp2->mediasize = s->size;
588                 pp2->sectorsize = pp->sectorsize;
589                 g_error_provider(pp2, 0);
590                 s->provider = pp2;
591                 pp2->private = s;
592         }
593 }
594
595 static void
596 gv_drive_dead(void *arg, int flag)
597 {
598         struct g_geom *gp;
599         struct g_consumer *cp;
600         struct gv_drive *d;
601         struct gv_sd *s;
602
603         g_topology_assert();
604         KASSERT(arg != NULL, ("gv_drive_dead: NULL arg"));
605
606         if (flag == EV_CANCEL)
607                 return;
608
609         d = arg;
610         if (d->state != GV_DRIVE_DOWN)
611                 return;
612
613         g_trace(G_T_TOPOLOGY, "gv_drive_dead(%s)", d->name);
614
615         gp = d->geom;
616         if (gp == NULL)
617                 return;
618
619         LIST_FOREACH(cp, &gp->consumer, consumer) {
620                 if (cp->nstart != cp->nend) {
621                         printf("GEOM_VINUM: dead drive '%s' has still "
622                             "active requests, can't detach consumer\n",
623                             d->name);
624                         g_post_event(gv_drive_dead, d, M_WAITOK, d,
625                             NULL);
626                         return;
627                 }
628                 if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
629                         g_access(cp, -cp->acr, -cp->acw, -cp->ace);
630         }
631
632         printf("GEOM_VINUM: lost drive '%s'\n", d->name);
633         d->geom = NULL;
634         LIST_FOREACH(s, &d->subdisks, from_drive) {
635                 s->provider = NULL;
636                 s->consumer = NULL;
637         }
638         gv_kill_drive_thread(d);
639         gp->softc = NULL;
640         g_wither_geom(gp, ENXIO);
641 }
642
643 static int
644 gv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp,
645     struct g_geom *gp)
646 {
647         struct gv_drive *d;
648
649         g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name);
650         g_topology_assert();
651
652         d = gp->softc;
653         gv_kill_drive_thread(d);
654
655         g_wither_geom(gp, ENXIO);
656         return (0);
657 }
658
659 #define VINUMDRIVE_CLASS_NAME "VINUMDRIVE"
660
661 static struct g_class g_vinum_drive_class = {
662         .name = VINUMDRIVE_CLASS_NAME,
663         .version = G_VERSION,
664         .taste = gv_drive_taste,
665         .destroy_geom = gv_drive_destroy_geom
666 };
667
668 DECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive);