2 * Copyright (c) 2004 Lukas Ertl
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
30 #include <sys/param.h>
32 #include <sys/kernel.h>
33 #include <sys/kthread.h>
34 #include <sys/libkern.h>
35 #include <sys/malloc.h>
36 #include <sys/queue.h>
38 #include <geom/geom.h>
39 #include <geom/vinum/geom_vinum_var.h>
40 #include <geom/vinum/geom_vinum.h>
41 #include <geom/vinum/geom_vinum_share.h>
43 static int gv_init_plex(struct gv_plex *);
44 void gv_init_td(void *);
45 static int gv_rebuild_plex(struct gv_plex *);
46 void gv_rebuild_td(void *);
47 static int gv_start_plex(struct gv_plex *);
48 static int gv_start_vol(struct gv_volume *);
49 static int gv_sync(struct gv_volume *);
50 void gv_sync_td(void *);
60 gv_parityop(struct g_geom *gp, struct gctl_req *req)
65 struct g_consumer *cp;
66 int error, *flags, type, *rebuild, rv;
71 plex = gctl_get_param(req, "plex", NULL);
73 gctl_error(req, "no plex given");
77 flags = gctl_get_paraml(req, "flags", sizeof(*flags));
79 gctl_error(req, "no flags given");
83 rebuild = gctl_get_paraml(req, "rebuild", sizeof(*rebuild));
84 if (rebuild == NULL) {
85 gctl_error(req, "no rebuild op given");
90 type = gv_object_type(sc, plex);
98 gctl_error(req, "'%s' is not a plex", plex);
102 p = gv_find_plex(sc, plex);
103 if (p->state != GV_PLEX_UP) {
104 gctl_error(req, "plex %s is not completely accessible",
108 if (p->org != GV_PLEX_RAID5) {
109 gctl_error(req, "plex %s is not a RAID5 plex", p->name);
114 error = g_access(cp, 1, 1, 0);
116 gctl_error(req, "cannot access consumer");
121 /* Reset the check pointer when using -f. */
122 if (*flags & GV_FLAG_F)
127 gctl_error(req, "cannot create BIO - out of memory");
129 error = g_access(cp, -1, -1, 0);
132 bp->bio_cmd = BIO_WRITE;
134 bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
135 bp->bio_cflags |= GV_BIO_CHECK;
137 bp->bio_cflags |= GV_BIO_PARITY;
138 bp->bio_offset = p->synced;
139 bp->bio_length = p->stripesize;
141 /* Schedule it down ... */
142 g_io_request(bp, cp);
144 /* ... and wait for the result. */
145 error = biowait(bp, "gwrite");
146 g_free(bp->bio_data);
150 /* Incorrect parity. */
154 /* Some other error happened. */
156 gctl_error(req, "Parity check failed at offset 0x%jx, "
157 "errno %d", (intmax_t)p->synced, error);
159 /* Correct parity. */
163 gctl_set_param(req, "offset", &p->synced, sizeof(p->synced));
165 /* Advance the check pointer if there was no error. */
167 p->synced += p->stripesize;
169 /* End of plex; reset the check pointer and signal it to the caller. */
170 if (p->synced >= p->size) {
176 error = g_access(cp, -1, -1, 0);
179 gctl_set_param(req, "rv", &rv, sizeof(rv));
183 gv_start_obj(struct g_geom *gp, struct gctl_req *req)
188 int *argc, *initsize;
192 argc = gctl_get_paraml(req, "argc", sizeof(*argc));
193 initsize = gctl_get_paraml(req, "initsize", sizeof(*initsize));
195 if (argc == NULL || *argc == 0) {
196 gctl_error(req, "no arguments given");
202 for (i = 0; i < *argc; i++) {
203 snprintf(buf, sizeof(buf), "argv%d", i);
204 argv = gctl_get_param(req, buf, NULL);
207 type = gv_object_type(sc, argv);
210 v = gv_find_vol(sc, argv);
211 err = gv_start_vol(v);
213 if (err == EINPROGRESS) {
214 gctl_error(req, "cannot start volume "
215 "'%s': already in progress", argv);
217 gctl_error(req, "cannot start volume "
218 "'%s'; errno: %d", argv, err);
225 p = gv_find_plex(sc, argv);
226 err = gv_start_plex(p);
228 if (err == EINPROGRESS) {
229 gctl_error(req, "cannot start plex "
230 "'%s': already in progress", argv);
232 gctl_error(req, "cannot start plex "
233 "'%s'; errno: %d", argv, err);
242 gctl_error(req, "cannot start '%s' - not yet supported",
246 gctl_error(req, "unknown object '%s'", argv);
253 gv_start_plex(struct gv_plex *p)
258 KASSERT(p != NULL, ("gv_start_plex: NULL p"));
260 if (p->state == GV_PLEX_UP)
265 if ((v != NULL) && (v->plexcount > 1))
267 else if (p->org == GV_PLEX_RAID5) {
268 if (p->state == GV_PLEX_DEGRADED)
269 error = gv_rebuild_plex(p);
271 error = gv_init_plex(p);
278 gv_start_vol(struct gv_volume *v)
284 KASSERT(v != NULL, ("gv_start_vol: NULL v"));
288 if (v->plexcount == 0)
291 else if (v->plexcount == 1) {
292 p = LIST_FIRST(&v->plexes);
293 KASSERT(p != NULL, ("gv_start_vol: NULL p on %s", v->name));
294 if (p->org == GV_PLEX_RAID5) {
297 error = gv_init_plex(p);
299 case GV_PLEX_DEGRADED:
300 error = gv_rebuild_plex(p);
306 LIST_FOREACH(s, &p->subdisks, in_plex) {
307 gv_set_sd_state(s, GV_SD_UP,
318 gv_sync(struct gv_volume *v)
321 struct gv_plex *p, *up;
322 struct gv_sync_args *sync;
324 KASSERT(v != NULL, ("gv_sync: NULL v"));
326 KASSERT(sc != NULL, ("gv_sync: NULL sc on %s", v->name));
328 /* Find the plex that's up. */
330 LIST_FOREACH(up, &v->plexes, in_volume) {
331 if (up->state == GV_PLEX_UP)
335 /* Didn't find a good plex. */
339 LIST_FOREACH(p, &v->plexes, in_volume) {
340 if ((p == up) || (p->state == GV_PLEX_UP))
342 if (p->flags & GV_PLEX_SYNCING) {
343 return (EINPROGRESS);
345 p->flags |= GV_PLEX_SYNCING;
346 sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO);
350 sync->syncsize = GV_DFLT_SYNCSIZE;
351 kthread_create(gv_sync_td, sync, NULL, 0, 0, "gv_sync '%s'",
359 gv_rebuild_plex(struct gv_plex *p)
361 struct gv_sync_args *sync;
363 if (gv_is_open(p->geom))
366 if (p->flags & GV_PLEX_SYNCING)
367 return (EINPROGRESS);
368 p->flags |= GV_PLEX_SYNCING;
370 sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO);
372 sync->syncsize = GV_DFLT_SYNCSIZE;
374 kthread_create(gv_rebuild_td, sync, NULL, 0, 0, "gv_rebuild %s",
381 gv_init_plex(struct gv_plex *p)
385 KASSERT(p != NULL, ("gv_init_plex: NULL p"));
387 LIST_FOREACH(s, &p->subdisks, in_plex) {
388 if (s->state == GV_SD_INITIALIZING)
389 return (EINPROGRESS);
390 gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
391 s->init_size = GV_DFLT_SYNCSIZE;
392 kthread_create(gv_init_td, s, NULL, 0, 0, "gv_init %s",
399 /* This thread is responsible for rebuilding a degraded RAID5 plex. */
401 gv_rebuild_td(void *arg)
405 struct g_consumer *cp;
406 struct gv_sync_args *sync;
420 error = g_access(cp, 1, 1, 0);
423 printf("GEOM_VINUM: rebuild of %s failed to access consumer: "
424 "%d\n", p->name, error);
429 buf = g_malloc(sync->syncsize, M_WAITOK);
431 printf("GEOM_VINUM: rebuild of %s started\n", p->name);
433 for (i = 0; i < p->size; i += (p->stripesize * (p->sdcount - 1))) {
435 if (i + sync->syncsize > p->size)
436 sync->syncsize = p->size - i;
440 printf("GEOM_VINUM: rebuild of %s failed creating bio: "
441 "out of memory\n", p->name);
444 bp->bio_cmd = BIO_WRITE;
447 bp->bio_cflags |= GV_BIO_REBUILD;
449 bp->bio_length = p->stripesize;
451 /* Schedule it down ... */
452 g_io_request(bp, cp);
454 /* ... and wait for the result. */
455 error = biowait(bp, "gwrite");
457 printf("GEOM_VINUM: rebuild of %s failed at offset %jd "
458 "errno: %d\n", p->name, i, error);
471 g_access(cp, -1, -1, 0);
472 gv_save_config_all(p->vinumconf);
475 p->flags &= ~GV_PLEX_SYNCING;
478 /* Successful rebuild. */
480 printf("GEOM_VINUM: rebuild of %s finished\n", p->name);
487 gv_sync_td(void *arg)
491 struct g_consumer *from, *to;
492 struct gv_sync_args *sync;
499 from = sync->from->consumer;
500 to = sync->to->consumer;
508 error = g_access(from, 1, 0, 0);
511 printf("GEOM_VINUM: sync from '%s' failed to access "
512 "consumer: %d\n", sync->from->name, error);
516 error = g_access(to, 0, 1, 0);
518 g_access(from, -1, 0, 0);
520 printf("GEOM_VINUM: sync to '%s' failed to access "
521 "consumer: %d\n", p->name, error);
527 printf("GEOM_VINUM: plex sync %s -> %s started\n", sync->from->name,
529 for (i = 0; i < p->size; i+= sync->syncsize) {
530 /* Read some bits from the good plex. */
531 buf = g_read_data(from, i, sync->syncsize, &error);
533 printf("GEOM_VINUM: sync read from '%s' failed at "
534 "offset %jd; errno: %d\n", sync->from->name, i,
540 * Create a bio and schedule it down on the 'bad' plex. We
541 * cannot simply use g_write_data() because we have to let the
542 * lower parts know that we are an initialization process and
543 * not a 'normal' request.
547 printf("GEOM_VINUM: sync write to '%s' failed at "
548 "offset %jd; out of memory\n", p->name, i);
552 bp->bio_cmd = BIO_WRITE;
554 bp->bio_length = sync->syncsize;
559 * This hack declares this bio as part of an initialization
560 * process, so that the lower levels allow it to get through.
562 bp->bio_cflags |= GV_BIO_SYNCREQ;
564 /* Schedule it down ... */
565 g_io_request(bp, to);
567 /* ... and wait for the result. */
568 error = biowait(bp, "gwrite");
572 printf("GEOM_VINUM: sync write to '%s' failed at "
573 "offset %jd; errno: %d\n", p->name, i, error);
577 /* Note that we have synced a little bit more. */
578 p->synced += sync->syncsize;
582 g_access(from, -1, 0, 0);
583 g_access(to, 0, -1, 0);
584 gv_save_config_all(p->vinumconf);
587 /* Successful synchronization. */
589 printf("GEOM_VINUM: plex sync %s -> %s finished\n",
590 sync->from->name, sync->to->name);
592 p->flags &= ~GV_PLEX_SYNCING;
600 gv_init_td(void *arg)
605 struct g_consumer *cp;
607 off_t i, init_size, start, offset, length;
611 KASSERT(s != NULL, ("gv_init_td: NULL s"));
613 KASSERT(d != NULL, ("gv_init_td: NULL d"));
615 KASSERT(gp != NULL, ("gv_init_td: NULL gp"));
617 cp = LIST_FIRST(&gp->consumer);
618 KASSERT(cp != NULL, ("gv_init_td: NULL cp"));
621 init_size = s->init_size;
622 start = s->drive_offset + s->initialized;
623 offset = s->drive_offset;
626 buf = g_malloc(s->init_size, M_WAITOK | M_ZERO);
629 error = g_access(cp, 0, 1, 0);
631 s->init_error = error;
633 printf("GEOM_VINUM: subdisk '%s' init: failed to access "
634 "consumer; error: %d\n", s->name, error);
639 for (i = start; i < offset + length; i += init_size) {
640 error = g_write_data(cp, i, buf, init_size);
642 printf("GEOM_VINUM: subdisk '%s' init: write failed"
643 " at offset %jd (drive offset %jd); error %d\n",
644 s->name, (intmax_t)s->initialized, (intmax_t)i,
648 s->initialized += init_size;
654 g_access(cp, 0, -1, 0);
657 s->init_error = error;
659 gv_set_sd_state(s, GV_SD_STALE,
660 GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
664 gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
667 printf("GEOM_VINUM: subdisk '%s' init: finished successfully\n",