]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/geom/mirror/g_mirror.c
This commit was generated by cvs2svn to compensate for changes in r147464,
[FreeBSD/FreeBSD.git] / sys / geom / mirror / g_mirror.c
1 /*-
2  * Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
33 #include <sys/module.h>
34 #include <sys/limits.h>
35 #include <sys/lock.h>
36 #include <sys/mutex.h>
37 #include <sys/bio.h>
38 #include <sys/sysctl.h>
39 #include <sys/malloc.h>
40 #include <sys/eventhandler.h>
41 #include <vm/uma.h>
42 #include <geom/geom.h>
43 #include <sys/proc.h>
44 #include <sys/kthread.h>
45 #include <sys/sched.h>
46 #include <geom/mirror/g_mirror.h>
47
48
49 static MALLOC_DEFINE(M_MIRROR, "mirror data", "GEOM_MIRROR Data");
50
51 SYSCTL_DECL(_kern_geom);
52 SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff");
53 u_int g_mirror_debug = 0;
54 TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
55 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
56     "Debug level");
57 static u_int g_mirror_timeout = 4;
58 TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
59 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
60     0, "Time to wait on all mirror components");
61 static u_int g_mirror_idletime = 5;
62 TUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime);
63 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW,
64     &g_mirror_idletime, 0, "Mark components as clean when idling");
65 static u_int g_mirror_reqs_per_sync = 5;
66 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
67     &g_mirror_reqs_per_sync, 0,
68     "Number of regular I/O requests per synchronization request");
69 static u_int g_mirror_syncs_per_sec = 1000;
70 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
71     &g_mirror_syncs_per_sec, 0,
72     "Number of synchronizations requests per second");
73
/*
 * msleep() wrapper that emits a level-4 debug message naming the wait
 * channel both before sleeping and after waking up.
 */
#define MSLEEP(ident, mtx, priority, wmesg, timeout)                    \
do {                                                                    \
        G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));       \
        msleep((ident), (mtx), (priority), (wmesg), (timeout));         \
        G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));       \
} while (0)
79
80 static eventhandler_tag g_mirror_ehtag = NULL;
81
82 static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
83     struct g_geom *gp);
84 static g_taste_t g_mirror_taste;
85 static void g_mirror_init(struct g_class *mp);
86 static void g_mirror_fini(struct g_class *mp);
87
88 struct g_class g_mirror_class = {
89         .name = G_MIRROR_CLASS_NAME,
90         .version = G_VERSION,
91         .ctlreq = g_mirror_config,
92         .taste = g_mirror_taste,
93         .destroy_geom = g_mirror_destroy_geom,
94         .init = g_mirror_init,
95         .fini = g_mirror_fini
96 };
97
98
99 static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
100 static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
101 static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
102 static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
103     struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
104 static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
105
106
107 static const char *
108 g_mirror_disk_state2str(int state)
109 {
110
111         switch (state) {
112         case G_MIRROR_DISK_STATE_NONE:
113                 return ("NONE");
114         case G_MIRROR_DISK_STATE_NEW:
115                 return ("NEW");
116         case G_MIRROR_DISK_STATE_ACTIVE:
117                 return ("ACTIVE");
118         case G_MIRROR_DISK_STATE_STALE:
119                 return ("STALE");
120         case G_MIRROR_DISK_STATE_SYNCHRONIZING:
121                 return ("SYNCHRONIZING");
122         case G_MIRROR_DISK_STATE_DISCONNECTED:
123                 return ("DISCONNECTED");
124         case G_MIRROR_DISK_STATE_DESTROY:
125                 return ("DESTROY");
126         default:
127                 return ("INVALID");
128         }
129 }
130
131 static const char *
132 g_mirror_device_state2str(int state)
133 {
134
135         switch (state) {
136         case G_MIRROR_DEVICE_STATE_STARTING:
137                 return ("STARTING");
138         case G_MIRROR_DEVICE_STATE_RUNNING:
139                 return ("RUNNING");
140         default:
141                 return ("INVALID");
142         }
143 }
144
145 static const char *
146 g_mirror_get_diskname(struct g_mirror_disk *disk)
147 {
148
149         if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
150                 return ("[unknown]");
151         return (disk->d_name);
152 }
153
154 /*
155  * --- Events handling functions ---
156  * Events in geom_mirror are used to maintain disks and device status
157  * from one thread to simplify locking.
158  */
159 static void
160 g_mirror_event_free(struct g_mirror_event *ep)
161 {
162
163         free(ep, M_MIRROR);
164 }
165
166 int
167 g_mirror_event_send(void *arg, int state, int flags)
168 {
169         struct g_mirror_softc *sc;
170         struct g_mirror_disk *disk;
171         struct g_mirror_event *ep;
172         int error;
173
174         ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
175         G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
176         if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
177                 disk = NULL;
178                 sc = arg;
179         } else {
180                 disk = arg;
181                 sc = disk->d_softc;
182         }
183         ep->e_disk = disk;
184         ep->e_state = state;
185         ep->e_flags = flags;
186         ep->e_error = 0;
187         mtx_lock(&sc->sc_events_mtx);
188         TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
189         mtx_unlock(&sc->sc_events_mtx);
190         G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
191         mtx_lock(&sc->sc_queue_mtx);
192         wakeup(sc);
193         mtx_unlock(&sc->sc_queue_mtx);
194         if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
195                 return (0);
196         g_topology_assert();
197         G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
198         g_topology_unlock();
199         while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
200                 mtx_lock(&sc->sc_events_mtx);
201                 MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
202                     hz * 5);
203         }
204         /* Don't even try to use 'sc' here, because it could be already dead. */
205         g_topology_lock();
206         error = ep->e_error;
207         g_mirror_event_free(ep);
208         return (error);
209 }
210
211 static struct g_mirror_event *
212 g_mirror_event_get(struct g_mirror_softc *sc)
213 {
214         struct g_mirror_event *ep;
215
216         mtx_lock(&sc->sc_events_mtx);
217         ep = TAILQ_FIRST(&sc->sc_events);
218         mtx_unlock(&sc->sc_events_mtx);
219         return (ep);
220 }
221
222 static void
223 g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
224 {
225
226         mtx_lock(&sc->sc_events_mtx);
227         TAILQ_REMOVE(&sc->sc_events, ep, e_next);
228         mtx_unlock(&sc->sc_events_mtx);
229 }
230
231 static void
232 g_mirror_event_cancel(struct g_mirror_disk *disk)
233 {
234         struct g_mirror_softc *sc;
235         struct g_mirror_event *ep, *tmpep;
236
237         g_topology_assert();
238
239         sc = disk->d_softc;
240         mtx_lock(&sc->sc_events_mtx);
241         TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
242                 if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
243                         continue;
244                 if (ep->e_disk != disk)
245                         continue;
246                 TAILQ_REMOVE(&sc->sc_events, ep, e_next);
247                 if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
248                         g_mirror_event_free(ep);
249                 else {
250                         ep->e_error = ECANCELED;
251                         wakeup(ep);
252                 }
253         }
254         mtx_unlock(&sc->sc_events_mtx);
255 }
256
257 /*
258  * Return the number of disks in given state.
259  * If state is equal to -1, count all connected disks.
260  */
261 u_int
262 g_mirror_ndisks(struct g_mirror_softc *sc, int state)
263 {
264         struct g_mirror_disk *disk;
265         u_int n = 0;
266
267         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
268                 if (state == -1 || disk->d_state == state)
269                         n++;
270         }
271         return (n);
272 }
273
274 /*
275  * Find a disk in mirror by its disk ID.
276  */
277 static struct g_mirror_disk *
278 g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
279 {
280         struct g_mirror_disk *disk;
281
282         g_topology_assert();
283
284         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
285                 if (disk->d_id == id)
286                         return (disk);
287         }
288         return (NULL);
289 }
290
291 static u_int
292 g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
293 {
294         struct bio *bp;
295         u_int nreqs = 0;
296
297         mtx_lock(&sc->sc_queue_mtx);
298         TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
299                 if (bp->bio_from == cp)
300                         nreqs++;
301         }
302         mtx_unlock(&sc->sc_queue_mtx);
303         return (nreqs);
304 }
305
306 static int
307 g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
308 {
309
310         if (cp->index > 0) {
311                 G_MIRROR_DEBUG(2,
312                     "I/O requests for %s exist, can't destroy it now.",
313                     cp->provider->name);
314                 return (1);
315         }
316         if (g_mirror_nrequests(sc, cp) > 0) {
317                 G_MIRROR_DEBUG(2,
318                     "I/O requests for %s in queue, can't destroy it now.",
319                     cp->provider->name);
320                 return (1);
321         }
322         return (0);
323 }
324
325 static void
326 g_mirror_destroy_consumer(void *arg, int flags __unused)
327 {
328         struct g_consumer *cp;
329
330         cp = arg;
331         G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
332         g_detach(cp);
333         g_destroy_consumer(cp);
334 }
335
336 static void
337 g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
338 {
339         struct g_provider *pp;
340         int retaste_wait;
341
342         g_topology_assert();
343
344         cp->private = NULL;
345         if (g_mirror_is_busy(sc, cp))
346                 return;
347         pp = cp->provider;
348         retaste_wait = 0;
349         if (cp->acw == 1) {
350                 if ((pp->geom->flags & G_GEOM_WITHER) == 0)
351                         retaste_wait = 1;
352         }
353         G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
354             -cp->acw, -cp->ace, 0);
355         if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
356                 g_access(cp, -cp->acr, -cp->acw, -cp->ace);
357         if (retaste_wait) {
358                 /*
359                  * After retaste event was send (inside g_access()), we can send
360                  * event to detach and destroy consumer.
361                  * A class, which has consumer to the given provider connected
362                  * will not receive retaste event for the provider.
363                  * This is the way how I ignore retaste events when I close
364                  * consumers opened for write: I detach and destroy consumer
365                  * after retaste event is sent.
366                  */
367                 g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
368                 return;
369         }
370         G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
371         g_detach(cp);
372         g_destroy_consumer(cp);
373 }
374
375 static int
376 g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
377 {
378         struct g_consumer *cp;
379         int error;
380
381         g_topology_assert();
382         KASSERT(disk->d_consumer == NULL,
383             ("Disk already connected (device %s).", disk->d_softc->sc_name));
384
385         cp = g_new_consumer(disk->d_softc->sc_geom);
386         error = g_attach(cp, pp);
387         if (error != 0) {
388                 g_destroy_consumer(cp);
389                 return (error);
390         }
391         error = g_access(cp, 1, 1, 1);
392         if (error != 0) {
393                 g_detach(cp);
394                 g_destroy_consumer(cp);
395                 G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
396                     pp->name, error);
397                 return (error);
398         }
399         disk->d_consumer = cp;
400         disk->d_consumer->private = disk;
401         disk->d_consumer->index = 0;
402
403         G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
404         return (0);
405 }
406
407 static void
408 g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
409 {
410
411         g_topology_assert();
412
413         if (cp == NULL)
414                 return;
415         if (cp->provider != NULL)
416                 g_mirror_kill_consumer(sc, cp);
417         else
418                 g_destroy_consumer(cp);
419 }
420
421 /*
422  * Initialize disk. This means allocate memory, create consumer, attach it
423  * to the provider and open access (r1w1e1) to it.
424  */
425 static struct g_mirror_disk *
426 g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
427     struct g_mirror_metadata *md, int *errorp)
428 {
429         struct g_mirror_disk *disk;
430         int error;
431
432         disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
433         if (disk == NULL) {
434                 error = ENOMEM;
435                 goto fail;
436         }
437         disk->d_softc = sc;
438         error = g_mirror_connect_disk(disk, pp);
439         if (error != 0)
440                 goto fail;
441         disk->d_id = md->md_did;
442         disk->d_state = G_MIRROR_DISK_STATE_NONE;
443         disk->d_priority = md->md_priority;
444         disk->d_delay.sec = 0;
445         disk->d_delay.frac = 0;
446         binuptime(&disk->d_last_used);
447         disk->d_flags = md->md_dflags;
448         if (md->md_provider[0] != '\0')
449                 disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
450         disk->d_sync.ds_consumer = NULL;
451         disk->d_sync.ds_offset = md->md_sync_offset;
452         disk->d_sync.ds_offset_done = md->md_sync_offset;
453         disk->d_sync.ds_resync = -1;
454         disk->d_genid = md->md_genid;
455         disk->d_sync.ds_syncid = md->md_syncid;
456         if (errorp != NULL)
457                 *errorp = 0;
458         return (disk);
459 fail:
460         if (errorp != NULL)
461                 *errorp = error;
462         if (disk != NULL)
463                 free(disk, M_MIRROR);
464         return (NULL);
465 }
466
467 static void
468 g_mirror_destroy_disk(struct g_mirror_disk *disk)
469 {
470         struct g_mirror_softc *sc;
471
472         g_topology_assert();
473
474         LIST_REMOVE(disk, d_next);
475         g_mirror_event_cancel(disk);
476         sc = disk->d_softc;
477         if (sc->sc_hint == disk)
478                 sc->sc_hint = NULL;
479         switch (disk->d_state) {
480         case G_MIRROR_DISK_STATE_SYNCHRONIZING:
481                 g_mirror_sync_stop(disk, 1);
482                 /* FALLTHROUGH */
483         case G_MIRROR_DISK_STATE_NEW:
484         case G_MIRROR_DISK_STATE_STALE:
485         case G_MIRROR_DISK_STATE_ACTIVE:
486                 g_mirror_disconnect_consumer(sc, disk->d_consumer);
487                 free(disk, M_MIRROR);
488                 break;
489         default:
490                 KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
491                     g_mirror_get_diskname(disk),
492                     g_mirror_disk_state2str(disk->d_state)));
493         }
494 }
495
496 static void
497 g_mirror_destroy_device(struct g_mirror_softc *sc)
498 {
499         struct g_mirror_disk *disk;
500         struct g_mirror_event *ep;
501         struct g_geom *gp;
502         struct g_consumer *cp, *tmpcp;
503
504         g_topology_assert();
505
506         gp = sc->sc_geom;
507         if (sc->sc_provider != NULL)
508                 g_mirror_destroy_provider(sc);
509         for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
510             disk = LIST_FIRST(&sc->sc_disks)) {
511                 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
512                 g_mirror_update_metadata(disk);
513                 g_mirror_destroy_disk(disk);
514         }
515         while ((ep = g_mirror_event_get(sc)) != NULL) {
516                 g_mirror_event_remove(sc, ep);
517                 if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
518                         g_mirror_event_free(ep);
519                 else {
520                         ep->e_error = ECANCELED;
521                         ep->e_flags |= G_MIRROR_EVENT_DONE;
522                         G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
523                         mtx_lock(&sc->sc_events_mtx);
524                         wakeup(ep);
525                         mtx_unlock(&sc->sc_events_mtx);
526                 }
527         }
528         callout_drain(&sc->sc_callout);
529         gp->softc = NULL;
530
531         LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
532                 g_mirror_disconnect_consumer(sc, cp);
533         }
534         sc->sc_sync.ds_geom->softc = NULL;
535         g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
536         mtx_destroy(&sc->sc_queue_mtx);
537         mtx_destroy(&sc->sc_events_mtx);
538         G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
539         g_wither_geom(gp, ENXIO);
540 }
541
542 static void
543 g_mirror_orphan(struct g_consumer *cp)
544 {
545         struct g_mirror_disk *disk;
546
547         g_topology_assert();
548
549         disk = cp->private;
550         if (disk == NULL)
551                 return;
552         disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
553         g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
554             G_MIRROR_EVENT_DONTWAIT);
555 }
556
557 /*
558  * Function should return the next active disk on the list.
559  * It is possible that it will be the same disk as given.
560  * If there are no active disks on list, NULL is returned.
561  */
562 static __inline struct g_mirror_disk *
563 g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
564 {
565         struct g_mirror_disk *dp;
566
567         for (dp = LIST_NEXT(disk, d_next); dp != disk;
568             dp = LIST_NEXT(dp, d_next)) {
569                 if (dp == NULL)
570                         dp = LIST_FIRST(&sc->sc_disks);
571                 if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
572                         break;
573         }
574         if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
575                 return (NULL);
576         return (dp);
577 }
578
579 static struct g_mirror_disk *
580 g_mirror_get_disk(struct g_mirror_softc *sc)
581 {
582         struct g_mirror_disk *disk;
583
584         if (sc->sc_hint == NULL) {
585                 sc->sc_hint = LIST_FIRST(&sc->sc_disks);
586                 if (sc->sc_hint == NULL)
587                         return (NULL);
588         }
589         disk = sc->sc_hint;
590         if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
591                 disk = g_mirror_find_next(sc, disk);
592                 if (disk == NULL)
593                         return (NULL);
594         }
595         sc->sc_hint = g_mirror_find_next(sc, disk);
596         return (disk);
597 }
598
599 static int
600 g_mirror_write_metadata(struct g_mirror_disk *disk,
601     struct g_mirror_metadata *md)
602 {
603         struct g_mirror_softc *sc;
604         struct g_consumer *cp;
605         off_t offset, length;
606         u_char *sector;
607         int error = 0;
608
609         g_topology_assert();
610
611         sc = disk->d_softc;
612         cp = disk->d_consumer;
613         KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
614         KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
615         KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
616             ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
617             cp->acw, cp->ace));
618         length = cp->provider->sectorsize;
619         offset = cp->provider->mediasize - length;
620         sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
621         if (md != NULL)
622                 mirror_metadata_encode(md, sector);
623         g_topology_unlock();
624         error = g_write_data(cp, offset, sector, length);
625         g_topology_lock();
626         free(sector, M_MIRROR);
627         if (error != 0) {
628                 disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_GENID;
629                 g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
630                     G_MIRROR_EVENT_DONTWAIT);
631         }
632         return (error);
633 }
634
635 static int
636 g_mirror_clear_metadata(struct g_mirror_disk *disk)
637 {
638         int error;
639
640         g_topology_assert();
641         error = g_mirror_write_metadata(disk, NULL);
642         if (error == 0) {
643                 G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
644                     g_mirror_get_diskname(disk));
645         } else {
646                 G_MIRROR_DEBUG(0,
647                     "Cannot clear metadata on disk %s (error=%d).",
648                     g_mirror_get_diskname(disk), error);
649         }
650         return (error);
651 }
652
653 void
654 g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
655     struct g_mirror_metadata *md)
656 {
657
658         strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
659         md->md_version = G_MIRROR_VERSION;
660         strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
661         md->md_mid = sc->sc_id;
662         md->md_all = sc->sc_ndisks;
663         md->md_slice = sc->sc_slice;
664         md->md_balance = sc->sc_balance;
665         md->md_genid = sc->sc_genid;
666         md->md_mediasize = sc->sc_mediasize;
667         md->md_sectorsize = sc->sc_sectorsize;
668         md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
669         bzero(md->md_provider, sizeof(md->md_provider));
670         if (disk == NULL) {
671                 md->md_did = arc4random();
672                 md->md_priority = 0;
673                 md->md_syncid = 0;
674                 md->md_dflags = 0;
675                 md->md_sync_offset = 0;
676                 md->md_provsize = 0;
677         } else {
678                 md->md_did = disk->d_id;
679                 md->md_priority = disk->d_priority;
680                 md->md_syncid = disk->d_sync.ds_syncid;
681                 md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
682                 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
683                         md->md_sync_offset = disk->d_sync.ds_offset_done;
684                 else
685                         md->md_sync_offset = 0;
686                 if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
687                         strlcpy(md->md_provider,
688                             disk->d_consumer->provider->name,
689                             sizeof(md->md_provider));
690                 }
691                 md->md_provsize = disk->d_consumer->provider->mediasize;
692         }
693 }
694
695 void
696 g_mirror_update_metadata(struct g_mirror_disk *disk)
697 {
698         struct g_mirror_metadata md;
699         int error;
700
701         g_topology_assert();
702         g_mirror_fill_metadata(disk->d_softc, disk, &md);
703         error = g_mirror_write_metadata(disk, &md);
704         if (error == 0) {
705                 G_MIRROR_DEBUG(2, "Metadata on %s updated.",
706                     g_mirror_get_diskname(disk));
707         } else {
708                 G_MIRROR_DEBUG(0,
709                     "Cannot update metadata on disk %s (error=%d).",
710                     g_mirror_get_diskname(disk), error);
711         }
712 }
713
714 static void
715 g_mirror_bump_syncid(struct g_mirror_softc *sc)
716 {
717         struct g_mirror_disk *disk;
718
719         g_topology_assert();
720         KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
721             ("%s called with no active disks (device=%s).", __func__,
722             sc->sc_name));
723
724         sc->sc_syncid++;
725         G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
726             sc->sc_syncid);
727         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
728                 if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
729                     disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
730                         disk->d_sync.ds_syncid = sc->sc_syncid;
731                         g_mirror_update_metadata(disk);
732                 }
733         }
734 }
735
736 static void
737 g_mirror_bump_genid(struct g_mirror_softc *sc)
738 {
739         struct g_mirror_disk *disk;
740
741         g_topology_assert();
742         KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
743             ("%s called with no active disks (device=%s).", __func__,
744             sc->sc_name));
745
746         sc->sc_genid++;
747         G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
748             sc->sc_genid);
749         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
750                 if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
751                     disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
752                         disk->d_genid = sc->sc_genid;
753                         g_mirror_update_metadata(disk);
754                 }
755         }
756 }
757
758 static void
759 g_mirror_idle(struct g_mirror_softc *sc)
760 {
761         struct g_mirror_disk *disk;
762
763         if (sc->sc_provider == NULL || sc->sc_provider->acw == 0)
764                 return;
765         sc->sc_idle = 1;
766         g_topology_lock();
767         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
768                 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
769                         continue;
770                 G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
771                     g_mirror_get_diskname(disk), sc->sc_name);
772                 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
773                 g_mirror_update_metadata(disk);
774         }
775         g_topology_unlock();
776 }
777
778 static void
779 g_mirror_unidle(struct g_mirror_softc *sc)
780 {
781         struct g_mirror_disk *disk;
782
783         sc->sc_idle = 0;
784         g_topology_lock();
785         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
786                 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
787                         continue;
788                 G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
789                     g_mirror_get_diskname(disk), sc->sc_name);
790                 disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
791                 g_mirror_update_metadata(disk);
792         }
793         g_topology_unlock();
794 }
795
796 /*
797  * Return 1 if we should check if mirror is idling.
798  */
799 static int
800 g_mirror_check_idle(struct g_mirror_softc *sc)
801 {
802         struct g_mirror_disk *disk;
803
804         if (sc->sc_idle)
805                 return (0);
806         if (sc->sc_provider != NULL && sc->sc_provider->acw == 0)
807                 return (0);
808         /*
809          * Check if there are no in-flight requests.
810          */
811         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
812                 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
813                         continue;
814                 if (disk->d_consumer->index > 0)
815                         return (0);
816         }
817         return (1);
818 }
819
820 static __inline int
821 bintime_cmp(struct bintime *bt1, struct bintime *bt2)
822 {
823
824         if (bt1->sec < bt2->sec)
825                 return (-1);
826         else if (bt1->sec > bt2->sec)
827                 return (1);
828         if (bt1->frac < bt2->frac)
829                 return (-1);
830         else if (bt1->frac > bt2->frac)
831                 return (1);
832         return (0);
833 }
834
835 static void
836 g_mirror_update_delay(struct g_mirror_disk *disk, struct bio *bp)
837 {
838
839         if (disk->d_softc->sc_balance != G_MIRROR_BALANCE_LOAD)
840                 return;
841         binuptime(&disk->d_delay);
842         bintime_sub(&disk->d_delay, &bp->bio_t0);
843 }
844
845 static void
846 g_mirror_done(struct bio *bp)
847 {
848         struct g_mirror_softc *sc;
849
850         sc = bp->bio_from->geom->softc;
851         bp->bio_cflags |= G_MIRROR_BIO_FLAG_REGULAR; 
852         mtx_lock(&sc->sc_queue_mtx);
853         bioq_disksort(&sc->sc_queue, bp);
854         wakeup(sc);
855         mtx_unlock(&sc->sc_queue_mtx);
856 }
857
858 static void
859 g_mirror_regular_request(struct bio *bp)
860 {
861         struct g_mirror_softc *sc;
862         struct g_mirror_disk *disk;
863         struct bio *pbp;
864
865         g_topology_assert_not();
866
867         bp->bio_from->index--;
868         pbp = bp->bio_parent;
869         sc = pbp->bio_to->geom->softc;
870         disk = bp->bio_from->private;
871         if (disk == NULL) {
872                 g_topology_lock();
873                 g_mirror_kill_consumer(sc, bp->bio_from);
874                 g_topology_unlock();
875         } else {
876                 g_mirror_update_delay(disk, bp);
877         }
878
879         pbp->bio_inbed++;
880         KASSERT(pbp->bio_inbed <= pbp->bio_children,
881             ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
882             pbp->bio_children));
883         if (bp->bio_error == 0 && pbp->bio_error == 0) {
884                 G_MIRROR_LOGREQ(3, bp, "Request delivered.");
885                 g_destroy_bio(bp);
886                 if (pbp->bio_children == pbp->bio_inbed) {
887                         G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
888                         pbp->bio_completed = pbp->bio_length;
889                         g_io_deliver(pbp, pbp->bio_error);
890                 }
891                 return;
892         } else if (bp->bio_error != 0) {
893                 if (pbp->bio_error == 0)
894                         pbp->bio_error = bp->bio_error;
895                 G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
896                     bp->bio_error);
897                 if (disk != NULL) {
898                         sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
899                         g_mirror_event_send(disk,
900                             G_MIRROR_DISK_STATE_DISCONNECTED,
901                             G_MIRROR_EVENT_DONTWAIT);
902                 }
903                 switch (pbp->bio_cmd) {
904                 case BIO_DELETE:
905                 case BIO_WRITE:
906                         pbp->bio_inbed--;
907                         pbp->bio_children--;
908                         break;
909                 }
910         }
911         g_destroy_bio(bp);
912
913         switch (pbp->bio_cmd) {
914         case BIO_READ:
915                 if (pbp->bio_children == pbp->bio_inbed) {
916                         pbp->bio_error = 0;
917                         mtx_lock(&sc->sc_queue_mtx);
918                         bioq_disksort(&sc->sc_queue, pbp);
919                         G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
920                         wakeup(sc);
921                         mtx_unlock(&sc->sc_queue_mtx);
922                 }
923                 break;
924         case BIO_DELETE:
925         case BIO_WRITE:
926                 if (pbp->bio_children == 0) {
927                         /*
928                          * All requests failed.
929                          */
930                 } else if (pbp->bio_inbed < pbp->bio_children) {
931                         /* Do nothing. */
932                         break;
933                 } else if (pbp->bio_children == pbp->bio_inbed) {
934                         /* Some requests succeeded. */
935                         pbp->bio_error = 0;
936                         pbp->bio_completed = pbp->bio_length;
937                 }
938                 g_io_deliver(pbp, pbp->bio_error);
939                 break;
940         default:
941                 KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
942                 break;
943         }
944 }
945
946 static void
947 g_mirror_sync_done(struct bio *bp)
948 {
949         struct g_mirror_softc *sc;
950
951         G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
952         sc = bp->bio_from->geom->softc;
953         bp->bio_cflags |= G_MIRROR_BIO_FLAG_SYNC;
954         mtx_lock(&sc->sc_queue_mtx);
955         bioq_disksort(&sc->sc_queue, bp);
956         wakeup(sc);
957         mtx_unlock(&sc->sc_queue_mtx);
958 }
959
960 static void
961 g_mirror_start(struct bio *bp)
962 {
963         struct g_mirror_softc *sc;
964
965         sc = bp->bio_to->geom->softc;
966         /*
967          * If sc == NULL or there are no valid disks, provider's error
968          * should be set and g_mirror_start() should not be called at all.
969          */
970         KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
971             ("Provider's error should be set (error=%d)(mirror=%s).",
972             bp->bio_to->error, bp->bio_to->name));
973         G_MIRROR_LOGREQ(3, bp, "Request received.");
974
975         switch (bp->bio_cmd) {
976         case BIO_READ:
977         case BIO_WRITE:
978         case BIO_DELETE:
979                 break;
980         case BIO_GETATTR:
981         default:
982                 g_io_deliver(bp, EOPNOTSUPP);
983                 return;
984         }
985         mtx_lock(&sc->sc_queue_mtx);
986         bioq_disksort(&sc->sc_queue, bp);
987         G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
988         wakeup(sc);
989         mtx_unlock(&sc->sc_queue_mtx);
990 }
991
992 /*
993  * Send one synchronization request.
994  */
995 static void
996 g_mirror_sync_one(struct g_mirror_disk *disk)
997 {
998         struct g_mirror_softc *sc;
999         struct bio *bp;
1000
1001         sc = disk->d_softc;
1002         KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1003             ("Disk %s is not marked for synchronization.",
1004             g_mirror_get_diskname(disk)));
1005
1006         bp = g_new_bio();
1007         if (bp == NULL)
1008                 return;
1009         bp->bio_parent = NULL;
1010         bp->bio_cmd = BIO_READ;
1011         bp->bio_offset = disk->d_sync.ds_offset;
1012         bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
1013         bp->bio_cflags = 0;
1014         bp->bio_done = g_mirror_sync_done;
1015         bp->bio_data = disk->d_sync.ds_data;
1016         if (bp->bio_data == NULL) {
1017                 g_destroy_bio(bp);
1018                 return;
1019         }
1020         disk->d_sync.ds_offset += bp->bio_length;
1021         bp->bio_to = sc->sc_provider;
1022         G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
1023         disk->d_sync.ds_consumer->index++;
1024         g_io_request(bp, disk->d_sync.ds_consumer);
1025 }
1026
1027 static void
1028 g_mirror_sync_request(struct bio *bp)
1029 {
1030         struct g_mirror_softc *sc;
1031         struct g_mirror_disk *disk;
1032
1033         bp->bio_from->index--;
1034         sc = bp->bio_from->geom->softc;
1035         disk = bp->bio_from->private;
1036         if (disk == NULL) {
1037                 g_topology_lock();
1038                 g_mirror_kill_consumer(sc, bp->bio_from);
1039                 g_topology_unlock();
1040                 g_destroy_bio(bp);
1041                 return;
1042         }
1043
1044         /*
1045          * Synchronization request.
1046          */
1047         switch (bp->bio_cmd) {
1048         case BIO_READ:
1049             {
1050                 struct g_consumer *cp;
1051
1052                 if (bp->bio_error != 0) {
1053                         G_MIRROR_LOGREQ(0, bp,
1054                             "Synchronization request failed (error=%d).",
1055                             bp->bio_error);
1056                         g_destroy_bio(bp);
1057                         return;
1058                 }
1059                 G_MIRROR_LOGREQ(3, bp,
1060                     "Synchronization request half-finished.");
1061                 bp->bio_cmd = BIO_WRITE;
1062                 bp->bio_cflags = 0;
1063                 cp = disk->d_consumer;
1064                 KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1065                     ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1066                     cp->acr, cp->acw, cp->ace));
1067                 cp->index++;
1068                 g_io_request(bp, cp);
1069                 return;
1070             }
1071         case BIO_WRITE:
1072             {
1073                 struct g_mirror_disk_sync *sync;
1074
1075                 if (bp->bio_error != 0) {
1076                         G_MIRROR_LOGREQ(0, bp,
1077                             "Synchronization request failed (error=%d).",
1078                             bp->bio_error);
1079                         g_destroy_bio(bp);
1080                         sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
1081                         g_mirror_event_send(disk,
1082                             G_MIRROR_DISK_STATE_DISCONNECTED,
1083                             G_MIRROR_EVENT_DONTWAIT);
1084                         return;
1085                 }
1086                 G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
1087                 sync = &disk->d_sync;
1088                 sync->ds_offset_done = bp->bio_offset + bp->bio_length;
1089                 g_destroy_bio(bp);
1090                 if (sync->ds_resync != -1)
1091                         break;
1092                 if (sync->ds_offset_done == sc->sc_provider->mediasize) {
1093                         /*
1094                          * Disk up-to-date, activate it.
1095                          */
1096                         g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
1097                             G_MIRROR_EVENT_DONTWAIT);
1098                         return;
1099                 } else if (sync->ds_offset_done % (MAXPHYS * 100) == 0) {
1100                         /*
1101                          * Update offset_done on every 100 blocks.
1102                          * XXX: This should be configurable.
1103                          */
1104                         g_topology_lock();
1105                         g_mirror_update_metadata(disk);
1106                         g_topology_unlock();
1107                 }
1108                 return;
1109             }
1110         default:
1111                 KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1112                     bp->bio_cmd, sc->sc_name));
1113                 break;
1114         }
1115 }
1116
1117 static void
1118 g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
1119 {
1120         struct g_mirror_disk *disk;
1121         struct g_consumer *cp;
1122         struct bio *cbp;
1123
1124         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1125                 if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1126                         break;
1127         }
1128         if (disk == NULL) {
1129                 if (bp->bio_error == 0)
1130                         bp->bio_error = ENXIO;
1131                 g_io_deliver(bp, bp->bio_error);
1132                 return;
1133         }
1134         cbp = g_clone_bio(bp);
1135         if (cbp == NULL) {
1136                 if (bp->bio_error == 0)
1137                         bp->bio_error = ENOMEM;
1138                 g_io_deliver(bp, bp->bio_error);
1139                 return;
1140         }
1141         /*
1142          * Fill in the component buf structure.
1143          */
1144         cp = disk->d_consumer;
1145         cbp->bio_done = g_mirror_done;
1146         cbp->bio_to = cp->provider;
1147         G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1148         KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1149             ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1150             cp->acw, cp->ace));
1151         cp->index++;
1152         g_io_request(cbp, cp);
1153 }
1154
1155 static void
1156 g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1157 {
1158         struct g_mirror_disk *disk;
1159         struct g_consumer *cp;
1160         struct bio *cbp;
1161
1162         disk = g_mirror_get_disk(sc);
1163         if (disk == NULL) {
1164                 if (bp->bio_error == 0)
1165                         bp->bio_error = ENXIO;
1166                 g_io_deliver(bp, bp->bio_error);
1167                 return;
1168         }
1169         cbp = g_clone_bio(bp);
1170         if (cbp == NULL) {
1171                 if (bp->bio_error == 0)
1172                         bp->bio_error = ENOMEM;
1173                 g_io_deliver(bp, bp->bio_error);
1174                 return;
1175         }
1176         /*
1177          * Fill in the component buf structure.
1178          */
1179         cp = disk->d_consumer;
1180         cbp->bio_done = g_mirror_done;
1181         cbp->bio_to = cp->provider;
1182         G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1183         KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1184             ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1185             cp->acw, cp->ace));
1186         cp->index++;
1187         g_io_request(cbp, cp);
1188 }
1189
1190 static void
1191 g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1192 {
1193         struct g_mirror_disk *disk, *dp;
1194         struct g_consumer *cp;
1195         struct bio *cbp;
1196         struct bintime curtime;
1197
1198         binuptime(&curtime);
1199         /*
1200          * Find a disk which the smallest load.
1201          */
1202         disk = NULL;
1203         LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1204                 if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1205                         continue;
1206                 /* If disk wasn't used for more than 2 sec, use it. */
1207                 if (curtime.sec - dp->d_last_used.sec >= 2) {
1208                         disk = dp;
1209                         break;
1210                 }
1211                 if (disk == NULL ||
1212                     bintime_cmp(&dp->d_delay, &disk->d_delay) < 0) {
1213                         disk = dp;
1214                 }
1215         }
1216         KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
1217         cbp = g_clone_bio(bp);
1218         if (cbp == NULL) {
1219                 if (bp->bio_error == 0)
1220                         bp->bio_error = ENOMEM;
1221                 g_io_deliver(bp, bp->bio_error);
1222                 return;
1223         }
1224         /*
1225          * Fill in the component buf structure.
1226          */
1227         cp = disk->d_consumer;
1228         cbp->bio_done = g_mirror_done;
1229         cbp->bio_to = cp->provider;
1230         binuptime(&disk->d_last_used);
1231         G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1232         KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1233             ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1234             cp->acw, cp->ace));
1235         cp->index++;
1236         g_io_request(cbp, cp);
1237 }
1238
1239 static void
1240 g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1241 {
1242         struct bio_queue_head queue;
1243         struct g_mirror_disk *disk;
1244         struct g_consumer *cp;
1245         struct bio *cbp;
1246         off_t left, mod, offset, slice;
1247         u_char *data;
1248         u_int ndisks;
1249
1250         if (bp->bio_length <= sc->sc_slice) {
1251                 g_mirror_request_round_robin(sc, bp);
1252                 return;
1253         }
1254         ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1255         slice = bp->bio_length / ndisks;
1256         mod = slice % sc->sc_provider->sectorsize;
1257         if (mod != 0)
1258                 slice += sc->sc_provider->sectorsize - mod;
1259         /*
1260          * Allocate all bios before sending any request, so we can
1261          * return ENOMEM in nice and clean way.
1262          */
1263         left = bp->bio_length;
1264         offset = bp->bio_offset;
1265         data = bp->bio_data;
1266         bioq_init(&queue);
1267         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1268                 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1269                         continue;
1270                 cbp = g_clone_bio(bp);
1271                 if (cbp == NULL) {
1272                         for (cbp = bioq_first(&queue); cbp != NULL;
1273                             cbp = bioq_first(&queue)) {
1274                                 bioq_remove(&queue, cbp);
1275                                 g_destroy_bio(cbp);
1276                         }
1277                         if (bp->bio_error == 0)
1278                                 bp->bio_error = ENOMEM;
1279                         g_io_deliver(bp, bp->bio_error);
1280                         return;
1281                 }
1282                 bioq_insert_tail(&queue, cbp);
1283                 cbp->bio_done = g_mirror_done;
1284                 cbp->bio_caller1 = disk;
1285                 cbp->bio_to = disk->d_consumer->provider;
1286                 cbp->bio_offset = offset;
1287                 cbp->bio_data = data;
1288                 cbp->bio_length = MIN(left, slice);
1289                 left -= cbp->bio_length;
1290                 if (left == 0)
1291                         break;
1292                 offset += cbp->bio_length;
1293                 data += cbp->bio_length;
1294         }
1295         for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1296                 bioq_remove(&queue, cbp);
1297                 G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1298                 disk = cbp->bio_caller1;
1299                 cbp->bio_caller1 = NULL;
1300                 cp = disk->d_consumer;
1301                 KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1302                     ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1303                     cp->acr, cp->acw, cp->ace));
1304                 disk->d_consumer->index++;
1305                 g_io_request(cbp, disk->d_consumer);
1306         }
1307 }
1308
1309 static void
1310 g_mirror_register_request(struct bio *bp)
1311 {
1312         struct g_mirror_softc *sc;
1313
1314         sc = bp->bio_to->geom->softc;
1315         switch (bp->bio_cmd) {
1316         case BIO_READ:
1317                 switch (sc->sc_balance) {
1318                 case G_MIRROR_BALANCE_LOAD:
1319                         g_mirror_request_load(sc, bp);
1320                         break;
1321                 case G_MIRROR_BALANCE_PREFER:
1322                         g_mirror_request_prefer(sc, bp);
1323                         break;
1324                 case G_MIRROR_BALANCE_ROUND_ROBIN:
1325                         g_mirror_request_round_robin(sc, bp);
1326                         break;
1327                 case G_MIRROR_BALANCE_SPLIT:
1328                         g_mirror_request_split(sc, bp);
1329                         break;
1330                 }
1331                 return;
1332         case BIO_WRITE:
1333         case BIO_DELETE:
1334             {
1335                 struct g_mirror_disk *disk;
1336                 struct g_mirror_disk_sync *sync;
1337                 struct bio_queue_head queue;
1338                 struct g_consumer *cp;
1339                 struct bio *cbp;
1340
1341                 if (sc->sc_idle)
1342                         g_mirror_unidle(sc);
1343                 /*
1344                  * Allocate all bios before sending any request, so we can
1345                  * return ENOMEM in nice and clean way.
1346                  */
1347                 bioq_init(&queue);
1348                 LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1349                         sync = &disk->d_sync;
1350                         switch (disk->d_state) {
1351                         case G_MIRROR_DISK_STATE_ACTIVE:
1352                                 break;
1353                         case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1354                                 if (bp->bio_offset >= sync->ds_offset)
1355                                         continue;
1356                                 else if (bp->bio_offset + bp->bio_length >
1357                                     sync->ds_offset_done &&
1358                                     (bp->bio_offset < sync->ds_resync ||
1359                                      sync->ds_resync == -1)) {
1360                                         sync->ds_resync = bp->bio_offset -
1361                                             (bp->bio_offset % MAXPHYS);
1362                                 }
1363                                 break;
1364                         default:
1365                                 continue;
1366                         }
1367                         cbp = g_clone_bio(bp);
1368                         if (cbp == NULL) {
1369                                 for (cbp = bioq_first(&queue); cbp != NULL;
1370                                     cbp = bioq_first(&queue)) {
1371                                         bioq_remove(&queue, cbp);
1372                                         g_destroy_bio(cbp);
1373                                 }
1374                                 if (bp->bio_error == 0)
1375                                         bp->bio_error = ENOMEM;
1376                                 g_io_deliver(bp, bp->bio_error);
1377                                 return;
1378                         }
1379                         bioq_insert_tail(&queue, cbp);
1380                         cbp->bio_done = g_mirror_done;
1381                         cp = disk->d_consumer;
1382                         cbp->bio_caller1 = cp;
1383                         cbp->bio_to = cp->provider;
1384                         KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
1385                             ("Consumer %s not opened (r%dw%de%d).",
1386                             cp->provider->name, cp->acr, cp->acw, cp->ace));
1387                 }
1388                 for (cbp = bioq_first(&queue); cbp != NULL;
1389                     cbp = bioq_first(&queue)) {
1390                         bioq_remove(&queue, cbp);
1391                         G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1392                         cp = cbp->bio_caller1;
1393                         cbp->bio_caller1 = NULL;
1394                         cp->index++;
1395                         g_io_request(cbp, cp);
1396                 }
1397                 /*
1398                  * Bump syncid on first write.
1399                  */
1400                 if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
1401                         sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
1402                         g_topology_lock();
1403                         g_mirror_bump_syncid(sc);
1404                         g_topology_unlock();
1405                 }
1406                 return;
1407             }
1408         default:
1409                 KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1410                     bp->bio_cmd, sc->sc_name));
1411                 break;
1412         }
1413 }
1414
1415 static int
1416 g_mirror_can_destroy(struct g_mirror_softc *sc)
1417 {
1418         struct g_geom *gp;
1419         struct g_consumer *cp;
1420
1421         g_topology_assert();
1422         gp = sc->sc_geom;
1423         LIST_FOREACH(cp, &gp->consumer, consumer) {
1424                 if (g_mirror_is_busy(sc, cp))
1425                         return (0);
1426         }
1427         gp = sc->sc_sync.ds_geom;
1428         LIST_FOREACH(cp, &gp->consumer, consumer) {
1429                 if (g_mirror_is_busy(sc, cp))
1430                         return (0);
1431         }
1432         G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1433             sc->sc_name);
1434         return (1);
1435 }
1436
1437 static int
1438 g_mirror_try_destroy(struct g_mirror_softc *sc)
1439 {
1440
1441         if (sc->sc_rootmount != NULL) {
1442                 G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
1443                     sc->sc_rootmount);
1444                 root_mount_rel(sc->sc_rootmount);
1445                 sc->sc_rootmount = NULL;
1446         }
1447         g_topology_lock();
1448         if (!g_mirror_can_destroy(sc)) {
1449                 g_topology_unlock();
1450                 return (0);
1451         }
1452         if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
1453                 g_topology_unlock();
1454                 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1455                     &sc->sc_worker);
1456                 wakeup(&sc->sc_worker);
1457                 sc->sc_worker = NULL;
1458         } else {
1459                 g_mirror_destroy_device(sc);
1460                 g_topology_unlock();
1461                 free(sc, M_MIRROR);
1462         }
1463         return (1);
1464 }
1465
1466 /*
1467  * Worker thread.
1468  */
1469 static void
1470 g_mirror_worker(void *arg)
1471 {
1472         struct g_mirror_softc *sc;
1473         struct g_mirror_disk *disk;
1474         struct g_mirror_disk_sync *sync;
1475         struct g_mirror_event *ep;
1476         struct bio *bp;
1477         u_int nreqs;
1478
1479         sc = arg;
1480         mtx_lock_spin(&sched_lock);
1481         sched_prio(curthread, PRIBIO);
1482         mtx_unlock_spin(&sched_lock);
1483
1484         nreqs = 0;
1485         for (;;) {
1486                 G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1487                 /*
1488                  * First take a look at events.
1489                  * This is important to handle events before any I/O requests.
1490                  */
1491                 ep = g_mirror_event_get(sc);
1492                 if (ep != NULL && g_topology_try_lock()) {
1493                         g_mirror_event_remove(sc, ep);
1494                         if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1495                                 /* Update only device status. */
1496                                 G_MIRROR_DEBUG(3,
1497                                     "Running event for device %s.",
1498                                     sc->sc_name);
1499                                 ep->e_error = 0;
1500                                 g_mirror_update_device(sc, 1);
1501                         } else {
1502                                 /* Update disk status. */
1503                                 G_MIRROR_DEBUG(3, "Running event for disk %s.",
1504                                      g_mirror_get_diskname(ep->e_disk));
1505                                 ep->e_error = g_mirror_update_disk(ep->e_disk,
1506                                     ep->e_state);
1507                                 if (ep->e_error == 0)
1508                                         g_mirror_update_device(sc, 0);
1509                         }
1510                         g_topology_unlock();
1511                         if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1512                                 KASSERT(ep->e_error == 0,
1513                                     ("Error cannot be handled."));
1514                                 g_mirror_event_free(ep);
1515                         } else {
1516                                 ep->e_flags |= G_MIRROR_EVENT_DONE;
1517                                 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1518                                     ep);
1519                                 mtx_lock(&sc->sc_events_mtx);
1520                                 wakeup(ep);
1521                                 mtx_unlock(&sc->sc_events_mtx);
1522                         }
1523                         if ((sc->sc_flags &
1524                             G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1525                                 if (g_mirror_try_destroy(sc))
1526                                         kthread_exit(0);
1527                         }
1528                         G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1529                         continue;
1530                 }
1531                 /*
1532                  * Now I/O requests.
1533                  */
1534                 /* Get first request from the queue. */
1535                 mtx_lock(&sc->sc_queue_mtx);
1536                 bp = bioq_first(&sc->sc_queue);
1537                 if (bp == NULL) {
1538                         if (ep != NULL) {
1539                                 /*
1540                                  * No I/O requests and topology lock was
1541                                  * already held? Try again.
1542                                  */
1543                                 mtx_unlock(&sc->sc_queue_mtx);
1544                                 continue;
1545                         }
1546                         if ((sc->sc_flags &
1547                             G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1548                                 mtx_unlock(&sc->sc_queue_mtx);
1549                                 if (g_mirror_try_destroy(sc))
1550                                         kthread_exit(0);
1551                                 mtx_lock(&sc->sc_queue_mtx);
1552                         }
1553                 }
1554                 if (sc->sc_sync.ds_ndisks > 0 &&
1555                     (bp == NULL || nreqs > g_mirror_reqs_per_sync)) {
1556                         mtx_unlock(&sc->sc_queue_mtx);
1557                         /*
1558                          * It is time for synchronization...
1559                          */
1560                         nreqs = 0;
1561                         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1562                                 if (disk->d_state !=
1563                                     G_MIRROR_DISK_STATE_SYNCHRONIZING) {
1564                                         continue;
1565                                 }
1566                                 sync = &disk->d_sync;
1567                                 if (sync->ds_offset >=
1568                                     sc->sc_provider->mediasize) {
1569                                         continue;
1570                                 }
1571                                 if (sync->ds_offset > sync->ds_offset_done)
1572                                         continue;
1573                                 if (sync->ds_resync != -1) {
1574                                         sync->ds_offset = sync->ds_resync;
1575                                         sync->ds_offset_done = sync->ds_resync;
1576                                         sync->ds_resync = -1;
1577                                 }
1578                                 g_mirror_sync_one(disk);
1579                         }
1580                         G_MIRROR_DEBUG(5, "%s: I'm here 2.", __func__);
1581                         goto sleep;
1582                 }
1583                 if (bp == NULL) {
1584                         if (g_mirror_check_idle(sc)) {
1585                                 u_int idletime;
1586
1587                                 idletime = g_mirror_idletime;
1588                                 if (idletime == 0)
1589                                         idletime = 1;
1590                                 idletime *= hz;
1591                                 if (msleep(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
1592                                     "m:w1", idletime) == EWOULDBLOCK) {
1593                                         G_MIRROR_DEBUG(5, "%s: I'm here 3.",
1594                                             __func__);
1595                                         /*
1596                                          * No I/O requests in 'idletime' seconds,
1597                                          * so mark components as clean.
1598                                          */
1599                                         g_mirror_idle(sc);
1600                                 }
1601                                 G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1602                         } else {
1603                                 MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP,
1604                                     "m:w2", 0);
1605                                 G_MIRROR_DEBUG(5, "%s: I'm here 5.", __func__);
1606                         }
1607                         continue;
1608                 }
1609                 nreqs++;
1610                 bioq_remove(&sc->sc_queue, bp);
1611                 mtx_unlock(&sc->sc_queue_mtx);
1612
1613                 if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) {
1614                         g_mirror_regular_request(bp);
1615                 } else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1616                         u_int timeout, sps;
1617
1618                         g_mirror_sync_request(bp);
1619 sleep:
1620                         sps = g_mirror_syncs_per_sec;
1621                         if (sps == 0) {
1622                                 G_MIRROR_DEBUG(5, "%s: I'm here 6.", __func__);
1623                                 continue;
1624                         }
1625                         if (ep != NULL) {
1626                                 /*
1627                                  * We have some pending events, don't sleep now.
1628                                  */
1629                                 G_MIRROR_DEBUG(5, "%s: I'm here 7.", __func__);
1630                                 continue;
1631                         }
1632                         mtx_lock(&sc->sc_queue_mtx);
1633                         if (bioq_first(&sc->sc_queue) != NULL) {
1634                                 mtx_unlock(&sc->sc_queue_mtx);
1635                                 G_MIRROR_DEBUG(5, "%s: I'm here 8.", __func__);
1636                                 continue;
1637                         }
1638                         timeout = hz / sps;
1639                         if (timeout == 0)
1640                                 timeout = 1;
1641                         MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w3",
1642                             timeout);
1643                 } else {
1644                         g_mirror_register_request(bp);
1645                 }
1646                 G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
1647         }
1648 }
1649
1650 /*
1651  * Open disk's consumer if needed.
1652  */
1653 static void
1654 g_mirror_update_access(struct g_mirror_disk *disk)
1655 {
1656         struct g_provider *pp;
1657
1658         g_topology_assert();
1659
1660         pp = disk->d_softc->sc_provider;
1661         if (pp == NULL)
1662                 return;
1663         if (pp->acw > 0) {
1664                 if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1665                         G_MIRROR_DEBUG(1,
1666                             "Disk %s (device %s) marked as dirty.",
1667                             g_mirror_get_diskname(disk),
1668                             disk->d_softc->sc_name);
1669                         disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1670                 }
1671         } else if (pp->acw == 0) {
1672                 if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1673                         G_MIRROR_DEBUG(1,
1674                             "Disk %s (device %s) marked as clean.",
1675                             g_mirror_get_diskname(disk),
1676                             disk->d_softc->sc_name);
1677                         disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1678                 }
1679         }
1680 }
1681
1682 static void
1683 g_mirror_sync_start(struct g_mirror_disk *disk)
1684 {
1685         struct g_mirror_softc *sc;
1686         int error;
1687
1688         g_topology_assert();
1689
1690         sc = disk->d_softc;
1691         KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1692             ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1693             sc->sc_state));
1694
1695         G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1696             g_mirror_get_diskname(disk));
1697         disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1698         KASSERT(disk->d_sync.ds_consumer == NULL,
1699             ("Sync consumer already exists (device=%s, disk=%s).",
1700             sc->sc_name, g_mirror_get_diskname(disk)));
1701         disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom);
1702         disk->d_sync.ds_consumer->private = disk;
1703         disk->d_sync.ds_consumer->index = 0;
1704         error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
1705         KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
1706             disk->d_softc->sc_name, error));
1707         error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
1708         KASSERT(error == 0, ("Cannot open %s (error=%d).",
1709             disk->d_softc->sc_name, error));
1710         disk->d_sync.ds_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
1711         sc->sc_sync.ds_ndisks++;
1712 }
1713
1714 /*
1715  * Stop synchronization process.
1716  * type: 0 - synchronization finished
1717  *       1 - synchronization stopped
1718  */
1719 static void
1720 g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
1721 {
1722
1723         g_topology_assert();
1724         KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1725             ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1726             g_mirror_disk_state2str(disk->d_state)));
1727         if (disk->d_sync.ds_consumer == NULL)
1728                 return;
1729
1730         if (type == 0) {
1731                 G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
1732                     disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1733         } else /* if (type == 1) */ {
1734                 G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
1735                     disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1736         }
1737         g_mirror_kill_consumer(disk->d_softc, disk->d_sync.ds_consumer);
1738         free(disk->d_sync.ds_data, M_MIRROR);
1739         disk->d_sync.ds_consumer = NULL;
1740         disk->d_softc->sc_sync.ds_ndisks--;
1741         disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1742 }
1743
1744 static void
1745 g_mirror_launch_provider(struct g_mirror_softc *sc)
1746 {
1747         struct g_mirror_disk *disk;
1748         struct g_provider *pp;
1749
1750         g_topology_assert();
1751
1752         pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
1753         pp->mediasize = sc->sc_mediasize;
1754         pp->sectorsize = sc->sc_sectorsize;
1755         sc->sc_provider = pp;
1756         g_error_provider(pp, 0);
1757         G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
1758             pp->name);
1759         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1760                 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1761                         g_mirror_sync_start(disk);
1762         }
1763 }
1764
1765 static void
1766 g_mirror_destroy_provider(struct g_mirror_softc *sc)
1767 {
1768         struct g_mirror_disk *disk;
1769         struct bio *bp;
1770
1771         g_topology_assert();
1772         KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
1773             sc->sc_name));
1774
1775         g_error_provider(sc->sc_provider, ENXIO);
1776         mtx_lock(&sc->sc_queue_mtx);
1777         while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
1778                 bioq_remove(&sc->sc_queue, bp);
1779                 g_io_deliver(bp, ENXIO);
1780         }
1781         mtx_unlock(&sc->sc_queue_mtx);
1782         G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
1783             sc->sc_provider->name);
1784         sc->sc_provider->flags |= G_PF_WITHER;
1785         g_orphan_provider(sc->sc_provider, ENXIO);
1786         sc->sc_provider = NULL;
1787         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1788                 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1789                         g_mirror_sync_stop(disk, 1);
1790         }
1791 }
1792
1793 static void
1794 g_mirror_go(void *arg)
1795 {
1796         struct g_mirror_softc *sc;
1797
1798         sc = arg;
1799         G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
1800         g_mirror_event_send(sc, 0,
1801             G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
1802 }
1803
1804 static u_int
1805 g_mirror_determine_state(struct g_mirror_disk *disk)
1806 {
1807         struct g_mirror_softc *sc;
1808         u_int state;
1809
1810         sc = disk->d_softc;
1811         if (sc->sc_syncid == disk->d_sync.ds_syncid) {
1812                 if ((disk->d_flags &
1813                     G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1814                         /* Disk does not need synchronization. */
1815                         state = G_MIRROR_DISK_STATE_ACTIVE;
1816                 } else {
1817                         if ((sc->sc_flags &
1818                              G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0  ||
1819                             (disk->d_flags &
1820                              G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1821                                 /*
1822                                  * We can start synchronization from
1823                                  * the stored offset.
1824                                  */
1825                                 state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1826                         } else {
1827                                 state = G_MIRROR_DISK_STATE_STALE;
1828                         }
1829                 }
1830         } else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
1831                 /*
1832                  * Reset all synchronization data for this disk,
1833                  * because if it even was synchronized, it was
1834                  * synchronized to disks with different syncid.
1835                  */
1836                 disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
1837                 disk->d_sync.ds_offset = 0;
1838                 disk->d_sync.ds_offset_done = 0;
1839                 disk->d_sync.ds_syncid = sc->sc_syncid;
1840                 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
1841                     (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1842                         state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1843                 } else {
1844                         state = G_MIRROR_DISK_STATE_STALE;
1845                 }
1846         } else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
1847                 /*
1848                  * Not good, NOT GOOD!
1849                  * It means that mirror was started on stale disks
1850                  * and more fresh disk just arrive.
1851                  * If there were writes, mirror is fucked up, sorry.
1852                  * I think the best choice here is don't touch
1853                  * this disk and inform the user laudly.
1854                  */
1855                 G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
1856                     "disk (%s) arrives!! It will not be connected to the "
1857                     "running device.", sc->sc_name,
1858                     g_mirror_get_diskname(disk));
1859                 g_mirror_destroy_disk(disk);
1860                 state = G_MIRROR_DISK_STATE_NONE;
1861                 /* Return immediately, because disk was destroyed. */
1862                 return (state);
1863         }
1864         G_MIRROR_DEBUG(3, "State for %s disk: %s.",
1865             g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
1866         return (state);
1867 }
1868
1869 /*
1870  * Update device state.
      *
      * STARTING: once all sc_ndisks disks are connected (or 'force' is true and
      * at least one disk is connected), pick the biggest genid and syncid,
      * destroy disks with an older genid, zero the syncid of dirty disks that
      * have to be rebuilt, switch the device to RUNNING and send every disk an
      * event carrying its new state.  If no disk is connected at all, or a
      * forced start finds no disk that is not marked SYNCHRONIZING, the device
      * is flagged for destruction instead.
      *
      * RUNNING: flag the device for destruction when it has neither ACTIVE nor
      * NEW disks, launch the provider when there are ACTIVE disks and no NEW
      * ones, and bump genid if that was requested.
      *
      * sc    - device to update
      * force - start the device even if not all disks are present; true when
      *         called due to the start timeout
1871  */
1872 static void
1873 g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
1874 {
1875         struct g_mirror_disk *disk;
1876         u_int state;
1877
1878         g_topology_assert();
1879
1880         switch (sc->sc_state) {
1881         case G_MIRROR_DEVICE_STATE_STARTING:
1882             {
1883                 struct g_mirror_disk *pdisk, *tdisk;
1884                 u_int dirty, ndisks, genid, syncid;
1885
1886                 KASSERT(sc->sc_provider == NULL,
1887                     ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
1888                 /*
1889                  * Are we ready? We are, if all disks are connected or
1890                  * if we have any disks and 'force' is true.
1891                  */
1892                 if ((force && g_mirror_ndisks(sc, -1) > 0) ||
1893                     sc->sc_ndisks == g_mirror_ndisks(sc, -1)) {
                              /* Ready; continue with the start-up below. */
1894                         ;
1895                 } else if (g_mirror_ndisks(sc, -1) == 0) {
1896                         /*
1897                          * Disks went down in starting phase, so destroy
1898                          * device.
1899                          */
1900                         callout_drain(&sc->sc_callout);
1901                         sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1902                         G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
1903                             sc->sc_rootmount);
1904                         root_mount_rel(sc->sc_rootmount);
1905                         sc->sc_rootmount = NULL;
1906                         return;
1907                 } else {
                              /* Not all disks are here yet; keep waiting. */
1908                         return;
1909                 }
1910
1911                 /*
1912                  * Activate all disks with the biggest syncid.
1913                  */
1914                 if (force) {
1915                         /*
1916                          * If 'force' is true, we have been called due to
1917                          * timeout, so don't bother canceling timeout.
                               *
                               * Count the disks that are not marked as
                               * SYNCHRONIZING; without at least one of them
                               * the device is destroyed.
1918                          */
1919                         ndisks = 0;
1920                         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1921                                 if ((disk->d_flags &
1922                                     G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1923                                         ndisks++;
1924                                 }
1925                         }
1926                         if (ndisks == 0) {
1927                                 /* No valid disks found, destroy device. */
1928                                 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1929                                 G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
1930                                     __LINE__, sc->sc_rootmount);
1931                                 root_mount_rel(sc->sc_rootmount);
1932                                 sc->sc_rootmount = NULL;
1933                                 return;
1934                         }
1935                 } else {
1936                         /* Cancel timeout. */
1937                         callout_drain(&sc->sc_callout);
1938                 }
1939
1940                 /*
1941                  * Find the biggest genid.
1942                  */
1943                 genid = 0;
1944                 LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1945                         if (disk->d_genid > genid)
1946                                 genid = disk->d_genid;
1947                 }
1948                 sc->sc_genid = genid;
1949                 /*
1950                  * Remove all disks without the biggest genid.
                       * The _SAFE iterator is needed because disks are destroyed
                       * while walking the list.
1951                  */
1952                 LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
1953                         if (disk->d_genid < genid) {
1954                                 G_MIRROR_DEBUG(0,
1955                                     "Component %s (device %s) broken, skipping.",
1956                                     g_mirror_get_diskname(disk), sc->sc_name);
1957                                 g_mirror_destroy_disk(disk);
1958                         }
1959                 }
1960
1961                 /*
1962                  * Find the biggest syncid.
1963                  */
1964                 syncid = 0;
1965                 LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1966                         if (disk->d_sync.ds_syncid > syncid)
1967                                 syncid = disk->d_sync.ds_syncid;
1968                 }
1969
1970                 /*
1971                  * Here we need to look for dirty disks and if all disks
1972                  * with the biggest syncid are dirty, we have to choose
1973                  * one with the biggest priority and rebuild the rest.
1974                  */
1975                 /*
1976                  * Find the number of dirty disks with the biggest syncid.
1977                  * Find the number of disks with the biggest syncid.
1978                  * While here, find a disk with the biggest priority.
                       * Disks marked SYNCHRONIZING are not counted at all.
1979                  */
1980                 dirty = ndisks = 0;
1981                 pdisk = NULL;
1982                 LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1983                         if (disk->d_sync.ds_syncid != syncid)
1984                                 continue;
1985                         if ((disk->d_flags &
1986                             G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1987                                 continue;
1988                         }
1989                         ndisks++;
1990                         if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1991                                 dirty++;
1992                                 if (pdisk == NULL ||
1993                                     pdisk->d_priority < disk->d_priority) {
1994                                         pdisk = disk;
1995                                 }
1996                         }
1997                 }
1998                 if (dirty == 0) {
1999                         /* No dirty disks at all, great. */
2000                 } else if (dirty == ndisks) {
2001                         /*
2002                          * Force synchronization for all dirty disks except one
2003                          * with the biggest priority.
2004                          */
2005                         KASSERT(pdisk != NULL, ("pdisk == NULL"));
2006                         G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2007                             "master disk for synchronization.",
2008                             g_mirror_get_diskname(pdisk), sc->sc_name);
2009                         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2010                                 if (disk->d_sync.ds_syncid != syncid)
2011                                         continue;
2012                                 if ((disk->d_flags &
2013                                     G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2014                                         continue;
2015                                 }
2016                                 KASSERT((disk->d_flags &
2017                                     G_MIRROR_DISK_FLAG_DIRTY) != 0,
2018                                     ("Disk %s isn't marked as dirty.",
2019                                     g_mirror_get_diskname(disk)));
2020                                 /* Skip the disk with the biggest priority. */
2021                                 if (disk == pdisk)
2022                                         continue;
                                      /*
                                       * A syncid below the device's makes
                                       * g_mirror_determine_state() reset the
                                       * disk's synchronization data.
                                       */
2023                                 disk->d_sync.ds_syncid = 0;
2024                         }
2025                 } else if (dirty < ndisks) {
2026                         /*
2027                          * Force synchronization for all dirty disks.
2028                          * We have some non-dirty disks.
2029                          */
2030                         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2031                                 if (disk->d_sync.ds_syncid != syncid)
2032                                         continue;
2033                                 if ((disk->d_flags &
2034                                     G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2035                                         continue;
2036                                 }
2037                                 if ((disk->d_flags &
2038                                     G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2039                                         continue;
2040                                 }
2041                                 disk->d_sync.ds_syncid = 0;
2042                         }
2043                 }
2044
2045                 /* Reset hint. */
2046                 sc->sc_hint = NULL;
2047                 sc->sc_syncid = syncid;
2048                 if (force) {
2049                         /* Remember to bump syncid on first write. */
2050                         sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2051                 }
2052                 state = G_MIRROR_DEVICE_STATE_RUNNING;
2053                 G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2054                     sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2055                     g_mirror_device_state2str(state));
2056                 sc->sc_state = state;
                      /*
                       * Tell each disk which state to enter.  sc_syncid is the
                       * biggest syncid found above, so no disk can have a bigger
                       * one and g_mirror_determine_state() will not destroy a
                       * disk inside this loop.  'state' is reused here for the
                       * disk state.
                       */
2057                 LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2058                         state = g_mirror_determine_state(disk);
2059                         g_mirror_event_send(disk, state,
2060                             G_MIRROR_EVENT_DONTWAIT);
2061                         if (state == G_MIRROR_DISK_STATE_STALE)
2062                                 sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2063                 }
2064                 break;
2065             }
2066         case G_MIRROR_DEVICE_STATE_RUNNING:
2067                 if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2068                     g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2069                         /*
2070                          * No active disks or no disks at all,
2071                          * so destroy device.
2072                          */
2073                         if (sc->sc_provider != NULL)
2074                                 g_mirror_destroy_provider(sc);
2075                         sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2076                         break;
2077                 } else if (g_mirror_ndisks(sc,
2078                     G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2079                     g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2080                         /*
2081                          * We have active disks, launch provider if it doesn't
2082                          * exist.
2083                          */
2084                         if (sc->sc_provider == NULL)
2085                                 g_mirror_launch_provider(sc);
2086                         if (sc->sc_rootmount != NULL) {
2087                                 G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2088                                     __LINE__, sc->sc_rootmount);
2089                                 root_mount_rel(sc->sc_rootmount);
2090                                 sc->sc_rootmount = NULL;
2091                         }
2092                 }
2093                 /*
2094                  * Genid should be bumped immediately, so do it here.
2095                  */
2096                 if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2097                         sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2098                         g_mirror_bump_genid(sc);
2099                 }
2100                 break;
2101         default:
2102                 KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2103                     sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2104                 break;
2105         }
2106 }
2107
2108 /*
2109  * Update disk state and device state if needed.
2110  */
2111 #define DISK_STATE_CHANGED()    G_MIRROR_DEBUG(1,                       \
2112         "Disk %s state changed from %s to %s (device %s).",             \
2113         g_mirror_get_diskname(disk),                                    \
2114         g_mirror_disk_state2str(disk->d_state),                         \
2115         g_mirror_disk_state2str(state), sc->sc_name)
2116 static int
2117 g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2118 {
2119         struct g_mirror_softc *sc;
2120
2121         g_topology_assert();
2122
2123         sc = disk->d_softc;
2124 again:
2125         G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2126             g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2127             g_mirror_disk_state2str(state));
2128         switch (state) {
2129         case G_MIRROR_DISK_STATE_NEW:
2130                 /*
2131                  * Possible scenarios:
2132                  * 1. New disk arrives.
2133                  */
2134                 /* Previous state should be NONE. */
2135                 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2136                     ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2137                     g_mirror_disk_state2str(disk->d_state)));
2138                 DISK_STATE_CHANGED();
2139
2140                 disk->d_state = state;
2141                 if (LIST_EMPTY(&sc->sc_disks))
2142                         LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2143                 else {
2144                         struct g_mirror_disk *dp;
2145
2146                         LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2147                                 if (disk->d_priority >= dp->d_priority) {
2148                                         LIST_INSERT_BEFORE(dp, disk, d_next);
2149                                         dp = NULL;
2150                                         break;
2151                                 }
2152                                 if (LIST_NEXT(dp, d_next) == NULL)
2153                                         break;
2154                         }
2155                         if (dp != NULL)
2156                                 LIST_INSERT_AFTER(dp, disk, d_next);
2157                 }
2158                 G_MIRROR_DEBUG(0, "Device %s: provider %s detected.",
2159                     sc->sc_name, g_mirror_get_diskname(disk));
2160                 if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2161                         break;
2162                 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2163                     ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2164                     g_mirror_device_state2str(sc->sc_state),
2165                     g_mirror_get_diskname(disk),
2166                     g_mirror_disk_state2str(disk->d_state)));
2167                 state = g_mirror_determine_state(disk);
2168                 if (state != G_MIRROR_DISK_STATE_NONE)
2169                         goto again;
2170                 break;
2171         case G_MIRROR_DISK_STATE_ACTIVE:
2172                 /*
2173                  * Possible scenarios:
2174                  * 1. New disk does not need synchronization.
2175                  * 2. Synchronization process finished successfully.
2176                  */
2177                 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2178                     ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2179                     g_mirror_device_state2str(sc->sc_state),
2180                     g_mirror_get_diskname(disk),
2181                     g_mirror_disk_state2str(disk->d_state)));
2182                 /* Previous state should be NEW or SYNCHRONIZING. */
2183                 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2184                     disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2185                     ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2186                     g_mirror_disk_state2str(disk->d_state)));
2187                 DISK_STATE_CHANGED();
2188
2189                 if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2190                         disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2191                 else if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2192                         disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2193                         disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2194                         g_mirror_sync_stop(disk, 0);
2195                 }
2196                 disk->d_state = state;
2197                 disk->d_sync.ds_offset = 0;
2198                 disk->d_sync.ds_offset_done = 0;
2199                 g_mirror_update_access(disk);
2200                 g_mirror_update_metadata(disk);
2201                 G_MIRROR_DEBUG(0, "Device %s: provider %s activated.",
2202                     sc->sc_name, g_mirror_get_diskname(disk));
2203                 break;
2204         case G_MIRROR_DISK_STATE_STALE:
2205                 /*
2206                  * Possible scenarios:
2207                  * 1. Stale disk was connected.
2208                  */
2209                 /* Previous state should be NEW. */
2210                 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2211                     ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2212                     g_mirror_disk_state2str(disk->d_state)));
2213                 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2214                     ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2215                     g_mirror_device_state2str(sc->sc_state),
2216                     g_mirror_get_diskname(disk),
2217                     g_mirror_disk_state2str(disk->d_state)));
2218                 /*
2219                  * STALE state is only possible if device is marked
2220                  * NOAUTOSYNC.
2221                  */
2222                 KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2223                     ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2224                     g_mirror_device_state2str(sc->sc_state),
2225                     g_mirror_get_diskname(disk),
2226                     g_mirror_disk_state2str(disk->d_state)));
2227                 DISK_STATE_CHANGED();
2228
2229                 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2230                 disk->d_state = state;
2231                 g_mirror_update_metadata(disk);
2232                 G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2233                     sc->sc_name, g_mirror_get_diskname(disk));
2234                 break;
2235         case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2236                 /*
2237                  * Possible scenarios:
2238                  * 1. Disk which needs synchronization was connected.
2239                  */
2240                 /* Previous state should be NEW. */
2241                 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2242                     ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2243                     g_mirror_disk_state2str(disk->d_state)));
2244                 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2245                     ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2246                     g_mirror_device_state2str(sc->sc_state),
2247                     g_mirror_get_diskname(disk),
2248                     g_mirror_disk_state2str(disk->d_state)));
2249                 DISK_STATE_CHANGED();
2250
2251                 if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2252                         disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2253                 disk->d_state = state;
2254                 if (sc->sc_provider != NULL) {
2255                         g_mirror_sync_start(disk);
2256                         g_mirror_update_metadata(disk);
2257                 }
2258                 break;
2259         case G_MIRROR_DISK_STATE_DISCONNECTED:
2260                 /*
2261                  * Possible scenarios:
2262                  * 1. Device wasn't running yet, but disk disappeared.
2263                  * 2. Disk was active and disappeared.
2264                  * 3. Disk disappeared during synchronization process.
2265                  */
2266                 if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2267                         /*
2268                          * Previous state should be ACTIVE, STALE or
2269                          * SYNCHRONIZING.
2270                          */
2271                         KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2272                             disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2273                             disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2274                             ("Wrong disk state (%s, %s).",
2275                             g_mirror_get_diskname(disk),
2276                             g_mirror_disk_state2str(disk->d_state)));
2277                 } else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2278                         /* Previous state should be NEW. */
2279                         KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2280                             ("Wrong disk state (%s, %s).",
2281                             g_mirror_get_diskname(disk),
2282                             g_mirror_disk_state2str(disk->d_state)));
2283                         /*
2284                          * Reset bumping syncid if disk disappeared in STARTING
2285                          * state.
2286                          */
2287                         if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2288                                 sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2289 #ifdef  INVARIANTS
2290                 } else {
2291                         KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2292                             sc->sc_name,
2293                             g_mirror_device_state2str(sc->sc_state),
2294                             g_mirror_get_diskname(disk),
2295                             g_mirror_disk_state2str(disk->d_state)));
2296 #endif
2297                 }
2298                 DISK_STATE_CHANGED();
2299                 G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2300                     sc->sc_name, g_mirror_get_diskname(disk));
2301
2302                 g_mirror_destroy_disk(disk);
2303                 break;
2304         case G_MIRROR_DISK_STATE_DESTROY:
2305             {
2306                 int error;
2307
2308                 error = g_mirror_clear_metadata(disk);
2309                 if (error != 0)
2310                         return (error);
2311                 DISK_STATE_CHANGED();
2312                 G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2313                     sc->sc_name, g_mirror_get_diskname(disk));
2314
2315                 g_mirror_destroy_disk(disk);
2316                 sc->sc_ndisks--;
2317                 LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2318                         g_mirror_update_metadata(disk);
2319                 }
2320                 break;
2321             }
2322         default:
2323                 KASSERT(1 == 0, ("Unknown state (%u).", state));
2324                 break;
2325         }
2326         return (0);
2327 }
2328 #undef  DISK_STATE_CHANGED
2329
2330 int
2331 g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2332 {
2333         struct g_provider *pp;
2334         u_char *buf;
2335         int error;
2336
2337         g_topology_assert();
2338
2339         error = g_access(cp, 1, 0, 0);
2340         if (error != 0)
2341                 return (error);
2342         pp = cp->provider;
2343         g_topology_unlock();
2344         /* Metadata are stored on last sector. */
2345         buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2346             &error);
2347         g_topology_lock();
2348         g_access(cp, -1, 0, 0);
2349         if (error != 0) {
2350                 G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2351                     cp->provider->name, error);
2352                 if (buf != NULL)
2353                         g_free(buf);
2354                 return (error);
2355         }
2356
2357         /* Decode metadata. */
2358         error = mirror_metadata_decode(buf, md);
2359         g_free(buf);
2360         if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2361                 return (EINVAL);
2362         if (md->md_version > G_MIRROR_VERSION) {
2363                 G_MIRROR_DEBUG(0,
2364                     "Kernel module is too old to handle metadata from %s.",
2365                     cp->provider->name);
2366                 return (EINVAL);
2367         }
2368         if (error != 0) {
2369                 G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2370                     cp->provider->name);
2371                 return (error);
2372         }
2373
2374         return (0);
2375 }
2376
2377 static int
2378 g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2379     struct g_mirror_metadata *md)
2380 {
2381
2382         if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2383                 G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2384                     pp->name, md->md_did);
2385                 return (EEXIST);
2386         }
2387         if (md->md_all != sc->sc_ndisks) {
2388                 G_MIRROR_DEBUG(1,
2389                     "Invalid '%s' field on disk %s (device %s), skipping.",
2390                     "md_all", pp->name, sc->sc_name);
2391                 return (EINVAL);
2392         }
2393         if (md->md_slice != sc->sc_slice) {
2394                 G_MIRROR_DEBUG(1,
2395                     "Invalid '%s' field on disk %s (device %s), skipping.",
2396                     "md_slice", pp->name, sc->sc_name);
2397                 return (EINVAL);
2398         }
2399         if (md->md_balance != sc->sc_balance) {
2400                 G_MIRROR_DEBUG(1,
2401                     "Invalid '%s' field on disk %s (device %s), skipping.",
2402                     "md_balance", pp->name, sc->sc_name);
2403                 return (EINVAL);
2404         }
2405         if (md->md_mediasize != sc->sc_mediasize) {
2406                 G_MIRROR_DEBUG(1,
2407                     "Invalid '%s' field on disk %s (device %s), skipping.",
2408                     "md_mediasize", pp->name, sc->sc_name);
2409                 return (EINVAL);
2410         }
2411         if (sc->sc_mediasize > pp->mediasize) {
2412                 G_MIRROR_DEBUG(1,
2413                     "Invalid size of disk %s (device %s), skipping.", pp->name,
2414                     sc->sc_name);
2415                 return (EINVAL);
2416         }
2417         if (md->md_sectorsize != sc->sc_sectorsize) {
2418                 G_MIRROR_DEBUG(1,
2419                     "Invalid '%s' field on disk %s (device %s), skipping.",
2420                     "md_sectorsize", pp->name, sc->sc_name);
2421                 return (EINVAL);
2422         }
2423         if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2424                 G_MIRROR_DEBUG(1,
2425                     "Invalid sector size of disk %s (device %s), skipping.",
2426                     pp->name, sc->sc_name);
2427                 return (EINVAL);
2428         }
2429         if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2430                 G_MIRROR_DEBUG(1,
2431                     "Invalid device flags on disk %s (device %s), skipping.",
2432                     pp->name, sc->sc_name);
2433                 return (EINVAL);
2434         }
2435         if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2436                 G_MIRROR_DEBUG(1,
2437                     "Invalid disk flags on disk %s (device %s), skipping.",
2438                     pp->name, sc->sc_name);
2439                 return (EINVAL);
2440         }
2441         return (0);
2442 }
2443
2444 int
2445 g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2446     struct g_mirror_metadata *md)
2447 {
2448         struct g_mirror_disk *disk;
2449         int error;
2450
2451         g_topology_assert();
2452         G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2453
2454         error = g_mirror_check_metadata(sc, pp, md);
2455         if (error != 0)
2456                 return (error);
2457         if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
2458             md->md_genid < sc->sc_genid) {
2459                 G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2460                     pp->name, sc->sc_name);
2461                 return (EINVAL);
2462         }
2463         disk = g_mirror_init_disk(sc, pp, md, &error);
2464         if (disk == NULL)
2465                 return (error);
2466         error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2467             G_MIRROR_EVENT_WAIT);
2468         if (error != 0)
2469                 return (error);
2470         if (md->md_version < G_MIRROR_VERSION) {
2471                 G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2472                     pp->name, md->md_version, G_MIRROR_VERSION);
2473                 g_mirror_update_metadata(disk);
2474         }
2475         return (0);
2476 }
2477
2478 static int
2479 g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2480 {
2481         struct g_mirror_softc *sc;
2482         struct g_mirror_disk *disk;
2483         int dcr, dcw, dce;
2484
2485         g_topology_assert();
2486         G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2487             acw, ace);
2488
2489         dcr = pp->acr + acr;
2490         dcw = pp->acw + acw;
2491         dce = pp->ace + ace;
2492
2493         sc = pp->geom->softc;
2494         if (sc == NULL || LIST_EMPTY(&sc->sc_disks) ||
2495             (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
2496                 if (acr <= 0 && acw <= 0 && ace <= 0)
2497                         return (0);
2498                 else
2499                         return (ENXIO);
2500         }
2501         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2502                 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
2503                         continue;
2504                 /*
2505                  * Mark disk as dirty on open and unmark on close.
2506                  */
2507                 if (pp->acw == 0 && dcw > 0) {
2508                         G_MIRROR_DEBUG(1,
2509                             "Disk %s (device %s) marked as dirty.",
2510                             g_mirror_get_diskname(disk), sc->sc_name);
2511                         disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2512                         g_mirror_update_metadata(disk);
2513                 } else if (pp->acw > 0 && dcw == 0) {
2514                         G_MIRROR_DEBUG(1,
2515                             "Disk %s (device %s) marked as clean.",
2516                             g_mirror_get_diskname(disk), sc->sc_name);
2517                         disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2518                         g_mirror_update_metadata(disk);
2519                 }
2520         }
2521         return (0);
2522 }
2523
2524 static struct g_geom *
2525 g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
2526 {
2527         struct g_mirror_softc *sc;
2528         struct g_geom *gp;
2529         int error, timeout;
2530
2531         g_topology_assert();
2532         G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2533             md->md_mid);
2534
2535         /* One disk is minimum. */
2536         if (md->md_all < 1)
2537                 return (NULL);
2538         /*
2539          * Action geom.
2540          */
2541         gp = g_new_geomf(mp, "%s", md->md_name);
2542         sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2543         gp->start = g_mirror_start;
2544         gp->orphan = g_mirror_orphan;
2545         gp->access = g_mirror_access;
2546         gp->dumpconf = g_mirror_dumpconf;
2547
2548         sc->sc_id = md->md_mid;
2549         sc->sc_slice = md->md_slice;
2550         sc->sc_balance = md->md_balance;
2551         sc->sc_mediasize = md->md_mediasize;
2552         sc->sc_sectorsize = md->md_sectorsize;
2553         sc->sc_ndisks = md->md_all;
2554         sc->sc_flags = md->md_mflags;
2555         sc->sc_bump_id = 0;
2556         sc->sc_idle = 0;
2557         bioq_init(&sc->sc_queue);
2558         mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2559         LIST_INIT(&sc->sc_disks);
2560         TAILQ_INIT(&sc->sc_events);
2561         mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2562         callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
2563         sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2564         gp->softc = sc;
2565         sc->sc_geom = gp;
2566         sc->sc_provider = NULL;
2567         /*
2568          * Synchronization geom.
2569          */
2570         gp = g_new_geomf(mp, "%s.sync", md->md_name);
2571         gp->softc = sc;
2572         gp->orphan = g_mirror_orphan;
2573         sc->sc_sync.ds_geom = gp;
2574         sc->sc_sync.ds_ndisks = 0;
2575         error = kthread_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2576             "g_mirror %s", md->md_name);
2577         if (error != 0) {
2578                 G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
2579                     sc->sc_name);
2580                 g_destroy_geom(sc->sc_sync.ds_geom);
2581                 mtx_destroy(&sc->sc_events_mtx);
2582                 mtx_destroy(&sc->sc_queue_mtx);
2583                 g_destroy_geom(sc->sc_geom);
2584                 free(sc, M_MIRROR);
2585                 return (NULL);
2586         }
2587
2588         G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
2589
2590         sc->sc_rootmount = root_mount_hold("GMIRROR");
2591         G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
2592         /*
2593          * Run timeout.
2594          */
2595         timeout = g_mirror_timeout * hz;
2596         callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
2597         return (sc->sc_geom);
2598 }
2599
2600 int
2601 g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force)
2602 {
2603         struct g_provider *pp;
2604
2605         g_topology_assert();
2606
2607         if (sc == NULL)
2608                 return (ENXIO);
2609         pp = sc->sc_provider;
2610         if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
2611                 if (force) {
2612                         G_MIRROR_DEBUG(1, "Device %s is still open, so it "
2613                             "can't be definitely removed.", pp->name);
2614                 } else {
2615                         G_MIRROR_DEBUG(1,
2616                             "Device %s is still open (r%dw%de%d).", pp->name,
2617                             pp->acr, pp->acw, pp->ace);
2618                         return (EBUSY);
2619                 }
2620         }
2621
2622         sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2623         sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
2624         g_topology_unlock();
2625         G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
2626         mtx_lock(&sc->sc_queue_mtx);
2627         wakeup(sc);
2628         mtx_unlock(&sc->sc_queue_mtx);
2629         G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
2630         while (sc->sc_worker != NULL)
2631                 tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
2632         G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
2633         g_topology_lock();
2634         g_mirror_destroy_device(sc);
2635         free(sc, M_MIRROR);
2636         return (0);
2637 }
2638
2639 static void
2640 g_mirror_taste_orphan(struct g_consumer *cp)
2641 {
2642
2643         KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
2644             cp->provider->name));
2645 }
2646
2647 static struct g_geom *
2648 g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
2649 {
2650         struct g_mirror_metadata md;
2651         struct g_mirror_softc *sc;
2652         struct g_consumer *cp;
2653         struct g_geom *gp;
2654         int error;
2655
2656         g_topology_assert();
2657         g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
2658         G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
2659
2660         gp = g_new_geomf(mp, "mirror:taste");
2661         /*
2662          * This orphan function should be never called.
2663          */
2664         gp->orphan = g_mirror_taste_orphan;
2665         cp = g_new_consumer(gp);
2666         g_attach(cp, pp);
2667         error = g_mirror_read_metadata(cp, &md);
2668         g_detach(cp);
2669         g_destroy_consumer(cp);
2670         g_destroy_geom(gp);
2671         if (error != 0)
2672                 return (NULL);
2673         gp = NULL;
2674
2675         if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0)
2676                 return (NULL);
2677         if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
2678                 return (NULL);
2679         if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
2680                 G_MIRROR_DEBUG(0,
2681                     "Device %s: provider %s marked as inactive, skipping.",
2682                     md.md_name, pp->name);
2683                 return (NULL);
2684         }
2685         if (g_mirror_debug >= 2)
2686                 mirror_metadata_dump(&md);
2687
2688         /*
2689          * Let's check if device already exists.
2690          */
2691         sc = NULL;
2692         LIST_FOREACH(gp, &mp->geom, geom) {
2693                 sc = gp->softc;
2694                 if (sc == NULL)
2695                         continue;
2696                 if (sc->sc_sync.ds_geom == gp)
2697                         continue;
2698                 if (strcmp(md.md_name, sc->sc_name) != 0)
2699                         continue;
2700                 if (md.md_mid != sc->sc_id) {
2701                         G_MIRROR_DEBUG(0, "Device %s already configured.",
2702                             sc->sc_name);
2703                         return (NULL);
2704                 }
2705                 break;
2706         }
2707         if (gp == NULL) {
2708                 gp = g_mirror_create(mp, &md);
2709                 if (gp == NULL) {
2710                         G_MIRROR_DEBUG(0, "Cannot create device %s.",
2711                             md.md_name);
2712                         return (NULL);
2713                 }
2714                 sc = gp->softc;
2715         }
2716         G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
2717         error = g_mirror_add_disk(sc, pp, &md);
2718         if (error != 0) {
2719                 G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
2720                     pp->name, gp->name, error);
2721                 if (LIST_EMPTY(&sc->sc_disks))
2722                         g_mirror_destroy(sc, 1);
2723                 return (NULL);
2724         }
2725         return (gp);
2726 }
2727
2728 static int
2729 g_mirror_destroy_geom(struct gctl_req *req __unused,
2730     struct g_class *mp __unused, struct g_geom *gp)
2731 {
2732
2733         return (g_mirror_destroy(gp->softc, 0));
2734 }
2735
2736 static void
2737 g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
2738     struct g_consumer *cp, struct g_provider *pp)
2739 {
2740         struct g_mirror_softc *sc;
2741
2742         g_topology_assert();
2743
2744         sc = gp->softc;
2745         if (sc == NULL)
2746                 return;
2747         /* Skip synchronization geom. */
2748         if (gp == sc->sc_sync.ds_geom)
2749                 return;
2750         if (pp != NULL) {
2751                 /* Nothing here. */
2752         } else if (cp != NULL) {
2753                 struct g_mirror_disk *disk;
2754
2755                 disk = cp->private;
2756                 if (disk == NULL)
2757                         return;
2758                 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
2759                 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2760                         sbuf_printf(sb, "%s<Synchronized>", indent);
2761                         if (disk->d_sync.ds_offset_done == 0)
2762                                 sbuf_printf(sb, "0%%");
2763                         else {
2764                                 sbuf_printf(sb, "%u%%",
2765                                     (u_int)((disk->d_sync.ds_offset_done * 100) /
2766                                     sc->sc_provider->mediasize));
2767                         }
2768                         sbuf_printf(sb, "</Synchronized>\n");
2769                 }
2770                 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
2771                     disk->d_sync.ds_syncid);
2772                 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
2773                     disk->d_genid);
2774                 sbuf_printf(sb, "%s<Flags>", indent);
2775                 if (disk->d_flags == 0)
2776                         sbuf_printf(sb, "NONE");
2777                 else {
2778                         int first = 1;
2779
2780 #define ADD_FLAG(flag, name)    do {                                    \
2781         if ((disk->d_flags & (flag)) != 0) {                            \
2782                 if (!first)                                             \
2783                         sbuf_printf(sb, ", ");                          \
2784                 else                                                    \
2785                         first = 0;                                      \
2786                 sbuf_printf(sb, name);                                  \
2787         }                                                               \
2788 } while (0)
2789                         ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
2790                         ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
2791                         ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
2792                         ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
2793                             "SYNCHRONIZING");
2794                         ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
2795 #undef  ADD_FLAG
2796                 }
2797                 sbuf_printf(sb, "</Flags>\n");
2798                 sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
2799                     disk->d_priority);
2800                 sbuf_printf(sb, "%s<State>%s</State>\n", indent,
2801                     g_mirror_disk_state2str(disk->d_state));
2802         } else {
2803                 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
2804                 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
2805                 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
2806                 sbuf_printf(sb, "%s<Flags>", indent);
2807                 if (sc->sc_flags == 0)
2808                         sbuf_printf(sb, "NONE");
2809                 else {
2810                         int first = 1;
2811
2812 #define ADD_FLAG(flag, name)    do {                                    \
2813         if ((sc->sc_flags & (flag)) != 0) {                             \
2814                 if (!first)                                             \
2815                         sbuf_printf(sb, ", ");                          \
2816                 else                                                    \
2817                         first = 0;                                      \
2818                 sbuf_printf(sb, name);                                  \
2819         }                                                               \
2820 } while (0)
2821                         ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
2822 #undef  ADD_FLAG
2823                 }
2824                 sbuf_printf(sb, "</Flags>\n");
2825                 sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
2826                     (u_int)sc->sc_slice);
2827                 sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
2828                     balance_name(sc->sc_balance));
2829                 sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
2830                     sc->sc_ndisks);
2831                 sbuf_printf(sb, "%s<State>", indent);
2832                 if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2833                         sbuf_printf(sb, "%s", "STARTING");
2834                 else if (sc->sc_ndisks ==
2835                     g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
2836                         sbuf_printf(sb, "%s", "COMPLETE");
2837                 else
2838                         sbuf_printf(sb, "%s", "DEGRADED");
2839                 sbuf_printf(sb, "</State>\n");
2840         }
2841 }
2842
2843 static void
2844 g_mirror_shutdown(void *arg, int howto)
2845 {
2846         struct g_class *mp;
2847         struct g_geom *gp, *gp2;
2848
2849         mp = arg;
2850         DROP_GIANT();
2851         g_topology_lock();
2852         LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
2853                 if (gp->softc == NULL)
2854                         continue;
2855                 g_mirror_destroy(gp->softc, 1);
2856         }
2857         g_topology_unlock();
2858         PICKUP_GIANT();
2859 #if 0
2860         tsleep(&gp, PRIBIO, "m:shutdown", hz * 20);
2861 #endif
2862 }
2863
2864 static void
2865 g_mirror_init(struct g_class *mp)
2866 {
2867
2868         g_mirror_ehtag = EVENTHANDLER_REGISTER(shutdown_post_sync,
2869             g_mirror_shutdown, mp, SHUTDOWN_PRI_FIRST);
2870         if (g_mirror_ehtag == NULL)
2871                 G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
2872 }
2873
2874 static void
2875 g_mirror_fini(struct g_class *mp)
2876 {
2877
2878         if (g_mirror_ehtag == NULL)
2879                 return;
2880         EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_ehtag);
2881 }
2882
/* Declare g_mirror_class as a GEOM class under the name g_mirror. */
DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);