1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bio.h>
35 #include <sys/eventhandler.h>
36 #include <sys/fail.h>
37 #include <sys/kernel.h>
38 #include <sys/kthread.h>
39 #include <sys/limits.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/sbuf.h>
45 #include <sys/sched.h>
46 #include <sys/sx.h>
47 #include <sys/sysctl.h>
48
49 #include <geom/geom.h>
50 #include <geom/geom_dbg.h>
51 #include <geom/mirror/g_mirror.h>
52
53 FEATURE(geom_mirror, "GEOM mirroring support");
54
55 static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");
56
57 SYSCTL_DECL(_kern_geom);
58 static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
59     "GEOM_MIRROR stuff");
60 int g_mirror_debug = 0;
61 SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
62     "Debug level");
63 bool g_launch_mirror_before_timeout = true;
64 SYSCTL_BOOL(_kern_geom_mirror, OID_AUTO, launch_mirror_before_timeout,
65     CTLFLAG_RWTUN, &g_launch_mirror_before_timeout, 0,
66     "If false, force gmirror to wait out the full kern.geom.mirror.timeout "
67     "before launching mirrors");
68 static u_int g_mirror_timeout = 4;
69 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
70     0, "Time to wait on all mirror components");
71 static u_int g_mirror_idletime = 5;
72 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN,
73     &g_mirror_idletime, 0, "Mark components as clean when idling");
74 static u_int g_mirror_disconnect_on_failure = 1;
75 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
76     &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
77 static u_int g_mirror_syncreqs = 2;
78 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
79     &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
80 static u_int g_mirror_sync_period = 5;
81 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_update_period, CTLFLAG_RWTUN,
82     &g_mirror_sync_period, 0,
83     "Metadata update period during synchronization, in seconds");
84
85 #define MSLEEP(ident, mtx, priority, wmesg, timeout)    do {            \
86         G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));       \
87         msleep((ident), (mtx), (priority), (wmesg), (timeout));         \
88         G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));       \
89 } while (0)
90
91 static eventhandler_tag g_mirror_post_sync = NULL;
92 static int g_mirror_shutdown = 0;
93
94 static g_ctl_destroy_geom_t g_mirror_destroy_geom;
95 static g_taste_t g_mirror_taste;
96 static g_init_t g_mirror_init;
97 static g_fini_t g_mirror_fini;
98 static g_provgone_t g_mirror_providergone;
99 static g_resize_t g_mirror_resize;
100
101 struct g_class g_mirror_class = {
102         .name = G_MIRROR_CLASS_NAME,
103         .version = G_VERSION,
104         .ctlreq = g_mirror_config,
105         .taste = g_mirror_taste,
106         .destroy_geom = g_mirror_destroy_geom,
107         .init = g_mirror_init,
108         .fini = g_mirror_fini,
109         .providergone = g_mirror_providergone,
110         .resize = g_mirror_resize
111 };
112
113 static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
114 static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
115 static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
116 static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
117     struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
118 static int g_mirror_refresh_device(struct g_mirror_softc *sc,
119     const struct g_provider *pp, const struct g_mirror_metadata *md);
120 static void g_mirror_sync_reinit(const struct g_mirror_disk *disk,
121     struct bio *bp, off_t offset);
122 static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
123 static void g_mirror_register_request(struct g_mirror_softc *sc,
124     struct bio *bp);
125 static void g_mirror_sync_release(struct g_mirror_softc *sc);
126
127 static const char *
128 g_mirror_disk_state2str(int state)
129 {
130
131         switch (state) {
132         case G_MIRROR_DISK_STATE_NONE:
133                 return ("NONE");
134         case G_MIRROR_DISK_STATE_NEW:
135                 return ("NEW");
136         case G_MIRROR_DISK_STATE_ACTIVE:
137                 return ("ACTIVE");
138         case G_MIRROR_DISK_STATE_STALE:
139                 return ("STALE");
140         case G_MIRROR_DISK_STATE_SYNCHRONIZING:
141                 return ("SYNCHRONIZING");
142         case G_MIRROR_DISK_STATE_DISCONNECTED:
143                 return ("DISCONNECTED");
144         case G_MIRROR_DISK_STATE_DESTROY:
145                 return ("DESTROY");
146         default:
147                 return ("INVALID");
148         }
149 }
150
151 static const char *
152 g_mirror_device_state2str(int state)
153 {
154
155         switch (state) {
156         case G_MIRROR_DEVICE_STATE_STARTING:
157                 return ("STARTING");
158         case G_MIRROR_DEVICE_STATE_RUNNING:
159                 return ("RUNNING");
160         default:
161                 return ("INVALID");
162         }
163 }
164
165 static const char *
166 g_mirror_get_diskname(struct g_mirror_disk *disk)
167 {
168
169         if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
170                 return ("[unknown]");
171         return (disk->d_name);
172 }
173
174 /*
175  * --- Event handling functions ---
176  * Events in geom_mirror are used to update disk and device state from a
177  * single worker thread, which simplifies locking.
178  */
179 static void
180 g_mirror_event_free(struct g_mirror_event *ep)
181 {
182
183         free(ep, M_MIRROR);
184 }
185
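/*
 * Post an event to the worker thread.  Unless G_MIRROR_EVENT_DONTWAIT is
 * set, drop sc_lock and sleep until the worker marks the event as done,
 * then return the event's error status.
 */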
186 int
187 g_mirror_event_send(void *arg, int state, int flags)
188 {
189         struct g_mirror_softc *sc;
190         struct g_mirror_disk *disk;
191         struct g_mirror_event *ep;
192         int error;
193
194         ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
195         G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
196         if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
197                 disk = NULL;
198                 sc = arg;
199         } else {
200                 disk = arg;
201                 sc = disk->d_softc;
202         }
203         ep->e_disk = disk;
204         ep->e_state = state;
205         ep->e_flags = flags;
206         ep->e_error = 0;
207         mtx_lock(&sc->sc_events_mtx);
208         TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
209         mtx_unlock(&sc->sc_events_mtx);
210         G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
211         mtx_lock(&sc->sc_queue_mtx);
212         wakeup(sc);
213         mtx_unlock(&sc->sc_queue_mtx);
214         if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
215                 return (0);
216         G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
217         sx_xunlock(&sc->sc_lock);
218         while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
219                 mtx_lock(&sc->sc_events_mtx);
220                 MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
221                     hz * 5);
222         }
223         error = ep->e_error;
224         g_mirror_event_free(ep);
225         sx_xlock(&sc->sc_lock);
226         return (error);
227 }
228
229 static struct g_mirror_event *
230 g_mirror_event_first(struct g_mirror_softc *sc)
231 {
232         struct g_mirror_event *ep;
233
234         mtx_lock(&sc->sc_events_mtx);
235         ep = TAILQ_FIRST(&sc->sc_events);
236         mtx_unlock(&sc->sc_events_mtx);
237         return (ep);
238 }
239
240 static void
241 g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
242 {
243
244         mtx_lock(&sc->sc_events_mtx);
245         TAILQ_REMOVE(&sc->sc_events, ep, e_next);
246         mtx_unlock(&sc->sc_events_mtx);
247 }
248
249 static void
250 g_mirror_event_cancel(struct g_mirror_disk *disk)
251 {
252         struct g_mirror_softc *sc;
253         struct g_mirror_event *ep, *tmpep;
254
255         sc = disk->d_softc;
256         sx_assert(&sc->sc_lock, SX_XLOCKED);
257
258         mtx_lock(&sc->sc_events_mtx);
259         TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
260                 if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
261                         continue;
262                 if (ep->e_disk != disk)
263                         continue;
264                 TAILQ_REMOVE(&sc->sc_events, ep, e_next);
265                 if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
266                         g_mirror_event_free(ep);
267                 else {
268                         ep->e_error = ECANCELED;
269                         wakeup(ep);
270                 }
271         }
272         mtx_unlock(&sc->sc_events_mtx);
273 }
274
275 /*
276  * Return the number of disks in the given state.
277  * If state is equal to -1, count all connected disks.
278  */
279 u_int
280 g_mirror_ndisks(struct g_mirror_softc *sc, int state)
281 {
282         struct g_mirror_disk *disk;
283         u_int n = 0;
284
285         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
286                 if (state == -1 || disk->d_state == state)
287                         n++;
288         }
289         return (n);
290 }
291
292 /*
293  * Find a disk in the mirror by its disk ID.
294  */
295 static struct g_mirror_disk *
296 g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
297 {
298         struct g_mirror_disk *disk;
299
300         sx_assert(&sc->sc_lock, SX_XLOCKED);
301
302         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
303                 if (disk->d_id == id)
304                         return (disk);
305         }
306         return (NULL);
307 }
308
309 static u_int
310 g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
311 {
312         struct bio *bp;
313         u_int nreqs = 0;
314
315         mtx_lock(&sc->sc_queue_mtx);
316         TAILQ_FOREACH(bp, &sc->sc_queue, bio_queue) {
317                 if (bp->bio_from == cp)
318                         nreqs++;
319         }
320         mtx_unlock(&sc->sc_queue_mtx);
321         return (nreqs);
322 }
323
324 static int
325 g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
326 {
327
328         if (cp->index > 0) {
329                 G_MIRROR_DEBUG(2,
330                     "I/O requests for %s exist, can't destroy it now.",
331                     cp->provider->name);
332                 return (1);
333         }
334         if (g_mirror_nrequests(sc, cp) > 0) {
335                 G_MIRROR_DEBUG(2,
336                     "I/O requests for %s in queue, can't destroy it now.",
337                     cp->provider->name);
338                 return (1);
339         }
340         return (0);
341 }
342
343 static void
344 g_mirror_destroy_consumer(void *arg, int flags __unused)
345 {
346         struct g_consumer *cp;
347
348         g_topology_assert();
349
350         cp = arg;
351         G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
352         g_detach(cp);
353         g_destroy_consumer(cp);
354 }
355
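/*
 * Close and destroy the consumer, unless it still has I/O in flight.  If the
 * consumer was open for writing, the detach/destroy is deferred to an event
 * so that the retaste triggered by closing it is ignored (see below).
 */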
356 static void
357 g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
358 {
359         struct g_provider *pp;
360         int retaste_wait;
361
362         g_topology_assert();
363
364         cp->private = NULL;
365         if (g_mirror_is_busy(sc, cp))
366                 return;
367         pp = cp->provider;
368         retaste_wait = 0;
369         if (cp->acw == 1) {
370                 if ((pp->geom->flags & G_GEOM_WITHER) == 0)
371                         retaste_wait = 1;
372         }
373         G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
374             -cp->acw, -cp->ace, 0);
375         if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
376                 g_access(cp, -cp->acr, -cp->acw, -cp->ace);
377         if (retaste_wait) {
378                 /*
379                  * After the retaste event has been sent (inside g_access()),
380                  * we can post an event to detach and destroy the consumer.
381                  * A class that already has a consumer attached to the given
382                  * provider will not receive a retaste event for that provider.
383                  * This is how retaste events are ignored when consumers opened
384                  * for writing are closed: the consumer is detached and
385                  * destroyed only after the retaste event has been sent.
386                  */
387                 g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
388                 return;
389         }
390         G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
391         g_detach(cp);
392         g_destroy_consumer(cp);
393 }
394
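/*
 * Create a consumer for the disk, attach it to the given provider and open
 * it r1w1e1.
 */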
395 static int
396 g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
397 {
398         struct g_consumer *cp;
399         int error;
400
401         g_topology_assert_not();
402         KASSERT(disk->d_consumer == NULL,
403             ("Disk already connected (device %s).", disk->d_softc->sc_name));
404
405         g_topology_lock();
406         cp = g_new_consumer(disk->d_softc->sc_geom);
407         cp->flags |= G_CF_DIRECT_RECEIVE;
408         error = g_attach(cp, pp);
409         if (error != 0) {
410                 g_destroy_consumer(cp);
411                 g_topology_unlock();
412                 return (error);
413         }
414         error = g_access(cp, 1, 1, 1);
415         if (error != 0) {
416                 g_detach(cp);
417                 g_destroy_consumer(cp);
418                 g_topology_unlock();
419                 G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
420                     pp->name, error);
421                 return (error);
422         }
423         g_topology_unlock();
424         disk->d_consumer = cp;
425         disk->d_consumer->private = disk;
426         disk->d_consumer->index = 0;
427
428         G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
429         return (0);
430 }
431
432 static void
433 g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
434 {
435
436         g_topology_assert();
437
438         if (cp == NULL)
439                 return;
440         if (cp->provider != NULL)
441                 g_mirror_kill_consumer(sc, cp);
442         else
443                 g_destroy_consumer(cp);
444 }
445
446 /*
447  * Initialize a disk: allocate memory, create a consumer, attach it to the
448  * provider, and open access (r1w1e1) to it.
449  */
450 static struct g_mirror_disk *
451 g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
452     struct g_mirror_metadata *md, int *errorp)
453 {
454         struct g_mirror_disk *disk;
455         int i, error;
456
457         disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
458         if (disk == NULL) {
459                 error = ENOMEM;
460                 goto fail;
461         }
462         disk->d_softc = sc;
463         error = g_mirror_connect_disk(disk, pp);
464         if (error != 0)
465                 goto fail;
466         disk->d_id = md->md_did;
467         disk->d_state = G_MIRROR_DISK_STATE_NONE;
468         disk->d_priority = md->md_priority;
469         disk->d_flags = md->md_dflags;
470         error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
471         if (error == 0 && i != 0)
472                 disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
473         if (md->md_provider[0] != '\0')
474                 disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
475         disk->d_sync.ds_consumer = NULL;
476         disk->d_sync.ds_offset = md->md_sync_offset;
477         disk->d_sync.ds_offset_done = md->md_sync_offset;
478         disk->d_sync.ds_update_ts = time_uptime;
479         disk->d_genid = md->md_genid;
480         disk->d_sync.ds_syncid = md->md_syncid;
481         disk->d_init_ndisks = md->md_all;
482         disk->d_init_slice = md->md_slice;
483         disk->d_init_balance = md->md_balance;
484         disk->d_init_mediasize = md->md_mediasize;
485         if (errorp != NULL)
486                 *errorp = 0;
487         return (disk);
488 fail:
489         if (errorp != NULL)
490                 *errorp = error;
491         if (disk != NULL)
492                 free(disk, M_MIRROR);
493         return (NULL);
494 }
495
496 static void
497 g_mirror_destroy_disk(struct g_mirror_disk *disk)
498 {
499         struct g_mirror_softc *sc;
500
501         g_topology_assert_not();
502         sc = disk->d_softc;
503         sx_assert(&sc->sc_lock, SX_XLOCKED);
504
505         g_topology_lock();
506         LIST_REMOVE(disk, d_next);
507         g_topology_unlock();
508         g_mirror_event_cancel(disk);
509         if (sc->sc_hint == disk)
510                 sc->sc_hint = NULL;
511         switch (disk->d_state) {
512         case G_MIRROR_DISK_STATE_SYNCHRONIZING:
513                 g_mirror_sync_stop(disk, 1);
514                 /* FALLTHROUGH */
515         case G_MIRROR_DISK_STATE_NEW:
516         case G_MIRROR_DISK_STATE_STALE:
517         case G_MIRROR_DISK_STATE_ACTIVE:
518                 g_topology_lock();
519                 g_mirror_disconnect_consumer(sc, disk->d_consumer);
520                 g_topology_unlock();
521                 free(disk, M_MIRROR);
522                 break;
523         default:
524                 KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
525                     g_mirror_get_diskname(disk),
526                     g_mirror_disk_state2str(disk->d_state)));
527         }
528 }
529
530 static void
531 g_mirror_free_device(struct g_mirror_softc *sc)
532 {
533
534         g_topology_assert();
535
536         mtx_destroy(&sc->sc_queue_mtx);
537         mtx_destroy(&sc->sc_events_mtx);
538         mtx_destroy(&sc->sc_done_mtx);
539         sx_destroy(&sc->sc_lock);
540         free(sc, M_MIRROR);
541 }
542
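/*
 * Release the softc reference held by the provider; free the device once the
 * last reference is dropped.
 */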
543 static void
544 g_mirror_providergone(struct g_provider *pp)
545 {
546         struct g_mirror_softc *sc = pp->private;
547
548         if ((--sc->sc_refcnt) == 0)
549                 g_mirror_free_device(sc);
550 }
551
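/*
 * Tear down the whole device: destroy the provider, all disks and pending
 * events, then wither the geoms and drop our softc reference.
 */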
552 static void
553 g_mirror_destroy_device(struct g_mirror_softc *sc)
554 {
555         struct g_mirror_disk *disk;
556         struct g_mirror_event *ep;
557         struct g_geom *gp;
558         struct g_consumer *cp, *tmpcp;
559
560         g_topology_assert_not();
561         sx_assert(&sc->sc_lock, SX_XLOCKED);
562
563         gp = sc->sc_geom;
564         if (sc->sc_provider != NULL)
565                 g_mirror_destroy_provider(sc);
566         for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
567             disk = LIST_FIRST(&sc->sc_disks)) {
568                 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
569                 g_mirror_update_metadata(disk);
570                 g_mirror_destroy_disk(disk);
571         }
572         while ((ep = g_mirror_event_first(sc)) != NULL) {
573                 g_mirror_event_remove(sc, ep);
574                 if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
575                         g_mirror_event_free(ep);
576                 else {
577                         ep->e_error = ECANCELED;
578                         ep->e_flags |= G_MIRROR_EVENT_DONE;
579                         G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
580                         mtx_lock(&sc->sc_events_mtx);
581                         wakeup(ep);
582                         mtx_unlock(&sc->sc_events_mtx);
583                 }
584         }
585         callout_drain(&sc->sc_callout);
586
587         g_topology_lock();
588         LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
589                 g_mirror_disconnect_consumer(sc, cp);
590         }
591         g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
592         G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
593         g_wither_geom(gp, ENXIO);
594         sx_xunlock(&sc->sc_lock);
595         if ((--sc->sc_refcnt) == 0)
596                 g_mirror_free_device(sc);
597         g_topology_unlock();
598 }
599
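/*
 * The consumer's provider has gone away: schedule a syncid bump and mark the
 * corresponding disk as disconnected.
 */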
600 static void
601 g_mirror_orphan(struct g_consumer *cp)
602 {
603         struct g_mirror_disk *disk;
604
605         g_topology_assert();
606
607         disk = cp->private;
608         if (disk == NULL)
609                 return;
610         disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
611         g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
612             G_MIRROR_EVENT_DONTWAIT);
613 }
614
615 /*
616  * Return the next active disk on the list.
617  * It may be the same disk that was passed in.
618  * If there are no active disks on the list, NULL is returned.
619  */
620 static __inline struct g_mirror_disk *
621 g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
622 {
623         struct g_mirror_disk *dp;
624
625         for (dp = LIST_NEXT(disk, d_next); dp != disk;
626             dp = LIST_NEXT(dp, d_next)) {
627                 if (dp == NULL)
628                         dp = LIST_FIRST(&sc->sc_disks);
629                 if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
630                         break;
631         }
632         if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
633                 return (NULL);
634         return (dp);
635 }
636
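/*
 * Return an active disk for the next request, rotating through the active
 * disks via sc_hint (used by the 'round-robin' balance algorithm).
 */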
637 static struct g_mirror_disk *
638 g_mirror_get_disk(struct g_mirror_softc *sc)
639 {
640         struct g_mirror_disk *disk;
641
642         if (sc->sc_hint == NULL) {
643                 sc->sc_hint = LIST_FIRST(&sc->sc_disks);
644                 if (sc->sc_hint == NULL)
645                         return (NULL);
646         }
647         disk = sc->sc_hint;
648         if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
649                 disk = g_mirror_find_next(sc, disk);
650                 if (disk == NULL)
651                         return (NULL);
652         }
653         sc->sc_hint = g_mirror_find_next(sc, disk);
654         return (disk);
655 }
656
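/*
 * Write the given metadata to the last sector of the component; a NULL md
 * (or the WIPE flag) writes a zeroed sector instead.  On failure, mark the
 * disk broken and, if the disconnect-on-failure policy allows it,
 * disconnect the disk.
 */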
657 static int
658 g_mirror_write_metadata(struct g_mirror_disk *disk,
659     struct g_mirror_metadata *md)
660 {
661         struct g_mirror_softc *sc;
662         struct g_consumer *cp;
663         off_t offset, length;
664         u_char *sector;
665         int error = 0;
666
667         g_topology_assert_not();
668         sc = disk->d_softc;
669         sx_assert(&sc->sc_lock, SX_LOCKED);
670
671         cp = disk->d_consumer;
672         KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
673         KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
674         KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
675             ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
676             cp->acw, cp->ace));
677         length = cp->provider->sectorsize;
678         offset = cp->provider->mediasize - length;
679         sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
680         if (md != NULL &&
681             (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) {
682                 /*
683                  * Handle the case where the size of the parent provider was reduced.
684                  */
685                 if (offset < md->md_mediasize)
686                         error = ENOSPC;
687                 else
688                         mirror_metadata_encode(md, sector);
689         }
690         KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_metadata_write, error);
691         if (error == 0)
692                 error = g_write_data(cp, offset, sector, length);
693         free(sector, M_MIRROR);
694         if (error != 0) {
695                 if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
696                         disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
697                         G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
698                             "(device=%s, error=%d).",
699                             g_mirror_get_diskname(disk), sc->sc_name, error);
700                 } else {
701                         G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
702                             "(device=%s, error=%d).",
703                             g_mirror_get_diskname(disk), sc->sc_name, error);
704                 }
705                 if (g_mirror_disconnect_on_failure &&
706                     g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
707                         sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
708                         g_mirror_event_send(disk,
709                             G_MIRROR_DISK_STATE_DISCONNECTED,
710                             G_MIRROR_EVENT_DONTWAIT);
711                 }
712         }
713         return (error);
714 }
715
716 static int
717 g_mirror_clear_metadata(struct g_mirror_disk *disk)
718 {
719         int error;
720
721         g_topology_assert_not();
722         sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);
723
724         if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
725                 return (0);
726         error = g_mirror_write_metadata(disk, NULL);
727         if (error == 0) {
728                 G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
729                     g_mirror_get_diskname(disk));
730         } else {
731                 G_MIRROR_DEBUG(0,
732                     "Cannot clear metadata on disk %s (error=%d).",
733                     g_mirror_get_diskname(disk), error);
734         }
735         return (error);
736 }
737
738 void
739 g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
740     struct g_mirror_metadata *md)
741 {
742
743         strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
744         md->md_version = G_MIRROR_VERSION;
745         strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
746         md->md_mid = sc->sc_id;
747         md->md_all = sc->sc_ndisks;
748         md->md_slice = sc->sc_slice;
749         md->md_balance = sc->sc_balance;
750         md->md_genid = sc->sc_genid;
751         md->md_mediasize = sc->sc_mediasize;
752         md->md_sectorsize = sc->sc_sectorsize;
753         md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
754         bzero(md->md_provider, sizeof(md->md_provider));
755         if (disk == NULL) {
756                 md->md_did = arc4random();
757                 md->md_priority = 0;
758                 md->md_syncid = 0;
759                 md->md_dflags = 0;
760                 md->md_sync_offset = 0;
761                 md->md_provsize = 0;
762         } else {
763                 md->md_did = disk->d_id;
764                 md->md_priority = disk->d_priority;
765                 md->md_syncid = disk->d_sync.ds_syncid;
766                 md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
767                 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
768                         md->md_sync_offset = disk->d_sync.ds_offset_done;
769                 else
770                         md->md_sync_offset = 0;
771                 if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
772                         strlcpy(md->md_provider,
773                             disk->d_consumer->provider->name,
774                             sizeof(md->md_provider));
775                 }
776                 md->md_provsize = disk->d_consumer->provider->mediasize;
777         }
778 }
779
780 void
781 g_mirror_update_metadata(struct g_mirror_disk *disk)
782 {
783         struct g_mirror_softc *sc;
784         struct g_mirror_metadata md;
785         int error;
786
787         g_topology_assert_not();
788         sc = disk->d_softc;
789         sx_assert(&sc->sc_lock, SX_LOCKED);
790
791         if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
792                 return;
793         if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
794                 g_mirror_fill_metadata(sc, disk, &md);
795         error = g_mirror_write_metadata(disk, &md);
796         if (error == 0) {
797                 G_MIRROR_DEBUG(2, "Metadata on %s updated.",
798                     g_mirror_get_diskname(disk));
799         } else {
800                 G_MIRROR_DEBUG(0,
801                     "Cannot update metadata on disk %s (error=%d).",
802                     g_mirror_get_diskname(disk), error);
803         }
804 }
805
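/*
 * Bump the syncid and store it in the metadata of all active and
 * synchronizing disks.
 */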
806 static void
807 g_mirror_bump_syncid(struct g_mirror_softc *sc)
808 {
809         struct g_mirror_disk *disk;
810
811         g_topology_assert_not();
812         sx_assert(&sc->sc_lock, SX_XLOCKED);
813         KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
814             ("%s called with no active disks (device=%s).", __func__,
815             sc->sc_name));
816
817         sc->sc_syncid++;
818         G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
819             sc->sc_syncid);
820         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
821                 if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
822                     disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
823                         disk->d_sync.ds_syncid = sc->sc_syncid;
824                         g_mirror_update_metadata(disk);
825                 }
826         }
827 }
828
829 static void
830 g_mirror_bump_genid(struct g_mirror_softc *sc)
831 {
832         struct g_mirror_disk *disk;
833
834         g_topology_assert_not();
835         sx_assert(&sc->sc_lock, SX_XLOCKED);
836         KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
837             ("%s called with no active disks (device=%s).", __func__,
838             sc->sc_name));
839
840         sc->sc_genid++;
841         G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
842             sc->sc_genid);
843         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
844                 if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
845                     disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
846                         disk->d_genid = sc->sc_genid;
847                         g_mirror_update_metadata(disk);
848                 }
849         }
850 }
851
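/*
 * Mark all active disks as clean once the device has been idle (no writes)
 * for long enough.  Return 0 when no further polling is needed, otherwise
 * the number of seconds left before the disks may be marked clean.
 */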
852 static int
853 g_mirror_idle(struct g_mirror_softc *sc, int acw)
854 {
855         struct g_mirror_disk *disk;
856         int timeout;
857
858         g_topology_assert_not();
859         sx_assert(&sc->sc_lock, SX_XLOCKED);
860
861         if (sc->sc_provider == NULL)
862                 return (0);
863         if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
864                 return (0);
865         if (sc->sc_idle)
866                 return (0);
867         if (sc->sc_writes > 0)
868                 return (0);
869         if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
870                 timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
871                 if (!g_mirror_shutdown && timeout > 0)
872                         return (timeout);
873         }
874         sc->sc_idle = 1;
875         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
876                 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
877                         continue;
878                 G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
879                     g_mirror_get_diskname(disk), sc->sc_name);
880                 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
881                 g_mirror_update_metadata(disk);
882         }
883         return (0);
884 }
885
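/*
 * Leave the idle state: record the write time and mark all active disks as
 * dirty.
 */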
886 static void
887 g_mirror_unidle(struct g_mirror_softc *sc)
888 {
889         struct g_mirror_disk *disk;
890
891         g_topology_assert_not();
892         sx_assert(&sc->sc_lock, SX_XLOCKED);
893
894         if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
895                 return;
896         sc->sc_idle = 0;
897         sc->sc_last_write = time_uptime;
898         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
899                 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
900                         continue;
901                 G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
902                     g_mirror_get_diskname(disk), sc->sc_name);
903                 disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
904                 g_mirror_update_metadata(disk);
905         }
906 }
907
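/*
 * Completion callback for regular requests issued to components: mark the
 * bio as a regular request and hand it back to the worker thread.
 */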
908 static void
909 g_mirror_done(struct bio *bp)
910 {
911         struct g_mirror_softc *sc;
912
913         sc = bp->bio_from->geom->softc;
914         bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
915         mtx_lock(&sc->sc_queue_mtx);
916         TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
917         mtx_unlock(&sc->sc_queue_mtx);
918         wakeup(sc);
919 }
920
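/*
 * Handle an I/O error reported by a component for a regular request: mark
 * the disk broken and, if the disconnect-on-failure policy is enabled and
 * other active disks remain, disconnect it.
 */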
921 static void
922 g_mirror_regular_request_error(struct g_mirror_softc *sc,
923     struct g_mirror_disk *disk, struct bio *bp)
924 {
925
926         if ((bp->bio_cmd == BIO_FLUSH || bp->bio_cmd == BIO_SPEEDUP) &&
927             bp->bio_error == EOPNOTSUPP)
928                 return;
929
930         if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
931                 disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
932                 G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
933                     bp->bio_error);
934         } else {
935                 G_MIRROR_LOGREQ(1, bp, "Request failed (error=%d).",
936                     bp->bio_error);
937         }
938         if (g_mirror_disconnect_on_failure &&
939             g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
940                 if (bp->bio_error == ENXIO &&
941                     bp->bio_cmd == BIO_READ)
942                         sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
943                 else if (bp->bio_error == ENXIO)
944                         sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_NOW;
945                 else
946                         sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
947                 g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
948                     G_MIRROR_EVENT_DONTWAIT);
949         }
950 }
951
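/*
 * Handle completion of a regular request cloned to a component: account for
 * errors, arrange for failed reads to be retried on another active disk, and
 * deliver the parent bio once all children have come back.
 */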
952 static void
953 g_mirror_regular_request(struct g_mirror_softc *sc, struct bio *bp)
954 {
955         struct g_mirror_disk *disk;
956         struct bio *pbp;
957
958         g_topology_assert_not();
959         KASSERT(sc->sc_provider == bp->bio_parent->bio_to,
960             ("regular request %p with unexpected origin", bp));
961
962         pbp = bp->bio_parent;
963         bp->bio_from->index--;
964         if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE)
965                 sc->sc_writes--;
966         disk = bp->bio_from->private;
967         if (disk == NULL) {
968                 g_topology_lock();
969                 g_mirror_kill_consumer(sc, bp->bio_from);
970                 g_topology_unlock();
971         }
972
973         switch (bp->bio_cmd) {
974         case BIO_READ:
975                 KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_read,
976                     bp->bio_error);
977                 break;
978         case BIO_WRITE:
979                 KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_write,
980                     bp->bio_error);
981                 break;
982         case BIO_DELETE:
983                 KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_delete,
984                     bp->bio_error);
985                 break;
986         case BIO_FLUSH:
987                 KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_flush,
988                     bp->bio_error);
989                 break;
990         case BIO_SPEEDUP:
991                 KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_speedup,
992                     bp->bio_error);
993                 break;
994         }
995
996         pbp->bio_inbed++;
997         KASSERT(pbp->bio_inbed <= pbp->bio_children,
998             ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
999             pbp->bio_children));
1000         if (bp->bio_error == 0 && pbp->bio_error == 0) {
1001                 G_MIRROR_LOGREQ(3, bp, "Request delivered.");
1002                 g_destroy_bio(bp);
1003                 if (pbp->bio_children == pbp->bio_inbed) {
1004                         G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
1005                         pbp->bio_completed = pbp->bio_length;
1006                         if (pbp->bio_cmd == BIO_WRITE ||
1007                             pbp->bio_cmd == BIO_DELETE) {
1008                                 TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
1009                                 /* Release delayed sync requests if possible. */
1010                                 g_mirror_sync_release(sc);
1011                         }
1012                         g_io_deliver(pbp, pbp->bio_error);
1013                 }
1014                 return;
1015         } else if (bp->bio_error != 0) {
1016                 if (pbp->bio_error == 0)
1017                         pbp->bio_error = bp->bio_error;
1018                 if (disk != NULL)
1019                         g_mirror_regular_request_error(sc, disk, bp);
1020                 switch (pbp->bio_cmd) {
1021                 case BIO_DELETE:
1022                 case BIO_WRITE:
1023                 case BIO_FLUSH:
1024                 case BIO_SPEEDUP:
1025                         pbp->bio_inbed--;
1026                         pbp->bio_children--;
1027                         break;
1028                 }
1029         }
1030         g_destroy_bio(bp);
1031
1032         switch (pbp->bio_cmd) {
1033         case BIO_READ:
1034                 if (pbp->bio_inbed < pbp->bio_children)
1035                         break;
1036                 if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
1037                         g_io_deliver(pbp, pbp->bio_error);
1038                 else {
1039                         pbp->bio_error = 0;
1040                         mtx_lock(&sc->sc_queue_mtx);
1041                         TAILQ_INSERT_TAIL(&sc->sc_queue, pbp, bio_queue);
1042                         mtx_unlock(&sc->sc_queue_mtx);
1043                         G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
1044                         wakeup(sc);
1045                 }
1046                 break;
1047         case BIO_DELETE:
1048         case BIO_WRITE:
1049         case BIO_FLUSH:
1050         case BIO_SPEEDUP:
1051                 if (pbp->bio_children == 0) {
1052                         /*
1053                          * All requests failed.
1054                          */
1055                 } else if (pbp->bio_inbed < pbp->bio_children) {
1056                         /* Do nothing. */
1057                         break;
1058                 } else if (pbp->bio_children == pbp->bio_inbed) {
1059                         /* Some requests succeeded. */
1060                         pbp->bio_error = 0;
1061                         pbp->bio_completed = pbp->bio_length;
1062                 }
1063                 if (pbp->bio_cmd == BIO_WRITE || pbp->bio_cmd == BIO_DELETE) {
1064                         TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
1065                         /* Release delayed sync requests if possible. */
1066                         g_mirror_sync_release(sc);
1067                 }
1068                 g_io_deliver(pbp, pbp->bio_error);
1069                 break;
1070         default:
1071                 KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
1072                 break;
1073         }
1074 }
1075
1076 static void
1077 g_mirror_sync_done(struct bio *bp)
1078 {
1079         struct g_mirror_softc *sc;
1080
1081         G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
1082         sc = bp->bio_from->geom->softc;
1083         bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
1084         mtx_lock(&sc->sc_queue_mtx);
1085         TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
1086         mtx_unlock(&sc->sc_queue_mtx);
1087         wakeup(sc);
1088 }
1089
1090 static void
1091 g_mirror_candelete(struct bio *bp)
1092 {
1093         struct g_mirror_softc *sc;
1094         struct g_mirror_disk *disk;
1095         int val;
1096
1097         sc = bp->bio_to->private;
1098         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1099                 if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE)
1100                         break;
1101         }
1102         val = disk != NULL;
1103         g_handleattr(bp, "GEOM::candelete", &val, sizeof(val));
1104 }
1105
1106 static void
1107 g_mirror_kernel_dump(struct bio *bp)
1108 {
1109         struct g_mirror_softc *sc;
1110         struct g_mirror_disk *disk;
1111         struct bio *cbp;
1112         struct g_kerneldump *gkd;
1113
1114         /*
1115          * We configure dumping to the first component, because this component
1116          * will be used for reading with the 'prefer' balance algorithm.
1117          * If the component with the highest priority is currently disconnected,
1118          * we will not be able to read the dump after reboot once that component
1119          * is reconnected and synchronized later. Can we do something better?
1120          */
1121         sc = bp->bio_to->private;
1122         disk = LIST_FIRST(&sc->sc_disks);
1123
1124         gkd = (struct g_kerneldump *)bp->bio_data;
1125         if (gkd->length > bp->bio_to->mediasize)
1126                 gkd->length = bp->bio_to->mediasize;
1127         cbp = g_clone_bio(bp);
1128         if (cbp == NULL) {
1129                 g_io_deliver(bp, ENOMEM);
1130                 return;
1131         }
1132         cbp->bio_done = g_std_done;
1133         g_io_request(cbp, disk->d_consumer);
1134         G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
1135             g_mirror_get_diskname(disk));
1136 }
1137
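/*
 * Entry point for I/O requests to the mirror provider: serve a couple of
 * GETATTR attributes directly and queue all other supported requests for the
 * worker thread; unsupported requests get EOPNOTSUPP.
 */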
1138 static void
1139 g_mirror_start(struct bio *bp)
1140 {
1141         struct g_mirror_softc *sc;
1142
1143         sc = bp->bio_to->private;
1144         /*
1145          * If sc == NULL or there are no valid disks, the provider's error
1146          * should be set and g_mirror_start() should not be called at all.
1147          */
1148         KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1149             ("Provider's error should be set (error=%d)(mirror=%s).",
1150             bp->bio_to->error, bp->bio_to->name));
1151         G_MIRROR_LOGREQ(3, bp, "Request received.");
1152
1153         switch (bp->bio_cmd) {
1154         case BIO_READ:
1155         case BIO_WRITE:
1156         case BIO_DELETE:
1157         case BIO_SPEEDUP:
1158         case BIO_FLUSH:
1159                 break;
1160         case BIO_GETATTR:
1161                 if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
1162                         g_mirror_candelete(bp);
1163                         return;
1164                 } else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
1165                         g_mirror_kernel_dump(bp);
1166                         return;
1167                 }
1168                 /* FALLTHROUGH */
1169         default:
1170                 g_io_deliver(bp, EOPNOTSUPP);
1171                 return;
1172         }
1173         mtx_lock(&sc->sc_queue_mtx);
1174         if (bp->bio_to->error != 0) {
1175                 mtx_unlock(&sc->sc_queue_mtx);
1176                 g_io_deliver(bp, bp->bio_to->error);
1177                 return;
1178         }
1179         TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
1180         mtx_unlock(&sc->sc_queue_mtx);
1181         G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
1182         wakeup(sc);
1183 }
1184
1185 /*
1186  * Return TRUE if the given request is colliding with an in-progress
1187  * synchronization request.
1188  */
1189 static bool
1190 g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
1191 {
1192         struct g_mirror_disk *disk;
1193         struct bio *sbp;
1194         off_t rstart, rend, sstart, send;
1195         u_int i;
1196
1197         if (sc->sc_sync.ds_ndisks == 0)
1198                 return (false);
1199         rstart = bp->bio_offset;
1200         rend = bp->bio_offset + bp->bio_length;
1201         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1202                 if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
1203                         continue;
1204                 for (i = 0; i < g_mirror_syncreqs; i++) {
1205                         sbp = disk->d_sync.ds_bios[i];
1206                         if (sbp == NULL)
1207                                 continue;
1208                         sstart = sbp->bio_offset;
1209                         send = sbp->bio_offset + sbp->bio_length;
1210                         if (rend > sstart && rstart < send)
1211                                 return (true);
1212                 }
1213         }
1214         return (false);
1215 }
1216
1217 /*
1218  * Return TRUE if the given sync request is colliding with an in-progress
1219  * regular request.
1220  */
1221 static bool
1222 g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
1223 {
1224         off_t rstart, rend, sstart, send;
1225         struct bio *bp;
1226
1227         if (sc->sc_sync.ds_ndisks == 0)
1228                 return (false);
1229         sstart = sbp->bio_offset;
1230         send = sbp->bio_offset + sbp->bio_length;
1231         TAILQ_FOREACH(bp, &sc->sc_inflight, bio_queue) {
1232                 rstart = bp->bio_offset;
1233                 rend = bp->bio_offset + bp->bio_length;
1234                 if (rend > sstart && rstart < send)
1235                         return (true);
1236         }
1237         return (false);
1238 }
1239
1240 /*
1241  * Put a regular request onto the delayed queue.
1242  */
1243 static void
1244 g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
1245 {
1246
1247         G_MIRROR_LOGREQ(2, bp, "Delaying request.");
1248         TAILQ_INSERT_TAIL(&sc->sc_regular_delayed, bp, bio_queue);
1249 }
1250
1251 /*
1252  * Put a synchronization request onto the delayed queue.
1253  */
1254 static void
1255 g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
1256 {
1257
1258         G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
1259         TAILQ_INSERT_TAIL(&sc->sc_sync_delayed, bp, bio_queue);
1260 }
1261
1262 /*
1263  * Requeue delayed regular requests.
1264  */
1265 static void
1266 g_mirror_regular_release(struct g_mirror_softc *sc)
1267 {
1268         struct bio *bp;
1269
1270         if ((bp = TAILQ_FIRST(&sc->sc_regular_delayed)) == NULL)
1271                 return;
1272         if (g_mirror_sync_collision(sc, bp))
1273                 return;
1274
1275         G_MIRROR_DEBUG(2, "Requeuing regular requests after collision.");
1276         mtx_lock(&sc->sc_queue_mtx);
1277         TAILQ_CONCAT(&sc->sc_regular_delayed, &sc->sc_queue, bio_queue);
1278         TAILQ_SWAP(&sc->sc_regular_delayed, &sc->sc_queue, bio, bio_queue);
1279         mtx_unlock(&sc->sc_queue_mtx);
1280 }
1281
1282 /*
1283  * Release delayed sync requests that no longer collide with regular
1284  * requests.
1285  */
1286 static void
1287 g_mirror_sync_release(struct g_mirror_softc *sc)
1288 {
1289         struct bio *bp, *bp2;
1290
1291         TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed, bio_queue, bp2) {
1292                 if (g_mirror_regular_collision(sc, bp))
1293                         continue;
1294                 TAILQ_REMOVE(&sc->sc_sync_delayed, bp, bio_queue);
1295                 G_MIRROR_LOGREQ(2, bp,
1296                     "Releasing delayed synchronization request.");
1297                 g_io_request(bp, bp->bio_from);
1298         }
1299 }
1300
1301 /*
1302  * Free a synchronization request and clear its slot in the array.
1303  */
1304 static void
1305 g_mirror_sync_request_free(struct g_mirror_disk *disk, struct bio *bp)
1306 {
1307         int idx;
1308
1309         if (disk != NULL && disk->d_sync.ds_bios != NULL) {
1310                 idx = (int)(uintptr_t)bp->bio_caller1;
1311                 KASSERT(disk->d_sync.ds_bios[idx] == bp,
1312                     ("unexpected sync BIO at %p:%d", disk, idx));
1313                 disk->d_sync.ds_bios[idx] = NULL;
1314         }
1315         free(bp->bio_data, M_MIRROR);
1316         g_destroy_bio(bp);
1317 }
1318
1319 /*
1320  * Handle synchronization requests.
1321  * Every synchronization request is a two-step process: first, a read request is
1322  * sent to the mirror provider via the sync consumer. If that request completes
1323  * successfully, it is converted to a write and sent to the disk being
1324  * synchronized. If the write also completes successfully, the synchronization
1325  * offset is advanced and a new read request is submitted.
1326  */
1327 static void
1328 g_mirror_sync_request(struct g_mirror_softc *sc, struct bio *bp)
1329 {
1330         struct g_mirror_disk *disk;
1331         struct g_mirror_disk_sync *sync;
1332
1333         KASSERT((bp->bio_cmd == BIO_READ &&
1334             bp->bio_from->geom == sc->sc_sync.ds_geom) ||
1335             (bp->bio_cmd == BIO_WRITE && bp->bio_from->geom == sc->sc_geom),
1336             ("Sync BIO %p with unexpected origin", bp));
1337
1338         bp->bio_from->index--;
1339         disk = bp->bio_from->private;
1340         if (disk == NULL) {
1341                 sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
1342                 g_topology_lock();
1343                 g_mirror_kill_consumer(sc, bp->bio_from);
1344                 g_topology_unlock();
1345                 g_mirror_sync_request_free(NULL, bp);
1346                 sx_xlock(&sc->sc_lock);
1347                 return;
1348         }
1349
1350         sync = &disk->d_sync;
1351
1352         /*
1353          * Synchronization request.
1354          */
1355         switch (bp->bio_cmd) {
1356         case BIO_READ: {
1357                 struct g_consumer *cp;
1358
1359                 KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read,
1360                     bp->bio_error);
1361
1362                 if (bp->bio_error != 0) {
1363                         G_MIRROR_LOGREQ(0, bp,
1364                             "Synchronization request failed (error=%d).",
1365                             bp->bio_error);
1366
1367                         /*
1368                          * The read error will trigger a syncid bump, so there's
1369                          * no need to do that here.
1370                          *
1371                          * The read error handling for regular requests will
1372                          * retry the read from all active mirrors before passing
1373                          * the error back up, so there's no need to retry here.
1374                          */
1375                         g_mirror_sync_request_free(disk, bp);
1376                         g_mirror_event_send(disk,
1377                             G_MIRROR_DISK_STATE_DISCONNECTED,
1378                             G_MIRROR_EVENT_DONTWAIT);
1379                         return;
1380                 }
1381                 G_MIRROR_LOGREQ(3, bp,
1382                     "Synchronization request half-finished.");
1383                 bp->bio_cmd = BIO_WRITE;
1384                 bp->bio_cflags = 0;
1385                 cp = disk->d_consumer;
1386                 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1387                     ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1388                     cp->acr, cp->acw, cp->ace));
1389                 cp->index++;
1390                 g_io_request(bp, cp);
1391                 return;
1392         }
1393         case BIO_WRITE: {
1394                 off_t offset;
1395                 int i;
1396
1397                 KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_write,
1398                     bp->bio_error);
1399
1400                 if (bp->bio_error != 0) {
1401                         G_MIRROR_LOGREQ(0, bp,
1402                             "Synchronization request failed (error=%d).",
1403                             bp->bio_error);
1404                         g_mirror_sync_request_free(disk, bp);
1405                         sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
1406                         g_mirror_event_send(disk,
1407                             G_MIRROR_DISK_STATE_DISCONNECTED,
1408                             G_MIRROR_EVENT_DONTWAIT);
1409                         return;
1410                 }
1411                 G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
1412                 if (sync->ds_offset >= sc->sc_mediasize ||
1413                     sync->ds_consumer == NULL ||
1414                     (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1415                         /* Don't send more synchronization requests. */
1416                         sync->ds_inflight--;
1417                         g_mirror_sync_request_free(disk, bp);
1418                         if (sync->ds_inflight > 0)
1419                                 return;
1420                         if (sync->ds_consumer == NULL ||
1421                             (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1422                                 return;
1423                         }
1424                         /* Disk up-to-date, activate it. */
1425                         g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
1426                             G_MIRROR_EVENT_DONTWAIT);
1427                         return;
1428                 }
1429
1430                 /* Send next synchronization request. */
1431                 g_mirror_sync_reinit(disk, bp, sync->ds_offset);
1432                 sync->ds_offset += bp->bio_length;
1433
1434                 G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
1435                 sync->ds_consumer->index++;
1436
1437                 /*
1438                  * Delay the request if it is colliding with a regular request.
1439                  */
1440                 if (g_mirror_regular_collision(sc, bp))
1441                         g_mirror_sync_delay(sc, bp);
1442                 else
1443                         g_io_request(bp, sync->ds_consumer);
1444
1445                 /* Requeue delayed requests if possible. */
1446                 g_mirror_regular_release(sc);
1447
1448                 /* Find the smallest offset */
1449                 offset = sc->sc_mediasize;
1450                 for (i = 0; i < g_mirror_syncreqs; i++) {
1451                         bp = sync->ds_bios[i];
1452                         if (bp != NULL && bp->bio_offset < offset)
1453                                 offset = bp->bio_offset;
1454                 }
1455                 if (g_mirror_sync_period > 0 &&
1456                     time_uptime - sync->ds_update_ts > g_mirror_sync_period) {
1457                         sync->ds_offset_done = offset;
1458                         g_mirror_update_metadata(disk);
1459                         sync->ds_update_ts = time_uptime;
1460                 }
1461                 return;
1462         }
1463         default:
1464                 panic("Invalid I/O request %p", bp);
1465         }
1466 }
1467
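/*
 * Service a read using the PREFER balance policy: pick the first ACTIVE disk
 * on the list (sc_disks is kept sorted by priority, so this is the active
 * disk with the highest priority) and send the cloned request to it.
 */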
1468 static void
1469 g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
1470 {
1471         struct g_mirror_disk *disk;
1472         struct g_consumer *cp;
1473         struct bio *cbp;
1474
1475         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1476                 if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1477                         break;
1478         }
1479         if (disk == NULL) {
1480                 if (bp->bio_error == 0)
1481                         bp->bio_error = ENXIO;
1482                 g_io_deliver(bp, bp->bio_error);
1483                 return;
1484         }
1485         cbp = g_clone_bio(bp);
1486         if (cbp == NULL) {
1487                 if (bp->bio_error == 0)
1488                         bp->bio_error = ENOMEM;
1489                 g_io_deliver(bp, bp->bio_error);
1490                 return;
1491         }
1492         /*
1493          * Fill in the component buf structure.
1494          */
1495         cp = disk->d_consumer;
1496         cbp->bio_done = g_mirror_done;
1497         cbp->bio_to = cp->provider;
1498         G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1499         KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1500             ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1501             cp->acw, cp->ace));
1502         cp->index++;
1503         g_io_request(cbp, cp);
1504 }
1505
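/*
 * Service a read using the ROUND_ROBIN balance policy: g_mirror_get_disk()
 * is expected to rotate through the ACTIVE disks so that consecutive reads
 * are spread across all components.
 */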
1506 static void
1507 g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1508 {
1509         struct g_mirror_disk *disk;
1510         struct g_consumer *cp;
1511         struct bio *cbp;
1512
1513         disk = g_mirror_get_disk(sc);
1514         if (disk == NULL) {
1515                 if (bp->bio_error == 0)
1516                         bp->bio_error = ENXIO;
1517                 g_io_deliver(bp, bp->bio_error);
1518                 return;
1519         }
1520         cbp = g_clone_bio(bp);
1521         if (cbp == NULL) {
1522                 if (bp->bio_error == 0)
1523                         bp->bio_error = ENOMEM;
1524                 g_io_deliver(bp, bp->bio_error);
1525                 return;
1526         }
1527         /*
1528          * Fill in the component buf structure.
1529          */
1530         cp = disk->d_consumer;
1531         cbp->bio_done = g_mirror_done;
1532         cbp->bio_to = cp->provider;
1533         G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1534         KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1535             ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1536             cp->acw, cp->ace));
1537         cp->index++;
1538         g_io_request(cbp, cp);
1539 }
1540
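/*
 * Constants for the LOAD balance policy: requests within TRACK_SIZE of a
 * disk's last known head position are considered "close", and per-disk load
 * values are kept in fixed point with LOAD_SCALE as the scaling factor.
 */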
1541 #define TRACK_SIZE  (1 * 1024 * 1024)
1542 #define LOAD_SCALE      256
1543 #define ABS(x)          (((x) >= 0) ? (x) : (-(x)))
1544
1545 static void
1546 g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1547 {
1548         struct g_mirror_disk *disk, *dp;
1549         struct g_consumer *cp;
1550         struct bio *cbp;
1551         int prio, best;
1552
1553         /* Find a disk with the smallest load. */
1554         disk = NULL;
1555         best = INT_MAX;
1556         LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1557                 if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1558                         continue;
1559                 prio = dp->load;
1560                 /* If disk head is precisely in position - highly prefer it. */
1561                 if (dp->d_last_offset == bp->bio_offset)
1562                         prio -= 2 * LOAD_SCALE;
1563                 else
1564                 /* If disk head is close to position - prefer it. */
1565                 if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
1566                         prio -= 1 * LOAD_SCALE;
1567                 if (prio <= best) {
1568                         disk = dp;
1569                         best = prio;
1570                 }
1571         }
1572         KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
1573         cbp = g_clone_bio(bp);
1574         if (cbp == NULL) {
1575                 if (bp->bio_error == 0)
1576                         bp->bio_error = ENOMEM;
1577                 g_io_deliver(bp, bp->bio_error);
1578                 return;
1579         }
1580         /*
1581          * Fill in the component buf structure.
1582          */
1583         cp = disk->d_consumer;
1584         cbp->bio_done = g_mirror_done;
1585         cbp->bio_to = cp->provider;
1586         G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1587         KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1588             ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1589             cp->acw, cp->ace));
1590         cp->index++;
1591         /* Remember last head position */
1592         disk->d_last_offset = bp->bio_offset + bp->bio_length;
1593         /* Update loads. */
1594         LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1595                 dp->load = (dp->d_consumer->index * LOAD_SCALE +
1596                     dp->load * 7) / 8;
1597         }
1598         g_io_request(cbp, cp);
1599 }
1600
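/*
 * Service a read using the SPLIT balance policy: requests larger than the
 * configured slice size are divided into sector-aligned pieces, one per
 * ACTIVE disk, so that all components work on the read in parallel; smaller
 * requests fall back to round-robin.
 */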
1601 static void
1602 g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1603 {
1604         struct bio_queue queue;
1605         struct g_mirror_disk *disk;
1606         struct g_consumer *cp;
1607         struct bio *cbp;
1608         off_t left, mod, offset, slice;
1609         u_char *data;
1610         u_int ndisks;
1611
1612         if (bp->bio_length <= sc->sc_slice) {
1613                 g_mirror_request_round_robin(sc, bp);
1614                 return;
1615         }
1616         ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1617         slice = bp->bio_length / ndisks;
1618         mod = slice % sc->sc_provider->sectorsize;
1619         if (mod != 0)
1620                 slice += sc->sc_provider->sectorsize - mod;
1621         /*
1622          * Allocate all bios before sending any request, so we can
1623          * return ENOMEM in a nice and clean way.
1624          */
1625         left = bp->bio_length;
1626         offset = bp->bio_offset;
1627         data = bp->bio_data;
1628         TAILQ_INIT(&queue);
1629         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1630                 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1631                         continue;
1632                 cbp = g_clone_bio(bp);
1633                 if (cbp == NULL) {
1634                         while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1635                                 TAILQ_REMOVE(&queue, cbp, bio_queue);
1636                                 g_destroy_bio(cbp);
1637                         }
1638                         if (bp->bio_error == 0)
1639                                 bp->bio_error = ENOMEM;
1640                         g_io_deliver(bp, bp->bio_error);
1641                         return;
1642                 }
1643                 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1644                 cbp->bio_done = g_mirror_done;
1645                 cbp->bio_caller1 = disk;
1646                 cbp->bio_to = disk->d_consumer->provider;
1647                 cbp->bio_offset = offset;
1648                 cbp->bio_data = data;
1649                 cbp->bio_length = MIN(left, slice);
1650                 left -= cbp->bio_length;
1651                 if (left == 0)
1652                         break;
1653                 offset += cbp->bio_length;
1654                 data += cbp->bio_length;
1655         }
1656         while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1657                 TAILQ_REMOVE(&queue, cbp, bio_queue);
1658                 G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1659                 disk = cbp->bio_caller1;
1660                 cbp->bio_caller1 = NULL;
1661                 cp = disk->d_consumer;
1662                 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1663                     ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1664                     cp->acr, cp->acw, cp->ace));
1665                 disk->d_consumer->index++;
1666                 g_io_request(cbp, disk->d_consumer);
1667         }
1668 }
1669
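/*
 * Dispatch a regular request issued to the mirror provider: reads go to one
 * or more components according to the configured balance policy, while
 * writes, deletes and cache flushes are replicated to every component that
 * can accept them.
 */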
1670 static void
1671 g_mirror_register_request(struct g_mirror_softc *sc, struct bio *bp)
1672 {
1673         struct bio_queue queue;
1674         struct bio *cbp;
1675         struct g_consumer *cp;
1676         struct g_mirror_disk *disk;
1677
1678         sx_assert(&sc->sc_lock, SA_XLOCKED);
1679
1680         /*
1681          * To avoid ordering issues, if a write is deferred because of a
1682          * collision with a sync request, all I/O is deferred until that
1683          * write is initiated.
1684          */
1685         if (bp->bio_from->geom != sc->sc_sync.ds_geom &&
1686             !TAILQ_EMPTY(&sc->sc_regular_delayed)) {
1687                 g_mirror_regular_delay(sc, bp);
1688                 return;
1689         }
1690
1691         switch (bp->bio_cmd) {
1692         case BIO_READ:
1693                 switch (sc->sc_balance) {
1694                 case G_MIRROR_BALANCE_LOAD:
1695                         g_mirror_request_load(sc, bp);
1696                         break;
1697                 case G_MIRROR_BALANCE_PREFER:
1698                         g_mirror_request_prefer(sc, bp);
1699                         break;
1700                 case G_MIRROR_BALANCE_ROUND_ROBIN:
1701                         g_mirror_request_round_robin(sc, bp);
1702                         break;
1703                 case G_MIRROR_BALANCE_SPLIT:
1704                         g_mirror_request_split(sc, bp);
1705                         break;
1706                 }
1707                 return;
1708         case BIO_WRITE:
1709         case BIO_DELETE:
1710                 /*
1711                  * Delay the request if it is colliding with a synchronization
1712                  * request.
1713                  */
1714                 if (g_mirror_sync_collision(sc, bp)) {
1715                         g_mirror_regular_delay(sc, bp);
1716                         return;
1717                 }
1718
1719                 if (sc->sc_idle)
1720                         g_mirror_unidle(sc);
1721                 else
1722                         sc->sc_last_write = time_uptime;
1723
1724                 /*
1725                  * Bump syncid on first write.
1726                  */
1727                 if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
1728                         sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
1729                         g_mirror_bump_syncid(sc);
1730                 }
1731
1732                 /*
1733                  * Allocate all bios before sending any request, so we can
1734                  * return ENOMEM in a nice and clean way.
1735                  */
1736                 TAILQ_INIT(&queue);
1737                 LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1738                         switch (disk->d_state) {
1739                         case G_MIRROR_DISK_STATE_ACTIVE:
1740                                 break;
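                        /*
                         * A synchronizing disk only needs writes that fall
                         * below its current synchronization offset; regions
                         * above it will be copied by the synchronization
                         * process anyway.
                         */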
1741                         case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1742                                 if (bp->bio_offset >= disk->d_sync.ds_offset)
1743                                         continue;
1744                                 break;
1745                         default:
1746                                 continue;
1747                         }
1748                         if (bp->bio_cmd == BIO_DELETE &&
1749                             (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
1750                                 continue;
1751                         cbp = g_clone_bio(bp);
1752                         if (cbp == NULL) {
1753                                 while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1754                                         TAILQ_REMOVE(&queue, cbp, bio_queue);
1755                                         g_destroy_bio(cbp);
1756                                 }
1757                                 if (bp->bio_error == 0)
1758                                         bp->bio_error = ENOMEM;
1759                                 g_io_deliver(bp, bp->bio_error);
1760                                 return;
1761                         }
1762                         TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1763                         cbp->bio_done = g_mirror_done;
1764                         cp = disk->d_consumer;
1765                         cbp->bio_caller1 = cp;
1766                         cbp->bio_to = cp->provider;
1767                         KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1768                             ("Consumer %s not opened (r%dw%de%d).",
1769                             cp->provider->name, cp->acr, cp->acw, cp->ace));
1770                 }
1771                 if (TAILQ_EMPTY(&queue)) {
1772                         KASSERT(bp->bio_cmd == BIO_DELETE,
1773                             ("No consumers for regular request %p", bp));
1774                         g_io_deliver(bp, EOPNOTSUPP);
1775                         return;
1776                 }
1777                 while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1778                         G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1779                         TAILQ_REMOVE(&queue, cbp, bio_queue);
1780                         cp = cbp->bio_caller1;
1781                         cbp->bio_caller1 = NULL;
1782                         cp->index++;
1783                         sc->sc_writes++;
1784                         g_io_request(cbp, cp);
1785                 }
1786                 /*
1787                  * Put the request onto the inflight queue, so we can check
1788                  * whether new synchronization requests collide with it.
1789                  */
1790                 TAILQ_INSERT_TAIL(&sc->sc_inflight, bp, bio_queue);
1791                 return;
1792         case BIO_SPEEDUP:
1793         case BIO_FLUSH:
1794                 TAILQ_INIT(&queue);
1795                 LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1796                         if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1797                                 continue;
1798                         cbp = g_clone_bio(bp);
1799                         if (cbp == NULL) {
1800                                 while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1801                                         TAILQ_REMOVE(&queue, cbp, bio_queue);
1802                                         g_destroy_bio(cbp);
1803                                 }
1804                                 if (bp->bio_error == 0)
1805                                         bp->bio_error = ENOMEM;
1806                                 g_io_deliver(bp, bp->bio_error);
1807                                 return;
1808                         }
1809                         TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1810                         cbp->bio_done = g_mirror_done;
1811                         cbp->bio_caller1 = disk;
1812                         cbp->bio_to = disk->d_consumer->provider;
1813                 }
1814                 KASSERT(!TAILQ_EMPTY(&queue),
1815                     ("No consumers for regular request %p", bp));
1816                 while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1817                         G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1818                         TAILQ_REMOVE(&queue, cbp, bio_queue);
1819                         disk = cbp->bio_caller1;
1820                         cbp->bio_caller1 = NULL;
1821                         cp = disk->d_consumer;
1822                         KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1823                             ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1824                             cp->acr, cp->acw, cp->ace));
1825                         cp->index++;
1826                         g_io_request(cbp, cp);
1827                 }
1828                 break;
1829         default:
1830                 KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1831                     bp->bio_cmd, sc->sc_name));
1832                 break;
1833         }
1834 }
1835
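/*
 * Return non-zero if neither the mirror geom nor its synchronization geom
 * has a busy consumer and no tasting is in progress, i.e. the device can be
 * torn down safely.
 */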
1836 static int
1837 g_mirror_can_destroy(struct g_mirror_softc *sc)
1838 {
1839         struct g_geom *gp;
1840         struct g_consumer *cp;
1841
1842         g_topology_assert();
1843         gp = sc->sc_geom;
1844         if (gp->softc == NULL)
1845                 return (1);
1846         if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
1847                 return (0);
1848         LIST_FOREACH(cp, &gp->consumer, consumer) {
1849                 if (g_mirror_is_busy(sc, cp))
1850                         return (0);
1851         }
1852         gp = sc->sc_sync.ds_geom;
1853         LIST_FOREACH(cp, &gp->consumer, consumer) {
1854                 if (g_mirror_is_busy(sc, cp))
1855                         return (0);
1856         }
1857         G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1858             sc->sc_name);
1859         return (1);
1860 }
1861
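/*
 * Attempt to destroy the device.  Release the root mount hold, and if no
 * consumer is busy, detach the softc from both geoms.  With the DRAIN flag
 * set, the thread that requested destruction is expected to finish the
 * teardown, so it is only woken up here; otherwise the device is destroyed
 * directly.  Returns non-zero on success.
 */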
1862 static int
1863 g_mirror_try_destroy(struct g_mirror_softc *sc)
1864 {
1865
1866         if (sc->sc_rootmount != NULL) {
1867                 G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
1868                     sc->sc_rootmount);
1869                 root_mount_rel(sc->sc_rootmount);
1870                 sc->sc_rootmount = NULL;
1871         }
1872         g_topology_lock();
1873         if (!g_mirror_can_destroy(sc)) {
1874                 g_topology_unlock();
1875                 return (0);
1876         }
1877         sc->sc_geom->softc = NULL;
1878         sc->sc_sync.ds_geom->softc = NULL;
1879         if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DRAIN) != 0) {
1880                 g_topology_unlock();
1881                 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1882                     &sc->sc_worker);
1883                 /* Unlock sc_lock here, as it can be destroyed after wakeup. */
1884                 sx_xunlock(&sc->sc_lock);
1885                 wakeup(&sc->sc_worker);
1886                 sc->sc_worker = NULL;
1887         } else {
1888                 g_topology_unlock();
1889                 g_mirror_destroy_device(sc);
1890         }
1891         return (1);
1892 }
1893
1894 /*
1895  * Worker thread.
1896  */
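/*
 * One worker runs per mirror device.  Each iteration it first processes any
 * pending state events, then checks whether the device has been idle long
 * enough to mark components clean, and finally handles one bio from the
 * queue: a synchronization read, the completion of a regular or
 * synchronization request, or a fresh request to the mirror provider.
 */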
1897 static void
1898 g_mirror_worker(void *arg)
1899 {
1900         struct g_mirror_softc *sc;
1901         struct g_mirror_event *ep;
1902         struct bio *bp;
1903         int timeout;
1904
1905         sc = arg;
1906         thread_lock(curthread);
1907         sched_prio(curthread, PRIBIO);
1908         thread_unlock(curthread);
1909
1910         sx_xlock(&sc->sc_lock);
1911         for (;;) {
1912                 G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1913                 /*
1914                  * First take a look at events.
1915                  * It is important to handle events before any I/O requests.
1916                  */
1917                 ep = g_mirror_event_first(sc);
1918                 if (ep != NULL) {
1919                         g_mirror_event_remove(sc, ep);
1920                         if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1921                                 /* Update only device status. */
1922                                 G_MIRROR_DEBUG(3,
1923                                     "Running event for device %s.",
1924                                     sc->sc_name);
1925                                 ep->e_error = 0;
1926                                 g_mirror_update_device(sc, true);
1927                         } else {
1928                                 /* Update disk status. */
1929                                 G_MIRROR_DEBUG(3, "Running event for disk %s.",
1930                                      g_mirror_get_diskname(ep->e_disk));
1931                                 ep->e_error = g_mirror_update_disk(ep->e_disk,
1932                                     ep->e_state);
1933                                 if (ep->e_error == 0)
1934                                         g_mirror_update_device(sc, false);
1935                         }
1936                         if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1937                                 KASSERT(ep->e_error == 0,
1938                                     ("Error cannot be handled."));
1939                                 g_mirror_event_free(ep);
1940                         } else {
1941                                 ep->e_flags |= G_MIRROR_EVENT_DONE;
1942                                 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1943                                     ep);
1944                                 mtx_lock(&sc->sc_events_mtx);
1945                                 wakeup(ep);
1946                                 mtx_unlock(&sc->sc_events_mtx);
1947                         }
1948                         if ((sc->sc_flags &
1949                             G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1950                                 if (g_mirror_try_destroy(sc)) {
1951                                         curthread->td_pflags &= ~TDP_GEOM;
1952                                         G_MIRROR_DEBUG(1, "Thread exiting.");
1953                                         kproc_exit(0);
1954                                 }
1955                         }
1956                         G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1957                         continue;
1958                 }
1959
1960                 /*
1961                  * Check if we can mark the array as CLEAN and, if we can't,
1962                  * how many seconds we should wait.
1963                  */
1964                 timeout = g_mirror_idle(sc, -1);
1965
1966                 /*
1967                  * Handle I/O requests.
1968                  */
1969                 mtx_lock(&sc->sc_queue_mtx);
1970                 bp = TAILQ_FIRST(&sc->sc_queue);
1971                 if (bp != NULL)
1972                         TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
1973                 else {
1974                         if ((sc->sc_flags &
1975                             G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1976                                 mtx_unlock(&sc->sc_queue_mtx);
1977                                 if (g_mirror_try_destroy(sc)) {
1978                                         curthread->td_pflags &= ~TDP_GEOM;
1979                                         G_MIRROR_DEBUG(1, "Thread exiting.");
1980                                         kproc_exit(0);
1981                                 }
1982                                 mtx_lock(&sc->sc_queue_mtx);
1983                                 if (!TAILQ_EMPTY(&sc->sc_queue)) {
1984                                         mtx_unlock(&sc->sc_queue_mtx);
1985                                         continue;
1986                                 }
1987                         }
1988                         if (g_mirror_event_first(sc) != NULL) {
1989                                 mtx_unlock(&sc->sc_queue_mtx);
1990                                 continue;
1991                         }
1992                         sx_xunlock(&sc->sc_lock);
1993                         MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
1994                             timeout * hz);
1995                         sx_xlock(&sc->sc_lock);
1996                         G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1997                         continue;
1998                 }
1999                 mtx_unlock(&sc->sc_queue_mtx);
2000
2001                 if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
2002                     (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
2003                         /*
2004                          * Handle completion of the first half (the read) of a
2005                          * block synchronization operation.
2006                          */
2007                         g_mirror_sync_request(sc, bp);
2008                 } else if (bp->bio_to != sc->sc_provider) {
2009                         if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
2010                                 /*
2011                                  * Handle completion of a regular I/O request.
2012                                  */
2013                                 g_mirror_regular_request(sc, bp);
2014                         else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2015                                 /*
2016                                  * Handle completion of the second half (the
2017                                  * write) of a block synchronization operation.
2018                                  */
2019                                 g_mirror_sync_request(sc, bp);
2020                         else {
2021                                 KASSERT(0,
2022                                     ("Invalid request cflags=0x%hx to=%s.",
2023                                     bp->bio_cflags, bp->bio_to->name));
2024                         }
2025                 } else {
2026                         /*
2027                          * Initiate an I/O request.
2028                          */
2029                         g_mirror_register_request(sc, bp);
2030                 }
2031                 G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
2032         }
2033 }
2034
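/*
 * Keep a component's DIRTY flag in sync with the device's idle state: mark
 * the disk dirty when write activity resumes and clean again once the device
 * has gone idle.  Does nothing when NOFAILSYNC is set.
 */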
2035 static void
2036 g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
2037 {
2038
2039         sx_assert(&sc->sc_lock, SX_LOCKED);
2040
2041         if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
2042                 return;
2043         if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2044                 G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
2045                     g_mirror_get_diskname(disk), sc->sc_name);
2046                 disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2047         } else if (sc->sc_idle &&
2048             (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2049                 G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
2050                     g_mirror_get_diskname(disk), sc->sc_name);
2051                 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2052         }
2053 }
2054
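/*
 * Reinitialize a synchronization bio as a read of the next chunk, starting
 * at 'offset', from the mirror provider.  The slot index stored in
 * bio_caller1 is preserved across g_reset_bio().
 */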
2055 static void
2056 g_mirror_sync_reinit(const struct g_mirror_disk *disk, struct bio *bp,
2057     off_t offset)
2058 {
2059         void *data;
2060         int idx;
2061
2062         data = bp->bio_data;
2063         idx = (int)(uintptr_t)bp->bio_caller1;
2064         g_reset_bio(bp);
2065
2066         bp->bio_cmd = BIO_READ;
2067         bp->bio_data = data;
2068         bp->bio_done = g_mirror_sync_done;
2069         bp->bio_from = disk->d_sync.ds_consumer;
2070         bp->bio_to = disk->d_softc->sc_provider;
2071         bp->bio_caller1 = (void *)(uintptr_t)idx;
2072         bp->bio_offset = offset;
2073         bp->bio_length = MIN(MAXPHYS,
2074             disk->d_softc->sc_mediasize - bp->bio_offset);
2075 }
2076
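/*
 * Begin synchronizing a disk: attach a dedicated consumer to the mirror
 * provider, allocate g_mirror_syncreqs bios and fire them off as parallel
 * reads.  Each completed read is later turned into a write to the disk being
 * synchronized (handled by g_mirror_sync_request()).
 */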
2077 static void
2078 g_mirror_sync_start(struct g_mirror_disk *disk)
2079 {
2080         struct g_mirror_softc *sc;
2081         struct g_mirror_disk_sync *sync;
2082         struct g_consumer *cp;
2083         struct bio *bp;
2084         int error, i;
2085
2086         g_topology_assert_not();
2087         sc = disk->d_softc;
2088         sync = &disk->d_sync;
2089         sx_assert(&sc->sc_lock, SX_LOCKED);
2090
2091         KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2092             ("Disk %s is not marked for synchronization.",
2093             g_mirror_get_diskname(disk)));
2094         KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2095             ("Device not in RUNNING state (%s, %u).", sc->sc_name,
2096             sc->sc_state));
2097
2098         sx_xunlock(&sc->sc_lock);
2099         g_topology_lock();
2100         cp = g_new_consumer(sc->sc_sync.ds_geom);
2101         cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
2102         error = g_attach(cp, sc->sc_provider);
2103         KASSERT(error == 0,
2104             ("Cannot attach to %s (error=%d).", sc->sc_name, error));
2105         error = g_access(cp, 1, 0, 0);
2106         KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
2107         g_topology_unlock();
2108         sx_xlock(&sc->sc_lock);
2109
2110         G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
2111             g_mirror_get_diskname(disk));
2112         if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
2113                 disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2114         KASSERT(sync->ds_consumer == NULL,
2115             ("Sync consumer already exists (device=%s, disk=%s).",
2116             sc->sc_name, g_mirror_get_diskname(disk)));
2117
2118         sync->ds_consumer = cp;
2119         sync->ds_consumer->private = disk;
2120         sync->ds_consumer->index = 0;
2121
2122         /*
2123          * Allocate memory for synchronization bios and initialize them.
2124          */
2125         sync->ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
2126             M_MIRROR, M_WAITOK);
2127         for (i = 0; i < g_mirror_syncreqs; i++) {
2128                 bp = g_alloc_bio();
2129                 sync->ds_bios[i] = bp;
2130
2131                 bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
2132                 bp->bio_caller1 = (void *)(uintptr_t)i;
2133                 g_mirror_sync_reinit(disk, bp, sync->ds_offset);
2134                 sync->ds_offset += bp->bio_length;
2135         }
2136
2137         /* Increase the number of disks in SYNCHRONIZING state. */
2138         sc->sc_sync.ds_ndisks++;
2139         /* Set the number of in-flight synchronization requests. */
2140         sync->ds_inflight = g_mirror_syncreqs;
2141
2142         /*
2143          * Fire off first synchronization requests.
2144          */
2145         for (i = 0; i < g_mirror_syncreqs; i++) {
2146                 bp = sync->ds_bios[i];
2147                 G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
2148                 sync->ds_consumer->index++;
2149                 /*
2150                  * Delay the request if it is colliding with a regular request.
2151                  */
2152                 if (g_mirror_regular_collision(sc, bp))
2153                         g_mirror_sync_delay(sc, bp);
2154                 else
2155                         g_io_request(bp, sync->ds_consumer);
2156         }
2157 }
2158
2159 /*
2160  * Stop synchronization process.
2161  * type: 0 - synchronization finished
2162  *       1 - synchronization stopped
2163  */
2164 static void
2165 g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
2166 {
2167         struct g_mirror_softc *sc;
2168         struct g_consumer *cp;
2169
2170         g_topology_assert_not();
2171         sc = disk->d_softc;
2172         sx_assert(&sc->sc_lock, SX_LOCKED);
2173
2174         KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2175             ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2176             g_mirror_disk_state2str(disk->d_state)));
2177         if (disk->d_sync.ds_consumer == NULL)
2178                 return;
2179
2180         if (type == 0) {
2181                 G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2182                     sc->sc_name, g_mirror_get_diskname(disk));
2183         } else /* if (type == 1) */ {
2184                 G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2185                     sc->sc_name, g_mirror_get_diskname(disk));
2186         }
2187         g_mirror_regular_release(sc);
2188         free(disk->d_sync.ds_bios, M_MIRROR);
2189         disk->d_sync.ds_bios = NULL;
2190         cp = disk->d_sync.ds_consumer;
2191         disk->d_sync.ds_consumer = NULL;
2192         disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2193         sc->sc_sync.ds_ndisks--;
2194         sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2195         g_topology_lock();
2196         g_mirror_kill_consumer(sc, cp);
2197         g_topology_unlock();
2198         sx_xlock(&sc->sc_lock);
2199 }
2200
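/*
 * Create and announce the mirror/<name> provider once enough components are
 * available.  The provider inherits the largest stripe size of its
 * components, accepts unmapped I/O only if every component does (and the
 * balance policy is not SPLIT), and any disk still marked SYNCHRONIZING has
 * its synchronization started here.
 */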
2201 static void
2202 g_mirror_launch_provider(struct g_mirror_softc *sc)
2203 {
2204         struct g_mirror_disk *disk;
2205         struct g_provider *pp, *dp;
2206
2207         sx_assert(&sc->sc_lock, SX_LOCKED);
2208
2209         g_topology_lock();
2210         pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2211         pp->flags |= G_PF_DIRECT_RECEIVE;
2212         pp->mediasize = sc->sc_mediasize;
2213         pp->sectorsize = sc->sc_sectorsize;
2214         pp->stripesize = 0;
2215         pp->stripeoffset = 0;
2216
2217         /* Splitting of unmapped BIOs could work but isn't implemented now. */
2218         if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
2219                 pp->flags |= G_PF_ACCEPT_UNMAPPED;
2220
2221         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2222                 if (disk->d_consumer && disk->d_consumer->provider) {
2223                         dp = disk->d_consumer->provider;
2224                         if (dp->stripesize > pp->stripesize) {
2225                                 pp->stripesize = dp->stripesize;
2226                                 pp->stripeoffset = dp->stripeoffset;
2227                         }
2228                         /* A provider underneath us doesn't support unmapped I/O. */
2229                         if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
2230                                 G_MIRROR_DEBUG(0, "Cancelling unmapped "
2231                                     "because of %s.", dp->name);
2232                                 pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
2233                         }
2234                 }
2235         }
2236         pp->private = sc;
2237         sc->sc_refcnt++;
2238         sc->sc_provider = pp;
2239         g_error_provider(pp, 0);
2240         g_topology_unlock();
2241         G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2242             g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2243         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2244                 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2245                         g_mirror_sync_start(disk);
2246         }
2247 }
2248
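/*
 * Withdraw the mirror/<name> provider: stop any synchronization in progress,
 * fail queued client requests with ENXIO and free bios that were generated
 * internally, then wither the provider itself.
 */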
2249 static void
2250 g_mirror_destroy_provider(struct g_mirror_softc *sc)
2251 {
2252         struct g_mirror_disk *disk;
2253         struct bio *bp;
2254
2255         g_topology_assert_not();
2256         KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2257             sc->sc_name));
2258
2259         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2260                 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2261                         g_mirror_sync_stop(disk, 1);
2262         }
2263
2264         g_topology_lock();
2265         g_error_provider(sc->sc_provider, ENXIO);
2266         mtx_lock(&sc->sc_queue_mtx);
2267         while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) {
2268                 TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
2269                 /*
2270                  * Abort any pending I/O that wasn't generated by us.
2271                  * Synchronization requests and requests destined for individual
2272                  * mirror components can be destroyed immediately.
2273                  */
2274                 if (bp->bio_to == sc->sc_provider &&
2275                     bp->bio_from->geom != sc->sc_sync.ds_geom) {
2276                         g_io_deliver(bp, ENXIO);
2277                 } else {
2278                         if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2279                                 free(bp->bio_data, M_MIRROR);
2280                         g_destroy_bio(bp);
2281                 }
2282         }
2283         mtx_unlock(&sc->sc_queue_mtx);
2284         g_wither_provider(sc->sc_provider, ENXIO);
2285         sc->sc_provider = NULL;
2286         G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name);
2287         g_topology_unlock();
2288 }
2289
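/*
 * Callout handler, presumably armed with kern.geom.mirror.timeout: force the
 * device to start even though not all components have appeared yet.
 */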
2290 static void
2291 g_mirror_go(void *arg)
2292 {
2293         struct g_mirror_softc *sc;
2294
2295         sc = arg;
2296         G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2297         g_mirror_event_send(sc, 0,
2298             G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2299 }
2300
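/*
 * Decide the state a freshly connected disk should move to by comparing its
 * syncid with the device's: an equal syncid yields ACTIVE, SYNCHRONIZING or
 * STALE depending on the dirty/autosync flags; a smaller syncid forces a
 * full resynchronization (or STALE with NOAUTOSYNC); a larger syncid means
 * the running mirror itself is stale, so the disk is destroyed and not
 * connected.
 */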
2301 static u_int
2302 g_mirror_determine_state(struct g_mirror_disk *disk)
2303 {
2304         struct g_mirror_softc *sc;
2305         u_int state;
2306
2307         sc = disk->d_softc;
2308         if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2309                 if ((disk->d_flags &
2310                     G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 &&
2311                     (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 ||
2312                      (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) {
2313                         /* Disk does not need synchronization. */
2314                         state = G_MIRROR_DISK_STATE_ACTIVE;
2315                 } else {
2316                         if ((sc->sc_flags &
2317                              G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2318                             (disk->d_flags &
2319                              G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2320                                 /*
2321                                  * We can start synchronization from
2322                                  * the stored offset.
2323                                  */
2324                                 state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2325                         } else {
2326                                 state = G_MIRROR_DISK_STATE_STALE;
2327                         }
2328                 }
2329         } else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
2330                 /*
2331                  * Reset all synchronization data for this disk, because
2332                  * even if it was synchronized, it was synchronized against
2333                  * disks with a different syncid.
2334                  */
2335                 disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2336                 disk->d_sync.ds_offset = 0;
2337                 disk->d_sync.ds_offset_done = 0;
2338                 disk->d_sync.ds_syncid = sc->sc_syncid;
2339                 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2340                     (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2341                         state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2342                 } else {
2343                         state = G_MIRROR_DISK_STATE_STALE;
2344                 }
2345         } else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
2346                 /*
2347                  * This is bad: the mirror was started on stale disks and
2348                  * a fresher disk has just arrived.
2349                  * If there were any writes in the meantime, the mirror is
2350                  * broken.
2351                  * The best choice here is to leave this disk untouched
2352                  * and inform the user loudly.
2353                  */
2354                 G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
2355                     "disk (%s) arrived! It will not be connected to the "
2356                     "running device.", sc->sc_name,
2357                     g_mirror_get_diskname(disk));
2358                 g_mirror_destroy_disk(disk);
2359                 state = G_MIRROR_DISK_STATE_NONE;
2360                 /* Return immediately, because disk was destroyed. */
2361                 return (state);
2362         }
2363         G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2364             g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2365         return (state);
2366 }
2367
2368 /*
2369  * Update device state.
2370  */
2371 static void
2372 g_mirror_update_device(struct g_mirror_softc *sc, bool force)
2373 {
2374         struct g_mirror_disk *disk;
2375         u_int state;
2376
2377         sx_assert(&sc->sc_lock, SX_XLOCKED);
2378
2379         switch (sc->sc_state) {
2380         case G_MIRROR_DEVICE_STATE_STARTING:
2381             {
2382                 struct g_mirror_disk *pdisk, *tdisk;
2383                 const char *mismatch;
2384                 uintmax_t found, newest;
2385                 u_int dirty, ndisks;
2386
2387                 /* Pre-flight checks */
2388                 LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2389                         /*
2390                          * Confirm we already detected the newest genid.
2391                          */
2392                         KASSERT(sc->sc_genid >= disk->d_genid,
2393                             ("%s: found newer genid %u (sc:%p had %u).", __func__,
2394                             disk->d_genid, sc, sc->sc_genid));
2395
2396                         /* Kick out any previously tasted stale components. */
2397                         if (disk->d_genid < sc->sc_genid) {
2398                                 G_MIRROR_DEBUG(0, "Stale 'genid' field on %s "
2399                                     "(device %s) (component=%u latest=%u), skipping.",
2400                                     g_mirror_get_diskname(disk), sc->sc_name,
2401                                     disk->d_genid, sc->sc_genid);
2402                                 g_mirror_destroy_disk(disk);
2403                                 sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2404                                 continue;
2405                         }
2406
2407                         /*
2408                          * Confirm we already detected the newest syncid.
2409                          */
2410                         KASSERT(sc->sc_syncid >= disk->d_sync.ds_syncid,
2411                             ("%s: found newer syncid %u (sc:%p had %u).",
2412                              __func__, disk->d_sync.ds_syncid, sc,
2413                              sc->sc_syncid));
2414
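                        /*
                         * Compare the metadata fields this component recorded
                         * when it was added (d_init_*) against the device's
                         * values; the first mismatch disqualifies the
                         * component.
                         */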
2415 #define DETECT_MISMATCH(field, name) \
2416                         if (mismatch == NULL &&                                 \
2417                             disk->d_init_ ## field != sc->sc_ ## field) {       \
2418                                 mismatch = name;                                \
2419                                 found = (intmax_t)disk->d_init_ ## field;       \
2420                                 newest = (intmax_t)sc->sc_ ## field;            \
2421                         }
2422                         mismatch = NULL;
2423                         DETECT_MISMATCH(ndisks, "md_all");
2424                         DETECT_MISMATCH(balance, "md_balance");
2425                         DETECT_MISMATCH(slice, "md_slice");
2426                         DETECT_MISMATCH(mediasize, "md_mediasize");
2427 #undef DETECT_MISMATCH
2428                         if (mismatch != NULL) {
2429                                 G_MIRROR_DEBUG(0, "Found a mismatching '%s' "
2430                                     "field on %s (device %s) (found=%ju "
2431                                     "newest=%ju).", mismatch,
2432                                     g_mirror_get_diskname(disk), sc->sc_name,
2433                                     found, newest);
2434                                 g_mirror_destroy_disk(disk);
2435                                 sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2436                                 continue;
2437                         }
2438                 }
2439
2440                 KASSERT(sc->sc_provider == NULL,
2441                     ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2442                 /*
2443                  * Are we ready? If the timeout (force is true) has expired, and
2444                  * any disks are present, then yes. If we're permitted to launch
2445                  * before the timeout has expired and the expected number of
2446                  * current-generation mirror disks have been tasted, then yes.
2447                  */
2448                 ndisks = g_mirror_ndisks(sc, -1);
2449                 if ((force && ndisks > 0) ||
2450                     (g_launch_mirror_before_timeout && ndisks == sc->sc_ndisks)) {
2451                         ;
2452                 } else if (ndisks == 0) {
2453                         /*
2454                          * All disks went away during the starting phase,
2455                          * so destroy the device.
2456                          */
2457                         callout_drain(&sc->sc_callout);
2458                         sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2459                         G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2460                             sc->sc_rootmount);
2461                         root_mount_rel(sc->sc_rootmount);
2462                         sc->sc_rootmount = NULL;
2463                         return;
2464                 } else {
2465                         return;
2466                 }
2467
2468                 /*
2469                  * Activate all disks with the biggest syncid.
2470                  */
2471                 if (force) {
2472                         /*
2473                          * If 'force' is true, we have been called because
2474                          * the timeout expired, so don't bother canceling it.
2475                          */
2476                         ndisks = 0;
2477                         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2478                                 if ((disk->d_flags &
2479                                     G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2480                                         ndisks++;
2481                                 }
2482                         }
2483                         if (ndisks == 0) {
2484                                 /* No valid disks found, destroy device. */
2485                                 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2486                                 G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2487                                     __LINE__, sc->sc_rootmount);
2488                                 root_mount_rel(sc->sc_rootmount);
2489                                 sc->sc_rootmount = NULL;
2490                                 return;
2491                         }
2492                 } else {
2493                         /* Cancel timeout. */
2494                         callout_drain(&sc->sc_callout);
2495                 }
2496
2497                 /*
2498                  * Here we need to look for dirty disks: if all disks with
2499                  * the biggest syncid are dirty, we have to choose the one
2500                  * with the biggest priority and rebuild the rest.
2501                  */
2502                 /*
2503                  * Find the number of dirty disks with the biggest syncid.
2504                  * Find the number of disks with the biggest syncid.
2505                  * While here, find a disk with the biggest priority.
2506                  */
2507                 dirty = ndisks = 0;
2508                 pdisk = NULL;
2509                 LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2510                         if (disk->d_sync.ds_syncid != sc->sc_syncid)
2511                                 continue;
2512                         if ((disk->d_flags &
2513                             G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2514                                 continue;
2515                         }
2516                         ndisks++;
2517                         if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2518                                 dirty++;
2519                                 if (pdisk == NULL ||
2520                                     pdisk->d_priority < disk->d_priority) {
2521                                         pdisk = disk;
2522                                 }
2523                         }
2524                 }
2525                 if (dirty == 0) {
2526                         /* No dirty disks at all, great. */
2527                 } else if (dirty == ndisks) {
2528                         /*
2529                          * Force synchronization for all dirty disks except one
2530                          * with the biggest priority.
2531                          */
2532                         KASSERT(pdisk != NULL, ("pdisk == NULL"));
2533                         G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2534                             "master disk for synchronization.",
2535                             g_mirror_get_diskname(pdisk), sc->sc_name);
2536                         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2537                                 if (disk->d_sync.ds_syncid != sc->sc_syncid)
2538                                         continue;
2539                                 if ((disk->d_flags &
2540                                     G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2541                                         continue;
2542                                 }
2543                                 KASSERT((disk->d_flags &
2544                                     G_MIRROR_DISK_FLAG_DIRTY) != 0,
2545                                     ("Disk %s isn't marked as dirty.",
2546                                     g_mirror_get_diskname(disk)));
2547                                 /* Skip the disk with the biggest priority. */
2548                                 if (disk == pdisk)
2549                                         continue;
2550                                 disk->d_sync.ds_syncid = 0;
2551                         }
2552                 } else if (dirty < ndisks) {
2553                         /*
2554                          * Force synchronization for all dirty disks.
2555                          * We have some non-dirty disks.
2556                          */
2557                         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2558                                 if (disk->d_sync.ds_syncid != sc->sc_syncid)
2559                                         continue;
2560                                 if ((disk->d_flags &
2561                                     G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2562                                         continue;
2563                                 }
2564                                 if ((disk->d_flags &
2565                                     G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2566                                         continue;
2567                                 }
2568                                 disk->d_sync.ds_syncid = 0;
2569                         }
2570                 }
2571
2572                 /* Reset hint. */
2573                 sc->sc_hint = NULL;
2574                 if (force) {
2575                         /* Remember to bump syncid on first write. */
2576                         sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2577                 }
2578                 state = G_MIRROR_DEVICE_STATE_RUNNING;
2579                 G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2580                     sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2581                     g_mirror_device_state2str(state));
2582                 sc->sc_state = state;
2583                 LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2584                         state = g_mirror_determine_state(disk);
2585                         g_mirror_event_send(disk, state,
2586                             G_MIRROR_EVENT_DONTWAIT);
2587                         if (state == G_MIRROR_DISK_STATE_STALE)
2588                                 sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2589                 }
2590                 break;
2591             }
2592         case G_MIRROR_DEVICE_STATE_RUNNING:
2593                 if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2594                     g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2595                         /*
2596                          * No usable disks, so destroy the device.
2597                          */
2598                         sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2599                         break;
2600                 } else if (g_mirror_ndisks(sc,
2601                     G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2602                     g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2603                         /*
2604                          * We have active disks, launch provider if it doesn't
2605                          * exist.
2606                          */
2607                         if (sc->sc_provider == NULL)
2608                                 g_mirror_launch_provider(sc);
2609                         if (sc->sc_rootmount != NULL) {
2610                                 G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2611                                     __LINE__, sc->sc_rootmount);
2612                                 root_mount_rel(sc->sc_rootmount);
2613                                 sc->sc_rootmount = NULL;
2614                         }
2615                 }
2616                 /*
2617                  * Genid should be bumped immediately, so do it here.
2618                  */
2619                 if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2620                         sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2621                         g_mirror_bump_genid(sc);
2622                 }
2623                 if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_NOW) != 0) {
2624                         sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID_NOW;
2625                         g_mirror_bump_syncid(sc);
2626                 }
2627                 break;
2628         default:
2629                 KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2630                     sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2631                 break;
2632         }
2633 }
2634
2635 /*
2636  * Update disk state and device state if needed.
2637  */
2638 #define DISK_STATE_CHANGED()    G_MIRROR_DEBUG(1,                       \
2639         "Disk %s state changed from %s to %s (device %s).",             \
2640         g_mirror_get_diskname(disk),                                    \
2641         g_mirror_disk_state2str(disk->d_state),                         \
2642         g_mirror_disk_state2str(state), sc->sc_name)
2643 static int
2644 g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2645 {
2646         struct g_mirror_softc *sc;
2647
2648         sc = disk->d_softc;
2649         sx_assert(&sc->sc_lock, SX_XLOCKED);
2650
2651 again:
2652         G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2653             g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2654             g_mirror_disk_state2str(state));
2655         switch (state) {
2656         case G_MIRROR_DISK_STATE_NEW:
2657                 /*
2658                  * Possible scenarios:
2659                  * 1. A new disk arrives.
2660                  */
2661                 /* Previous state should be NONE. */
2662                 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2663                     ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2664                     g_mirror_disk_state2str(disk->d_state)));
2665                 DISK_STATE_CHANGED();
2666
2667                 disk->d_state = state;
2668                 g_topology_lock();
2669                 if (LIST_EMPTY(&sc->sc_disks))
2670                         LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2671                 else {
2672                         struct g_mirror_disk *dp;
2673
2674                         LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2675                                 if (disk->d_priority >= dp->d_priority) {
2676                                         LIST_INSERT_BEFORE(dp, disk, d_next);
2677                                         dp = NULL;
2678                                         break;
2679                                 }
2680                                 if (LIST_NEXT(dp, d_next) == NULL)
2681                                         break;
2682                         }
2683                         if (dp != NULL)
2684                                 LIST_INSERT_AFTER(dp, disk, d_next);
2685                 }
2686                 g_topology_unlock();
2687                 G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2688                     sc->sc_name, g_mirror_get_diskname(disk));
2689                 if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2690                         break;
2691                 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2692                     ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2693                     g_mirror_device_state2str(sc->sc_state),
2694                     g_mirror_get_diskname(disk),
2695                     g_mirror_disk_state2str(disk->d_state)));
2696                 state = g_mirror_determine_state(disk);
2697                 if (state != G_MIRROR_DISK_STATE_NONE)
2698                         goto again;
2699                 break;
2700         case G_MIRROR_DISK_STATE_ACTIVE:
2701                 /*
2702                  * Possible scenarios:
2703                  * 1. New disk does not need synchronization.
2704                  * 2. Synchronization process finished successfully.
2705                  */
2706                 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2707                     ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2708                     g_mirror_device_state2str(sc->sc_state),
2709                     g_mirror_get_diskname(disk),
2710                     g_mirror_disk_state2str(disk->d_state)));
2711                 /* Previous state should be NEW or SYNCHRONIZING. */
2712                 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2713                     disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2714                     ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2715                     g_mirror_disk_state2str(disk->d_state)));
2716                 DISK_STATE_CHANGED();
2717
2718                 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2719                         disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2720                         disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2721                         g_mirror_sync_stop(disk, 0);
2722                 }
2723                 disk->d_state = state;
2724                 disk->d_sync.ds_offset = 0;
2725                 disk->d_sync.ds_offset_done = 0;
2726                 g_mirror_update_idle(sc, disk);
2727                 g_mirror_update_metadata(disk);
2728                 G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2729                     sc->sc_name, g_mirror_get_diskname(disk));
2730                 break;
2731         case G_MIRROR_DISK_STATE_STALE:
2732                 /*
2733                  * Possible scenarios:
2734                  * 1. Stale disk was connected.
2735                  */
2736                 /* Previous state should be NEW. */
2737                 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2738                     ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2739                     g_mirror_disk_state2str(disk->d_state)));
2740                 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2741                     ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2742                     g_mirror_device_state2str(sc->sc_state),
2743                     g_mirror_get_diskname(disk),
2744                     g_mirror_disk_state2str(disk->d_state)));
2745                 /*
2746                  * STALE state is only possible if device is marked
2747                  * NOAUTOSYNC.
2748                  */
2749                 KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2750                     ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2751                     g_mirror_device_state2str(sc->sc_state),
2752                     g_mirror_get_diskname(disk),
2753                     g_mirror_disk_state2str(disk->d_state)));
2754                 DISK_STATE_CHANGED();
2755
2756                 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2757                 disk->d_state = state;
2758                 g_mirror_update_metadata(disk);
2759                 G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2760                     sc->sc_name, g_mirror_get_diskname(disk));
2761                 break;
2762         case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2763                 /*
2764                  * Possible scenarios:
2765                  * 1. Disk which needs synchronization was connected.
2766                  */
2767                 /* Previous state should be NEW. */
2768                 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2769                     ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2770                     g_mirror_disk_state2str(disk->d_state)));
2771                 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2772                     ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2773                     g_mirror_device_state2str(sc->sc_state),
2774                     g_mirror_get_diskname(disk),
2775                     g_mirror_disk_state2str(disk->d_state)));
2776                 DISK_STATE_CHANGED();
2777
2778                 if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2779                         disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2780                 disk->d_state = state;
2781                 if (sc->sc_provider != NULL) {
2782                         g_mirror_sync_start(disk);
2783                         g_mirror_update_metadata(disk);
2784                 }
2785                 break;
2786         case G_MIRROR_DISK_STATE_DISCONNECTED:
2787                 /*
2788                  * Possible scenarios:
2789                  * 1. Device wasn't running yet, but a disk disappeared.
2790                  * 2. Disk was active and disappeared.
2791                  * 3. Disk disappeared during the synchronization process.
2792                  */
2793                 if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2794                         /*
2795                          * Previous state should be ACTIVE, STALE or
2796                          * SYNCHRONIZING.
2797                          */
2798                         KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2799                             disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2800                             disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2801                             ("Wrong disk state (%s, %s).",
2802                             g_mirror_get_diskname(disk),
2803                             g_mirror_disk_state2str(disk->d_state)));
2804                 } else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2805                         /* Previous state should be NEW. */
2806                         KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2807                             ("Wrong disk state (%s, %s).",
2808                             g_mirror_get_diskname(disk),
2809                             g_mirror_disk_state2str(disk->d_state)));
2810                         /*
2811                          * Reset bumping syncid if disk disappeared in STARTING
2812                          * state.
2813                          */
2814                         if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2815                                 sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2816 #ifdef  INVARIANTS
2817                 } else {
2818                         KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2819                             sc->sc_name,
2820                             g_mirror_device_state2str(sc->sc_state),
2821                             g_mirror_get_diskname(disk),
2822                             g_mirror_disk_state2str(disk->d_state)));
2823 #endif
2824                 }
2825                 DISK_STATE_CHANGED();
2826                 G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2827                     sc->sc_name, g_mirror_get_diskname(disk));
2828
2829                 g_mirror_destroy_disk(disk);
2830                 break;
2831         case G_MIRROR_DISK_STATE_DESTROY:
2832             {
2833                 int error;
2834
2835                 error = g_mirror_clear_metadata(disk);
2836                 if (error != 0) {
2837                         G_MIRROR_DEBUG(0,
2838                             "Device %s: failed to clear metadata on %s: %d.",
2839                             sc->sc_name, g_mirror_get_diskname(disk), error);
2840                         break;
2841                 }
2842                 DISK_STATE_CHANGED();
2843                 G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2844                     sc->sc_name, g_mirror_get_diskname(disk));
2845
2846                 g_mirror_destroy_disk(disk);
2847                 sc->sc_ndisks--;
2848                 LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2849                         g_mirror_update_metadata(disk);
2850                 }
2851                 break;
2852             }
2853         default:
2854                 KASSERT(1 == 0, ("Unknown state (%u).", state));
2855                 break;
2856         }
2857         return (0);
2858 }
2859 #undef  DISK_STATE_CHANGED
2860
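     /*
      * Read and decode the gmirror metadata stored in the last sector of
      * the provider attached to the given consumer.  The consumer is opened
      * for reading only for the duration of the call.  Returns 0 and fills
      * in *md on success, or an errno value on I/O, magic, version or
      * checksum problems.
      */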
2861 int
2862 g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2863 {
2864         struct g_provider *pp;
2865         u_char *buf;
2866         int error;
2867
2868         g_topology_assert();
2869
2870         error = g_access(cp, 1, 0, 0);
2871         if (error != 0)
2872                 return (error);
2873         pp = cp->provider;
2874         g_topology_unlock();
2875         /* Metadata is stored in the last sector. */
2876         buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2877             &error);
2878         g_topology_lock();
2879         g_access(cp, -1, 0, 0);
2880         if (buf == NULL) {
2881                 G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2882                     cp->provider->name, error);
2883                 return (error);
2884         }
2885
2886         /* Decode metadata. */
2887         error = mirror_metadata_decode(buf, md);
2888         g_free(buf);
2889         if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2890                 return (EINVAL);
2891         if (md->md_version > G_MIRROR_VERSION) {
2892                 G_MIRROR_DEBUG(0,
2893                     "Kernel module is too old to handle metadata from %s.",
2894                     cp->provider->name);
2895                 return (EINVAL);
2896         }
2897         if (error != 0) {
2898                 G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2899                     cp->provider->name);
2900                 return (error);
2901         }
2902
2903         return (0);
2904 }
2905
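     /*
      * Sanity-check a component's metadata against the existing device
      * configuration: reject duplicate disk IDs, providers that are too
      * small, mismatched sector sizes and unknown device or disk flags.
      */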
2906 static int
2907 g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2908     struct g_mirror_metadata *md)
2909 {
2910
2911         G_MIRROR_DEBUG(2, "%s: md_did %u disk %s device %s md_all 0x%x "
2912             "sc_ndisks 0x%x md_slice 0x%x sc_slice 0x%x md_balance 0x%x "
2913             "sc_balance 0x%x sc_mediasize 0x%jx pp_mediasize 0x%jx "
2914             "md_sectorsize 0x%x sc_sectorsize 0x%x md_mflags 0x%jx "
2915             "md_dflags 0x%jx md_syncid 0x%x md_genid 0x%x md_priority 0x%x "
2916             "sc_state 0x%x.",
2917             __func__, md->md_did, pp->name, sc->sc_name, md->md_all,
2918             sc->sc_ndisks, md->md_slice, sc->sc_slice, md->md_balance,
2919             sc->sc_balance, (uintmax_t)sc->sc_mediasize,
2920             (uintmax_t)pp->mediasize, md->md_sectorsize, sc->sc_sectorsize,
2921             (uintmax_t)md->md_mflags, (uintmax_t)md->md_dflags, md->md_syncid,
2922             md->md_genid, md->md_priority, sc->sc_state);
2923
2924         if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2925                 G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2926                     pp->name, md->md_did);
2927                 return (EEXIST);
2928         }
2929         if (sc->sc_mediasize > pp->mediasize) {
2930                 G_MIRROR_DEBUG(1,
2931                     "Invalid size of disk %s (device %s), skipping.", pp->name,
2932                     sc->sc_name);
2933                 return (EINVAL);
2934         }
2935         if (md->md_sectorsize != sc->sc_sectorsize) {
2936                 G_MIRROR_DEBUG(1,
2937                     "Invalid '%s' field on disk %s (device %s), skipping.",
2938                     "md_sectorsize", pp->name, sc->sc_name);
2939                 return (EINVAL);
2940         }
2941         if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2942                 G_MIRROR_DEBUG(1,
2943                     "Invalid sector size of disk %s (device %s), skipping.",
2944                     pp->name, sc->sc_name);
2945                 return (EINVAL);
2946         }
2947         if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2948                 G_MIRROR_DEBUG(1,
2949                     "Invalid device flags on disk %s (device %s), skipping.",
2950                     pp->name, sc->sc_name);
2951                 return (EINVAL);
2952         }
2953         if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2954                 G_MIRROR_DEBUG(1,
2955                     "Invalid disk flags on disk %s (device %s), skipping.",
2956                     pp->name, sc->sc_name);
2957                 return (EINVAL);
2958         }
2959         return (0);
2960 }
2961
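     /*
      * Add a component described by the given metadata to the mirror.  The
      * metadata is validated first; while the device is STARTING it may
      * also be refreshed from a component carrying a newer generation.  On
      * success a NEW disk event is posted and waited for, and metadata
      * written by an older gmirror version is upgraded in place.
      */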
2962 int
2963 g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2964     struct g_mirror_metadata *md)
2965 {
2966         struct g_mirror_disk *disk;
2967         int error;
2968
2969         g_topology_assert_not();
2970         G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2971
2972         error = g_mirror_check_metadata(sc, pp, md);
2973         if (error != 0)
2974                 return (error);
2975
2976         if (md->md_genid < sc->sc_genid) {
2977                 G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2978                     pp->name, sc->sc_name);
2979                 return (EINVAL);
2980         }
2981
2982         /*
2983          * If the component disk we're tasting has newer metadata than the
2984          * STARTING gmirror device, refresh the device from the component.
2985          */
2986         error = g_mirror_refresh_device(sc, pp, md);
2987         if (error != 0)
2988                 return (error);
2989
2990         disk = g_mirror_init_disk(sc, pp, md, &error);
2991         if (disk == NULL)
2992                 return (error);
2993         error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2994             G_MIRROR_EVENT_WAIT);
2995         if (error != 0)
2996                 return (error);
2997         if (md->md_version < G_MIRROR_VERSION) {
2998                 G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2999                     pp->name, md->md_version, G_MIRROR_VERSION);
3000                 g_mirror_update_metadata(disk);
3001         }
3002         return (0);
3003 }
3004
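     /*
      * GEOM event callback posted by g_mirror_access() once a CLOSEWAIT
      * device has been fully closed; it performs the deferred soft destroy.
      */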
3005 static void
3006 g_mirror_destroy_delayed(void *arg, int flag)
3007 {
3008         struct g_mirror_softc *sc;
3009         int error;
3010
3011         if (flag == EV_CANCEL) {
3012                 G_MIRROR_DEBUG(1, "Destroying canceled.");
3013                 return;
3014         }
3015         sc = arg;
3016         g_topology_unlock();
3017         sx_xlock(&sc->sc_lock);
3018         KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
3019             ("DESTROY flag set on %s.", sc->sc_name));
3020         KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0,
3021             ("CLOSEWAIT flag not set on %s.", sc->sc_name));
3022         G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
3023         error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
3024         if (error != 0) {
3025                 G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
3026                     sc->sc_name, error);
3027                 sx_xunlock(&sc->sc_lock);
3028         }
3029         g_topology_lock();
3030 }
3031
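     /*
      * GEOM access method for the mirror provider.  The r/w/e arguments
      * are deltas; new opens are refused once the device is being destroyed
      * or has no components.  Dropping the last writer marks the components
      * clean, and the last close of a CLOSEWAIT device schedules its
      * delayed destruction.
      */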
3032 static int
3033 g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
3034 {
3035         struct g_mirror_softc *sc;
3036         int error = 0;
3037
3038         g_topology_assert();
3039         G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
3040             acw, ace);
3041
3042         sc = pp->private;
3043         KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
3044
3045         g_topology_unlock();
3046         sx_xlock(&sc->sc_lock);
3047         if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
3048             (sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 ||
3049             LIST_EMPTY(&sc->sc_disks)) {
3050                 if (acr > 0 || acw > 0 || ace > 0)
3051                         error = ENXIO;
3052                 goto end;
3053         }
3054         sc->sc_provider_open += acr + acw + ace;
3055         if (pp->acw + acw == 0)
3056                 g_mirror_idle(sc, 0);
3057         if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 &&
3058             sc->sc_provider_open == 0)
3059                 g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL);
3060 end:
3061         sx_xunlock(&sc->sc_lock);
3062         g_topology_lock();
3063         return (error);
3064 }
3065
3066 static void
3067 g_mirror_reinit_from_metadata(struct g_mirror_softc *sc,
3068     const struct g_mirror_metadata *md)
3069 {
3070
3071         sc->sc_genid = md->md_genid;
3072         sc->sc_syncid = md->md_syncid;
3073
3074         sc->sc_slice = md->md_slice;
3075         sc->sc_balance = md->md_balance;
3076         sc->sc_mediasize = md->md_mediasize;
3077         sc->sc_ndisks = md->md_all;
3078         sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_MASK;
3079         sc->sc_flags |= (md->md_mflags & G_MIRROR_DEVICE_FLAG_MASK);
3080 }
3081
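     /*
      * Create a new mirror device from the given metadata: allocate the
      * softc, set up the main geom (which will carry the mirror provider)
      * and a separate "<name>.sync" geom used for synchronization
      * consumers, start the per-device worker thread, hold root mount and
      * arm the startup timeout that launches the device even if not all
      * components have appeared yet.
      */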
3082 struct g_geom *
3083 g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
3084     u_int type)
3085 {
3086         struct g_mirror_softc *sc;
3087         struct g_geom *gp;
3088         int error, timeout;
3089
3090         g_topology_assert();
3091         G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
3092             md->md_mid);
3093
3094         /* One disk is minimum. */
3095         if (md->md_all < 1)
3096                 return (NULL);
3097         /*
3098          * Action geom.
3099          */
3100         gp = g_new_geomf(mp, "%s", md->md_name);
3101         sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
3102         gp->start = g_mirror_start;
3103         gp->orphan = g_mirror_orphan;
3104         gp->access = g_mirror_access;
3105         gp->dumpconf = g_mirror_dumpconf;
3106
3107         sc->sc_type = type;
3108         sc->sc_id = md->md_mid;
3109         g_mirror_reinit_from_metadata(sc, md);
3110         sc->sc_sectorsize = md->md_sectorsize;
3111         sc->sc_bump_id = 0;
3112         sc->sc_idle = 1;
3113         sc->sc_last_write = time_uptime;
3114         sc->sc_writes = 0;
3115         sc->sc_refcnt = 1;
3116         sx_init(&sc->sc_lock, "gmirror:lock");
3117         TAILQ_INIT(&sc->sc_queue);
3118         mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
3119         TAILQ_INIT(&sc->sc_regular_delayed);
3120         TAILQ_INIT(&sc->sc_inflight);
3121         TAILQ_INIT(&sc->sc_sync_delayed);
3122         LIST_INIT(&sc->sc_disks);
3123         TAILQ_INIT(&sc->sc_events);
3124         mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
3125         callout_init(&sc->sc_callout, 1);
3126         mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
3127         sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
3128         gp->softc = sc;
3129         sc->sc_geom = gp;
3130         sc->sc_provider = NULL;
3131         sc->sc_provider_open = 0;
3132         /*
3133          * Synchronization geom.
3134          */
3135         gp = g_new_geomf(mp, "%s.sync", md->md_name);
3136         gp->softc = sc;
3137         gp->orphan = g_mirror_orphan;
3138         sc->sc_sync.ds_geom = gp;
3139         sc->sc_sync.ds_ndisks = 0;
3140         error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
3141             "g_mirror %s", md->md_name);
3142         if (error != 0) {
3143                 G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
3144                     sc->sc_name);
3145                 g_destroy_geom(sc->sc_sync.ds_geom);
3146                 g_destroy_geom(sc->sc_geom);
3147                 g_mirror_free_device(sc);
3148                 return (NULL);
3149         }
3150
3151         G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
3152             sc->sc_name, sc->sc_ndisks, sc->sc_id);
3153
3154         sc->sc_rootmount = root_mount_hold("GMIRROR");
3155         G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
3156         /*
3157          * Run timeout.
3158          */
3159         timeout = g_mirror_timeout * hz;
3160         callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
3161         return (sc->sc_geom);
3162 }
3163
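     /*
      * Tear down a mirror device.  G_MIRROR_DESTROY_SOFT fails with EBUSY
      * while the provider is open; G_MIRROR_DESTROY_DELAYED stops any
      * synchronization, sets CLOSEWAIT and defers destruction to the last
      * close; G_MIRROR_DESTROY_HARD proceeds even while the provider is
      * open.  The caller must hold the softc lock; it is released before a
      * successful return.
      */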
3164 int
3165 g_mirror_destroy(struct g_mirror_softc *sc, int how)
3166 {
3167         struct g_mirror_disk *disk;
3168
3169         g_topology_assert_not();
3170         sx_assert(&sc->sc_lock, SX_XLOCKED);
3171
3172         if (sc->sc_provider_open != 0) {
3173                 switch (how) {
3174                 case G_MIRROR_DESTROY_SOFT:
3175                         G_MIRROR_DEBUG(1,
3176                             "Device %s is still open (%d).", sc->sc_name,
3177                             sc->sc_provider_open);
3178                         return (EBUSY);
3179                 case G_MIRROR_DESTROY_DELAYED:
3180                         G_MIRROR_DEBUG(1,
3181                             "Device %s will be destroyed on last close.",
3182                             sc->sc_name);
3183                         LIST_FOREACH(disk, &sc->sc_disks, d_next) {
3184                                 if (disk->d_state ==
3185                                     G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3186                                         g_mirror_sync_stop(disk, 1);
3187                                 }
3188                         }
3189                         sc->sc_flags |= G_MIRROR_DEVICE_FLAG_CLOSEWAIT;
3190                         return (EBUSY);
3191                 case G_MIRROR_DESTROY_HARD:
3192                         G_MIRROR_DEBUG(1, "Device %s is still open, so it "
3193                             "can't be definitively removed.", sc->sc_name);
3194                 }
3195         }
3196
3197         if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3198                 sx_xunlock(&sc->sc_lock);
3199                 return (0);
3200         }
3201         sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
3202         sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DRAIN;
3203         G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
3204         sx_xunlock(&sc->sc_lock);
3205         mtx_lock(&sc->sc_queue_mtx);
3206         wakeup(sc);
3207         mtx_unlock(&sc->sc_queue_mtx);
3208         G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
3209         while (sc->sc_worker != NULL)
3210                 tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
3211         G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
3212         sx_xlock(&sc->sc_lock);
3213         g_mirror_destroy_device(sc);
3214         return (0);
3215 }
3216
3217 static void
3218 g_mirror_taste_orphan(struct g_consumer *cp)
3219 {
3220
3221         KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
3222             cp->provider->name));
3223 }
3224
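     /*
      * GEOM taste method: read the gmirror metadata from the offered
      * provider using a throw-away geom and consumer, skip providers that
      * do not belong here (hardcoded provider name or size mismatch,
      * INACTIVE flag), then find the existing automatic mirror with a
      * matching name or create a new one, and add the provider as a
      * component.
      */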
3225 static struct g_geom *
3226 g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
3227 {
3228         struct g_mirror_metadata md;
3229         struct g_mirror_softc *sc;
3230         struct g_consumer *cp;
3231         struct g_geom *gp;
3232         int error;
3233
3234         g_topology_assert();
3235         g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3236         G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3237
3238         gp = g_new_geomf(mp, "mirror:taste");
3239         /*
3240          * This orphan function should never be called.
3241          */
3242         gp->orphan = g_mirror_taste_orphan;
3243         cp = g_new_consumer(gp);
3244         error = g_attach(cp, pp);
3245         if (error == 0) {
3246                 error = g_mirror_read_metadata(cp, &md);
3247                 g_detach(cp);
3248         }
3249         g_destroy_consumer(cp);
3250         g_destroy_geom(gp);
3251         if (error != 0)
3252                 return (NULL);
3253         gp = NULL;
3254
3255         if (md.md_provider[0] != '\0' &&
3256             !g_compare_names(md.md_provider, pp->name))
3257                 return (NULL);
3258         if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3259                 return (NULL);
3260         if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3261                 G_MIRROR_DEBUG(0,
3262                     "Device %s: provider %s marked as inactive, skipping.",
3263                     md.md_name, pp->name);
3264                 return (NULL);
3265         }
3266         if (g_mirror_debug >= 2)
3267                 mirror_metadata_dump(&md);
3268
3269         /*
3270          * Let's check if device already exists.
3271          */
3272         sc = NULL;
3273         LIST_FOREACH(gp, &mp->geom, geom) {
3274                 sc = gp->softc;
3275                 if (sc == NULL)
3276                         continue;
3277                 if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
3278                         continue;
3279                 if (sc->sc_sync.ds_geom == gp)
3280                         continue;
3281                 if (strcmp(md.md_name, sc->sc_name) != 0)
3282                         continue;
3283                 if (md.md_mid != sc->sc_id) {
3284                         G_MIRROR_DEBUG(0, "Device %s already configured.",
3285                             sc->sc_name);
3286                         return (NULL);
3287                 }
3288                 break;
3289         }
3290         if (gp == NULL) {
3291                 gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC);
3292                 if (gp == NULL) {
3293                         G_MIRROR_DEBUG(0, "Cannot create device %s.",
3294                             md.md_name);
3295                         return (NULL);
3296                 }
3297                 sc = gp->softc;
3298         }
3299         G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3300         g_topology_unlock();
3301         sx_xlock(&sc->sc_lock);
3302         sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3303         error = g_mirror_add_disk(sc, pp, &md);
3304         sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3305         if (error != 0) {
3306                 G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3307                     pp->name, gp->name, error);
3308                 if (LIST_EMPTY(&sc->sc_disks)) {
3309                         g_cancel_event(sc);
3310                         g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3311                         g_topology_lock();
3312                         return (NULL);
3313                 }
3314                 gp = NULL;
3315         }
3316         if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3317                 g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3318                 g_topology_lock();
3319                 return (NULL);
3320         }
3321         sx_xunlock(&sc->sc_lock);
3322         g_topology_lock();
3323         return (gp);
3324 }
3325
3326 static void
3327 g_mirror_resize(struct g_consumer *cp)
3328 {
3329         struct g_mirror_disk *disk;
3330
3331         g_topology_assert();
3332         g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name);
3333
3334         disk = cp->private;
3335         if (disk == NULL)
3336                 return;
3337         g_topology_unlock();
3338         g_mirror_update_metadata(disk);
3339         g_topology_lock();
3340 }
3341
3342 static int
3343 g_mirror_destroy_geom(struct gctl_req *req __unused,
3344     struct g_class *mp __unused, struct g_geom *gp)
3345 {
3346         struct g_mirror_softc *sc;
3347         int error;
3348
3349         g_topology_unlock();
3350         sc = gp->softc;
3351         sx_xlock(&sc->sc_lock);
3352         g_cancel_event(sc);
3353         error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3354         if (error != 0)
3355                 sx_xunlock(&sc->sc_lock);
3356         g_topology_lock();
3357         return (error);
3358 }
3359
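     /*
      * Export device and per-component status as XML; this is what shows up
      * under the mirror entries in kern.geom.confxml and is parsed, e.g.,
      * by gmirror(8) for its "list" and "status" output.
      */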
3360 static void
3361 g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3362     struct g_consumer *cp, struct g_provider *pp)
3363 {
3364         struct g_mirror_softc *sc;
3365
3366         g_topology_assert();
3367
3368         sc = gp->softc;
3369         if (sc == NULL)
3370                 return;
3371         /* Skip synchronization geom. */
3372         if (gp == sc->sc_sync.ds_geom)
3373                 return;
3374         if (pp != NULL) {
3375                 /* Nothing here. */
3376         } else if (cp != NULL) {
3377                 struct g_mirror_disk *disk;
3378
3379                 disk = cp->private;
3380                 if (disk == NULL)
3381                         return;
3382                 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3383                 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3384                         sbuf_printf(sb, "%s<Synchronized>", indent);
3385                         if (disk->d_sync.ds_offset == 0)
3386                                 sbuf_cat(sb, "0%");
3387                         else
3388                                 sbuf_printf(sb, "%u%%",
3389                                     (u_int)((disk->d_sync.ds_offset * 100) /
3390                                     sc->sc_mediasize));
3391                         sbuf_cat(sb, "</Synchronized>\n");
3392                         if (disk->d_sync.ds_offset > 0)
3393                                 sbuf_printf(sb, "%s<BytesSynced>%jd"
3394                                     "</BytesSynced>\n", indent,
3395                                     (intmax_t)disk->d_sync.ds_offset);
3396                 }
3397                 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3398                     disk->d_sync.ds_syncid);
3399                 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3400                     disk->d_genid);
3401                 sbuf_printf(sb, "%s<Flags>", indent);
3402                 if (disk->d_flags == 0)
3403                         sbuf_cat(sb, "NONE");
3404                 else {
3405                         int first = 1;
3406
3407 #define ADD_FLAG(flag, name)    do {                                    \
3408         if ((disk->d_flags & (flag)) != 0) {                            \
3409                 if (!first)                                             \
3410                         sbuf_cat(sb, ", ");                             \
3411                 else                                                    \
3412                         first = 0;                                      \
3413                 sbuf_cat(sb, name);                                     \
3414         }                                                               \
3415 } while (0)
3416                         ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3417                         ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3418                         ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3419                         ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3420                             "SYNCHRONIZING");
3421                         ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3422                         ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3423 #undef  ADD_FLAG
3424                 }
3425                 sbuf_cat(sb, "</Flags>\n");
3426                 sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3427                     disk->d_priority);
3428                 sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3429                     g_mirror_disk_state2str(disk->d_state));
3430         } else {
3431                 sbuf_printf(sb, "%s<Type>", indent);
3432                 switch (sc->sc_type) {
3433                 case G_MIRROR_TYPE_AUTOMATIC:
3434                         sbuf_cat(sb, "AUTOMATIC");
3435                         break;
3436                 case G_MIRROR_TYPE_MANUAL:
3437                         sbuf_cat(sb, "MANUAL");
3438                         break;
3439                 default:
3440                         sbuf_cat(sb, "UNKNOWN");
3441                         break;
3442                 }
3443                 sbuf_cat(sb, "</Type>\n");
3444                 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3445                 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3446                 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3447                 sbuf_printf(sb, "%s<Flags>", indent);
3448                 if (sc->sc_flags == 0)
3449                         sbuf_cat(sb, "NONE");
3450                 else {
3451                         int first = 1;
3452
3453 #define ADD_FLAG(flag, name)    do {                                    \
3454         if ((sc->sc_flags & (flag)) != 0) {                             \
3455                 if (!first)                                             \
3456                         sbuf_cat(sb, ", ");                             \
3457                 else                                                    \
3458                         first = 0;                                      \
3459                 sbuf_cat(sb, name);                                     \
3460         }                                                               \
3461 } while (0)
3462                         ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3463                         ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3464 #undef  ADD_FLAG
3465                 }
3466                 sbuf_cat(sb, "</Flags>\n");
3467                 sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3468                     (u_int)sc->sc_slice);
3469                 sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3470                     balance_name(sc->sc_balance));
3471                 sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3472                     sc->sc_ndisks);
3473                 sbuf_printf(sb, "%s<State>", indent);
3474                 if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3475                         sbuf_printf(sb, "%s", "STARTING");
3476                 else if (sc->sc_ndisks ==
3477                     g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3478                         sbuf_printf(sb, "%s", "COMPLETE");
3479                 else
3480                         sbuf_printf(sb, "%s", "DEGRADED");
3481                 sbuf_cat(sb, "</State>\n");
3482         }
3483 }
3484
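     /*
      * shutdown_post_sync handler: on a clean shutdown mark every mirror
      * idle (components clean) and request a delayed destroy so each device
      * is torn down once its provider is closed.
      */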
3485 static void
3486 g_mirror_shutdown_post_sync(void *arg, int howto)
3487 {
3488         struct g_class *mp;
3489         struct g_geom *gp, *gp2;
3490         struct g_mirror_softc *sc;
3491         int error;
3492
3493         if (KERNEL_PANICKED())
3494                 return;
3495
3496         mp = arg;
3497         g_topology_lock();
3498         g_mirror_shutdown = 1;
3499         LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3500                 if ((sc = gp->softc) == NULL)
3501                         continue;
3502                 /* Skip synchronization geom. */
3503                 if (gp == sc->sc_sync.ds_geom)
3504                         continue;
3505                 g_topology_unlock();
3506                 sx_xlock(&sc->sc_lock);
3507                 g_mirror_idle(sc, -1);
3508                 g_cancel_event(sc);
3509                 error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3510                 if (error != 0)
3511                         sx_xunlock(&sc->sc_lock);
3512                 g_topology_lock();
3513         }
3514         g_topology_unlock();
3515 }
3516
3517 static void
3518 g_mirror_init(struct g_class *mp)
3519 {
3520
3521         g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3522             g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3523         if (g_mirror_post_sync == NULL)
3524                 G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3525 }
3526
3527 static void
3528 g_mirror_fini(struct g_class *mp)
3529 {
3530
3531         if (g_mirror_post_sync != NULL)
3532                 EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3533 }
3534
3535 /*
3536  * Refresh the mirror device's metadata when gmirror encounters a newer
3537  * generation as the individual components are being added to the mirror set.
3538  */
3539 static int
3540 g_mirror_refresh_device(struct g_mirror_softc *sc, const struct g_provider *pp,
3541     const struct g_mirror_metadata *md)
3542 {
3543
3544         g_topology_assert_not();
3545         sx_assert(&sc->sc_lock, SX_XLOCKED);
3546
3547         KASSERT(sc->sc_genid <= md->md_genid,
3548             ("%s: attempted to refresh from stale component %s (device %s) "
3549             "(%u < %u).", __func__, pp->name, sc->sc_name, md->md_genid,
3550             sc->sc_genid));
3551
3552         if (sc->sc_genid > md->md_genid || (sc->sc_genid == md->md_genid &&
3553             sc->sc_syncid >= md->md_syncid))
3554                 return (0);
3555
3556         G_MIRROR_DEBUG(0, "Found newer version for device %s (genid: curr=%u "
3557             "new=%u; syncid: curr=%u new=%u; ndisks: curr=%u new=%u; "
3558             "provider=%s).", sc->sc_name, sc->sc_genid, md->md_genid,
3559             sc->sc_syncid, md->md_syncid, sc->sc_ndisks, md->md_all, pp->name);
3560
3561         if (sc->sc_state != G_MIRROR_DEVICE_STATE_STARTING) {
3562                 /* Probable data corruption detected */
3563                 G_MIRROR_DEBUG(0, "Cannot refresh metadata in %s state "
3564                     "(device=%s genid=%u). A stale mirror device was launched.",
3565                     g_mirror_device_state2str(sc->sc_state), sc->sc_name,
3566                     sc->sc_genid);
3567                 return (EINVAL);
3568         }
3569
3570         /* Update softc */
3571         g_mirror_reinit_from_metadata(sc, md);
3572
3573         G_MIRROR_DEBUG(1, "Refresh device %s (id=%u, state=%s) from disk %s "
3574             "(genid=%u syncid=%u md_all=%u).", sc->sc_name, md->md_mid,
3575             g_mirror_device_state2str(sc->sc_state), pp->name, md->md_genid,
3576             md->md_syncid, (unsigned)md->md_all);
3577
3578         return (0);
3579 }
3580
3581 DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3582 MODULE_VERSION(geom_mirror, 0);