]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/geom/union/g_union.c
contrib/tzdata: import tzdata 2022a
[FreeBSD/FreeBSD.git] / sys / geom / union / g_union.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2022 Marshall Kirk McKusick <mckusick@mckusick.com>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27
28 #include <sys/param.h>
29 #include <sys/bio.h>
30 #include <sys/buf.h>
31 #include <sys/ctype.h>
32 #include <sys/kernel.h>
33 #include <sys/lock.h>
34 #include <sys/malloc.h>
35 #include <sys/module.h>
36 #include <sys/reboot.h>
37 #include <sys/rwlock.h>
38 #include <sys/sbuf.h>
39 #include <sys/sysctl.h>
40
41 #include <geom/geom.h>
42 #include <geom/geom_dbg.h>
43 #include <geom/union/g_union.h>
44
45 SYSCTL_DECL(_kern_geom);
46 static SYSCTL_NODE(_kern_geom, OID_AUTO, union, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
47     "GEOM_UNION stuff");
48 static u_int g_union_debug = 0;
49 SYSCTL_UINT(_kern_geom_union, OID_AUTO, debug, CTLFLAG_RW, &g_union_debug, 0,
50     "Debug level");
51
52 static void g_union_config(struct gctl_req *req, struct g_class *mp,
53     const char *verb);
54 static g_access_t g_union_access;
55 static g_start_t g_union_start;
56 static g_dumpconf_t g_union_dumpconf;
57 static g_orphan_t g_union_orphan;
58 static int g_union_destroy_geom(struct gctl_req *req, struct g_class *mp,
59     struct g_geom *gp);
60 static g_provgone_t g_union_providergone;
61 static g_resize_t g_union_resize;
62
63 struct g_class g_union_class = {
64         .name = G_UNION_CLASS_NAME,
65         .version = G_VERSION,
66         .ctlreq = g_union_config,
67         .access = g_union_access,
68         .start = g_union_start,
69         .dumpconf = g_union_dumpconf,
70         .orphan = g_union_orphan,
71         .destroy_geom = g_union_destroy_geom,
72         .providergone = g_union_providergone,
73         .resize = g_union_resize,
74 };
75
76 static void g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool);
77 static intmax_t g_union_fetcharg(struct gctl_req *req, const char *name);
78 static bool g_union_verify_nprefix(const char *name);
79 static void g_union_ctl_destroy(struct gctl_req *req, struct g_class *mp, bool);
80 static struct g_geom *g_union_find_geom(struct g_class *mp, const char *name);
81 static void g_union_ctl_reset(struct gctl_req *req, struct g_class *mp, bool);
82 static void g_union_ctl_revert(struct gctl_req *req, struct g_class *mp, bool);
83 static void g_union_revert(struct g_union_softc *sc);
84 static void g_union_doio(struct g_union_wip *wip);
85 static void g_union_ctl_commit(struct gctl_req *req, struct g_class *mp, bool);
86 static void g_union_setmap(struct bio *bp, struct g_union_softc *sc);
87 static bool g_union_getmap(struct bio *bp, struct g_union_softc *sc,
88         off_t *len2read);
89 static void g_union_done(struct bio *bp);
90 static void g_union_kerneldump(struct bio *bp, struct g_union_softc *sc);
91 static int g_union_dumper(void *, void *, vm_offset_t, off_t, size_t);
92 static int g_union_destroy(struct gctl_req *req, struct g_geom *gp, bool force);
93
94 /*
95  * Operate on union-specific configuration commands.
96  */
97 static void
98 g_union_config(struct gctl_req *req, struct g_class *mp, const char *verb)
99 {
100         uint32_t *version, *verbose;
101
102         g_topology_assert();
103
104         version = gctl_get_paraml(req, "version", sizeof(*version));
105         if (version == NULL) {
106                 gctl_error(req, "No '%s' argument.", "version");
107                 return;
108         }
109         if (*version != G_UNION_VERSION) {
110                 gctl_error(req, "Userland and kernel parts are out of sync.");
111                 return;
112         }
113         verbose = gctl_get_paraml(req, "verbose", sizeof(*verbose));
114         if (verbose == NULL) {
115                 gctl_error(req, "No '%s' argument.", "verbose");
116                 return;
117         }
118         if (strcmp(verb, "create") == 0) {
119                 g_union_ctl_create(req, mp, *verbose);
120                 return;
121         } else if (strcmp(verb, "destroy") == 0) {
122                 g_union_ctl_destroy(req, mp, *verbose);
123                 return;
124         } else if (strcmp(verb, "reset") == 0) {
125                 g_union_ctl_reset(req, mp, *verbose);
126                 return;
127         } else if (strcmp(verb, "revert") == 0) {
128                 g_union_ctl_revert(req, mp, *verbose);
129                 return;
130         } else if (strcmp(verb, "commit") == 0) {
131                 g_union_ctl_commit(req, mp, *verbose);
132                 return;
133         }
134
135         gctl_error(req, "Unknown verb.");
136 }
137
138 /*
139  * Create a union device.
140  */
141 static void
142 g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool verbose)
143 {
144         struct g_provider *upperpp, *lowerpp, *newpp;
145         struct g_consumer *uppercp, *lowercp;
146         struct g_union_softc *sc;
147         struct g_geom_alias *gap;
148         struct g_geom *gp;
149         intmax_t offset, secsize, size, needed;
150         const char *gunionname;
151         int *nargs, error, i, n;
152         char name[64];
153
154         g_topology_assert();
155
156         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
157         if (nargs == NULL) {
158                 gctl_error(req, "No '%s' argument.", "nargs");
159                 return;
160         }
161         if (*nargs < 2) {
162                 gctl_error(req, "Missing device(s).");
163                 return;
164         }
165         if (*nargs > 2) {
166                 gctl_error(req, "Extra device(s).");
167                 return;
168         }
169
170         offset = g_union_fetcharg(req, "offset");
171         size = g_union_fetcharg(req, "size");
172         secsize = g_union_fetcharg(req, "secsize");
173         gunionname = gctl_get_asciiparam(req, "gunionname");
174
175         upperpp = gctl_get_provider(req, "arg0");
176         lowerpp = gctl_get_provider(req, "arg1");
177         if (upperpp == NULL || lowerpp == NULL)
178                 /* error message provided by gctl_get_provider() */
179                 return;
180         /* Create the union */
181         if (secsize == 0)
182                 secsize = lowerpp->sectorsize;
183         else if ((secsize % lowerpp->sectorsize) != 0) {
184                 gctl_error(req, "Sector size %jd is not a multiple of lower "
185                     "provider %s's %jd sector size.", (intmax_t)secsize,
186                     lowerpp->name, (intmax_t)lowerpp->sectorsize);
187                 return;
188         }
189         if (secsize > maxphys) {
190                 gctl_error(req, "Too big secsize %jd for lower provider %s.",
191                     (intmax_t)secsize, lowerpp->name);
192                 return;
193         }
194         if (secsize % upperpp->sectorsize != 0) {
195                 gctl_error(req, "Sector size %jd is not a multiple of upper "
196                     "provider %s's %jd sector size.", (intmax_t)secsize,
197                     upperpp->name, (intmax_t)upperpp->sectorsize);
198                 return;
199         }
200         if ((offset % secsize) != 0) {
201                 gctl_error(req, "Offset %jd is not a multiple of lower "
202                     "provider %s's %jd sector size.", (intmax_t)offset,
203                     lowerpp->name, (intmax_t)lowerpp->sectorsize);
204                 return;
205         }
206         if (size == 0)
207                 size = lowerpp->mediasize - offset;
208         else
209                 size -= offset;
210         if ((size % secsize) != 0) {
211                 gctl_error(req, "Size %jd is not a multiple of sector size "
212                     "%jd.", (intmax_t)size, (intmax_t)secsize);
213                 return;
214         }
215         if (offset + size < lowerpp->mediasize) {
216                 gctl_error(req, "Size %jd is too small for lower provider %s, "
217                     "needs %jd.", (intmax_t)(offset + size), lowerpp->name,
218                     lowerpp->mediasize);
219                 return;
220         }
221         if (size > upperpp->mediasize) {
222                 gctl_error(req, "Upper provider %s size (%jd) is too small, "
223                     "needs %jd.", upperpp->name, (intmax_t)upperpp->mediasize,
224                     (intmax_t)size);
225                 return;
226         }
227         if (gunionname != NULL && !g_union_verify_nprefix(gunionname)) {
228                 gctl_error(req, "Gunion name %s must be alphanumeric.",
229                     gunionname);
230                 return;
231         }
232         if (gunionname != NULL) {
233                 n = snprintf(name, sizeof(name), "%s%s", gunionname,
234                     G_UNION_SUFFIX);
235         } else {
236                 n = snprintf(name, sizeof(name), "%s-%s%s", upperpp->name,
237                     lowerpp->name, G_UNION_SUFFIX);
238         }
239         if (n <= 0 || n >= sizeof(name)) {
240                 gctl_error(req, "Invalid provider name.");
241                 return;
242         }
243         LIST_FOREACH(gp, &mp->geom, geom) {
244                 if (strcmp(gp->name, name) == 0) {
245                         gctl_error(req, "Provider %s already exists.", name);
246                         return;
247                 }
248         }
249         gp = g_new_geomf(mp, "%s", name);
250         sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
251         rw_init(&sc->sc_rwlock, "gunion");
252         TAILQ_INIT(&sc->sc_wiplist);
253         sc->sc_offset = offset;
254         sc->sc_size = size;
255         sc->sc_sectorsize = secsize;
256         sc->sc_reads = 0;
257         sc->sc_writes = 0;
258         sc->sc_deletes = 0;
259         sc->sc_getattrs = 0;
260         sc->sc_flushes = 0;
261         sc->sc_speedups = 0;
262         sc->sc_cmd0s = 0;
263         sc->sc_cmd1s = 0;
264         sc->sc_cmd2s = 0;
265         sc->sc_readbytes = 0;
266         sc->sc_wrotebytes = 0;
267         sc->sc_writemap_memory = 0;
268         gp->softc = sc;
269
270         newpp = g_new_providerf(gp, "%s", gp->name);
271         newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
272         newpp->mediasize = size;
273         newpp->sectorsize = secsize;
274         LIST_FOREACH(gap, &upperpp->aliases, ga_next)
275                 g_provider_add_alias(newpp, "%s%s", gap->ga_alias,
276                     G_UNION_SUFFIX);
277         LIST_FOREACH(gap, &lowerpp->aliases, ga_next)
278                 g_provider_add_alias(newpp, "%s%s", gap->ga_alias,
279                     G_UNION_SUFFIX);
280         lowercp = g_new_consumer(gp);
281         lowercp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
282         if ((error = g_attach(lowercp, lowerpp)) != 0) {
283                 gctl_error(req, "Error %d: cannot attach to provider %s.",
284                     error, lowerpp->name);
285                 goto fail1;
286         }
287         /* request read and exclusive access for lower */
288         if ((error = g_access(lowercp, 1, 0, 1)) != 0) {
289                 gctl_error(req, "Error %d: cannot obtain exclusive access to "
290                     "%s.\n\tMust be unmounted or mounted read-only.", error,
291                     lowerpp->name);
292                 goto fail2;
293         }
294         uppercp = g_new_consumer(gp);
295         uppercp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
296         if ((error = g_attach(uppercp, upperpp)) != 0) {
297                 gctl_error(req, "Error %d: cannot attach to provider %s.",
298                     error, upperpp->name);
299                 goto fail3;
300         }
301         /* request read, write, and exclusive access for upper */
302         if ((error = g_access(uppercp, 1, 1, 1)) != 0) {
303                 gctl_error(req, "Error %d: cannot obtain write access to %s.",
304                     error, upperpp->name);
305                 goto fail4;
306         }
307         sc->sc_uppercp = uppercp;
308         sc->sc_lowercp = lowercp;
309
310         newpp->flags |= (upperpp->flags & G_PF_ACCEPT_UNMAPPED) &
311             (lowerpp->flags & G_PF_ACCEPT_UNMAPPED);
312         g_error_provider(newpp, 0);
313         /*
314          * Allocate the map that tracks the sectors that have been written
315          * to the top layer. We use a 2-level hierarchy as that lets us
316          * map up to 1 petabyte using allocations of less than 33 Mb
317          * when using 4K byte sectors (or 268 Mb with 512 byte sectors).
318          *
319          * We totally populate the leaf nodes rather than allocating them
320          * as they are first used because their usage occurs in the
321          * g_union_start() routine that may be running in the g_down
322          * thread which cannot sleep.
323          */
324         sc->sc_map_size = roundup(size / secsize, BITS_PER_ENTRY);
325         needed = sc->sc_map_size / BITS_PER_ENTRY;
326         for (sc->sc_root_size = 1;
327              sc->sc_root_size * sc->sc_root_size < needed;
328              sc->sc_root_size++)
329                 continue;
330         sc->sc_writemap_root = g_malloc(sc->sc_root_size * sizeof(uint64_t *),
331             M_WAITOK | M_ZERO);
332         sc->sc_leaf_size = sc->sc_root_size;
333         sc->sc_bits_per_leaf = sc->sc_leaf_size * BITS_PER_ENTRY;
334         sc->sc_leafused = g_malloc(roundup(sc->sc_root_size, BITS_PER_ENTRY),
335             M_WAITOK | M_ZERO);
336         for (i = 0; i < sc->sc_root_size; i++)
337                 sc->sc_writemap_root[i] =
338                     g_malloc(sc->sc_leaf_size * sizeof(uint64_t),
339                     M_WAITOK | M_ZERO);
340         sc->sc_writemap_memory =
341             (sc->sc_root_size + sc->sc_root_size * sc->sc_leaf_size) *
342             sizeof(uint64_t) + roundup(sc->sc_root_size, BITS_PER_ENTRY);
343         if (verbose)
344                 gctl_error(req, "Device %s created with memory map size %jd.",
345                     gp->name, (intmax_t)sc->sc_writemap_memory);
346         G_UNION_DEBUG(1, "Device %s created with memory map size %jd.",
347             gp->name, (intmax_t)sc->sc_writemap_memory);
348         return;
349
350 fail4:
351         g_detach(uppercp);
352 fail3:
353         g_destroy_consumer(uppercp);
354         g_access(lowercp, -1, 0, -1);
355 fail2:
356         g_detach(lowercp);
357 fail1:
358         g_destroy_consumer(lowercp);
359         g_destroy_provider(newpp);
360         g_destroy_geom(gp);
361 }
362
363 /*
364  * Fetch named option and verify that it is positive.
365  */
366 static intmax_t
367 g_union_fetcharg(struct gctl_req *req, const char *name)
368 {
369         intmax_t *val;
370
371         val = gctl_get_paraml_opt(req, name, sizeof(*val));
372         if (val == NULL)
373                 return (0);
374         if (*val >= 0)
375                 return (*val);
376         gctl_error(req, "Invalid '%s': negative value, using default.", name);
377         return (0);
378 }
379
380 /*
381  * Verify that a name is alphanumeric.
382  */
383 static bool
384 g_union_verify_nprefix(const char *name)
385 {
386         int i;
387
388         for (i = 0; i < strlen(name); i++) {
389                 if (isalpha(name[i]) == 0 && isdigit(name[i]) == 0) {
390                         return (false);
391                 }
392         }
393         return (true);
394 }
395
396 /*
397  * Destroy a union device.
398  */
399 static void
400 g_union_ctl_destroy(struct gctl_req *req, struct g_class *mp, bool verbose)
401 {
402         int *nargs, *force, error, i;
403         struct g_geom *gp;
404         const char *name;
405         char param[16];
406
407         g_topology_assert();
408
409         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
410         if (nargs == NULL) {
411                 gctl_error(req, "No '%s' argument.", "nargs");
412                 return;
413         }
414         if (*nargs <= 0) {
415                 gctl_error(req, "Missing device(s).");
416                 return;
417         }
418         force = gctl_get_paraml(req, "force", sizeof(*force));
419         if (force == NULL) {
420                 gctl_error(req, "No 'force' argument.");
421                 return;
422         }
423
424         for (i = 0; i < *nargs; i++) {
425                 snprintf(param, sizeof(param), "arg%d", i);
426                 name = gctl_get_asciiparam(req, param);
427                 if (name == NULL) {
428                         gctl_msg(req, "No '%s' argument.", param);
429                         continue;
430                 }
431                 if (strncmp(name, _PATH_DEV, strlen(_PATH_DEV)) == 0)
432                         name += strlen(_PATH_DEV);
433                 gp = g_union_find_geom(mp, name);
434                 if (gp == NULL) {
435                         gctl_msg(req, "Device %s is invalid.", name);
436                         continue;
437                 }
438                 error = g_union_destroy(verbose ? req : NULL, gp, *force);
439                 if (error != 0)
440                         gctl_msg(req, "Error %d: cannot destroy device %s.",
441                             error, gp->name);
442         }
443         gctl_post_messages(req);
444 }
445
446 /*
447  * Find a union geom.
448  */
449 static struct g_geom *
450 g_union_find_geom(struct g_class *mp, const char *name)
451 {
452         struct g_geom *gp;
453
454         LIST_FOREACH(gp, &mp->geom, geom) {
455                 if (strcmp(gp->name, name) == 0)
456                         return (gp);
457         }
458         return (NULL);
459 }
460
461 /*
462  * Zero out all the statistics associated with a union device.
463  */
464 static void
465 g_union_ctl_reset(struct gctl_req *req, struct g_class *mp, bool verbose)
466 {
467         struct g_union_softc *sc;
468         struct g_provider *pp;
469         struct g_geom *gp;
470         char param[16];
471         int i, *nargs;
472
473         g_topology_assert();
474
475         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
476         if (nargs == NULL) {
477                 gctl_error(req, "No '%s' argument.", "nargs");
478                 return;
479         }
480         if (*nargs <= 0) {
481                 gctl_error(req, "Missing device(s).");
482                 return;
483         }
484
485         for (i = 0; i < *nargs; i++) {
486                 snprintf(param, sizeof(param), "arg%d", i);
487                 pp = gctl_get_provider(req, param);
488                 if (pp == NULL) {
489                         gctl_msg(req, "No '%s' argument.", param);
490                         continue;
491                 }
492                 gp = pp->geom;
493                 if (gp->class != mp) {
494                         gctl_msg(req, "Provider %s is invalid.",
495                             pp->name);
496                         continue;
497                 }
498                 sc = gp->softc;
499                 sc->sc_reads = 0;
500                 sc->sc_writes = 0;
501                 sc->sc_deletes = 0;
502                 sc->sc_getattrs = 0;
503                 sc->sc_flushes = 0;
504                 sc->sc_speedups = 0;
505                 sc->sc_cmd0s = 0;
506                 sc->sc_cmd1s = 0;
507                 sc->sc_cmd2s = 0;
508                 sc->sc_readbytes = 0;
509                 sc->sc_wrotebytes = 0;
510                 if (verbose)
511                         gctl_msg(req, "Device %s has been reset.", pp->name);
512                 G_UNION_DEBUG(1, "Device %s has been reset.", pp->name);
513         }
514         gctl_post_messages(req);
515 }
516
517 /*
518  * Revert all write requests made to the top layer of the union.
519  */
520 static void
521 g_union_ctl_revert(struct gctl_req *req, struct g_class *mp, bool verbose)
522 {
523         struct g_union_softc *sc;
524         struct g_provider *pp;
525         struct g_geom *gp;
526         char param[16];
527         int i, *nargs;
528
529         g_topology_assert();
530
531         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
532         if (nargs == NULL) {
533                 gctl_error(req, "No '%s' argument.", "nargs");
534                 return;
535         }
536         if (*nargs <= 0) {
537                 gctl_error(req, "Missing device(s).");
538                 return;
539         }
540
541         for (i = 0; i < *nargs; i++) {
542                 snprintf(param, sizeof(param), "arg%d", i);
543                 pp = gctl_get_provider(req, param);
544                 if (pp == NULL) {
545                         gctl_msg(req, "No '%s' argument.", param);
546                         continue;
547                 }
548                 gp = pp->geom;
549                 if (gp->class != mp) {
550                         gctl_msg(req, "Provider %s is invalid.", pp->name);
551                         continue;
552                 }
553                 sc = gp->softc;
554                 if (g_union_get_writelock(sc) != 0) {
555                         gctl_msg(req, "Revert already in progress for "
556                             "provider %s.", pp->name);
557                         continue;
558                 }
559                 /*
560                  * No mount or other use of union is allowed.
561                  */
562                 if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) {
563                         gctl_msg(req, "Unable to get exclusive access for "
564                             "reverting of %s;\n\t%s cannot be mounted or "
565                             "otherwise open during a revert.",
566                              pp->name, pp->name);
567                         g_union_rel_writelock(sc);
568                         continue;
569                 }
570                 g_union_revert(sc);
571                 g_union_rel_writelock(sc);
572                 if (verbose)
573                         gctl_msg(req, "Device %s has been reverted.", pp->name);
574                 G_UNION_DEBUG(1, "Device %s has been reverted.", pp->name);
575         }
576         gctl_post_messages(req);
577 }
578
579 /*
580  * Revert union writes by zero'ing out the writemap.
581  */
582 static void
583 g_union_revert(struct g_union_softc *sc)
584 {
585         int i;
586
587         G_WLOCK(sc);
588         for (i = 0; i < sc->sc_root_size; i++)
589                 memset(sc->sc_writemap_root[i], 0,
590                     sc->sc_leaf_size * sizeof(uint64_t));
591         memset(sc->sc_leafused, 0, roundup(sc->sc_root_size, BITS_PER_ENTRY));
592         G_WUNLOCK(sc);
593 }
594
595 /*
596  * Commit all the writes made in the top layer to the lower layer.
597  */
598 static void
599 g_union_ctl_commit(struct gctl_req *req, struct g_class *mp, bool verbose)
600 {
601         struct g_union_softc *sc;
602         struct g_provider *pp, *lowerpp;
603         struct g_consumer *lowercp;
604         struct g_geom *gp;
605         struct bio *bp;
606         char param[16];
607         off_t len2rd, len2wt, savelen;
608         int i, error, error1, *nargs, *force, *reboot;
609
610         g_topology_assert();
611
612         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
613         if (nargs == NULL) {
614                 gctl_error(req, "No '%s' argument.", "nargs");
615                 return;
616         }
617         if (*nargs <= 0) {
618                 gctl_error(req, "Missing device(s).");
619                 return;
620         }
621         force = gctl_get_paraml(req, "force", sizeof(*force));
622         if (force == NULL) {
623                 gctl_error(req, "No 'force' argument.");
624                 return;
625         }
626         reboot = gctl_get_paraml(req, "reboot", sizeof(*reboot));
627         if (reboot == NULL) {
628                 gctl_error(req, "No 'reboot' argument.");
629                 return;
630         }
631
632         /* Get a bio buffer to do our I/O */
633         bp = g_alloc_bio();
634         bp->bio_data = g_malloc(MAXBSIZE, M_WAITOK);
635         bp->bio_done = biodone;
636         for (i = 0; i < *nargs; i++) {
637                 snprintf(param, sizeof(param), "arg%d", i);
638                 pp = gctl_get_provider(req, param);
639                 if (pp == NULL) {
640                         gctl_msg(req, "No '%s' argument.", param);
641                         continue;
642                 }
643                 gp = pp->geom;
644                 if (gp->class != mp) {
645                         gctl_msg(req, "Provider %s is invalid.", pp->name);
646                         continue;
647                 }
648                 sc = gp->softc;
649                 if (g_union_get_writelock(sc) != 0) {
650                         gctl_msg(req, "Commit already in progress for "
651                             "provider %s.", pp->name);
652                         continue;
653                 }
654         
655                 /* upgrade to write access for lower */
656                 lowercp = sc->sc_lowercp;
657                 lowerpp = lowercp->provider;
658                 /*
659                  * No mount or other use of union is allowed, unless the
660                  * -f flag is given which allows read-only mount or usage.
661                  */
662                 if ((*force == false && pp->acr > 0) || pp->acw > 0 ||
663                      pp->ace > 0) {
664                         gctl_msg(req, "Unable to get exclusive access for "
665                             "writing of %s.\n\tNote that %s cannot be mounted "
666                             "or otherwise\n\topen during a commit unless the "
667                             "-f flag is used.", pp->name, pp->name);
668                         g_union_rel_writelock(sc);
669                         continue;
670                 }
671                 /*
672                  * No mount or other use of lower media is allowed, unless the
673                  * -f flag is given which allows read-only mount or usage.
674                  */
675                 if ((*force == false && lowerpp->acr > lowercp->acr) ||
676                      lowerpp->acw > lowercp->acw ||
677                      lowerpp->ace > lowercp->ace) {
678                         gctl_msg(req, "provider %s is unable to get "
679                             "exclusive access to %s\n\tfor writing. Note that "
680                             "%s cannot be mounted or otherwise open\n\tduring "
681                             "a commit unless the -f flag is used.", pp->name,
682                             lowerpp->name, lowerpp->name);
683                         g_union_rel_writelock(sc);
684                         continue;
685                 }
686                 if ((error = g_access(lowercp, 0, 1, 0)) != 0) {
687                         gctl_msg(req, "Error %d: provider %s is unable to "
688                             "access %s for writing.", error, pp->name,
689                             lowerpp->name);
690                         g_union_rel_writelock(sc);
691                         continue;
692                 }
693                 g_topology_unlock();
694                 /* Loop over write map copying across written blocks */
695                 bp->bio_offset = 0;
696                 bp->bio_length = sc->sc_map_size * sc->sc_sectorsize;
697                 G_RLOCK(sc);
698                 error = 0;
699                 while (bp->bio_length > 0) {
700                         if (!g_union_getmap(bp, sc, &len2rd)) {
701                                 /* not written, so skip */
702                                 bp->bio_offset += len2rd;
703                                 bp->bio_length -= len2rd;
704                                 continue;
705                         }
706                         G_RUNLOCK(sc);
707                         /* need to read then write len2rd sectors */
708                         for ( ; len2rd > 0; len2rd -= len2wt) {
709                                 /* limit ourselves to MAXBSIZE size I/Os */
710                                 len2wt = len2rd;
711                                 if (len2wt > MAXBSIZE)
712                                         len2wt = MAXBSIZE;
713                                 savelen = bp->bio_length;
714                                 bp->bio_length = len2wt;
715                                 bp->bio_cmd = BIO_READ;
716                                 g_io_request(bp, sc->sc_uppercp);
717                                 if ((error = biowait(bp, "rdunion")) != 0) {
718                                         gctl_msg(req, "Commit read error %d "
719                                             "in provider %s, commit aborted.",
720                                             error, pp->name);
721                                         goto cleanup;
722                                 }
723                                 bp->bio_flags &= ~BIO_DONE;
724                                 bp->bio_cmd = BIO_WRITE;
725                                 g_io_request(bp, lowercp);
726                                 if ((error = biowait(bp, "wtunion")) != 0) {
727                                         gctl_msg(req, "Commit write error %d "
728                                             "in provider %s, commit aborted.",
729                                             error, pp->name);
730                                         goto cleanup;
731                                 }
732                                 bp->bio_flags &= ~BIO_DONE;
733                                 bp->bio_offset += len2wt;
734                                 bp->bio_length = savelen - len2wt;
735                         }
736                         G_RLOCK(sc);
737                 }
738                 G_RUNLOCK(sc);
739                 /* clear the write map */
740                 g_union_revert(sc);
741 cleanup:
742                 g_topology_lock();
743                 /* return lower to previous access */
744                 if ((error1 = g_access(lowercp, 0, -1, 0)) != 0) {
745                         G_UNION_DEBUG(2, "Error %d: device %s could not reset "
746                             "access to %s (r=0 w=-1 e=0).", error1, pp->name,
747                             lowerpp->name);
748                 }
749                 g_union_rel_writelock(sc);
750                 if (error == 0 && verbose)
751                         gctl_msg(req, "Device %s has been committed.",
752                             pp->name);
753                 G_UNION_DEBUG(1, "Device %s has been committed.", pp->name);
754         }
755         gctl_post_messages(req);
756         g_free(bp->bio_data);
757         g_destroy_bio(bp);
758         if (*reboot)
759                 kern_reboot(RB_AUTOBOOT);
760 }
761
762 /*
763  * Generally allow access unless a commit is in progress.
764  */
765 static int
766 g_union_access(struct g_provider *pp, int r, int w, int e)
767 {
768         struct g_union_softc *sc;
769
770         sc = pp->geom->softc;
771         if (sc == NULL) {
772                 if (r <= 0 && w <= 0 && e <= 0)
773                         return (0);
774                 return (ENXIO);
775         }
776         r += pp->acr;
777         w += pp->acw;
778         e += pp->ace;
779         if (g_union_get_writelock(sc) != 0) {
780                 if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0)
781                         return (0);
782                 return (EBUSY);
783         }
784         g_union_rel_writelock(sc);
785         return (0);
786 }
787
788 /*
789  * Initiate an I/O operation on the union device.
790  */
791 static void
792 g_union_start(struct bio *bp)
793 {
794         struct g_union_softc *sc;
795         struct g_union_wip *wip;
796         struct bio *cbp;
797
798         sc = bp->bio_to->geom->softc;
799         if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
800                 wip = g_malloc(sizeof(*wip), M_NOWAIT);
801                 if (wip == NULL) {
802                         g_io_deliver(bp, ENOMEM);
803                         return;
804                 }
805                 TAILQ_INIT(&wip->wip_waiting);
806                 wip->wip_bp = bp;
807                 wip->wip_sc = sc;
808                 wip->wip_start = bp->bio_offset + sc->sc_offset;
809                 wip->wip_end = wip->wip_start + bp->bio_length - 1;
810                 wip->wip_numios = 1;
811                 wip->wip_error = 0;
812                 g_union_doio(wip);
813                 return;
814         }
815
816         /*
817          * All commands other than read and write are passed through to
818          * the upper-level device since it is writable and thus able to
819          * respond to delete, flush, and speedup requests.
820          */
821         cbp = g_clone_bio(bp);
822         if (cbp == NULL) {
823                 g_io_deliver(bp, ENOMEM);
824                 return;
825         }
826         cbp->bio_offset = bp->bio_offset + sc->sc_offset;
827         cbp->bio_done = g_std_done;
828
829         switch (cbp->bio_cmd) {
830         case BIO_DELETE:
831                 G_UNION_LOGREQ(cbp, "Delete request received.");
832                 atomic_add_long(&sc->sc_deletes, 1);
833                 break;
834         case BIO_GETATTR:
835                 G_UNION_LOGREQ(cbp, "Getattr request received.");
836                 atomic_add_long(&sc->sc_getattrs, 1);
837                 if (strcmp(cbp->bio_attribute, "GEOM::kerneldump") != 0)
838                         /* forward the GETATTR to the lower-level device */
839                         break;
840                 g_union_kerneldump(bp, sc);
841                 return;
842         case BIO_FLUSH:
843                 G_UNION_LOGREQ(cbp, "Flush request received.");
844                 atomic_add_long(&sc->sc_flushes, 1);
845                 break;
846         case BIO_SPEEDUP:
847                 G_UNION_LOGREQ(cbp, "Speedup request received.");
848                 atomic_add_long(&sc->sc_speedups, 1);
849                 break;
850         case BIO_CMD0:
851                 G_UNION_LOGREQ(cbp, "Cmd0 request received.");
852                 atomic_add_long(&sc->sc_cmd0s, 1);
853                 break;
854         case BIO_CMD1:
855                 G_UNION_LOGREQ(cbp, "Cmd1 request received.");
856                 atomic_add_long(&sc->sc_cmd1s, 1);
857                 break;
858         case BIO_CMD2:
859                 G_UNION_LOGREQ(cbp, "Cmd2 request received.");
860                 atomic_add_long(&sc->sc_cmd2s, 1);
861                 break;
862         default:
863                 G_UNION_LOGREQ(cbp, "Unknown (%d) request received.",
864                     cbp->bio_cmd);
865                 break;
866         }
867         g_io_request(cbp, sc->sc_uppercp);
868 }
869
870 /*
871  * Initiate a read or write operation on the union device.
872  */
873 static void
874 g_union_doio(struct g_union_wip *wip)
875 {
876         struct g_union_softc *sc;
877         struct g_consumer *cp, *firstcp;
878         struct g_union_wip *activewip;
879         struct bio *cbp, *firstbp;
880         off_t rdlen, len2rd, offset;
881         int iocnt, needstoblock;
882         char *level;
883
884         /*
885          * To maintain consistency, we cannot allow concurrent reads
886          * or writes to the same block.
887          *
888          * A work-in-progress (wip) structure is allocated for each
889          * read or write request. All active requests are kept on the
890          * softc sc_wiplist. As each request arrives, it is checked to
891          * see if it overlaps any of the active entries. If it does not
892          * overlap, then it is added to the active list and initiated.
893          * If it does overlap an active entry, it is added to the
894          * wip_waiting list for the active entry that it overlaps.
895          * When an active entry completes, it restarts all the requests
896          * on its wip_waiting list.
897          */
898         sc = wip->wip_sc;
899         G_WLOCK(sc);
900         TAILQ_FOREACH(activewip, &sc->sc_wiplist, wip_next) {
901                 if (wip->wip_end < activewip->wip_start ||
902                     wip->wip_start > activewip->wip_end)
903                         continue;
904                 needstoblock = 1;
905                 if (wip->wip_bp->bio_cmd == BIO_WRITE)
906                         if (activewip->wip_bp->bio_cmd == BIO_WRITE)
907                                 sc->sc_writeblockwrite += 1;
908                         else
909                                 sc->sc_readblockwrite += 1;
910                 else
911                         if (activewip->wip_bp->bio_cmd == BIO_WRITE)
912                                 sc->sc_writeblockread += 1;
913                         else {
914                                 sc->sc_readcurrentread += 1;
915                                 needstoblock = 0;
916                         }
917                 /* Put request on a waiting list if necessary */
918                 if (needstoblock) {
919                         TAILQ_INSERT_TAIL(&activewip->wip_waiting, wip,
920                             wip_next);
921                         G_WUNLOCK(sc);
922                         return;
923                 }
924         }
925         /* Put request on the active list */
926         TAILQ_INSERT_TAIL(&sc->sc_wiplist, wip, wip_next);
927
928         /*
929          * Process I/O requests that have been cleared to go.
930          */
931         cbp = g_clone_bio(wip->wip_bp);
932         if (cbp == NULL) {
933                 TAILQ_REMOVE(&sc->sc_wiplist, wip, wip_next);
934                 G_WUNLOCK(sc);
935                 KASSERT(TAILQ_FIRST(&wip->wip_waiting) == NULL,
936                     ("g_union_doio: non-empty work-in-progress waiting queue"));
937                 g_io_deliver(wip->wip_bp, ENOMEM);
938                 g_free(wip);
939                 return;
940         }
941         G_WUNLOCK(sc);
942         cbp->bio_caller1 = wip;
943         cbp->bio_done = g_union_done;
944         cbp->bio_offset = wip->wip_start;
945
946         /*
947          * Writes are always done to the top level. The blocks that
948          * are written are recorded in the bitmap when the I/O completes.
949          */
950         if (cbp->bio_cmd == BIO_WRITE) {
951                 G_UNION_LOGREQ(cbp, "Sending %jd byte write request to upper "
952                     "level.", cbp->bio_length);
953                 atomic_add_long(&sc->sc_writes, 1);
954                 atomic_add_long(&sc->sc_wrotebytes, cbp->bio_length);
955                 g_io_request(cbp, sc->sc_uppercp);
956                 return;
957         }
958         /*
959          * The usual read case is that we either read the top layer
960          * if the block has been previously written or the bottom layer
961          * if it has not been written. However, it is possible that
962          * only part of the block has been written, For example we may
963          * have written a UFS/FFS file fragment comprising several
964          * sectors out of an 8-sector block.  Here, if the entire
965          * 8-sector block is read for example by a snapshot needing
966          * to copy the full block, then we need to read the written
967          * sectors from the upper level and the unwritten sectors from
968          * the lower level. We do this by alternately reading from the
969          * top and bottom layers until we complete the read. We
970          * simplify for the common case to just do the I/O and return.
971          */
972         atomic_add_long(&sc->sc_reads, 1);
973         atomic_add_long(&sc->sc_readbytes, cbp->bio_length);
974         rdlen = cbp->bio_length;
975         offset = 0;
976         for (iocnt = 0; ; iocnt++) {
977                 if (g_union_getmap(cbp, sc, &len2rd)) {
978                         /* read top */
979                         cp = sc->sc_uppercp;
980                         level = "upper";
981                 } else {
982                         /* read bottom */
983                         cp = sc->sc_lowercp;
984                         level = "lower";
985                 }
986                 /* Check if only a single read is required */
987                 if (iocnt == 0 && rdlen == len2rd) {
988                         G_UNION_LOGREQLVL((cp == sc->sc_uppercp) ?
989                             3 : 4, cbp, "Sending %jd byte read "
990                             "request to %s level.", len2rd, level);
991                         g_io_request(cbp, cp);
992                         return;
993                 }
994                 cbp->bio_length = len2rd;
995                 if ((cbp->bio_flags & BIO_UNMAPPED) != 0)
996                         cbp->bio_ma_offset += offset;
997                 else
998                         cbp->bio_data += offset;
999                 offset += len2rd;
1000                 rdlen -= len2rd;
1001                 G_UNION_LOGREQLVL(3, cbp, "Sending %jd byte read "
1002                     "request to %s level.", len2rd, level);
1003                 /*
1004                  * To avoid prematurely notifying our consumer
1005                  * that their I/O has completed, we have to delay
1006                  * issuing our first I/O request until we have
1007                  * issued all the additional I/O requests.
1008                  */
1009                 if (iocnt > 0) {
1010                         atomic_add_long(&wip->wip_numios, 1);
1011                         g_io_request(cbp, cp);
1012                 } else {
1013                         firstbp = cbp;
1014                         firstcp = cp;
1015                 }
1016                 if (rdlen == 0)
1017                         break;
1018                 /* set up for next read */
1019                 cbp = g_clone_bio(wip->wip_bp);
1020                 if (cbp == NULL) {
1021                         wip->wip_error = ENOMEM;
1022                         atomic_add_long(&wip->wip_numios, -1);
1023                         break;
1024                 }
1025                 cbp->bio_caller1 = wip;
1026                 cbp->bio_done = g_union_done;
1027                 cbp->bio_offset += offset;
1028                 cbp->bio_length = rdlen;
1029                 atomic_add_long(&sc->sc_reads, 1);
1030         }
1031         /* We have issued all our I/O, so start the first one */
1032         g_io_request(firstbp, firstcp);
1033         return;
1034 }
1035
1036 /*
1037  * Used when completing a union I/O operation.
1038  */
1039 static void
1040 g_union_done(struct bio *bp)
1041 {
1042         struct g_union_wip *wip, *waitingwip;
1043         struct g_union_softc *sc;
1044
1045         wip = bp->bio_caller1;
1046         if (wip->wip_error != 0 && bp->bio_error == 0)
1047                 bp->bio_error = wip->wip_error;
1048         wip->wip_error = 0;
1049         if (atomic_fetchadd_long(&wip->wip_numios, -1) == 1) {
1050                 sc = wip->wip_sc;
1051                 G_WLOCK(sc);
1052                 if (bp->bio_cmd == BIO_WRITE)
1053                         g_union_setmap(bp, sc);
1054                 TAILQ_REMOVE(&sc->sc_wiplist, wip, wip_next);
1055                 G_WUNLOCK(sc);
1056                 while ((waitingwip = TAILQ_FIRST(&wip->wip_waiting)) != NULL) {
1057                         TAILQ_REMOVE(&wip->wip_waiting, waitingwip, wip_next);
1058                         g_union_doio(waitingwip);
1059                 }
1060                 g_free(wip);
1061         }
1062         g_std_done(bp);
1063 }
1064
1065 /*
1066  * Record blocks that have been written in the map.
1067  */
1068 static void
1069 g_union_setmap(struct bio *bp, struct g_union_softc *sc)
1070 {
1071         size_t root_idx;
1072         uint64_t **leaf;
1073         uint64_t *wordp;
1074         off_t start, numsec;
1075
1076         G_WLOCKOWNED(sc);
1077         KASSERT(bp->bio_offset % sc->sc_sectorsize == 0,
1078             ("g_union_setmap: offset not on sector boundry"));
1079         KASSERT(bp->bio_length % sc->sc_sectorsize == 0,
1080             ("g_union_setmap: length not a multiple of sectors"));
1081         start = bp->bio_offset / sc->sc_sectorsize;
1082         numsec = bp->bio_length / sc->sc_sectorsize;
1083         KASSERT(start + numsec <= sc->sc_map_size,
1084             ("g_union_setmap: block %jd is out of range", start + numsec));
1085         for ( ; numsec > 0; numsec--, start++) {
1086                 root_idx = start / sc->sc_bits_per_leaf;
1087                 leaf = &sc->sc_writemap_root[root_idx];
1088                 wordp = &(*leaf)
1089                     [(start % sc->sc_bits_per_leaf) / BITS_PER_ENTRY];
1090                 *wordp |= 1ULL << (start % BITS_PER_ENTRY);
1091                 sc->sc_leafused[root_idx / BITS_PER_ENTRY] |=
1092                     1ULL << (root_idx % BITS_PER_ENTRY);
1093         }
1094 }
1095
1096 /*
1097  * Check map to determine whether blocks have been written.
1098  *
1099  * Return true if they have been written so should be read from the top
1100  * layer. Return false if they have not been written so should be read
1101  * from the bottom layer. Return in len2read the bytes to be read. See
1102  * the comment above the BIO_READ implementation in g_union_start() for
1103  * an explantion of why len2read may be shorter than the buffer length.
1104  */
1105 static bool
1106 g_union_getmap(struct bio *bp, struct g_union_softc *sc, off_t *len2read)
1107 {
1108         off_t start, numsec, leafresid, bitloc;
1109         bool first, maptype, retval;
1110         uint64_t *leaf, word;
1111         size_t root_idx;
1112
1113         KASSERT(bp->bio_offset % sc->sc_sectorsize == 0,
1114             ("g_union_getmap: offset not on sector boundry"));
1115         KASSERT(bp->bio_length % sc->sc_sectorsize == 0,
1116             ("g_union_getmap: length not a multiple of sectors"));
1117         start = bp->bio_offset / sc->sc_sectorsize;
1118         numsec = bp->bio_length / sc->sc_sectorsize;
1119         G_UNION_DEBUG(4, "g_union_getmap: check %jd sectors starting at %jd\n",
1120             numsec, start);
1121         KASSERT(start + numsec <= sc->sc_map_size,
1122             ("g_union_getmap: block %jd is out of range", start + numsec));
1123                 root_idx = start / sc->sc_bits_per_leaf;
1124         first = true;
1125         maptype = false;
1126         while (numsec > 0) {
1127                 /* Check first if the leaf records any written sectors */
1128                 root_idx = start / sc->sc_bits_per_leaf;
1129                 leafresid = sc->sc_bits_per_leaf -
1130                     (start % sc->sc_bits_per_leaf);
1131                 if (((sc->sc_leafused[root_idx / BITS_PER_ENTRY]) &
1132                     (1ULL << (root_idx % BITS_PER_ENTRY))) == 0) {
1133                         if (first) {
1134                                 maptype = false;
1135                                 first = false;
1136                         }
1137                         if (maptype)
1138                                 break;
1139                         numsec -= leafresid;
1140                         start += leafresid;
1141                         continue;
1142                 }
1143                 /* Check up to a word boundry, then check word by word */
1144                 leaf = sc->sc_writemap_root[root_idx];
1145                 word = leaf[(start % sc->sc_bits_per_leaf) / BITS_PER_ENTRY];
1146                 bitloc = start % BITS_PER_ENTRY;
1147                 if (bitloc == 0 && (word == 0 || word == ~0)) {
1148                         if (first) {
1149                                 if (word == 0)
1150                                         maptype = false;
1151                                 else
1152                                         maptype = true;
1153                                 first = false;
1154                         }
1155                         if ((word == 0 && maptype) ||
1156                             (word == ~0 && !maptype))
1157                                 break;
1158                         numsec -= BITS_PER_ENTRY;
1159                         start += BITS_PER_ENTRY;
1160                         continue;
1161                 }
1162                 for ( ; bitloc < BITS_PER_ENTRY; bitloc ++) {
1163                         retval = (word & (1ULL << bitloc)) != 0;
1164                         if (first) {
1165                                 maptype = retval;
1166                                 first = false;
1167                         }
1168                         if (maptype == retval) {
1169                                 numsec--;
1170                                 start++;
1171                                 continue;
1172                         }
1173                         goto out;
1174                 }
1175         }
1176 out:
1177         if (numsec < 0) {
1178                 start += numsec;
1179                 numsec = 0;
1180         }
1181         *len2read = bp->bio_length - (numsec * sc->sc_sectorsize);
1182         G_UNION_DEBUG(maptype ? 3 : 4,
1183             "g_union_getmap: return maptype %swritten for %jd "
1184             "sectors ending at %jd\n", maptype ? "" : "NOT ",
1185             *len2read / sc->sc_sectorsize, start - 1);
1186         return (maptype);
1187 }
1188
1189 /*
1190  * Fill in details for a BIO_GETATTR request.
1191  */
1192 static void
1193 g_union_kerneldump(struct bio *bp, struct g_union_softc *sc)
1194 {
1195         struct g_kerneldump *gkd;
1196         struct g_geom *gp;
1197         struct g_provider *pp;
1198
1199         gkd = (struct g_kerneldump *)bp->bio_data;
1200         gp = bp->bio_to->geom;
1201         g_trace(G_T_TOPOLOGY, "%s(%s, %jd, %jd)", __func__, gp->name,
1202             (intmax_t)gkd->offset, (intmax_t)gkd->length);
1203
1204         pp = LIST_FIRST(&gp->provider);
1205
1206         gkd->di.dumper = g_union_dumper;
1207         gkd->di.priv = sc;
1208         gkd->di.blocksize = pp->sectorsize;
1209         gkd->di.maxiosize = DFLTPHYS;
1210         gkd->di.mediaoffset = sc->sc_offset + gkd->offset;
1211         if (gkd->offset > sc->sc_size) {
1212                 g_io_deliver(bp, ENODEV);
1213                 return;
1214         }
1215         if (gkd->offset + gkd->length > sc->sc_size)
1216                 gkd->length = sc->sc_size - gkd->offset;
1217         gkd->di.mediasize = gkd->length;
1218         g_io_deliver(bp, 0);
1219 }
1220
1221 /*
1222  * Handler for g_union_kerneldump().
1223  */
1224 static int
1225 g_union_dumper(void *priv, void *virtual, vm_offset_t physical, off_t offset,
1226     size_t length)
1227 {
1228
1229         return (0);
1230 }
1231
1232 /*
1233  * List union statistics.
1234  */
1235 static void
1236 g_union_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
1237     struct g_consumer *cp, struct g_provider *pp)
1238 {
1239         struct g_union_softc *sc;
1240
1241         if (pp != NULL || cp != NULL || gp->softc == NULL)
1242                 return;
1243         sc = gp->softc;
1244         sbuf_printf(sb, "%s<Reads>%ju</Reads>\n", indent,
1245             (uintmax_t)sc->sc_reads);
1246         sbuf_printf(sb, "%s<Writes>%ju</Writes>\n", indent,
1247             (uintmax_t)sc->sc_writes);
1248         sbuf_printf(sb, "%s<Deletes>%ju</Deletes>\n", indent,
1249             (uintmax_t)sc->sc_deletes);
1250         sbuf_printf(sb, "%s<Getattrs>%ju</Getattrs>\n", indent,
1251             (uintmax_t)sc->sc_getattrs);
1252         sbuf_printf(sb, "%s<Flushes>%ju</Flushes>\n", indent,
1253             (uintmax_t)sc->sc_flushes);
1254         sbuf_printf(sb, "%s<Speedups>%ju</Speedups>\n", indent,
1255             (uintmax_t)sc->sc_speedups);
1256         sbuf_printf(sb, "%s<Cmd0s>%ju</Cmd0s>\n", indent,
1257             (uintmax_t)sc->sc_cmd0s);
1258         sbuf_printf(sb, "%s<Cmd1s>%ju</Cmd1s>\n", indent,
1259             (uintmax_t)sc->sc_cmd1s);
1260         sbuf_printf(sb, "%s<Cmd2s>%ju</Cmd2s>\n", indent,
1261             (uintmax_t)sc->sc_cmd2s);
1262         sbuf_printf(sb, "%s<ReadCurrentRead>%ju</ReadCurrentRead>\n", indent,
1263             (uintmax_t)sc->sc_readcurrentread);
1264         sbuf_printf(sb, "%s<ReadBlockWrite>%ju</ReadBlockWrite>\n", indent,
1265             (uintmax_t)sc->sc_readblockwrite);
1266         sbuf_printf(sb, "%s<WriteBlockRead>%ju</WriteBlockRead>\n", indent,
1267             (uintmax_t)sc->sc_writeblockread);
1268         sbuf_printf(sb, "%s<WriteBlockWrite>%ju</WriteBlockWrite>\n", indent,
1269             (uintmax_t)sc->sc_writeblockwrite);
1270         sbuf_printf(sb, "%s<ReadBytes>%ju</ReadBytes>\n", indent,
1271             (uintmax_t)sc->sc_readbytes);
1272         sbuf_printf(sb, "%s<WroteBytes>%ju</WroteBytes>\n", indent,
1273             (uintmax_t)sc->sc_wrotebytes);
1274         sbuf_printf(sb, "%s<Offset>%jd</Offset>\n", indent,
1275             (intmax_t)sc->sc_offset);
1276 }
1277
1278 /*
1279  * Clean up an orphaned geom.
1280  */
1281 static void
1282 g_union_orphan(struct g_consumer *cp)
1283 {
1284
1285         g_topology_assert();
1286         g_union_destroy(NULL, cp->geom, true);
1287 }
1288
1289 /*
1290  * Clean up a union geom.
1291  */
1292 static int
1293 g_union_destroy_geom(struct gctl_req *req, struct g_class *mp,
1294     struct g_geom *gp)
1295 {
1296
1297         return (g_union_destroy(NULL, gp, false));
1298 }
1299
1300 /*
1301  * Clean up a union device.
1302  */
1303 static int
1304 g_union_destroy(struct gctl_req *req, struct g_geom *gp, bool force)
1305 {
1306         struct g_union_softc *sc;
1307         struct g_provider *pp;
1308         int error;
1309
1310         g_topology_assert();
1311         sc = gp->softc;
1312         if (sc == NULL)
1313                 return (ENXIO);
1314         pp = LIST_FIRST(&gp->provider);
1315         if ((sc->sc_flags & DOING_COMMIT) != 0 ||
1316             (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0))) {
1317                 if (force) {
1318                         if (req != NULL)
1319                                 gctl_msg(req, "Device %s is still in use, "
1320                                     "so is being forcibly removed.", pp->name);
1321                         G_UNION_DEBUG(1, "Device %s is still in use, so "
1322                             "is being forcibly removed.", pp->name);
1323                 } else {
1324                         if (req != NULL)
1325                                 gctl_msg(req, "Device %s is still open "
1326                                     "(r=%d w=%d e=%d).", pp->name, pp->acr,
1327                                     pp->acw, pp->ace);
1328                         G_UNION_DEBUG(1, "Device %s is still open "
1329                             "(r=%d w=%d e=%d).", pp->name, pp->acr,
1330                             pp->acw, pp->ace);
1331                         return (EBUSY);
1332                 }
1333         } else {
1334                 if (req != NULL)
1335                         gctl_msg(req, "Device %s removed.", pp->name);
1336                 G_UNION_DEBUG(1, "Device %s removed.", pp->name);
1337         }
1338         /* Close consumers */
1339         if ((error = g_access(sc->sc_lowercp, -1, 0, -1)) != 0)
1340                 G_UNION_DEBUG(2, "Error %d: device %s could not reset access "
1341                     "to %s.", error, pp->name, sc->sc_lowercp->provider->name);
1342         if ((error = g_access(sc->sc_uppercp, -1, -1, -1)) != 0)
1343                 G_UNION_DEBUG(2, "Error %d: device %s could not reset access "
1344                     "to %s.", error, pp->name, sc->sc_uppercp->provider->name);
1345
1346         g_wither_geom(gp, ENXIO);
1347
1348         return (0);
1349 }
1350
1351 /*
1352  * Clean up a union provider.
1353  */
1354 static void
1355 g_union_providergone(struct g_provider *pp)
1356 {
1357         struct g_geom *gp;
1358         struct g_union_softc *sc;
1359         size_t i;
1360
1361         gp = pp->geom;
1362         sc = gp->softc;
1363         gp->softc = NULL;
1364         for (i = 0; i < sc->sc_root_size; i++)
1365                 g_free(sc->sc_writemap_root[i]);
1366         g_free(sc->sc_writemap_root);
1367         g_free(sc->sc_leafused);
1368         rw_destroy(&sc->sc_rwlock);
1369         g_free(sc);
1370 }
1371
1372 /*
1373  * Respond to a resized provider.
1374  */
1375 static void
1376 g_union_resize(struct g_consumer *cp)
1377 {
1378         struct g_union_softc *sc;
1379         struct g_geom *gp;
1380
1381         g_topology_assert();
1382
1383         gp = cp->geom;
1384         sc = gp->softc;
1385
1386         /*
1387          * If size has gotten bigger, ignore it and just keep using
1388          * the space we already had. Otherwise we are done.
1389          */
1390         if (sc->sc_size < cp->provider->mediasize - sc->sc_offset)
1391                 return;
1392         g_union_destroy(NULL, gp, true);
1393 }
1394
1395 DECLARE_GEOM_CLASS(g_union_class, g_union);
1396 MODULE_VERSION(geom_union, 0);