]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/geom/union/g_union.c
MFV: cherry-pick "PR/358: Fix width for -f - (jpalus)"
[FreeBSD/FreeBSD.git] / sys / geom / union / g_union.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2022 Marshall Kirk McKusick <mckusick@mckusick.com>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27
28 #include <sys/param.h>
29 #include <sys/bio.h>
30 #include <sys/buf.h>
31 #include <sys/ctype.h>
32 #include <sys/kernel.h>
33 #include <sys/lock.h>
34 #include <sys/malloc.h>
35 #include <sys/module.h>
36 #include <sys/reboot.h>
37 #include <sys/rwlock.h>
38 #include <sys/sbuf.h>
39 #include <sys/sysctl.h>
40
41 #include <geom/geom.h>
42 #include <geom/geom_dbg.h>
43 #include <geom/union/g_union.h>
44
45 SYSCTL_DECL(_kern_geom);
46 static SYSCTL_NODE(_kern_geom, OID_AUTO, union, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
47     "GEOM_UNION stuff");
48 static u_int g_union_debug = 0;
49 SYSCTL_UINT(_kern_geom_union, OID_AUTO, debug, CTLFLAG_RW, &g_union_debug, 0,
50     "Debug level");
51
52 static void g_union_config(struct gctl_req *req, struct g_class *mp,
53     const char *verb);
54 static g_access_t g_union_access;
55 static g_start_t g_union_start;
56 static g_dumpconf_t g_union_dumpconf;
57 static g_orphan_t g_union_orphan;
58 static int g_union_destroy_geom(struct gctl_req *req, struct g_class *mp,
59     struct g_geom *gp);
60 static g_provgone_t g_union_providergone;
61 static g_resize_t g_union_resize;
62
63 struct g_class g_union_class = {
64         .name = G_UNION_CLASS_NAME,
65         .version = G_VERSION,
66         .ctlreq = g_union_config,
67         .access = g_union_access,
68         .start = g_union_start,
69         .dumpconf = g_union_dumpconf,
70         .orphan = g_union_orphan,
71         .destroy_geom = g_union_destroy_geom,
72         .providergone = g_union_providergone,
73         .resize = g_union_resize,
74 };
75
76 static void g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool);
77 static intmax_t g_union_fetcharg(struct gctl_req *req, const char *name);
78 static bool g_union_verify_nprefix(const char *name);
79 static void g_union_ctl_destroy(struct gctl_req *req, struct g_class *mp, bool);
80 static struct g_geom *g_union_find_geom(struct g_class *mp, const char *name);
81 static void g_union_ctl_reset(struct gctl_req *req, struct g_class *mp, bool);
82 static void g_union_ctl_revert(struct gctl_req *req, struct g_class *mp, bool);
83 static void g_union_revert(struct g_union_softc *sc);
84 static void g_union_doio(struct g_union_wip *wip);
85 static void g_union_ctl_commit(struct gctl_req *req, struct g_class *mp, bool);
86 static void g_union_setmap(struct bio *bp, struct g_union_softc *sc);
87 static bool g_union_getmap(struct bio *bp, struct g_union_softc *sc,
88         off_t *len2read);
89 static void g_union_done(struct bio *bp);
90 static void g_union_kerneldump(struct bio *bp, struct g_union_softc *sc);
91 static int g_union_dumper(void *, void *, off_t, size_t);
92 static int g_union_destroy(struct gctl_req *req, struct g_geom *gp, bool force);
93
94 /*
95  * Operate on union-specific configuration commands.
96  */
97 static void
98 g_union_config(struct gctl_req *req, struct g_class *mp, const char *verb)
99 {
100         uint32_t *version, *verbose;
101
102         g_topology_assert();
103
104         version = gctl_get_paraml(req, "version", sizeof(*version));
105         if (version == NULL) {
106                 gctl_error(req, "No '%s' argument.", "version");
107                 return;
108         }
109         if (*version != G_UNION_VERSION) {
110                 gctl_error(req, "Userland and kernel parts are out of sync.");
111                 return;
112         }
113         verbose = gctl_get_paraml(req, "verbose", sizeof(*verbose));
114         if (verbose == NULL) {
115                 gctl_error(req, "No '%s' argument.", "verbose");
116                 return;
117         }
118         if (strcmp(verb, "create") == 0) {
119                 g_union_ctl_create(req, mp, *verbose);
120                 return;
121         } else if (strcmp(verb, "destroy") == 0) {
122                 g_union_ctl_destroy(req, mp, *verbose);
123                 return;
124         } else if (strcmp(verb, "reset") == 0) {
125                 g_union_ctl_reset(req, mp, *verbose);
126                 return;
127         } else if (strcmp(verb, "revert") == 0) {
128                 g_union_ctl_revert(req, mp, *verbose);
129                 return;
130         } else if (strcmp(verb, "commit") == 0) {
131                 g_union_ctl_commit(req, mp, *verbose);
132                 return;
133         }
134
135         gctl_error(req, "Unknown verb.");
136 }
137
138 /*
139  * Create a union device.
140  */
141 static void
142 g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool verbose)
143 {
144         struct g_provider *upperpp, *lowerpp, *newpp;
145         struct g_consumer *uppercp, *lowercp;
146         struct g_union_softc *sc;
147         struct g_geom_alias *gap;
148         struct g_geom *gp;
149         intmax_t offset, secsize, size, needed;
150         const char *gunionname;
151         int *nargs, error, i, n;
152         char name[64];
153
154         g_topology_assert();
155
156         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
157         if (nargs == NULL) {
158                 gctl_error(req, "No '%s' argument.", "nargs");
159                 return;
160         }
161         if (*nargs < 2) {
162                 gctl_error(req, "Missing device(s).");
163                 return;
164         }
165         if (*nargs > 2) {
166                 gctl_error(req, "Extra device(s).");
167                 return;
168         }
169
170         offset = g_union_fetcharg(req, "offset");
171         size = g_union_fetcharg(req, "size");
172         secsize = g_union_fetcharg(req, "secsize");
173         gunionname = gctl_get_asciiparam(req, "gunionname");
174
175         upperpp = gctl_get_provider(req, "arg0");
176         lowerpp = gctl_get_provider(req, "arg1");
177         if (upperpp == NULL || lowerpp == NULL)
178                 /* error message provided by gctl_get_provider() */
179                 return;
180         /* Create the union */
181         if (secsize == 0)
182                 secsize = lowerpp->sectorsize;
183         else if ((secsize % lowerpp->sectorsize) != 0) {
184                 gctl_error(req, "Sector size %jd is not a multiple of lower "
185                     "provider %s's %jd sector size.", (intmax_t)secsize,
186                     lowerpp->name, (intmax_t)lowerpp->sectorsize);
187                 return;
188         }
189         if (secsize > maxphys) {
190                 gctl_error(req, "Too big secsize %jd for lower provider %s.",
191                     (intmax_t)secsize, lowerpp->name);
192                 return;
193         }
194         if (secsize % upperpp->sectorsize != 0) {
195                 gctl_error(req, "Sector size %jd is not a multiple of upper "
196                     "provider %s's %jd sector size.", (intmax_t)secsize,
197                     upperpp->name, (intmax_t)upperpp->sectorsize);
198                 return;
199         }
200         if ((offset % secsize) != 0) {
201                 gctl_error(req, "Offset %jd is not a multiple of lower "
202                     "provider %s's %jd sector size.", (intmax_t)offset,
203                     lowerpp->name, (intmax_t)lowerpp->sectorsize);
204                 return;
205         }
206         if (size == 0)
207                 size = lowerpp->mediasize - offset;
208         else
209                 size -= offset;
210         if ((size % secsize) != 0) {
211                 gctl_error(req, "Size %jd is not a multiple of sector size "
212                     "%jd.", (intmax_t)size, (intmax_t)secsize);
213                 return;
214         }
215         if (offset + size < lowerpp->mediasize) {
216                 gctl_error(req, "Size %jd is too small for lower provider %s, "
217                     "needs %jd.", (intmax_t)(offset + size), lowerpp->name,
218                     lowerpp->mediasize);
219                 return;
220         }
221         if (size > upperpp->mediasize) {
222                 gctl_error(req, "Upper provider %s size (%jd) is too small, "
223                     "needs %jd.", upperpp->name, (intmax_t)upperpp->mediasize,
224                     (intmax_t)size);
225                 return;
226         }
227         if (gunionname != NULL && !g_union_verify_nprefix(gunionname)) {
228                 gctl_error(req, "Gunion name %s must be alphanumeric.",
229                     gunionname);
230                 return;
231         }
232         if (gunionname != NULL) {
233                 n = snprintf(name, sizeof(name), "%s%s", gunionname,
234                     G_UNION_SUFFIX);
235         } else {
236                 n = snprintf(name, sizeof(name), "%s-%s%s", upperpp->name,
237                     lowerpp->name, G_UNION_SUFFIX);
238         }
239         if (n <= 0 || n >= sizeof(name)) {
240                 gctl_error(req, "Invalid provider name.");
241                 return;
242         }
243         LIST_FOREACH(gp, &mp->geom, geom) {
244                 if (strcmp(gp->name, name) == 0) {
245                         gctl_error(req, "Provider %s already exists.", name);
246                         return;
247                 }
248         }
249         gp = g_new_geomf(mp, "%s", name);
250         sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
251         rw_init(&sc->sc_rwlock, "gunion");
252         TAILQ_INIT(&sc->sc_wiplist);
253         sc->sc_offset = offset;
254         sc->sc_size = size;
255         sc->sc_sectorsize = secsize;
256         sc->sc_reads = 0;
257         sc->sc_writes = 0;
258         sc->sc_deletes = 0;
259         sc->sc_getattrs = 0;
260         sc->sc_flushes = 0;
261         sc->sc_speedups = 0;
262         sc->sc_cmd0s = 0;
263         sc->sc_cmd1s = 0;
264         sc->sc_cmd2s = 0;
265         sc->sc_readbytes = 0;
266         sc->sc_wrotebytes = 0;
267         sc->sc_writemap_memory = 0;
268         gp->softc = sc;
269
270         newpp = g_new_providerf(gp, "%s", gp->name);
271         newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
272         newpp->mediasize = size;
273         newpp->sectorsize = secsize;
274         LIST_FOREACH(gap, &upperpp->aliases, ga_next)
275                 g_provider_add_alias(newpp, "%s%s", gap->ga_alias,
276                     G_UNION_SUFFIX);
277         LIST_FOREACH(gap, &lowerpp->aliases, ga_next)
278                 g_provider_add_alias(newpp, "%s%s", gap->ga_alias,
279                     G_UNION_SUFFIX);
280         lowercp = g_new_consumer(gp);
281         lowercp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
282         if ((error = g_attach(lowercp, lowerpp)) != 0) {
283                 gctl_error(req, "Error %d: cannot attach to provider %s.",
284                     error, lowerpp->name);
285                 goto fail1;
286         }
287         /* request read and exclusive access for lower */
288         if ((error = g_access(lowercp, 1, 0, 1)) != 0) {
289                 gctl_error(req, "Error %d: cannot obtain exclusive access to "
290                     "%s.\n\tMust be unmounted or mounted read-only.", error,
291                     lowerpp->name);
292                 goto fail2;
293         }
294         uppercp = g_new_consumer(gp);
295         uppercp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
296         if ((error = g_attach(uppercp, upperpp)) != 0) {
297                 gctl_error(req, "Error %d: cannot attach to provider %s.",
298                     error, upperpp->name);
299                 goto fail3;
300         }
301         /* request read, write, and exclusive access for upper */
302         if ((error = g_access(uppercp, 1, 1, 1)) != 0) {
303                 gctl_error(req, "Error %d: cannot obtain write access to %s.",
304                     error, upperpp->name);
305                 goto fail4;
306         }
307         sc->sc_uppercp = uppercp;
308         sc->sc_lowercp = lowercp;
309
310         newpp->flags |= (upperpp->flags & G_PF_ACCEPT_UNMAPPED) &
311             (lowerpp->flags & G_PF_ACCEPT_UNMAPPED);
312         g_error_provider(newpp, 0);
313         /*
314          * Allocate the map that tracks the sectors that have been written
315          * to the top layer. We use a 2-level hierarchy as that lets us
316          * map up to 1 petabyte using allocations of less than 33 Mb
317          * when using 4K byte sectors (or 268 Mb with 512 byte sectors).
318          *
319          * We totally populate the leaf nodes rather than allocating them
320          * as they are first used because their usage occurs in the
321          * g_union_start() routine that may be running in the g_down
322          * thread which cannot sleep.
323          */
324         sc->sc_map_size = roundup(size / secsize, BITS_PER_ENTRY);
325         needed = sc->sc_map_size / BITS_PER_ENTRY;
326         for (sc->sc_root_size = 1;
327              sc->sc_root_size * sc->sc_root_size < needed;
328              sc->sc_root_size++)
329                 continue;
330         sc->sc_writemap_root = g_malloc(sc->sc_root_size * sizeof(uint64_t *),
331             M_WAITOK | M_ZERO);
332         sc->sc_leaf_size = sc->sc_root_size;
333         sc->sc_bits_per_leaf = sc->sc_leaf_size * BITS_PER_ENTRY;
334         sc->sc_leafused = g_malloc(roundup(sc->sc_root_size, BITS_PER_ENTRY),
335             M_WAITOK | M_ZERO);
336         for (i = 0; i < sc->sc_root_size; i++)
337                 sc->sc_writemap_root[i] =
338                     g_malloc(sc->sc_leaf_size * sizeof(uint64_t),
339                     M_WAITOK | M_ZERO);
340         sc->sc_writemap_memory =
341             (sc->sc_root_size + sc->sc_root_size * sc->sc_leaf_size) *
342             sizeof(uint64_t) + roundup(sc->sc_root_size, BITS_PER_ENTRY);
343         if (verbose)
344                 gctl_msg(req, 0, "Device %s created with memory map size %jd.",
345                     gp->name, (intmax_t)sc->sc_writemap_memory);
346         gctl_post_messages(req);
347         G_UNION_DEBUG(1, "Device %s created with memory map size %jd.",
348             gp->name, (intmax_t)sc->sc_writemap_memory);
349         return;
350
351 fail4:
352         g_detach(uppercp);
353 fail3:
354         g_destroy_consumer(uppercp);
355         g_access(lowercp, -1, 0, -1);
356 fail2:
357         g_detach(lowercp);
358 fail1:
359         g_destroy_consumer(lowercp);
360         g_destroy_provider(newpp);
361         g_destroy_geom(gp);
362 }
363
364 /*
365  * Fetch named option and verify that it is positive.
366  */
367 static intmax_t
368 g_union_fetcharg(struct gctl_req *req, const char *name)
369 {
370         intmax_t *val;
371
372         val = gctl_get_paraml_opt(req, name, sizeof(*val));
373         if (val == NULL)
374                 return (0);
375         if (*val >= 0)
376                 return (*val);
377         gctl_msg(req, EINVAL, "Invalid '%s' (%jd): negative value, "
378             "using default.", name, *val);
379         return (0);
380 }
381
/*
 * Verify that a name is alphanumeric.
 *
 * Returns true when every character of the NUL-terminated name is a
 * letter or a digit, which includes the empty string, and false as soon
 * as any other character is found.
 */
static bool
g_union_verify_nprefix(const char *name)
{
        const char *cp;

        /*
         * Walk to the terminator instead of calling strlen() on every
         * iteration, and hand the classifiers an unsigned char value so
         * that bytes above 0x7f are never passed as negative ints.
         */
        for (cp = name; *cp != '\0'; cp++) {
                if (isalpha((unsigned char)*cp) == 0 &&
                    isdigit((unsigned char)*cp) == 0)
                        return (false);
        }
        return (true);
}
397
398 /*
399  * Destroy a union device.
400  */
401 static void
402 g_union_ctl_destroy(struct gctl_req *req, struct g_class *mp, bool verbose)
403 {
404         int *nargs, *force, error, i;
405         struct g_geom *gp;
406         const char *name;
407         char param[16];
408
409         g_topology_assert();
410
411         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
412         if (nargs == NULL) {
413                 gctl_error(req, "No '%s' argument.", "nargs");
414                 return;
415         }
416         if (*nargs <= 0) {
417                 gctl_error(req, "Missing device(s).");
418                 return;
419         }
420         force = gctl_get_paraml(req, "force", sizeof(*force));
421         if (force == NULL) {
422                 gctl_error(req, "No 'force' argument.");
423                 return;
424         }
425
426         for (i = 0; i < *nargs; i++) {
427                 snprintf(param, sizeof(param), "arg%d", i);
428                 name = gctl_get_asciiparam(req, param);
429                 if (name == NULL) {
430                         gctl_msg(req, EINVAL, "No '%s' argument.", param);
431                         continue;
432                 }
433                 if (strncmp(name, _PATH_DEV, strlen(_PATH_DEV)) == 0)
434                         name += strlen(_PATH_DEV);
435                 gp = g_union_find_geom(mp, name);
436                 if (gp == NULL) {
437                         gctl_msg(req, EINVAL, "Device %s is invalid.", name);
438                         continue;
439                 }
440                 error = g_union_destroy(verbose ? req : NULL, gp, *force);
441                 if (error != 0)
442                         gctl_msg(req, error, "Error %d: "
443                             "cannot destroy device %s.", error, gp->name);
444         }
445         gctl_post_messages(req);
446 }
447
448 /*
449  * Find a union geom.
450  */
451 static struct g_geom *
452 g_union_find_geom(struct g_class *mp, const char *name)
453 {
454         struct g_geom *gp;
455
456         LIST_FOREACH(gp, &mp->geom, geom) {
457                 if (strcmp(gp->name, name) == 0)
458                         return (gp);
459         }
460         return (NULL);
461 }
462
463 /*
464  * Zero out all the statistics associated with a union device.
465  */
466 static void
467 g_union_ctl_reset(struct gctl_req *req, struct g_class *mp, bool verbose)
468 {
469         struct g_union_softc *sc;
470         struct g_provider *pp;
471         struct g_geom *gp;
472         char param[16];
473         int i, *nargs;
474
475         g_topology_assert();
476
477         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
478         if (nargs == NULL) {
479                 gctl_error(req, "No '%s' argument.", "nargs");
480                 return;
481         }
482         if (*nargs <= 0) {
483                 gctl_error(req, "Missing device(s).");
484                 return;
485         }
486
487         for (i = 0; i < *nargs; i++) {
488                 snprintf(param, sizeof(param), "arg%d", i);
489                 pp = gctl_get_provider(req, param);
490                 if (pp == NULL) {
491                         gctl_msg(req, EINVAL, "No '%s' argument.", param);
492                         continue;
493                 }
494                 gp = pp->geom;
495                 if (gp->class != mp) {
496                         gctl_msg(req, EINVAL, "Provider %s is invalid.",
497                             pp->name);
498                         continue;
499                 }
500                 sc = gp->softc;
501                 sc->sc_reads = 0;
502                 sc->sc_writes = 0;
503                 sc->sc_deletes = 0;
504                 sc->sc_getattrs = 0;
505                 sc->sc_flushes = 0;
506                 sc->sc_speedups = 0;
507                 sc->sc_cmd0s = 0;
508                 sc->sc_cmd1s = 0;
509                 sc->sc_cmd2s = 0;
510                 sc->sc_readbytes = 0;
511                 sc->sc_wrotebytes = 0;
512                 if (verbose)
513                         gctl_msg(req, 0, "Device %s has been reset.", pp->name);
514                 G_UNION_DEBUG(1, "Device %s has been reset.", pp->name);
515         }
516         gctl_post_messages(req);
517 }
518
519 /*
520  * Revert all write requests made to the top layer of the union.
521  */
522 static void
523 g_union_ctl_revert(struct gctl_req *req, struct g_class *mp, bool verbose)
524 {
525         struct g_union_softc *sc;
526         struct g_provider *pp;
527         struct g_geom *gp;
528         char param[16];
529         int i, *nargs;
530
531         g_topology_assert();
532
533         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
534         if (nargs == NULL) {
535                 gctl_error(req, "No '%s' argument.", "nargs");
536                 return;
537         }
538         if (*nargs <= 0) {
539                 gctl_error(req, "Missing device(s).");
540                 return;
541         }
542
543         for (i = 0; i < *nargs; i++) {
544                 snprintf(param, sizeof(param), "arg%d", i);
545                 pp = gctl_get_provider(req, param);
546                 if (pp == NULL) {
547                         gctl_msg(req, EINVAL, "No '%s' argument.", param);
548                         continue;
549                 }
550                 gp = pp->geom;
551                 if (gp->class != mp) {
552                         gctl_msg(req, EINVAL, "Provider %s is invalid.",
553                             pp->name);
554                         continue;
555                 }
556                 sc = gp->softc;
557                 if (g_union_get_writelock(sc) != 0) {
558                         gctl_msg(req, EINVAL, "Revert already in progress for "
559                             "provider %s.", pp->name);
560                         continue;
561                 }
562                 /*
563                  * No mount or other use of union is allowed.
564                  */
565                 if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) {
566                         gctl_msg(req, EPERM, "Unable to get exclusive access "
567                             "for reverting of %s;\n\t%s cannot be mounted or "
568                             "otherwise open during a revert.",
569                              pp->name, pp->name);
570                         g_union_rel_writelock(sc);
571                         continue;
572                 }
573                 g_union_revert(sc);
574                 g_union_rel_writelock(sc);
575                 if (verbose)
576                         gctl_msg(req, 0, "Device %s has been reverted.",
577                             pp->name);
578                 G_UNION_DEBUG(1, "Device %s has been reverted.", pp->name);
579         }
580         gctl_post_messages(req);
581 }
582
583 /*
584  * Revert union writes by zero'ing out the writemap.
585  */
586 static void
587 g_union_revert(struct g_union_softc *sc)
588 {
589         int i;
590
591         G_WLOCK(sc);
592         for (i = 0; i < sc->sc_root_size; i++)
593                 memset(sc->sc_writemap_root[i], 0,
594                     sc->sc_leaf_size * sizeof(uint64_t));
595         memset(sc->sc_leafused, 0, roundup(sc->sc_root_size, BITS_PER_ENTRY));
596         G_WUNLOCK(sc);
597 }
598
599 /*
600  * Commit all the writes made in the top layer to the lower layer.
601  */
602 static void
603 g_union_ctl_commit(struct gctl_req *req, struct g_class *mp, bool verbose)
604 {
605         struct g_union_softc *sc;
606         struct g_provider *pp, *lowerpp;
607         struct g_consumer *lowercp;
608         struct g_geom *gp;
609         struct bio *bp;
610         char param[16];
611         off_t len2rd, len2wt, savelen;
612         int i, error, error1, *nargs, *force, *reboot;
613
614         g_topology_assert();
615
616         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
617         if (nargs == NULL) {
618                 gctl_error(req, "No '%s' argument.", "nargs");
619                 return;
620         }
621         if (*nargs <= 0) {
622                 gctl_error(req, "Missing device(s).");
623                 return;
624         }
625         force = gctl_get_paraml(req, "force", sizeof(*force));
626         if (force == NULL) {
627                 gctl_error(req, "No 'force' argument.");
628                 return;
629         }
630         reboot = gctl_get_paraml(req, "reboot", sizeof(*reboot));
631         if (reboot == NULL) {
632                 gctl_error(req, "No 'reboot' argument.");
633                 return;
634         }
635
636         /* Get a bio buffer to do our I/O */
637         bp = g_alloc_bio();
638         bp->bio_data = g_malloc(MAXBSIZE, M_WAITOK);
639         bp->bio_done = biodone;
640         for (i = 0; i < *nargs; i++) {
641                 snprintf(param, sizeof(param), "arg%d", i);
642                 pp = gctl_get_provider(req, param);
643                 if (pp == NULL) {
644                         gctl_msg(req, EINVAL, "No '%s' argument.", param);
645                         continue;
646                 }
647                 gp = pp->geom;
648                 if (gp->class != mp) {
649                         gctl_msg(req, EINVAL, "Provider %s is invalid.",
650                             pp->name);
651                         continue;
652                 }
653                 sc = gp->softc;
654                 if (g_union_get_writelock(sc) != 0) {
655                         gctl_msg(req, EINVAL, "Commit already in progress for "
656                             "provider %s.", pp->name);
657                         continue;
658                 }
659         
660                 /* upgrade to write access for lower */
661                 lowercp = sc->sc_lowercp;
662                 lowerpp = lowercp->provider;
663                 /*
664                  * No mount or other use of union is allowed, unless the
665                  * -f flag is given which allows read-only mount or usage.
666                  */
667                 if ((*force == false && pp->acr > 0) || pp->acw > 0 ||
668                      pp->ace > 0) {
669                         gctl_msg(req, EPERM, "Unable to get exclusive access "
670                             "for writing of %s.\n\tNote that %s cannot be "
671                             "mounted or otherwise\n\topen during a commit "
672                             "unless the -f flag is used.", pp->name, pp->name);
673                         g_union_rel_writelock(sc);
674                         continue;
675                 }
676                 /*
677                  * No mount or other use of lower media is allowed, unless the
678                  * -f flag is given which allows read-only mount or usage.
679                  */
680                 if ((*force == false && lowerpp->acr > lowercp->acr) ||
681                      lowerpp->acw > lowercp->acw ||
682                      lowerpp->ace > lowercp->ace) {
683                         gctl_msg(req, EPERM, "provider %s is unable to get "
684                             "exclusive access to %s\n\tfor writing. Note that "
685                             "%s cannot be mounted or otherwise open\n\tduring "
686                             "a commit unless the -f flag is used.", pp->name,
687                             lowerpp->name, lowerpp->name);
688                         g_union_rel_writelock(sc);
689                         continue;
690                 }
691                 if ((error = g_access(lowercp, 0, 1, 0)) != 0) {
692                         gctl_msg(req, error, "Error %d: provider %s is unable "
693                             "to access %s for writing.", error, pp->name,
694                             lowerpp->name);
695                         g_union_rel_writelock(sc);
696                         continue;
697                 }
698                 g_topology_unlock();
699                 /* Loop over write map copying across written blocks */
700                 bp->bio_offset = 0;
701                 bp->bio_length = sc->sc_map_size * sc->sc_sectorsize;
702                 G_RLOCK(sc);
703                 error = 0;
704                 while (bp->bio_length > 0) {
705                         if (!g_union_getmap(bp, sc, &len2rd)) {
706                                 /* not written, so skip */
707                                 bp->bio_offset += len2rd;
708                                 bp->bio_length -= len2rd;
709                                 continue;
710                         }
711                         G_RUNLOCK(sc);
712                         /* need to read then write len2rd sectors */
713                         for ( ; len2rd > 0; len2rd -= len2wt) {
714                                 /* limit ourselves to MAXBSIZE size I/Os */
715                                 len2wt = len2rd;
716                                 if (len2wt > MAXBSIZE)
717                                         len2wt = MAXBSIZE;
718                                 savelen = bp->bio_length;
719                                 bp->bio_length = len2wt;
720                                 bp->bio_cmd = BIO_READ;
721                                 g_io_request(bp, sc->sc_uppercp);
722                                 if ((error = biowait(bp, "rdunion")) != 0) {
723                                         gctl_msg(req, error, "Commit read "
724                                             "error %d in provider %s, commit "
725                                             "aborted.", error, pp->name);
726                                         goto cleanup;
727                                 }
728                                 bp->bio_flags &= ~BIO_DONE;
729                                 bp->bio_cmd = BIO_WRITE;
730                                 g_io_request(bp, lowercp);
731                                 if ((error = biowait(bp, "wtunion")) != 0) {
732                                         gctl_msg(req, error, "Commit write "
733                                             "error %d in provider %s, commit "
734                                             "aborted.", error, pp->name);
735                                         goto cleanup;
736                                 }
737                                 bp->bio_flags &= ~BIO_DONE;
738                                 bp->bio_offset += len2wt;
739                                 bp->bio_length = savelen - len2wt;
740                         }
741                         G_RLOCK(sc);
742                 }
743                 G_RUNLOCK(sc);
744                 /* clear the write map */
745                 g_union_revert(sc);
746 cleanup:
747                 g_topology_lock();
748                 /* return lower to previous access */
749                 if ((error1 = g_access(lowercp, 0, -1, 0)) != 0) {
750                         G_UNION_DEBUG(2, "Error %d: device %s could not reset "
751                             "access to %s (r=0 w=-1 e=0).", error1, pp->name,
752                             lowerpp->name);
753                 }
754                 g_union_rel_writelock(sc);
755                 if (error == 0 && verbose)
756                         gctl_msg(req, 0, "Device %s has been committed.",
757                             pp->name);
758                 G_UNION_DEBUG(1, "Device %s has been committed.", pp->name);
759         }
760         gctl_post_messages(req);
761         g_free(bp->bio_data);
762         g_destroy_bio(bp);
763         if (*reboot)
764                 kern_reboot(RB_AUTOBOOT);
765 }
766
767 /*
768  * Generally allow access unless a commit is in progress.
769  */
770 static int
771 g_union_access(struct g_provider *pp, int r, int w, int e)
772 {
773         struct g_union_softc *sc;
774
775         sc = pp->geom->softc;
776         if (sc == NULL) {
777                 if (r <= 0 && w <= 0 && e <= 0)
778                         return (0);
779                 return (ENXIO);
780         }
781         r += pp->acr;
782         w += pp->acw;
783         e += pp->ace;
784         if (g_union_get_writelock(sc) != 0) {
785                 if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0)
786                         return (0);
787                 return (EBUSY);
788         }
789         g_union_rel_writelock(sc);
790         return (0);
791 }
792
793 /*
794  * Initiate an I/O operation on the union device.
795  */
796 static void
797 g_union_start(struct bio *bp)
798 {
799         struct g_union_softc *sc;
800         struct g_union_wip *wip;
801         struct bio *cbp;
802
803         sc = bp->bio_to->geom->softc;
804         if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
805                 wip = g_malloc(sizeof(*wip), M_NOWAIT);
806                 if (wip == NULL) {
807                         g_io_deliver(bp, ENOMEM);
808                         return;
809                 }
810                 TAILQ_INIT(&wip->wip_waiting);
811                 wip->wip_bp = bp;
812                 wip->wip_sc = sc;
813                 wip->wip_start = bp->bio_offset + sc->sc_offset;
814                 wip->wip_end = wip->wip_start + bp->bio_length - 1;
815                 wip->wip_numios = 1;
816                 wip->wip_error = 0;
817                 g_union_doio(wip);
818                 return;
819         }
820
821         /*
822          * All commands other than read and write are passed through to
823          * the upper-level device since it is writable and thus able to
824          * respond to delete, flush, and speedup requests.
825          */
826         cbp = g_clone_bio(bp);
827         if (cbp == NULL) {
828                 g_io_deliver(bp, ENOMEM);
829                 return;
830         }
831         cbp->bio_offset = bp->bio_offset + sc->sc_offset;
832         cbp->bio_done = g_std_done;
833
834         switch (cbp->bio_cmd) {
835         case BIO_DELETE:
836                 G_UNION_LOGREQ(cbp, "Delete request received.");
837                 atomic_add_long(&sc->sc_deletes, 1);
838                 break;
839         case BIO_GETATTR:
840                 G_UNION_LOGREQ(cbp, "Getattr request received.");
841                 atomic_add_long(&sc->sc_getattrs, 1);
842                 if (strcmp(cbp->bio_attribute, "GEOM::kerneldump") != 0)
843                         /* forward the GETATTR to the lower-level device */
844                         break;
845                 g_union_kerneldump(bp, sc);
846                 return;
847         case BIO_FLUSH:
848                 G_UNION_LOGREQ(cbp, "Flush request received.");
849                 atomic_add_long(&sc->sc_flushes, 1);
850                 break;
851         case BIO_SPEEDUP:
852                 G_UNION_LOGREQ(cbp, "Speedup request received.");
853                 atomic_add_long(&sc->sc_speedups, 1);
854                 break;
855         case BIO_CMD0:
856                 G_UNION_LOGREQ(cbp, "Cmd0 request received.");
857                 atomic_add_long(&sc->sc_cmd0s, 1);
858                 break;
859         case BIO_CMD1:
860                 G_UNION_LOGREQ(cbp, "Cmd1 request received.");
861                 atomic_add_long(&sc->sc_cmd1s, 1);
862                 break;
863         case BIO_CMD2:
864                 G_UNION_LOGREQ(cbp, "Cmd2 request received.");
865                 atomic_add_long(&sc->sc_cmd2s, 1);
866                 break;
867         default:
868                 G_UNION_LOGREQ(cbp, "Unknown (%d) request received.",
869                     cbp->bio_cmd);
870                 break;
871         }
872         g_io_request(cbp, sc->sc_uppercp);
873 }
874
875 /*
876  * Initiate a read or write operation on the union device.
877  */
878 static void
879 g_union_doio(struct g_union_wip *wip)
880 {
881         struct g_union_softc *sc;
882         struct g_consumer *cp, *firstcp;
883         struct g_union_wip *activewip;
884         struct bio *cbp, *firstbp;
885         off_t rdlen, len2rd, offset;
886         int iocnt, needstoblock;
887         char *level;
888
889         /*
890          * To maintain consistency, we cannot allow concurrent reads
891          * or writes to the same block.
892          *
893          * A work-in-progress (wip) structure is allocated for each
894          * read or write request. All active requests are kept on the
895          * softc sc_wiplist. As each request arrives, it is checked to
896          * see if it overlaps any of the active entries. If it does not
897          * overlap, then it is added to the active list and initiated.
898          * If it does overlap an active entry, it is added to the
899          * wip_waiting list for the active entry that it overlaps.
900          * When an active entry completes, it restarts all the requests
901          * on its wip_waiting list.
902          */
903         sc = wip->wip_sc;
904         G_WLOCK(sc);
905         TAILQ_FOREACH(activewip, &sc->sc_wiplist, wip_next) {
906                 if (wip->wip_end < activewip->wip_start ||
907                     wip->wip_start > activewip->wip_end)
908                         continue;
909                 needstoblock = 1;
910                 if (wip->wip_bp->bio_cmd == BIO_WRITE)
911                         if (activewip->wip_bp->bio_cmd == BIO_WRITE)
912                                 sc->sc_writeblockwrite += 1;
913                         else
914                                 sc->sc_readblockwrite += 1;
915                 else
916                         if (activewip->wip_bp->bio_cmd == BIO_WRITE)
917                                 sc->sc_writeblockread += 1;
918                         else {
919                                 sc->sc_readcurrentread += 1;
920                                 needstoblock = 0;
921                         }
922                 /* Put request on a waiting list if necessary */
923                 if (needstoblock) {
924                         TAILQ_INSERT_TAIL(&activewip->wip_waiting, wip,
925                             wip_next);
926                         G_WUNLOCK(sc);
927                         return;
928                 }
929         }
930         /* Put request on the active list */
931         TAILQ_INSERT_TAIL(&sc->sc_wiplist, wip, wip_next);
932
933         /*
934          * Process I/O requests that have been cleared to go.
935          */
936         cbp = g_clone_bio(wip->wip_bp);
937         if (cbp == NULL) {
938                 TAILQ_REMOVE(&sc->sc_wiplist, wip, wip_next);
939                 G_WUNLOCK(sc);
940                 KASSERT(TAILQ_FIRST(&wip->wip_waiting) == NULL,
941                     ("g_union_doio: non-empty work-in-progress waiting queue"));
942                 g_io_deliver(wip->wip_bp, ENOMEM);
943                 g_free(wip);
944                 return;
945         }
946         G_WUNLOCK(sc);
947         cbp->bio_caller1 = wip;
948         cbp->bio_done = g_union_done;
949         cbp->bio_offset = wip->wip_start;
950
951         /*
952          * Writes are always done to the top level. The blocks that
953          * are written are recorded in the bitmap when the I/O completes.
954          */
955         if (cbp->bio_cmd == BIO_WRITE) {
956                 G_UNION_LOGREQ(cbp, "Sending %jd byte write request to upper "
957                     "level.", cbp->bio_length);
958                 atomic_add_long(&sc->sc_writes, 1);
959                 atomic_add_long(&sc->sc_wrotebytes, cbp->bio_length);
960                 g_io_request(cbp, sc->sc_uppercp);
961                 return;
962         }
963         /*
964          * The usual read case is that we either read the top layer
965          * if the block has been previously written or the bottom layer
966          * if it has not been written. However, it is possible that
967          * only part of the block has been written, For example we may
968          * have written a UFS/FFS file fragment comprising several
969          * sectors out of an 8-sector block.  Here, if the entire
970          * 8-sector block is read for example by a snapshot needing
971          * to copy the full block, then we need to read the written
972          * sectors from the upper level and the unwritten sectors from
973          * the lower level. We do this by alternately reading from the
974          * top and bottom layers until we complete the read. We
975          * simplify for the common case to just do the I/O and return.
976          */
977         atomic_add_long(&sc->sc_reads, 1);
978         atomic_add_long(&sc->sc_readbytes, cbp->bio_length);
979         rdlen = cbp->bio_length;
980         offset = 0;
981         for (iocnt = 0; ; iocnt++) {
982                 if (g_union_getmap(cbp, sc, &len2rd)) {
983                         /* read top */
984                         cp = sc->sc_uppercp;
985                         level = "upper";
986                 } else {
987                         /* read bottom */
988                         cp = sc->sc_lowercp;
989                         level = "lower";
990                 }
991                 /* Check if only a single read is required */
992                 if (iocnt == 0 && rdlen == len2rd) {
993                         G_UNION_LOGREQLVL((cp == sc->sc_uppercp) ?
994                             3 : 4, cbp, "Sending %jd byte read "
995                             "request to %s level.", len2rd, level);
996                         g_io_request(cbp, cp);
997                         return;
998                 }
999                 cbp->bio_length = len2rd;
1000                 if ((cbp->bio_flags & BIO_UNMAPPED) != 0)
1001                         cbp->bio_ma_offset += offset;
1002                 else
1003                         cbp->bio_data += offset;
1004                 offset += len2rd;
1005                 rdlen -= len2rd;
1006                 G_UNION_LOGREQLVL(3, cbp, "Sending %jd byte read "
1007                     "request to %s level.", len2rd, level);
1008                 /*
1009                  * To avoid prematurely notifying our consumer
1010                  * that their I/O has completed, we have to delay
1011                  * issuing our first I/O request until we have
1012                  * issued all the additional I/O requests.
1013                  */
1014                 if (iocnt > 0) {
1015                         atomic_add_long(&wip->wip_numios, 1);
1016                         g_io_request(cbp, cp);
1017                 } else {
1018                         firstbp = cbp;
1019                         firstcp = cp;
1020                 }
1021                 if (rdlen == 0)
1022                         break;
1023                 /* set up for next read */
1024                 cbp = g_clone_bio(wip->wip_bp);
1025                 if (cbp == NULL) {
1026                         wip->wip_error = ENOMEM;
1027                         atomic_add_long(&wip->wip_numios, -1);
1028                         break;
1029                 }
1030                 cbp->bio_caller1 = wip;
1031                 cbp->bio_done = g_union_done;
1032                 cbp->bio_offset += offset;
1033                 cbp->bio_length = rdlen;
1034                 atomic_add_long(&sc->sc_reads, 1);
1035         }
1036         /* We have issued all our I/O, so start the first one */
1037         g_io_request(firstbp, firstcp);
1038         return;
1039 }
1040
1041 /*
1042  * Used when completing a union I/O operation.
1043  */
1044 static void
1045 g_union_done(struct bio *bp)
1046 {
1047         struct g_union_wip *wip, *waitingwip;
1048         struct g_union_softc *sc;
1049
1050         wip = bp->bio_caller1;
1051         if (wip->wip_error != 0 && bp->bio_error == 0)
1052                 bp->bio_error = wip->wip_error;
1053         wip->wip_error = 0;
1054         if (atomic_fetchadd_long(&wip->wip_numios, -1) == 1) {
1055                 sc = wip->wip_sc;
1056                 G_WLOCK(sc);
1057                 if (bp->bio_cmd == BIO_WRITE)
1058                         g_union_setmap(bp, sc);
1059                 TAILQ_REMOVE(&sc->sc_wiplist, wip, wip_next);
1060                 G_WUNLOCK(sc);
1061                 while ((waitingwip = TAILQ_FIRST(&wip->wip_waiting)) != NULL) {
1062                         TAILQ_REMOVE(&wip->wip_waiting, waitingwip, wip_next);
1063                         g_union_doio(waitingwip);
1064                 }
1065                 g_free(wip);
1066         }
1067         g_std_done(bp);
1068 }
1069
1070 /*
1071  * Record blocks that have been written in the map.
1072  */
1073 static void
1074 g_union_setmap(struct bio *bp, struct g_union_softc *sc)
1075 {
1076         size_t root_idx;
1077         uint64_t **leaf;
1078         uint64_t *wordp;
1079         off_t start, numsec;
1080
1081         G_WLOCKOWNED(sc);
1082         KASSERT(bp->bio_offset % sc->sc_sectorsize == 0,
1083             ("g_union_setmap: offset not on sector boundry"));
1084         KASSERT(bp->bio_length % sc->sc_sectorsize == 0,
1085             ("g_union_setmap: length not a multiple of sectors"));
1086         start = bp->bio_offset / sc->sc_sectorsize;
1087         numsec = bp->bio_length / sc->sc_sectorsize;
1088         KASSERT(start + numsec <= sc->sc_map_size,
1089             ("g_union_setmap: block %jd is out of range", start + numsec));
1090         for ( ; numsec > 0; numsec--, start++) {
1091                 root_idx = start / sc->sc_bits_per_leaf;
1092                 leaf = &sc->sc_writemap_root[root_idx];
1093                 wordp = &(*leaf)
1094                     [(start % sc->sc_bits_per_leaf) / BITS_PER_ENTRY];
1095                 *wordp |= 1ULL << (start % BITS_PER_ENTRY);
1096                 sc->sc_leafused[root_idx / BITS_PER_ENTRY] |=
1097                     1ULL << (root_idx % BITS_PER_ENTRY);
1098         }
1099 }
1100
1101 /*
1102  * Check map to determine whether blocks have been written.
1103  *
1104  * Return true if they have been written so should be read from the top
1105  * layer. Return false if they have not been written so should be read
1106  * from the bottom layer. Return in len2read the bytes to be read. See
1107  * the comment above the BIO_READ implementation in g_union_start() for
1108  * an explantion of why len2read may be shorter than the buffer length.
1109  */
1110 static bool
1111 g_union_getmap(struct bio *bp, struct g_union_softc *sc, off_t *len2read)
1112 {
1113         off_t start, numsec, leafresid, bitloc;
1114         bool first, maptype, retval;
1115         uint64_t *leaf, word;
1116         size_t root_idx;
1117
1118         KASSERT(bp->bio_offset % sc->sc_sectorsize == 0,
1119             ("g_union_getmap: offset not on sector boundry"));
1120         KASSERT(bp->bio_length % sc->sc_sectorsize == 0,
1121             ("g_union_getmap: length not a multiple of sectors"));
1122         start = bp->bio_offset / sc->sc_sectorsize;
1123         numsec = bp->bio_length / sc->sc_sectorsize;
1124         G_UNION_DEBUG(4, "g_union_getmap: check %jd sectors starting at %jd\n",
1125             numsec, start);
1126         KASSERT(start + numsec <= sc->sc_map_size,
1127             ("g_union_getmap: block %jd is out of range", start + numsec));
1128                 root_idx = start / sc->sc_bits_per_leaf;
1129         first = true;
1130         maptype = false;
1131         while (numsec > 0) {
1132                 /* Check first if the leaf records any written sectors */
1133                 root_idx = start / sc->sc_bits_per_leaf;
1134                 leafresid = sc->sc_bits_per_leaf -
1135                     (start % sc->sc_bits_per_leaf);
1136                 if (((sc->sc_leafused[root_idx / BITS_PER_ENTRY]) &
1137                     (1ULL << (root_idx % BITS_PER_ENTRY))) == 0) {
1138                         if (first) {
1139                                 maptype = false;
1140                                 first = false;
1141                         }
1142                         if (maptype)
1143                                 break;
1144                         numsec -= leafresid;
1145                         start += leafresid;
1146                         continue;
1147                 }
1148                 /* Check up to a word boundry, then check word by word */
1149                 leaf = sc->sc_writemap_root[root_idx];
1150                 word = leaf[(start % sc->sc_bits_per_leaf) / BITS_PER_ENTRY];
1151                 bitloc = start % BITS_PER_ENTRY;
1152                 if (bitloc == 0 && (word == 0 || word == ~0)) {
1153                         if (first) {
1154                                 if (word == 0)
1155                                         maptype = false;
1156                                 else
1157                                         maptype = true;
1158                                 first = false;
1159                         }
1160                         if ((word == 0 && maptype) ||
1161                             (word == ~0 && !maptype))
1162                                 break;
1163                         numsec -= BITS_PER_ENTRY;
1164                         start += BITS_PER_ENTRY;
1165                         continue;
1166                 }
1167                 for ( ; bitloc < BITS_PER_ENTRY; bitloc ++) {
1168                         retval = (word & (1ULL << bitloc)) != 0;
1169                         if (first) {
1170                                 maptype = retval;
1171                                 first = false;
1172                         }
1173                         if (maptype == retval) {
1174                                 numsec--;
1175                                 start++;
1176                                 continue;
1177                         }
1178                         goto out;
1179                 }
1180         }
1181 out:
1182         if (numsec < 0) {
1183                 start += numsec;
1184                 numsec = 0;
1185         }
1186         *len2read = bp->bio_length - (numsec * sc->sc_sectorsize);
1187         G_UNION_DEBUG(maptype ? 3 : 4,
1188             "g_union_getmap: return maptype %swritten for %jd "
1189             "sectors ending at %jd\n", maptype ? "" : "NOT ",
1190             *len2read / sc->sc_sectorsize, start - 1);
1191         return (maptype);
1192 }
1193
1194 /*
1195  * Fill in details for a BIO_GETATTR request.
1196  */
1197 static void
1198 g_union_kerneldump(struct bio *bp, struct g_union_softc *sc)
1199 {
1200         struct g_kerneldump *gkd;
1201         struct g_geom *gp;
1202         struct g_provider *pp;
1203
1204         gkd = (struct g_kerneldump *)bp->bio_data;
1205         gp = bp->bio_to->geom;
1206         g_trace(G_T_TOPOLOGY, "%s(%s, %jd, %jd)", __func__, gp->name,
1207             (intmax_t)gkd->offset, (intmax_t)gkd->length);
1208
1209         pp = LIST_FIRST(&gp->provider);
1210
1211         gkd->di.dumper = g_union_dumper;
1212         gkd->di.priv = sc;
1213         gkd->di.blocksize = pp->sectorsize;
1214         gkd->di.maxiosize = DFLTPHYS;
1215         gkd->di.mediaoffset = sc->sc_offset + gkd->offset;
1216         if (gkd->offset > sc->sc_size) {
1217                 g_io_deliver(bp, ENODEV);
1218                 return;
1219         }
1220         if (gkd->offset + gkd->length > sc->sc_size)
1221                 gkd->length = sc->sc_size - gkd->offset;
1222         gkd->di.mediasize = gkd->length;
1223         g_io_deliver(bp, 0);
1224 }
1225
1226 /*
1227  * Handler for g_union_kerneldump().
1228  */
1229 static int
1230 g_union_dumper(void *priv, void *virtual, off_t offset, size_t length)
1231 {
1232
1233         return (0);
1234 }
1235
1236 /*
1237  * List union statistics.
1238  */
1239 static void
1240 g_union_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
1241     struct g_consumer *cp, struct g_provider *pp)
1242 {
1243         struct g_union_softc *sc;
1244
1245         if (pp != NULL || cp != NULL || gp->softc == NULL)
1246                 return;
1247         sc = gp->softc;
1248         sbuf_printf(sb, "%s<Reads>%ju</Reads>\n", indent,
1249             (uintmax_t)sc->sc_reads);
1250         sbuf_printf(sb, "%s<Writes>%ju</Writes>\n", indent,
1251             (uintmax_t)sc->sc_writes);
1252         sbuf_printf(sb, "%s<Deletes>%ju</Deletes>\n", indent,
1253             (uintmax_t)sc->sc_deletes);
1254         sbuf_printf(sb, "%s<Getattrs>%ju</Getattrs>\n", indent,
1255             (uintmax_t)sc->sc_getattrs);
1256         sbuf_printf(sb, "%s<Flushes>%ju</Flushes>\n", indent,
1257             (uintmax_t)sc->sc_flushes);
1258         sbuf_printf(sb, "%s<Speedups>%ju</Speedups>\n", indent,
1259             (uintmax_t)sc->sc_speedups);
1260         sbuf_printf(sb, "%s<Cmd0s>%ju</Cmd0s>\n", indent,
1261             (uintmax_t)sc->sc_cmd0s);
1262         sbuf_printf(sb, "%s<Cmd1s>%ju</Cmd1s>\n", indent,
1263             (uintmax_t)sc->sc_cmd1s);
1264         sbuf_printf(sb, "%s<Cmd2s>%ju</Cmd2s>\n", indent,
1265             (uintmax_t)sc->sc_cmd2s);
1266         sbuf_printf(sb, "%s<ReadCurrentRead>%ju</ReadCurrentRead>\n", indent,
1267             (uintmax_t)sc->sc_readcurrentread);
1268         sbuf_printf(sb, "%s<ReadBlockWrite>%ju</ReadBlockWrite>\n", indent,
1269             (uintmax_t)sc->sc_readblockwrite);
1270         sbuf_printf(sb, "%s<WriteBlockRead>%ju</WriteBlockRead>\n", indent,
1271             (uintmax_t)sc->sc_writeblockread);
1272         sbuf_printf(sb, "%s<WriteBlockWrite>%ju</WriteBlockWrite>\n", indent,
1273             (uintmax_t)sc->sc_writeblockwrite);
1274         sbuf_printf(sb, "%s<ReadBytes>%ju</ReadBytes>\n", indent,
1275             (uintmax_t)sc->sc_readbytes);
1276         sbuf_printf(sb, "%s<WroteBytes>%ju</WroteBytes>\n", indent,
1277             (uintmax_t)sc->sc_wrotebytes);
1278         sbuf_printf(sb, "%s<Offset>%jd</Offset>\n", indent,
1279             (intmax_t)sc->sc_offset);
1280 }
1281
1282 /*
1283  * Clean up an orphaned geom.
1284  */
1285 static void
1286 g_union_orphan(struct g_consumer *cp)
1287 {
1288
1289         g_topology_assert();
1290         g_union_destroy(NULL, cp->geom, true);
1291 }
1292
1293 /*
1294  * Clean up a union geom.
1295  */
1296 static int
1297 g_union_destroy_geom(struct gctl_req *req, struct g_class *mp,
1298     struct g_geom *gp)
1299 {
1300
1301         return (g_union_destroy(NULL, gp, false));
1302 }
1303
1304 /*
1305  * Clean up a union device.
1306  */
1307 static int
1308 g_union_destroy(struct gctl_req *req, struct g_geom *gp, bool force)
1309 {
1310         struct g_union_softc *sc;
1311         struct g_provider *pp;
1312         int error;
1313
1314         g_topology_assert();
1315         sc = gp->softc;
1316         if (sc == NULL)
1317                 return (ENXIO);
1318         pp = LIST_FIRST(&gp->provider);
1319         if ((sc->sc_flags & DOING_COMMIT) != 0 ||
1320             (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0))) {
1321                 if (force) {
1322                         if (req != NULL)
1323                                 gctl_msg(req, 0, "Device %s is still in use, "
1324                                     "so is being forcibly removed.", gp->name);
1325                         G_UNION_DEBUG(1, "Device %s is still in use, so "
1326                             "is being forcibly removed.", gp->name);
1327                 } else {
1328                         if (req != NULL)
1329                                 gctl_msg(req, EBUSY, "Device %s is still open "
1330                                     "(r=%d w=%d e=%d).", gp->name, pp->acr,
1331                                     pp->acw, pp->ace);
1332                         G_UNION_DEBUG(1, "Device %s is still open "
1333                             "(r=%d w=%d e=%d).", gp->name, pp->acr,
1334                             pp->acw, pp->ace);
1335                         return (EBUSY);
1336                 }
1337         } else {
1338                 if (req != NULL)
1339                         gctl_msg(req, 0, "Device %s removed.", gp->name);
1340                 G_UNION_DEBUG(1, "Device %s removed.", gp->name);
1341         }
1342         /* Close consumers */
1343         if ((error = g_access(sc->sc_lowercp, -1, 0, -1)) != 0)
1344                 G_UNION_DEBUG(2, "Error %d: device %s could not reset access "
1345                     "to %s.", error, gp->name, sc->sc_lowercp->provider->name);
1346         if ((error = g_access(sc->sc_uppercp, -1, -1, -1)) != 0)
1347                 G_UNION_DEBUG(2, "Error %d: device %s could not reset access "
1348                     "to %s.", error, gp->name, sc->sc_uppercp->provider->name);
1349
1350         g_wither_geom(gp, ENXIO);
1351
1352         return (0);
1353 }
1354
1355 /*
1356  * Clean up a union provider.
1357  */
1358 static void
1359 g_union_providergone(struct g_provider *pp)
1360 {
1361         struct g_geom *gp;
1362         struct g_union_softc *sc;
1363         size_t i;
1364
1365         gp = pp->geom;
1366         sc = gp->softc;
1367         gp->softc = NULL;
1368         for (i = 0; i < sc->sc_root_size; i++)
1369                 g_free(sc->sc_writemap_root[i]);
1370         g_free(sc->sc_writemap_root);
1371         g_free(sc->sc_leafused);
1372         rw_destroy(&sc->sc_rwlock);
1373         g_free(sc);
1374 }
1375
1376 /*
1377  * Respond to a resized provider.
1378  */
1379 static void
1380 g_union_resize(struct g_consumer *cp)
1381 {
1382         struct g_union_softc *sc;
1383         struct g_geom *gp;
1384
1385         g_topology_assert();
1386
1387         gp = cp->geom;
1388         sc = gp->softc;
1389
1390         /*
1391          * If size has gotten bigger, ignore it and just keep using
1392          * the space we already had. Otherwise we are done.
1393          */
1394         if (sc->sc_size < cp->provider->mediasize - sc->sc_offset)
1395                 return;
1396         g_union_destroy(NULL, gp, true);
1397 }
1398
/*
 * Declare g_union_class as a GEOM class under the name g_union, and
 * declare version 0 of the geom_union module.
 */
DECLARE_GEOM_CLASS(g_union_class, g_union);
MODULE_VERSION(geom_union, 0);