2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote
20 * products derived from this software without specific prior written
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/devicestat.h>
42 #include <sys/kernel.h>
43 #include <sys/malloc.h>
45 #include <sys/sysctl.h>
47 #include <sys/kthread.h>
49 #include <sys/mutex.h>
50 #include <sys/errno.h>
52 #include <geom/geom.h>
53 #include <geom/geom_int.h>
54 #include <machine/stdarg.h>
56 struct class_list_head g_classes = LIST_HEAD_INITIALIZER(g_classes);
57 static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms);
58 char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim;
67 * This event offers a new class a chance to taste all preexisting providers.
70 g_load_class(void *arg, int flag)
73 struct g_class *mp2, *mp;
75 struct g_provider *pp;
78 if (flag == EV_CANCEL) /* XXX: can't happen ? */
90 g_trace(G_T_TOPOLOGY, "g_load_class(%s)", mp->name);
91 KASSERT(mp->name != NULL && *mp->name != '\0',
92 ("GEOM class has no name"));
93 LIST_FOREACH(mp2, &g_classes, class) {
95 printf("The GEOM class %s is already loaded.\n",
100 } else if (strcmp(mp2->name, mp->name) == 0) {
101 printf("A GEOM class %s is already loaded.\n",
109 LIST_INIT(&mp->geom);
110 LIST_INSERT_HEAD(&g_classes, mp, class);
111 if (mp->init != NULL)
113 if (mp->taste == NULL)
115 LIST_FOREACH(mp2, &g_classes, class) {
118 LIST_FOREACH(gp, &mp2->geom, geom) {
119 LIST_FOREACH(pp, &gp->provider, provider) {
120 mp->taste(mp, pp, 0);
128 g_unload_class(void *arg, int flag)
133 struct g_provider *pp;
134 struct g_consumer *cp;
141 g_trace(G_T_TOPOLOGY, "g_unload_class(%s)", mp->name);
144 * We allow unloading if we have no geoms, or a class
145 * method we can use to get rid of them.
147 if (!LIST_EMPTY(&mp->geom) && mp->destroy_geom == NULL) {
148 hh->error = EOPNOTSUPP;
152 /* We refuse to unload if anything is open */
153 LIST_FOREACH(gp, &mp->geom, geom) {
154 LIST_FOREACH(pp, &gp->provider, provider)
155 if (pp->acr || pp->acw || pp->ace) {
159 LIST_FOREACH(cp, &gp->consumer, consumer)
160 if (cp->acr || cp->acw || cp->ace) {
166 /* Bar new entries */
172 gp = LIST_FIRST(&mp->geom);
175 error = mp->destroy_geom(NULL, mp, gp);
180 if (mp->fini != NULL)
182 LIST_REMOVE(mp, class);
189 g_modevent(module_t mod, int type, void *data)
193 static int g_ignition;
197 if (mp->version != G_VERSION) {
198 printf("GEOM class %s has Wrong version %x\n",
199 mp->name, mp->version);
206 hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO);
211 g_trace(G_T_TOPOLOGY, "g_modevent(%s, LOAD)", hh->mp->name);
213 * Once the system is not cold, MOD_LOAD calls will be
214 * from the userland and the g_event thread will be able
215 * to acknowledge their completion.
219 error = g_post_event(g_load_class, hh, M_WAITOK, NULL);
221 error = g_waitfor_event(g_load_class, hh, M_WAITOK,
229 g_trace(G_T_TOPOLOGY, "g_modevent(%s, UNLOAD)", hh->mp->name);
230 error = g_waitfor_event(g_unload_class, hh, M_WAITOK, NULL);
234 KASSERT(LIST_EMPTY(&hh->mp->geom),
235 ("Unloaded class (%s) still has geom", hh->mp->name));
247 g_new_geomf(struct g_class *mp, const char *fmt, ...)
255 sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
257 sbuf_vprintf(sb, fmt, ap);
260 gp = g_malloc(sizeof *gp, M_WAITOK | M_ZERO);
261 gp->name = g_malloc(sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
264 LIST_INIT(&gp->consumer);
265 LIST_INIT(&gp->provider);
266 LIST_INSERT_HEAD(&mp->geom, gp, geom);
267 TAILQ_INSERT_HEAD(&geoms, gp, geoms);
268 strcpy(gp->name, sbuf_data(sb));
270 /* Fill in defaults from class */
271 gp->start = mp->start;
272 gp->spoiled = mp->spoiled;
273 gp->dumpconf = mp->dumpconf;
274 gp->access = mp->access;
275 gp->orphan = mp->orphan;
276 gp->ioctl = mp->ioctl;
281 g_destroy_geom(struct g_geom *gp)
286 g_trace(G_T_TOPOLOGY, "g_destroy_geom(%p(%s))", gp, gp->name);
287 KASSERT(LIST_EMPTY(&gp->consumer),
288 ("g_destroy_geom(%s) with consumer(s) [%p]",
289 gp->name, LIST_FIRST(&gp->consumer)));
290 KASSERT(LIST_EMPTY(&gp->provider),
291 ("g_destroy_geom(%s) with provider(s) [%p]",
292 gp->name, LIST_FIRST(&gp->provider)));
294 LIST_REMOVE(gp, geom);
295 TAILQ_REMOVE(&geoms, gp, geoms);
301 * This function is called (repeatedly) until the geom has withered away.
304 g_wither_geom(struct g_geom *gp, int error)
306 struct g_provider *pp;
310 g_trace(G_T_TOPOLOGY, "g_wither_geom(%p(%s))", gp, gp->name);
311 if (!(gp->flags & G_GEOM_WITHER)) {
312 gp->flags |= G_GEOM_WITHER;
313 LIST_FOREACH(pp, &gp->provider, provider)
314 if (!(pp->flags & G_PF_ORPHAN))
315 g_orphan_provider(pp, error);
321 * This function is called (repeatedly) until the geom has withered away.
324 g_wither_geom_close(struct g_geom *gp, int error)
326 struct g_consumer *cp;
330 g_trace(G_T_TOPOLOGY, "g_wither_geom_close(%p(%s))", gp, gp->name);
331 LIST_FOREACH(cp, &gp->consumer, consumer)
332 if (cp->acr || cp->acw || cp->ace)
333 g_access(cp, -cp->acr, -cp->acw, -cp->ace);
334 g_wither_geom(gp, error);
338 * This function is called (repeatedly) until we can't wash away more
339 * withered bits at present. Return value contains two bits. Bit 0
340 * set means "withering stuff we can't wash now", bit 1 means "call
341 * me again, there may be stuff I didn't get the first time around".
347 struct g_geom *gp, *gp2;
348 struct g_provider *pp, *pp2;
349 struct g_consumer *cp, *cp2;
354 LIST_FOREACH(mp, &g_classes, class) {
355 LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
356 LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) {
357 if (!(pp->flags & G_PF_WITHER))
359 if (LIST_EMPTY(&pp->consumers))
360 g_destroy_provider(pp);
364 if (!(gp->flags & G_GEOM_WITHER))
366 LIST_FOREACH_SAFE(pp, &gp->provider, provider, pp2) {
367 if (LIST_EMPTY(&pp->consumers))
368 g_destroy_provider(pp);
372 LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp2) {
373 if (cp->acr || cp->acw || cp->ace) {
377 if (cp->provider != NULL)
379 g_destroy_consumer(cp);
382 if (LIST_EMPTY(&gp->provider) &&
383 LIST_EMPTY(&gp->consumer))
393 g_new_consumer(struct g_geom *gp)
395 struct g_consumer *cp;
399 KASSERT(!(gp->flags & G_GEOM_WITHER),
400 ("g_new_consumer on WITHERing geom(%s) (class %s)",
401 gp->name, gp->class->name));
402 KASSERT(gp->orphan != NULL,
403 ("g_new_consumer on geom(%s) (class %s) without orphan",
404 gp->name, gp->class->name));
406 cp = g_malloc(sizeof *cp, M_WAITOK | M_ZERO);
408 cp->stat = devstat_new_entry(cp, -1, 0, DEVSTAT_ALL_SUPPORTED,
409 DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
410 LIST_INSERT_HEAD(&gp->consumer, cp, consumer);
415 g_destroy_consumer(struct g_consumer *cp)
420 G_VALID_CONSUMER(cp);
421 g_trace(G_T_TOPOLOGY, "g_destroy_consumer(%p)", cp);
422 KASSERT (cp->provider == NULL, ("g_destroy_consumer but attached"));
423 KASSERT (cp->acr == 0, ("g_destroy_consumer with acr"));
424 KASSERT (cp->acw == 0, ("g_destroy_consumer with acw"));
425 KASSERT (cp->ace == 0, ("g_destroy_consumer with ace"));
428 LIST_REMOVE(cp, consumer);
429 devstat_remove_entry(cp->stat);
431 if (gp->flags & G_GEOM_WITHER)
436 g_new_provider_event(void *arg, int flag)
439 struct g_provider *pp;
440 struct g_consumer *cp;
444 if (flag == EV_CANCEL)
449 G_VALID_PROVIDER(pp);
450 LIST_FOREACH(mp, &g_classes, class) {
451 if (mp->taste == NULL)
454 LIST_FOREACH(cp, &pp->consumers, consumers)
455 if (cp->geom->class == mp)
459 mp->taste(mp, pp, 0);
466 g_new_providerf(struct g_geom *gp, const char *fmt, ...)
468 struct g_provider *pp;
474 KASSERT(gp->access != NULL,
475 ("new provider on geom(%s) without ->access (class %s)",
476 gp->name, gp->class->name));
477 KASSERT(gp->start != NULL,
478 ("new provider on geom(%s) without ->start (class %s)",
479 gp->name, gp->class->name));
480 KASSERT(!(gp->flags & G_GEOM_WITHER),
481 ("new provider on WITHERing geom(%s) (class %s)",
482 gp->name, gp->class->name));
483 sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
485 sbuf_vprintf(sb, fmt, ap);
488 pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO);
489 pp->name = (char *)(pp + 1);
490 strcpy(pp->name, sbuf_data(sb));
492 LIST_INIT(&pp->consumers);
495 pp->stat = devstat_new_entry(pp, -1, 0, DEVSTAT_ALL_SUPPORTED,
496 DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
497 LIST_INSERT_HEAD(&gp->provider, pp, provider);
498 g_post_event(g_new_provider_event, pp, M_WAITOK, pp, gp, NULL);
503 g_error_provider(struct g_provider *pp, int error)
506 /* G_VALID_PROVIDER(pp); We may not have g_topology */
511 g_provider_by_name(char const *arg)
515 struct g_provider *pp;
517 LIST_FOREACH(cp, &g_classes, class) {
518 LIST_FOREACH(gp, &cp->geom, geom) {
519 LIST_FOREACH(pp, &gp->provider, provider) {
520 if (!strcmp(arg, pp->name))
/*
 * Destroy a provider.  The provider must have no consumers attached and
 * its read, write and exclusive access counts must all be zero; each of
 * these preconditions is asserted below before the provider is unlinked
 * from its geom and its devstat entry is removed.
 */
529 g_destroy_provider(struct g_provider *pp)
534 G_VALID_PROVIDER(pp);
535 KASSERT(LIST_EMPTY(&pp->consumers),
536 ("g_destroy_provider but attached"));
537 KASSERT (pp->acr == 0, ("g_destroy_provider with acr"));
538 KASSERT (pp->acw == 0, ("g_destroy_provider with acw"));
539 KASSERT (pp->ace == 0, ("g_destroy_provider with ace"));
541 LIST_REMOVE(pp, provider);
543 devstat_remove_entry(pp->stat);
545 if ((gp->flags & G_GEOM_WITHER))
550 * We keep the "geoms" list sorted by topological order (== increasing
551 * numerical rank) at all times.
552 * When an attach is done, the attaching geom's rank is invalidated
553 * and it is moved to the tail of the list.
554 * All geoms later in the sequence have their ranks reevaluated in
555 * sequence. If we cannot assign rank to a geom because its
556 * prerequisites do not have rank, we move that element to the tail
557 * of the sequence with invalid rank as well.
558 * At some point we encounter our original geom and if we still fail
559 * to assign it a rank, there must be a loop and we fail back to
560 * g_attach() which detaches again and calls redo_rank again
561 * to fix up the damage.
562 * It would be much simpler code wise to do it recursively, but we
563 * can't risk that on the kernel stack.
567 redo_rank(struct g_geom *gp)
569 struct g_consumer *cp;
570 struct g_geom *gp1, *gp2;
576 /* Invalidate this geoms rank and move it to the tail */
577 gp1 = TAILQ_NEXT(gp, geoms);
580 TAILQ_REMOVE(&geoms, gp, geoms);
581 TAILQ_INSERT_TAIL(&geoms, gp, geoms);
586 /* re-rank the rest of the sequence */
587 for (; gp1 != NULL; gp1 = gp2) {
590 LIST_FOREACH(cp, &gp1->consumer, consumer) {
591 if (cp->provider == NULL)
593 n = cp->provider->geom->rank;
601 gp2 = TAILQ_NEXT(gp1, geoms);
603 /* got a rank, moving on */
607 /* no rank to original geom means loop */
611 /* no rank, put it at the end move on */
612 TAILQ_REMOVE(&geoms, gp1, geoms);
613 TAILQ_INSERT_TAIL(&geoms, gp1, geoms);
619 g_attach(struct g_consumer *cp, struct g_provider *pp)
624 G_VALID_CONSUMER(cp);
625 G_VALID_PROVIDER(pp);
626 KASSERT(cp->provider == NULL, ("attach but attached"));
628 LIST_INSERT_HEAD(&pp->consumers, cp, consumers);
629 error = redo_rank(cp->geom);
631 LIST_REMOVE(cp, consumers);
639 g_detach(struct g_consumer *cp)
641 struct g_provider *pp;
644 G_VALID_CONSUMER(cp);
645 g_trace(G_T_TOPOLOGY, "g_detach(%p)", cp);
646 KASSERT(cp->provider != NULL, ("detach but not attached"));
647 KASSERT(cp->acr == 0, ("detach but nonzero acr"));
648 KASSERT(cp->acw == 0, ("detach but nonzero acw"));
649 KASSERT(cp->ace == 0, ("detach but nonzero ace"));
650 KASSERT(cp->nstart == cp->nend,
651 ("detach with active requests"));
653 LIST_REMOVE(cp, consumers);
655 if (pp->geom->flags & G_GEOM_WITHER)
657 else if (pp->flags & G_PF_WITHER)
665 * Access-check with delta values. The question asked is "can consumer
666 * "cp" change the access counters by the relative amounts dc[rwe] ?"
670 g_access(struct g_consumer *cp, int dcr, int dcw, int dce)
672 struct g_provider *pp;
677 G_VALID_CONSUMER(cp);
679 KASSERT(pp != NULL, ("access but not attached"));
680 G_VALID_PROVIDER(pp);
682 g_trace(G_T_ACCESS, "g_access(%p(%s), %d, %d, %d)",
683 cp, pp->name, dcr, dcw, dce);
685 KASSERT(cp->acr + dcr >= 0, ("access resulting in negative acr"));
686 KASSERT(cp->acw + dcw >= 0, ("access resulting in negative acw"));
687 KASSERT(cp->ace + dce >= 0, ("access resulting in negative ace"));
688 KASSERT(dcr != 0 || dcw != 0 || dce != 0, ("NOP access request"));
689 KASSERT(pp->geom->access != NULL, ("NULL geom->access"));
692 * If our class cares about being spoiled, and we have been, we
693 * are probably just ahead of the event telling us that. Fail
694 * now rather than having to unravel this later.
696 if (cp->geom->spoiled != NULL && cp->spoiled &&
697 (dcr > 0 || dcw > 0 || dce > 0))
701 * Figure out what counts the provider would have had, if this
702 * consumer had (r0w0e0) at this time.
704 pr = pp->acr - cp->acr;
705 pw = pp->acw - cp->acw;
706 pe = pp->ace - cp->ace;
709 "open delta:[r%dw%de%d] old:[r%dw%de%d] provider:[r%dw%de%d] %p(%s)",
711 cp->acr, cp->acw, cp->ace,
712 pp->acr, pp->acw, pp->ace,
715 /* If foot-shooting is enabled, any open on rank#1 is OK */
716 if ((g_debugflags & 16) && pp->geom->rank == 1)
718 /* If we try exclusive but already write: fail */
719 else if (dce > 0 && pw > 0)
721 /* If we try write but already exclusive: fail */
722 else if (dcw > 0 && pe > 0)
724 /* If we try to open more but provider is error'ed: fail */
725 else if ((dcr > 0 || dcw > 0 || dce > 0) && pp->error != 0)
730 error = pp->geom->access(pp, dcr, dcw, dce);
731 KASSERT(dcr > 0 || dcw > 0 || dce > 0 || error == 0,
732 ("Geom provider %s::%s failed closing ->access()",
733 pp->geom->class->name, pp->name));
736 * If we open first write, spoil any partner consumers.
737 * If we close last write, trigger re-taste.
739 if (pp->acw == 0 && dcw != 0)
741 else if (pp->acw != 0 && pp->acw == -dcw &&
742 !(pp->geom->flags & G_GEOM_WITHER))
743 g_post_event(g_new_provider_event, pp, M_WAITOK,
752 if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)
753 KASSERT(pp->sectorsize > 0,
754 ("Provider %s lacks sectorsize", pp->name));
760 g_handleattr_int(struct bio *bp, const char *attribute, int val)
763 return (g_handleattr(bp, attribute, &val, sizeof val));
767 g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val)
770 return (g_handleattr(bp, attribute, &val, sizeof val));
774 g_handleattr(struct bio *bp, const char *attribute, void *val, int len)
778 if (strcmp(bp->bio_attribute, attribute))
780 if (bp->bio_length != len) {
781 printf("bio_length %jd len %d -> EFAULT\n",
782 (intmax_t)bp->bio_length, len);
786 bcopy(val, bp->bio_data, len);
787 bp->bio_completed = len;
789 g_io_deliver(bp, error);
794 g_std_access(struct g_provider *pp,
795 int dr __unused, int dw __unused, int de __unused)
799 G_VALID_PROVIDER(pp);
804 g_std_done(struct bio *bp)
808 bp2 = bp->bio_parent;
809 if (bp2->bio_error == 0)
810 bp2->bio_error = bp->bio_error;
811 bp2->bio_completed += bp->bio_completed;
814 if (bp2->bio_children == bp2->bio_inbed)
815 g_io_deliver(bp2, bp2->bio_error);
818 /* XXX: maybe this is only g_slice_spoiled */
821 g_std_spoiled(struct g_consumer *cp)
824 struct g_provider *pp;
827 G_VALID_CONSUMER(cp);
828 g_trace(G_T_TOPOLOGY, "g_std_spoiled(%p)", cp);
831 LIST_FOREACH(pp, &gp->provider, provider)
832 g_orphan_provider(pp, ENXIO);
833 g_destroy_consumer(cp);
834 if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer))
837 gp->flags |= G_GEOM_WITHER;
841 * Spoiling happens when a provider is opened for writing, but consumers
842 * which are configured by in-band data are attached (slicers for instance).
843 * Since the write might potentially change the in-band data, such consumers
844 * need to re-evaluate their existence after the writing session closes.
845 * We do this by (offering to) tear them down when the open for write happens
846 * in return for a re-taste when it closes again.
847 * Together with the fact that such consumers grab an 'e' bit whenever they
848 * are open, regardless of mode, this ends up DTRT.
852 g_spoil_event(void *arg, int flag)
854 struct g_provider *pp;
855 struct g_consumer *cp, *cp2;
858 if (flag == EV_CANCEL)
861 G_VALID_PROVIDER(pp);
862 for (cp = LIST_FIRST(&pp->consumers); cp != NULL; cp = cp2) {
863 cp2 = LIST_NEXT(cp, consumers);
867 if (cp->geom->spoiled == NULL)
869 cp->geom->spoiled(cp);
875 g_spoil(struct g_provider *pp, struct g_consumer *cp)
877 struct g_consumer *cp2;
880 G_VALID_PROVIDER(pp);
881 G_VALID_CONSUMER(cp);
883 LIST_FOREACH(cp2, &pp->consumers, consumers) {
887 KASSERT(cp2->acr == 0, ("spoiling cp->acr = %d", cp2->acr));
888 KASSERT(cp2->acw == 0, ("spoiling cp->acw = %d", cp2->acw));
890 KASSERT(cp2->ace == 0, ("spoiling cp->ace = %d", cp2->ace));
893 g_post_event(g_spoil_event, pp, M_WAITOK, pp, NULL);
897 g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len)
902 error = g_io_getattr(attr, cp, &i, var);
912 * This function walks (topologically unsafely) the mesh and returns a
913 * non-zero integer if it finds the argument pointer is an object.
914 * The return value indicates which type of object it is believed to be.
915 * If topology is not locked, this function is potentially dangerous,
916 * but since it is for debugging purposes and can be useful for instance
917 * from DDB, we do not assert topology lock is held.
920 g_valid_obj(void const *ptr)
924 struct g_consumer *cp;
925 struct g_provider *pp;
927 LIST_FOREACH(mp, &g_classes, class) {
930 LIST_FOREACH(gp, &mp->geom, geom) {
933 LIST_FOREACH(cp, &gp->consumer, consumer)
936 LIST_FOREACH(pp, &gp->provider, provider)