2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote
20 * products derived from this software without specific prior written
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * This is the method for dealing with BSD disklabels. It has been
38 * extensively (by my standards at least) commented, in the vain hope that
39 * it will serve as the source in future copy&paste operations.
42 #include <sys/cdefs.h>
43 __FBSDID("$FreeBSD$");
45 #include <sys/param.h>
46 #include <sys/endian.h>
47 #include <sys/systm.h>
48 #include <sys/sysctl.h>
49 #include <sys/kernel.h>
50 #include <sys/fcntl.h>
53 #include <sys/malloc.h>
55 #include <sys/mutex.h>
57 #include <sys/errno.h>
58 #include <sys/disklabel.h>
62 #include <geom/geom.h>
63 #include <geom/geom_slice.h>
65 FEATURE(geom_bsd, "GEOM BSD disklabels support");
67 #define BSD_CLASS_NAME "BSD"
69 #define ALPHA_LABEL_OFFSET 64
70 #define HISTORIC_LABEL_OFFSET 512
72 #define LABELSIZE (148 + 16 * MAXPARTITIONS)
74 static void g_bsd_hotwrite(void *arg, int flag);
76 * Our private data about one instance. All the rest is handled by the
77 * slice code and stored in its softc, so this is just the stuff
78 * specific to BSD disklabels.
84 struct disklabel ondisk;
85 u_char label[LABELSIZE];
90 * Modify our slicer to match proposed disklabel, if possible.
91 * This is where we make sure we don't do something stupid.
94 g_bsd_modify(struct g_geom *gp, u_char *label)
97 struct partition *ppp;
99 struct g_consumer *cp;
100 struct g_bsd_softc *ms;
110 error = bsd_disklabel_le_dec(label, &dl, MAXPARTITIONS);
115 /* Get dimensions of our device. */
116 cp = LIST_FIRST(&gp->consumer);
117 secsize = cp->provider->sectorsize;
119 /* ... or a smaller sector size. */
120 if (dl.d_secsize < secsize) {
124 /* ... or a non-multiple sector size. */
125 if (dl.d_secsize % secsize != 0) {
129 /* Historical braindamage... */
130 rawoffset = (off_t)dl.d_partitions[RAW_PART].p_offset * dl.d_secsize;
132 for (i = 0; i < dl.d_npartitions; i++) {
133 ppp = &dl.d_partitions[i];
134 if (ppp->p_size == 0)
136 o = (off_t)ppp->p_offset * dl.d_secsize;
142 if (rawoffset != 0 && (off_t)rawoffset != ms->mbroffset)
143 printf("WARNING: %s expected rawoffset %jd, found %jd\n",
145 (intmax_t)ms->mbroffset/dl.d_secsize,
146 (intmax_t)rawoffset/dl.d_secsize);
148 /* Don't munge open partitions. */
149 for (i = 0; i < dl.d_npartitions; i++) {
150 ppp = &dl.d_partitions[i];
152 o = (off_t)ppp->p_offset * dl.d_secsize;
155 error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK,
157 (off_t)ppp->p_size * dl.d_secsize,
159 "%s%c", gp->name, 'a' + i);
164 /* Looks good, go for it... */
165 for (u = 0; u < gsp->nslice; u++) {
166 ppp = &dl.d_partitions[u];
167 o = (off_t)ppp->p_offset * dl.d_secsize;
170 g_slice_config(gp, u, G_SLICE_CONFIG_SET,
172 (off_t)ppp->p_size * dl.d_secsize,
174 "%s%c", gp->name, 'a' + u);
177 /* Update our softc */
179 if (label != ms->label)
180 bcopy(label, ms->label, LABELSIZE);
181 ms->rawoffset = rawoffset;
184 * In order to avoid recursively attaching to the same
185 * on-disk label (it's usually visible through the 'c'
186 * partition) we calculate an MD5 and ask if other BSD geoms
187 * below us love that label. If they do, we don't.
190 MD5Update(&md5sum, ms->label, sizeof(ms->label));
191 MD5Final(ms->labelsum, &md5sum);
197 * This is an internal helper function, called multiple times from the taste
198 * function to try to locate a disklabel on the disk. More civilized formats
199 * will not need this, as there is only one possible place on disk to look
200 * for the magic spot.
204 g_bsd_try(struct g_geom *gp, struct g_slicer *gsp, struct g_consumer *cp, int secsize, struct g_bsd_softc *ms, off_t offset)
208 struct disklabel *dl;
212 * We need to read entire aligned sectors, and we assume that the
213 * disklabel does not span sectors, so one sector is enough.
215 secoff = offset % secsize;
216 buf = g_read_data(cp, offset - secoff, secsize, NULL);
220 /* Decode into our native format. */
222 error = bsd_disklabel_le_dec(buf + secoff, dl, MAXPARTITIONS);
224 bcopy(buf + secoff, ms->label, LABELSIZE);
226 /* Remember to free the buffer g_read_data() gave us. */
229 ms->labeloffset = offset;
234 * This function writes the current label to disk, possibly updating
235 * the alpha SRM checksum.
239 g_bsd_writelabel(struct g_geom *gp, u_char *bootcode)
243 struct g_consumer *cp;
244 struct g_slicer *gsp;
245 struct g_bsd_softc *ms;
252 cp = LIST_FIRST(&gp->consumer);
253 /* Get sector size, we need it to read data. */
254 secsize = cp->provider->sectorsize;
255 secoff = ms->labeloffset % secsize;
256 if (bootcode == NULL) {
257 buf = g_read_data(cp, ms->labeloffset - secoff, secsize, &error);
260 bcopy(ms->label, buf + secoff, sizeof(ms->label));
263 bcopy(ms->label, buf + ms->labeloffset, sizeof(ms->label));
265 if (ms->labeloffset == ALPHA_LABEL_OFFSET) {
267 for (i = 0; i < 63; i++)
268 sum += le64dec(buf + i * 8);
269 le64enc(buf + 504, sum);
271 if (bootcode == NULL) {
272 error = g_write_data(cp, ms->labeloffset - secoff, buf, secsize);
275 error = g_write_data(cp, 0, bootcode, BBSIZE);
281 * If the user tries to overwrite our disklabel through an open partition
282 * or via a magicwrite config call, we end up here and try to prevent
283 * footshooting as best we can.
286 g_bsd_hotwrite(void *arg, int flag)
290 struct g_slicer *gsp;
292 struct g_bsd_softc *ms;
298 * We should never get canceled, because that would amount to a removal
299 * of the geom while there were outstanding I/O requests.
301 KASSERT(flag != EV_CANCEL, ("g_bsd_hotwrite cancelled"));
303 gp = bp->bio_to->geom;
306 gsl = &gsp->slices[bp->bio_to->index];
307 p = (u_char*)bp->bio_data + ms->labeloffset
308 - (bp->bio_offset + gsl->offset);
309 error = g_bsd_modify(gp, p);
311 g_io_deliver(bp, EPERM);
314 g_slice_finish_hot(bp);
318 * This start routine is only called for non-trivial requests, all the
319 * trivial ones are handled autonomously by the slice code.
320 * For requests we handle here, we must call the g_io_deliver() on the
321 * bio, and return non-zero to indicate to the slice code that we did so.
322 * This code executes in the "DOWN" I/O path, this means:
324 * * Don't grab the topology lock.
325 * * Don't call biowait, g_getattr(), g_setattr() or g_read_data()
328 g_bsd_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td)
331 struct g_bsd_softc *ms;
332 struct g_slicer *gsp;
342 /* Return a copy of the disklabel to userland. */
343 bsd_disklabel_le_dec(ms->label, data, MAXPARTITIONS);
346 struct g_consumer *cp;
352 if (!(fflag & FWRITE))
354 /* The disklabel to set is the ioctl argument. */
355 buf = g_malloc(BBSIZE, M_WAITOK);
357 error = copyin(p, buf, BBSIZE);
359 /* XXX: Rude, but supposedly safe */
362 /* Validate and modify our slice instance to match. */
363 error = g_bsd_modify(gp, buf + ms->labeloffset);
365 cp = LIST_FIRST(&gp->consumer);
366 if (ms->labeloffset == ALPHA_LABEL_OFFSET) {
368 for (i = 0; i < 63; i++)
369 sum += le64dec(buf + i * 8);
370 le64enc(buf + 504, sum);
372 error = g_write_data(cp, 0, buf, BBSIZE);
382 if (!(fflag & FWRITE))
384 label = g_malloc(LABELSIZE, M_WAITOK);
385 /* The disklabel to set is the ioctl argument. */
386 bsd_disklabel_le_enc(label, data);
390 /* Validate and modify our slice instance to match. */
391 error = g_bsd_modify(gp, label);
392 if (error == 0 && cmd == DIOCWDINFO)
393 error = g_bsd_writelabel(gp, NULL);
405 g_bsd_start(struct bio *bp)
408 struct g_bsd_softc *ms;
409 struct g_slicer *gsp;
411 gp = bp->bio_to->geom;
414 if (bp->bio_cmd == BIO_GETATTR) {
415 if (g_handleattr(bp, "BSD::labelsum", ms->labelsum,
416 sizeof(ms->labelsum)))
423 * Dump configuration information in XML format.
424 * Notice that the function is called once for the geom and once for each
425 * consumer and provider. We let g_slice_dumpconf() do most of the work.
428 g_bsd_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp)
430 struct g_bsd_softc *ms;
431 struct g_slicer *gsp;
435 g_slice_dumpconf(sb, indent, gp, cp, pp);
436 if (indent != NULL && pp == NULL && cp == NULL) {
437 sbuf_printf(sb, "%s<labeloffset>%jd</labeloffset>\n",
438 indent, (intmax_t)ms->labeloffset);
439 sbuf_printf(sb, "%s<rawoffset>%jd</rawoffset>\n",
440 indent, (intmax_t)ms->rawoffset);
441 sbuf_printf(sb, "%s<mbroffset>%jd</mbroffset>\n",
442 indent, (intmax_t)ms->mbroffset);
443 } else if (pp != NULL) {
445 sbuf_printf(sb, " ty %d",
446 ms->ondisk.d_partitions[pp->index].p_fstype);
448 sbuf_printf(sb, "%s<type>%d</type>\n", indent,
449 ms->ondisk.d_partitions[pp->index].p_fstype);
454 * The taste function is called from the event-handler, with the topology
455 * lock already held and a provider to examine. The flags select the mode:
457 * If flags == G_TF_NORMAL, the idea is to take a bite of the provider and
458 * if we find valid, consistent magic on it, build a geom on it.
460 * There may be cases where the operator would like to put a BSD-geom on
461 * providers which do not meet all of the requirements. This can be done
462 * by instead passing the G_TF_INSIST flag, which will override these
465 * The final flags value is G_TF_TRANSPARENT, which instructs the method
466 * to put a geom on top of the provider and configure it to be as transparent
467 * as possible. This is not really relevant to the BSD method and therefore
468 * not implemented here.
471 static struct uuid freebsd_slice = GPT_ENT_TYPE_FREEBSD;
473 static struct g_geom *
474 g_bsd_taste(struct g_class *mp, struct g_provider *pp, int flags)
477 struct g_consumer *cp;
479 struct g_bsd_softc *ms;
481 struct g_slicer *gsp;
486 g_trace(G_T_TOPOLOGY, "bsd_taste(%s,%s)", mp->name, pp->name);
489 /* We don't implement transparent inserts. */
490 if (flags == G_TF_TRANSPARENT)
494 * BSD labels are a subclass of the general "slicing" topology so
495 * a lot of the work can be done by the common "slice" code.
496 * Create a geom with space for MAXPARTITIONS providers, one consumer
497 * and a softc structure for us. Specify the provider to attach
498 * the consumer to and our "start" routine for special requests.
499 * The provider is opened with mode (1,0,0) so we can do reads
502 gp = g_slice_new(mp, MAXPARTITIONS, pp, &cp, &ms,
503 sizeof(*ms), g_bsd_start);
507 /* Get the geom_slicer softc from the geom. */
511 * The do...while loop here allows us to have multiple escapes
512 * using a simple "break". This improves code clarity without
513 * ending up in deep nesting and without using goto or come from.
517 * If the provider is an MBR we will only auto attach
518 * to type 165 slices in the G_TF_NORMAL case. With any other flags
519 * we will attach to a slice of any type.
521 error = g_getattr("MBR::type", cp, &i);
523 if (i != 165 && flags == G_TF_NORMAL)
525 error = g_getattr("MBR::offset", cp, &ms->mbroffset);
530 /* Same thing if we are inside a PC98 */
531 error = g_getattr("PC98::type", cp, &i);
533 if (i != 0xc494 && flags == G_TF_NORMAL)
535 error = g_getattr("PC98::offset", cp, &ms->mbroffset);
540 /* Same thing if we are inside a GPT */
541 error = g_getattr("GPT::type", cp, &uuid);
543 if (memcmp(&uuid, &freebsd_slice, sizeof(uuid)) != 0 &&
544 flags == G_TF_NORMAL)
548 /* Get sector size, we need it to read data. */
549 secsize = cp->provider->sectorsize;
553 /* First look for a label at the start of the second sector. */
554 error = g_bsd_try(gp, gsp, cp, secsize, ms, secsize);
557 * If sector size is not 512 the label still can be at
558 * offset 512, not at the start of the second sector. At least
559 * it's true for labels created by FreeBSD's bsdlabel(8).
561 if (error && secsize != HISTORIC_LABEL_OFFSET)
562 error = g_bsd_try(gp, gsp, cp, secsize, ms,
563 HISTORIC_LABEL_OFFSET);
565 /* Next, look for alpha labels */
567 error = g_bsd_try(gp, gsp, cp, secsize, ms,
570 /* If we didn't find a label, punt. */
575 * In order to avoid recursively attaching to the same
576 * on-disk label (it's usually visible through the 'c'
577 * partition) we calculate an MD5 and ask if other BSD's
578 * below us love that label. If they do, we don't.
581 MD5Update(&md5sum, ms->label, sizeof(ms->label));
582 MD5Final(ms->labelsum, &md5sum);
584 error = g_getattr("BSD::labelsum", cp, &hash);
585 if (!error && !bcmp(ms->labelsum, hash, sizeof(hash)))
589 * Process the found disklabel, and modify our "slice"
590 * instance to match it, if possible.
592 error = g_bsd_modify(gp, ms->label);
595 /* Success or failure, we can close our provider now. */
596 g_access(cp, -1, 0, 0);
598 /* If we have configured any providers, return the new geom. */
599 if (gsp->nprovider > 0) {
600 g_slice_conf_hot(gp, 0, ms->labeloffset, LABELSIZE,
601 G_SLICE_HOT_ALLOW, G_SLICE_HOT_DENY, G_SLICE_HOT_CALL);
602 gsp->hot = g_bsd_hotwrite;
606 * ...else push the "self-destruct" button, by spoiling our own
607 * consumer. This triggers a call to g_slice_spoiled which will
608 * dismantle what was set up.
616 struct g_bsd_softc *ms;
622 g_bsd_callconfig(void *arg, int flag)
627 hp->error = g_bsd_modify(hp->gp, hp->label);
629 hp->error = g_bsd_writelabel(hp->gp, NULL);
633 * NB! curthread is the user process which GCTL'ed.
636 g_bsd_config(struct gctl_req *req, struct g_class *mp, char const *verb)
642 struct g_slicer *gsp;
643 struct g_consumer *cp;
644 struct g_bsd_softc *ms;
647 gp = gctl_get_geom(req, mp, "geom");
650 cp = LIST_FIRST(&gp->consumer);
653 if (!strcmp(verb, "read mbroffset")) {
654 gctl_set_param_err(req, "mbroffset", &ms->mbroffset,
655 sizeof(ms->mbroffset));
657 } else if (!strcmp(verb, "write label")) {
658 label = gctl_get_paraml(req, "label", LABELSIZE);
662 h0h0.ms = gsp->softc;
665 /* XXX: Does this reference register with our self-destruct code? */
666 error = g_access(cp, 1, 1, 1);
668 gctl_error(req, "could not access consumer");
671 g_bsd_callconfig(&h0h0, 0);
673 g_access(cp, -1, -1, -1);
674 } else if (!strcmp(verb, "write bootcode")) {
675 label = gctl_get_paraml(req, "bootcode", BBSIZE);
678 /* XXX: Does this reference register with our self-destruct code? */
679 error = g_access(cp, 1, 1, 1);
681 gctl_error(req, "could not access consumer");
684 error = g_bsd_writelabel(gp, label);
685 g_access(cp, -1, -1, -1);
687 gctl_error(req, "Unknown verb parameter");
693 /* Finally, register with GEOM infrastructure. */
694 static struct g_class g_bsd_class = {
695 .name = BSD_CLASS_NAME,
696 .version = G_VERSION,
697 .taste = g_bsd_taste,
698 .ctlreq = g_bsd_config,
699 .dumpconf = g_bsd_dumpconf,
700 .ioctl = g_bsd_ioctl,
703 DECLARE_GEOM_CLASS(g_bsd_class, g_bsd);