3 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */
6 * Copyright (c) 1995 Jason R. Thorpe.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project
21 * 4. The name of the author may not be used to endorse or promote products
22 * derived from this software without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * Copyright (c) 1988 University of Utah.
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. All advertising materials mentioning features or use of this software
55 * must display the following acknowledgement:
56 * This product includes software developed by the University of
57 * California, Berkeley and its contributors.
58 * 4. Neither the name of the University nor the names of its contributors
59 * may be used to endorse or promote products derived from this software
60 * without specific prior written permission.
62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
74 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 * @(#)cd.c 8.2 (Berkeley) 11/16/93
80 * "Concatenated" disk driver.
82 * Dynamic configuration and disklabel support by:
83 * Jason R. Thorpe <thorpej@nas.nasa.gov>
84 * Numerical Aerodynamic Simulation Facility
86 * NASA Ames Research Center
87 * Moffett Field, CA 94035
92 #include <sys/param.h>
93 #include <sys/systm.h>
94 #include <sys/kernel.h>
95 #include <sys/module.h>
98 #include <sys/malloc.h>
99 #include <sys/namei.h>
100 #include <sys/conf.h>
101 #include <sys/stat.h>
102 #include <sys/sysctl.h>
103 #include <sys/disklabel.h>
104 #include <ufs/ffs/fs.h>
105 #include <sys/devicestat.h>
106 #include <sys/fcntl.h>
107 #include <sys/vnode.h>
109 #include <sys/ccdvar.h>
111 #if defined(CCDDEBUG) && !defined(DEBUG)
116 #define CCDB_FOLLOW 0x01
117 #define CCDB_INIT 0x02
119 #define CCDB_LABEL 0x08
120 #define CCDB_VNODE 0x10
121 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
123 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
127 #define ccdunit(x) dkunit(x)
128 #define ccdpart(x) dkpart(x)
131 This is how mirroring works (only writes are special):
133 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
134 linked together by the cb_mirror field. "cb_pflags &
135 CCDPF_MIRROR_DONE" is set to 0 on both of them.
137 When a component returns to ccdiodone(), it checks if "cb_pflags &
138 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's
139 flag and returns. If it is, it means its partner has already
140 returned, so it will go to the regular cleanup.
145 struct bio cb_buf; /* new I/O buf */
146 struct bio *cb_obp; /* ptr. to original I/O buf */
147 struct ccdbuf *cb_freenext; /* free list link */
148 int cb_unit; /* target unit */
149 int cb_comp; /* target component */
150 int cb_pflags; /* mirror/parity status flag */
151 struct ccdbuf *cb_mirror; /* mirror counterpart */
154 /* bits in cb_pflags */
155 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */
157 #define CCDLABELDEV(dev) \
158 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART)))
160 static d_open_t ccdopen;
161 static d_close_t ccdclose;
162 static d_strategy_t ccdstrategy;
163 static d_ioctl_t ccdioctl;
164 static d_dump_t ccddump;
165 static d_psize_t ccdsize;
167 #define NCCDFREEHIWAT 16
169 #define CDEV_MAJOR 74
170 #define BDEV_MAJOR 21
172 static struct cdevsw ccd_cdevsw = {
174 /* close */ ccdclose,
176 /* write */ physwrite,
177 /* ioctl */ ccdioctl,
180 /* strategy */ ccdstrategy,
182 /* maj */ CDEV_MAJOR,
186 /* bmaj */ BDEV_MAJOR
189 /* called during module initialization */
190 static void ccdattach __P((void));
191 static int ccd_modevent __P((module_t, int, void *));
193 /* called by biodone() at interrupt time */
194 static void ccdiodone __P((struct bio *bp));
196 static void ccdstart __P((struct ccd_softc *, struct bio *));
197 static void ccdinterleave __P((struct ccd_softc *, int));
198 static void ccdintr __P((struct ccd_softc *, struct bio *));
199 static int ccdinit __P((struct ccddevice *, char **, struct proc *));
200 static int ccdlookup __P((char *, struct proc *p, struct vnode **));
201 static void ccdbuffer __P((struct ccdbuf **ret, struct ccd_softc *,
202 struct bio *, daddr_t, caddr_t, long));
203 static void ccdgetdisklabel __P((dev_t));
204 static void ccdmakedisklabel __P((struct ccd_softc *));
205 static int ccdlock __P((struct ccd_softc *));
206 static void ccdunlock __P((struct ccd_softc *));
209 static void printiinfo __P((struct ccdiinfo *));
212 /* Non-private for the benefit of libkvm. */
213 struct ccd_softc *ccd_softc;
214 struct ccddevice *ccddevs;
215 struct ccdbuf *ccdfreebufs;
216 static int numccdfreebufs;
217 static int numccd = 0;
220 * getccdbuf() - Allocate and zero a ccd buffer.
222 * This routine is called at splbio().
227 getccdbuf(struct ccdbuf *cpy)
232 * Allocate from freelist or malloc as necessary
234 if ((cbp = ccdfreebufs) != NULL) {
235 ccdfreebufs = cbp->cb_freenext;
238 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK);
242 * Used by mirroring code
245 bcopy(cpy, cbp, sizeof(struct ccdbuf));
247 bzero(cbp, sizeof(struct ccdbuf));
250 * independent struct bio initialization
257 * putccdbuf() - Free a ccd buffer.
259 * This routine is called at splbio().
264 putccdbuf(struct ccdbuf *cbp)
267 if (numccdfreebufs < NCCDFREEHIWAT) {
268 cbp->cb_freenext = ccdfreebufs;
272 free((caddr_t)cbp, M_DEVBUF);
278 * Number of blocks to leave untouched in front of a component partition.
279 * This is to avoid violating its disklabel area when it starts at the
280 * beginning of the slice.
282 #if !defined(CCD_OFFSET)
283 #define CCD_OFFSET 16
287 ccd_clone(void *arg, char *name, int namelen, dev_t *dev)
294 i = dev_stdclone(name, &s, "ccd", &u);
299 if (*s < 'a' || *s > 'h') /* accept partition letters 'a'..'h' inclusive; minor is u * 8 + letter - 'a' */
303 *dev = make_dev(&ccd_cdevsw, u * 8 + *s - 'a',
304 UID_ROOT, GID_OPERATOR, 0640, name);
308 * Called by main() during pseudo-device attachment. All we need
309 * to do is allocate enough space for devices to be configured later, and
319 printf("ccd0-%d: Concatenated disk drivers\n", num-1);
321 printf("ccd0: Concatenated disk driver\n");
323 ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc),
325 ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice),
327 if ((ccd_softc == NULL) || (ccddevs == NULL)) {
328 printf("WARNING: no memory for concatenated disks\n");
329 if (ccd_softc != NULL)
330 free(ccd_softc, M_DEVBUF);
332 free(ccddevs, M_DEVBUF);
336 bzero(ccd_softc, num * sizeof(struct ccd_softc));
337 bzero(ccddevs, num * sizeof(struct ccddevice));
339 cdevsw_add(&ccd_cdevsw);
340 /* XXX: is this necessary? */
341 for (i = 0; i < numccd; ++i)
342 ccddevs[i].ccd_dk = -1;
343 EVENTHANDLER_REGISTER(dev_clone, ccd_clone, 0, 1000);
347 ccd_modevent(mod, type, data)
360 printf("ccd0: Unload not supported!\n");
364 default: /* MOD_SHUTDOWN etc */
370 DEV_MODULE(ccd, ccd_modevent, NULL);
373 ccdinit(ccd, cpaths, p)
374 struct ccddevice *ccd;
378 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
379 struct ccdcinfo *ci = NULL; /* XXX */
386 struct partinfo dpart;
387 struct ccdgeom *ccg = &cs->sc_geom;
388 char tmppath[MAXPATHLEN];
392 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
393 printf("ccdinit: unit %d\n", ccd->ccd_unit);
397 cs->sc_ileave = ccd->ccd_interleave;
398 cs->sc_nccdisks = ccd->ccd_ndev;
400 /* Allocate space for the component info. */
401 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
405 * Verify that each component piece exists and record
406 * relevant information about it.
414 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
415 vp = ccd->ccd_vpp[ix];
416 ci = &cs->sc_cinfo[ix];
420 * Copy in the pathname of the component.
422 bzero(tmppath, sizeof(tmppath)); /* sanity */
423 if ((error = copyinstr(cpaths[ix], tmppath,
424 MAXPATHLEN, &ci->ci_pathlen)) != 0) {
426 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
427 printf("ccd%d: can't copy path, error = %d\n",
428 ccd->ccd_unit, error);
432 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
433 bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
435 ci->ci_dev = vn_todev(vp);
438 * Get partition information for the component.
440 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
441 FREAD, uc, p)) != 0) {
443 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
444 printf("ccd%d: %s: ioctl failed, error = %d\n",
445 ccd->ccd_unit, ci->ci_path, error);
449 if (dpart.part->p_fstype == FS_BSDFFS) {
451 ((dpart.disklab->d_secsize > maxsecsize) ?
452 dpart.disklab->d_secsize : maxsecsize);
453 size = dpart.part->p_size - CCD_OFFSET;
456 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
457 printf("ccd%d: %s: incorrect partition type\n",
458 ccd->ccd_unit, ci->ci_path);
465 * Calculate the size, truncating to an interleave
466 * boundary if necessary.
469 if (cs->sc_ileave > 1)
470 size -= size % cs->sc_ileave;
474 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
475 printf("ccd%d: %s: size == 0\n",
476 ccd->ccd_unit, ci->ci_path);
482 if (minsize == 0 || size < minsize)
490 * Don't allow the interleave to be smaller than
491 * the biggest component sector.
493 if ((cs->sc_ileave > 0) &&
494 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
496 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
497 printf("ccd%d: interleave must be at least %d\n",
498 ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
505 * If uniform interleave is desired set all sizes to that of
506 * the smallest component. This will guarantee that a single
507 * interleave table is generated.
509 * Lost space must be taken into account when calculating the
510 * overall size. Half the space is lost when CCDF_MIRROR is
511 * specified. One disk is lost when CCDF_PARITY is specified.
513 if (ccd->ccd_flags & CCDF_UNIFORM) {
514 for (ci = cs->sc_cinfo;
515 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
516 ci->ci_size = minsize;
518 if (ccd->ccd_flags & CCDF_MIRROR) {
520 * Check to see if an even number of components
521 * have been specified. The interleave must also
522 * be non-zero in order for us to be able to
523 * guarantee the topology.
525 if (cs->sc_nccdisks % 2) {
526 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
530 if (cs->sc_ileave == 0) {
531 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
535 cs->sc_size = (cs->sc_nccdisks/2) * minsize;
536 } else if (ccd->ccd_flags & CCDF_PARITY) {
537 cs->sc_size = (cs->sc_nccdisks-1) * minsize;
539 if (cs->sc_ileave == 0) {
540 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
544 cs->sc_size = cs->sc_nccdisks * minsize;
549 * Construct the interleave table.
551 ccdinterleave(cs, ccd->ccd_unit);
554 * Create pseudo-geometry based on 1MB cylinders. It's
557 ccg->ccg_secsize = maxsecsize;
558 ccg->ccg_ntracks = 1;
559 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
560 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
563 * Add a devstat entry for this device.
565 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
566 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
567 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
568 DEVSTAT_PRIORITY_ARRAY);
570 cs->sc_flags |= CCDF_INITED;
571 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */
572 cs->sc_unit = ccd->ccd_unit;
576 while (ci > cs->sc_cinfo) {
578 free(ci->ci_path, M_DEVBUF);
580 free(cs->sc_cinfo, M_DEVBUF);
585 ccdinterleave(cs, unit)
586 struct ccd_softc *cs;
589 struct ccdcinfo *ci, *smallci;
596 if (ccddebug & CCDB_INIT)
597 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
601 * Allocate an interleave table. The worst case occurs when each
602 * of N disks is of a different size, resulting in N interleave
605 * Chances are this is too big, but we don't care.
607 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
608 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF,
612 * Trivial case: no interleave (actually interleave of disk size).
613 * Each table entry represents a single component in its entirety.
615 * An interleave of 0 may not be used with a mirror or parity setup.
617 if (cs->sc_ileave == 0) {
621 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
622 /* Allocate space for ii_index. */
623 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK);
625 ii->ii_startblk = bn;
627 ii->ii_index[0] = ix;
628 bn += cs->sc_cinfo[ix].ci_size;
633 if (ccddebug & CCDB_INIT)
634 printiinfo(cs->sc_itable);
640 * The following isn't fast or pretty; it doesn't have to be.
644 for (ii = cs->sc_itable; ; ii++) {
646 * Allocate space for ii_index. We might allocate more than
649 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
653 * Locate the smallest of the remaining components
656 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks];
658 if (ci->ci_size > size &&
660 ci->ci_size < smallci->ci_size)) {
666 * Nobody left, all done
668 if (smallci == NULL) {
674 * Record starting logical block using an sc_ileave blocksize.
676 ii->ii_startblk = bn / cs->sc_ileave;
679 * Record starting component block using an sc_ileave
680 * blocksize. This value is relative to the beginning of
683 ii->ii_startoff = lbn;
686 * Determine how many disks take part in this interleave
687 * and record their indices.
690 for (ci = cs->sc_cinfo;
691 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
692 if (ci->ci_size >= smallci->ci_size) {
693 ii->ii_index[ix++] = ci - cs->sc_cinfo;
697 bn += ix * (smallci->ci_size - size);
698 lbn = smallci->ci_size / cs->sc_ileave;
699 size = smallci->ci_size;
702 if (ccddebug & CCDB_INIT)
703 printiinfo(cs->sc_itable);
709 ccdopen(dev, flags, fmt, p)
714 int unit = ccdunit(dev);
715 struct ccd_softc *cs;
716 struct disklabel *lp;
717 int error = 0, part, pmask;
720 if (ccddebug & CCDB_FOLLOW)
721 printf("ccdopen(%x, %x)\n", dev, flags);
725 cs = &ccd_softc[unit];
727 if ((error = ccdlock(cs)) != 0)
736 * If we're initialized, check to see if there are any other
737 * open partitions. If not, then it's safe to update
738 * the in-core disklabel.
740 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0))
741 ccdgetdisklabel(dev);
743 /* Check that the partition exists. */
744 if (part != RAW_PART && ((part >= lp->d_npartitions) ||
745 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
750 cs->sc_openmask |= pmask;
758 ccdclose(dev, flags, fmt, p)
763 int unit = ccdunit(dev);
764 struct ccd_softc *cs;
768 if (ccddebug & CCDB_FOLLOW)
769 printf("ccdclose(%x, %x)\n", dev, flags);
774 cs = &ccd_softc[unit];
776 if ((error = ccdlock(cs)) != 0)
781 /* ...that much closer to allowing unconfiguration... */
782 cs->sc_openmask &= ~(1 << part);
791 int unit = ccdunit(bp->bio_dev);
792 struct ccd_softc *cs = &ccd_softc[unit];
795 struct disklabel *lp;
798 if (ccddebug & CCDB_FOLLOW)
799 printf("ccdstrategy(%x): unit %d\n", bp, unit);
801 if ((cs->sc_flags & CCDF_INITED) == 0) {
802 bp->bio_error = ENXIO;
803 bp->bio_flags |= BIO_ERROR;
807 /* If it's a nil transfer, wake up the top half now. */
808 if (bp->bio_bcount == 0)
814 * Do bounds checking and adjust transfer. If there's an
815 * error, the bounds check will flag that for us.
817 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
818 if (ccdpart(bp->bio_dev) != RAW_PART) {
819 if (bounds_check_with_label(bp, lp, wlabel) <= 0)
822 int pbn; /* in sc_secsize chunks */
823 long sz; /* in sc_secsize chunks */
825 pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE);
826 sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize);
829 * If out of bounds return an error. If at the EOF point,
830 * simply read or write less.
833 if (pbn < 0 || pbn >= cs->sc_size) {
834 bp->bio_resid = bp->bio_bcount;
835 if (pbn != cs->sc_size) {
836 bp->bio_error = EINVAL;
837 bp->bio_flags |= BIO_ERROR;
843 * If the request crosses EOF, truncate the request.
845 if (pbn + sz > cs->sc_size) {
846 bp->bio_bcount = (cs->sc_size - pbn) *
847 cs->sc_geom.ccg_secsize;
851 bp->bio_resid = bp->bio_bcount;
866 struct ccd_softc *cs;
870 struct ccdbuf *cbp[4];
871 /* XXX! : 2 reads and 2 writes for RAID 4/5 */
874 struct partition *pp;
877 if (ccddebug & CCDB_FOLLOW)
878 printf("ccdstart(%x, %x)\n", cs, bp);
881 /* Record the transaction start */
882 devstat_start_transaction(&cs->device_stats);
885 * Translate the partition-relative block number to an absolute.
888 if (ccdpart(bp->bio_dev) != RAW_PART) {
889 pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)];
894 * Allocate component buffers and fire off the requests
897 for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) {
898 ccdbuffer(cbp, cs, bp, bn, addr, bcount);
899 rcount = cbp[0]->cb_buf.bio_bcount;
901 if (cs->sc_cflags & CCDF_MIRROR) {
903 * Mirroring. Writes go to both disks, reads are
904 * taken from whichever disk seems most appropriate.
906 * We attempt to localize reads to the disk whose arm
907 * is nearest the read request. We ignore seeks due
908 * to writes when making this determination and we
909 * also try to avoid hogging.
911 if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) {
912 BIO_STRATEGY(&cbp[0]->cb_buf, 0);
913 BIO_STRATEGY(&cbp[1]->cb_buf, 0);
915 int pick = cs->sc_pick;
916 daddr_t range = cs->sc_size / 16;
918 if (bn < cs->sc_blk[pick] - range ||
919 bn > cs->sc_blk[pick] + range
921 cs->sc_pick = pick = 1 - pick;
923 cs->sc_blk[pick] = bn + btodb(rcount);
924 BIO_STRATEGY(&cbp[pick]->cb_buf, 0);
930 BIO_STRATEGY(&cbp[0]->cb_buf, 0);
938 * Build a component buffer header.
941 ccdbuffer(cb, cs, bp, bn, addr, bcount)
943 struct ccd_softc *cs;
949 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */
955 if (ccddebug & CCDB_IO)
956 printf("ccdbuffer(%x, %x, %d, %x, %d)\n",
957 cs, bp, bn, addr, bcount);
960 * Determine which component bn falls in.
965 if (cs->sc_ileave == 0) {
967 * Serially concatenated and neither a mirror nor a parity
968 * config. This is a special case.
973 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
981 * Calculate cbn, the logical superblock (sc_ileave chunks),
982 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
985 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */
986 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */
989 * Figure out which interleave table to use.
991 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
992 if (ii->ii_startblk > cbn)
998 * off is the logical superblock relative to the beginning
999 * of this interleave block.
1001 off = cbn - ii->ii_startblk;
1004 * We must calculate which disk component to use (ccdisk),
1005 * and recalculate cbn to be the superblock relative to
1006 * the beginning of the component. This is typically done by
1007 * adding 'off' and ii->ii_startoff together. However, 'off'
1008 * must typically be divided by the number of components in
1009 * this interleave array to properly convert it from a
1010 * CCD-relative logical superblock number to a
1011 * component-relative superblock number.
1013 if (ii->ii_ndisk == 1) {
1015 * When we have just one disk, it can't be a mirror
1016 * or a parity config.
1018 ccdisk = ii->ii_index[0];
1019 cbn = ii->ii_startoff + off;
1021 if (cs->sc_cflags & CCDF_MIRROR) {
1023 * We have forced a uniform mapping, resulting
1024 * in a single interleave array. We double
1025 * up on the first half of the available
1026 * components and our mirror is in the second
1027 * half. This only works with a single
1028 * interleave array because doubling up
1029 * doubles the number of sectors, so there
1030 * cannot be another interleave array because
1031 * the next interleave array's calculations
1034 int ndisk2 = ii->ii_ndisk / 2;
1035 ccdisk = ii->ii_index[off % ndisk2];
1036 cbn = ii->ii_startoff + off / ndisk2;
1037 ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1038 } else if (cs->sc_cflags & CCDF_PARITY) {
1040 * XXX not implemented yet
1042 int ndisk2 = ii->ii_ndisk - 1;
1043 ccdisk = ii->ii_index[off % ndisk2];
1044 cbn = ii->ii_startoff + off / ndisk2;
1045 if (cbn % ii->ii_ndisk <= ccdisk)
1048 ccdisk = ii->ii_index[off % ii->ii_ndisk];
1049 cbn = ii->ii_startoff + off / ii->ii_ndisk;
1053 ci = &cs->sc_cinfo[ccdisk];
1056 * Convert cbn from a superblock to a normal block so it
1057 * can be used to calculate (along with cboff) the normal
1058 * block index into this particular disk.
1060 cbn *= cs->sc_ileave;
1064 * Fill in the component buf structure.
1066 cbp = getccdbuf(NULL);
1067 cbp->cb_buf.bio_cmd = bp->bio_cmd;
1068 cbp->cb_buf.bio_done = ccdiodone;
1069 cbp->cb_buf.bio_dev = ci->ci_dev; /* XXX */
1070 cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET;
1071 cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET);
1072 cbp->cb_buf.bio_data = addr;
1073 if (cs->sc_ileave == 0)
1074 cbc = dbtob((off_t)(ci->ci_size - cbn));
1076 cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1077 cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount;
1078 cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount;
1081 * context for ccdiodone
1084 cbp->cb_unit = cs - ccd_softc;
1085 cbp->cb_comp = ci - cs->sc_cinfo;
1088 if (ccddebug & CCDB_IO)
1089 printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n",
1090 ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.bio_blkno,
1091 cbp->cb_buf.bio_data, cbp->cb_buf.bio_bcount);
1096 * Note: both I/Os are set up when reading from a mirror, but only one
1099 if (cs->sc_cflags & CCDF_MIRROR) {
1100 /* mirror, setup second I/O */
1101 cbp = getccdbuf(cb[0]);
1102 cbp->cb_buf.bio_dev = ci2->ci_dev;
1103 cbp->cb_comp = ci2 - cs->sc_cinfo;
1105 /* link together the ccdbuf's and clear "mirror done" flag */
1106 cb[0]->cb_mirror = cb[1];
1107 cb[1]->cb_mirror = cb[0];
1108 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1109 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1115 struct ccd_softc *cs;
1119 if (ccddebug & CCDB_FOLLOW)
1120 printf("ccdintr(%x, %x)\n", cs, bp);
1123 * Request is done for better or worse, wakeup the top half.
1125 if (bp->bio_flags & BIO_ERROR)
1126 bp->bio_resid = bp->bio_bcount;
1127 devstat_end_transaction_bio(&cs->device_stats, bp);
1132 * Called at interrupt time.
1133 * Mark the component as done and if all components are done,
1134 * take a ccd interrupt.
1140 struct ccdbuf *cbp = (struct ccdbuf *)ibp;
1141 struct bio *bp = cbp->cb_obp;
1142 int unit = cbp->cb_unit;
1147 if (ccddebug & CCDB_FOLLOW)
1148 printf("ccdiodone(%x)\n", cbp);
1149 if (ccddebug & CCDB_IO) {
1150 printf("ccdiodone: bp %x bcount %d resid %d\n",
1151 bp, bp->bio_bcount, bp->bio_resid);
1152 printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n",
1153 cbp->cb_buf.bio_dev, cbp->cb_comp, cbp,
1154 cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data,
1155 cbp->cb_buf.bio_bcount);
1159 * If an error occurred, report it. If this is a mirrored
1160 * configuration and the first of two possible reads, do not
1161 * set the error in the bp yet because the second read may
1165 if (cbp->cb_buf.bio_flags & BIO_ERROR) {
1166 const char *msg = "";
1168 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
1169 (cbp->cb_buf.bio_cmd == BIO_READ) &&
1170 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1172 * We will try our read on the other disk down
1173 * below, also reverse the default pick so if we
1174 * are doing a scan we do not keep hitting the
1177 struct ccd_softc *cs = &ccd_softc[unit];
1179 msg = ", trying other disk";
1180 cs->sc_pick = 1 - cs->sc_pick;
1181 cs->sc_blk[cs->sc_pick] = bp->bio_blkno;
1183 bp->bio_flags |= BIO_ERROR;
1184 bp->bio_error = cbp->cb_buf.bio_error ?
1185 cbp->cb_buf.bio_error : EIO;
1187 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n",
1188 unit, bp->bio_error, cbp->cb_comp,
1189 (int)cbp->cb_buf.bio_blkno, bp->bio_blkno, msg);
1193 * Process mirror. If we are writing, I/O has been initiated on both
1194 * buffers and we fall through only after both are finished.
1196 * If we are reading only one I/O is initiated at a time. If an
1197 * error occurs we initiate the second I/O and return, otherwise
1198 * we free the second I/O without initiating it.
1201 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
1202 if (cbp->cb_buf.bio_cmd == BIO_WRITE) {
1204 * When writing, handshake with the second buffer
1205 * to determine when both are done. If both are not
1206 * done, return here.
1208 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1209 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1216 * When reading, either dispose of the second buffer
1217 * or initiate I/O on the second buffer if an error
1218 * occurred with this one.
1220 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1221 if (cbp->cb_buf.bio_flags & BIO_ERROR) {
1222 cbp->cb_mirror->cb_pflags |=
1224 BIO_STRATEGY(&cbp->cb_mirror->cb_buf, 0);
1229 putccdbuf(cbp->cb_mirror);
1237 * use bio_caller1 to determine how big the original request was rather
1238 * than bio_bcount, because bio_bcount may have been truncated for EOF.
1240 * XXX We check for an error, but we do not test the resid for an
1241 * aligned EOF condition. This may result in character & block
1242 * device access not recognizing EOF properly when read or written
1243 * sequentially, but will not affect filesystems.
1245 count = (long)cbp->cb_buf.bio_caller1;
1249 * If all done, "interrupt".
1251 bp->bio_resid -= count;
1252 if (bp->bio_resid < 0)
1253 panic("ccdiodone: count");
1254 if (bp->bio_resid == 0)
1255 ccdintr(&ccd_softc[unit], bp);
1260 ccdioctl(dev, cmd, data, flag, p)
1267 int unit = ccdunit(dev);
1268 int i, j, lookedup = 0, error = 0;
1270 struct ccd_softc *cs;
1271 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
1272 struct ccddevice ccd;
1279 cs = &ccd_softc[unit];
1281 bzero(&ccd, sizeof(ccd));
1285 if (cs->sc_flags & CCDF_INITED)
1288 if ((flag & FWRITE) == 0)
1291 if ((error = ccdlock(cs)) != 0)
1294 /* Fill in some important bits. */
1295 ccd.ccd_unit = unit;
1296 ccd.ccd_interleave = ccio->ccio_ileave;
1297 if (ccd.ccd_interleave == 0 &&
1298 ((ccio->ccio_flags & CCDF_MIRROR) ||
1299 (ccio->ccio_flags & CCDF_PARITY))) {
1300 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1301 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1303 if ((ccio->ccio_flags & CCDF_MIRROR) &&
1304 (ccio->ccio_flags & CCDF_PARITY)) {
1305 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1306 ccio->ccio_flags &= ~CCDF_PARITY;
1308 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1309 !(ccio->ccio_flags & CCDF_UNIFORM)) {
1310 printf("ccd%d: mirror/parity forces uniform flag\n",
1312 ccio->ccio_flags |= CCDF_UNIFORM;
1314 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1317 * Allocate space for and copy in the array of
1318 * component pathnames and device numbers.
1320 cpp = malloc(ccio->ccio_ndisks * sizeof(char *),
1321 M_DEVBUF, M_WAITOK);
1322 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1323 M_DEVBUF, M_WAITOK);
1325 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1326 ccio->ccio_ndisks * sizeof(char *)); /* element type matches the cpp allocation: an array of char * */
1328 free(vpp, M_DEVBUF);
1329 free(cpp, M_DEVBUF);
1335 if (ccddebug & CCDB_INIT)
1336 for (i = 0; i < ccio->ccio_ndisks; ++i)
1337 printf("ccdioctl: component %d: 0x%x\n",
1341 for (i = 0; i < ccio->ccio_ndisks; ++i) {
1343 if (ccddebug & CCDB_INIT)
1344 printf("ccdioctl: lookedup = %d\n", lookedup);
1346 if ((error = ccdlookup(cpp[i], p, &vpp[i])) != 0) {
1351 for (j = 0; j < lookedup; ++j)
1352 (void)vn_close(vpp[j], FREAD|FWRITE,
1354 free(vpp, M_DEVBUF);
1355 free(cpp, M_DEVBUF);
1364 ccd.ccd_ndev = ccio->ccio_ndisks;
1367 * Initialize the ccd. Fills in the softc for us.
1369 if ((error = ccdinit(&ccd, cpp, p)) != 0) {
1374 for (j = 0; j < lookedup; ++j)
1375 (void)vn_close(vpp[j], FREAD|FWRITE, uc, p);
1376 bzero(&ccd_softc[unit], sizeof(struct ccd_softc));
1377 free(vpp, M_DEVBUF);
1378 free(cpp, M_DEVBUF);
1385 * The ccd has been successfully initialized, so
1386 * we can place it into the array and read the disklabel.
1388 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1389 ccio->ccio_unit = unit;
1390 ccio->ccio_size = cs->sc_size;
1391 ccdgetdisklabel(dev);
1398 if ((cs->sc_flags & CCDF_INITED) == 0)
1401 if ((flag & FWRITE) == 0)
1404 if ((error = ccdlock(cs)) != 0)
1407 /* Don't unconfigure if any other partitions are open */
1408 part = ccdpart(dev);
1409 pmask = (1 << part);
1410 if ((cs->sc_openmask & ~pmask)) {
1416 * Free ccd_softc information and clear entry.
1419 /* Close the components and free their pathnames. */
1424 for (i = 0; i < cs->sc_nccdisks; ++i) {
1426 * XXX: this close could potentially fail and
1427 * cause Bad Things. Maybe we need to force
1428 * the close to happen?
1431 if (ccddebug & CCDB_VNODE)
1432 vprint("CCDIOCCLR: vnode info",
1433 cs->sc_cinfo[i].ci_vp);
1435 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE,
1437 free(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1441 /* Free interleave index. */
1442 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1443 free(cs->sc_itable[i].ii_index, M_DEVBUF);
1445 /* Free component info and interleave table. */
1446 free(cs->sc_cinfo, M_DEVBUF);
1447 free(cs->sc_itable, M_DEVBUF);
1448 cs->sc_flags &= ~CCDF_INITED;
1451 * Free ccddevice information and clear entry.
1453 free(ccddevs[unit].ccd_cpp, M_DEVBUF);
1454 free(ccddevs[unit].ccd_vpp, M_DEVBUF);
1456 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1459 * And remove the devstat entry.
1461 devstat_remove_entry(&cs->device_stats);
1463 /* This must be atomic. */
1466 bzero(cs, sizeof(struct ccd_softc));
1472 if ((cs->sc_flags & CCDF_INITED) == 0)
1475 *(struct disklabel *)data = cs->sc_label;
1479 if ((cs->sc_flags & CCDF_INITED) == 0)
1482 ((struct partinfo *)data)->disklab = &cs->sc_label;
1483 ((struct partinfo *)data)->part =
1484 &cs->sc_label.d_partitions[ccdpart(dev)];
1489 if ((cs->sc_flags & CCDF_INITED) == 0)
1492 if ((flag & FWRITE) == 0)
1495 if ((error = ccdlock(cs)) != 0)
1498 cs->sc_flags |= CCDF_LABELLING;
1500 error = setdisklabel(&cs->sc_label,
1501 (struct disklabel *)data, 0);
1503 if (cmd == DIOCWDINFO)
1504 error = writedisklabel(CCDLABELDEV(dev),
1508 cs->sc_flags &= ~CCDF_LABELLING;
1517 if ((cs->sc_flags & CCDF_INITED) == 0)
1520 if ((flag & FWRITE) == 0)
1522 if (*(int *)data != 0)
1523 cs->sc_flags |= CCDF_WLABEL;
1525 cs->sc_flags &= ~CCDF_WLABEL;
1539 struct ccd_softc *cs;
1542 if (ccdopen(dev, 0, S_IFCHR, curproc)) /* open as a character device on behalf of curproc; non-zero takes the failure branch */
1545 cs = &ccd_softc[ccdunit(dev)]; /* softc for the unit encoded in dev */
1546 part = ccdpart(dev); /* partition index encoded in dev */
1548 if ((cs->sc_flags & CCDF_INITED) == 0) /* CCDF_INITED is clear: unit is not configured */
1551 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) /* partition's fstype is compared against FS_SWAP */
1554 size = cs->sc_label.d_partitions[part].p_size; /* size is taken from the in-core label's p_size */
1556 if (ccdclose(dev, 0, S_IFCHR, curproc)) /* balances the ccdopen() above */
1567 /* Not implemented. */
1572 * Look up the provided name in the filesystem. If the file exists,
1573 * is a valid block device, and isn't being used by anyone else,
1574 * set *vpp to the file's vnode.
1577 ccdlookup(path, p, vpp)
1580 struct vnode **vpp; /* result */
1582 struct nameidata nd;
1587 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p); /* path is passed as a UIO_USERSPACE address */
1588 flags = FREAD | FWRITE; /* request the component for both reading and writing */
1589 if ((error = vn_open(&nd, &flags, 0)) != 0) {
1591 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) /* mask is grouped: '&' binds tighter than '|', so ungrouped the test ignores ccddebug whenever CCDB_INIT is non-zero */
1592 printf("ccdlookup: vn_open error = %d\n", error);
1598 if (vp->v_usecount > 1) { /* references besides ours: treated as in use by someone else */
1603 if (!vn_isdisk(vp, &error)) /* reject anything vn_isdisk() does not accept; error is passed for it to fill in */
1607 if (ccddebug & CCDB_VNODE)
1608 vprint("ccdlookup: vnode info", vp);
1611 VOP_UNLOCK(vp, 0, p);
1612 NDFREE(&nd, NDF_ONLY_PNBUF);
1616 VOP_UNLOCK(vp, 0, p);
1617 NDFREE(&nd, NDF_ONLY_PNBUF);
1618 /* vn_close does vrele() for vp */
1623 (void)vn_close(vp, FREAD|FWRITE, uc, p); /* close result is deliberately ignored */
1629 * Read the disklabel from the ccd. If one is not present, fake one
1633 ccdgetdisklabel(dev)
1636 int unit = ccdunit(dev); /* unit number encoded in dev */
1637 struct ccd_softc *cs = &ccd_softc[unit];
1639 struct disklabel *lp = &cs->sc_label; /* in-core label that is rebuilt below */
1640 struct ccdgeom *ccg = &cs->sc_geom; /* geometry source for the label fields */
1642 bzero(lp, sizeof(*lp)); /* start from an all-zero label */
1644 lp->d_secperunit = cs->sc_size; /* total size comes from the softc */
1645 lp->d_secsize = ccg->ccg_secsize;
1646 lp->d_nsectors = ccg->ccg_nsectors;
1647 lp->d_ntracks = ccg->ccg_ntracks;
1648 lp->d_ncylinders = ccg->ccg_ncylinders;
1649 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; /* derived from the two fields set just above */
1651 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); /* copy bounded by the field size */
1652 lp->d_type = DTYPE_CCD;
1653 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); /* copy bounded by the field size */
1655 lp->d_interleave = 1;
1658 lp->d_partitions[RAW_PART].p_offset = 0; /* raw partition starts at offset 0 ... */
1659 lp->d_partitions[RAW_PART].p_size = cs->sc_size; /* ... and spans the whole ccd */
1660 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1661 lp->d_npartitions = RAW_PART + 1; /* partitions up to and including RAW_PART */
1663 lp->d_bbsize = BBSIZE; /* XXX */
1664 lp->d_sbsize = SBSIZE; /* XXX */
1666 lp->d_magic = DISKMAGIC;
1667 lp->d_magic2 = DISKMAGIC;
1668 lp->d_checksum = dkcksum(&cs->sc_label); /* checksum of the label just built (lp points at cs->sc_label) */
1671 * Call the generic disklabel extraction routine.
1673 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); /* non-NULL result is an error message string */
1674 if (errstring != NULL)
1675 ccdmakedisklabel(cs); /* read failed: adjust the faked label instead */
1678 /* It's actually extremely common to have unlabeled ccds. */
1679 if (ccddebug & CCDB_LABEL)
1680 if (errstring != NULL)
1681 printf("ccd%d: %s\n", unit, errstring);
1686 * Adjust the faked in-core label for the case where no disklabel
1687 * could be read from the ccd.
1690 ccdmakedisklabel(cs)
1691 struct ccd_softc *cs; /* softc whose sc_label is adjusted in place */
1693 struct disklabel *lp = &cs->sc_label;
1696 * For historical reasons, if there's no disklabel present
1697 * the raw partition must be marked FS_BSDFFS.
1699 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1701 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); /* replaces the pack name; copy bounded by the field size */
1705 * Wait interruptibly for an exclusive lock.
1708 * Several drivers do this; it should be abstracted and made MP-safe.
1712 struct ccd_softc *cs; /* softc whose sc_flags carry the lock bits */
1716 while ((cs->sc_flags & CCDF_LOCKED) != 0) { /* loop for as long as the lock bit is set */
1717 cs->sc_flags |= CCDF_WANTED; /* record that a waiter exists */
1718 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) /* sleep on cs, no timeout; a non-zero result takes the error branch */
1721 cs->sc_flags |= CCDF_LOCKED; /* lock bit was clear: take the lock */
1726 * Unlock and wake up any waiters.
1730 struct ccd_softc *cs; /* softc whose lock is being released */
1733 cs->sc_flags &= ~CCDF_LOCKED; /* drop the lock bit */
1734 if ((cs->sc_flags & CCDF_WANTED) != 0) { /* someone set CCDF_WANTED while waiting for the lock */
1735 cs->sc_flags &= ~CCDF_WANTED; /* clear the waiter flag */
1743 struct ccdiinfo *ii;
1747 for (ix = 0; ii->ii_ndisk; ix++, ii++) {
1748 printf(" itab[%d]: #dk %d sblk %d soff %d",
1749 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
1750 for (i = 0; i < ii->ii_ndisk; i++)
1751 printf(" %d", ii->ii_index[i]);