3 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */
6 * Copyright (c) 1995 Jason R. Thorpe.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project
21 * 4. The name of the author may not be used to endorse or promote products
22 * derived from this software without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * Copyright (c) 1988 University of Utah.
39 * Copyright (c) 1990, 1993
40 * The Regents of the University of California. All rights reserved.
42 * This code is derived from software contributed to Berkeley by
43 * the Systems Programming Group of the University of Utah Computer
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. All advertising materials mentioning features or use of this software
55 * must display the following acknowledgement:
56 * This product includes software developed by the University of
57 * California, Berkeley and its contributors.
58 * 4. Neither the name of the University nor the names of its contributors
59 * may be used to endorse or promote products derived from this software
60 * without specific prior written permission.
62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
74 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 * @(#)cd.c 8.2 (Berkeley) 11/16/93
80 * "Concatenated" disk driver.
82 * Dynamic configuration and disklabel support by:
83 * Jason R. Thorpe <thorpej@nas.nasa.gov>
84 * Numerical Aerodynamic Simulation Facility
86 * NASA Ames Research Center
87 * Moffett Field, CA 94035
93 #include <sys/param.h>
94 #include <sys/systm.h>
95 #include <sys/kernel.h>
96 #include <sys/module.h>
99 #include <sys/malloc.h>
100 #include <sys/namei.h>
101 #include <sys/conf.h>
102 #include <sys/stat.h>
103 #include <sys/sysctl.h>
104 #include <sys/disklabel.h>
105 #include <ufs/ffs/fs.h>
106 #include <sys/device.h>
107 #include <sys/devicestat.h>
108 #include <sys/fcntl.h>
109 #include <sys/vnode.h>
111 #include <sys/ccdvar.h>
113 #if defined(CCDDEBUG) && !defined(DEBUG)
118 #define CCDB_FOLLOW 0x01
119 #define CCDB_INIT 0x02
121 #define CCDB_LABEL 0x08
122 #define CCDB_VNODE 0x10
123 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
125 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
129 #define ccdunit(x) dkunit(x)
130 #define ccdpart(x) dkpart(x)
133 This is how mirroring works (only writes are special):
135 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
136 linked together by the cb_mirror field. "cb_pflags &
137 CCDPF_MIRROR_DONE" is set to 0 on both of them.
139 When a component returns to ccdiodone(), it checks if "cb_pflags &
140 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's
141 flag and returns. If it is, it means its partner has already
142 returned, so it will go to the regular cleanup.
147 struct buf cb_buf; /* new I/O buf */
148 struct buf *cb_obp; /* ptr. to original I/O buf */
149 struct ccdbuf *cb_freenext; /* free list link */
150 int cb_unit; /* target unit */
151 int cb_comp; /* target component */
152 int cb_pflags; /* mirror/parity status flag */
153 struct ccdbuf *cb_mirror; /* mirror counterpart */
156 /* bits in cb_pflags */
157 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */
159 #define CCDLABELDEV(dev) \
160 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART)))
162 static d_open_t ccdopen;
163 static d_close_t ccdclose;
164 static d_strategy_t ccdstrategy;
165 static d_ioctl_t ccdioctl;
166 static d_dump_t ccddump;
167 static d_psize_t ccdsize;
169 #define NCCDFREEHIWAT 16
171 #define CDEV_MAJOR 74
172 #define BDEV_MAJOR 21
174 static struct cdevsw ccd_cdevsw = {
176 /* close */ ccdclose,
178 /* write */ physwrite,
179 /* ioctl */ ccdioctl,
182 /* strategy */ ccdstrategy,
184 /* maj */ CDEV_MAJOR,
188 /* bmaj */ BDEV_MAJOR
191 /* called during module initialization */
192 static void ccdattach __P((void));
193 static int ccd_modevent __P((module_t, int, void *));
195 /* called by biodone() at interrupt time */
196 static void ccdiodone __P((struct ccdbuf *cbp));
198 static void ccdstart __P((struct ccd_softc *, struct buf *));
199 static void ccdinterleave __P((struct ccd_softc *, int));
200 static void ccdintr __P((struct ccd_softc *, struct buf *));
201 static int ccdinit __P((struct ccddevice *, char **, struct proc *));
202 static int ccdlookup __P((char *, struct proc *p, struct vnode **));
203 static void ccdbuffer __P((struct ccdbuf **ret, struct ccd_softc *,
204 struct buf *, daddr_t, caddr_t, long));
205 static void ccdgetdisklabel __P((dev_t));
206 static void ccdmakedisklabel __P((struct ccd_softc *));
207 static int ccdlock __P((struct ccd_softc *));
208 static void ccdunlock __P((struct ccd_softc *));
211 static void printiinfo __P((struct ccdiinfo *));
214 /* Non-private for the benefit of libkvm. */
215 struct ccd_softc *ccd_softc;
216 struct ccddevice *ccddevs;
217 struct ccdbuf *ccdfreebufs;
218 static int numccdfreebufs;
219 static int numccd = 0;
222 * getccdbuf() - Allocate and zero a ccd buffer.
224 * This routine is called at splbio().
229 getccdbuf(struct ccdbuf *cpy)
234 * Allocate from freelist or malloc as necessary
236 if ((cbp = ccdfreebufs) != NULL) {
237 ccdfreebufs = cbp->cb_freenext;
240 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK);
244 * Used by mirroring code
247 bcopy(cpy, cbp, sizeof(struct ccdbuf));
249 bzero(cbp, sizeof(struct ccdbuf));
252 * independent struct buf initialization
254 LIST_INIT(&cbp->cb_buf.b_dep);
255 BUF_LOCKINIT(&cbp->cb_buf);
256 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
257 BUF_KERNPROC(&cbp->cb_buf);
263 * putccdbuf() - Release a ccd buffer to the free list, or free it.
265 * This routine is called at splbio().
270 putccdbuf(struct ccdbuf *cbp)
272 BUF_UNLOCK(&cbp->cb_buf);
273 BUF_LOCKFREE(&cbp->cb_buf);
275 if (numccdfreebufs < NCCDFREEHIWAT) {
276 cbp->cb_freenext = ccdfreebufs;
280 free((caddr_t)cbp, M_DEVBUF);
286 * Number of blocks to leave untouched in front of a component partition.
287 * This is to avoid violating its disklabel area when it starts at the
288 * beginning of the slice.
290 #if !defined(CCD_OFFSET)
291 #define CCD_OFFSET 16
295 * Called by main() during pseudo-device attachment. All we need
296 * to do is allocate enough space for devices to be configured later, and
306 printf("ccd0-%d: Concatenated disk drivers\n", num-1);
308 printf("ccd0: Concatenated disk driver\n");
310 ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc),
312 ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice),
314 if ((ccd_softc == NULL) || (ccddevs == NULL)) {
315 printf("WARNING: no memory for concatenated disks\n");
316 if (ccd_softc != NULL)
317 free(ccd_softc, M_DEVBUF);
319 free(ccddevs, M_DEVBUF);
323 bzero(ccd_softc, num * sizeof(struct ccd_softc));
324 bzero(ccddevs, num * sizeof(struct ccddevice));
326 /* XXX: is this necessary? */
327 for (i = 0; i < numccd; ++i)
328 ccddevs[i].ccd_dk = -1;
332 ccd_modevent(mod, type, data)
345 printf("ccd0: Unload not supported!\n");
349 default: /* MOD_SHUTDOWN etc */
355 DEV_MODULE(ccd, CDEV_MAJOR, BDEV_MAJOR, ccd_cdevsw, ccd_modevent, NULL);
358 ccdinit(ccd, cpaths, p)
359 struct ccddevice *ccd;
363 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
364 struct ccdcinfo *ci = NULL; /* XXX */
370 struct partinfo dpart;
371 struct ccdgeom *ccg = &cs->sc_geom;
372 char tmppath[MAXPATHLEN];
376 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
377 printf("ccdinit: unit %d\n", ccd->ccd_unit);
381 cs->sc_ileave = ccd->ccd_interleave;
382 cs->sc_nccdisks = ccd->ccd_ndev;
384 /* Allocate space for the component info. */
385 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
389 * Verify that each component piece exists and record
390 * relevant information about it.
394 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
395 vp = ccd->ccd_vpp[ix];
396 ci = &cs->sc_cinfo[ix];
400 * Copy in the pathname of the component.
402 bzero(tmppath, sizeof(tmppath)); /* sanity */
403 if ((error = copyinstr(cpaths[ix], tmppath,
404 MAXPATHLEN, &ci->ci_pathlen)) != 0) {
406 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
407 printf("ccd%d: can't copy path, error = %d\n",
408 ccd->ccd_unit, error);
412 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
413 bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
415 ci->ci_dev = vn_todev(vp);
418 * Get partition information for the component.
420 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
421 FREAD, p->p_ucred, p)) != 0) {
423 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
424 printf("ccd%d: %s: ioctl failed, error = %d\n",
425 ccd->ccd_unit, ci->ci_path, error);
429 if (dpart.part->p_fstype == FS_BSDFFS) {
431 ((dpart.disklab->d_secsize > maxsecsize) ?
432 dpart.disklab->d_secsize : maxsecsize);
433 size = dpart.part->p_size - CCD_OFFSET;
436 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
437 printf("ccd%d: %s: incorrect partition type\n",
438 ccd->ccd_unit, ci->ci_path);
445 * Calculate the size, truncating to an interleave
446 * boundary if necessary.
449 if (cs->sc_ileave > 1)
450 size -= size % cs->sc_ileave;
454 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
455 printf("ccd%d: %s: size == 0\n",
456 ccd->ccd_unit, ci->ci_path);
462 if (minsize == 0 || size < minsize)
469 * Don't allow the interleave to be smaller than
470 * the biggest component sector.
472 if ((cs->sc_ileave > 0) &&
473 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
475 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
476 printf("ccd%d: interleave must be at least %d\n",
477 ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
484 * If uniform interleave is desired set all sizes to that of
485 * the smallest component. This will guarantee that a single
486 * interleave table is generated.
488 * Lost space must be taken into account when calculating the
489 * overall size. Half the space is lost when CCDF_MIRROR is
490 * specified. One disk is lost when CCDF_PARITY is specified.
492 if (ccd->ccd_flags & CCDF_UNIFORM) {
493 for (ci = cs->sc_cinfo;
494 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
495 ci->ci_size = minsize;
497 if (ccd->ccd_flags & CCDF_MIRROR) {
499 * Check to see if an even number of components
500 * have been specified. The interleave must also
501 * be non-zero in order for us to be able to
502 * guarantee the topology.
504 if (cs->sc_nccdisks % 2) {
505 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
509 if (cs->sc_ileave == 0) {
510 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
514 cs->sc_size = (cs->sc_nccdisks/2) * minsize;
515 } else if (ccd->ccd_flags & CCDF_PARITY) {
516 cs->sc_size = (cs->sc_nccdisks-1) * minsize;
518 if (cs->sc_ileave == 0) {
519 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
523 cs->sc_size = cs->sc_nccdisks * minsize;
528 * Construct the interleave table.
530 ccdinterleave(cs, ccd->ccd_unit);
533 * Create pseudo-geometry based on 1MB cylinders. It's
536 ccg->ccg_secsize = maxsecsize;
537 ccg->ccg_ntracks = 1;
538 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
539 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
542 * Add a devstat entry for this device.
544 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
545 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
546 DEVSTAT_TYPE_ASC0 |DEVSTAT_TYPE_IF_OTHER,
547 DEVSTAT_PRIORITY_CCD);
549 cs->sc_flags |= CCDF_INITED;
550 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */
551 cs->sc_unit = ccd->ccd_unit;
554 while (ci > cs->sc_cinfo) {
556 free(ci->ci_path, M_DEVBUF);
558 free(cs->sc_cinfo, M_DEVBUF);
563 ccdinterleave(cs, unit)
564 struct ccd_softc *cs;
567 struct ccdcinfo *ci, *smallci;
574 if (ccddebug & CCDB_INIT)
575 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
579 * Allocate an interleave table. The worst case occurs when each
580 * of N disks is of a different size, resulting in N interleave
583 * Chances are this is too big, but we don't care.
585 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
586 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK);
587 bzero((caddr_t)cs->sc_itable, size);
590 * Trivial case: no interleave (actually interleave of disk size).
591 * Each table entry represents a single component in its entirety.
593 * An interleave of 0 may not be used with a mirror or parity setup.
595 if (cs->sc_ileave == 0) {
599 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
600 /* Allocate space for ii_index. */
601 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK);
603 ii->ii_startblk = bn;
605 ii->ii_index[0] = ix;
606 bn += cs->sc_cinfo[ix].ci_size;
611 if (ccddebug & CCDB_INIT)
612 printiinfo(cs->sc_itable);
618 * The following isn't fast or pretty; it doesn't have to be.
622 for (ii = cs->sc_itable; ; ii++) {
624 * Allocate space for ii_index. We might allocate more than
627 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
631 * Locate the smallest of the remaining components
634 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks];
636 if (ci->ci_size > size &&
638 ci->ci_size < smallci->ci_size)) {
644 * Nobody left, all done
646 if (smallci == NULL) {
652 * Record starting logical block using an sc_ileave blocksize.
654 ii->ii_startblk = bn / cs->sc_ileave;
657 * Record starting component block using an sc_ileave
658 * blocksize. This value is relative to the beginning of
661 ii->ii_startoff = lbn;
664 * Determine how many disks take part in this interleave
665 * and record their indices.
668 for (ci = cs->sc_cinfo;
669 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
670 if (ci->ci_size >= smallci->ci_size) {
671 ii->ii_index[ix++] = ci - cs->sc_cinfo;
675 bn += ix * (smallci->ci_size - size);
676 lbn = smallci->ci_size / cs->sc_ileave;
677 size = smallci->ci_size;
680 if (ccddebug & CCDB_INIT)
681 printiinfo(cs->sc_itable);
687 ccdopen(dev, flags, fmt, p)
692 int unit = ccdunit(dev);
693 struct ccd_softc *cs;
694 struct disklabel *lp;
695 int error = 0, part, pmask;
698 if (ccddebug & CCDB_FOLLOW)
699 printf("ccdopen(%x, %x)\n", dev, flags);
703 cs = &ccd_softc[unit];
705 if ((error = ccdlock(cs)) != 0)
714 * If we're initialized, check to see if there are any other
715 * open partitions. If not, then it's safe to update
716 * the in-core disklabel.
718 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0))
719 ccdgetdisklabel(dev);
721 /* Check that the partition exists. */
722 if (part != RAW_PART && ((part >= lp->d_npartitions) ||
723 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
728 /* Prevent our unit from being unconfigured while open. */
731 cs->sc_copenmask |= pmask;
735 cs->sc_bopenmask |= pmask;
739 cs->sc_copenmask | cs->sc_bopenmask;
748 ccdclose(dev, flags, fmt, p)
753 int unit = ccdunit(dev);
754 struct ccd_softc *cs;
758 if (ccddebug & CCDB_FOLLOW)
759 printf("ccdclose(%x, %x)\n", dev, flags);
764 cs = &ccd_softc[unit];
766 if ((error = ccdlock(cs)) != 0)
771 /* ...that much closer to allowing unconfiguration... */
774 cs->sc_copenmask &= ~(1 << part);
778 cs->sc_bopenmask &= ~(1 << part);
782 cs->sc_copenmask | cs->sc_bopenmask;
792 int unit = ccdunit(bp->b_dev);
793 struct ccd_softc *cs = &ccd_softc[unit];
796 struct disklabel *lp;
799 if (ccddebug & CCDB_FOLLOW)
800 printf("ccdstrategy(%x): unit %d\n", bp, unit);
802 if ((cs->sc_flags & CCDF_INITED) == 0) {
804 bp->b_flags |= B_ERROR;
808 /* If it's a nil transfer, wake up the top half now. */
809 if (bp->b_bcount == 0)
815 * Do bounds checking and adjust transfer. If there's an
816 * error, the bounds check will flag that for us.
818 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
819 if (ccdpart(bp->b_dev) != RAW_PART) {
820 if (bounds_check_with_label(bp, lp, wlabel) <= 0)
823 int pbn; /* in sc_secsize chunks */
824 long sz; /* in sc_secsize chunks */
826 pbn = bp->b_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE);
827 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);
830 * If out of bounds return an error. If at the EOF point,
831 * simply read or write less.
834 if (pbn < 0 || pbn >= cs->sc_size) {
835 bp->b_resid = bp->b_bcount;
836 if (pbn != cs->sc_size) {
837 bp->b_error = EINVAL;
838 bp->b_flags |= B_ERROR | B_INVAL;
844 * If the request crosses EOF, truncate the request.
846 if (pbn + sz > cs->sc_size) {
847 bp->b_bcount = (cs->sc_size - pbn) *
848 cs->sc_geom.ccg_secsize;
852 bp->b_resid = bp->b_bcount;
867 struct ccd_softc *cs;
871 struct ccdbuf *cbp[4];
872 /* XXX! : 2 reads and 2 writes for RAID 4/5 */
875 struct partition *pp;
878 if (ccddebug & CCDB_FOLLOW)
879 printf("ccdstart(%x, %x)\n", cs, bp);
882 /* Record the transaction start */
883 devstat_start_transaction(&cs->device_stats);
886 * Translate the partition-relative block number to an absolute.
889 if (ccdpart(bp->b_dev) != RAW_PART) {
890 pp = &cs->sc_label.d_partitions[ccdpart(bp->b_dev)];
895 * Allocate component buffers and fire off the requests
898 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
899 ccdbuffer(cbp, cs, bp, bn, addr, bcount);
900 rcount = cbp[0]->cb_buf.b_bcount;
902 if (cs->sc_cflags & CCDF_MIRROR) {
904 * Mirroring. Writes go to both disks, reads are
905 * taken from whichever disk seems most appropriate.
907 * We attempt to localize reads to the disk whose arm
908 * is nearest the read request. We ignore seeks due
909 * to writes when making this determination and we
910 * also try to avoid hogging.
912 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) {
913 cbp[0]->cb_buf.b_vp->v_numoutput++;
914 cbp[1]->cb_buf.b_vp->v_numoutput++;
915 VOP_STRATEGY(cbp[0]->cb_buf.b_vp,
917 VOP_STRATEGY(cbp[1]->cb_buf.b_vp,
920 int pick = cs->sc_pick;
921 daddr_t range = cs->sc_size / 16;
923 if (bn < cs->sc_blk[pick] - range ||
924 bn > cs->sc_blk[pick] + range
926 cs->sc_pick = pick = 1 - pick;
928 cs->sc_blk[pick] = bn + btodb(rcount);
929 VOP_STRATEGY(cbp[pick]->cb_buf.b_vp,
936 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0)
937 cbp[0]->cb_buf.b_vp->v_numoutput++;
938 VOP_STRATEGY(cbp[0]->cb_buf.b_vp, &cbp[0]->cb_buf);
946 * Build a component buffer header.
949 ccdbuffer(cb, cs, bp, bn, addr, bcount)
951 struct ccd_softc *cs;
957 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */
963 if (ccddebug & CCDB_IO)
964 printf("ccdbuffer(%x, %x, %d, %x, %d)\n",
965 cs, bp, bn, addr, bcount);
968 * Determine which component bn falls in.
973 if (cs->sc_ileave == 0) {
975 * Serially concatenated and neither a mirror nor a parity
976 * config. This is a special case.
981 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
989 * Calculate cbn, the logical superblock (sc_ileave chunks),
990 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
993 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */
994 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */
997 * Figure out which interleave table to use.
999 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
1000 if (ii->ii_startblk > cbn)
1006 * off is the logical superblock relative to the beginning
1007 * of this interleave block.
1009 off = cbn - ii->ii_startblk;
1012 * We must calculate which disk component to use (ccdisk),
1013 * and recalculate cbn to be the superblock relative to
1014 * the beginning of the component. This is typically done by
1015 * adding 'off' and ii->ii_startoff together. However, 'off'
1016 * must typically be divided by the number of components in
1017 * this interleave array to properly convert it from a
1018 * CCD-relative logical superblock number to a
1019 * component-relative superblock number.
1021 if (ii->ii_ndisk == 1) {
1023 * When we have just one disk, it can't be a mirror
1024 * or a parity config.
1026 ccdisk = ii->ii_index[0];
1027 cbn = ii->ii_startoff + off;
1029 if (cs->sc_cflags & CCDF_MIRROR) {
1031 * We have forced a uniform mapping, resulting
1032 * in a single interleave array. We double
1033 * up on the first half of the available
1034 * components and our mirror is in the second
1035 * half. This only works with a single
1036 * interleave array because doubling up
1037 * doubles the number of sectors, so there
1038 * cannot be another interleave array because
1039 * the next interleave array's calculations
1042 int ndisk2 = ii->ii_ndisk / 2;
1043 ccdisk = ii->ii_index[off % ndisk2];
1044 cbn = ii->ii_startoff + off / ndisk2;
1045 ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1046 } else if (cs->sc_cflags & CCDF_PARITY) {
1048 * XXX not implemented yet
1050 int ndisk2 = ii->ii_ndisk - 1;
1051 ccdisk = ii->ii_index[off % ndisk2];
1052 cbn = ii->ii_startoff + off / ndisk2;
1053 if (cbn % ii->ii_ndisk <= ccdisk)
1056 ccdisk = ii->ii_index[off % ii->ii_ndisk];
1057 cbn = ii->ii_startoff + off / ii->ii_ndisk;
1061 ci = &cs->sc_cinfo[ccdisk];
1064 * Convert cbn from a superblock to a normal block so it
1065 * can be used to calculate (along with cboff) the normal
1066 * block index into this particular disk.
1068 cbn *= cs->sc_ileave;
1072 * Fill in the component buf structure.
1074 cbp = getccdbuf(NULL);
1075 cbp->cb_buf.b_flags = bp->b_flags | B_CALL;
1076 cbp->cb_buf.b_iodone = (void (*)(struct buf *))ccdiodone;
1077 cbp->cb_buf.b_dev = ci->ci_dev; /* XXX */
1078 cbp->cb_buf.b_blkno = cbn + cboff + CCD_OFFSET;
1079 cbp->cb_buf.b_offset = dbtob(cbn + cboff + CCD_OFFSET);
1080 cbp->cb_buf.b_data = addr;
1081 cbp->cb_buf.b_vp = ci->ci_vp;
1082 if (cs->sc_ileave == 0)
1083 cbc = dbtob((off_t)(ci->ci_size - cbn));
1085 cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1086 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1087 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1090 * context for ccdiodone
1093 cbp->cb_unit = cs - ccd_softc;
1094 cbp->cb_comp = ci - cs->sc_cinfo;
1097 if (ccddebug & CCDB_IO)
1098 printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n",
1099 ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.b_blkno,
1100 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
1105 * Note: both I/O's setup when reading from mirror, but only one
1108 if (cs->sc_cflags & CCDF_MIRROR) {
1109 /* mirror, setup second I/O */
1110 cbp = getccdbuf(cb[0]);
1111 cbp->cb_buf.b_dev = ci2->ci_dev;
1112 cbp->cb_buf.b_vp = ci2->ci_vp;
1113 cbp->cb_comp = ci2 - cs->sc_cinfo;
1115 /* link together the ccdbuf's and clear "mirror done" flag */
1116 cb[0]->cb_mirror = cb[1];
1117 cb[1]->cb_mirror = cb[0];
1118 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1119 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1125 struct ccd_softc *cs;
1129 if (ccddebug & CCDB_FOLLOW)
1130 printf("ccdintr(%x, %x)\n", cs, bp);
1133 * Request is done for better or worse, wakeup the top half.
1135 if (bp->b_flags & B_ERROR)
1136 bp->b_resid = bp->b_bcount;
1137 devstat_end_transaction_buf(&cs->device_stats, bp);
1142 * Called at interrupt time.
1143 * Mark the component as done and if all components are done,
1144 * take a ccd interrupt.
1150 struct buf *bp = cbp->cb_obp;
1151 int unit = cbp->cb_unit;
1156 if (ccddebug & CCDB_FOLLOW)
1157 printf("ccdiodone(%x)\n", cbp);
1158 if (ccddebug & CCDB_IO) {
1159 printf("ccdiodone: bp %x bcount %d resid %d\n",
1160 bp, bp->b_bcount, bp->b_resid);
1161 printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n",
1162 cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
1163 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data,
1164 cbp->cb_buf.b_bcount);
1168 * If an error occurred, report it. If this is a mirrored
1169 * configuration and the first of two possible reads, do not
1170 * set the error in the bp yet because the second read may
1174 if (cbp->cb_buf.b_flags & B_ERROR) {
1175 const char *msg = "";
1177 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
1178 (cbp->cb_buf.b_flags & B_READ) &&
1179 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1181 * We will try our read on the other disk down
1182 * below, also reverse the default pick so if we
1183 * are doing a scan we do not keep hitting the
1186 struct ccd_softc *cs = &ccd_softc[unit];
1188 msg = ", trying other disk";
1189 cs->sc_pick = 1 - cs->sc_pick;
1190 cs->sc_blk[cs->sc_pick] = bp->b_blkno;
1192 bp->b_flags |= B_ERROR;
1193 bp->b_error = cbp->cb_buf.b_error ?
1194 cbp->cb_buf.b_error : EIO;
1196 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n",
1197 unit, bp->b_error, cbp->cb_comp,
1198 (int)cbp->cb_buf.b_blkno, bp->b_blkno, msg);
1202 * Process mirror. If we are writing, I/O has been initiated on both
1203 * buffers and we fall through only after both are finished.
1205 * If we are reading only one I/O is initiated at a time. If an
1206 * error occurs we initiate the second I/O and return, otherwise
1207 * we free the second I/O without initiating it.
1210 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
1211 if ((cbp->cb_buf.b_flags & B_READ) == 0) {
1213 * When writing, handshake with the second buffer
1214 * to determine when both are done. If both are not
1215 * done, return here.
1217 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1218 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1225 * When reading, either dispose of the second buffer
1226 * or initiate I/O on the second buffer if an error
1227 * occurred with this one.
1229 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1230 if (cbp->cb_buf.b_flags & B_ERROR) {
1231 cbp->cb_mirror->cb_pflags |=
1234 cbp->cb_mirror->cb_buf.b_vp,
1235 &cbp->cb_mirror->cb_buf
1241 putccdbuf(cbp->cb_mirror);
1249 * use b_bufsize to determine how big the original request was rather
1250 * than b_bcount, because b_bcount may have been truncated for EOF.
1252 * XXX We check for an error, but we do not test the resid for an
1253 * aligned EOF condition. This may result in character & block
1254 * device access not recognizing EOF properly when read or written
1255 * sequentially, but will not affect filesystems.
1257 count = cbp->cb_buf.b_bufsize;
1261 * If all done, "interrupt".
1263 bp->b_resid -= count;
1264 if (bp->b_resid < 0)
1265 panic("ccdiodone: count");
1266 if (bp->b_resid == 0)
1267 ccdintr(&ccd_softc[unit], bp);
1272 ccdioctl(dev, cmd, data, flag, p)
1279 int unit = ccdunit(dev);
1280 int i, j, lookedup = 0, error = 0;
1282 struct ccd_softc *cs;
1283 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
1284 struct ccddevice ccd;
1290 cs = &ccd_softc[unit];
1292 bzero(&ccd, sizeof(ccd));
1296 if (cs->sc_flags & CCDF_INITED)
1299 if ((flag & FWRITE) == 0)
1302 if ((error = ccdlock(cs)) != 0)
1305 /* Fill in some important bits. */
1306 ccd.ccd_unit = unit;
1307 ccd.ccd_interleave = ccio->ccio_ileave;
1308 if (ccd.ccd_interleave == 0 &&
1309 ((ccio->ccio_flags & CCDF_MIRROR) ||
1310 (ccio->ccio_flags & CCDF_PARITY))) {
1311 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1312 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1314 if ((ccio->ccio_flags & CCDF_MIRROR) &&
1315 (ccio->ccio_flags & CCDF_PARITY)) {
1316 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1317 ccio->ccio_flags &= ~CCDF_PARITY;
1319 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1320 !(ccio->ccio_flags & CCDF_UNIFORM)) {
1321 printf("ccd%d: mirror/parity forces uniform flag\n",
1323 ccio->ccio_flags |= CCDF_UNIFORM;
1325 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1328 * Allocate space for and copy in the array of
1329 * component pathnames and device numbers.
1331 cpp = malloc(ccio->ccio_ndisks * sizeof(char *),
1332 M_DEVBUF, M_WAITOK);
1333 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1334 M_DEVBUF, M_WAITOK);
1336 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1337 ccio->ccio_ndisks * sizeof(char **));
1339 free(vpp, M_DEVBUF);
1340 free(cpp, M_DEVBUF);
1346 if (ccddebug & CCDB_INIT)
1347 for (i = 0; i < ccio->ccio_ndisks; ++i)
1348 printf("ccdioctl: component %d: 0x%x\n",
1352 for (i = 0; i < ccio->ccio_ndisks; ++i) {
1354 if (ccddebug & CCDB_INIT)
1355 printf("ccdioctl: lookedup = %d\n", lookedup);
1357 if ((error = ccdlookup(cpp[i], p, &vpp[i])) != 0) {
1358 for (j = 0; j < lookedup; ++j)
1359 (void)vn_close(vpp[j], FREAD|FWRITE,
1361 free(vpp, M_DEVBUF);
1362 free(cpp, M_DEVBUF);
1370 ccd.ccd_ndev = ccio->ccio_ndisks;
1373 * Initialize the ccd. Fills in the softc for us.
1375 if ((error = ccdinit(&ccd, cpp, p)) != 0) {
1376 for (j = 0; j < lookedup; ++j)
1377 (void)vn_close(vpp[j], FREAD|FWRITE,
1379 bzero(&ccd_softc[unit], sizeof(struct ccd_softc));
1380 free(vpp, M_DEVBUF);
1381 free(cpp, M_DEVBUF);
1387 * The ccd has been successfully initialized, so
1388 * we can place it into the array and read the disklabel.
1390 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1391 ccio->ccio_unit = unit;
1392 ccio->ccio_size = cs->sc_size;
1393 ccdgetdisklabel(dev);
1400 if ((cs->sc_flags & CCDF_INITED) == 0)
1403 if ((flag & FWRITE) == 0)
1406 if ((error = ccdlock(cs)) != 0)
1410 * Don't unconfigure if any other partitions are open
1411 * or if both the character and block flavors of this
1412 * partition are open.
1414 part = ccdpart(dev);
1415 pmask = (1 << part);
1416 if ((cs->sc_openmask & ~pmask) ||
1417 ((cs->sc_bopenmask & pmask) &&
1418 (cs->sc_copenmask & pmask))) {
1424 * Free ccd_softc information and clear entry.
1427 /* Close the components and free their pathnames. */
1428 for (i = 0; i < cs->sc_nccdisks; ++i) {
1430 * XXX: this close could potentially fail and
1431 * cause Bad Things. Maybe we need to force
1432 * the close to happen?
1435 if (ccddebug & CCDB_VNODE)
1436 vprint("CCDIOCCLR: vnode info",
1437 cs->sc_cinfo[i].ci_vp);
1439 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE,
1441 free(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1444 /* Free interleave index. */
1445 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1446 free(cs->sc_itable[i].ii_index, M_DEVBUF);
1448 /* Free component info and interleave table. */
1449 free(cs->sc_cinfo, M_DEVBUF);
1450 free(cs->sc_itable, M_DEVBUF);
1451 cs->sc_flags &= ~CCDF_INITED;
1454 * Free ccddevice information and clear entry.
1456 free(ccddevs[unit].ccd_cpp, M_DEVBUF);
1457 free(ccddevs[unit].ccd_vpp, M_DEVBUF);
1459 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1462 * And remove the devstat entry.
1464 devstat_remove_entry(&cs->device_stats);
1466 /* This must be atomic. */
1469 bzero(cs, sizeof(struct ccd_softc));
1475 if ((cs->sc_flags & CCDF_INITED) == 0)
1478 *(struct disklabel *)data = cs->sc_label;
1482 if ((cs->sc_flags & CCDF_INITED) == 0)
1485 ((struct partinfo *)data)->disklab = &cs->sc_label;
1486 ((struct partinfo *)data)->part =
1487 &cs->sc_label.d_partitions[ccdpart(dev)];
1492 if ((cs->sc_flags & CCDF_INITED) == 0)
1495 if ((flag & FWRITE) == 0)
1498 if ((error = ccdlock(cs)) != 0)
1501 cs->sc_flags |= CCDF_LABELLING;
1503 error = setdisklabel(&cs->sc_label,
1504 (struct disklabel *)data, 0);
1506 if (cmd == DIOCWDINFO)
1507 error = writedisklabel(CCDLABELDEV(dev),
1511 cs->sc_flags &= ~CCDF_LABELLING;
1520 if ((cs->sc_flags & CCDF_INITED) == 0)
1523 if ((flag & FWRITE) == 0)
1525 if (*(int *)data != 0)
1526 cs->sc_flags |= CCDF_WLABEL;
1528 cs->sc_flags &= ~CCDF_WLABEL;
/*
 * Fragment of the routine that reports the size of one ccd partition.
 * NOTE(review): the function header is not part of this excerpt; the
 * name is presumably ccdsize -- confirm against the full source.
 *
 * Visible steps: open the device as a block device (S_IFBLK) on behalf
 * of curproc, locate the softc and the partition number for dev, check
 * that the unit is initialized and that the partition is marked
 * FS_SWAP, read p_size from the in-core label, then close the device.
 * The statements executed when one of the checks fails are not part of
 * this excerpt.
 */
1542 struct ccd_softc *cs;
1545 if (ccdopen(dev, 0, S_IFBLK, curproc))
1548 cs = &ccd_softc[ccdunit(dev)];
1549 part = ccdpart(dev);
1551 if ((cs->sc_flags & CCDF_INITED) == 0)
1554 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP)
1557 size = cs->sc_label.d_partitions[part].p_size;
1559 if (ccdclose(dev, 0, S_IFBLK, curproc))
1570 /* Not implemented. */
1575 * Lookup the provided name in the filesystem. If the file exists,
1576 * is a valid block device, and isn't being used by anyone else,
1577 * set *vpp to the file's vnode.
/*
 * ccdlookup: open the file named by path and vet it as a ccd component.
 *
 *   path - pathname handed to NDINIT() as a UIO_USERSPACE string
 *   p    - calling process; p->p_ucred is used for getattr and close
 *   vpp  - result pointer for the vnode
 *
 * The file is opened FREAD|FWRITE.  The vnode is unlocked and closed
 * again when it already has more than one user (v_usecount > 1), when
 * VOP_GETATTR() fails, or when it is not a block device (VBLK).  On the
 * remaining path the vnode is only unlocked.
 *
 * NOTE(review): the return statements and the assignments to vp and
 * *vpp are not part of this excerpt; confirm the error values against
 * the full source.
 */
1580 ccdlookup(path, p, vpp)
1583 struct vnode **vpp; /* result */
1585 struct nameidata nd;
1590 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p);
1591 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
/*
 * The mask must be parenthesized: & binds tighter than |, so the
 * unparenthesized form was true whenever CCDB_INIT is nonzero,
 * whatever the value of ccddebug.
 */
1593 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
1594 printf("ccdlookup: vn_open error = %d\n", error);
/* Refuse a vnode that somebody else is already using. */
1600 if (vp->v_usecount > 1) {
1601 VOP_UNLOCK(vp, 0, p);
1602 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
1606 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
/* Same precedence fix as for the vn_open message. */
1608 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
1609 printf("ccdlookup: getattr error = %d\n", error);
1611 VOP_UNLOCK(vp, 0, p);
1612 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
1616 /* XXX: eventually we should handle VREG, too. */
1617 if (va.va_type != VBLK) {
1618 VOP_UNLOCK(vp, 0, p);
1619 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
1624 if (ccddebug & CCDB_VNODE)
1625 vprint("ccdlookup: vnode info", vp);
/* Accepted: drop the vnode lock but keep the vnode open. */
1628 VOP_UNLOCK(vp, 0, p);
1634 * Read the disklabel from the ccd. If one is not present, fake one
/*
 * ccdgetdisklabel: set up the in-core disklabel (sc_label) of the ccd
 * unit selected by dev.
 *
 * The label is zeroed and filled with fabricated values taken from
 * sc_size and the ccdgeom fields: type DTYPE_CCD, type name "ccd", pack
 * name "fictitious", interleave 1, and a raw partition (RAW_PART) at
 * offset 0 covering sc_size.  readdisklabel() is then called on
 * CCDLABELDEV(dev) with that label.  When it returns a non-NULL error
 * string, ccdmakedisklabel() is applied to the label and, with
 * CCDB_LABEL set in ccddebug, the string is printed.
 */
1638 ccdgetdisklabel(dev)
1641 int unit = ccdunit(dev);
1642 struct ccd_softc *cs = &ccd_softc[unit];
1644 struct disklabel *lp = &cs->sc_label;
1645 struct ccdgeom *ccg = &cs->sc_geom;
1647 bzero(lp, sizeof(*lp));
/* Geometry comes straight from the values stored in sc_geom. */
1649 lp->d_secperunit = cs->sc_size;
1650 lp->d_secsize = ccg->ccg_secsize;
1651 lp->d_nsectors = ccg->ccg_nsectors;
1652 lp->d_ntracks = ccg->ccg_ntracks;
1653 lp->d_ncylinders = ccg->ccg_ncylinders;
1654 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1656 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
1657 lp->d_type = DTYPE_CCD;
1658 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1660 lp->d_interleave = 1;
/* Only the raw partition is described; it covers the whole ccd. */
1663 lp->d_partitions[RAW_PART].p_offset = 0;
1664 lp->d_partitions[RAW_PART].p_size = cs->sc_size;
1665 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1666 lp->d_npartitions = RAW_PART + 1;
1668 lp->d_bbsize = BBSIZE; /* XXX */
1669 lp->d_sbsize = SBSIZE; /* XXX */
1671 lp->d_magic = DISKMAGIC;
1672 lp->d_magic2 = DISKMAGIC;
/* The checksum is taken last, after every other field has been set. */
1673 lp->d_checksum = dkcksum(&cs->sc_label);
1676 * Call the generic disklabel extraction routine.
1678 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label);
1679 if (errstring != NULL)
1680 ccdmakedisklabel(cs);
1683 /* It's actually extremely common to have unlabeled ccds. */
1684 if (ccddebug & CCDB_LABEL)
1685 if (errstring != NULL)
1686 printf("ccd%d: %s\n", unit, errstring);
1691 * Take care of things one might want to take care of in the event
1692 * that a disklabel isn't present.
/*
 * ccdmakedisklabel: adjust the in-core label of cs for a ccd that has
 * no label of its own.  ccdgetdisklabel() calls this when
 * readdisklabel() returns an error string.
 *
 *   cs - softc whose sc_label is modified in place
 *
 * The raw partition is marked FS_BSDFFS and the pack name is set to
 * "default label".
 */
1695 ccdmakedisklabel(cs)
1696 struct ccd_softc *cs;
1698 struct disklabel *lp = &cs->sc_label;
1701 * For historical reasons, if there's no disklabel present
1702 * the raw partition must be marked FS_BSDFFS.
1704 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1706 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1710 * Wait interruptibly for an exclusive lock.
1713 * Several drivers do this; it should be abstracted and made MP-safe.
/*
 * Fragment of the lock routine for a ccd softc.  NOTE(review): the
 * function header is not part of this excerpt; the ioctl code calls it
 * as ccdlock(cs) and treats a nonzero result as an error.
 *
 * While CCDF_LOCKED is set the caller marks the softc CCDF_WANTED and
 * sleeps on cs with tsleep() (PRIBIO | PCATCH, message "ccdlck").  Once
 * the flag is clear the caller sets CCDF_LOCKED itself.  The statement
 * executed when tsleep() returns nonzero is not part of this excerpt.
 */
1717 struct ccd_softc *cs;
1721 while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1722 cs->sc_flags |= CCDF_WANTED;
1723 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0)
1726 cs->sc_flags |= CCDF_LOCKED;
1731 * Unlock and wake up any waiters.
/*
 * Fragment of the unlock routine for a ccd softc.  NOTE(review): the
 * function header is not part of this excerpt; the name is presumably
 * ccdunlock -- confirm against the full source.
 *
 * CCDF_LOCKED is cleared first.  If a waiter had set CCDF_WANTED, that
 * flag is cleared as well; the wakeup call that presumably follows is
 * not part of this excerpt.
 */
1735 struct ccd_softc *cs;
1738 cs->sc_flags &= ~CCDF_LOCKED;
1739 if ((cs->sc_flags & CCDF_WANTED) != 0) {
1740 cs->sc_flags &= ~CCDF_WANTED;
/*
 * Fragment of a debugging routine that prints an interleave table.
 * NOTE(review): the function header is not part of this excerpt; the
 * name is presumably printiinfo -- confirm against the full source.
 *
 *   ii - first entry of a ccdiinfo array; the walk stops at the first
 *        entry whose ii_ndisk is zero
 *
 * For each entry the position, disk count, start block and start
 * offset are printed, followed by the ii_ndisk values of ii_index.
 */
1748 struct ccdiinfo *ii;
1752 for (ix = 0; ii->ii_ndisk; ix++, ii++) {
1753 printf(" itab[%d]: #dk %d sblk %d soff %d",
1754 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
1755 for (i = 0; i < ii->ii_ndisk; i++)
1756 printf(" %d", ii->ii_index[i]);
1762 #endif /* NCCD > 0 */
1764 /* Local Variables: */
1765 /* c-argdecl-indent: 8 */
1766 /* c-continued-statement-offset: 8 */
1767 /* c-indent-level: 8 */