2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95
34 * $Id: spec_vnops.c,v 1.60 1998/03/08 09:57:36 julian Exp $
37 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
43 #include <sys/mount.h>
44 #include <sys/vnode.h>
46 #include <sys/fcntl.h>
47 #include <sys/disklabel.h>
48 #include <sys/vmmeter.h>
51 #include <vm/vm_prot.h>
52 #include <vm/vm_object.h>
53 #include <vm/vm_page.h>
54 #include <vm/vm_pager.h>
55 #include <vm/vnode_pager.h>
56 #include <vm/vm_extern.h>
58 #include <miscfs/specfs/specdev.h>
/*
 * Forward declarations of the file-local special-device vnode operation
 * handlers.  Every handler except spec_badop takes a single pointer to
 * the argument structure of the operation it implements; spec_badop takes
 * no arguments.  Each of these functions is installed in the
 * spec_vnodeop_entries table below.
 */
60 static int spec_getattr __P((struct vop_getattr_args *));
61 static int spec_badop __P((void));
62 static int spec_strategy __P((struct vop_strategy_args *));
63 static int spec_print __P((struct vop_print_args *));
64 static int spec_lookup __P((struct vop_lookup_args *));
65 static int spec_open __P((struct vop_open_args *));
66 static int spec_close __P((struct vop_close_args *));
67 static int spec_read __P((struct vop_read_args *));
68 static int spec_write __P((struct vop_write_args *));
69 static int spec_ioctl __P((struct vop_ioctl_args *));
70 static int spec_poll __P((struct vop_poll_args *));
71 static int spec_inactive __P((struct vop_inactive_args *));
72 static int spec_fsync __P((struct vop_fsync_args *));
73 static int spec_bmap __P((struct vop_bmap_args *));
74 static int spec_advlock __P((struct vop_advlock_args *));
75 static int spec_getpages __P((struct vop_getpages_args *));
/*
 * speclisth: array of SPECHSZ vnode pointers.
 * NOTE(review): presumably the hash-chain heads used to find special-device
 * vnodes; it is not referenced in the visible part of this file -- confirm
 * against specdev.h and its users.
 *
 * spec_vnodeop_p: the operations vector for special-device vnodes.  Its
 * address is handed to spec_vnodeop_opv_desc below, and it is the vector
 * that VOCALL() dispatches through in this file.
 *
 * spec_vnodeop_entries: pairs each vnode operation descriptor with the
 * handler that special-device vnodes use for it:
 *   - operations with a local implementation point at the spec_* handlers
 *     declared above;
 *   - operations listed with spec_badop (create, link, mkdir, mknod,
 *     readdir, readlink, reallocblks, remove, rename, rmdir, symlink) are
 *     not expected to be invoked on a special-device vnode;
 *   - access and setattr go to vop_ebadf, lease and reclaim go to vop_null,
 *     pathconf goes to vop_stdpathconf, and anything not listed falls to
 *     vop_defaultop via the vop_default_desc entry.
 * NOTE(review): the table's terminating entry and closing brace are not
 * visible in this excerpt.
 */
77 struct vnode *speclisth[SPECHSZ];
78 vop_t **spec_vnodeop_p;
79 static struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
80 { &vop_default_desc, (vop_t *) vop_defaultop },
81 { &vop_access_desc, (vop_t *) vop_ebadf },
82 { &vop_advlock_desc, (vop_t *) spec_advlock },
83 { &vop_bmap_desc, (vop_t *) spec_bmap },
84 { &vop_close_desc, (vop_t *) spec_close },
85 { &vop_create_desc, (vop_t *) spec_badop },
86 { &vop_fsync_desc, (vop_t *) spec_fsync },
87 { &vop_getattr_desc, (vop_t *) spec_getattr },
88 { &vop_getpages_desc, (vop_t *) spec_getpages },
89 { &vop_inactive_desc, (vop_t *) spec_inactive },
90 { &vop_ioctl_desc, (vop_t *) spec_ioctl },
91 { &vop_lease_desc, (vop_t *) vop_null },
92 { &vop_link_desc, (vop_t *) spec_badop },
93 { &vop_lookup_desc, (vop_t *) spec_lookup },
94 { &vop_mkdir_desc, (vop_t *) spec_badop },
95 { &vop_mknod_desc, (vop_t *) spec_badop },
96 { &vop_open_desc, (vop_t *) spec_open },
97 { &vop_pathconf_desc, (vop_t *) vop_stdpathconf },
98 { &vop_poll_desc, (vop_t *) spec_poll },
99 { &vop_print_desc, (vop_t *) spec_print },
100 { &vop_read_desc, (vop_t *) spec_read },
101 { &vop_readdir_desc, (vop_t *) spec_badop },
102 { &vop_readlink_desc, (vop_t *) spec_badop },
103 { &vop_reallocblks_desc, (vop_t *) spec_badop },
104 { &vop_reclaim_desc, (vop_t *) vop_null },
105 { &vop_remove_desc, (vop_t *) spec_badop },
106 { &vop_rename_desc, (vop_t *) spec_badop },
107 { &vop_rmdir_desc, (vop_t *) spec_badop },
108 { &vop_setattr_desc, (vop_t *) vop_ebadf },
109 { &vop_strategy_desc, (vop_t *) spec_strategy },
110 { &vop_symlink_desc, (vop_t *) spec_badop },
111 { &vop_write_desc, (vop_t *) spec_write },
/*
 * Descriptor tying the operations-vector pointer (spec_vnodeop_p) to the
 * entry table (spec_vnodeop_entries), passed to VNODEOP_SET().
 * NOTE(review): VNODEOP_SET presumably registers the descriptor so the
 * vector is built during VFS initialisation -- confirm against the macro's
 * definition.
 */
114 static struct vnodeopv_desc spec_vnodeop_opv_desc =
115 { &spec_vnodeop_p, spec_vnodeop_entries };
117 VNODEOP_SET(spec_vnodeop_opv_desc);
122 struct vop_generic_args /* {
123 struct vnodeop_desc *a_desc;
124 <other random data follows, presumably>
127 return (VOCALL(spec_vnodeop_p, ap->a_desc->vdesc_offset, ap));
130 static void spec_getpages_iodone __P((struct buf *bp));
133 * Trivial lookup routine that always fails.
137 struct vop_lookup_args /* {
139 struct vnode **a_vpp;
140 struct componentname *a_cnp;
149 * Open a special file.
154 struct vop_open_args /* {
157 struct ucred *a_cred;
161 struct proc *p = ap->a_p;
162 struct vnode *bvp, *vp = ap->a_vp;
163 dev_t bdev, dev = (dev_t)vp->v_rdev;
164 int maj = major(dev);
168 * Don't allow open if fs is mounted -nodev.
170 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
173 switch (vp->v_type) {
176 if ((u_int)maj >= nchrdev)
178 if ( (cdevsw[maj] == NULL) || (cdevsw[maj]->d_open == NULL))
180 if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
182 * When running in very secure mode, do not allow
183 * opens for writing of any disk character devices.
186 && cdevsw[maj]->d_bdev
187 && (cdevsw[maj]->d_bdev->d_flags & D_TYPEMASK) ==
191 * When running in secure mode, do not allow opens
192 * for writing of /dev/mem, /dev/kmem, or character
193 * devices whose corresponding block devices are
196 if (securelevel >= 1) {
197 if ((bdev = chrtoblk(dev)) != NODEV &&
198 vfinddev(bdev, VBLK, &bvp) &&
199 bvp->v_usecount > 0 &&
200 (error = vfs_mountedon(bvp)))
208 * Lite2 stuff. We will almost certainly do this
209 * differently with devfs. The only use of this flag
210 * is in dead_read to make ttys return EOF instead of
211 * EIO when they are dead. Pre-lite2 FreeBSD returns
212 * EOF for all character devices.
214 if (cdevsw[maj]->d_type == D_TTY)
215 vp->v_flag |= VISTTY;
217 VOP_UNLOCK(vp, 0, p);
218 error = (*cdevsw[maj]->d_open)(dev, ap->a_mode, S_IFCHR, p);
219 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
223 if ((u_int)maj >= nblkdev)
225 if ( (bdevsw[maj] == NULL) || (bdevsw[maj]->d_open == NULL))
228 * When running in very secure mode, do not allow
229 * opens for writing of any disk block devices.
231 if (securelevel >= 2 && ap->a_cred != FSCRED &&
232 (ap->a_mode & FWRITE) &&
233 (bdevsw[maj]->d_flags & D_TYPEMASK) == D_DISK)
237 * Do not allow opens of block devices that are
240 error = vfs_mountedon(vp);
243 return ((*bdevsw[maj]->d_open)(dev, ap->a_mode, S_IFBLK, p));
254 struct vop_read_args /* {
258 struct ucred *a_cred;
261 register struct vnode *vp = ap->a_vp;
262 register struct uio *uio = ap->a_uio;
263 struct proc *p = uio->uio_procp;
267 struct partinfo dpart;
274 if (uio->uio_rw != UIO_READ)
275 panic("spec_read mode");
276 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
277 panic("spec_read proc");
279 if (uio->uio_resid == 0)
282 switch (vp->v_type) {
285 VOP_UNLOCK(vp, 0, p);
286 error = (*cdevsw[major(vp->v_rdev)]->d_read)
287 (vp->v_rdev, uio, ap->a_ioflag);
288 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
292 if (uio->uio_offset < 0)
294 bsize = BLKDEV_IOSIZE;
296 if ((majordev = major(dev)) < nblkdev &&
297 (ioctl = bdevsw[majordev]->d_ioctl) != NULL &&
298 (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 &&
299 dpart.part->p_fstype == FS_BSDFFS &&
300 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
301 bsize = dpart.part->p_frag * dpart.part->p_fsize;
302 bscale = btodb(bsize);
304 bn = btodb(uio->uio_offset) & ~(bscale - 1);
305 on = uio->uio_offset % bsize;
306 n = min((unsigned)(bsize - on), uio->uio_resid);
307 if (vp->v_lastr + bscale == bn) {
308 nextbn = bn + bscale;
309 error = breadn(vp, bn, (int)bsize, &nextbn,
310 (int *)&bsize, 1, NOCRED, &bp);
312 error = bread(vp, bn, (int)bsize, NOCRED, &bp);
314 n = min(n, bsize - bp->b_resid);
319 error = uiomove((char *)bp->b_data + on, n, uio);
321 } while (error == 0 && uio->uio_resid > 0 && n != 0);
325 panic("spec_read type");
336 struct vop_write_args /* {
340 struct ucred *a_cred;
343 register struct vnode *vp = ap->a_vp;
344 register struct uio *uio = ap->a_uio;
345 struct proc *p = uio->uio_procp;
349 struct partinfo dpart;
354 if (uio->uio_rw != UIO_WRITE)
355 panic("spec_write mode");
356 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
357 panic("spec_write proc");
360 switch (vp->v_type) {
363 VOP_UNLOCK(vp, 0, p);
364 error = (*cdevsw[major(vp->v_rdev)]->d_write)
365 (vp->v_rdev, uio, ap->a_ioflag);
366 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
370 if (uio->uio_resid == 0)
372 if (uio->uio_offset < 0)
374 bsize = BLKDEV_IOSIZE;
375 if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART,
376 (caddr_t)&dpart, FREAD, p) == 0) {
377 if (dpart.part->p_fstype == FS_BSDFFS &&
378 dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
379 bsize = dpart.part->p_frag *
382 blkmask = btodb(bsize) - 1;
384 bn = btodb(uio->uio_offset) & ~blkmask;
385 on = uio->uio_offset % bsize;
386 n = min((unsigned)(bsize - on), uio->uio_resid);
388 bp = getblk(vp, bn, bsize, 0, 0);
390 error = bread(vp, bn, bsize, NOCRED, &bp);
391 n = min(n, bsize - bp->b_resid);
396 error = uiomove((char *)bp->b_data + on, n, uio);
401 } while (error == 0 && uio->uio_resid > 0 && n != 0);
405 panic("spec_write type");
411 * Device ioctl operation.
416 struct vop_ioctl_args /* {
421 struct ucred *a_cred;
425 dev_t dev = ap->a_vp->v_rdev;
427 switch (ap->a_vp->v_type) {
430 return ((*cdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, ap->a_data,
431 ap->a_fflag, ap->a_p));
434 if (ap->a_command == 0 && (int)ap->a_data == B_TAPE)
435 if ((bdevsw[major(dev)]->d_flags & D_TYPEMASK) ==
440 return ((*bdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, ap->a_data,
441 ap->a_fflag, ap->a_p));
452 struct vop_poll_args /* {
455 struct ucred *a_cred;
461 switch (ap->a_vp->v_type) {
464 dev = ap->a_vp->v_rdev;
465 return (*cdevsw[major(dev)]->d_poll)(dev, ap->a_events, ap->a_p);
467 return (vop_defaultop((struct vop_generic_args *)ap));
472 * Synch buffers associated with a block device
477 struct vop_fsync_args /* {
479 struct ucred *a_cred;
484 register struct vnode *vp = ap->a_vp;
485 register struct buf *bp;
489 if (vp->v_type == VCHR)
492 * Flush all dirty buffers associated with a block device.
496 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
497 nbp = bp->b_vnbufs.le_next;
498 if ((bp->b_flags & B_BUSY))
500 if ((bp->b_flags & B_DELWRI) == 0)
501 panic("spec_fsync: not dirty");
502 if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) {
507 bp->b_flags |= B_BUSY;
513 if (ap->a_waitfor == MNT_WAIT) {
514 while (vp->v_numoutput) {
515 vp->v_flag |= VBWAIT;
516 (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spfsyn", 0);
519 if (vp->v_dirtyblkhd.lh_first) {
520 vprint("spec_fsync: dirty", vp);
532 struct vop_inactive_args /* {
538 VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
543 * Just call the device strategy routine
547 struct vop_strategy_args /* {
554 if ((LIST_FIRST(&bp->b_dep)) != NULL && bioops.io_start)
555 (*bioops.io_start)(bp);
556 (*bdevsw[major(bp->b_dev)]->d_strategy)(bp);
561 * This is a noop, simply returning what one has been given.
565 struct vop_bmap_args /* {
568 struct vnode **a_vpp;
575 if (ap->a_vpp != NULL)
576 *ap->a_vpp = ap->a_vp;
577 if (ap->a_bnp != NULL)
578 *ap->a_bnp = ap->a_bn;
579 if (ap->a_runp != NULL)
581 if (ap->a_runb != NULL)
587 * Device close routine
592 struct vop_close_args /* {
595 struct ucred *a_cred;
599 register struct vnode *vp = ap->a_vp;
600 struct proc *p = ap->a_p;
601 dev_t dev = vp->v_rdev;
605 switch (vp->v_type) {
609 * Hack: a tty device that is a controlling terminal
610 * has a reference from the session structure.
611 * We cannot easily tell that a character device is
612 * a controlling terminal, unless it is the closing
613 * process' controlling terminal. In that case,
614 * if the reference count is 2 (this last descriptor
615 * plus the session), release the reference from the session.
617 if (vcount(vp) == 2 && ap->a_p &&
618 (vp->v_flag & VXLOCK) == 0 &&
619 vp == ap->a_p->p_session->s_ttyvp) {
621 ap->a_p->p_session->s_ttyvp = NULL;
624 * If the vnode is locked, then we are in the midst
625 * of forcibly closing the device, otherwise we only
626 * close on last reference.
628 if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
630 devclose = cdevsw[major(dev)]->d_close;
636 * On last close of a block device (that isn't mounted)
637 * we must invalidate any in core blocks, so that
638 * we can, for instance, change floppy disks.
640 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
641 error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
642 VOP_UNLOCK(vp, 0, ap->a_p);
647 * We do not want to really close the device if it
648 * is still in use unless we are trying to close it
649 * forcibly. Since every use (buffer, vnode, swap, cmap)
650 * holds a reference to the vnode, and because we mark
651 * any other vnodes that alias this device, when the
652 * sum of the reference counts on all the aliased
653 * vnodes descends to one, we are on last close.
655 if ((vcount(vp) > 1) && (vp->v_flag & VXLOCK) == 0)
658 devclose = bdevsw[major(dev)]->d_close;
663 panic("spec_close: not special");
666 return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
670 * Print out the contents of a special device vnode.
674 struct vop_print_args /* {
679 printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev),
680 minor(ap->a_vp->v_rdev));
685 * Special device advisory byte-level locks.
690 struct vop_advlock_args /* {
699 return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL);
703 * Special device bad operation
709 panic("spec_badop called");
714 spec_getpages_iodone(bp)
718 bp->b_flags |= B_DONE;
724 struct vop_getpages_args *ap;
728 int i, pcount, size, s;
733 int toff, nextoff, nread;
734 struct vnode *vp = ap->a_vp;
739 pcount = round_page(ap->a_count) / PAGE_SIZE;
742 * Calculate the offset of the transfer.
744 offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset;
746 /* XXX sanity check before we go into details. */
747 /* XXX limits should be defined elsewhere. */
748 #define DADDR_T_BIT 32
749 #define OFFSET_MAX ((1LL << (DADDR_T_BIT + DEV_BSHIFT)) - 1)
750 if (offset < 0 || offset > OFFSET_MAX) {
751 /* XXX still no %q in kernel. */
752 printf("spec_getpages: preposterous offset 0x%x%08x\n",
753 (u_int)((u_quad_t)offset >> 32),
754 (u_int)(offset & 0xffffffff));
755 return (VM_PAGER_ERROR);
758 blkno = btodb(offset);
761 * Round up physical size for real devices, use the
762 * fundamental blocksize of the fs if possible.
764 if (vp && vp->v_mount) {
765 if (vp->v_type != VBLK) {
766 vprint("Non VBLK", vp);
768 blksiz = vp->v_mount->mnt_stat.f_bsize;
769 if (blksiz < DEV_BSIZE) {
775 size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);
778 kva = (vm_offset_t)bp->b_data;
781 * Map the pages to be read into the kva.
783 pmap_qenter(kva, ap->a_m, pcount);
785 /* Build a minimal buffer header. */
786 bp->b_flags = B_BUSY | B_READ | B_CALL;
787 bp->b_iodone = spec_getpages_iodone;
789 /* B_PHYS is not set, but it is nice to fill this in. */
790 bp->b_proc = curproc;
791 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
792 if (bp->b_rcred != NOCRED)
794 if (bp->b_wcred != NOCRED)
797 bp->b_lblkno = blkno;
798 pbgetvp(ap->a_vp, bp);
800 bp->b_bufsize = size;
804 cnt.v_vnodepgsin += pcount;
811 /* We definitely need to be at splbio here. */
812 while ((bp->b_flags & B_DONE) == 0)
813 tsleep(bp, PVM, "spread", 0);
817 if ((bp->b_flags & B_ERROR) != 0) {
824 nread = size - bp->b_resid;
826 if (nread < ap->a_count) {
827 bzero((caddr_t)kva + nread,
828 ap->a_count - nread);
830 pmap_qremove(kva, pcount);
834 for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) {
835 nextoff = toff + PAGE_SIZE;
838 m->flags &= ~PG_ZERO;
840 if (nextoff <= nread) {
841 m->valid = VM_PAGE_BITS_ALL;
843 } else if (toff < nread) {
844 int nvalid = ((nread + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1);
845 vm_page_set_validclean(m, 0, nvalid);
851 if (i != ap->a_reqpage) {
853 * Just in case someone was asking for this page we
854 * now tell them that it is ok to use.
856 if (!error || (m->valid == VM_PAGE_BITS_ALL)) {
858 if (m->flags & PG_WANTED) {
861 vm_page_deactivate(m);
870 } else if (m->valid) {
875 m = ap->a_m[ap->a_reqpage];
877 printf("spec_getpages: I/O read failure: (error code=%d)\n", error);
878 printf(" size: %d, resid: %d, a_count: %d, valid: 0x%x\n",
879 size, bp->b_resid, ap->a_count, m->valid);
880 printf(" nread: %d, reqpage: %d, pindex: %d, pcount: %d\n",
881 nread, ap->a_reqpage, m->pindex, pcount);
884 * Free the buffer header back to the swap buffer pool.
887 return VM_PAGER_ERROR;
890 * Free the buffer header back to the swap buffer pool.
899 struct vop_getattr_args /* {
902 struct ucred *a_cred;
906 register struct vnode *vp = ap->a_vp;
907 register struct vattr *vap = ap->a_vap;
908 struct partinfo dpart;
910 bzero(vap, sizeof (*vap));
912 if (vp->v_type == VBLK)
913 vap->va_blocksize = BLKDEV_IOSIZE;
914 else if (vp->v_type == VCHR)
915 vap->va_blocksize = MAXBSIZE;
917 if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART,
918 (caddr_t)&dpart, FREAD, ap->a_p) == 0) {
919 vap->va_bytes = dbtob(dpart.disklab->d_partitions
920 [minor(vp->v_rdev)].p_size);
921 vap->va_size = vap->va_bytes;