2 * modified for Lites 1.1
4 * Aug 1995, Godmar Back (gback@cs.utah.edu)
5 * University of Utah, Department of Computer Science
9 * The Regents of the University of California. All rights reserved.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * @(#)ufs_readwrite.c 8.7 (Berkeley) 1/21/94
39 /* XXX TODO: remove these obfuscations (as in ffs_vnops.c). */
/*
 * Compatibility aliases kept from the shared UFS/FFS version of this file:
 * BLKSIZE/FS map to the ext2-specific block-size helper and in-memory
 * superblock type, while READ/WRITE (and their *_S panic-message strings)
 * name the ext2 vnode-op entry points defined below.
 */
40 #define BLKSIZE(a, b, c) blksize(a, b, c)
41 #define FS struct m_ext2fs
43 #define READ ext2_read
44 #define READ_S "ext2_read"
45 #define WRITE ext2_write
46 #define WRITE_S "ext2_write"
49 #include <vm/vm_extern.h>
50 #include <vm/vm_object.h>
51 #include <vm/vm_page.h>
52 #include <vm/vm_pager.h>
53 #include <vm/vnode_pager.h>
55 #include "opt_directio.h"
58 * Vnode op for reading.
/*
 * ext2_read: VOP_READ entry point.  Copies file data from the buffer
 * cache into the caller's uio one filesystem block at a time, choosing
 * between plain, clustered, and read-ahead block reads.
 * NOTE(review): this listing is non-contiguous; several declarations and
 * statements fall outside the visible excerpt — confirm against the full
 * source before relying on control flow between the visible lines.
 */
62 struct vop_read_args /* {
76 long size, xfersize, blkoffset;
77 int error, orig_resid, seqcount;
/* The high bits of a_ioflag carry the sequential-access hint count. */
82 ioflag = ap->a_ioflag;
84 seqcount = ap->a_ioflag >> IO_SEQSHIFT;
/* Sanity checks: a wrong uio direction or vnode type is a kernel bug. */
88 if (uio->uio_rw != UIO_READ)
89 panic("%s: mode", READ_S);
91 if (vp->v_type == VLNK) {
/* Symlinks shorter than mnt_maxsymlinklen never reach this path. */
92 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
93 panic("%s: short symlink", READ_S);
94 } else if (vp->v_type != VREG && vp->v_type != VDIR)
95 panic("%s: type %d", READ_S, vp->v_type);
97 orig_resid = uio->uio_resid;
98 KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
101 KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
/* Reject offsets at or beyond the filesystem's maximum file size. */
103 if (uio->uio_offset < ip->i_size &&
104 uio->uio_offset >= fs->e2fs_maxfilesize)
/*
 * Main transfer loop, one iteration per block.  bp is reset to NULL
 * on every pass so that a non-NULL bp after the loop identifies a
 * buffer left over from a 'break' that still needs releasing.
 */
107 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
108 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
110 lbn = lblkno(fs, uio->uio_offset);
112 size = BLKSIZE(fs, ip, lbn);
113 blkoffset = blkoff(fs, uio->uio_offset);
/* Clamp the transfer to block end, caller's residual, and EOF. */
115 xfersize = fs->e2fs_fsize - blkoffset;
116 if (uio->uio_resid < xfersize)
117 xfersize = uio->uio_resid;
118 if (bytesinfile < xfersize)
119 xfersize = bytesinfile;
/*
 * Pick a read strategy: a plain bread() when the next block is at
 * or past EOF, a clustered read when the mount allows it, a
 * one-block read-ahead (breadn) for sequential access, otherwise
 * a simple single-block bread().
 */
121 if (lblktosize(fs, nextlbn) >= ip->i_size)
122 error = bread(vp, lbn, size, NOCRED, &bp);
123 else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0)
124 error = cluster_read(vp, ip->i_size, lbn, size,
125 NOCRED, blkoffset + uio->uio_resid, seqcount, &bp);
126 else if (seqcount > 1) {
127 int nextsize = BLKSIZE(fs, ip, nextlbn);
128 error = breadn(vp, lbn,
129 size, &nextlbn, &nextsize, 1, NOCRED, &bp);
131 error = bread(vp, lbn, size, NOCRED, &bp);
139 * If IO_DIRECT then set B_DIRECT for the buffer. This
140 * will cause us to attempt to release the buffer later on
141 * and will cause the buffer cache to attempt to free the
144 if (ioflag & IO_DIRECT)
145 bp->b_flags |= B_DIRECT;
148 * We should only get non-zero b_resid when an I/O error
149 * has occurred, which should cause us to break above.
150 * However, if the short read did not cause an error,
151 * then we want to ensure that we do not uiomove bad
152 * or uninitialized data.
155 if (size < xfersize) {
/* Copy this block's slice from the buffer out to the caller. */
160 error = uiomove((char *)bp->b_data + blkoffset,
165 if (ioflag & (IO_VMIO|IO_DIRECT)) {
167 * If it's VMIO or direct I/O, then we don't
168 * need the buf, mark it available for
169 * freeing. If it's non-direct VMIO, the VM has
172 bp->b_flags |= B_RELBUF;
176 * Otherwise let whoever
177 * made the request take care of
178 * freeing it. We just queue
179 * it onto another list.
186 * This can only happen in the case of an error
187 * because the loop above resets bp to NULL on each iteration
188 * and on normal completion has not set a new value into it.
189 * so it must have come from a 'break' statement
192 if (ioflag & (IO_VMIO|IO_DIRECT)) {
193 bp->b_flags |= B_RELBUF;
/* Record the access for a later atime update, unless mounted noatime. */
200 if ((error == 0 || uio->uio_resid != orig_resid) &&
201 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
202 ip->i_flag |= IN_ACCESS;
207 * Vnode op for writing.
/*
 * ext2_write: VOP_WRITE entry point.  Allocates backing blocks as
 * needed (ext2_balloc), copies data in from the caller's uio one block
 * at a time, and pushes each buffer out synchronously, clustered, or
 * delayed depending on ioflag and system memory pressure.
 * NOTE(review): this listing is non-contiguous and the function
 * continues past the visible excerpt; verify control flow against the
 * full source.
 */
211 struct vop_write_args /* {
215 struct ucred *a_cred;
225 int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize;
227 ioflag = ap->a_ioflag;
/* High bits of ioflag carry the sequential-access hint count. */
231 seqcount = ioflag >> IO_SEQSHIFT;
/* Sanity check: a wrong uio direction here is a kernel bug. */
235 if (uio->uio_rw != UIO_WRITE)
236 panic("%s: mode", WRITE_S);
239 switch (vp->v_type) {
/* O_APPEND writes always start at the current end of file. */
241 if (ioflag & IO_APPEND)
242 uio->uio_offset = ip->i_size;
/* An append-only inode rejects any write not at EOF. */
243 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
249 /* XXX differs from ffs -- this is called from ext2_mkdir(). */
250 if ((ioflag & IO_SYNC) == 0)
251 panic("ext2_write: nonsync dir write");
254 panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp,
255 vp->v_type, (intmax_t)uio->uio_offset,
256 (intmax_t)uio->uio_resid);
259 KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0"));
260 KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0"));
/* Refuse writes that would grow the file past the fs maximum size. */
262 if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize)
265 * Maybe this should be above the vnode op call, but so long as
266 * file servers have no limits, I don't think it matters.
268 if (vn_rlimit_fsize(vp, uio, uio->uio_td))
/* Remember the starting residual for the IO_UNIT rollback below. */
271 resid = uio->uio_resid;
/* Encode the (clamped) sequential hint into the balloc flags. */
273 if (seqcount > BA_SEQMAX)
274 flags = BA_SEQMAX << BA_SEQSHIFT;
276 flags = seqcount << BA_SEQSHIFT;
277 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
/* Main transfer loop: one filesystem block per iteration. */
280 for (error = 0; uio->uio_resid > 0;) {
281 lbn = lblkno(fs, uio->uio_offset);
282 blkoffset = blkoff(fs, uio->uio_offset);
/* Clamp the transfer to the end of this block / caller's residual. */
283 xfersize = fs->e2fs_fsize - blkoffset;
284 if (uio->uio_resid < xfersize)
285 xfersize = uio->uio_resid;
/* Tell the VM about the new size before any pages are touched. */
286 if (uio->uio_offset + xfersize > ip->i_size)
287 vnode_pager_setsize(vp, uio->uio_offset + xfersize);
290 * We must perform a read-before-write if the transfer size
291 * does not cover the entire buffer.
293 if (fs->e2fs_bsize > xfersize)
/* Allocate/locate the backing block and get its buffer. */
297 error = ext2_balloc(ip, lbn, blkoffset + xfersize,
298 ap->a_cred, &bp, flags);
303 * If the buffer is not valid and we did not clear garbage
304 * out above, we have to do so here even though the write
305 * covers the entire buffer in order to avoid a mmap()/write
306 * race where another process may see the garbage prior to
307 * the uiomove() for a write replacing it.
309 if ((bp->b_flags & B_CACHE) == 0 && fs->e2fs_bsize <= xfersize)
311 if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
312 bp->b_flags |= B_NOCACHE;
/* Extend the recorded file size if this write grows the file. */
313 if (uio->uio_offset + xfersize > ip->i_size)
314 ip->i_size = uio->uio_offset + xfersize;
315 size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
/* Copy the caller's data into the buffer. */
320 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
321 if (ioflag & (IO_VMIO|IO_DIRECT)) {
322 bp->b_flags |= B_RELBUF;
326 * If IO_SYNC each buffer is written synchronously. Otherwise
327 * if we have a severe page deficiency write the buffer
328 * asynchronously. Otherwise try to cluster, and if that
329 * doesn't do it then either do an async write (if O_DIRECT),
330 * or a delayed write (if not).
332 if (ioflag & IO_SYNC) {
334 } else if (vm_page_count_severe() ||
335 buf_dirty_count_severe() ||
336 (ioflag & IO_ASYNC)) {
337 bp->b_flags |= B_CLUSTEROK;
/* A full block at block end: a candidate for write clustering. */
339 } else if (xfersize + blkoffset == fs->e2fs_fsize) {
340 if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
341 bp->b_flags |= B_CLUSTEROK;
342 cluster_write(vp, bp, ip->i_size, seqcount);
346 } else if (ioflag & IO_DIRECT) {
347 bp->b_flags |= B_CLUSTEROK;
350 bp->b_flags |= B_CLUSTEROK;
353 if (error || xfersize == 0)
357 * If we successfully wrote any data, and we are not the superuser
358 * we clear the setuid and setgid bits as a precaution against
/* resid > uio_resid means at least one byte was transferred. */
361 if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
363 if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0))
364 ip->i_mode &= ~(ISUID | ISGID);
/*
 * IO_UNIT error path: undo the partial write by truncating back to
 * the original size and restoring the uio to its starting state.
 */
367 if (ioflag & IO_UNIT) {
368 (void)ext2_truncate(vp, osize,
369 ioflag & IO_SYNC, ap->a_cred, uio->uio_td);
370 uio->uio_offset -= resid - uio->uio_resid;
371 uio->uio_resid = resid;
/* Something was written: mark the inode dirty, sync it if IO_SYNC. */
374 if (uio->uio_resid != resid) {
375 ip->i_flag |= IN_CHANGE | IN_UPDATE;
376 if (ioflag & IO_SYNC)
377 error = ext2_update(vp, 1);