2 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * This software was developed for the FreeBSD Project by Marshall
6 * Kirk McKusick and Network Associates Laboratories, the Security
7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * Copyright (c) 1982, 1986, 1989, 1993
33 * The Regents of the University of California. All rights reserved.
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
62 #include <sys/cdefs.h>
63 __FBSDID("$FreeBSD$");
65 #include <sys/param.h>
66 #include <sys/systm.h>
70 #include <sys/mount.h>
71 #include <sys/vnode.h>
72 #include <sys/vmmeter.h>
74 #include <ufs/ufs/quota.h>
75 #include <ufs/ufs/inode.h>
76 #include <ufs/ufs/ufs_extern.h>
77 #include <ufs/ufs/extattr.h>
78 #include <ufs/ufs/ufsmount.h>
80 #include <ufs/ffs/fs.h>
81 #include <ufs/ffs/ffs_extern.h>
84 * Balloc defines the structure of filesystem storage
85 * by allocating the physical blocks on a device given
86 * the inode and the logical block number in a file.
87 * This is the allocation strategy for UFS1. Below is
88 * the allocation strategy for UFS2.
91 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
92 struct ucred *cred, int flags, struct buf **bpp)
95 struct ufs1_dinode *dp;
96 ufs_lbn_t lbn, lastlbn;
100 struct ufsmount *ump;
101 struct indir indirs[UFS_NIADDR + 2];
102 int deallocated, osize, nsize, num, i, error;
104 ufs1_daddr_t *bap, pref;
105 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
106 ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
109 static struct timeval lastfail;
111 int gbflags, reclaimed;
117 lbn = lblkno(fs, startoffset);
118 size = blkoff(fs, startoffset) + size;
120 if (size > fs->fs_bsize)
121 panic("ffs_balloc_ufs1: blk too big");
127 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
129 if (DOINGSOFTDEP(vp))
130 softdep_prealloc(vp, MNT_WAIT);
132 * If the next write will extend the file into a new block,
133 * and the file is currently composed of a fragment
134 * this fragment has to be extended to be a full block.
136 lastlbn = lblkno(fs, ip->i_size);
137 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
139 osize = blksize(fs, ip, nb);
140 if (osize < fs->fs_bsize && osize > 0) {
142 error = ffs_realloccg(ip, nb, dp->di_db[nb],
143 ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
144 &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
148 if (DOINGSOFTDEP(vp))
149 softdep_setup_allocdirect(ip, nb,
150 dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
151 fs->fs_bsize, osize, bp);
152 ip->i_size = smalllblktosize(fs, nb + 1);
153 dp->di_size = ip->i_size;
154 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
155 ip->i_flag |= IN_CHANGE | IN_UPDATE;
158 else if (DOINGASYNC(vp))
165 * The first UFS_NDADDR blocks are direct blocks
167 if (lbn < UFS_NDADDR) {
168 if (flags & BA_METAONLY)
169 panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
171 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
172 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
177 bp->b_blkno = fsbtodb(fs, nb);
183 * Consider need to reallocate a fragment.
185 osize = fragroundup(fs, blkoff(fs, ip->i_size));
186 nsize = fragroundup(fs, size);
187 if (nsize <= osize) {
188 error = bread(vp, lbn, osize, NOCRED, &bp);
193 bp->b_blkno = fsbtodb(fs, nb);
196 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
197 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
198 &dp->di_db[0]), osize, nsize, flags,
202 if (DOINGSOFTDEP(vp))
203 softdep_setup_allocdirect(ip, lbn,
204 dbtofsb(fs, bp->b_blkno), nb,
208 if (ip->i_size < smalllblktosize(fs, lbn + 1))
209 nsize = fragroundup(fs, size);
211 nsize = fs->fs_bsize;
213 error = ffs_alloc(ip, lbn,
214 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
215 nsize, flags, cred, &newb);
218 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
219 bp->b_blkno = fsbtodb(fs, newb);
220 if (flags & BA_CLRBUF)
222 if (DOINGSOFTDEP(vp))
223 softdep_setup_allocdirect(ip, lbn, newb, 0,
226 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
227 ip->i_flag |= IN_CHANGE | IN_UPDATE;
232 * Determine the number of levels of indirection.
235 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
239 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
241 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
243 * Fetch the first indirect block allocating if necessary.
246 nb = dp->di_ib[indirs[0].in_off];
248 allocblk = allociblk;
252 pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
254 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
255 flags, cred, &newb)) != 0) {
256 curthread_pflags_restore(saved_inbdflush);
259 pref = newb + fs->fs_frag;
261 MPASS(allocblk < allociblk + nitems(allociblk));
262 MPASS(lbns_remfree < lbns + nitems(lbns));
264 *lbns_remfree++ = indirs[1].in_lbn;
265 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
266 bp->b_blkno = fsbtodb(fs, nb);
268 if (DOINGSOFTDEP(vp)) {
269 softdep_setup_allocdirect(ip,
270 UFS_NDADDR + indirs[0].in_off, newb, 0,
271 fs->fs_bsize, 0, bp);
273 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
274 if (bp->b_bufsize == fs->fs_bsize)
275 bp->b_flags |= B_CLUSTEROK;
278 if ((error = bwrite(bp)) != 0)
281 allocib = &dp->di_ib[indirs[0].in_off];
283 ip->i_flag |= IN_CHANGE | IN_UPDATE;
286 * Fetch through the indirect blocks, allocating as necessary.
291 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
296 bap = (ufs1_daddr_t *)bp->b_data;
297 nb = bap[indirs[i].in_off];
307 * If parent indirect has just been allocated, try to cluster
308 * immediately following it.
311 pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
313 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
314 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
316 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
318 softdep_request_cleanup(fs, vp, cred,
323 if (ppsratecheck(&lastfail, &curfail, 1)) {
324 ffs_fserr(fs, ip->i_number, "filesystem full");
325 uprintf("\n%s: write failed, filesystem "
326 "is full\n", fs->fs_fsmnt);
330 pref = newb + fs->fs_frag;
332 MPASS(allocblk < allociblk + nitems(allociblk));
333 MPASS(lbns_remfree < lbns + nitems(lbns));
335 *lbns_remfree++ = indirs[i].in_lbn;
336 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
337 nbp->b_blkno = fsbtodb(fs, nb);
339 if (DOINGSOFTDEP(vp)) {
340 softdep_setup_allocindir_meta(nbp, ip, bp,
341 indirs[i - 1].in_off, nb);
343 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
344 if (nbp->b_bufsize == fs->fs_bsize)
345 nbp->b_flags |= B_CLUSTEROK;
348 if ((error = bwrite(nbp)) != 0) {
353 bap[indirs[i - 1].in_off] = nb;
354 if (allocib == NULL && unwindidx < 0)
357 * If required, write synchronously, otherwise use
360 if (flags & IO_SYNC) {
363 if (bp->b_bufsize == fs->fs_bsize)
364 bp->b_flags |= B_CLUSTEROK;
369 * If asked only for the indirect block, then return it.
371 if (flags & BA_METAONLY) {
372 curthread_pflags_restore(saved_inbdflush);
377 * Get the data block, allocating if necessary.
382 * If allocating metadata at the front of the cylinder
383 * group and parent indirect block has just been allocated,
384 * then cluster next to it if it is the first indirect in
385 * the file. Otherwise it has been allocated in the metadata
386 * area, so we want to find our own place out in the data area.
388 if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
389 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
391 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
392 flags | IO_BUFLOCKED, cred, &newb);
395 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
397 softdep_request_cleanup(fs, vp, cred,
402 if (ppsratecheck(&lastfail, &curfail, 1)) {
403 ffs_fserr(fs, ip->i_number, "filesystem full");
404 uprintf("\n%s: write failed, filesystem "
405 "is full\n", fs->fs_fsmnt);
410 MPASS(allocblk < allociblk + nitems(allociblk));
411 MPASS(lbns_remfree < lbns + nitems(lbns));
413 *lbns_remfree++ = lbn;
414 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
415 nbp->b_blkno = fsbtodb(fs, nb);
416 if (flags & BA_CLRBUF)
418 if (DOINGSOFTDEP(vp))
419 softdep_setup_allocindir_page(ip, lbn, bp,
420 indirs[i].in_off, nb, 0, nbp);
421 bap[indirs[i].in_off] = nb;
423 * If required, write synchronously, otherwise use
426 if (flags & IO_SYNC) {
429 if (bp->b_bufsize == fs->fs_bsize)
430 bp->b_flags |= B_CLUSTEROK;
433 curthread_pflags_restore(saved_inbdflush);
438 if (flags & BA_CLRBUF) {
439 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
441 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
442 !(vm_page_count_severe() || buf_dirty_count_severe())) {
443 error = cluster_read(vp, ip->i_size, lbn,
444 (int)fs->fs_bsize, NOCRED,
445 MAXBSIZE, seqcount, gbflags, &nbp);
447 error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
455 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
456 nbp->b_blkno = fsbtodb(fs, nb);
458 curthread_pflags_restore(saved_inbdflush);
462 curthread_pflags_restore(saved_inbdflush);
464 * If we have failed to allocate any blocks, simply return the error.
465 * This is the usual case and avoids the need to fsync the file.
467 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
470 * If we have failed part way through block allocation, we
471 * have to deallocate any indirect blocks that we have allocated.
472 * We have to fsync the file before we start to get rid of all
473 * of its dependencies so that we do not leave them dangling.
474 * We have to sync it at the end so that the soft updates code
475 * does not find any untracked changes. Although this is really
476 * slow, running out of disk space is not expected to be a common
477 * occurrence. The error return from fsync is ignored as we already
478 * have an error to return to the user.
480 * XXX Still have to journal the free below
482 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
483 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
484 blkp < allocblk; blkp++, lbns_remfree++) {
486 * We shall not leave the freed blocks on the vnode
487 * buffer object lists.
489 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
490 GB_NOCREAT | GB_UNMAPPED);
492 KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
493 ("mismatch1 l %jd %jd b %ju %ju",
494 (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
495 (uintmax_t)bp->b_blkno,
496 (uintmax_t)fsbtodb(fs, *blkp)));
497 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
498 bp->b_flags &= ~(B_ASYNC | B_CACHE);
501 deallocated += fs->fs_bsize;
503 if (allocib != NULL) {
505 } else if (unwindidx >= 0) {
508 r = bread(vp, indirs[unwindidx].in_lbn,
509 (int)fs->fs_bsize, NOCRED, &bp);
511 panic("Could not unwind indirect block, error %d", r);
514 bap = (ufs1_daddr_t *)bp->b_data;
515 bap[indirs[unwindidx].in_off] = 0;
516 if (flags & IO_SYNC) {
519 if (bp->b_bufsize == fs->fs_bsize)
520 bp->b_flags |= B_CLUSTEROK;
528 * Restore user's disk quota because allocation failed.
530 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
532 dp->di_blocks -= btodb(deallocated);
533 ip->i_flag |= IN_CHANGE | IN_UPDATE;
535 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
537 * After the buffers are invalidated and on-disk pointers are
538 * cleared, free the blocks.
540 for (blkp = allociblk; blkp < allocblk; blkp++) {
542 if (blkp == allociblk)
544 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
545 GB_NOCREAT | GB_UNMAPPED);
547 panic("zombie1 %jd %ju %ju",
548 (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
549 (uintmax_t)fsbtodb(fs, *blkp));
553 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
554 ip->i_number, vp->v_type, NULL);
560 * Balloc defines the structure of file system storage
561 * by allocating the physical blocks on a device given
562 * the inode and the logical block number in a file.
563 * This is the allocation strategy for UFS2. Above is
564 * the allocation strategy for UFS1.
/*
 * ffs_balloc_ufs2 --
 *	UFS2 counterpart of ffs_balloc_ufs1 above: allocate the physical
 *	storage backing the logical block containing 'startoffset' of the
 *	file attached to 'vp', and return a locked buffer in '*bpp'.
 *	Unlike the UFS1 version it also handles the external-attribute
 *	data area when IO_EXT is set (di_extb[] blocks, addressed with
 *	negative lbns and marked BX_ALTDATA).
 *
 * NOTE(review): this extract is an abridged (sampled) view; lines are
 * missing between some statements, the original file's line numbers
 * are fused into each line, and the function's tail extends past the
 * end of this extract.  Comments annotate only the visible code.
 */
567 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
568 struct ucred *cred, int flags, struct buf **bpp)
571 struct ufs2_dinode *dp;
572 ufs_lbn_t lbn, lastlbn;
574 struct buf *bp, *nbp;
575 struct ufsmount *ump;
/* Path of indirect blocks from the inode down to lbn (ufs_getlbns). */
576 struct indir indirs[UFS_NIADDR + 2];
577 ufs2_daddr_t nb, newb, *bap, pref;
/* Unwind record of every block allocated during this call. */
578 ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
579 ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
580 int deallocated, osize, nsize, num, i, error;
/* static: shared rate-limit state for the "filesystem full" message. */
583 static struct timeval lastfail;
585 int gbflags, reclaimed;
/* Byte offset -> logical block number and bytes needed of that block. */
591 lbn = lblkno(fs, startoffset);
592 size = blkoff(fs, startoffset) + size;
594 if (size > fs->fs_bsize)
595 panic("ffs_balloc_ufs2: blk too big");
/* BA_UNMAPPED callers are willing to accept an unmapped buffer. */
599 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
601 if (DOINGSOFTDEP(vp))
602 softdep_prealloc(vp, MNT_WAIT);
605 * Check for allocating external data.
607 if (flags & IO_EXT) {
608 if (lbn >= UFS_NXADDR)
611 * If the next write will extend the data into a new block,
612 * and the data is currently composed of a fragment
613 * this fragment has to be extended to be a full block.
615 lastlbn = lblkno(fs, dp->di_extsize);
618 osize = sblksize(fs, dp->di_extsize, nb);
619 if (osize < fs->fs_bsize && osize > 0) {
/* Ext-area blocks use negative lbns (-1 - nb) on the vnode. */
621 error = ffs_realloccg(ip, -1 - nb,
623 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
624 &dp->di_extb[0]), osize,
625 (int)fs->fs_bsize, flags, cred, &bp);
628 if (DOINGSOFTDEP(vp))
629 softdep_setup_allocext(ip, nb,
630 dbtofsb(fs, bp->b_blkno),
632 fs->fs_bsize, osize, bp);
633 dp->di_extsize = smalllblktosize(fs, nb + 1);
634 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
/* BX_ALTDATA marks the buffer as external-attribute data. */
635 bp->b_xflags |= BX_ALTDATA;
636 ip->i_flag |= IN_CHANGE;
644 * All blocks are direct blocks
646 if (flags & BA_METAONLY)
647 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
648 nb = dp->di_extb[lbn];
/* Ext block already allocated and fully covered: just read it. */
649 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
650 error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
656 bp->b_blkno = fsbtodb(fs, nb);
657 bp->b_xflags |= BX_ALTDATA;
663 * Consider need to reallocate a fragment.
665 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
666 nsize = fragroundup(fs, size);
667 if (nsize <= osize) {
/* Existing fragment is already big enough; reuse it. */
668 error = bread_gb(vp, -1 - lbn, osize, NOCRED,
674 bp->b_blkno = fsbtodb(fs, nb);
675 bp->b_xflags |= BX_ALTDATA;
678 error = ffs_realloccg(ip, -1 - lbn,
680 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
681 &dp->di_extb[0]), osize, nsize, flags,
685 bp->b_xflags |= BX_ALTDATA;
686 if (DOINGSOFTDEP(vp))
687 softdep_setup_allocext(ip, lbn,
688 dbtofsb(fs, bp->b_blkno), nb,
/* Fresh ext block: fragment if last block, else full block. */
692 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
693 nsize = fragroundup(fs, size);
695 nsize = fs->fs_bsize;
697 error = ffs_alloc(ip, lbn,
698 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
699 nsize, flags, cred, &newb);
702 bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
703 bp->b_blkno = fsbtodb(fs, newb);
704 bp->b_xflags |= BX_ALTDATA;
705 if (flags & BA_CLRBUF)
707 if (DOINGSOFTDEP(vp))
708 softdep_setup_allocext(ip, lbn, newb, 0,
711 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
712 ip->i_flag |= IN_CHANGE;
717 * If the next write will extend the file into a new block,
718 * and the file is currently composed of a fragment
719 * this fragment has to be extended to be a full block.
721 lastlbn = lblkno(fs, ip->i_size);
722 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
724 osize = blksize(fs, ip, nb);
725 if (osize < fs->fs_bsize && osize > 0) {
/* Grow the trailing fragment in place to a full block. */
727 error = ffs_realloccg(ip, nb, dp->di_db[nb],
728 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
729 &dp->di_db[0]), osize, (int)fs->fs_bsize,
733 if (DOINGSOFTDEP(vp))
734 softdep_setup_allocdirect(ip, nb,
735 dbtofsb(fs, bp->b_blkno),
737 fs->fs_bsize, osize, bp);
738 ip->i_size = smalllblktosize(fs, nb + 1);
739 dp->di_size = ip->i_size;
740 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
741 ip->i_flag |= IN_CHANGE | IN_UPDATE;
749 * The first UFS_NDADDR blocks are direct blocks
751 if (lbn < UFS_NDADDR) {
752 if (flags & BA_METAONLY)
753 panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
/* Block already allocated and fully covered by the file: read it. */
755 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
756 error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
762 bp->b_blkno = fsbtodb(fs, nb);
768 * Consider need to reallocate a fragment.
770 osize = fragroundup(fs, blkoff(fs, ip->i_size));
771 nsize = fragroundup(fs, size);
772 if (nsize <= osize) {
773 error = bread_gb(vp, lbn, osize, NOCRED,
779 bp->b_blkno = fsbtodb(fs, nb);
782 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
783 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
784 &dp->di_db[0]), osize, nsize, flags,
788 if (DOINGSOFTDEP(vp))
789 softdep_setup_allocdirect(ip, lbn,
790 dbtofsb(fs, bp->b_blkno), nb,
/* Fresh direct block: fragment if it is the last block, else full. */
794 if (ip->i_size < smalllblktosize(fs, lbn + 1))
795 nsize = fragroundup(fs, size);
797 nsize = fs->fs_bsize;
799 error = ffs_alloc(ip, lbn,
800 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
801 &dp->di_db[0]), nsize, flags, cred, &newb);
804 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
805 bp->b_blkno = fsbtodb(fs, newb);
806 if (flags & BA_CLRBUF)
808 if (DOINGSOFTDEP(vp))
809 softdep_setup_allocdirect(ip, lbn, newb, 0,
812 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
813 ip->i_flag |= IN_CHANGE | IN_UPDATE;
818 * Determine the number of levels of indirection.
821 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
825 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
/* Thread-private flag; restored on every exit path below. */
827 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
829 * Fetch the first indirect block allocating if necessary.
832 nb = dp->di_ib[indirs[0].in_off];
/* Start with an empty unwind record. */
834 allocblk = allociblk;
838 pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
840 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
841 flags, cred, &newb)) != 0) {
842 curthread_pflags_restore(saved_inbdflush);
/* Prefer clustering subsequent allocations right after this block. */
845 pref = newb + fs->fs_frag;
847 MPASS(allocblk < allociblk + nitems(allociblk));
848 MPASS(lbns_remfree < lbns + nitems(lbns));
850 *lbns_remfree++ = indirs[1].in_lbn;
851 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
853 bp->b_blkno = fsbtodb(fs, nb);
855 if (DOINGSOFTDEP(vp)) {
856 softdep_setup_allocdirect(ip,
857 UFS_NDADDR + indirs[0].in_off, newb, 0,
858 fs->fs_bsize, 0, bp);
860 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
861 if (bp->b_bufsize == fs->fs_bsize)
862 bp->b_flags |= B_CLUSTEROK;
/* Without soft updates the indirect must reach disk before the inode
 * points at it, hence the synchronous write. */
865 if ((error = bwrite(bp)) != 0)
868 allocib = &dp->di_ib[indirs[0].in_off];
870 ip->i_flag |= IN_CHANGE | IN_UPDATE;
873 * Fetch through the indirect blocks, allocating as necessary.
878 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
883 bap = (ufs2_daddr_t *)bp->b_data;
884 nb = bap[indirs[i].in_off];
894 * If parent indirect has just been allocated, try to cluster
895 * immediately following it.
898 pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
900 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
901 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
/* First ENOSPC with soft updates: ask softdep to reclaim space and
 * retry once (reclaimed guards against looping). */
903 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
905 softdep_request_cleanup(fs, vp, cred,
/* Rate-limited operator notification of the out-of-space failure. */
910 if (ppsratecheck(&lastfail, &curfail, 1)) {
911 ffs_fserr(fs, ip->i_number, "filesystem full");
912 uprintf("\n%s: write failed, filesystem "
913 "is full\n", fs->fs_fsmnt);
917 pref = newb + fs->fs_frag;
919 MPASS(allocblk < allociblk + nitems(allociblk));
920 MPASS(lbns_remfree < lbns + nitems(lbns));
922 *lbns_remfree++ = indirs[i].in_lbn;
923 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
925 nbp->b_blkno = fsbtodb(fs, nb);
927 if (DOINGSOFTDEP(vp)) {
928 softdep_setup_allocindir_meta(nbp, ip, bp,
929 indirs[i - 1].in_off, nb);
931 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
932 if (nbp->b_bufsize == fs->fs_bsize)
933 nbp->b_flags |= B_CLUSTEROK;
936 if ((error = bwrite(nbp)) != 0) {
/* Link the new indirect into its parent and remember the first place
 * we dirtied, for unwinding on a later failure. */
941 bap[indirs[i - 1].in_off] = nb;
942 if (allocib == NULL && unwindidx < 0)
945 * If required, write synchronously, otherwise use
948 if (flags & IO_SYNC) {
951 if (bp->b_bufsize == fs->fs_bsize)
952 bp->b_flags |= B_CLUSTEROK;
957 * If asked only for the indirect block, then return it.
959 if (flags & BA_METAONLY) {
960 curthread_pflags_restore(saved_inbdflush);
965 * Get the data block, allocating if necessary.
970 * If allocating metadata at the front of the cylinder
971 * group and parent indirect block has just been allocated,
972 * then cluster next to it if it is the first indirect in
973 * the file. Otherwise it has been allocated in the metadata
974 * area, so we want to find our own place out in the data area.
976 if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
977 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
979 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
980 flags | IO_BUFLOCKED, cred, &newb);
/* Same one-shot softdep cleanup/retry as in the indirect loop above. */
983 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
985 softdep_request_cleanup(fs, vp, cred,
990 if (ppsratecheck(&lastfail, &curfail, 1)) {
991 ffs_fserr(fs, ip->i_number, "filesystem full");
992 uprintf("\n%s: write failed, filesystem "
993 "is full\n", fs->fs_fsmnt);
998 MPASS(allocblk < allociblk + nitems(allociblk));
999 MPASS(lbns_remfree < lbns + nitems(lbns));
1001 *lbns_remfree++ = lbn;
1002 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1003 nbp->b_blkno = fsbtodb(fs, nb);
1004 if (flags & BA_CLRBUF)
1005 vfs_bio_clrbuf(nbp);
1006 if (DOINGSOFTDEP(vp))
1007 softdep_setup_allocindir_page(ip, lbn, bp,
1008 indirs[i].in_off, nb, 0, nbp);
1009 bap[indirs[i].in_off] = nb;
1011 * If required, write synchronously, otherwise use
1014 if (flags & IO_SYNC) {
1017 if (bp->b_bufsize == fs->fs_bsize)
1018 bp->b_flags |= B_CLUSTEROK;
1021 curthread_pflags_restore(saved_inbdflush);
1027 * If requested clear invalid portions of the buffer. If we
1028 * have to do a read-before-write (typical if BA_CLRBUF is set),
1029 * try to do some read-ahead in the sequential case to reduce
1030 * the number of I/O transactions.
1032 if (flags & BA_CLRBUF) {
1033 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1034 if (seqcount != 0 &&
1035 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1036 !(vm_page_count_severe() || buf_dirty_count_severe())) {
1037 error = cluster_read(vp, ip->i_size, lbn,
1038 (int)fs->fs_bsize, NOCRED,
1039 MAXBSIZE, seqcount, gbflags, &nbp);
1041 error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1042 NOCRED, gbflags, &nbp);
1049 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1050 nbp->b_blkno = fsbtodb(fs, nb);
1052 curthread_pflags_restore(saved_inbdflush);
/* ---- Failure path: unwind all allocations made by this call. ---- */
1056 curthread_pflags_restore(saved_inbdflush);
1058 * If we have failed to allocate any blocks, simply return the error.
1059 * This is the usual case and avoids the need to fsync the file.
1061 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1064 * If we have failed part way through block allocation, we
1065 * have to deallocate any indirect blocks that we have allocated.
1066 * We have to fsync the file before we start to get rid of all
1067 * of its dependencies so that we do not leave them dangling.
1068 * We have to sync it at the end so that the soft updates code
1069 * does not find any untracked changes. Although this is really
1070 * slow, running out of disk space is not expected to be a common
1071 * occurrence. The error return from fsync is ignored as we already
1072 * have an error to return to the user.
1074 * XXX Still have to journal the free below
1076 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1077 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1078 blkp < allocblk; blkp++, lbns_remfree++) {
1080 * We shall not leave the freed blocks on the vnode
1081 * buffer object lists.
1083 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1084 GB_NOCREAT | GB_UNMAPPED);
1086 KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1087 ("mismatch2 l %jd %jd b %ju %ju",
1088 (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1089 (uintmax_t)bp->b_blkno,
1090 (uintmax_t)fsbtodb(fs, *blkp)));
/* Discard the buffer so the freed block never reaches disk. */
1091 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1092 bp->b_flags &= ~(B_ASYNC | B_CACHE);
1095 deallocated += fs->fs_bsize;
1097 if (allocib != NULL) {
1099 } else if (unwindidx >= 0) {
/* Re-read the deepest surviving indirect and clear the stale entry. */
1102 r = bread(vp, indirs[unwindidx].in_lbn,
1103 (int)fs->fs_bsize, NOCRED, &bp);
1105 panic("Could not unwind indirect block, error %d", r);
1108 bap = (ufs2_daddr_t *)bp->b_data;
1109 bap[indirs[unwindidx].in_off] = 0;
1110 if (flags & IO_SYNC) {
1113 if (bp->b_bufsize == fs->fs_bsize)
1114 bp->b_flags |= B_CLUSTEROK;
1122 * Restore user's disk quota because allocation failed.
1124 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1126 dp->di_blocks -= btodb(deallocated);
1127 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1129 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1131 * After the buffers are invalidated and on-disk pointers are
1132 * cleared, free the blocks.
1134 for (blkp = allociblk; blkp < allocblk; blkp++) {
1136 if (blkp == allociblk)
1137 lbns_remfree = lbns;
/* A freed block must have no live buffer left; panic if one appears. */
1138 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1139 GB_NOCREAT | GB_UNMAPPED);
1141 panic("zombie2 %jd %ju %ju",
1142 (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1143 (uintmax_t)fsbtodb(fs, *blkp));
1147 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
1148 ip->i_number, vp->v_type, NULL);