2 * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * Copyright (c) 1982, 1986, 1989, 1993
35 * The Regents of the University of California. All rights reserved.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD$");
67 #include <sys/param.h>
68 #include <sys/systm.h>
72 #include <sys/mount.h>
73 #include <sys/vnode.h>
74 #include <sys/vmmeter.h>
76 #include <ufs/ufs/quota.h>
77 #include <ufs/ufs/inode.h>
78 #include <ufs/ufs/ufs_extern.h>
79 #include <ufs/ufs/extattr.h>
80 #include <ufs/ufs/ufsmount.h>
82 #include <ufs/ffs/fs.h>
83 #include <ufs/ffs/ffs_extern.h>
86 * Balloc defines the structure of filesystem storage
87 * by allocating the physical blocks on a device given
88 * the inode and the logical block number in a file.
89 * This is the allocation strategy for UFS1. Below is
90 * the allocation strategy for UFS2.
/*
 * NOTE(review): this copy of the file is incomplete -- the original line
 * numbers embedded at the head of each line are non-contiguous, so the
 * return type, closing braces, retry labels and many statements of this
 * function are missing.  The code is left byte-identical below; only
 * review comments have been added.  Recover the full function from the
 * upstream FreeBSD ffs_balloc.c before attempting to compile.
 *
 * Contract (per the block comment above): given a vnode and a byte
 * offset, ensure the physical block backing that logical block exists,
 * allocating direct/indirect blocks as needed; the resulting locked
 * buffer is returned through *bpp (the indirect block itself when
 * BA_METAONLY is set).
 */
93 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
94 struct ucred *cred, int flags, struct buf **bpp)
97 struct ufs1_dinode *dp;
98 ufs_lbn_t lbn, lastlbn;
101 struct buf *bp, *nbp;
103 struct ufsmount *ump;
104 struct indir indirs[UFS_NIADDR + 2];
105 int deallocated, osize, nsize, num, i, error;
107 ufs1_daddr_t *bap, pref;
/* allociblk/lbns record every block allocated here so a mid-course
 * failure can be unwound; allocblk/lbns_remfree are the fill cursors. */
108 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
109 ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
112 int gbflags, reclaimed;
/* Translate the byte offset into a logical block number; 'size' becomes
 * the extent within that block and may never exceed one fs block. */
119 lbn = lblkno(fs, startoffset);
120 size = blkoff(fs, startoffset) + size;
122 if (size > fs->fs_bsize)
123 panic("ffs_balloc_ufs1: blk too big");
129 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
132 * If the next write will extend the file into a new block,
133 * and the file is currently composed of a fragment
134 * this fragment has to be extended to be a full block.
136 lastlbn = lblkno(fs, ip->i_size);
137 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
139 osize = blksize(fs, ip, nb);
140 if (osize < fs->fs_bsize && osize > 0) {
/* Grow the trailing fragment to a full block before writing past it. */
142 error = ffs_realloccg(ip, nb, dp->di_db[nb],
143 ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
144 &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
148 if (DOINGSOFTDEP(vp))
149 softdep_setup_allocdirect(ip, nb,
150 dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
151 fs->fs_bsize, osize, bp);
/* Record the new size and block address in both in-core and on-disk
 * inode copies, and mark the inode dirty. */
152 ip->i_size = smalllblktosize(fs, nb + 1);
153 dp->di_size = ip->i_size;
154 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
155 UFS_INODE_SET_FLAG(ip,
156 IN_SIZEMOD | IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
159 else if (DOINGASYNC(vp))
166 * The first UFS_NDADDR blocks are direct blocks
168 if (lbn < UFS_NDADDR) {
169 if (flags & BA_METAONLY)
170 panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
/* Block already allocated and fully covered by the file size: just
 * fetch it (read when BA_CLRBUF asks for valid contents). */
172 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
173 if ((flags & BA_CLRBUF) != 0) {
174 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
179 bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
185 bp->b_blkno = fsbtodb(fs, nb);
191 * Consider need to reallocate a fragment.
193 osize = fragroundup(fs, blkoff(fs, ip->i_size));
194 nsize = fragroundup(fs, size);
195 if (nsize <= osize) {
/* Existing fragment already big enough for this write. */
196 error = bread(vp, lbn, osize, NOCRED, &bp);
200 bp->b_blkno = fsbtodb(fs, nb);
203 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
204 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
205 &dp->di_db[0]), osize, nsize, flags,
209 if (DOINGSOFTDEP(vp))
210 softdep_setup_allocdirect(ip, lbn,
211 dbtofsb(fs, bp->b_blkno), nb,
/* No block yet: allocate a fragment if this is the last, partially
 * filled block of the file, otherwise a full block. */
215 if (ip->i_size < smalllblktosize(fs, lbn + 1))
216 nsize = fragroundup(fs, size);
218 nsize = fs->fs_bsize;
220 error = ffs_alloc(ip, lbn,
221 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
222 nsize, flags, cred, &newb);
225 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
226 bp->b_blkno = fsbtodb(fs, newb);
227 if (flags & BA_CLRBUF)
229 if (DOINGSOFTDEP(vp))
230 softdep_setup_allocdirect(ip, lbn, newb, 0,
233 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
234 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
239 * Determine the number of levels of indirection.
242 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
246 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
/* Mark this thread as being inside balloc so recursive buffer-daemon
 * flushes are suppressed until the matching restore below. */
248 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
250 * Fetch the first indirect block allocating if necessary.
253 nb = dp->di_ib[indirs[0].in_off];
255 allocblk = allociblk;
259 pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
261 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
262 flags, cred, &newb)) != 0) {
263 curthread_pflags_restore(saved_inbdflush);
266 pref = newb + fs->fs_frag;
/* The unwind arrays hold at most UFS_NIADDR+1 entries; assert before
 * each append. */
268 MPASS(allocblk < allociblk + nitems(allociblk))ayın;
269 MPASS(lbns_remfree < lbns + nitems(lbns));
271 *lbns_remfree++ = indirs[1].in_lbn;
272 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
273 bp->b_blkno = fsbtodb(fs, nb);
275 if (DOINGSOFTDEP(vp)) {
276 softdep_setup_allocdirect(ip,
277 UFS_NDADDR + indirs[0].in_off, newb, 0,
278 fs->fs_bsize, 0, bp);
280 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
281 if (bp->b_bufsize == fs->fs_bsize)
282 bp->b_flags |= B_CLUSTEROK;
285 if ((error = bwrite(bp)) != 0)
288 allocib = &dp->di_ib[indirs[0].in_off];
290 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
293 * Fetch through the indirect blocks, allocating as necessary.
298 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
302 bap = (ufs1_daddr_t *)bp->b_data;
303 nb = bap[indirs[i].in_off];
/* Sanity-check the on-disk block pointer before trusting it. */
304 if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
305 fs->fs_bsize)) != 0) {
318 * If parent indirect has just been allocated, try to cluster
319 * immediately following it.
322 pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
324 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
325 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
/* On ENOSPC with soft updates, ask softdep to reclaim space once and
 * retry; the retry transfer itself is among the elided lines. */
328 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
329 softdep_request_cleanup(fs, vp, cred,
334 if (!ffs_fsfail_cleanup_locked(ump, error) &&
335 ppsratecheck(&ump->um_last_fullmsg,
336 &ump->um_secs_fullmsg, 1)) {
338 ffs_fserr(fs, ip->i_number, "filesystem full");
339 uprintf("\n%s: write failed, filesystem "
340 "is full\n", fs->fs_fsmnt);
346 pref = newb + fs->fs_frag;
348 MPASS(allocblk < allociblk + nitems(allociblk));
349 MPASS(lbns_remfree < lbns + nitems(lbns));
351 *lbns_remfree++ = indirs[i].in_lbn;
352 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
353 nbp->b_blkno = fsbtodb(fs, nb);
355 if (DOINGSOFTDEP(vp)) {
356 softdep_setup_allocindir_meta(nbp, ip, bp,
357 indirs[i - 1].in_off, nb);
359 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
360 if (nbp->b_bufsize == fs->fs_bsize)
361 nbp->b_flags |= B_CLUSTEROK;
364 if ((error = bwrite(nbp)) != 0) {
/* Publish the child in the parent indirect only after the child has
 * been safely initialized/written. */
369 bap[indirs[i - 1].in_off] = nb;
370 if (allocib == NULL && unwindidx < 0)
373 * If required, write synchronously, otherwise use
376 if (flags & IO_SYNC) {
379 if (bp->b_bufsize == fs->fs_bsize)
380 bp->b_flags |= B_CLUSTEROK;
385 * If asked only for the indirect block, then return it.
387 if (flags & BA_METAONLY) {
388 curthread_pflags_restore(saved_inbdflush);
393 * Get the data block, allocating if necessary.
398 * If allocating metadata at the front of the cylinder
399 * group and parent indirect block has just been allocated,
400 * then cluster next to it if it is the first indirect in
401 * the file. Otherwise it has been allocated in the metadata
402 * area, so we want to find our own place out in the data area.
404 if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
405 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
407 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
408 flags | IO_BUFLOCKED, cred, &newb);
412 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
413 softdep_request_cleanup(fs, vp, cred,
418 if (!ffs_fsfail_cleanup_locked(ump, error) &&
419 ppsratecheck(&ump->um_last_fullmsg,
420 &ump->um_secs_fullmsg, 1)) {
422 ffs_fserr(fs, ip->i_number, "filesystem full");
423 uprintf("\n%s: write failed, filesystem "
424 "is full\n", fs->fs_fsmnt);
431 MPASS(allocblk < allociblk + nitems(allociblk));
432 MPASS(lbns_remfree < lbns + nitems(lbns));
434 *lbns_remfree++ = lbn;
435 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
436 nbp->b_blkno = fsbtodb(fs, nb);
437 if (flags & BA_CLRBUF)
439 if (DOINGSOFTDEP(vp))
440 softdep_setup_allocindir_page(ip, lbn, bp,
441 indirs[i].in_off, nb, 0, nbp);
442 bap[indirs[i].in_off] = nb;
444 * If required, write synchronously, otherwise use
447 if (flags & IO_SYNC) {
450 if (bp->b_bufsize == fs->fs_bsize)
451 bp->b_flags |= B_CLUSTEROK;
454 curthread_pflags_restore(saved_inbdflush);
/* Data block already existed: fetch it, clustering the read when the
 * caller provided a sequential hint and memory pressure allows. */
459 if (flags & BA_CLRBUF) {
460 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
462 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
463 !(vm_page_count_severe() || buf_dirty_count_severe())) {
464 error = cluster_read(vp, ip->i_size, lbn,
465 (int)fs->fs_bsize, NOCRED,
466 MAXBSIZE, seqcount, gbflags, &nbp);
468 error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
476 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
477 nbp->b_blkno = fsbtodb(fs, nb);
479 curthread_pflags_restore(saved_inbdflush);
483 curthread_pflags_restore(saved_inbdflush);
485 * If we have failed to allocate any blocks, simply return the error.
486 * This is the usual case and avoids the need to fsync the file.
488 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
491 * If we have failed part way through block allocation, we
492 * have to deallocate any indirect blocks that we have allocated.
493 * We have to fsync the file before we start to get rid of all
494 * of its dependencies so that we do not leave them dangling.
495 * We have to sync it at the end so that the soft updates code
496 * does not find any untracked changes. Although this is really
497 * slow, running out of disk space is not expected to be a common
498 * occurrence. The error return from fsync is ignored as we already
499 * have an error to return to the user.
501 * XXX Still have to journal the free below
503 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
504 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
505 blkp < allocblk; blkp++, lbns_remfree++) {
507 * We shall not leave the freed blocks on the vnode
508 * buffer object lists.
510 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
511 GB_NOCREAT | GB_UNMAPPED);
513 KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
514 ("mismatch1 l %jd %jd b %ju %ju",
515 (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
516 (uintmax_t)bp->b_blkno,
517 (uintmax_t)fsbtodb(fs, *blkp)));
518 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
519 bp->b_flags &= ~(B_ASYNC | B_CACHE);
522 deallocated += fs->fs_bsize;
524 if (allocib != NULL) {
526 } else if (unwindidx >= 0) {
/* Clear the pointer in the deepest surviving indirect block so the
 * partially built chain is no longer reachable from disk. */
529 r = bread(vp, indirs[unwindidx].in_lbn,
530 (int)fs->fs_bsize, NOCRED, &bp);
532 panic("Could not unwind indirect block, error %d", r);
535 bap = (ufs1_daddr_t *)bp->b_data;
536 bap[indirs[unwindidx].in_off] = 0;
537 if (flags & IO_SYNC) {
540 if (bp->b_bufsize == fs->fs_bsize)
541 bp->b_flags |= B_CLUSTEROK;
549 * Restore user's disk quota because allocation failed.
551 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
553 dp->di_blocks -= btodb(deallocated);
554 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
556 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
558 * After the buffers are invalidated and on-disk pointers are
559 * cleared, free the blocks.
561 for (blkp = allociblk; blkp < allocblk; blkp++) {
563 if (blkp == allociblk)
/* NOTE(review): the UFS2 twin resets lbns_remfree = lbns at this point
 * (see below); the matching statement here appears to be among the
 * elided lines of this copy -- confirm against upstream. */
565 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
566 GB_NOCREAT | GB_UNMAPPED);
568 panic("zombie1 %jd %ju %ju",
569 (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
570 (uintmax_t)fsbtodb(fs, *blkp));
574 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
575 ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
581 * Balloc defines the structure of file system storage
582 * by allocating the physical blocks on a device given
583 * the inode and the logical block number in a file.
584 * This is the allocation strategy for UFS2. Above is
585 * the allocation strategy for UFS1.
/*
 * NOTE(review): as with the UFS1 variant above, this copy is missing
 * many lines (the embedded original line numbers jump), including the
 * return type, closing braces, labels and the function's tail beyond
 * the last visible line.  Code is kept byte-identical; only review
 * comments are added.  Recover the full function from upstream before
 * compiling.
 *
 * Contract (per the block comment above): UFS2 counterpart of
 * ffs_balloc_ufs1 -- allocate/locate the physical block for a logical
 * offset and return its locked buffer via *bpp.  Unlike UFS1 it also
 * handles the external-attribute (extattr) block area when IO_EXT is
 * set.
 */
588 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
589 struct ucred *cred, int flags, struct buf **bpp)
592 struct ufs2_dinode *dp;
593 ufs_lbn_t lbn, lastlbn;
595 struct buf *bp, *nbp;
597 struct ufsmount *ump;
598 struct indir indirs[UFS_NIADDR + 2];
599 ufs2_daddr_t nb, newb, *bap, pref;
/* Allocation log for failure unwind, as in the UFS1 variant. */
600 ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
601 ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
602 int deallocated, osize, nsize, num, i, error;
605 int gbflags, reclaimed;
/* Byte offset -> logical block; 'size' is the in-block extent and must
 * not exceed one filesystem block. */
612 lbn = lblkno(fs, startoffset);
613 size = blkoff(fs, startoffset) + size;
615 if (size > fs->fs_bsize)
616 panic("ffs_balloc_ufs2: blk too big");
620 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
623 * Check for allocating external data.
625 if (flags & IO_EXT) {
/* External-attribute area: only UFS_NXADDR direct blocks exist; buffers
 * in this area live at negative lbns (-1 - lbn) and carry BX_ALTDATA. */
626 if (lbn >= UFS_NXADDR)
629 * If the next write will extend the data into a new block,
630 * and the data is currently composed of a fragment
631 * this fragment has to be extended to be a full block.
633 lastlbn = lblkno(fs, dp->di_extsize);
636 osize = sblksize(fs, dp->di_extsize, nb);
637 if (osize < fs->fs_bsize && osize > 0) {
639 error = ffs_realloccg(ip, -1 - nb,
641 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
642 &dp->di_extb[0]), osize,
643 (int)fs->fs_bsize, flags, cred, &bp);
646 if (DOINGSOFTDEP(vp))
647 softdep_setup_allocext(ip, nb,
648 dbtofsb(fs, bp->b_blkno),
650 fs->fs_bsize, osize, bp);
651 dp->di_extsize = smalllblktosize(fs, nb + 1);
652 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
653 bp->b_xflags |= BX_ALTDATA;
654 UFS_INODE_SET_FLAG(ip,
655 IN_SIZEMOD | IN_CHANGE | IN_IBLKDATA);
663 * All blocks are direct blocks
665 if (flags & BA_METAONLY)
666 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
667 nb = dp->di_extb[lbn];
/* Ext block already allocated and within di_extsize: just fetch it. */
668 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
669 error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
674 bp->b_blkno = fsbtodb(fs, nb);
675 bp->b_xflags |= BX_ALTDATA;
681 * Consider need to reallocate a fragment.
683 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
684 nsize = fragroundup(fs, size);
685 if (nsize <= osize) {
686 error = bread_gb(vp, -1 - lbn, osize, NOCRED,
691 bp->b_blkno = fsbtodb(fs, nb);
692 bp->b_xflags |= BX_ALTDATA;
695 error = ffs_realloccg(ip, -1 - lbn,
697 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
698 &dp->di_extb[0]), osize, nsize, flags,
702 bp->b_xflags |= BX_ALTDATA;
703 if (DOINGSOFTDEP(vp))
704 softdep_setup_allocext(ip, lbn,
705 dbtofsb(fs, bp->b_blkno), nb,
/* No ext block yet: fragment if last partial block, else full block. */
709 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
710 nsize = fragroundup(fs, size);
712 nsize = fs->fs_bsize;
714 error = ffs_alloc(ip, lbn,
715 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
716 nsize, flags, cred, &newb);
719 bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
720 bp->b_blkno = fsbtodb(fs, newb);
721 bp->b_xflags |= BX_ALTDATA;
722 if (flags & BA_CLRBUF)
724 if (DOINGSOFTDEP(vp))
725 softdep_setup_allocext(ip, lbn, newb, 0,
728 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
729 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_IBLKDATA);
734 * If the next write will extend the file into a new block,
735 * and the file is currently composed of a fragment
736 * this fragment has to be extended to be a full block.
738 lastlbn = lblkno(fs, ip->i_size);
739 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
741 osize = blksize(fs, ip, nb);
742 if (osize < fs->fs_bsize && osize > 0) {
/* Grow the trailing fragment to a full block before writing past it. */
744 error = ffs_realloccg(ip, nb, dp->di_db[nb],
745 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
746 &dp->di_db[0]), osize, (int)fs->fs_bsize,
750 if (DOINGSOFTDEP(vp))
751 softdep_setup_allocdirect(ip, nb,
752 dbtofsb(fs, bp->b_blkno),
754 fs->fs_bsize, osize, bp);
755 ip->i_size = smalllblktosize(fs, nb + 1);
756 dp->di_size = ip->i_size;
757 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
758 UFS_INODE_SET_FLAG(ip,
759 IN_SIZEMOD |IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
767 * The first UFS_NDADDR blocks are direct blocks
769 if (lbn < UFS_NDADDR) {
770 if (flags & BA_METAONLY)
771 panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
/* Block already allocated and covered by the file size: fetch it. */
773 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
774 if ((flags & BA_CLRBUF) != 0) {
775 error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
780 bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
786 bp->b_blkno = fsbtodb(fs, nb);
792 * Consider need to reallocate a fragment.
794 osize = fragroundup(fs, blkoff(fs, ip->i_size));
795 nsize = fragroundup(fs, size);
796 if (nsize <= osize) {
797 error = bread_gb(vp, lbn, osize, NOCRED,
802 bp->b_blkno = fsbtodb(fs, nb);
805 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
806 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
807 &dp->di_db[0]), osize, nsize, flags,
811 if (DOINGSOFTDEP(vp))
812 softdep_setup_allocdirect(ip, lbn,
813 dbtofsb(fs, bp->b_blkno), nb,
/* No block yet: fragment for the last partial block, else full block. */
817 if (ip->i_size < smalllblktosize(fs, lbn + 1))
818 nsize = fragroundup(fs, size);
820 nsize = fs->fs_bsize;
822 error = ffs_alloc(ip, lbn,
823 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
824 &dp->di_db[0]), nsize, flags, cred, &newb);
827 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
828 bp->b_blkno = fsbtodb(fs, newb);
829 if (flags & BA_CLRBUF)
831 if (DOINGSOFTDEP(vp))
832 softdep_setup_allocdirect(ip, lbn, newb, 0,
835 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
836 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
841 * Determine the number of levels of indirection.
844 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
848 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
/* Suppress recursive buffer-daemon flushes while inside balloc. */
850 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
852 * Fetch the first indirect block allocating if necessary.
855 nb = dp->di_ib[indirs[0].in_off];
857 allocblk = allociblk;
861 pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
863 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
864 flags, cred, &newb)) != 0) {
865 curthread_pflags_restore(saved_inbdflush);
868 pref = newb + fs->fs_frag;
/* Unwind arrays hold at most UFS_NIADDR+1 entries; assert per append. */
870 MPASS(allocblk < allociblk + nitems(allociblk));
871 MPASS(lbns_remfree < lbns + nitems(lbns));
873 *lbns_remfree++ = indirs[1].in_lbn;
874 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
876 bp->b_blkno = fsbtodb(fs, nb);
878 if (DOINGSOFTDEP(vp)) {
879 softdep_setup_allocdirect(ip,
880 UFS_NDADDR + indirs[0].in_off, newb, 0,
881 fs->fs_bsize, 0, bp);
883 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
884 if (bp->b_bufsize == fs->fs_bsize)
885 bp->b_flags |= B_CLUSTEROK;
888 if ((error = bwrite(bp)) != 0)
891 allocib = &dp->di_ib[indirs[0].in_off];
893 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
896 * Fetch through the indirect blocks, allocating as necessary.
901 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
905 bap = (ufs2_daddr_t *)bp->b_data;
906 nb = bap[indirs[i].in_off];
/* Sanity-check the on-disk block pointer before trusting it. */
907 if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
908 fs->fs_bsize)) != 0) {
921 * If parent indirect has just been allocated, try to cluster
922 * immediately following it.
925 pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
927 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
928 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
/* On ENOSPC with soft updates, request cleanup once and retry; the
 * retry transfer itself is among the elided lines. */
931 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
932 softdep_request_cleanup(fs, vp, cred,
937 if (!ffs_fsfail_cleanup_locked(ump, error) &&
938 ppsratecheck(&ump->um_last_fullmsg,
939 &ump->um_secs_fullmsg, 1)) {
941 ffs_fserr(fs, ip->i_number, "filesystem full");
942 uprintf("\n%s: write failed, filesystem "
943 "is full\n", fs->fs_fsmnt);
949 pref = newb + fs->fs_frag;
951 MPASS(allocblk < allociblk + nitems(allociblk));
952 MPASS(lbns_remfree < lbns + nitems(lbns));
954 *lbns_remfree++ = indirs[i].in_lbn;
955 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
957 nbp->b_blkno = fsbtodb(fs, nb);
959 if (DOINGSOFTDEP(vp)) {
960 softdep_setup_allocindir_meta(nbp, ip, bp,
961 indirs[i - 1].in_off, nb);
963 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
964 if (nbp->b_bufsize == fs->fs_bsize)
965 nbp->b_flags |= B_CLUSTEROK;
968 if ((error = bwrite(nbp)) != 0) {
/* Publish the child in the parent indirect only after the child is
 * safely initialized/written. */
973 bap[indirs[i - 1].in_off] = nb;
974 if (allocib == NULL && unwindidx < 0)
977 * If required, write synchronously, otherwise use
980 if (flags & IO_SYNC) {
983 if (bp->b_bufsize == fs->fs_bsize)
984 bp->b_flags |= B_CLUSTEROK;
989 * If asked only for the indirect block, then return it.
991 if (flags & BA_METAONLY) {
992 curthread_pflags_restore(saved_inbdflush);
997 * Get the data block, allocating if necessary.
1002 * If allocating metadata at the front of the cylinder
1003 * group and parent indirect block has just been allocated,
1004 * then cluster next to it if it is the first indirect in
1005 * the file. Otherwise it has been allocated in the metadata
1006 * area, so we want to find our own place out in the data area.
1008 if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
1009 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
1011 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
1012 flags | IO_BUFLOCKED, cred, &newb);
1016 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
1017 softdep_request_cleanup(fs, vp, cred,
1022 if (!ffs_fsfail_cleanup_locked(ump, error) &&
1023 ppsratecheck(&ump->um_last_fullmsg,
1024 &ump->um_secs_fullmsg, 1)) {
1026 ffs_fserr(fs, ip->i_number, "filesystem full");
1027 uprintf("\n%s: write failed, filesystem "
1028 "is full\n", fs->fs_fsmnt);
1035 MPASS(allocblk < allociblk + nitems(allociblk));
1036 MPASS(lbns_remfree < lbns + nitems(lbns));
1038 *lbns_remfree++ = lbn;
1039 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1040 nbp->b_blkno = fsbtodb(fs, nb);
1041 if (flags & BA_CLRBUF)
1042 vfs_bio_clrbuf(nbp);
1043 if (DOINGSOFTDEP(vp))
1044 softdep_setup_allocindir_page(ip, lbn, bp,
1045 indirs[i].in_off, nb, 0, nbp);
1046 bap[indirs[i].in_off] = nb;
1048 * If required, write synchronously, otherwise use
1051 if (flags & IO_SYNC) {
1054 if (bp->b_bufsize == fs->fs_bsize)
1055 bp->b_flags |= B_CLUSTEROK;
1058 curthread_pflags_restore(saved_inbdflush);
1064 * If requested clear invalid portions of the buffer. If we
1065 * have to do a read-before-write (typical if BA_CLRBUF is set),
1066 * try to do some read-ahead in the sequential case to reduce
1067 * the number of I/O transactions.
1069 if (flags & BA_CLRBUF) {
1070 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1071 if (seqcount != 0 &&
1072 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1073 !(vm_page_count_severe() || buf_dirty_count_severe())) {
1074 error = cluster_read(vp, ip->i_size, lbn,
1075 (int)fs->fs_bsize, NOCRED,
1076 MAXBSIZE, seqcount, gbflags, &nbp);
1078 error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1079 NOCRED, gbflags, &nbp);
1086 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1087 nbp->b_blkno = fsbtodb(fs, nb);
1089 curthread_pflags_restore(saved_inbdflush);
1093 curthread_pflags_restore(saved_inbdflush);
1095 * If we have failed to allocate any blocks, simply return the error.
1096 * This is the usual case and avoids the need to fsync the file.
1098 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1101 * If we have failed part way through block allocation, we
1102 * have to deallocate any indirect blocks that we have allocated.
1103 * We have to fsync the file before we start to get rid of all
1104 * of its dependencies so that we do not leave them dangling.
1105 * We have to sync it at the end so that the soft updates code
1106 * does not find any untracked changes. Although this is really
1107 * slow, running out of disk space is not expected to be a common
1108 * occurrence. The error return from fsync is ignored as we already
1109 * have an error to return to the user.
1111 * XXX Still have to journal the free below
1113 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1114 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1115 blkp < allocblk; blkp++, lbns_remfree++) {
1117 * We shall not leave the freed blocks on the vnode
1118 * buffer object lists.
1120 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1121 GB_NOCREAT | GB_UNMAPPED);
1123 KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1124 ("mismatch2 l %jd %jd b %ju %ju",
1125 (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1126 (uintmax_t)bp->b_blkno,
1127 (uintmax_t)fsbtodb(fs, *blkp)));
1128 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1129 bp->b_flags &= ~(B_ASYNC | B_CACHE);
1132 deallocated += fs->fs_bsize;
1134 if (allocib != NULL) {
1136 } else if (unwindidx >= 0) {
/* Clear the pointer in the deepest surviving indirect block so the
 * partially built chain is no longer reachable from disk. */
1139 r = bread(vp, indirs[unwindidx].in_lbn,
1140 (int)fs->fs_bsize, NOCRED, &bp);
1142 panic("Could not unwind indirect block, error %d", r);
1145 bap = (ufs2_daddr_t *)bp->b_data;
1146 bap[indirs[unwindidx].in_off] = 0;
1147 if (flags & IO_SYNC) {
1150 if (bp->b_bufsize == fs->fs_bsize)
1151 bp->b_flags |= B_CLUSTEROK;
1159 * Restore user's disk quota because allocation failed.
1161 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1163 dp->di_blocks -= btodb(deallocated);
1164 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
1166 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1168 * After the buffers are invalidated and on-disk pointers are
1169 * cleared, free the blocks.
1171 for (blkp = allociblk; blkp < allocblk; blkp++) {
1173 if (blkp == allociblk)
1174 lbns_remfree = lbns;
1175 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1176 GB_NOCREAT | GB_UNMAPPED);
1178 panic("zombie2 %jd %ju %ju",
1179 (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1180 (uintmax_t)fsbtodb(fs, *blkp));
1184 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
1185 ip->i_number, vp->v_type, NULL, SINGLETON_KEY);