2 * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * Copyright (c) 1982, 1986, 1989, 1993
35 * The Regents of the University of California. All rights reserved.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD$");
67 #include <sys/param.h>
68 #include <sys/systm.h>
72 #include <sys/mount.h>
73 #include <sys/vnode.h>
74 #include <sys/vmmeter.h>
76 #include <ufs/ufs/quota.h>
77 #include <ufs/ufs/inode.h>
78 #include <ufs/ufs/ufs_extern.h>
79 #include <ufs/ufs/extattr.h>
80 #include <ufs/ufs/ufsmount.h>
82 #include <ufs/ffs/fs.h>
83 #include <ufs/ffs/ffs_extern.h>
86 * Balloc defines the structure of filesystem storage
87 * by allocating the physical blocks on a device given
88 * the inode and the logical block number in a file.
89 * This is the allocation strategy for UFS1. Below is
90 * the allocation strategy for UFS2.
/*
 * NOTE(review): this extract is lossy -- interior statements are missing
 * (original line numbering jumps) and each surviving line is prefixed with
 * its original file line number.  The comments below describe only what
 * the visible lines establish; verify anything marked as an assumption
 * against the complete sys/ufs/ffs/ffs_balloc.c.
 */
93 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
94 struct ucred *cred, int flags, struct buf **bpp)
97 struct ufs1_dinode *dp;
98 ufs_lbn_t lbn, lastlbn;
101 struct buf *bp, *nbp;
103 struct ufsmount *ump;
/* Path from the inode to the block: up to UFS_NIADDR indirect levels. */
104 struct indir indirs[UFS_NIADDR + 2];
105 int deallocated, osize, nsize, num, i, error;
107 ufs1_daddr_t *bap, pref;
/* allociblk[]/lbns[] record every block allocated in this call so the
 * failure path below can unwind them; allocblk/lbns_remfree are the
 * fill pointers into those arrays. */
108 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
109 ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
112 int gbflags, reclaimed;
/* Convert the byte offset to a logical block number; "size" becomes the
 * in-block extent (offset within block + requested size). */
119 lbn = lblkno(fs, startoffset);
120 size = blkoff(fs, startoffset) + size;
122 if (size > fs->fs_bsize)
123 panic("ffs_balloc_ufs1: blk too big");
/* BA_UNMAPPED callers get buffers without a KVA mapping. */
129 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
131 if (DOINGSOFTDEP(vp))
132 softdep_prealloc(vp, MNT_WAIT);
134 * If the next write will extend the file into a new block,
135 * and the file is currently composed of a fragment
136 * this fragment has to be extended to be a full block.
138 lastlbn = lblkno(fs, ip->i_size);
139 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
141 osize = blksize(fs, ip, nb);
142 if (osize < fs->fs_bsize && osize > 0) {
/* Grow the trailing fragment to a full block in place. */
144 error = ffs_realloccg(ip, nb, dp->di_db[nb],
145 ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
146 &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
150 if (DOINGSOFTDEP(vp))
151 softdep_setup_allocdirect(ip, nb,
152 dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
153 fs->fs_bsize, osize, bp);
/* Both in-core and on-disk inode sizes track the extended block. */
154 ip->i_size = smalllblktosize(fs, nb + 1);
155 dp->di_size = ip->i_size;
156 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
157 UFS_INODE_SET_FLAG(ip,
158 IN_SIZEMOD | IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
161 else if (DOINGASYNC(vp))
168 * The first UFS_NDADDR blocks are direct blocks
170 if (lbn < UFS_NDADDR) {
171 if (flags & BA_METAONLY)
172 panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
/* Block already allocated and fully covered by the file size:
 * just read it. */
174 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
175 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
179 bp->b_blkno = fsbtodb(fs, nb);
185 * Consider need to reallocate a fragment.
187 osize = fragroundup(fs, blkoff(fs, ip->i_size));
188 nsize = fragroundup(fs, size);
189 if (nsize <= osize) {
/* Existing fragment is already big enough. */
190 error = bread(vp, lbn, osize, NOCRED, &bp);
194 bp->b_blkno = fsbtodb(fs, nb);
/* Fragment must grow: reallocate to the larger size. */
197 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
198 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
199 &dp->di_db[0]), osize, nsize, flags,
203 if (DOINGSOFTDEP(vp))
204 softdep_setup_allocdirect(ip, lbn,
205 dbtofsb(fs, bp->b_blkno), nb,
/* No block yet: allocate a fragment or a full block depending on
 * whether this block is the last one in the file. */
209 if (ip->i_size < smalllblktosize(fs, lbn + 1))
210 nsize = fragroundup(fs, size);
212 nsize = fs->fs_bsize;
214 error = ffs_alloc(ip, lbn,
215 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
216 nsize, flags, cred, &newb);
219 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
220 bp->b_blkno = fsbtodb(fs, newb);
221 if (flags & BA_CLRBUF)
223 if (DOINGSOFTDEP(vp))
224 softdep_setup_allocdirect(ip, lbn, newb, 0,
227 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
228 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
233 * Determine the number of levels of indirection.
236 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
240 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
/* Guard against recursive buf-daemon flushing while we hold buffers;
 * every exit path below restores this flag. */
242 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
244 * Fetch the first indirect block allocating if necessary.
247 nb = dp->di_ib[indirs[0].in_off];
249 allocblk = allociblk;
253 pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
255 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
256 flags, cred, &newb)) != 0) {
257 curthread_pflags_restore(saved_inbdflush);
/* Prefer the block right after the one just allocated for the
 * next allocation in this chain. */
260 pref = newb + fs->fs_frag;
262 MPASS(allocblk < allociblk + nitems(allociblk));
263 MPASS(lbns_remfree < lbns + nitems(lbns));
265 *lbns_remfree++ = indirs[1].in_lbn;
266 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
267 bp->b_blkno = fsbtodb(fs, nb);
269 if (DOINGSOFTDEP(vp)) {
270 softdep_setup_allocdirect(ip,
271 UFS_NDADDR + indirs[0].in_off, newb, 0,
272 fs->fs_bsize, 0, bp);
274 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
275 if (bp->b_bufsize == fs->fs_bsize)
276 bp->b_flags |= B_CLUSTEROK;
/* Without soft updates the new indirect block must reach disk
 * before the inode points at it. */
279 if ((error = bwrite(bp)) != 0)
282 allocib = &dp->di_ib[indirs[0].in_off];
284 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
287 * Fetch through the indirect blocks, allocating as necessary.
292 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
296 bap = (ufs1_daddr_t *)bp->b_data;
297 nb = bap[indirs[i].in_off];
/* Sanity-check the on-disk block pointer before trusting it. */
298 if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
299 fs->fs_bsize)) != 0) {
312 * If parent indirect has just been allocated, try to cluster
313 * immediately following it.
316 pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
318 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
319 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
/* On ENOSPC, ask soft updates to flush freed space once and
 * retry before reporting "filesystem full" (rate-limited). */
322 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
323 softdep_request_cleanup(fs, vp, cred,
328 if (!ffs_fsfail_cleanup_locked(ump, error) &&
329 ppsratecheck(&ump->um_last_fullmsg,
330 &ump->um_secs_fullmsg, 1)) {
332 ffs_fserr(fs, ip->i_number, "filesystem full");
333 uprintf("\n%s: write failed, filesystem "
334 "is full\n", fs->fs_fsmnt);
340 pref = newb + fs->fs_frag;
342 MPASS(allocblk < allociblk + nitems(allociblk));
343 MPASS(lbns_remfree < lbns + nitems(lbns));
345 *lbns_remfree++ = indirs[i].in_lbn;
346 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
347 nbp->b_blkno = fsbtodb(fs, nb);
349 if (DOINGSOFTDEP(vp)) {
350 softdep_setup_allocindir_meta(nbp, ip, bp,
351 indirs[i - 1].in_off, nb);
353 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
354 if (nbp->b_bufsize == fs->fs_bsize)
355 nbp->b_flags |= B_CLUSTEROK;
358 if ((error = bwrite(nbp)) != 0) {
/* Publish the child pointer in the parent indirect block;
 * remember the first level to clear if we must unwind. */
363 bap[indirs[i - 1].in_off] = nb;
364 if (allocib == NULL && unwindidx < 0)
367 * If required, write synchronously, otherwise use
370 if (flags & IO_SYNC) {
373 if (bp->b_bufsize == fs->fs_bsize)
374 bp->b_flags |= B_CLUSTEROK;
379 * If asked only for the indirect block, then return it.
381 if (flags & BA_METAONLY) {
382 curthread_pflags_restore(saved_inbdflush);
387 * Get the data block, allocating if necessary.
392 * If allocating metadata at the front of the cylinder
393 * group and parent indirect block has just been allocated,
394 * then cluster next to it if it is the first indirect in
395 * the file. Otherwise it has been allocated in the metadata
396 * area, so we want to find our own place out in the data area.
398 if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
399 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
401 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
402 flags | IO_BUFLOCKED, cred, &newb);
/* Same one-shot cleanup-and-retry as the indirect case above. */
406 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
407 softdep_request_cleanup(fs, vp, cred,
412 if (!ffs_fsfail_cleanup_locked(ump, error) &&
413 ppsratecheck(&ump->um_last_fullmsg,
414 &ump->um_secs_fullmsg, 1)) {
416 ffs_fserr(fs, ip->i_number, "filesystem full");
417 uprintf("\n%s: write failed, filesystem "
418 "is full\n", fs->fs_fsmnt);
425 MPASS(allocblk < allociblk + nitems(allociblk));
426 MPASS(lbns_remfree < lbns + nitems(lbns));
428 *lbns_remfree++ = lbn;
429 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
430 nbp->b_blkno = fsbtodb(fs, nb);
431 if (flags & BA_CLRBUF)
433 if (DOINGSOFTDEP(vp))
434 softdep_setup_allocindir_page(ip, lbn, bp,
435 indirs[i].in_off, nb, 0, nbp);
436 bap[indirs[i].in_off] = nb;
438 * If required, write synchronously, otherwise use
441 if (flags & IO_SYNC) {
444 if (bp->b_bufsize == fs->fs_bsize)
445 bp->b_flags |= B_CLUSTEROK;
448 curthread_pflags_restore(saved_inbdflush);
/* Data block already existed: read it, clustering sequential
 * reads when BA_CLRBUF requires read-before-write. */
453 if (flags & BA_CLRBUF) {
454 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
456 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
457 !(vm_page_count_severe() || buf_dirty_count_severe())) {
458 error = cluster_read(vp, ip->i_size, lbn,
459 (int)fs->fs_bsize, NOCRED,
460 MAXBSIZE, seqcount, gbflags, &nbp);
462 error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
470 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
471 nbp->b_blkno = fsbtodb(fs, nb);
473 curthread_pflags_restore(saved_inbdflush);
/* ---- failure path ---- */
477 curthread_pflags_restore(saved_inbdflush);
479 * If we have failed to allocate any blocks, simply return the error.
480 * This is the usual case and avoids the need to fsync the file.
482 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
485 * If we have failed part way through block allocation, we
486 * have to deallocate any indirect blocks that we have allocated.
487 * We have to fsync the file before we start to get rid of all
488 * of its dependencies so that we do not leave them dangling.
489 * We have to sync it at the end so that the soft updates code
490 * does not find any untracked changes. Although this is really
491 * slow, running out of disk space is not expected to be a common
492 * occurrence. The error return from fsync is ignored as we already
493 * have an error to return to the user.
495 * XXX Still have to journal the free below
497 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
/* Invalidate every buffer backed by a block we allocated in this
 * call, so freed blocks do not linger on the vnode's buffer lists. */
498 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
499 blkp < allocblk; blkp++, lbns_remfree++) {
501 * We shall not leave the freed blocks on the vnode
502 * buffer object lists.
504 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
505 GB_NOCREAT | GB_UNMAPPED);
507 KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
508 ("mismatch1 l %jd %jd b %ju %ju",
509 (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
510 (uintmax_t)bp->b_blkno,
511 (uintmax_t)fsbtodb(fs, *blkp)));
512 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
513 bp->b_flags &= ~(B_ASYNC | B_CACHE);
516 deallocated += fs->fs_bsize;
/* Clear the on-disk pointer that made the new chain reachable:
 * either the inode's indirect slot, or a slot inside the indirect
 * block recorded in unwindidx. */
518 if (allocib != NULL) {
520 } else if (unwindidx >= 0) {
523 r = bread(vp, indirs[unwindidx].in_lbn,
524 (int)fs->fs_bsize, NOCRED, &bp);
526 panic("Could not unwind indirect block, error %d", r);
529 bap = (ufs1_daddr_t *)bp->b_data;
530 bap[indirs[unwindidx].in_off] = 0;
531 if (flags & IO_SYNC) {
534 if (bp->b_bufsize == fs->fs_bsize)
535 bp->b_flags |= B_CLUSTEROK;
543 * Restore user's disk quota because allocation failed.
545 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
547 dp->di_blocks -= btodb(deallocated);
548 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
550 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
552 * After the buffers are invalidated and on-disk pointers are
553 * cleared, free the blocks.
555 for (blkp = allociblk; blkp < allocblk; blkp++) {
/* Assumes the loop re-checks that no live buffer still maps the
 * block (the "zombie" check) before freeing -- TODO confirm against
 * the elided lines. */
557 if (blkp == allociblk)
559 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
560 GB_NOCREAT | GB_UNMAPPED);
562 panic("zombie1 %jd %ju %ju",
563 (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
564 (uintmax_t)fsbtodb(fs, *blkp));
568 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
569 ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
575 * Balloc defines the structure of file system storage
576 * by allocating the physical blocks on a device given
577 * the inode and the logical block number in a file.
578 * This is the allocation strategy for UFS2. Above is
579 * the allocation strategy for UFS1.
/*
 * NOTE(review): like the UFS1 variant above, this extract is lossy --
 * interior statements are missing and each surviving line is prefixed
 * with its original file line number.  Comments describe only what the
 * visible lines establish.  Structure mirrors ffs_balloc_ufs1() with
 * 64-bit (ufs2_daddr_t) block pointers plus an extra path for the
 * external-attribute area (IO_EXT / di_extb[]).
 */
582 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
583 struct ucred *cred, int flags, struct buf **bpp)
586 struct ufs2_dinode *dp;
587 ufs_lbn_t lbn, lastlbn;
589 struct buf *bp, *nbp;
591 struct ufsmount *ump;
592 struct indir indirs[UFS_NIADDR + 2];
593 ufs2_daddr_t nb, newb, *bap, pref;
/* allociblk[]/lbns[] record blocks allocated in this call for the
 * failure-unwind path at the bottom. */
594 ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
595 ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
596 int deallocated, osize, nsize, num, i, error;
599 int gbflags, reclaimed;
/* Byte offset -> logical block; "size" becomes in-block extent. */
606 lbn = lblkno(fs, startoffset);
607 size = blkoff(fs, startoffset) + size;
609 if (size > fs->fs_bsize)
610 panic("ffs_balloc_ufs2: blk too big");
614 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
616 if (DOINGSOFTDEP(vp))
617 softdep_prealloc(vp, MNT_WAIT);
620 * Check for allocating external data.
/* IO_EXT: allocate in the external-attribute area (di_extb[], all
 * direct blocks, read at negated lbns and marked BX_ALTDATA). */
622 if (flags & IO_EXT) {
623 if (lbn >= UFS_NXADDR)
626 * If the next write will extend the data into a new block,
627 * and the data is currently composed of a fragment
628 * this fragment has to be extended to be a full block.
630 lastlbn = lblkno(fs, dp->di_extsize);
633 osize = sblksize(fs, dp->di_extsize, nb);
634 if (osize < fs->fs_bsize && osize > 0) {
636 error = ffs_realloccg(ip, -1 - nb,
638 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
639 &dp->di_extb[0]), osize,
640 (int)fs->fs_bsize, flags, cred, &bp);
643 if (DOINGSOFTDEP(vp))
644 softdep_setup_allocext(ip, nb,
645 dbtofsb(fs, bp->b_blkno),
647 fs->fs_bsize, osize, bp);
648 dp->di_extsize = smalllblktosize(fs, nb + 1);
649 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
650 bp->b_xflags |= BX_ALTDATA;
651 UFS_INODE_SET_FLAG(ip,
652 IN_SIZEMOD | IN_CHANGE | IN_IBLKDATA);
660 * All blocks are direct blocks
662 if (flags & BA_METAONLY)
663 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
664 nb = dp->di_extb[lbn];
665 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
666 error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
671 bp->b_blkno = fsbtodb(fs, nb);
672 bp->b_xflags |= BX_ALTDATA;
678 * Consider need to reallocate a fragment.
680 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
681 nsize = fragroundup(fs, size);
682 if (nsize <= osize) {
683 error = bread_gb(vp, -1 - lbn, osize, NOCRED,
688 bp->b_blkno = fsbtodb(fs, nb);
689 bp->b_xflags |= BX_ALTDATA;
692 error = ffs_realloccg(ip, -1 - lbn,
694 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
695 &dp->di_extb[0]), osize, nsize, flags,
699 bp->b_xflags |= BX_ALTDATA;
700 if (DOINGSOFTDEP(vp))
701 softdep_setup_allocext(ip, lbn,
702 dbtofsb(fs, bp->b_blkno), nb,
706 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
707 nsize = fragroundup(fs, size);
709 nsize = fs->fs_bsize;
711 error = ffs_alloc(ip, lbn,
712 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
713 nsize, flags, cred, &newb);
716 bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
717 bp->b_blkno = fsbtodb(fs, newb);
718 bp->b_xflags |= BX_ALTDATA;
719 if (flags & BA_CLRBUF)
721 if (DOINGSOFTDEP(vp))
722 softdep_setup_allocext(ip, lbn, newb, 0,
725 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
726 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_IBLKDATA);
/* ---- regular file data from here on; mirrors the UFS1 code ---- */
731 * If the next write will extend the file into a new block,
732 * and the file is currently composed of a fragment
733 * this fragment has to be extended to be a full block.
735 lastlbn = lblkno(fs, ip->i_size);
736 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
738 osize = blksize(fs, ip, nb);
739 if (osize < fs->fs_bsize && osize > 0) {
741 error = ffs_realloccg(ip, nb, dp->di_db[nb],
742 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
743 &dp->di_db[0]), osize, (int)fs->fs_bsize,
747 if (DOINGSOFTDEP(vp))
748 softdep_setup_allocdirect(ip, nb,
749 dbtofsb(fs, bp->b_blkno),
751 fs->fs_bsize, osize, bp);
752 ip->i_size = smalllblktosize(fs, nb + 1);
753 dp->di_size = ip->i_size;
754 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
755 UFS_INODE_SET_FLAG(ip,
756 IN_SIZEMOD |IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
764 * The first UFS_NDADDR blocks are direct blocks
766 if (lbn < UFS_NDADDR) {
767 if (flags & BA_METAONLY)
768 panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
770 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
771 error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
776 bp->b_blkno = fsbtodb(fs, nb);
782 * Consider need to reallocate a fragment.
784 osize = fragroundup(fs, blkoff(fs, ip->i_size));
785 nsize = fragroundup(fs, size);
786 if (nsize <= osize) {
787 error = bread_gb(vp, lbn, osize, NOCRED,
792 bp->b_blkno = fsbtodb(fs, nb);
795 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
796 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
797 &dp->di_db[0]), osize, nsize, flags,
801 if (DOINGSOFTDEP(vp))
802 softdep_setup_allocdirect(ip, lbn,
803 dbtofsb(fs, bp->b_blkno), nb,
/* No block yet: fragment for the last block, else a full block. */
807 if (ip->i_size < smalllblktosize(fs, lbn + 1))
808 nsize = fragroundup(fs, size);
810 nsize = fs->fs_bsize;
812 error = ffs_alloc(ip, lbn,
813 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
814 &dp->di_db[0]), nsize, flags, cred, &newb);
817 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
818 bp->b_blkno = fsbtodb(fs, newb);
819 if (flags & BA_CLRBUF)
821 if (DOINGSOFTDEP(vp))
822 softdep_setup_allocdirect(ip, lbn, newb, 0,
825 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
826 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
831 * Determine the number of levels of indirection.
834 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
838 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
/* Guard against recursive buf-daemon flushing; restored on every
 * visible exit path below. */
840 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
842 * Fetch the first indirect block allocating if necessary.
845 nb = dp->di_ib[indirs[0].in_off];
847 allocblk = allociblk;
851 pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
853 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
854 flags, cred, &newb)) != 0) {
855 curthread_pflags_restore(saved_inbdflush);
858 pref = newb + fs->fs_frag;
860 MPASS(allocblk < allociblk + nitems(allociblk));
861 MPASS(lbns_remfree < lbns + nitems(lbns));
863 *lbns_remfree++ = indirs[1].in_lbn;
864 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
866 bp->b_blkno = fsbtodb(fs, nb);
868 if (DOINGSOFTDEP(vp)) {
869 softdep_setup_allocdirect(ip,
870 UFS_NDADDR + indirs[0].in_off, newb, 0,
871 fs->fs_bsize, 0, bp);
873 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
874 if (bp->b_bufsize == fs->fs_bsize)
875 bp->b_flags |= B_CLUSTEROK;
/* Without soft updates the indirect block must hit disk before
 * the inode references it. */
878 if ((error = bwrite(bp)) != 0)
881 allocib = &dp->di_ib[indirs[0].in_off];
883 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
886 * Fetch through the indirect blocks, allocating as necessary.
891 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
895 bap = (ufs2_daddr_t *)bp->b_data;
896 nb = bap[indirs[i].in_off];
/* Validate the on-disk block pointer before use. */
897 if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
898 fs->fs_bsize)) != 0) {
911 * If parent indirect has just been allocated, try to cluster
912 * immediately following it.
915 pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
917 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
918 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
/* One-shot softdep cleanup retry before reporting ENOSPC,
 * with rate-limited "filesystem full" console message. */
921 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
922 softdep_request_cleanup(fs, vp, cred,
927 if (!ffs_fsfail_cleanup_locked(ump, error) &&
928 ppsratecheck(&ump->um_last_fullmsg,
929 &ump->um_secs_fullmsg, 1)) {
931 ffs_fserr(fs, ip->i_number, "filesystem full");
932 uprintf("\n%s: write failed, filesystem "
933 "is full\n", fs->fs_fsmnt);
939 pref = newb + fs->fs_frag;
941 MPASS(allocblk < allociblk + nitems(allociblk));
942 MPASS(lbns_remfree < lbns + nitems(lbns));
944 *lbns_remfree++ = indirs[i].in_lbn;
945 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
947 nbp->b_blkno = fsbtodb(fs, nb);
949 if (DOINGSOFTDEP(vp)) {
950 softdep_setup_allocindir_meta(nbp, ip, bp,
951 indirs[i - 1].in_off, nb);
953 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
954 if (nbp->b_bufsize == fs->fs_bsize)
955 nbp->b_flags |= B_CLUSTEROK;
958 if ((error = bwrite(nbp)) != 0) {
/* Publish child pointer; note first level to clear on unwind. */
963 bap[indirs[i - 1].in_off] = nb;
964 if (allocib == NULL && unwindidx < 0)
967 * If required, write synchronously, otherwise use
970 if (flags & IO_SYNC) {
973 if (bp->b_bufsize == fs->fs_bsize)
974 bp->b_flags |= B_CLUSTEROK;
979 * If asked only for the indirect block, then return it.
981 if (flags & BA_METAONLY) {
982 curthread_pflags_restore(saved_inbdflush);
987 * Get the data block, allocating if necessary.
992 * If allocating metadata at the front of the cylinder
993 * group and parent indirect block has just been allocated,
994 * then cluster next to it if it is the first indirect in
995 * the file. Otherwise it has been allocated in the metadata
996 * area, so we want to find our own place out in the data area.
998 if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
999 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
1001 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
1002 flags | IO_BUFLOCKED, cred, &newb);
1006 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
1007 softdep_request_cleanup(fs, vp, cred,
1012 if (!ffs_fsfail_cleanup_locked(ump, error) &&
1013 ppsratecheck(&ump->um_last_fullmsg,
1014 &ump->um_secs_fullmsg, 1)) {
1016 ffs_fserr(fs, ip->i_number, "filesystem full");
1017 uprintf("\n%s: write failed, filesystem "
1018 "is full\n", fs->fs_fsmnt);
1025 MPASS(allocblk < allociblk + nitems(allociblk));
1026 MPASS(lbns_remfree < lbns + nitems(lbns));
1028 *lbns_remfree++ = lbn;
1029 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1030 nbp->b_blkno = fsbtodb(fs, nb);
1031 if (flags & BA_CLRBUF)
1032 vfs_bio_clrbuf(nbp);
1033 if (DOINGSOFTDEP(vp))
1034 softdep_setup_allocindir_page(ip, lbn, bp,
1035 indirs[i].in_off, nb, 0, nbp);
1036 bap[indirs[i].in_off] = nb;
1038 * If required, write synchronously, otherwise use
1041 if (flags & IO_SYNC) {
1044 if (bp->b_bufsize == fs->fs_bsize)
1045 bp->b_flags |= B_CLUSTEROK;
1048 curthread_pflags_restore(saved_inbdflush);
1054 * If requested clear invalid portions of the buffer. If we
1055 * have to do a read-before-write (typical if BA_CLRBUF is set),
1056 * try to do some read-ahead in the sequential case to reduce
1057 * the number of I/O transactions.
1059 if (flags & BA_CLRBUF) {
1060 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1061 if (seqcount != 0 &&
1062 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1063 !(vm_page_count_severe() || buf_dirty_count_severe())) {
1064 error = cluster_read(vp, ip->i_size, lbn,
1065 (int)fs->fs_bsize, NOCRED,
1066 MAXBSIZE, seqcount, gbflags, &nbp);
1068 error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1069 NOCRED, gbflags, &nbp);
1076 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1077 nbp->b_blkno = fsbtodb(fs, nb);
1079 curthread_pflags_restore(saved_inbdflush);
/* ---- failure path ---- */
1083 curthread_pflags_restore(saved_inbdflush);
1085 * If we have failed to allocate any blocks, simply return the error.
1086 * This is the usual case and avoids the need to fsync the file.
1088 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1091 * If we have failed part way through block allocation, we
1092 * have to deallocate any indirect blocks that we have allocated.
1093 * We have to fsync the file before we start to get rid of all
1094 * of its dependencies so that we do not leave them dangling.
1095 * We have to sync it at the end so that the soft updates code
1096 * does not find any untracked changes. Although this is really
1097 * slow, running out of disk space is not expected to be a common
1098 * occurrence. The error return from fsync is ignored as we already
1099 * have an error to return to the user.
1101 * XXX Still have to journal the free below
1103 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
/* Invalidate buffers for every block allocated by this call so the
 * soon-to-be-freed blocks leave the vnode's buffer lists. */
1104 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1105 blkp < allocblk; blkp++, lbns_remfree++) {
1107 * We shall not leave the freed blocks on the vnode
1108 * buffer object lists.
1110 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1111 GB_NOCREAT | GB_UNMAPPED);
1113 KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1114 ("mismatch2 l %jd %jd b %ju %ju",
1115 (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1116 (uintmax_t)bp->b_blkno,
1117 (uintmax_t)fsbtodb(fs, *blkp)));
1118 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1119 bp->b_flags &= ~(B_ASYNC | B_CACHE);
1122 deallocated += fs->fs_bsize;
/* Clear the pointer that made the new chain reachable: either the
 * inode's indirect slot or a slot inside the unwindidx indirect. */
1124 if (allocib != NULL) {
1126 } else if (unwindidx >= 0) {
1129 r = bread(vp, indirs[unwindidx].in_lbn,
1130 (int)fs->fs_bsize, NOCRED, &bp);
1132 panic("Could not unwind indirect block, error %d", r);
1135 bap = (ufs2_daddr_t *)bp->b_data;
1136 bap[indirs[unwindidx].in_off] = 0;
1137 if (flags & IO_SYNC) {
1140 if (bp->b_bufsize == fs->fs_bsize)
1141 bp->b_flags |= B_CLUSTEROK;
1149 * Restore user's disk quota because allocation failed.
1151 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1153 dp->di_blocks -= btodb(deallocated);
1154 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
1156 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1158 * After the buffers are invalidated and on-disk pointers are
1159 * cleared, free the blocks.
1161 for (blkp = allociblk; blkp < allocblk; blkp++) {
/* Assumes a re-check that no live buffer still maps the block
 * (the "zombie" check) before freeing -- TODO confirm against the
 * elided lines. */
1163 if (blkp == allociblk)
1164 lbns_remfree = lbns;
1165 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1166 GB_NOCREAT | GB_UNMAPPED);
1168 panic("zombie2 %jd %ju %ju",
1169 (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1170 (uintmax_t)fsbtodb(fs, *blkp));
1174 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
1175 ip->i_number, vp->v_type, NULL, SINGLETON_KEY);