2 * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * Copyright (c) 1982, 1986, 1989, 1993
35 * The Regents of the University of California. All rights reserved.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD$");
67 #include <sys/param.h>
68 #include <sys/systm.h>
72 #include <sys/mount.h>
73 #include <sys/vnode.h>
74 #include <sys/vmmeter.h>
76 #include <ufs/ufs/quota.h>
77 #include <ufs/ufs/inode.h>
78 #include <ufs/ufs/ufs_extern.h>
79 #include <ufs/ufs/extattr.h>
80 #include <ufs/ufs/ufsmount.h>
82 #include <ufs/ffs/fs.h>
83 #include <ufs/ffs/ffs_extern.h>
86 * Balloc defines the structure of filesystem storage
87 * by allocating the physical blocks on a device given
88 * the inode and the logical block number in a file.
89 * This is the allocation strategy for UFS1. Below is
90 * the allocation strategy for UFS2.
/*
 * ffs_balloc_ufs1 -- allocate the physical (on-disk) block backing the
 * UFS1 logical block that contains byte offset 'startoffset' of vnode
 * 'vp', returning a buffer for it via '*bpp'.  With BA_METAONLY the
 * caller wants the enclosing indirect block instead (panics if asked
 * for a direct block, see below).
 *
 * NOTE(review): this listing is a sampled extract of the original
 * file.  The numeric prefix on each line is the source file's line
 * number, and the gaps in that numbering show that intervening
 * statements (declarations of ip/fs/mp/newb/unwindidx, braces,
 * returns, error labels) are elided here.  Do not treat this text as
 * compilable; the comments below describe only the lines that are
 * visible.
 */
93 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
94 struct ucred *cred, int flags, struct buf **bpp)
97 struct ufs1_dinode *dp;
98 ufs_lbn_t lbn, lastlbn;
101 struct buf *bp, *nbp;
103 struct ufsmount *ump;
104 struct indir indirs[UFS_NIADDR + 2];
105 int deallocated, osize, nsize, num, i, error;
107 ufs1_daddr_t *bap, pref;
/* allociblk[]/lbns[] record every block allocated in this call so a
 * partial failure can be unwound (freed) at the bottom of the function. */
108 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
109 ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
112 int gbflags, reclaimed;
/* Logical block holding startoffset; 'size' becomes the byte extent
 * measured from the start of that block (must not exceed a full block). */
119 lbn = lblkno(fs, startoffset);
120 size = blkoff(fs, startoffset) + size;
122 if (size > fs->fs_bsize)
123 panic("ffs_balloc_ufs1: blk too big");
129 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
131 vn_seqc_write_begin(vp);
134 * If the next write will extend the file into a new block,
135 * and the file is currently composed of a fragment
136 * this fragment has to be extended to be a full block.
138 lastlbn = lblkno(fs, ip->i_size);
139 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
141 osize = blksize(fs, ip, nb);
142 if (osize < fs->fs_bsize && osize > 0) {
/* Grow the trailing fragment to a full block in place (or by moving it). */
144 error = ffs_realloccg(ip, nb, dp->di_db[nb],
145 ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
146 &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
150 if (DOINGSOFTDEP(vp))
151 softdep_setup_allocdirect(ip, nb,
152 dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
153 fs->fs_bsize, osize, bp);
154 ip->i_size = smalllblktosize(fs, nb + 1);
155 dp->di_size = ip->i_size;
156 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
157 UFS_INODE_SET_FLAG(ip,
158 IN_SIZEMOD | IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
161 else if (DOINGASYNC(vp))
168 * The first UFS_NDADDR blocks are direct blocks
170 if (lbn < UFS_NDADDR) {
171 if (flags & BA_METAONLY)
172 panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
/* Block already allocated and file covers it entirely: just fetch it. */
174 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
175 if ((flags & BA_CLRBUF) != 0) {
176 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
181 bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
189 bp->b_blkno = fsbtodb(fs, nb);
196 * Consider need to reallocate a fragment.
198 osize = fragroundup(fs, blkoff(fs, ip->i_size));
199 nsize = fragroundup(fs, size);
200 if (nsize <= osize) {
201 error = bread(vp, lbn, osize, NOCRED, &bp);
204 bp->b_blkno = fsbtodb(fs, nb);
207 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
208 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
209 &dp->di_db[0]), osize, nsize, flags,
213 if (DOINGSOFTDEP(vp))
214 softdep_setup_allocdirect(ip, lbn,
215 dbtofsb(fs, bp->b_blkno), nb,
/* No block yet: allocate a fragment if this is the (short) last block,
 * otherwise a full block. */
219 if (ip->i_size < smalllblktosize(fs, lbn + 1))
220 nsize = fragroundup(fs, size);
222 nsize = fs->fs_bsize;
224 error = ffs_alloc(ip, lbn,
225 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
226 nsize, flags, cred, &newb);
229 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
230 bp->b_blkno = fsbtodb(fs, newb);
231 if (flags & BA_CLRBUF)
233 if (DOINGSOFTDEP(vp))
234 softdep_setup_allocdirect(ip, lbn, newb, 0,
237 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
238 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
244 * Determine the number of levels of indirection.
247 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
251 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
253 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
255 * Fetch the first indirect block allocating if necessary.
258 nb = dp->di_ib[indirs[0].in_off];
260 allocblk = allociblk;
264 pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
266 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
267 flags, cred, &newb)) != 0) {
268 curthread_pflags_restore(saved_inbdflush);
271 pref = newb + fs->fs_frag;
/* Never record more allocations than the unwind arrays can hold. */
273 MPASS(allocblk < allociblk + nitems(allociblk));
274 MPASS(lbns_remfree < lbns + nitems(lbns));
276 *lbns_remfree++ = indirs[1].in_lbn;
277 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
278 bp->b_blkno = fsbtodb(fs, nb);
280 if (DOINGSOFTDEP(vp)) {
281 softdep_setup_allocdirect(ip,
282 UFS_NDADDR + indirs[0].in_off, newb, 0,
283 fs->fs_bsize, 0, bp);
285 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
286 if (bp->b_bufsize == fs->fs_bsize)
287 bp->b_flags |= B_CLUSTEROK;
290 if ((error = bwrite(bp)) != 0)
293 allocib = &dp->di_ib[indirs[0].in_off];
295 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
298 * Fetch through the indirect blocks, allocating as necessary.
303 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
307 bap = (ufs1_daddr_t *)bp->b_data;
308 nb = bap[indirs[i].in_off];
/* Reject block numbers that fall outside the filesystem (corruption check). */
309 if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
310 fs->fs_bsize)) != 0) {
323 * If parent indirect has just been allocated, try to cluster
324 * immediately following it.
327 pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
329 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
330 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
/* First allocation failure: ask softdep to reclaim space, then retry
 * (visible retry bookkeeping via 'reclaimed'). */
333 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
334 softdep_request_cleanup(fs, vp, cred,
339 if (!ffs_fsfail_cleanup_locked(ump, error) &&
340 ppsratecheck(&ump->um_last_fullmsg,
341 &ump->um_secs_fullmsg, 1)) {
343 ffs_fserr(fs, ip->i_number, "filesystem full");
344 uprintf("\n%s: write failed, filesystem "
345 "is full\n", fs->fs_fsmnt);
351 pref = newb + fs->fs_frag;
353 MPASS(allocblk < allociblk + nitems(allociblk));
354 MPASS(lbns_remfree < lbns + nitems(lbns));
356 *lbns_remfree++ = indirs[i].in_lbn;
357 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
358 nbp->b_blkno = fsbtodb(fs, nb);
360 if (DOINGSOFTDEP(vp)) {
361 softdep_setup_allocindir_meta(nbp, ip, bp,
362 indirs[i - 1].in_off, nb);
364 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
365 if (nbp->b_bufsize == fs->fs_bsize)
366 nbp->b_flags |= B_CLUSTEROK;
369 if ((error = bwrite(nbp)) != 0) {
/* Hook the new indirect block into its parent and remember the first
 * unwind point for error recovery. */
374 bap[indirs[i - 1].in_off] = nb;
375 if (allocib == NULL && unwindidx < 0)
378 * If required, write synchronously, otherwise use
381 if (flags & IO_SYNC) {
384 if (bp->b_bufsize == fs->fs_bsize)
385 bp->b_flags |= B_CLUSTEROK;
390 * If asked only for the indirect block, then return it.
392 if (flags & BA_METAONLY) {
393 curthread_pflags_restore(saved_inbdflush);
399 * Get the data block, allocating if necessary.
404 * If allocating metadata at the front of the cylinder
405 * group and parent indirect block has just been allocated,
406 * then cluster next to it if it is the first indirect in
407 * the file. Otherwise it has been allocated in the metadata
408 * area, so we want to find our own place out in the data area.
410 if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
411 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
413 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
414 flags | IO_BUFLOCKED, cred, &newb);
418 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
419 softdep_request_cleanup(fs, vp, cred,
424 if (!ffs_fsfail_cleanup_locked(ump, error) &&
425 ppsratecheck(&ump->um_last_fullmsg,
426 &ump->um_secs_fullmsg, 1)) {
428 ffs_fserr(fs, ip->i_number, "filesystem full");
429 uprintf("\n%s: write failed, filesystem "
430 "is full\n", fs->fs_fsmnt);
437 MPASS(allocblk < allociblk + nitems(allociblk));
438 MPASS(lbns_remfree < lbns + nitems(lbns));
440 *lbns_remfree++ = lbn;
441 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
442 nbp->b_blkno = fsbtodb(fs, nb);
443 if (flags & BA_CLRBUF)
445 if (DOINGSOFTDEP(vp))
446 softdep_setup_allocindir_page(ip, lbn, bp,
447 indirs[i].in_off, nb, 0, nbp);
448 bap[indirs[i].in_off] = nb;
450 * If required, write synchronously, otherwise use
453 if (flags & IO_SYNC) {
456 if (bp->b_bufsize == fs->fs_bsize)
457 bp->b_flags |= B_CLUSTEROK;
460 curthread_pflags_restore(saved_inbdflush);
/* Data block already existed: read it (with read-ahead clustering when
 * the access pattern looks sequential and memory pressure permits). */
466 if (flags & BA_CLRBUF) {
467 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
469 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
470 !(vm_page_count_severe() || buf_dirty_count_severe())) {
471 error = cluster_read(vp, ip->i_size, lbn,
472 (int)fs->fs_bsize, NOCRED,
473 MAXBSIZE, seqcount, gbflags, &nbp);
475 error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
483 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
484 nbp->b_blkno = fsbtodb(fs, nb);
486 curthread_pflags_restore(saved_inbdflush);
491 curthread_pflags_restore(saved_inbdflush);
493 * If we have failed to allocate any blocks, simply return the error.
494 * This is the usual case and avoids the need to fsync the file.
496 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
499 * If we have failed part way through block allocation, we
500 * have to deallocate any indirect blocks that we have allocated.
501 * We have to fsync the file before we start to get rid of all
502 * of its dependencies so that we do not leave them dangling.
503 * We have to sync it at the end so that the soft updates code
504 * does not find any untracked changes. Although this is really
505 * slow, running out of disk space is not expected to be a common
506 * occurrence. The error return from fsync is ignored as we already
507 * have an error to return to the user.
509 * XXX Still have to journal the free below
511 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
512 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
513 blkp < allocblk; blkp++, lbns_remfree++) {
515 * We shall not leave the freed blocks on the vnode
516 * buffer object lists.
518 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
519 GB_NOCREAT | GB_UNMAPPED);
521 KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
522 ("mismatch1 l %jd %jd b %ju %ju",
523 (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
524 (uintmax_t)bp->b_blkno,
525 (uintmax_t)fsbtodb(fs, *blkp)));
/* Discard the buffer entirely; the underlying block is about to be freed. */
526 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
527 bp->b_flags &= ~(B_ASYNC | B_CACHE);
530 deallocated += fs->fs_bsize;
532 if (allocib != NULL) {
534 } else if (unwindidx >= 0) {
537 r = bread(vp, indirs[unwindidx].in_lbn,
538 (int)fs->fs_bsize, NOCRED, &bp);
540 panic("Could not unwind indirect block, error %d", r);
543 bap = (ufs1_daddr_t *)bp->b_data;
544 bap[indirs[unwindidx].in_off] = 0;
545 if (flags & IO_SYNC) {
548 if (bp->b_bufsize == fs->fs_bsize)
549 bp->b_flags |= B_CLUSTEROK;
557 * Restore user's disk quota because allocation failed.
559 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
561 dp->di_blocks -= btodb(deallocated);
562 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
564 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
566 * After the buffers are invalidated and on-disk pointers are
567 * cleared, free the blocks.
569 for (blkp = allociblk; blkp < allocblk; blkp++) {
571 if (blkp == allociblk)
/* Paranoia: a buffer for a freed block must not reappear ("zombie"). */
573 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
574 GB_NOCREAT | GB_UNMAPPED);
576 panic("zombie1 %jd %ju %ju",
577 (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
578 (uintmax_t)fsbtodb(fs, *blkp));
582 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
583 ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
586 vn_seqc_write_end(vp);
591 * Balloc defines the structure of file system storage
592 * by allocating the physical blocks on a device given
593 * the inode and the logical block number in a file.
594 * This is the allocation strategy for UFS2. Above is
595 * the allocation strategy for UFS1.
/*
 * ffs_balloc_ufs2 -- UFS2 counterpart of ffs_balloc_ufs1: allocate the
 * physical block backing the logical block containing 'startoffset' of
 * 'vp' and return a buffer via '*bpp'.  Unlike UFS1 it also handles
 * IO_EXT, which allocates in the inode's external-attribute block
 * array (di_extb[]) instead of the file data area.
 *
 * NOTE(review): this listing is a sampled extract of the original
 * file.  The numeric prefix on each line is the source file's line
 * number, and the gaps in that numbering show that intervening
 * statements (declarations of ip/fs/mp/unwindidx, braces, returns,
 * error labels) are elided here.  Do not treat this text as
 * compilable; the comments below describe only the visible lines.
 */
598 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
599 struct ucred *cred, int flags, struct buf **bpp)
602 struct ufs2_dinode *dp;
603 ufs_lbn_t lbn, lastlbn;
605 struct buf *bp, *nbp;
607 struct ufsmount *ump;
608 struct indir indirs[UFS_NIADDR + 2];
609 ufs2_daddr_t nb, newb, *bap, pref;
/* allociblk[]/lbns[] record every block allocated in this call so a
 * partial failure can be unwound (freed) at the bottom of the function. */
610 ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
611 ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
612 int deallocated, osize, nsize, num, i, error;
615 int gbflags, reclaimed;
/* Logical block holding startoffset; 'size' becomes the byte extent
 * measured from the start of that block (must not exceed a full block). */
622 lbn = lblkno(fs, startoffset);
623 size = blkoff(fs, startoffset) + size;
625 if (size > fs->fs_bsize)
626 panic("ffs_balloc_ufs2: blk too big");
630 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
632 vn_seqc_write_begin(vp);
635 * Check for allocating external data.
/* IO_EXT path: operate on the external-attribute area (di_extb[],
 * negative lbns via -1 - lbn, buffers tagged BX_ALTDATA) rather than
 * file data. */
637 if (flags & IO_EXT) {
638 if (lbn >= UFS_NXADDR) {
644 * If the next write will extend the data into a new block,
645 * and the data is currently composed of a fragment
646 * this fragment has to be extended to be a full block.
648 lastlbn = lblkno(fs, dp->di_extsize);
651 osize = sblksize(fs, dp->di_extsize, nb);
652 if (osize < fs->fs_bsize && osize > 0) {
654 error = ffs_realloccg(ip, -1 - nb,
656 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
657 &dp->di_extb[0]), osize,
658 (int)fs->fs_bsize, flags, cred, &bp);
661 if (DOINGSOFTDEP(vp))
662 softdep_setup_allocext(ip, nb,
663 dbtofsb(fs, bp->b_blkno),
665 fs->fs_bsize, osize, bp);
666 dp->di_extsize = smalllblktosize(fs, nb + 1);
667 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
668 bp->b_xflags |= BX_ALTDATA;
669 UFS_INODE_SET_FLAG(ip,
670 IN_SIZEMOD | IN_CHANGE | IN_IBLKDATA);
678 * All blocks are direct blocks
680 if (flags & BA_METAONLY)
681 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
682 nb = dp->di_extb[lbn];
683 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
684 error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
688 bp->b_blkno = fsbtodb(fs, nb);
689 bp->b_xflags |= BX_ALTDATA;
695 * Consider need to reallocate a fragment.
697 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
698 nsize = fragroundup(fs, size);
699 if (nsize <= osize) {
700 error = bread_gb(vp, -1 - lbn, osize, NOCRED,
704 bp->b_blkno = fsbtodb(fs, nb);
705 bp->b_xflags |= BX_ALTDATA;
708 error = ffs_realloccg(ip, -1 - lbn,
710 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
711 &dp->di_extb[0]), osize, nsize, flags,
715 bp->b_xflags |= BX_ALTDATA;
716 if (DOINGSOFTDEP(vp))
717 softdep_setup_allocext(ip, lbn,
718 dbtofsb(fs, bp->b_blkno), nb,
722 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
723 nsize = fragroundup(fs, size);
725 nsize = fs->fs_bsize;
727 error = ffs_alloc(ip, lbn,
728 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
729 nsize, flags, cred, &newb);
732 bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
733 bp->b_blkno = fsbtodb(fs, newb);
734 bp->b_xflags |= BX_ALTDATA;
735 if (flags & BA_CLRBUF)
737 if (DOINGSOFTDEP(vp))
738 softdep_setup_allocext(ip, lbn, newb, 0,
741 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
742 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_IBLKDATA);
748 * If the next write will extend the file into a new block,
749 * and the file is currently composed of a fragment
750 * this fragment has to be extended to be a full block.
752 lastlbn = lblkno(fs, ip->i_size);
753 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
755 osize = blksize(fs, ip, nb);
756 if (osize < fs->fs_bsize && osize > 0) {
/* Grow the trailing fragment to a full block in place (or by moving it). */
758 error = ffs_realloccg(ip, nb, dp->di_db[nb],
759 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
760 &dp->di_db[0]), osize, (int)fs->fs_bsize,
764 if (DOINGSOFTDEP(vp))
765 softdep_setup_allocdirect(ip, nb,
766 dbtofsb(fs, bp->b_blkno),
768 fs->fs_bsize, osize, bp);
769 ip->i_size = smalllblktosize(fs, nb + 1);
770 dp->di_size = ip->i_size;
771 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
772 UFS_INODE_SET_FLAG(ip,
773 IN_SIZEMOD |IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
781 * The first UFS_NDADDR blocks are direct blocks
783 if (lbn < UFS_NDADDR) {
784 if (flags & BA_METAONLY)
785 panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
/* Block already allocated and file covers it entirely: just fetch it. */
787 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
788 if ((flags & BA_CLRBUF) != 0) {
789 error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
794 bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
802 bp->b_blkno = fsbtodb(fs, nb);
809 * Consider need to reallocate a fragment.
811 osize = fragroundup(fs, blkoff(fs, ip->i_size));
812 nsize = fragroundup(fs, size);
813 if (nsize <= osize) {
814 error = bread_gb(vp, lbn, osize, NOCRED,
818 bp->b_blkno = fsbtodb(fs, nb);
821 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
822 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
823 &dp->di_db[0]), osize, nsize, flags,
827 if (DOINGSOFTDEP(vp))
828 softdep_setup_allocdirect(ip, lbn,
829 dbtofsb(fs, bp->b_blkno), nb,
/* No block yet: allocate a fragment if this is the (short) last block,
 * otherwise a full block. */
833 if (ip->i_size < smalllblktosize(fs, lbn + 1))
834 nsize = fragroundup(fs, size);
836 nsize = fs->fs_bsize;
838 error = ffs_alloc(ip, lbn,
839 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
840 &dp->di_db[0]), nsize, flags, cred, &newb);
843 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
844 bp->b_blkno = fsbtodb(fs, newb);
845 if (flags & BA_CLRBUF)
847 if (DOINGSOFTDEP(vp))
848 softdep_setup_allocdirect(ip, lbn, newb, 0,
851 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
852 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
858 * Determine the number of levels of indirection.
861 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
865 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
867 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
869 * Fetch the first indirect block allocating if necessary.
872 nb = dp->di_ib[indirs[0].in_off];
874 allocblk = allociblk;
878 pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
880 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
881 flags, cred, &newb)) != 0) {
882 curthread_pflags_restore(saved_inbdflush);
885 pref = newb + fs->fs_frag;
/* Never record more allocations than the unwind arrays can hold. */
887 MPASS(allocblk < allociblk + nitems(allociblk));
888 MPASS(lbns_remfree < lbns + nitems(lbns));
890 *lbns_remfree++ = indirs[1].in_lbn;
891 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
893 bp->b_blkno = fsbtodb(fs, nb);
895 if (DOINGSOFTDEP(vp)) {
896 softdep_setup_allocdirect(ip,
897 UFS_NDADDR + indirs[0].in_off, newb, 0,
898 fs->fs_bsize, 0, bp);
900 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
901 if (bp->b_bufsize == fs->fs_bsize)
902 bp->b_flags |= B_CLUSTEROK;
905 if ((error = bwrite(bp)) != 0)
908 allocib = &dp->di_ib[indirs[0].in_off];
910 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
913 * Fetch through the indirect blocks, allocating as necessary.
918 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
922 bap = (ufs2_daddr_t *)bp->b_data;
923 nb = bap[indirs[i].in_off];
/* Reject block numbers that fall outside the filesystem (corruption check). */
924 if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
925 fs->fs_bsize)) != 0) {
938 * If parent indirect has just been allocated, try to cluster
939 * immediately following it.
942 pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
944 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
945 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
/* First allocation failure: ask softdep to reclaim space, then retry
 * (visible retry bookkeeping via 'reclaimed'). */
948 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
949 softdep_request_cleanup(fs, vp, cred,
954 if (!ffs_fsfail_cleanup_locked(ump, error) &&
955 ppsratecheck(&ump->um_last_fullmsg,
956 &ump->um_secs_fullmsg, 1)) {
958 ffs_fserr(fs, ip->i_number, "filesystem full");
959 uprintf("\n%s: write failed, filesystem "
960 "is full\n", fs->fs_fsmnt);
966 pref = newb + fs->fs_frag;
968 MPASS(allocblk < allociblk + nitems(allociblk));
969 MPASS(lbns_remfree < lbns + nitems(lbns));
971 *lbns_remfree++ = indirs[i].in_lbn;
972 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
974 nbp->b_blkno = fsbtodb(fs, nb);
976 if (DOINGSOFTDEP(vp)) {
977 softdep_setup_allocindir_meta(nbp, ip, bp,
978 indirs[i - 1].in_off, nb);
980 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
981 if (nbp->b_bufsize == fs->fs_bsize)
982 nbp->b_flags |= B_CLUSTEROK;
985 if ((error = bwrite(nbp)) != 0) {
/* Hook the new indirect block into its parent and remember the first
 * unwind point for error recovery. */
990 bap[indirs[i - 1].in_off] = nb;
991 if (allocib == NULL && unwindidx < 0)
994 * If required, write synchronously, otherwise use
997 if (flags & IO_SYNC) {
1000 if (bp->b_bufsize == fs->fs_bsize)
1001 bp->b_flags |= B_CLUSTEROK;
1006 * If asked only for the indirect block, then return it.
1008 if (flags & BA_METAONLY) {
1009 curthread_pflags_restore(saved_inbdflush);
1015 * Get the data block, allocating if necessary.
1020 * If allocating metadata at the front of the cylinder
1021 * group and parent indirect block has just been allocated,
1022 * then cluster next to it if it is the first indirect in
1023 * the file. Otherwise it has been allocated in the metadata
1024 * area, so we want to find our own place out in the data area.
1026 if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
1027 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
1029 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
1030 flags | IO_BUFLOCKED, cred, &newb);
1034 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
1035 softdep_request_cleanup(fs, vp, cred,
1040 if (!ffs_fsfail_cleanup_locked(ump, error) &&
1041 ppsratecheck(&ump->um_last_fullmsg,
1042 &ump->um_secs_fullmsg, 1)) {
1044 ffs_fserr(fs, ip->i_number, "filesystem full");
1045 uprintf("\n%s: write failed, filesystem "
1046 "is full\n", fs->fs_fsmnt);
1053 MPASS(allocblk < allociblk + nitems(allociblk));
1054 MPASS(lbns_remfree < lbns + nitems(lbns));
1056 *lbns_remfree++ = lbn;
1057 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1058 nbp->b_blkno = fsbtodb(fs, nb);
1059 if (flags & BA_CLRBUF)
1060 vfs_bio_clrbuf(nbp);
1061 if (DOINGSOFTDEP(vp))
1062 softdep_setup_allocindir_page(ip, lbn, bp,
1063 indirs[i].in_off, nb, 0, nbp);
1064 bap[indirs[i].in_off] = nb;
1066 * If required, write synchronously, otherwise use
1069 if (flags & IO_SYNC) {
1072 if (bp->b_bufsize == fs->fs_bsize)
1073 bp->b_flags |= B_CLUSTEROK;
1076 curthread_pflags_restore(saved_inbdflush);
1083 * If requested clear invalid portions of the buffer. If we
1084 * have to do a read-before-write (typical if BA_CLRBUF is set),
1085 * try to do some read-ahead in the sequential case to reduce
1086 * the number of I/O transactions.
1088 if (flags & BA_CLRBUF) {
1089 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1090 if (seqcount != 0 &&
1091 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1092 !(vm_page_count_severe() || buf_dirty_count_severe())) {
1093 error = cluster_read(vp, ip->i_size, lbn,
1094 (int)fs->fs_bsize, NOCRED,
1095 MAXBSIZE, seqcount, gbflags, &nbp);
1097 error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1098 NOCRED, gbflags, &nbp);
1105 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1106 nbp->b_blkno = fsbtodb(fs, nb);
1108 curthread_pflags_restore(saved_inbdflush);
1113 curthread_pflags_restore(saved_inbdflush);
1115 * If we have failed to allocate any blocks, simply return the error.
1116 * This is the usual case and avoids the need to fsync the file.
1118 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1121 * If we have failed part way through block allocation, we
1122 * have to deallocate any indirect blocks that we have allocated.
1123 * We have to fsync the file before we start to get rid of all
1124 * of its dependencies so that we do not leave them dangling.
1125 * We have to sync it at the end so that the soft updates code
1126 * does not find any untracked changes. Although this is really
1127 * slow, running out of disk space is not expected to be a common
1128 * occurrence. The error return from fsync is ignored as we already
1129 * have an error to return to the user.
1131 * XXX Still have to journal the free below
1133 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1134 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1135 blkp < allocblk; blkp++, lbns_remfree++) {
1137 * We shall not leave the freed blocks on the vnode
1138 * buffer object lists.
1140 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1141 GB_NOCREAT | GB_UNMAPPED);
1143 KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1144 ("mismatch2 l %jd %jd b %ju %ju",
1145 (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1146 (uintmax_t)bp->b_blkno,
1147 (uintmax_t)fsbtodb(fs, *blkp)));
/* Discard the buffer entirely; the underlying block is about to be freed. */
1148 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1149 bp->b_flags &= ~(B_ASYNC | B_CACHE);
1152 deallocated += fs->fs_bsize;
1154 if (allocib != NULL) {
1156 } else if (unwindidx >= 0) {
1159 r = bread(vp, indirs[unwindidx].in_lbn,
1160 (int)fs->fs_bsize, NOCRED, &bp);
1162 panic("Could not unwind indirect block, error %d", r);
1165 bap = (ufs2_daddr_t *)bp->b_data;
1166 bap[indirs[unwindidx].in_off] = 0;
1167 if (flags & IO_SYNC) {
1170 if (bp->b_bufsize == fs->fs_bsize)
1171 bp->b_flags |= B_CLUSTEROK;
1179 * Restore user's disk quota because allocation failed.
1181 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1183 dp->di_blocks -= btodb(deallocated);
1184 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
1186 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1188 * After the buffers are invalidated and on-disk pointers are
1189 * cleared, free the blocks.
1191 for (blkp = allociblk; blkp < allocblk; blkp++) {
1193 if (blkp == allociblk)
1194 lbns_remfree = lbns;
/* Paranoia: a buffer for a freed block must not reappear ("zombie"). */
1195 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1196 GB_NOCREAT | GB_UNMAPPED);
1198 panic("zombie2 %jd %ju %ju",
1199 (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1200 (uintmax_t)fsbtodb(fs, *blkp));
1204 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
1205 ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
1208 vn_seqc_write_end(vp);