2 * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * Copyright (c) 1982, 1986, 1989, 1993
35 * The Regents of the University of California. All rights reserved.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD$");
67 #include <sys/param.h>
68 #include <sys/systm.h>
72 #include <sys/mount.h>
73 #include <sys/vnode.h>
74 #include <sys/vmmeter.h>
76 #include <ufs/ufs/quota.h>
77 #include <ufs/ufs/inode.h>
78 #include <ufs/ufs/ufs_extern.h>
79 #include <ufs/ufs/extattr.h>
80 #include <ufs/ufs/ufsmount.h>
82 #include <ufs/ffs/fs.h>
83 #include <ufs/ffs/ffs_extern.h>
/*
 * NOTE(review): this extract is a sampled/truncated copy of FreeBSD's
 * ffs_balloc.c -- the embedded original line numbers jump (e.g. 112 -> 119),
 * so local declarations (fs, ip, mp, nb, newb, ...), several error checks,
 * brelse/bqrelse calls and closing braces are elided.  Only comments have
 * been added below; no code token has been altered.
 */
86 * Balloc defines the structure of filesystem storage
87 * by allocating the physical blocks on a device given
88 * the inode and the logical block number in a file.
89 * This is the allocation strategy for UFS1. Below is
90 * the allocation strategy for UFS2.
/*
 * ffs_balloc_ufs1(vp, startoffset, size, cred, flags, bpp):
 *   vp          - vnode of the file whose block is being allocated.
 *   startoffset - byte offset of the write; lblkno()/blkoff() below turn
 *                 it into a logical block number and an in-block size.
 *   size        - bytes to write within that block; the adjusted size
 *                 must not exceed fs->fs_bsize (panics otherwise).
 *   cred        - credentials charged for the allocation (ffs_alloc,
 *                 and chkdq on the failure path).
 *   flags       - BA_*/IO_* behavior flags used below: BA_CLRBUF,
 *                 BA_METAONLY, BA_UNMAPPED, BA_SEQMASK, IO_SYNC, ...
 *   bpp         - out parameter; presumably receives the buffer of the
 *                 allocated data (or, with BA_METAONLY, indirect) block
 *                 -- TODO confirm: the "*bpp = ..." stores are among the
 *                 elided lines.
 * Returns 0 on success or an errno-style error.  The failure path at the
 * bottom invalidates the buffers of any blocks allocated so far, zeroes
 * the on-disk pointers, restores the disk quota (chkdq) and frees the
 * blocks (ffs_blkfree).
 */
93 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
94 struct ucred *cred, int flags, struct buf **bpp)
97 struct ufs1_dinode *dp;
98 ufs_lbn_t lbn, lastlbn;
101 struct buf *bp, *nbp;
103 struct ufsmount *ump;
104 struct indir indirs[UFS_NIADDR + 2];
105 int deallocated, osize, nsize, num, i, error;
107 ufs1_daddr_t *bap, pref;
/* allociblk/lbns record every block allocated here so a failure can undo them. */
108 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
109 ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
112 int gbflags, reclaimed;
/* Convert the byte offset into a logical block number and in-block size. */
119 lbn = lblkno(fs, startoffset);
120 size = blkoff(fs, startoffset) + size;
122 if (size > fs->fs_bsize)
123 panic("ffs_balloc_ufs1: blk too big");
129 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
131 if (DOINGSOFTDEP(vp))
132 softdep_prealloc(vp, MNT_WAIT);
134 * If the next write will extend the file into a new block,
135 * and the file is currently composed of a fragment
136 * this fragment has to be extended to be a full block.
138 lastlbn = lblkno(fs, ip->i_size);
139 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
141 osize = blksize(fs, ip, nb);
142 if (osize < fs->fs_bsize && osize > 0) {
144 error = ffs_realloccg(ip, nb, dp->di_db[nb],
145 ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
146 &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
150 if (DOINGSOFTDEP(vp))
151 softdep_setup_allocdirect(ip, nb,
152 dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
153 fs->fs_bsize, osize, bp);
154 ip->i_size = smalllblktosize(fs, nb + 1);
155 dp->di_size = ip->i_size;
156 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
157 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
160 else if (DOINGASYNC(vp))
167 * The first UFS_NDADDR blocks are direct blocks
169 if (lbn < UFS_NDADDR) {
170 if (flags & BA_METAONLY)
171 panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
/* Block already allocated and fully covered by the file size: just read it. */
173 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
174 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
178 bp->b_blkno = fsbtodb(fs, nb);
184 * Consider need to reallocate a fragment.
186 osize = fragroundup(fs, blkoff(fs, ip->i_size));
187 nsize = fragroundup(fs, size);
188 if (nsize <= osize) {
189 error = bread(vp, lbn, osize, NOCRED, &bp);
193 bp->b_blkno = fsbtodb(fs, nb);
196 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
197 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
198 &dp->di_db[0]), osize, nsize, flags,
202 if (DOINGSOFTDEP(vp))
203 softdep_setup_allocdirect(ip, lbn,
204 dbtofsb(fs, bp->b_blkno), nb,
/* No block yet: allocate a fragment if this is the last, partial block. */
208 if (ip->i_size < smalllblktosize(fs, lbn + 1))
209 nsize = fragroundup(fs, size);
211 nsize = fs->fs_bsize;
213 error = ffs_alloc(ip, lbn,
214 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
215 nsize, flags, cred, &newb);
218 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
219 bp->b_blkno = fsbtodb(fs, newb);
220 if (flags & BA_CLRBUF)
222 if (DOINGSOFTDEP(vp))
223 softdep_setup_allocdirect(ip, lbn, newb, 0,
226 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
227 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
232 * Determine the number of levels of indirection.
235 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
239 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
/* NOTE(review): TDP_INBDFLUSH presumably guards against recursive buffer
 * flushing while indirect blocks are allocated -- confirm against
 * sys/sys/proc.h; every exit below restores it. */
241 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
243 * Fetch the first indirect block allocating if necessary.
246 nb = dp->di_ib[indirs[0].in_off];
248 allocblk = allociblk;
252 pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
254 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
255 flags, cred, &newb)) != 0) {
256 curthread_pflags_restore(saved_inbdflush);
259 pref = newb + fs->fs_frag;
261 MPASS(allocblk < allociblk + nitems(allociblk));
262 MPASS(lbns_remfree < lbns + nitems(lbns));
264 *lbns_remfree++ = indirs[1].in_lbn;
265 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
266 bp->b_blkno = fsbtodb(fs, nb);
268 if (DOINGSOFTDEP(vp)) {
269 softdep_setup_allocdirect(ip,
270 UFS_NDADDR + indirs[0].in_off, newb, 0,
271 fs->fs_bsize, 0, bp);
273 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
274 if (bp->b_bufsize == fs->fs_bsize)
275 bp->b_flags |= B_CLUSTEROK;
278 if ((error = bwrite(bp)) != 0)
281 allocib = &dp->di_ib[indirs[0].in_off];
283 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
286 * Fetch through the indirect blocks, allocating as necessary.
291 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
295 bap = (ufs1_daddr_t *)bp->b_data;
296 nb = bap[indirs[i].in_off];
/* Sanity-check the on-disk block number before trusting it. */
297 if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
298 fs->fs_bsize)) != 0) {
311 * If parent indirect has just been allocated, try to cluster
312 * immediately following it.
315 pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
317 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
318 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
/* First ENOSPC with soft updates: ask softdep to reclaim and retry once. */
321 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
322 softdep_request_cleanup(fs, vp, cred,
327 if (ppsratecheck(&ump->um_last_fullmsg,
328 &ump->um_secs_fullmsg, 1)) {
330 ffs_fserr(fs, ip->i_number, "filesystem full");
331 uprintf("\n%s: write failed, filesystem "
332 "is full\n", fs->fs_fsmnt);
338 pref = newb + fs->fs_frag;
340 MPASS(allocblk < allociblk + nitems(allociblk));
341 MPASS(lbns_remfree < lbns + nitems(lbns));
343 *lbns_remfree++ = indirs[i].in_lbn;
344 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
345 nbp->b_blkno = fsbtodb(fs, nb);
347 if (DOINGSOFTDEP(vp)) {
348 softdep_setup_allocindir_meta(nbp, ip, bp,
349 indirs[i - 1].in_off, nb);
351 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
352 if (nbp->b_bufsize == fs->fs_bsize)
353 nbp->b_flags |= B_CLUSTEROK;
356 if ((error = bwrite(nbp)) != 0) {
361 bap[indirs[i - 1].in_off] = nb;
362 if (allocib == NULL && unwindidx < 0)
365 * If required, write synchronously, otherwise use
368 if (flags & IO_SYNC) {
371 if (bp->b_bufsize == fs->fs_bsize)
372 bp->b_flags |= B_CLUSTEROK;
377 * If asked only for the indirect block, then return it.
379 if (flags & BA_METAONLY) {
380 curthread_pflags_restore(saved_inbdflush);
385 * Get the data block, allocating if necessary.
390 * If allocating metadata at the front of the cylinder
391 * group and parent indirect block has just been allocated,
392 * then cluster next to it if it is the first indirect in
393 * the file. Otherwise it has been allocated in the metadata
394 * area, so we want to find our own place out in the data area.
396 if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
397 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
399 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
400 flags | IO_BUFLOCKED, cred, &newb);
404 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
405 softdep_request_cleanup(fs, vp, cred,
410 if (ppsratecheck(&ump->um_last_fullmsg,
411 &ump->um_secs_fullmsg, 1)) {
413 ffs_fserr(fs, ip->i_number, "filesystem full");
414 uprintf("\n%s: write failed, filesystem "
415 "is full\n", fs->fs_fsmnt);
422 MPASS(allocblk < allociblk + nitems(allociblk));
423 MPASS(lbns_remfree < lbns + nitems(lbns));
425 *lbns_remfree++ = lbn;
426 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
427 nbp->b_blkno = fsbtodb(fs, nb);
428 if (flags & BA_CLRBUF)
430 if (DOINGSOFTDEP(vp))
431 softdep_setup_allocindir_page(ip, lbn, bp,
432 indirs[i].in_off, nb, 0, nbp);
433 bap[indirs[i].in_off] = nb;
435 * If required, write synchronously, otherwise use
438 if (flags & IO_SYNC) {
441 if (bp->b_bufsize == fs->fs_bsize)
442 bp->b_flags |= B_CLUSTEROK;
445 curthread_pflags_restore(saved_inbdflush);
/* Data block already exists: read it, with read-ahead when sequential. */
450 if (flags & BA_CLRBUF) {
451 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
453 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
454 !(vm_page_count_severe() || buf_dirty_count_severe())) {
455 error = cluster_read(vp, ip->i_size, lbn,
456 (int)fs->fs_bsize, NOCRED,
457 MAXBSIZE, seqcount, gbflags, &nbp);
459 error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
467 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
468 nbp->b_blkno = fsbtodb(fs, nb);
470 curthread_pflags_restore(saved_inbdflush);
474 curthread_pflags_restore(saved_inbdflush);
476 * If we have failed to allocate any blocks, simply return the error.
477 * This is the usual case and avoids the need to fsync the file.
479 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
482 * If we have failed part way through block allocation, we
483 * have to deallocate any indirect blocks that we have allocated.
484 * We have to fsync the file before we start to get rid of all
485 * of its dependencies so that we do not leave them dangling.
486 * We have to sync it at the end so that the soft updates code
487 * does not find any untracked changes. Although this is really
488 * slow, running out of disk space is not expected to be a common
489 * occurrence. The error return from fsync is ignored as we already
490 * have an error to return to the user.
492 * XXX Still have to journal the free below
494 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
495 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
496 blkp < allocblk; blkp++, lbns_remfree++) {
498 * We shall not leave the freed blocks on the vnode
499 * buffer object lists.
501 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
502 GB_NOCREAT | GB_UNMAPPED);
504 KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
505 ("mismatch1 l %jd %jd b %ju %ju",
506 (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
507 (uintmax_t)bp->b_blkno,
508 (uintmax_t)fsbtodb(fs, *blkp)));
509 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
510 bp->b_flags &= ~(B_ASYNC | B_CACHE);
513 deallocated += fs->fs_bsize;
515 if (allocib != NULL) {
517 } else if (unwindidx >= 0) {
520 r = bread(vp, indirs[unwindidx].in_lbn,
521 (int)fs->fs_bsize, NOCRED, &bp);
523 panic("Could not unwind indirect block, error %d", r);
526 bap = (ufs1_daddr_t *)bp->b_data;
527 bap[indirs[unwindidx].in_off] = 0;
528 if (flags & IO_SYNC) {
531 if (bp->b_bufsize == fs->fs_bsize)
532 bp->b_flags |= B_CLUSTEROK;
540 * Restore user's disk quota because allocation failed.
542 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
544 dp->di_blocks -= btodb(deallocated);
545 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
547 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
549 * After the buffers are invalidated and on-disk pointers are
550 * cleared, free the blocks.
552 for (blkp = allociblk; blkp < allocblk; blkp++) {
554 if (blkp == allociblk)
556 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
557 GB_NOCREAT | GB_UNMAPPED);
559 panic("zombie1 %jd %ju %ju",
560 (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
561 (uintmax_t)fsbtodb(fs, *blkp));
565 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
566 ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
/*
 * NOTE(review): as with the UFS1 routine, this copy is sampled/truncated --
 * the embedded original line numbers jump, so declarations (fs, ip, mp,
 * nb, newb, ...), error checks and closing braces are elided, and the
 * function continues past the end of this extract.  Only comments have
 * been added; no code token has been altered.
 */
572 * Balloc defines the structure of file system storage
573 * by allocating the physical blocks on a device given
574 * the inode and the logical block number in a file.
575 * This is the allocation strategy for UFS2. Above is
576 * the allocation strategy for UFS1.
/*
 * ffs_balloc_ufs2 -- UFS2 counterpart of ffs_balloc_ufs1 above; same
 * parameters and same return convention (0 or errno-style error, with an
 * unwind path that invalidates buffers, restores quota and frees blocks
 * on partial failure).  Additionally, when IO_EXT is set in flags this
 * routine allocates external-attribute ("ext") data: ext logical blocks
 * are bounded by UFS_NXADDR, are addressed at negative logical block
 * numbers (-1 - lbn) via dp->di_extb[], and their buffers are tagged
 * with BX_ALTDATA.
 */
579 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
580 struct ucred *cred, int flags, struct buf **bpp)
583 struct ufs2_dinode *dp;
584 ufs_lbn_t lbn, lastlbn;
586 struct buf *bp, *nbp;
588 struct ufsmount *ump;
589 struct indir indirs[UFS_NIADDR + 2];
590 ufs2_daddr_t nb, newb, *bap, pref;
/* allociblk/lbns record every block allocated here so a failure can undo them. */
591 ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
592 ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
593 int deallocated, osize, nsize, num, i, error;
596 int gbflags, reclaimed;
/* Convert the byte offset into a logical block number and in-block size. */
603 lbn = lblkno(fs, startoffset);
604 size = blkoff(fs, startoffset) + size;
606 if (size > fs->fs_bsize)
607 panic("ffs_balloc_ufs2: blk too big");
611 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
613 if (DOINGSOFTDEP(vp))
614 softdep_prealloc(vp, MNT_WAIT);
617 * Check for allocating external data.
619 if (flags & IO_EXT) {
620 if (lbn >= UFS_NXADDR)
623 * If the next write will extend the data into a new block,
624 * and the data is currently composed of a fragment
625 * this fragment has to be extended to be a full block.
627 lastlbn = lblkno(fs, dp->di_extsize);
630 osize = sblksize(fs, dp->di_extsize, nb);
631 if (osize < fs->fs_bsize && osize > 0) {
/* Ext blocks live at negative lbns: -1 - nb. */
633 error = ffs_realloccg(ip, -1 - nb,
635 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
636 &dp->di_extb[0]), osize,
637 (int)fs->fs_bsize, flags, cred, &bp);
640 if (DOINGSOFTDEP(vp))
641 softdep_setup_allocext(ip, nb,
642 dbtofsb(fs, bp->b_blkno),
644 fs->fs_bsize, osize, bp);
645 dp->di_extsize = smalllblktosize(fs, nb + 1);
646 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
647 bp->b_xflags |= BX_ALTDATA;
648 UFS_INODE_SET_FLAG(ip, IN_CHANGE);
656 * All blocks are direct blocks
658 if (flags & BA_METAONLY)
659 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
660 nb = dp->di_extb[lbn];
661 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
662 error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
667 bp->b_blkno = fsbtodb(fs, nb);
668 bp->b_xflags |= BX_ALTDATA;
674 * Consider need to reallocate a fragment.
676 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
677 nsize = fragroundup(fs, size);
678 if (nsize <= osize) {
679 error = bread_gb(vp, -1 - lbn, osize, NOCRED,
684 bp->b_blkno = fsbtodb(fs, nb);
685 bp->b_xflags |= BX_ALTDATA;
688 error = ffs_realloccg(ip, -1 - lbn,
690 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
691 &dp->di_extb[0]), osize, nsize, flags,
695 bp->b_xflags |= BX_ALTDATA;
696 if (DOINGSOFTDEP(vp))
697 softdep_setup_allocext(ip, lbn,
698 dbtofsb(fs, bp->b_blkno), nb,
/* No ext block yet: fragment if this is the last, partial block. */
702 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
703 nsize = fragroundup(fs, size);
705 nsize = fs->fs_bsize;
707 error = ffs_alloc(ip, lbn,
708 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
709 nsize, flags, cred, &newb);
712 bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
713 bp->b_blkno = fsbtodb(fs, newb);
714 bp->b_xflags |= BX_ALTDATA;
715 if (flags & BA_CLRBUF)
717 if (DOINGSOFTDEP(vp))
718 softdep_setup_allocext(ip, lbn, newb, 0,
721 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
722 UFS_INODE_SET_FLAG(ip, IN_CHANGE);
727 * If the next write will extend the file into a new block,
728 * and the file is currently composed of a fragment
729 * this fragment has to be extended to be a full block.
731 lastlbn = lblkno(fs, ip->i_size);
732 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
734 osize = blksize(fs, ip, nb);
735 if (osize < fs->fs_bsize && osize > 0) {
737 error = ffs_realloccg(ip, nb, dp->di_db[nb],
738 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
739 &dp->di_db[0]), osize, (int)fs->fs_bsize,
743 if (DOINGSOFTDEP(vp))
744 softdep_setup_allocdirect(ip, nb,
745 dbtofsb(fs, bp->b_blkno),
747 fs->fs_bsize, osize, bp);
748 ip->i_size = smalllblktosize(fs, nb + 1);
749 dp->di_size = ip->i_size;
750 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
751 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
759 * The first UFS_NDADDR blocks are direct blocks
761 if (lbn < UFS_NDADDR) {
762 if (flags & BA_METAONLY)
763 panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
/* Block already allocated and fully covered by the file size: just read it. */
765 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
766 error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
771 bp->b_blkno = fsbtodb(fs, nb);
777 * Consider need to reallocate a fragment.
779 osize = fragroundup(fs, blkoff(fs, ip->i_size));
780 nsize = fragroundup(fs, size);
781 if (nsize <= osize) {
782 error = bread_gb(vp, lbn, osize, NOCRED,
787 bp->b_blkno = fsbtodb(fs, nb);
790 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
791 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
792 &dp->di_db[0]), osize, nsize, flags,
796 if (DOINGSOFTDEP(vp))
797 softdep_setup_allocdirect(ip, lbn,
798 dbtofsb(fs, bp->b_blkno), nb,
802 if (ip->i_size < smalllblktosize(fs, lbn + 1))
803 nsize = fragroundup(fs, size);
805 nsize = fs->fs_bsize;
807 error = ffs_alloc(ip, lbn,
808 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
809 &dp->di_db[0]), nsize, flags, cred, &newb);
812 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
813 bp->b_blkno = fsbtodb(fs, newb);
814 if (flags & BA_CLRBUF)
816 if (DOINGSOFTDEP(vp))
817 softdep_setup_allocdirect(ip, lbn, newb, 0,
820 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
821 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
826 * Determine the number of levels of indirection.
829 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
833 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
/* NOTE(review): TDP_INBDFLUSH presumably guards against recursive buffer
 * flushing while indirect blocks are allocated -- confirm; every exit
 * below restores it. */
835 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
837 * Fetch the first indirect block allocating if necessary.
840 nb = dp->di_ib[indirs[0].in_off];
842 allocblk = allociblk;
846 pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
848 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
849 flags, cred, &newb)) != 0) {
850 curthread_pflags_restore(saved_inbdflush);
853 pref = newb + fs->fs_frag;
855 MPASS(allocblk < allociblk + nitems(allociblk));
856 MPASS(lbns_remfree < lbns + nitems(lbns));
858 *lbns_remfree++ = indirs[1].in_lbn;
859 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
861 bp->b_blkno = fsbtodb(fs, nb);
863 if (DOINGSOFTDEP(vp)) {
864 softdep_setup_allocdirect(ip,
865 UFS_NDADDR + indirs[0].in_off, newb, 0,
866 fs->fs_bsize, 0, bp);
868 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
869 if (bp->b_bufsize == fs->fs_bsize)
870 bp->b_flags |= B_CLUSTEROK;
873 if ((error = bwrite(bp)) != 0)
876 allocib = &dp->di_ib[indirs[0].in_off];
878 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
881 * Fetch through the indirect blocks, allocating as necessary.
886 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
890 bap = (ufs2_daddr_t *)bp->b_data;
891 nb = bap[indirs[i].in_off];
/* Sanity-check the on-disk block number before trusting it. */
892 if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
893 fs->fs_bsize)) != 0) {
906 * If parent indirect has just been allocated, try to cluster
907 * immediately following it.
910 pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
912 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
913 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
/* First ENOSPC with soft updates: ask softdep to reclaim and retry once. */
916 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
917 softdep_request_cleanup(fs, vp, cred,
922 if (ppsratecheck(&ump->um_last_fullmsg,
923 &ump->um_secs_fullmsg, 1)) {
925 ffs_fserr(fs, ip->i_number, "filesystem full");
926 uprintf("\n%s: write failed, filesystem "
927 "is full\n", fs->fs_fsmnt);
933 pref = newb + fs->fs_frag;
935 MPASS(allocblk < allociblk + nitems(allociblk));
936 MPASS(lbns_remfree < lbns + nitems(lbns));
938 *lbns_remfree++ = indirs[i].in_lbn;
939 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
941 nbp->b_blkno = fsbtodb(fs, nb);
943 if (DOINGSOFTDEP(vp)) {
944 softdep_setup_allocindir_meta(nbp, ip, bp,
945 indirs[i - 1].in_off, nb);
947 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
948 if (nbp->b_bufsize == fs->fs_bsize)
949 nbp->b_flags |= B_CLUSTEROK;
952 if ((error = bwrite(nbp)) != 0) {
957 bap[indirs[i - 1].in_off] = nb;
958 if (allocib == NULL && unwindidx < 0)
961 * If required, write synchronously, otherwise use
964 if (flags & IO_SYNC) {
967 if (bp->b_bufsize == fs->fs_bsize)
968 bp->b_flags |= B_CLUSTEROK;
973 * If asked only for the indirect block, then return it.
975 if (flags & BA_METAONLY) {
976 curthread_pflags_restore(saved_inbdflush);
981 * Get the data block, allocating if necessary.
986 * If allocating metadata at the front of the cylinder
987 * group and parent indirect block has just been allocated,
988 * then cluster next to it if it is the first indirect in
989 * the file. Otherwise it has been allocated in the metadata
990 * area, so we want to find our own place out in the data area.
992 if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
993 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
995 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
996 flags | IO_BUFLOCKED, cred, &newb);
1000 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
1001 softdep_request_cleanup(fs, vp, cred,
1006 if (ppsratecheck(&ump->um_last_fullmsg,
1007 &ump->um_secs_fullmsg, 1)) {
1009 ffs_fserr(fs, ip->i_number, "filesystem full");
1010 uprintf("\n%s: write failed, filesystem "
1011 "is full\n", fs->fs_fsmnt);
1018 MPASS(allocblk < allociblk + nitems(allociblk));
1019 MPASS(lbns_remfree < lbns + nitems(lbns));
1021 *lbns_remfree++ = lbn;
1022 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1023 nbp->b_blkno = fsbtodb(fs, nb);
1024 if (flags & BA_CLRBUF)
1025 vfs_bio_clrbuf(nbp);
1026 if (DOINGSOFTDEP(vp))
1027 softdep_setup_allocindir_page(ip, lbn, bp,
1028 indirs[i].in_off, nb, 0, nbp);
1029 bap[indirs[i].in_off] = nb;
1031 * If required, write synchronously, otherwise use
1034 if (flags & IO_SYNC) {
1037 if (bp->b_bufsize == fs->fs_bsize)
1038 bp->b_flags |= B_CLUSTEROK;
1041 curthread_pflags_restore(saved_inbdflush);
1047 * If requested clear invalid portions of the buffer. If we
1048 * have to do a read-before-write (typical if BA_CLRBUF is set),
1049 * try to do some read-ahead in the sequential case to reduce
1050 * the number of I/O transactions.
1052 if (flags & BA_CLRBUF) {
1053 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1054 if (seqcount != 0 &&
1055 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1056 !(vm_page_count_severe() || buf_dirty_count_severe())) {
1057 error = cluster_read(vp, ip->i_size, lbn,
1058 (int)fs->fs_bsize, NOCRED,
1059 MAXBSIZE, seqcount, gbflags, &nbp);
1061 error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1062 NOCRED, gbflags, &nbp);
1069 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1070 nbp->b_blkno = fsbtodb(fs, nb);
1072 curthread_pflags_restore(saved_inbdflush);
1076 curthread_pflags_restore(saved_inbdflush);
1078 * If we have failed to allocate any blocks, simply return the error.
1079 * This is the usual case and avoids the need to fsync the file.
1081 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1084 * If we have failed part way through block allocation, we
1085 * have to deallocate any indirect blocks that we have allocated.
1086 * We have to fsync the file before we start to get rid of all
1087 * of its dependencies so that we do not leave them dangling.
1088 * We have to sync it at the end so that the soft updates code
1089 * does not find any untracked changes. Although this is really
1090 * slow, running out of disk space is not expected to be a common
1091 * occurrence. The error return from fsync is ignored as we already
1092 * have an error to return to the user.
1094 * XXX Still have to journal the free below
1096 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1097 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1098 blkp < allocblk; blkp++, lbns_remfree++) {
1100 * We shall not leave the freed blocks on the vnode
1101 * buffer object lists.
1103 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1104 GB_NOCREAT | GB_UNMAPPED);
1106 KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1107 ("mismatch2 l %jd %jd b %ju %ju",
1108 (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1109 (uintmax_t)bp->b_blkno,
1110 (uintmax_t)fsbtodb(fs, *blkp)));
1111 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1112 bp->b_flags &= ~(B_ASYNC | B_CACHE);
1115 deallocated += fs->fs_bsize;
1117 if (allocib != NULL) {
1119 } else if (unwindidx >= 0) {
1122 r = bread(vp, indirs[unwindidx].in_lbn,
1123 (int)fs->fs_bsize, NOCRED, &bp);
1125 panic("Could not unwind indirect block, error %d", r);
1128 bap = (ufs2_daddr_t *)bp->b_data;
1129 bap[indirs[unwindidx].in_off] = 0;
1130 if (flags & IO_SYNC) {
1133 if (bp->b_bufsize == fs->fs_bsize)
1134 bp->b_flags |= B_CLUSTEROK;
1142 * Restore user's disk quota because allocation failed.
1144 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1146 dp->di_blocks -= btodb(deallocated);
1147 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
1149 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1151 * After the buffers are invalidated and on-disk pointers are
1152 * cleared, free the blocks.
1154 for (blkp = allociblk; blkp < allocblk; blkp++) {
1156 if (blkp == allociblk)
1157 lbns_remfree = lbns;
1158 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1159 GB_NOCREAT | GB_UNMAPPED);
1161 panic("zombie2 %jd %ju %ju",
1162 (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1163 (uintmax_t)fsbtodb(fs, *blkp));
1167 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
1168 ip->i_number, vp->v_type, NULL, SINGLETON_KEY);