2 * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * Copyright (c) 1982, 1986, 1989, 1993
35 * The Regents of the University of California. All rights reserved.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD$");
67 #include <sys/param.h>
68 #include <sys/systm.h>
72 #include <sys/mount.h>
73 #include <sys/vnode.h>
74 #include <sys/vmmeter.h>
76 #include <ufs/ufs/quota.h>
77 #include <ufs/ufs/inode.h>
78 #include <ufs/ufs/ufs_extern.h>
79 #include <ufs/ufs/extattr.h>
80 #include <ufs/ufs/ufsmount.h>
82 #include <ufs/ffs/fs.h>
83 #include <ufs/ffs/ffs_extern.h>
86 * Balloc defines the structure of filesystem storage
87 * by allocating the physical blocks on a device given
88 * the inode and the logical block number in a file.
89 * This is the allocation strategy for UFS1. Below is
90 * the allocation strategy for UFS2.
93 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
94 struct ucred *cred, int flags, struct buf **bpp)
97 struct ufs1_dinode *dp;
98 ufs_lbn_t lbn, lastlbn;
101 struct buf *bp, *nbp;
102 struct ufsmount *ump;
103 struct indir indirs[UFS_NIADDR + 2];
104 int deallocated, osize, nsize, num, i, error;
106 ufs1_daddr_t *bap, pref;
107 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
108 ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
111 static struct timeval lastfail;
113 int gbflags, reclaimed;
119 lbn = lblkno(fs, startoffset);
120 size = blkoff(fs, startoffset) + size;
122 if (size > fs->fs_bsize)
123 panic("ffs_balloc_ufs1: blk too big");
129 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
131 if (DOINGSOFTDEP(vp))
132 softdep_prealloc(vp, MNT_WAIT);
134 * If the next write will extend the file into a new block,
135 * and the file is currently composed of a fragment
136 * this fragment has to be extended to be a full block.
138 lastlbn = lblkno(fs, ip->i_size);
139 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
141 osize = blksize(fs, ip, nb);
142 if (osize < fs->fs_bsize && osize > 0) {
144 error = ffs_realloccg(ip, nb, dp->di_db[nb],
145 ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
146 &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
150 if (DOINGSOFTDEP(vp))
151 softdep_setup_allocdirect(ip, nb,
152 dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
153 fs->fs_bsize, osize, bp);
154 ip->i_size = smalllblktosize(fs, nb + 1);
155 dp->di_size = ip->i_size;
156 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
157 ip->i_flag |= IN_CHANGE | IN_UPDATE;
160 else if (DOINGASYNC(vp))
167 * The first UFS_NDADDR blocks are direct blocks
169 if (lbn < UFS_NDADDR) {
170 if (flags & BA_METAONLY)
171 panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
173 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
174 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
179 bp->b_blkno = fsbtodb(fs, nb);
185 * Consider need to reallocate a fragment.
187 osize = fragroundup(fs, blkoff(fs, ip->i_size));
188 nsize = fragroundup(fs, size);
189 if (nsize <= osize) {
190 error = bread(vp, lbn, osize, NOCRED, &bp);
195 bp->b_blkno = fsbtodb(fs, nb);
198 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
199 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
200 &dp->di_db[0]), osize, nsize, flags,
204 if (DOINGSOFTDEP(vp))
205 softdep_setup_allocdirect(ip, lbn,
206 dbtofsb(fs, bp->b_blkno), nb,
210 if (ip->i_size < smalllblktosize(fs, lbn + 1))
211 nsize = fragroundup(fs, size);
213 nsize = fs->fs_bsize;
215 error = ffs_alloc(ip, lbn,
216 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
217 nsize, flags, cred, &newb);
220 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
221 bp->b_blkno = fsbtodb(fs, newb);
222 if (flags & BA_CLRBUF)
224 if (DOINGSOFTDEP(vp))
225 softdep_setup_allocdirect(ip, lbn, newb, 0,
228 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
229 ip->i_flag |= IN_CHANGE | IN_UPDATE;
234 * Determine the number of levels of indirection.
237 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
241 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
243 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
245 * Fetch the first indirect block allocating if necessary.
248 nb = dp->di_ib[indirs[0].in_off];
250 allocblk = allociblk;
254 pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
256 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
257 flags, cred, &newb)) != 0) {
258 curthread_pflags_restore(saved_inbdflush);
261 pref = newb + fs->fs_frag;
263 MPASS(allocblk < allociblk + nitems(allociblk));
264 MPASS(lbns_remfree < lbns + nitems(lbns));
266 *lbns_remfree++ = indirs[1].in_lbn;
267 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
268 bp->b_blkno = fsbtodb(fs, nb);
270 if (DOINGSOFTDEP(vp)) {
271 softdep_setup_allocdirect(ip,
272 UFS_NDADDR + indirs[0].in_off, newb, 0,
273 fs->fs_bsize, 0, bp);
275 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
276 if (bp->b_bufsize == fs->fs_bsize)
277 bp->b_flags |= B_CLUSTEROK;
280 if ((error = bwrite(bp)) != 0)
283 allocib = &dp->di_ib[indirs[0].in_off];
285 ip->i_flag |= IN_CHANGE | IN_UPDATE;
288 * Fetch through the indirect blocks, allocating as necessary.
293 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
298 bap = (ufs1_daddr_t *)bp->b_data;
299 nb = bap[indirs[i].in_off];
309 * If parent indirect has just been allocated, try to cluster
310 * immediately following it.
313 pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
315 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
316 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
318 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
320 softdep_request_cleanup(fs, vp, cred,
325 if (ppsratecheck(&lastfail, &curfail, 1)) {
326 ffs_fserr(fs, ip->i_number, "filesystem full");
327 uprintf("\n%s: write failed, filesystem "
328 "is full\n", fs->fs_fsmnt);
332 pref = newb + fs->fs_frag;
334 MPASS(allocblk < allociblk + nitems(allociblk));
335 MPASS(lbns_remfree < lbns + nitems(lbns));
337 *lbns_remfree++ = indirs[i].in_lbn;
338 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
339 nbp->b_blkno = fsbtodb(fs, nb);
341 if (DOINGSOFTDEP(vp)) {
342 softdep_setup_allocindir_meta(nbp, ip, bp,
343 indirs[i - 1].in_off, nb);
345 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
346 if (nbp->b_bufsize == fs->fs_bsize)
347 nbp->b_flags |= B_CLUSTEROK;
350 if ((error = bwrite(nbp)) != 0) {
355 bap[indirs[i - 1].in_off] = nb;
356 if (allocib == NULL && unwindidx < 0)
359 * If required, write synchronously, otherwise use
362 if (flags & IO_SYNC) {
365 if (bp->b_bufsize == fs->fs_bsize)
366 bp->b_flags |= B_CLUSTEROK;
371 * If asked only for the indirect block, then return it.
373 if (flags & BA_METAONLY) {
374 curthread_pflags_restore(saved_inbdflush);
379 * Get the data block, allocating if necessary.
384 * If allocating metadata at the front of the cylinder
385 * group and parent indirect block has just been allocated,
386 * then cluster next to it if it is the first indirect in
387 * the file. Otherwise it has been allocated in the metadata
388 * area, so we want to find our own place out in the data area.
390 if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
391 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
393 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
394 flags | IO_BUFLOCKED, cred, &newb);
397 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
399 softdep_request_cleanup(fs, vp, cred,
404 if (ppsratecheck(&lastfail, &curfail, 1)) {
405 ffs_fserr(fs, ip->i_number, "filesystem full");
406 uprintf("\n%s: write failed, filesystem "
407 "is full\n", fs->fs_fsmnt);
412 MPASS(allocblk < allociblk + nitems(allociblk));
413 MPASS(lbns_remfree < lbns + nitems(lbns));
415 *lbns_remfree++ = lbn;
416 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
417 nbp->b_blkno = fsbtodb(fs, nb);
418 if (flags & BA_CLRBUF)
420 if (DOINGSOFTDEP(vp))
421 softdep_setup_allocindir_page(ip, lbn, bp,
422 indirs[i].in_off, nb, 0, nbp);
423 bap[indirs[i].in_off] = nb;
425 * If required, write synchronously, otherwise use
428 if (flags & IO_SYNC) {
431 if (bp->b_bufsize == fs->fs_bsize)
432 bp->b_flags |= B_CLUSTEROK;
435 curthread_pflags_restore(saved_inbdflush);
440 if (flags & BA_CLRBUF) {
441 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
443 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
444 !(vm_page_count_severe() || buf_dirty_count_severe())) {
445 error = cluster_read(vp, ip->i_size, lbn,
446 (int)fs->fs_bsize, NOCRED,
447 MAXBSIZE, seqcount, gbflags, &nbp);
449 error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
457 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
458 nbp->b_blkno = fsbtodb(fs, nb);
460 curthread_pflags_restore(saved_inbdflush);
464 curthread_pflags_restore(saved_inbdflush);
466 * If we have failed to allocate any blocks, simply return the error.
467 * This is the usual case and avoids the need to fsync the file.
469 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
472 * If we have failed part way through block allocation, we
473 * have to deallocate any indirect blocks that we have allocated.
474 * We have to fsync the file before we start to get rid of all
475 * of its dependencies so that we do not leave them dangling.
476 * We have to sync it at the end so that the soft updates code
477 * does not find any untracked changes. Although this is really
478 * slow, running out of disk space is not expected to be a common
479 * occurrence. The error return from fsync is ignored as we already
480 * have an error to return to the user.
482 * XXX Still have to journal the free below
484 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
485 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
486 blkp < allocblk; blkp++, lbns_remfree++) {
488 * We shall not leave the freed blocks on the vnode
489 * buffer object lists.
491 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
492 GB_NOCREAT | GB_UNMAPPED);
494 KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
495 ("mismatch1 l %jd %jd b %ju %ju",
496 (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
497 (uintmax_t)bp->b_blkno,
498 (uintmax_t)fsbtodb(fs, *blkp)));
499 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
500 bp->b_flags &= ~(B_ASYNC | B_CACHE);
503 deallocated += fs->fs_bsize;
505 if (allocib != NULL) {
507 } else if (unwindidx >= 0) {
510 r = bread(vp, indirs[unwindidx].in_lbn,
511 (int)fs->fs_bsize, NOCRED, &bp);
513 panic("Could not unwind indirect block, error %d", r);
516 bap = (ufs1_daddr_t *)bp->b_data;
517 bap[indirs[unwindidx].in_off] = 0;
518 if (flags & IO_SYNC) {
521 if (bp->b_bufsize == fs->fs_bsize)
522 bp->b_flags |= B_CLUSTEROK;
530 * Restore user's disk quota because allocation failed.
532 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
534 dp->di_blocks -= btodb(deallocated);
535 ip->i_flag |= IN_CHANGE | IN_UPDATE;
537 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
539 * After the buffers are invalidated and on-disk pointers are
540 * cleared, free the blocks.
542 for (blkp = allociblk; blkp < allocblk; blkp++) {
544 if (blkp == allociblk)
546 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
547 GB_NOCREAT | GB_UNMAPPED);
549 panic("zombie1 %jd %ju %ju",
550 (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
551 (uintmax_t)fsbtodb(fs, *blkp));
555 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
556 ip->i_number, vp->v_type, NULL);
562 * Balloc defines the structure of file system storage
563 * by allocating the physical blocks on a device given
564 * the inode and the logical block number in a file.
565 * This is the allocation strategy for UFS2. Above is
566 * the allocation strategy for UFS1.
/*
 * NOTE(review): elided excerpt of ffs_balloc_ufs2(); line numbers are the
 * original source lines and gaps mark lines missing from this listing
 * (including the function's tail past line 1150). The structure mirrors
 * ffs_balloc_ufs1() with 64-bit (ufs2_daddr_t) block addresses, plus an
 * extra IO_EXT path for the external-attribute data area. Comments are
 * limited to what the visible lines show; assumptions are marked.
 */
569 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
570 struct ucred *cred, int flags, struct buf **bpp)
573 struct ufs2_dinode *dp;
574 ufs_lbn_t lbn, lastlbn;
576 struct buf *bp, *nbp;
577 struct ufsmount *ump;
/* Indirection path from inode to target block (ufs_getlbns() below). */
578 struct indir indirs[UFS_NIADDR + 2];
579 ufs2_daddr_t nb, newb, *bap, pref;
/* Per-call record of every allocated block and its lbn, for unwinding. */
580 ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
581 ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
582 int deallocated, osize, nsize, num, i, error;
/* Shared rate-limit state for the "filesystem full" message. */
585 static struct timeval lastfail;
587 int gbflags, reclaimed;
/* Logical block containing startoffset; 'size' becomes the byte count
 * from the start of that block and must fit in one fs block. */
593 lbn = lblkno(fs, startoffset);
594 size = blkoff(fs, startoffset) + size;
596 if (size > fs->fs_bsize)
597 panic("ffs_balloc_ufs2: blk too big");
601 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
603 if (DOINGSOFTDEP(vp))
604 softdep_prealloc(vp, MNT_WAIT);
607 * Check for allocating external data.
/* External-attribute blocks live at negative logical block numbers
 * (-1 - lbn below) and their buffers are tagged BX_ALTDATA. */
609 if (flags & IO_EXT) {
610 if (lbn >= UFS_NXADDR)
613 * If the next write will extend the data into a new block,
614 * and the data is currently composed of a fragment
615 * this fragment has to be extended to be a full block.
617 lastlbn = lblkno(fs, dp->di_extsize);
/* NOTE(review): 'nb' presumably set to lastlbn on an elided line. */
620 osize = sblksize(fs, dp->di_extsize, nb);
621 if (osize < fs->fs_bsize && osize > 0) {
/* Grow the trailing ext-area fragment to a full block. */
623 error = ffs_realloccg(ip, -1 - nb,
625 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
626 &dp->di_extb[0]), osize,
627 (int)fs->fs_bsize, flags, cred, &bp);
630 if (DOINGSOFTDEP(vp))
631 softdep_setup_allocext(ip, nb,
632 dbtofsb(fs, bp->b_blkno),
634 fs->fs_bsize, osize, bp);
635 dp->di_extsize = smalllblktosize(fs, nb + 1);
636 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
637 bp->b_xflags |= BX_ALTDATA;
638 ip->i_flag |= IN_CHANGE;
646 * All blocks are direct blocks
648 if (flags & BA_METAONLY)
649 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
650 nb = dp->di_extb[lbn];
/* Ext block already allocated and fully covered: read and map it. */
651 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
652 error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
658 bp->b_blkno = fsbtodb(fs, nb);
659 bp->b_xflags |= BX_ALTDATA;
665 * Consider need to reallocate a fragment.
667 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
668 nsize = fragroundup(fs, size);
/* Existing fragment is already big enough: reuse it. */
669 if (nsize <= osize) {
670 error = bread_gb(vp, -1 - lbn, osize, NOCRED,
676 bp->b_blkno = fsbtodb(fs, nb);
677 bp->b_xflags |= BX_ALTDATA;
/* Otherwise grow the fragment to the new size. */
680 error = ffs_realloccg(ip, -1 - lbn,
682 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
683 &dp->di_extb[0]), osize, nsize, flags,
687 bp->b_xflags |= BX_ALTDATA;
688 if (DOINGSOFTDEP(vp))
689 softdep_setup_allocext(ip, lbn,
690 dbtofsb(fs, bp->b_blkno), nb,
/* No block yet: fragment for the last partial block, else full. */
694 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
695 nsize = fragroundup(fs, size);
697 nsize = fs->fs_bsize;
699 error = ffs_alloc(ip, lbn,
700 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
701 nsize, flags, cred, &newb);
704 bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
705 bp->b_blkno = fsbtodb(fs, newb);
706 bp->b_xflags |= BX_ALTDATA;
707 if (flags & BA_CLRBUF)
709 if (DOINGSOFTDEP(vp))
710 softdep_setup_allocext(ip, lbn, newb, 0,
713 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
714 ip->i_flag |= IN_CHANGE;
719 * If the next write will extend the file into a new block,
720 * and the file is currently composed of a fragment
721 * this fragment has to be extended to be a full block.
723 lastlbn = lblkno(fs, ip->i_size);
724 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
/* NOTE(review): 'nb' presumably set to lastlbn on an elided line. */
726 osize = blksize(fs, ip, nb);
727 if (osize < fs->fs_bsize && osize > 0) {
729 error = ffs_realloccg(ip, nb, dp->di_db[nb],
730 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
731 &dp->di_db[0]), osize, (int)fs->fs_bsize,
735 if (DOINGSOFTDEP(vp))
736 softdep_setup_allocdirect(ip, nb,
737 dbtofsb(fs, bp->b_blkno),
739 fs->fs_bsize, osize, bp);
740 ip->i_size = smalllblktosize(fs, nb + 1);
741 dp->di_size = ip->i_size;
742 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
743 ip->i_flag |= IN_CHANGE | IN_UPDATE;
751 * The first UFS_NDADDR blocks are direct blocks
753 if (lbn < UFS_NDADDR) {
754 if (flags & BA_METAONLY)
755 panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
/* NOTE(review): 'nb' presumably loaded from dp->di_db[lbn] on an
 * elided line; if valid and fully covered, read and map it. */
757 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
758 error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
764 bp->b_blkno = fsbtodb(fs, nb);
770 * Consider need to reallocate a fragment.
772 osize = fragroundup(fs, blkoff(fs, ip->i_size));
773 nsize = fragroundup(fs, size);
774 if (nsize <= osize) {
775 error = bread_gb(vp, lbn, osize, NOCRED,
781 bp->b_blkno = fsbtodb(fs, nb);
784 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
785 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
786 &dp->di_db[0]), osize, nsize, flags,
790 if (DOINGSOFTDEP(vp))
791 softdep_setup_allocdirect(ip, lbn,
792 dbtofsb(fs, bp->b_blkno), nb,
796 if (ip->i_size < smalllblktosize(fs, lbn + 1))
797 nsize = fragroundup(fs, size);
799 nsize = fs->fs_bsize;
801 error = ffs_alloc(ip, lbn,
802 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
803 &dp->di_db[0]), nsize, flags, cred, &newb);
806 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
807 bp->b_blkno = fsbtodb(fs, newb);
808 if (flags & BA_CLRBUF)
810 if (DOINGSOFTDEP(vp))
811 softdep_setup_allocdirect(ip, lbn, newb, 0,
814 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
815 ip->i_flag |= IN_CHANGE | IN_UPDATE;
820 * Determine the number of levels of indirection.
823 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
827 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
/* Mark this thread as inside balloc; restored on visible exit paths. */
829 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
831 * Fetch the first indirect block allocating if necessary.
834 nb = dp->di_ib[indirs[0].in_off];
836 allocblk = allociblk;
/* Negative third argument selects metadata placement policy. */
840 pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
842 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
843 flags, cred, &newb)) != 0) {
844 curthread_pflags_restore(saved_inbdflush);
847 pref = newb + fs->fs_frag;
/* Record the new block (and its lbn) for potential unwinding. */
849 MPASS(allocblk < allociblk + nitems(allociblk));
850 MPASS(lbns_remfree < lbns + nitems(lbns));
852 *lbns_remfree++ = indirs[1].in_lbn;
853 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
855 bp->b_blkno = fsbtodb(fs, nb);
857 if (DOINGSOFTDEP(vp)) {
858 softdep_setup_allocdirect(ip,
859 UFS_NDADDR + indirs[0].in_off, newb, 0,
860 fs->fs_bsize, 0, bp);
862 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
863 if (bp->b_bufsize == fs->fs_bsize)
864 bp->b_flags |= B_CLUSTEROK;
/* Indirect block must be on disk before the inode points at it. */
867 if ((error = bwrite(bp)) != 0)
870 allocib = &dp->di_ib[indirs[0].in_off];
872 ip->i_flag |= IN_CHANGE | IN_UPDATE;
875 * Fetch through the indirect blocks, allocating as necessary.
/* NOTE(review): the loop header and bread() call site for this
 * statement are on elided lines — confirm against the full source. */
880 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
885 bap = (ufs2_daddr_t *)bp->b_data;
886 nb = bap[indirs[i].in_off];
896 * If parent indirect has just been allocated, try to cluster
897 * immediately following it.
900 pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
902 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
903 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
/* One-shot soft-updates cleanup and retry on ENOSPC. */
905 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
907 softdep_request_cleanup(fs, vp, cred,
/* Rate-limited "filesystem full" message. */
912 if (ppsratecheck(&lastfail, &curfail, 1)) {
913 ffs_fserr(fs, ip->i_number, "filesystem full");
914 uprintf("\n%s: write failed, filesystem "
915 "is full\n", fs->fs_fsmnt);
919 pref = newb + fs->fs_frag;
921 MPASS(allocblk < allociblk + nitems(allociblk));
922 MPASS(lbns_remfree < lbns + nitems(lbns));
924 *lbns_remfree++ = indirs[i].in_lbn;
925 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
927 nbp->b_blkno = fsbtodb(fs, nb);
929 if (DOINGSOFTDEP(vp)) {
930 softdep_setup_allocindir_meta(nbp, ip, bp,
931 indirs[i - 1].in_off, nb);
933 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
934 if (nbp->b_bufsize == fs->fs_bsize)
935 nbp->b_flags |= B_CLUSTEROK;
/* Child indirect must reach disk before its parent names it. */
938 if ((error = bwrite(nbp)) != 0) {
943 bap[indirs[i - 1].in_off] = nb;
/* Remember the shallowest level modified, for unwinding on failure. */
944 if (allocib == NULL && unwindidx < 0)
947 * If required, write synchronously, otherwise use
950 if (flags & IO_SYNC) {
953 if (bp->b_bufsize == fs->fs_bsize)
954 bp->b_flags |= B_CLUSTEROK;
959 * If asked only for the indirect block, then return it.
961 if (flags & BA_METAONLY) {
962 curthread_pflags_restore(saved_inbdflush);
967 * Get the data block, allocating if necessary.
972 * If allocating metadata at the front of the cylinder
973 * group and parent indirect block has just been allocated,
974 * then cluster next to it if it is the first indirect in
975 * the file. Otherwise it has been allocated in the metadata
976 * area, so we want to find our own place out in the data area.
978 if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
979 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
981 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
982 flags | IO_BUFLOCKED, cred, &newb);
/* Same one-shot cleanup-and-retry on ENOSPC as above. */
985 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
987 softdep_request_cleanup(fs, vp, cred,
992 if (ppsratecheck(&lastfail, &curfail, 1)) {
993 ffs_fserr(fs, ip->i_number, "filesystem full");
994 uprintf("\n%s: write failed, filesystem "
995 "is full\n", fs->fs_fsmnt);
1000 MPASS(allocblk < allociblk + nitems(allociblk));
1001 MPASS(lbns_remfree < lbns + nitems(lbns));
1003 *lbns_remfree++ = lbn;
1004 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1005 nbp->b_blkno = fsbtodb(fs, nb);
1006 if (flags & BA_CLRBUF)
1007 vfs_bio_clrbuf(nbp);
1008 if (DOINGSOFTDEP(vp))
1009 softdep_setup_allocindir_page(ip, lbn, bp,
1010 indirs[i].in_off, nb, 0, nbp);
1011 bap[indirs[i].in_off] = nb;
1013 * If required, write synchronously, otherwise use
1016 if (flags & IO_SYNC) {
1019 if (bp->b_bufsize == fs->fs_bsize)
1020 bp->b_flags |= B_CLUSTEROK;
1023 curthread_pflags_restore(saved_inbdflush);
1029 * If requested clear invalid portions of the buffer. If we
1030 * have to do a read-before-write (typical if BA_CLRBUF is set),
1031 * try to do some read-ahead in the sequential case to reduce
1032 * the number of I/O transactions.
1034 if (flags & BA_CLRBUF) {
1035 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1036 if (seqcount != 0 &&
1037 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1038 !(vm_page_count_severe() || buf_dirty_count_severe())) {
1039 error = cluster_read(vp, ip->i_size, lbn,
1040 (int)fs->fs_bsize, NOCRED,
1041 MAXBSIZE, seqcount, gbflags, &nbp);
1043 error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1044 NOCRED, gbflags, &nbp);
1051 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1052 nbp->b_blkno = fsbtodb(fs, nb);
1054 curthread_pflags_restore(saved_inbdflush);
/* NOTE(review): failure-handling label presumably sits on an elided
 * line just above here — confirm against the full source. */
1058 curthread_pflags_restore(saved_inbdflush);
1060 * If we have failed to allocate any blocks, simply return the error.
1061 * This is the usual case and avoids the need to fsync the file.
1063 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1066 * If we have failed part way through block allocation, we
1067 * have to deallocate any indirect blocks that we have allocated.
1068 * We have to fsync the file before we start to get rid of all
1069 * of its dependencies so that we do not leave them dangling.
1070 * We have to sync it at the end so that the soft updates code
1071 * does not find any untracked changes. Although this is really
1072 * slow, running out of disk space is not expected to be a common
1073 * occurrence. The error return from fsync is ignored as we already
1074 * have an error to return to the user.
1076 * XXX Still have to journal the free below
1078 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1079 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1080 blkp < allocblk; blkp++, lbns_remfree++) {
1082 * We shall not leave the freed blocks on the vnode
1083 * buffer object lists.
1085 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1086 GB_NOCREAT | GB_UNMAPPED);
1088 KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1089 ("mismatch2 l %jd %jd b %ju %ju",
1090 (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1091 (uintmax_t)bp->b_blkno,
1092 (uintmax_t)fsbtodb(fs, *blkp)));
/* Discard the buffer outright; its block is about to be freed. */
1093 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1094 bp->b_flags &= ~(B_ASYNC | B_CACHE);
1097 deallocated += fs->fs_bsize;
1099 if (allocib != NULL) {
1101 } else if (unwindidx >= 0) {
1104 r = bread(vp, indirs[unwindidx].in_lbn,
1105 (int)fs->fs_bsize, NOCRED, &bp);
1107 panic("Could not unwind indirect block, error %d", r);
/* Clear the pointer installed at the shallowest modified level. */
1110 bap = (ufs2_daddr_t *)bp->b_data;
1111 bap[indirs[unwindidx].in_off] = 0;
1112 if (flags & IO_SYNC) {
1115 if (bp->b_bufsize == fs->fs_bsize)
1116 bp->b_flags |= B_CLUSTEROK;
1124 * Restore user's disk quota because allocation failed.
1126 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1128 dp->di_blocks -= btodb(deallocated);
1129 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1131 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1133 * After the buffers are invalidated and on-disk pointers are
1134 * cleared, free the blocks.
1136 for (blkp = allociblk; blkp < allocblk; blkp++) {
/* Restart the parallel lbn walk from the start of the record. */
1138 if (blkp == allociblk)
1139 lbns_remfree = lbns;
/* A still-present buffer here means the invalidation above failed. */
1140 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1141 GB_NOCREAT | GB_UNMAPPED);
1143 panic("zombie2 %jd %ju %ju",
1144 (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1145 (uintmax_t)fsbtodb(fs, *blkp));
1149 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
1150 ip->i_number, vp->v_type, NULL);