2 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * This software was developed for the FreeBSD Project by Marshall
6 * Kirk McKusick and Network Associates Laboratories, the Security
7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * Copyright (c) 1982, 1986, 1989, 1993
33 * The Regents of the University of California. All rights reserved.
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
62 #include <sys/cdefs.h>
63 __FBSDID("$FreeBSD$");
65 #include <sys/param.h>
66 #include <sys/systm.h>
70 #include <sys/mount.h>
71 #include <sys/vnode.h>
73 #include <ufs/ufs/quota.h>
74 #include <ufs/ufs/inode.h>
75 #include <ufs/ufs/ufs_extern.h>
76 #include <ufs/ufs/extattr.h>
77 #include <ufs/ufs/ufsmount.h>
79 #include <ufs/ffs/fs.h>
80 #include <ufs/ffs/ffs_extern.h>
83 * Balloc defines the structure of filesystem storage
84 * by allocating the physical blocks on a device given
85 * the inode and the logical block number in a file.
86 * This is the allocation strategy for UFS1. Below is
87 * the allocation strategy for UFS2.
90 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
91 struct ucred *cred, int flags, struct buf **bpp)
94 struct ufs1_dinode *dp;
95 ufs_lbn_t lbn, lastlbn;
100 struct indir indirs[UFS_NIADDR + 2];
101 int deallocated, osize, nsize, num, i, error;
103 ufs1_daddr_t *bap, pref;
104 ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
105 ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
108 static struct timeval lastfail;
110 int gbflags, reclaimed;
/*
 * Map the starting byte offset to its logical block number; "size"
 * becomes the number of bytes needed within that block (offset-in-block
 * plus the caller's size), which must not exceed one filesystem block.
 */
116 lbn = lblkno(fs, startoffset);
117 size = blkoff(fs, startoffset) + size;
119 if (size > fs->fs_bsize)
120 panic("ffs_balloc_ufs1: blk too big");
126 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
128 if (DOINGSOFTDEP(vp))
129 softdep_prealloc(vp, MNT_WAIT);
131 * If the next write will extend the file into a new block,
132 * and the file is currently composed of a fragment
133 * this fragment has to be extended to be a full block.
135 lastlbn = lblkno(fs, ip->i_size);
136 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
138 osize = blksize(fs, ip, nb);
139 if (osize < fs->fs_bsize && osize > 0) {
141 error = ffs_realloccg(ip, nb, dp->di_db[nb],
142 ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
143 &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
147 if (DOINGSOFTDEP(vp))
148 softdep_setup_allocdirect(ip, nb,
149 dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
150 fs->fs_bsize, osize, bp);
151 ip->i_size = smalllblktosize(fs, nb + 1);
152 dp->di_size = ip->i_size;
153 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
154 ip->i_flag |= IN_CHANGE | IN_UPDATE;
157 else if (DOINGASYNC(vp))
164 * The first UFS_NDADDR blocks are direct blocks
166 if (lbn < UFS_NDADDR) {
167 if (flags & BA_METAONLY)
168 panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
/* Block already allocated and fully covered by the file size: just read it. */
170 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
171 error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
176 bp->b_blkno = fsbtodb(fs, nb);
182 * Consider need to reallocate a fragment.
184 osize = fragroundup(fs, blkoff(fs, ip->i_size));
185 nsize = fragroundup(fs, size);
186 if (nsize <= osize) {
187 error = bread(vp, lbn, osize, NOCRED, &bp);
192 bp->b_blkno = fsbtodb(fs, nb);
195 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
196 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
197 &dp->di_db[0]), osize, nsize, flags,
201 if (DOINGSOFTDEP(vp))
202 softdep_setup_allocdirect(ip, lbn,
203 dbtofsb(fs, bp->b_blkno), nb,
/* No block yet: allocate a fragment if this is the last block, else a full block. */
207 if (ip->i_size < smalllblktosize(fs, lbn + 1))
208 nsize = fragroundup(fs, size);
210 nsize = fs->fs_bsize;
212 error = ffs_alloc(ip, lbn,
213 ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
214 nsize, flags, cred, &newb);
217 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
218 bp->b_blkno = fsbtodb(fs, newb);
219 if (flags & BA_CLRBUF)
221 if (DOINGSOFTDEP(vp))
222 softdep_setup_allocdirect(ip, lbn, newb, 0,
225 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
226 ip->i_flag |= IN_CHANGE | IN_UPDATE;
231 * Determine the number of levels of indirection.
234 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
238 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
/* NOTE(review): TDP_INBDFLUSH is set while indirect buffers are held locked
 * and restored on every exit path below — presumably to keep the buf daemon
 * from recursing into this thread; confirm against kern/vfs_bio.c. */
240 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
242 * Fetch the first indirect block allocating if necessary.
245 nb = dp->di_ib[indirs[0].in_off];
/*
 * Every block allocated from here on is recorded in allociblk[]/lbns[]
 * so a partial failure can be unwound by the cleanup code below.
 */
247 allocblk = allociblk;
251 pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
253 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
254 flags, cred, &newb)) != 0) {
255 curthread_pflags_restore(saved_inbdflush);
/* Prefer to cluster the next allocation right after the new block. */
258 pref = newb + fs->fs_frag;
260 MPASS(allocblk < allociblk + nitems(allociblk));
261 MPASS(lbns_remfree < lbns + nitems(lbns));
263 *lbns_remfree++ = indirs[1].in_lbn;
264 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
265 bp->b_blkno = fsbtodb(fs, nb);
267 if (DOINGSOFTDEP(vp)) {
268 softdep_setup_allocdirect(ip,
269 UFS_NDADDR + indirs[0].in_off, newb, 0,
270 fs->fs_bsize, 0, bp);
272 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
273 if (bp->b_bufsize == fs->fs_bsize)
274 bp->b_flags |= B_CLUSTEROK;
277 if ((error = bwrite(bp)) != 0)
280 allocib = &dp->di_ib[indirs[0].in_off];
282 ip->i_flag |= IN_CHANGE | IN_UPDATE;
285 * Fetch through the indirect blocks, allocating as necessary.
290 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
295 bap = (ufs1_daddr_t *)bp->b_data;
296 nb = bap[indirs[i].in_off];
306 * If parent indirect has just been allocated, try to cluster
307 * immediately following it.
310 pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
312 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
313 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
/* Allocation failed: with soft updates, request a cleanup once and
 * retry; otherwise report "filesystem full" (rate-limited) and unwind. */
315 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
317 softdep_request_cleanup(fs, vp, cred,
322 if (ppsratecheck(&lastfail, &curfail, 1)) {
323 ffs_fserr(fs, ip->i_number, "filesystem full");
324 uprintf("\n%s: write failed, filesystem "
325 "is full\n", fs->fs_fsmnt);
329 pref = newb + fs->fs_frag;
331 MPASS(allocblk < allociblk + nitems(allociblk));
332 MPASS(lbns_remfree < lbns + nitems(lbns));
334 *lbns_remfree++ = indirs[i].in_lbn;
335 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
336 nbp->b_blkno = fsbtodb(fs, nb);
338 if (DOINGSOFTDEP(vp)) {
339 softdep_setup_allocindir_meta(nbp, ip, bp,
340 indirs[i - 1].in_off, nb);
342 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
343 if (nbp->b_bufsize == fs->fs_bsize)
344 nbp->b_flags |= B_CLUSTEROK;
347 if ((error = bwrite(nbp)) != 0) {
352 bap[indirs[i - 1].in_off] = nb;
353 if (allocib == NULL && unwindidx < 0)
356 * If required, write synchronously, otherwise use
359 if (flags & IO_SYNC) {
362 if (bp->b_bufsize == fs->fs_bsize)
363 bp->b_flags |= B_CLUSTEROK;
368 * If asked only for the indirect block, then return it.
370 if (flags & BA_METAONLY) {
371 curthread_pflags_restore(saved_inbdflush);
376 * Get the data block, allocating if necessary.
381 * If allocating metadata at the front of the cylinder
382 * group and parent indirect block has just been allocated,
383 * then cluster next to it if it is the first indirect in
384 * the file. Otherwise it has been allocated in the metadata
385 * area, so we want to find our own place out in the data area.
387 if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
388 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
390 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
391 flags | IO_BUFLOCKED, cred, &newb);
/* Same ENOSPC recovery as for the indirect blocks above. */
394 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
396 softdep_request_cleanup(fs, vp, cred,
401 if (ppsratecheck(&lastfail, &curfail, 1)) {
402 ffs_fserr(fs, ip->i_number, "filesystem full");
403 uprintf("\n%s: write failed, filesystem "
404 "is full\n", fs->fs_fsmnt);
409 MPASS(allocblk < allociblk + nitems(allociblk));
410 MPASS(lbns_remfree < lbns + nitems(lbns));
412 *lbns_remfree++ = lbn;
413 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
414 nbp->b_blkno = fsbtodb(fs, nb);
415 if (flags & BA_CLRBUF)
417 if (DOINGSOFTDEP(vp))
418 softdep_setup_allocindir_page(ip, lbn, bp,
419 indirs[i].in_off, nb, 0, nbp);
420 bap[indirs[i].in_off] = nb;
422 * If required, write synchronously, otherwise use
425 if (flags & IO_SYNC) {
428 if (bp->b_bufsize == fs->fs_bsize)
429 bp->b_flags |= B_CLUSTEROK;
432 curthread_pflags_restore(saved_inbdflush);
/* Data block already existed: read it, clustering ahead when sequential. */
437 if (flags & BA_CLRBUF) {
438 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
440 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
441 !(vm_page_count_severe() || buf_dirty_count_severe())) {
442 error = cluster_read(vp, ip->i_size, lbn,
443 (int)fs->fs_bsize, NOCRED,
444 MAXBSIZE, seqcount, gbflags, &nbp);
446 error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
454 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
455 nbp->b_blkno = fsbtodb(fs, nb);
457 curthread_pflags_restore(saved_inbdflush);
461 curthread_pflags_restore(saved_inbdflush);
463 * If we have failed to allocate any blocks, simply return the error.
464 * This is the usual case and avoids the need to fsync the file.
466 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
469 * If we have failed part way through block allocation, we
470 * have to deallocate any indirect blocks that we have allocated.
471 * We have to fsync the file before we start to get rid of all
472 * of its dependencies so that we do not leave them dangling.
473 * We have to sync it at the end so that the soft updates code
474 * does not find any untracked changes. Although this is really
475 * slow, running out of disk space is not expected to be a common
476 * occurrence. The error return from fsync is ignored as we already
477 * have an error to return to the user.
479 * XXX Still have to journal the free below
481 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
482 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
483 blkp < allocblk; blkp++, lbns_remfree++) {
485 * We shall not leave the freed blocks on the vnode
486 * buffer object lists.
488 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
489 GB_NOCREAT | GB_UNMAPPED);
491 KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
492 ("mismatch1 l %jd %jd b %ju %ju",
493 (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
494 (uintmax_t)bp->b_blkno,
495 (uintmax_t)fsbtodb(fs, *blkp)));
496 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
497 bp->b_flags &= ~(B_ASYNC | B_CACHE);
500 deallocated += fs->fs_bsize;
502 if (allocib != NULL) {
504 } else if (unwindidx >= 0) {
/* Clear the on-disk pointer in the surviving parent indirect block. */
507 r = bread(vp, indirs[unwindidx].in_lbn,
508 (int)fs->fs_bsize, NOCRED, &bp);
510 panic("Could not unwind indirect block, error %d", r);
513 bap = (ufs1_daddr_t *)bp->b_data;
514 bap[indirs[unwindidx].in_off] = 0;
515 if (flags & IO_SYNC) {
518 if (bp->b_bufsize == fs->fs_bsize)
519 bp->b_flags |= B_CLUSTEROK;
527 * Restore user's disk quota because allocation failed.
529 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
531 dp->di_blocks -= btodb(deallocated);
532 ip->i_flag |= IN_CHANGE | IN_UPDATE;
534 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
536 * After the buffers are invalidated and on-disk pointers are
537 * cleared, free the blocks.
539 for (blkp = allociblk; blkp < allocblk; blkp++) {
541 if (blkp == allociblk)
543 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
544 GB_NOCREAT | GB_UNMAPPED);
546 panic("zombie1 %jd %ju %ju",
547 (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
548 (uintmax_t)fsbtodb(fs, *blkp));
552 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
553 ip->i_number, vp->v_type, NULL);
559 * Balloc defines the structure of file system storage
560 * by allocating the physical blocks on a device given
561 * the inode and the logical block number in a file.
562 * This is the allocation strategy for UFS2. Above is
563 * the allocation strategy for UFS1.
566 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
567 struct ucred *cred, int flags, struct buf **bpp)
570 struct ufs2_dinode *dp;
571 ufs_lbn_t lbn, lastlbn;
573 struct buf *bp, *nbp;
574 struct ufsmount *ump;
575 struct indir indirs[UFS_NIADDR + 2];
576 ufs2_daddr_t nb, newb, *bap, pref;
577 ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
578 ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
579 int deallocated, osize, nsize, num, i, error;
582 static struct timeval lastfail;
584 int gbflags, reclaimed;
/*
 * Map the starting byte offset to its logical block number; "size"
 * becomes the number of bytes needed within that block, which must
 * not exceed one filesystem block.
 */
590 lbn = lblkno(fs, startoffset);
591 size = blkoff(fs, startoffset) + size;
593 if (size > fs->fs_bsize)
594 panic("ffs_balloc_ufs2: blk too big");
598 gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
600 if (DOINGSOFTDEP(vp))
601 softdep_prealloc(vp, MNT_WAIT);
604 * Check for allocating external data.
/*
 * IO_EXT selects the external-attribute area: at most UFS_NXADDR direct
 * blocks tracked in di_extb[]/di_extsize, read at negated logical block
 * numbers (-1 - lbn) and tagged BX_ALTDATA on their buffers.
 */
606 if (flags & IO_EXT) {
607 if (lbn >= UFS_NXADDR)
610 * If the next write will extend the data into a new block,
611 * and the data is currently composed of a fragment
612 * this fragment has to be extended to be a full block.
614 lastlbn = lblkno(fs, dp->di_extsize);
617 osize = sblksize(fs, dp->di_extsize, nb);
618 if (osize < fs->fs_bsize && osize > 0) {
620 error = ffs_realloccg(ip, -1 - nb,
622 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
623 &dp->di_extb[0]), osize,
624 (int)fs->fs_bsize, flags, cred, &bp);
627 if (DOINGSOFTDEP(vp))
628 softdep_setup_allocext(ip, nb,
629 dbtofsb(fs, bp->b_blkno),
631 fs->fs_bsize, osize, bp);
632 dp->di_extsize = smalllblktosize(fs, nb + 1);
633 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
634 bp->b_xflags |= BX_ALTDATA;
635 ip->i_flag |= IN_CHANGE;
643 * All blocks are direct blocks
645 if (flags & BA_METAONLY)
646 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
647 nb = dp->di_extb[lbn];
648 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
649 error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
655 bp->b_blkno = fsbtodb(fs, nb);
656 bp->b_xflags |= BX_ALTDATA;
662 * Consider need to reallocate a fragment.
664 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
665 nsize = fragroundup(fs, size);
666 if (nsize <= osize) {
667 error = bread_gb(vp, -1 - lbn, osize, NOCRED,
673 bp->b_blkno = fsbtodb(fs, nb);
674 bp->b_xflags |= BX_ALTDATA;
677 error = ffs_realloccg(ip, -1 - lbn,
679 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
680 &dp->di_extb[0]), osize, nsize, flags,
684 bp->b_xflags |= BX_ALTDATA;
685 if (DOINGSOFTDEP(vp))
686 softdep_setup_allocext(ip, lbn,
687 dbtofsb(fs, bp->b_blkno), nb,
/* No block yet: allocate a fragment if this is the last block, else a full block. */
691 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
692 nsize = fragroundup(fs, size);
694 nsize = fs->fs_bsize;
696 error = ffs_alloc(ip, lbn,
697 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
698 nsize, flags, cred, &newb);
701 bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
702 bp->b_blkno = fsbtodb(fs, newb);
703 bp->b_xflags |= BX_ALTDATA;
704 if (flags & BA_CLRBUF)
706 if (DOINGSOFTDEP(vp))
707 softdep_setup_allocext(ip, lbn, newb, 0,
710 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
711 ip->i_flag |= IN_CHANGE;
716 * If the next write will extend the file into a new block,
717 * and the file is currently composed of a fragment
718 * this fragment has to be extended to be a full block.
720 lastlbn = lblkno(fs, ip->i_size);
721 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
723 osize = blksize(fs, ip, nb);
724 if (osize < fs->fs_bsize && osize > 0) {
726 error = ffs_realloccg(ip, nb, dp->di_db[nb],
727 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
728 &dp->di_db[0]), osize, (int)fs->fs_bsize,
732 if (DOINGSOFTDEP(vp))
733 softdep_setup_allocdirect(ip, nb,
734 dbtofsb(fs, bp->b_blkno),
736 fs->fs_bsize, osize, bp);
737 ip->i_size = smalllblktosize(fs, nb + 1);
738 dp->di_size = ip->i_size;
739 dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
740 ip->i_flag |= IN_CHANGE | IN_UPDATE;
748 * The first UFS_NDADDR blocks are direct blocks
750 if (lbn < UFS_NDADDR) {
751 if (flags & BA_METAONLY)
752 panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
/* Block already allocated and fully covered by the file size: just read it. */
754 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
755 error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
761 bp->b_blkno = fsbtodb(fs, nb);
767 * Consider need to reallocate a fragment.
769 osize = fragroundup(fs, blkoff(fs, ip->i_size));
770 nsize = fragroundup(fs, size);
771 if (nsize <= osize) {
772 error = bread_gb(vp, lbn, osize, NOCRED,
778 bp->b_blkno = fsbtodb(fs, nb);
781 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
782 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
783 &dp->di_db[0]), osize, nsize, flags,
787 if (DOINGSOFTDEP(vp))
788 softdep_setup_allocdirect(ip, lbn,
789 dbtofsb(fs, bp->b_blkno), nb,
793 if (ip->i_size < smalllblktosize(fs, lbn + 1))
794 nsize = fragroundup(fs, size);
796 nsize = fs->fs_bsize;
798 error = ffs_alloc(ip, lbn,
799 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
800 &dp->di_db[0]), nsize, flags, cred, &newb);
803 bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
804 bp->b_blkno = fsbtodb(fs, newb);
805 if (flags & BA_CLRBUF)
807 if (DOINGSOFTDEP(vp))
808 softdep_setup_allocdirect(ip, lbn, newb, 0,
811 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
812 ip->i_flag |= IN_CHANGE | IN_UPDATE;
817 * Determine the number of levels of indirection.
820 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
824 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
/* NOTE(review): TDP_INBDFLUSH is set while indirect buffers are held locked
 * and restored on every exit path below — presumably to keep the buf daemon
 * from recursing into this thread; confirm against kern/vfs_bio.c. */
826 saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
828 * Fetch the first indirect block allocating if necessary.
831 nb = dp->di_ib[indirs[0].in_off];
/*
 * Every block allocated from here on is recorded in allociblk[]/lbns[]
 * so a partial failure can be unwound by the cleanup code below.
 */
833 allocblk = allociblk;
837 pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
839 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
840 flags, cred, &newb)) != 0) {
841 curthread_pflags_restore(saved_inbdflush);
/* Prefer to cluster the next allocation right after the new block. */
844 pref = newb + fs->fs_frag;
846 MPASS(allocblk < allociblk + nitems(allociblk));
847 MPASS(lbns_remfree < lbns + nitems(lbns));
849 *lbns_remfree++ = indirs[1].in_lbn;
850 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
852 bp->b_blkno = fsbtodb(fs, nb);
854 if (DOINGSOFTDEP(vp)) {
855 softdep_setup_allocdirect(ip,
856 UFS_NDADDR + indirs[0].in_off, newb, 0,
857 fs->fs_bsize, 0, bp);
859 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
860 if (bp->b_bufsize == fs->fs_bsize)
861 bp->b_flags |= B_CLUSTEROK;
864 if ((error = bwrite(bp)) != 0)
867 allocib = &dp->di_ib[indirs[0].in_off];
869 ip->i_flag |= IN_CHANGE | IN_UPDATE;
872 * Fetch through the indirect blocks, allocating as necessary.
877 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
882 bap = (ufs2_daddr_t *)bp->b_data;
883 nb = bap[indirs[i].in_off];
893 * If parent indirect has just been allocated, try to cluster
894 * immediately following it.
897 pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
899 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
900 flags | IO_BUFLOCKED, cred, &newb)) != 0) {
/* Allocation failed: with soft updates, request a cleanup once and
 * retry; otherwise report "filesystem full" (rate-limited) and unwind. */
902 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
904 softdep_request_cleanup(fs, vp, cred,
909 if (ppsratecheck(&lastfail, &curfail, 1)) {
910 ffs_fserr(fs, ip->i_number, "filesystem full");
911 uprintf("\n%s: write failed, filesystem "
912 "is full\n", fs->fs_fsmnt);
916 pref = newb + fs->fs_frag;
918 MPASS(allocblk < allociblk + nitems(allociblk));
919 MPASS(lbns_remfree < lbns + nitems(lbns));
921 *lbns_remfree++ = indirs[i].in_lbn;
922 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
924 nbp->b_blkno = fsbtodb(fs, nb);
926 if (DOINGSOFTDEP(vp)) {
927 softdep_setup_allocindir_meta(nbp, ip, bp,
928 indirs[i - 1].in_off, nb);
930 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
931 if (nbp->b_bufsize == fs->fs_bsize)
932 nbp->b_flags |= B_CLUSTEROK;
935 if ((error = bwrite(nbp)) != 0) {
940 bap[indirs[i - 1].in_off] = nb;
941 if (allocib == NULL && unwindidx < 0)
944 * If required, write synchronously, otherwise use
947 if (flags & IO_SYNC) {
950 if (bp->b_bufsize == fs->fs_bsize)
951 bp->b_flags |= B_CLUSTEROK;
956 * If asked only for the indirect block, then return it.
958 if (flags & BA_METAONLY) {
959 curthread_pflags_restore(saved_inbdflush);
964 * Get the data block, allocating if necessary.
969 * If allocating metadata at the front of the cylinder
970 * group and parent indirect block has just been allocated,
971 * then cluster next to it if it is the first indirect in
972 * the file. Otherwise it has been allocated in the metadata
973 * area, so we want to find our own place out in the data area.
975 if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
976 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
978 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
979 flags | IO_BUFLOCKED, cred, &newb);
/* Same ENOSPC recovery as for the indirect blocks above. */
982 if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
984 softdep_request_cleanup(fs, vp, cred,
989 if (ppsratecheck(&lastfail, &curfail, 1)) {
990 ffs_fserr(fs, ip->i_number, "filesystem full");
991 uprintf("\n%s: write failed, filesystem "
992 "is full\n", fs->fs_fsmnt);
997 MPASS(allocblk < allociblk + nitems(allociblk));
998 MPASS(lbns_remfree < lbns + nitems(lbns));
1000 *lbns_remfree++ = lbn;
1001 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1002 nbp->b_blkno = fsbtodb(fs, nb);
1003 if (flags & BA_CLRBUF)
1004 vfs_bio_clrbuf(nbp);
1005 if (DOINGSOFTDEP(vp))
1006 softdep_setup_allocindir_page(ip, lbn, bp,
1007 indirs[i].in_off, nb, 0, nbp);
1008 bap[indirs[i].in_off] = nb;
1010 * If required, write synchronously, otherwise use
1013 if (flags & IO_SYNC) {
1016 if (bp->b_bufsize == fs->fs_bsize)
1017 bp->b_flags |= B_CLUSTEROK;
1020 curthread_pflags_restore(saved_inbdflush);
1026 * If requested clear invalid portions of the buffer. If we
1027 * have to do a read-before-write (typical if BA_CLRBUF is set),
1028 * try to do some read-ahead in the sequential case to reduce
1029 * the number of I/O transactions.
1031 if (flags & BA_CLRBUF) {
1032 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1033 if (seqcount != 0 &&
1034 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1035 !(vm_page_count_severe() || buf_dirty_count_severe())) {
1036 error = cluster_read(vp, ip->i_size, lbn,
1037 (int)fs->fs_bsize, NOCRED,
1038 MAXBSIZE, seqcount, gbflags, &nbp);
1040 error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1041 NOCRED, gbflags, &nbp);
1048 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1049 nbp->b_blkno = fsbtodb(fs, nb);
1051 curthread_pflags_restore(saved_inbdflush);
1055 curthread_pflags_restore(saved_inbdflush);
1057 * If we have failed to allocate any blocks, simply return the error.
1058 * This is the usual case and avoids the need to fsync the file.
1060 if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1063 * If we have failed part way through block allocation, we
1064 * have to deallocate any indirect blocks that we have allocated.
1065 * We have to fsync the file before we start to get rid of all
1066 * of its dependencies so that we do not leave them dangling.
1067 * We have to sync it at the end so that the soft updates code
1068 * does not find any untracked changes. Although this is really
1069 * slow, running out of disk space is not expected to be a common
1070 * occurrence. The error return from fsync is ignored as we already
1071 * have an error to return to the user.
1073 * XXX Still have to journal the free below
1075 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1076 for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1077 blkp < allocblk; blkp++, lbns_remfree++) {
1079 * We shall not leave the freed blocks on the vnode
1080 * buffer object lists.
1082 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1083 GB_NOCREAT | GB_UNMAPPED);
1085 KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1086 ("mismatch2 l %jd %jd b %ju %ju",
1087 (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1088 (uintmax_t)bp->b_blkno,
1089 (uintmax_t)fsbtodb(fs, *blkp)));
1090 bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1091 bp->b_flags &= ~(B_ASYNC | B_CACHE);
1094 deallocated += fs->fs_bsize;
1096 if (allocib != NULL) {
1098 } else if (unwindidx >= 0) {
/* Clear the on-disk pointer in the surviving parent indirect block. */
1101 r = bread(vp, indirs[unwindidx].in_lbn,
1102 (int)fs->fs_bsize, NOCRED, &bp);
1104 panic("Could not unwind indirect block, error %d", r);
1107 bap = (ufs2_daddr_t *)bp->b_data;
1108 bap[indirs[unwindidx].in_off] = 0;
1109 if (flags & IO_SYNC) {
1112 if (bp->b_bufsize == fs->fs_bsize)
1113 bp->b_flags |= B_CLUSTEROK;
1121 * Restore user's disk quota because allocation failed.
1123 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1125 dp->di_blocks -= btodb(deallocated);
1126 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1128 (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1130 * After the buffers are invalidated and on-disk pointers are
1131 * cleared, free the blocks.
1133 for (blkp = allociblk; blkp < allocblk; blkp++) {
1135 if (blkp == allociblk)
1136 lbns_remfree = lbns;
1137 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1138 GB_NOCREAT | GB_UNMAPPED);
1140 panic("zombie2 %jd %ju %ju",
1141 (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1142 (uintmax_t)fsbtodb(fs, *blkp));
1146 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
1147 ip->i_number, vp->v_type, NULL);