CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/ufs/ffs/ffs_balloc.c
Merge llvm-project release/13.x llvmorg-13.0.0-rc2-43-gf56129fe78d5
[FreeBSD/FreeBSD.git] / sys / ufs / ffs / ffs_balloc.c
1 /*-
2  * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
3  *
4  * Copyright (c) 2002 Networks Associates Technology, Inc.
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project by Marshall
8  * Kirk McKusick and Network Associates Laboratories, the Security
9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11  * research program
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * Copyright (c) 1982, 1986, 1989, 1993
35  *      The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *      @(#)ffs_balloc.c        8.8 (Berkeley) 6/16/95
62  */
63
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD$");
66
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/bio.h>
70 #include <sys/buf.h>
71 #include <sys/lock.h>
72 #include <sys/mount.h>
73 #include <sys/vnode.h>
74 #include <sys/vmmeter.h>
75
76 #include <ufs/ufs/quota.h>
77 #include <ufs/ufs/inode.h>
78 #include <ufs/ufs/ufs_extern.h>
79 #include <ufs/ufs/extattr.h>
80 #include <ufs/ufs/ufsmount.h>
81
82 #include <ufs/ffs/fs.h>
83 #include <ufs/ffs/ffs_extern.h>
84
85 /*
86  * Balloc defines the structure of filesystem storage
87  * by allocating the physical blocks on a device given
88  * the inode and the logical block number in a file.
89  * This is the allocation strategy for UFS1. Below is
90  * the allocation strategy for UFS2.
91  */
92 int
93 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
94     struct ucred *cred, int flags, struct buf **bpp)
95 {
96         struct inode *ip;
97         struct ufs1_dinode *dp;
98         ufs_lbn_t lbn, lastlbn;
99         struct fs *fs;
100         ufs1_daddr_t nb;
101         struct buf *bp, *nbp;
102         struct mount *mp;
103         struct ufsmount *ump;
104         struct indir indirs[UFS_NIADDR + 2];
105         int deallocated, osize, nsize, num, i, error;
106         ufs2_daddr_t newb;
107         ufs1_daddr_t *bap, pref;
108         ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
109         ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
110         int unwindidx = -1;
111         int saved_inbdflush;
112         int gbflags, reclaimed;
113
114         ip = VTOI(vp);
115         dp = ip->i_din1;
116         fs = ITOFS(ip);
117         mp = ITOVFS(ip);
118         ump = ITOUMP(ip);
119         lbn = lblkno(fs, startoffset);
120         size = blkoff(fs, startoffset) + size;
121         reclaimed = 0;
122         if (size > fs->fs_bsize)
123                 panic("ffs_balloc_ufs1: blk too big");
124         *bpp = NULL;
125         if (flags & IO_EXT)
126                 return (EOPNOTSUPP);
127         if (lbn < 0)
128                 return (EFBIG);
129         gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
130
131         vn_seqc_write_begin(vp);
132
133         /*
134          * If the next write will extend the file into a new block,
135          * and the file is currently composed of a fragment
136          * this fragment has to be extended to be a full block.
137          */
138         lastlbn = lblkno(fs, ip->i_size);
139         if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
140                 nb = lastlbn;
141                 osize = blksize(fs, ip, nb);
142                 if (osize < fs->fs_bsize && osize > 0) {
143                         UFS_LOCK(ump);
144                         error = ffs_realloccg(ip, nb, dp->di_db[nb],
145                            ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
146                            &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
147                            cred, &bp);
148                         if (error)
149                                 goto done;
150                         if (DOINGSOFTDEP(vp))
151                                 softdep_setup_allocdirect(ip, nb,
152                                     dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
153                                     fs->fs_bsize, osize, bp);
154                         ip->i_size = smalllblktosize(fs, nb + 1);
155                         dp->di_size = ip->i_size;
156                         dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
157                         UFS_INODE_SET_FLAG(ip,
158                             IN_SIZEMOD | IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
159                         if (flags & IO_SYNC)
160                                 bwrite(bp);
161                         else if (DOINGASYNC(vp))
162                                 bdwrite(bp);
163                         else
164                                 bawrite(bp);
165                 }
166         }
167         /*
168          * The first UFS_NDADDR blocks are direct blocks
169          */
170         if (lbn < UFS_NDADDR) {
171                 if (flags & BA_METAONLY)
172                         panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
173                 nb = dp->di_db[lbn];
174                 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
175                         if ((flags & BA_CLRBUF) != 0) {
176                                 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
177                                     &bp);
178                                 if (error != 0)
179                                         goto done;
180                         } else {
181                                 bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
182                                     gbflags);
183                                 if (bp == NULL) {
184                                         error = EIO;
185                                         goto done;
186                                 }
187                                 vfs_bio_clrbuf(bp);
188                         }
189                         bp->b_blkno = fsbtodb(fs, nb);
190                         *bpp = bp;
191                         error = 0;
192                         goto done;
193                 }
194                 if (nb != 0) {
195                         /*
196                          * Consider need to reallocate a fragment.
197                          */
198                         osize = fragroundup(fs, blkoff(fs, ip->i_size));
199                         nsize = fragroundup(fs, size);
200                         if (nsize <= osize) {
201                                 error = bread(vp, lbn, osize, NOCRED, &bp);
202                                 if (error)
203                                         goto done;
204                                 bp->b_blkno = fsbtodb(fs, nb);
205                         } else {
206                                 UFS_LOCK(ump);
207                                 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
208                                     ffs_blkpref_ufs1(ip, lbn, (int)lbn,
209                                     &dp->di_db[0]), osize, nsize, flags,
210                                     cred, &bp);
211                                 if (error)
212                                         goto done;
213                                 if (DOINGSOFTDEP(vp))
214                                         softdep_setup_allocdirect(ip, lbn,
215                                             dbtofsb(fs, bp->b_blkno), nb,
216                                             nsize, osize, bp);
217                         }
218                 } else {
219                         if (ip->i_size < smalllblktosize(fs, lbn + 1))
220                                 nsize = fragroundup(fs, size);
221                         else
222                                 nsize = fs->fs_bsize;
223                         UFS_LOCK(ump);
224                         error = ffs_alloc(ip, lbn,
225                             ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
226                             nsize, flags, cred, &newb);
227                         if (error)
228                                 goto done;
229                         bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
230                         bp->b_blkno = fsbtodb(fs, newb);
231                         if (flags & BA_CLRBUF)
232                                 vfs_bio_clrbuf(bp);
233                         if (DOINGSOFTDEP(vp))
234                                 softdep_setup_allocdirect(ip, lbn, newb, 0,
235                                     nsize, 0, bp);
236                 }
237                 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
238                 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
239                 *bpp = bp;
240                 error = 0;
241                 goto done;
242         }
243         /*
244          * Determine the number of levels of indirection.
245          */
246         pref = 0;
247         if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
248                 goto done;
249 #ifdef INVARIANTS
250         if (num < 1)
251                 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
252 #endif
253         saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
254         /*
255          * Fetch the first indirect block allocating if necessary.
256          */
257         --num;
258         nb = dp->di_ib[indirs[0].in_off];
259         allocib = NULL;
260         allocblk = allociblk;
261         lbns_remfree = lbns;
262         if (nb == 0) {
263                 UFS_LOCK(ump);
264                 pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
265                     (ufs1_daddr_t *)0);
266                 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
267                     flags, cred, &newb)) != 0) {
268                         curthread_pflags_restore(saved_inbdflush);
269                         goto done;
270                 }
271                 pref = newb + fs->fs_frag;
272                 nb = newb;
273                 MPASS(allocblk < allociblk + nitems(allociblk));
274                 MPASS(lbns_remfree < lbns + nitems(lbns));
275                 *allocblk++ = nb;
276                 *lbns_remfree++ = indirs[1].in_lbn;
277                 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
278                 bp->b_blkno = fsbtodb(fs, nb);
279                 vfs_bio_clrbuf(bp);
280                 if (DOINGSOFTDEP(vp)) {
281                         softdep_setup_allocdirect(ip,
282                             UFS_NDADDR + indirs[0].in_off, newb, 0,
283                             fs->fs_bsize, 0, bp);
284                         bdwrite(bp);
285                 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
286                         if (bp->b_bufsize == fs->fs_bsize)
287                                 bp->b_flags |= B_CLUSTEROK;
288                         bdwrite(bp);
289                 } else {
290                         if ((error = bwrite(bp)) != 0)
291                                 goto fail;
292                 }
293                 allocib = &dp->di_ib[indirs[0].in_off];
294                 *allocib = nb;
295                 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
296         }
297         /*
298          * Fetch through the indirect blocks, allocating as necessary.
299          */
300 retry:
301         for (i = 1;;) {
302                 error = bread(vp,
303                     indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
304                 if (error) {
305                         goto fail;
306                 }
307                 bap = (ufs1_daddr_t *)bp->b_data;
308                 nb = bap[indirs[i].in_off];
309                 if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
310                     fs->fs_bsize)) != 0) {
311                         brelse(bp);
312                         goto fail;
313                 }
314                 if (i == num)
315                         break;
316                 i += 1;
317                 if (nb != 0) {
318                         bqrelse(bp);
319                         continue;
320                 }
321                 UFS_LOCK(ump);
322                 /*
323                  * If parent indirect has just been allocated, try to cluster
324                  * immediately following it.
325                  */
326                 if (pref == 0)
327                         pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
328                             (ufs1_daddr_t *)0);
329                 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
330                     flags | IO_BUFLOCKED, cred, &newb)) != 0) {
331                         brelse(bp);
332                         UFS_LOCK(ump);
333                         if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
334                                 softdep_request_cleanup(fs, vp, cred,
335                                     FLUSH_BLOCKS_WAIT);
336                                 UFS_UNLOCK(ump);
337                                 goto retry;
338                         }
339                         if (!ffs_fsfail_cleanup_locked(ump, error) &&
340                             ppsratecheck(&ump->um_last_fullmsg,
341                             &ump->um_secs_fullmsg, 1)) {
342                                 UFS_UNLOCK(ump);
343                                 ffs_fserr(fs, ip->i_number, "filesystem full");
344                                 uprintf("\n%s: write failed, filesystem "
345                                     "is full\n", fs->fs_fsmnt);
346                         } else {
347                                 UFS_UNLOCK(ump);
348                         }
349                         goto fail;
350                 }
351                 pref = newb + fs->fs_frag;
352                 nb = newb;
353                 MPASS(allocblk < allociblk + nitems(allociblk));
354                 MPASS(lbns_remfree < lbns + nitems(lbns));
355                 *allocblk++ = nb;
356                 *lbns_remfree++ = indirs[i].in_lbn;
357                 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
358                 nbp->b_blkno = fsbtodb(fs, nb);
359                 vfs_bio_clrbuf(nbp);
360                 if (DOINGSOFTDEP(vp)) {
361                         softdep_setup_allocindir_meta(nbp, ip, bp,
362                             indirs[i - 1].in_off, nb);
363                         bdwrite(nbp);
364                 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
365                         if (nbp->b_bufsize == fs->fs_bsize)
366                                 nbp->b_flags |= B_CLUSTEROK;
367                         bdwrite(nbp);
368                 } else {
369                         if ((error = bwrite(nbp)) != 0) {
370                                 brelse(bp);
371                                 goto fail;
372                         }
373                 }
374                 bap[indirs[i - 1].in_off] = nb;
375                 if (allocib == NULL && unwindidx < 0)
376                         unwindidx = i - 1;
377                 /*
378                  * If required, write synchronously, otherwise use
379                  * delayed write.
380                  */
381                 if (flags & IO_SYNC) {
382                         bwrite(bp);
383                 } else {
384                         if (bp->b_bufsize == fs->fs_bsize)
385                                 bp->b_flags |= B_CLUSTEROK;
386                         bdwrite(bp);
387                 }
388         }
389         /*
390          * If asked only for the indirect block, then return it.
391          */
392         if (flags & BA_METAONLY) {
393                 curthread_pflags_restore(saved_inbdflush);
394                 *bpp = bp;
395                 error = 0;
396                 goto done;
397         }
398         /*
399          * Get the data block, allocating if necessary.
400          */
401         if (nb == 0) {
402                 UFS_LOCK(ump);
403                 /*
404                  * If allocating metadata at the front of the cylinder
405                  * group and parent indirect block has just been allocated,
406                  * then cluster next to it if it is the first indirect in
407                  * the file. Otherwise it has been allocated in the metadata
408                  * area, so we want to find our own place out in the data area.
409                  */
410                 if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
411                         pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
412                             &bap[0]);
413                 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
414                     flags | IO_BUFLOCKED, cred, &newb);
415                 if (error) {
416                         brelse(bp);
417                         UFS_LOCK(ump);
418                         if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
419                                 softdep_request_cleanup(fs, vp, cred,
420                                     FLUSH_BLOCKS_WAIT);
421                                 UFS_UNLOCK(ump);
422                                 goto retry;
423                         }
424                         if (!ffs_fsfail_cleanup_locked(ump, error) &&
425                             ppsratecheck(&ump->um_last_fullmsg,
426                             &ump->um_secs_fullmsg, 1)) {
427                                 UFS_UNLOCK(ump);
428                                 ffs_fserr(fs, ip->i_number, "filesystem full");
429                                 uprintf("\n%s: write failed, filesystem "
430                                     "is full\n", fs->fs_fsmnt);
431                         } else {
432                                 UFS_UNLOCK(ump);
433                         }
434                         goto fail;
435                 }
436                 nb = newb;
437                 MPASS(allocblk < allociblk + nitems(allociblk));
438                 MPASS(lbns_remfree < lbns + nitems(lbns));
439                 *allocblk++ = nb;
440                 *lbns_remfree++ = lbn;
441                 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
442                 nbp->b_blkno = fsbtodb(fs, nb);
443                 if (flags & BA_CLRBUF)
444                         vfs_bio_clrbuf(nbp);
445                 if (DOINGSOFTDEP(vp))
446                         softdep_setup_allocindir_page(ip, lbn, bp,
447                             indirs[i].in_off, nb, 0, nbp);
448                 bap[indirs[i].in_off] = nb;
449                 /*
450                  * If required, write synchronously, otherwise use
451                  * delayed write.
452                  */
453                 if (flags & IO_SYNC) {
454                         bwrite(bp);
455                 } else {
456                         if (bp->b_bufsize == fs->fs_bsize)
457                                 bp->b_flags |= B_CLUSTEROK;
458                         bdwrite(bp);
459                 }
460                 curthread_pflags_restore(saved_inbdflush);
461                 *bpp = nbp;
462                 error = 0;
463                 goto done;
464         }
465         brelse(bp);
466         if (flags & BA_CLRBUF) {
467                 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
468                 if (seqcount != 0 &&
469                     (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
470                     !(vm_page_count_severe() || buf_dirty_count_severe())) {
471                         error = cluster_read(vp, ip->i_size, lbn,
472                             (int)fs->fs_bsize, NOCRED,
473                             MAXBSIZE, seqcount, gbflags, &nbp);
474                 } else {
475                         error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
476                             gbflags, &nbp);
477                 }
478                 if (error) {
479                         brelse(nbp);
480                         goto fail;
481                 }
482         } else {
483                 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
484                 nbp->b_blkno = fsbtodb(fs, nb);
485         }
486         curthread_pflags_restore(saved_inbdflush);
487         *bpp = nbp;
488         error = 0;
489         goto done;
490 fail:
491         curthread_pflags_restore(saved_inbdflush);
492         /*
493          * If we have failed to allocate any blocks, simply return the error.
494          * This is the usual case and avoids the need to fsync the file.
495          */
496         if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
497                 goto done;
498         /*
499          * If we have failed part way through block allocation, we
500          * have to deallocate any indirect blocks that we have allocated.
501          * We have to fsync the file before we start to get rid of all
502          * of its dependencies so that we do not leave them dangling.
503          * We have to sync it at the end so that the soft updates code
504          * does not find any untracked changes. Although this is really
505          * slow, running out of disk space is not expected to be a common
506          * occurrence. The error return from fsync is ignored as we already
507          * have an error to return to the user.
508          *
509          * XXX Still have to journal the free below
510          */
511         (void) ffs_syncvnode(vp, MNT_WAIT, 0);
512         for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
513              blkp < allocblk; blkp++, lbns_remfree++) {
514                 /*
515                  * We shall not leave the freed blocks on the vnode
516                  * buffer object lists.
517                  */
518                 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
519                     GB_NOCREAT | GB_UNMAPPED);
520                 if (bp != NULL) {
521                         KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
522                             ("mismatch1 l %jd %jd b %ju %ju",
523                             (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
524                             (uintmax_t)bp->b_blkno,
525                             (uintmax_t)fsbtodb(fs, *blkp)));
526                         bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
527                         bp->b_flags &= ~(B_ASYNC | B_CACHE);
528                         brelse(bp);
529                 }
530                 deallocated += fs->fs_bsize;
531         }
532         if (allocib != NULL) {
533                 *allocib = 0;
534         } else if (unwindidx >= 0) {
535                 int r;
536
537                 r = bread(vp, indirs[unwindidx].in_lbn, 
538                     (int)fs->fs_bsize, NOCRED, &bp);
539                 if (r) {
540                         panic("Could not unwind indirect block, error %d", r);
541                         brelse(bp);
542                 } else {
543                         bap = (ufs1_daddr_t *)bp->b_data;
544                         bap[indirs[unwindidx].in_off] = 0;
545                         if (flags & IO_SYNC) {
546                                 bwrite(bp);
547                         } else {
548                                 if (bp->b_bufsize == fs->fs_bsize)
549                                         bp->b_flags |= B_CLUSTEROK;
550                                 bdwrite(bp);
551                         }
552                 }
553         }
554         if (deallocated) {
555 #ifdef QUOTA
556                 /*
557                  * Restore user's disk quota because allocation failed.
558                  */
559                 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
560 #endif
561                 dp->di_blocks -= btodb(deallocated);
562                 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
563         }
564         (void) ffs_syncvnode(vp, MNT_WAIT, 0);
565         /*
566          * After the buffers are invalidated and on-disk pointers are
567          * cleared, free the blocks.
568          */
569         for (blkp = allociblk; blkp < allocblk; blkp++) {
570 #ifdef INVARIANTS
571                 if (blkp == allociblk)
572                         lbns_remfree = lbns;
573                 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
574                     GB_NOCREAT | GB_UNMAPPED);
575                 if (bp != NULL) {
576                         panic("zombie1 %jd %ju %ju",
577                             (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
578                             (uintmax_t)fsbtodb(fs, *blkp));
579                 }
580                 lbns_remfree++;
581 #endif
582                 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
583                     ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
584         }
585 done:
586         vn_seqc_write_end(vp);
587         return (error);
588 }
589
590 /*
591  * Balloc defines the structure of file system storage
592  * by allocating the physical blocks on a device given
593  * the inode and the logical block number in a file.
594  * This is the allocation strategy for UFS2. Above is
595  * the allocation strategy for UFS1.
596  */
597 int
598 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
599     struct ucred *cred, int flags, struct buf **bpp)
600 {
601         struct inode *ip;
602         struct ufs2_dinode *dp;
603         ufs_lbn_t lbn, lastlbn;
604         struct fs *fs;
605         struct buf *bp, *nbp;
606         struct mount *mp;
607         struct ufsmount *ump;
608         struct indir indirs[UFS_NIADDR + 2];
609         ufs2_daddr_t nb, newb, *bap, pref;
610         ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
611         ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
612         int deallocated, osize, nsize, num, i, error;
613         int unwindidx = -1;
614         int saved_inbdflush;
615         int gbflags, reclaimed;
616
617         ip = VTOI(vp);
618         dp = ip->i_din2;
619         fs = ITOFS(ip);
620         mp = ITOVFS(ip);
621         ump = ITOUMP(ip);
622         lbn = lblkno(fs, startoffset);
623         size = blkoff(fs, startoffset) + size;
624         reclaimed = 0;
625         if (size > fs->fs_bsize)
626                 panic("ffs_balloc_ufs2: blk too big");
627         *bpp = NULL;
628         if (lbn < 0)
629                 return (EFBIG);
630         gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
631
632         vn_seqc_write_begin(vp);
633
634         /*
635          * Check for allocating external data.
636          */
637         if (flags & IO_EXT) {
638                 if (lbn >= UFS_NXADDR) {
639                         error = EFBIG;
640                         goto done;
641                 }
642
643                 /*
644                  * If the next write will extend the data into a new block,
645                  * and the data is currently composed of a fragment
646                  * this fragment has to be extended to be a full block.
647                  */
648                 lastlbn = lblkno(fs, dp->di_extsize);
649                 if (lastlbn < lbn) {
650                         nb = lastlbn;
651                         osize = sblksize(fs, dp->di_extsize, nb);
652                         if (osize < fs->fs_bsize && osize > 0) {
653                                 UFS_LOCK(ump);
654                                 error = ffs_realloccg(ip, -1 - nb,
655                                     dp->di_extb[nb],
656                                     ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
657                                     &dp->di_extb[0]), osize,
658                                     (int)fs->fs_bsize, flags, cred, &bp);
659                                 if (error)
660                                         goto done;
661                                 if (DOINGSOFTDEP(vp))
662                                         softdep_setup_allocext(ip, nb,
663                                             dbtofsb(fs, bp->b_blkno),
664                                             dp->di_extb[nb],
665                                             fs->fs_bsize, osize, bp);
666                                 dp->di_extsize = smalllblktosize(fs, nb + 1);
667                                 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
668                                 bp->b_xflags |= BX_ALTDATA;
669                                 UFS_INODE_SET_FLAG(ip,
670                                     IN_SIZEMOD | IN_CHANGE | IN_IBLKDATA);
671                                 if (flags & IO_SYNC)
672                                         bwrite(bp);
673                                 else
674                                         bawrite(bp);
675                         }
676                 }
677                 /*
678                  * All blocks are direct blocks
679                  */
680                 if (flags & BA_METAONLY)
681                         panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
682                 nb = dp->di_extb[lbn];
683                 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
684                         error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
685                             gbflags, &bp);
686                         if (error)
687                                 goto done;
688                         bp->b_blkno = fsbtodb(fs, nb);
689                         bp->b_xflags |= BX_ALTDATA;
690                         *bpp = bp;
691                         goto done;
692                 }
693                 if (nb != 0) {
694                         /*
695                          * Consider need to reallocate a fragment.
696                          */
697                         osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
698                         nsize = fragroundup(fs, size);
699                         if (nsize <= osize) {
700                                 error = bread_gb(vp, -1 - lbn, osize, NOCRED,
701                                     gbflags, &bp);
702                                 if (error)
703                                         goto done;
704                                 bp->b_blkno = fsbtodb(fs, nb);
705                                 bp->b_xflags |= BX_ALTDATA;
706                         } else {
707                                 UFS_LOCK(ump);
708                                 error = ffs_realloccg(ip, -1 - lbn,
709                                     dp->di_extb[lbn],
710                                     ffs_blkpref_ufs2(ip, lbn, (int)lbn,
711                                     &dp->di_extb[0]), osize, nsize, flags,
712                                     cred, &bp);
713                                 if (error)
714                                         goto done;
715                                 bp->b_xflags |= BX_ALTDATA;
716                                 if (DOINGSOFTDEP(vp))
717                                         softdep_setup_allocext(ip, lbn,
718                                             dbtofsb(fs, bp->b_blkno), nb,
719                                             nsize, osize, bp);
720                         }
721                 } else {
722                         if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
723                                 nsize = fragroundup(fs, size);
724                         else
725                                 nsize = fs->fs_bsize;
726                         UFS_LOCK(ump);
727                         error = ffs_alloc(ip, lbn,
728                            ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
729                            nsize, flags, cred, &newb);
730                         if (error)
731                                 goto done;
732                         bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
733                         bp->b_blkno = fsbtodb(fs, newb);
734                         bp->b_xflags |= BX_ALTDATA;
735                         if (flags & BA_CLRBUF)
736                                 vfs_bio_clrbuf(bp);
737                         if (DOINGSOFTDEP(vp))
738                                 softdep_setup_allocext(ip, lbn, newb, 0,
739                                     nsize, 0, bp);
740                 }
741                 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
742                 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_IBLKDATA);
743                 *bpp = bp;
744                 error = 0;
745                 goto done;
746         }
747         /*
748          * If the next write will extend the file into a new block,
749          * and the file is currently composed of a fragment
750          * this fragment has to be extended to be a full block.
751          */
752         lastlbn = lblkno(fs, ip->i_size);
753         if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
754                 nb = lastlbn;
755                 osize = blksize(fs, ip, nb);
756                 if (osize < fs->fs_bsize && osize > 0) {
757                         UFS_LOCK(ump);
758                         error = ffs_realloccg(ip, nb, dp->di_db[nb],
759                             ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
760                             &dp->di_db[0]), osize, (int)fs->fs_bsize,
761                             flags, cred, &bp);
762                         if (error)
763                                 goto done;
764                         if (DOINGSOFTDEP(vp))
765                                 softdep_setup_allocdirect(ip, nb,
766                                     dbtofsb(fs, bp->b_blkno),
767                                     dp->di_db[nb],
768                                     fs->fs_bsize, osize, bp);
769                         ip->i_size = smalllblktosize(fs, nb + 1);
770                         dp->di_size = ip->i_size;
771                         dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
772                         UFS_INODE_SET_FLAG(ip,
773                             IN_SIZEMOD |IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
774                         if (flags & IO_SYNC)
775                                 bwrite(bp);
776                         else
777                                 bawrite(bp);
778                 }
779         }
780         /*
781          * The first UFS_NDADDR blocks are direct blocks
782          */
783         if (lbn < UFS_NDADDR) {
784                 if (flags & BA_METAONLY)
785                         panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
786                 nb = dp->di_db[lbn];
787                 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
788                         if ((flags & BA_CLRBUF) != 0) {
789                                 error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
790                                     gbflags, &bp);
791                                 if (error != 0)
792                                         goto done;
793                         } else {
794                                 bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
795                                     gbflags);
796                                 if (bp == NULL) {
797                                         error = EIO;
798                                         goto done;
799                                 }
800                                 vfs_bio_clrbuf(bp);
801                         }
802                         bp->b_blkno = fsbtodb(fs, nb);
803                         *bpp = bp;
804                         error = 0;
805                         goto done;
806                 }
807                 if (nb != 0) {
808                         /*
809                          * Consider need to reallocate a fragment.
810                          */
811                         osize = fragroundup(fs, blkoff(fs, ip->i_size));
812                         nsize = fragroundup(fs, size);
813                         if (nsize <= osize) {
814                                 error = bread_gb(vp, lbn, osize, NOCRED,
815                                     gbflags, &bp);
816                                 if (error)
817                                         goto done;
818                                 bp->b_blkno = fsbtodb(fs, nb);
819                         } else {
820                                 UFS_LOCK(ump);
821                                 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
822                                     ffs_blkpref_ufs2(ip, lbn, (int)lbn,
823                                     &dp->di_db[0]), osize, nsize, flags,
824                                     cred, &bp);
825                                 if (error)
826                                         goto done;
827                                 if (DOINGSOFTDEP(vp))
828                                         softdep_setup_allocdirect(ip, lbn,
829                                             dbtofsb(fs, bp->b_blkno), nb,
830                                             nsize, osize, bp);
831                         }
832                 } else {
833                         if (ip->i_size < smalllblktosize(fs, lbn + 1))
834                                 nsize = fragroundup(fs, size);
835                         else
836                                 nsize = fs->fs_bsize;
837                         UFS_LOCK(ump);
838                         error = ffs_alloc(ip, lbn,
839                             ffs_blkpref_ufs2(ip, lbn, (int)lbn,
840                                 &dp->di_db[0]), nsize, flags, cred, &newb);
841                         if (error)
842                                 goto done;
843                         bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
844                         bp->b_blkno = fsbtodb(fs, newb);
845                         if (flags & BA_CLRBUF)
846                                 vfs_bio_clrbuf(bp);
847                         if (DOINGSOFTDEP(vp))
848                                 softdep_setup_allocdirect(ip, lbn, newb, 0,
849                                     nsize, 0, bp);
850                 }
851                 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
852                 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
853                 *bpp = bp;
854                 error = 0;
855                 goto done;
856         }
857         /*
858          * Determine the number of levels of indirection.
859          */
860         pref = 0;
861         if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
862                 goto done;
863 #ifdef INVARIANTS
864         if (num < 1)
865                 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
866 #endif
867         saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
868         /*
869          * Fetch the first indirect block allocating if necessary.
870          */
871         --num;
872         nb = dp->di_ib[indirs[0].in_off];
873         allocib = NULL;
874         allocblk = allociblk;
875         lbns_remfree = lbns;
876         if (nb == 0) {
877                 UFS_LOCK(ump);
878                 pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
879                     (ufs2_daddr_t *)0);
880                 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
881                     flags, cred, &newb)) != 0) {
882                         curthread_pflags_restore(saved_inbdflush);
883                         goto done;
884                 }
885                 pref = newb + fs->fs_frag;
886                 nb = newb;
887                 MPASS(allocblk < allociblk + nitems(allociblk));
888                 MPASS(lbns_remfree < lbns + nitems(lbns));
889                 *allocblk++ = nb;
890                 *lbns_remfree++ = indirs[1].in_lbn;
891                 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
892                     GB_UNMAPPED);
893                 bp->b_blkno = fsbtodb(fs, nb);
894                 vfs_bio_clrbuf(bp);
895                 if (DOINGSOFTDEP(vp)) {
896                         softdep_setup_allocdirect(ip,
897                             UFS_NDADDR + indirs[0].in_off, newb, 0,
898                             fs->fs_bsize, 0, bp);
899                         bdwrite(bp);
900                 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
901                         if (bp->b_bufsize == fs->fs_bsize)
902                                 bp->b_flags |= B_CLUSTEROK;
903                         bdwrite(bp);
904                 } else {
905                         if ((error = bwrite(bp)) != 0)
906                                 goto fail;
907                 }
908                 allocib = &dp->di_ib[indirs[0].in_off];
909                 *allocib = nb;
910                 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
911         }
912         /*
913          * Fetch through the indirect blocks, allocating as necessary.
914          */
915 retry:
916         for (i = 1;;) {
917                 error = bread(vp,
918                     indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
919                 if (error) {
920                         goto fail;
921                 }
922                 bap = (ufs2_daddr_t *)bp->b_data;
923                 nb = bap[indirs[i].in_off];
924                 if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
925                     fs->fs_bsize)) != 0) {
926                         brelse(bp);
927                         goto fail;
928                 }
929                 if (i == num)
930                         break;
931                 i += 1;
932                 if (nb != 0) {
933                         bqrelse(bp);
934                         continue;
935                 }
936                 UFS_LOCK(ump);
937                 /*
938                  * If parent indirect has just been allocated, try to cluster
939                  * immediately following it.
940                  */
941                 if (pref == 0)
942                         pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
943                             (ufs2_daddr_t *)0);
944                 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
945                     flags | IO_BUFLOCKED, cred, &newb)) != 0) {
946                         brelse(bp);
947                         UFS_LOCK(ump);
948                         if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
949                                 softdep_request_cleanup(fs, vp, cred,
950                                     FLUSH_BLOCKS_WAIT);
951                                 UFS_UNLOCK(ump);
952                                 goto retry;
953                         }
954                         if (!ffs_fsfail_cleanup_locked(ump, error) &&
955                             ppsratecheck(&ump->um_last_fullmsg,
956                             &ump->um_secs_fullmsg, 1)) {
957                                 UFS_UNLOCK(ump);
958                                 ffs_fserr(fs, ip->i_number, "filesystem full");
959                                 uprintf("\n%s: write failed, filesystem "
960                                     "is full\n", fs->fs_fsmnt);
961                         } else {
962                                 UFS_UNLOCK(ump);
963                         }
964                         goto fail;
965                 }
966                 pref = newb + fs->fs_frag;
967                 nb = newb;
968                 MPASS(allocblk < allociblk + nitems(allociblk));
969                 MPASS(lbns_remfree < lbns + nitems(lbns));
970                 *allocblk++ = nb;
971                 *lbns_remfree++ = indirs[i].in_lbn;
972                 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
973                     GB_UNMAPPED);
974                 nbp->b_blkno = fsbtodb(fs, nb);
975                 vfs_bio_clrbuf(nbp);
976                 if (DOINGSOFTDEP(vp)) {
977                         softdep_setup_allocindir_meta(nbp, ip, bp,
978                             indirs[i - 1].in_off, nb);
979                         bdwrite(nbp);
980                 } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
981                         if (nbp->b_bufsize == fs->fs_bsize)
982                                 nbp->b_flags |= B_CLUSTEROK;
983                         bdwrite(nbp);
984                 } else {
985                         if ((error = bwrite(nbp)) != 0) {
986                                 brelse(bp);
987                                 goto fail;
988                         }
989                 }
990                 bap[indirs[i - 1].in_off] = nb;
991                 if (allocib == NULL && unwindidx < 0)
992                         unwindidx = i - 1;
993                 /*
994                  * If required, write synchronously, otherwise use
995                  * delayed write.
996                  */
997                 if (flags & IO_SYNC) {
998                         bwrite(bp);
999                 } else {
1000                         if (bp->b_bufsize == fs->fs_bsize)
1001                                 bp->b_flags |= B_CLUSTEROK;
1002                         bdwrite(bp);
1003                 }
1004         }
1005         /*
1006          * If asked only for the indirect block, then return it.
1007          */
1008         if (flags & BA_METAONLY) {
1009                 curthread_pflags_restore(saved_inbdflush);
1010                 *bpp = bp;
1011                 error = 0;
1012                 goto done;
1013         }
1014         /*
1015          * Get the data block, allocating if necessary.
1016          */
1017         if (nb == 0) {
1018                 UFS_LOCK(ump);
1019                 /*
1020                  * If allocating metadata at the front of the cylinder
1021                  * group and parent indirect block has just been allocated,
1022                  * then cluster next to it if it is the first indirect in
1023                  * the file. Otherwise it has been allocated in the metadata
1024                  * area, so we want to find our own place out in the data area.
1025                  */
1026                 if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
1027                         pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
1028                             &bap[0]);
1029                 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
1030                     flags | IO_BUFLOCKED, cred, &newb);
1031                 if (error) {
1032                         brelse(bp);
1033                         UFS_LOCK(ump);
1034                         if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
1035                                 softdep_request_cleanup(fs, vp, cred,
1036                                     FLUSH_BLOCKS_WAIT);
1037                                 UFS_UNLOCK(ump);
1038                                 goto retry;
1039                         }
1040                         if (!ffs_fsfail_cleanup_locked(ump, error) &&
1041                             ppsratecheck(&ump->um_last_fullmsg,
1042                             &ump->um_secs_fullmsg, 1)) {
1043                                 UFS_UNLOCK(ump);
1044                                 ffs_fserr(fs, ip->i_number, "filesystem full");
1045                                 uprintf("\n%s: write failed, filesystem "
1046                                     "is full\n", fs->fs_fsmnt);
1047                         } else {
1048                                 UFS_UNLOCK(ump);
1049                         }
1050                         goto fail;
1051                 }
1052                 nb = newb;
1053                 MPASS(allocblk < allociblk + nitems(allociblk));
1054                 MPASS(lbns_remfree < lbns + nitems(lbns));
1055                 *allocblk++ = nb;
1056                 *lbns_remfree++ = lbn;
1057                 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1058                 nbp->b_blkno = fsbtodb(fs, nb);
1059                 if (flags & BA_CLRBUF)
1060                         vfs_bio_clrbuf(nbp);
1061                 if (DOINGSOFTDEP(vp))
1062                         softdep_setup_allocindir_page(ip, lbn, bp,
1063                             indirs[i].in_off, nb, 0, nbp);
1064                 bap[indirs[i].in_off] = nb;
1065                 /*
1066                  * If required, write synchronously, otherwise use
1067                  * delayed write.
1068                  */
1069                 if (flags & IO_SYNC) {
1070                         bwrite(bp);
1071                 } else {
1072                         if (bp->b_bufsize == fs->fs_bsize)
1073                                 bp->b_flags |= B_CLUSTEROK;
1074                         bdwrite(bp);
1075                 }
1076                 curthread_pflags_restore(saved_inbdflush);
1077                 *bpp = nbp;
1078                 error = 0;
1079                 goto done;
1080         }
1081         brelse(bp);
1082         /*
1083          * If requested clear invalid portions of the buffer.  If we
1084          * have to do a read-before-write (typical if BA_CLRBUF is set),
1085          * try to do some read-ahead in the sequential case to reduce
1086          * the number of I/O transactions.
1087          */
1088         if (flags & BA_CLRBUF) {
1089                 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1090                 if (seqcount != 0 &&
1091                     (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1092                     !(vm_page_count_severe() || buf_dirty_count_severe())) {
1093                         error = cluster_read(vp, ip->i_size, lbn,
1094                             (int)fs->fs_bsize, NOCRED,
1095                             MAXBSIZE, seqcount, gbflags, &nbp);
1096                 } else {
1097                         error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1098                             NOCRED, gbflags, &nbp);
1099                 }
1100                 if (error) {
1101                         brelse(nbp);
1102                         goto fail;
1103                 }
1104         } else {
1105                 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1106                 nbp->b_blkno = fsbtodb(fs, nb);
1107         }
1108         curthread_pflags_restore(saved_inbdflush);
1109         *bpp = nbp;
1110         error = 0;
1111         goto done;
1112 fail:
1113         curthread_pflags_restore(saved_inbdflush);
1114         /*
1115          * If we have failed to allocate any blocks, simply return the error.
1116          * This is the usual case and avoids the need to fsync the file.
1117          */
1118         if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1119                 goto done;
1120         /*
1121          * If we have failed part way through block allocation, we
1122          * have to deallocate any indirect blocks that we have allocated.
1123          * We have to fsync the file before we start to get rid of all
1124          * of its dependencies so that we do not leave them dangling.
1125          * We have to sync it at the end so that the soft updates code
1126          * does not find any untracked changes. Although this is really
1127          * slow, running out of disk space is not expected to be a common
1128          * occurrence. The error return from fsync is ignored as we already
1129          * have an error to return to the user.
1130          *
1131          * XXX Still have to journal the free below
1132          */
1133         (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1134         for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1135              blkp < allocblk; blkp++, lbns_remfree++) {
1136                 /*
1137                  * We shall not leave the freed blocks on the vnode
1138                  * buffer object lists.
1139                  */
1140                 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1141                     GB_NOCREAT | GB_UNMAPPED);
1142                 if (bp != NULL) {
1143                         KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1144                             ("mismatch2 l %jd %jd b %ju %ju",
1145                             (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1146                             (uintmax_t)bp->b_blkno,
1147                             (uintmax_t)fsbtodb(fs, *blkp)));
1148                         bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1149                         bp->b_flags &= ~(B_ASYNC | B_CACHE);
1150                         brelse(bp);
1151                 }
1152                 deallocated += fs->fs_bsize;
1153         }
1154         if (allocib != NULL) {
1155                 *allocib = 0;
1156         } else if (unwindidx >= 0) {
1157                 int r;
1158
1159                 r = bread(vp, indirs[unwindidx].in_lbn, 
1160                     (int)fs->fs_bsize, NOCRED, &bp);
1161                 if (r) {
1162                         panic("Could not unwind indirect block, error %d", r);
1163                         brelse(bp);
1164                 } else {
1165                         bap = (ufs2_daddr_t *)bp->b_data;
1166                         bap[indirs[unwindidx].in_off] = 0;
1167                         if (flags & IO_SYNC) {
1168                                 bwrite(bp);
1169                         } else {
1170                                 if (bp->b_bufsize == fs->fs_bsize)
1171                                         bp->b_flags |= B_CLUSTEROK;
1172                                 bdwrite(bp);
1173                         }
1174                 }
1175         }
1176         if (deallocated) {
1177 #ifdef QUOTA
1178                 /*
1179                  * Restore user's disk quota because allocation failed.
1180                  */
1181                 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1182 #endif
1183                 dp->di_blocks -= btodb(deallocated);
1184                 UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
1185         }
1186         (void) ffs_syncvnode(vp, MNT_WAIT, 0);
1187         /*
1188          * After the buffers are invalidated and on-disk pointers are
1189          * cleared, free the blocks.
1190          */
1191         for (blkp = allociblk; blkp < allocblk; blkp++) {
1192 #ifdef INVARIANTS
1193                 if (blkp == allociblk)
1194                         lbns_remfree = lbns;
1195                 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1196                     GB_NOCREAT | GB_UNMAPPED);
1197                 if (bp != NULL) {
1198                         panic("zombie2 %jd %ju %ju",
1199                             (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1200                             (uintmax_t)fsbtodb(fs, *blkp));
1201                 }
1202                 lbns_remfree++;
1203 #endif
1204                 ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
1205                     ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
1206         }
1207 done:
1208         vn_seqc_write_end(vp);
1209         return (error);
1210 }