CyberLeo.Net >> Repos - FreeBSD/stable/8.git/blob - sys/ufs/ffs/ffs_balloc.c
MFC r362623:
[FreeBSD/stable/8.git] / sys / ufs / ffs / ffs_balloc.c
1 /*-
2  * Copyright (c) 2002 Networks Associates Technology, Inc.
3  * All rights reserved.
4  *
5  * This software was developed for the FreeBSD Project by Marshall
6  * Kirk McKusick and Network Associates Laboratories, the Security
7  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9  * research program
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * Copyright (c) 1982, 1986, 1989, 1993
33  *      The Regents of the University of California.  All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 4. Neither the name of the University nor the names of its contributors
44  *    may be used to endorse or promote products derived from this software
45  *    without specific prior written permission.
46  *
47  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57  * SUCH DAMAGE.
58  *
59  *      @(#)ffs_balloc.c        8.8 (Berkeley) 6/16/95
60  */
61
62 #include <sys/cdefs.h>
63 __FBSDID("$FreeBSD$");
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/bio.h>
68 #include <sys/buf.h>
69 #include <sys/lock.h>
70 #include <sys/mount.h>
71 #include <sys/vnode.h>
72
73 #include <ufs/ufs/quota.h>
74 #include <ufs/ufs/inode.h>
75 #include <ufs/ufs/ufs_extern.h>
76 #include <ufs/ufs/extattr.h>
77 #include <ufs/ufs/ufsmount.h>
78
79 #include <ufs/ffs/fs.h>
80 #include <ufs/ffs/ffs_extern.h>
81
82 /*
83  * Balloc defines the structure of filesystem storage
84  * by allocating the physical blocks on a device given
85  * the inode and the logical block number in a file.
86  * This is the allocation strategy for UFS1. Below is
87  * the allocation strategy for UFS2.
    *
    * Parameters:
    *   vp          - vnode of the file; its inode is obtained with VTOI().
    *   startoffset - byte offset in the file; selects the logical block.
    *   size        - byte count starting at startoffset.  The offset within
    *                 the block plus size may not exceed fs_bsize (panics).
    *   cred        - credentials handed to the block allocators.
    *   flags       - IO_EXT is rejected; BA_METAONLY, BA_CLRBUF, IO_SYNC and
    *                 the BA_SEQMASK bits are examined; the value is also
    *                 passed through to ffs_alloc() and ffs_realloccg().
    *   bpp         - out: buffer for the requested block (or, with
    *                 BA_METAONLY, the last indirect block).  Set to NULL
    *                 on entry and left NULL on every error return.
    *
    * Returns 0 on success, EOPNOTSUPP if IO_EXT is set, EFBIG if the
    * logical block number is negative, or the error reported by
    * ufs_getlbns(), bread(), cluster_read(), bwrite(), ffs_alloc() or
    * ffs_realloccg().
88  */
89 int
90 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
91     struct ucred *cred, int flags, struct buf **bpp)
92 {
93         struct inode *ip;
94         struct ufs1_dinode *dp;
95         ufs_lbn_t lbn, lastlbn;
96         struct fs *fs;
97         ufs1_daddr_t nb;
98         struct buf *bp, *nbp;
99         struct ufsmount *ump;
100         struct indir indirs[NIADDR + 2];
101         int deallocated, osize, nsize, num, i, error;
102         ufs2_daddr_t newb;
103         ufs1_daddr_t *bap, pref;
            /*
             * allociblk[] / lbns[] record, in parallel, the address and the
             * logical block number of every block allocated on the indirect
             * path so that the fail: code can undo them.  allocblk and
             * lbns_remfree are the fill cursors into those arrays.
             */
104         ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
105         ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
106         int unwindidx = -1;
107         int saved_inbdflush;
108
109         ip = VTOI(vp);
110         dp = ip->i_din1;
111         fs = ip->i_fs;
112         ump = ip->i_ump;
113         lbn = lblkno(fs, startoffset);
            /* From here on, size is the end offset of the request in its block. */
114         size = blkoff(fs, startoffset) + size;
115         if (size > fs->fs_bsize)
116                 panic("ffs_balloc_ufs1: blk too big");
117         *bpp = NULL;
118         if (flags & IO_EXT)
119                 return (EOPNOTSUPP);
120         if (lbn < 0)
121                 return (EFBIG);
122
123         /*
124          * If the next write will extend the file into a new block,
125          * and the file is currently composed of a fragment
126          * this fragment has to be extended to be a full block.
127          */
128         lastlbn = lblkno(fs, ip->i_size);
129         if (lastlbn < NDADDR && lastlbn < lbn) {
130                 nb = lastlbn;
131                 osize = blksize(fs, ip, nb);
132                 if (osize < fs->fs_bsize && osize > 0) {
                            /*
                             * NOTE(review): no UFS_UNLOCK appears anywhere in
                             * this function; presumably ffs_realloccg() and
                             * ffs_alloc() drop the lock before returning --
                             * confirm against their definitions.
                             */
133                         UFS_LOCK(ump);
134                         error = ffs_realloccg(ip, nb, dp->di_db[nb],
135                            ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
136                            &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
137                            cred, &bp);
138                         if (error)
139                                 return (error);
140                         if (DOINGSOFTDEP(vp))
141                                 softdep_setup_allocdirect(ip, nb,
142                                     dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
143                                     fs->fs_bsize, osize, bp);
                            /* The file now ends exactly at the end of block nb. */
144                         ip->i_size = smalllblktosize(fs, nb + 1);
145                         dp->di_size = ip->i_size;
146                         dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
147                         ip->i_flag |= IN_CHANGE | IN_UPDATE;
148                         if (flags & IO_SYNC)
149                                 bwrite(bp);
150                         else
151                                 bawrite(bp);
152                 }
153         }
154         /*
155          * The first NDADDR blocks are direct blocks
156          */
157         if (lbn < NDADDR) {
158                 if (flags & BA_METAONLY)
159                         panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
160                 nb = dp->di_db[lbn];
                    /*
                     * Case 1: the block is allocated and the file size covers
                     * all of it, so it only has to be read in.
                     */
161                 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
162                         error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
163                         if (error) {
164                                 brelse(bp);
165                                 return (error);
166                         }
167                         bp->b_blkno = fsbtodb(fs, nb);
168                         *bpp = bp;
169                         return (0);
170                 }
171                 if (nb != 0) {
172                         /*
173                          * Consider need to reallocate a fragment.
174                          */
175                         osize = fragroundup(fs, blkoff(fs, ip->i_size));
176                         nsize = fragroundup(fs, size);
177                         if (nsize <= osize) {
                                    /* Existing fragment is already big enough. */
178                                 error = bread(vp, lbn, osize, NOCRED, &bp);
179                                 if (error) {
180                                         brelse(bp);
181                                         return (error);
182                                 }
183                                 bp->b_blkno = fsbtodb(fs, nb);
184                         } else {
                                    /* Grow the fragment from osize to nsize bytes. */
185                                 UFS_LOCK(ump);
186                                 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
187                                     ffs_blkpref_ufs1(ip, lbn, (int)lbn,
188                                     &dp->di_db[0]), osize, nsize, flags,
189                                     cred, &bp);
190                                 if (error)
191                                         return (error);
192                                 if (DOINGSOFTDEP(vp))
193                                         softdep_setup_allocdirect(ip, lbn,
194                                             dbtofsb(fs, bp->b_blkno), nb,
195                                             nsize, osize, bp);
196                         }
197                 } else {
                            /*
                             * Nothing allocated yet.  Allocate only a fragment
                             * when the file size does not reach past this
                             * block, otherwise a full block.
                             */
198                         if (ip->i_size < smalllblktosize(fs, lbn + 1))
199                                 nsize = fragroundup(fs, size);
200                         else
201                                 nsize = fs->fs_bsize;
202                         UFS_LOCK(ump);
203                         error = ffs_alloc(ip, lbn,
204                             ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
205                             nsize, flags, cred, &newb);
206                         if (error)
207                                 return (error);
208                         bp = getblk(vp, lbn, nsize, 0, 0, 0);
209                         bp->b_blkno = fsbtodb(fs, newb);
210                         if (flags & BA_CLRBUF)
211                                 vfs_bio_clrbuf(bp);
212                         if (DOINGSOFTDEP(vp))
213                                 softdep_setup_allocdirect(ip, lbn, newb, 0,
214                                     nsize, 0, bp);
215                 }
                    /* Record the (possibly new) block address in the inode. */
216                 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
217                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
218                 *bpp = bp;
219                 return (0);
220         }
221         /*
222          * Determine the number of levels of indirection.
223          */
224         pref = 0;
225         if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
226                 return(error);
227 #ifdef INVARIANTS
228         if (num < 1)
229                 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
230 #endif
            /*
             * Set TDP_INBDFLUSH on the current thread and remember the value
             * returned; every exit path below hands it back to
             * curthread_pflags_restore().
             */
231         saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
232         /*
233          * Fetch the first indirect block allocating if necessary.
234          */
            /* After the decrement, the loop below stops when i == num. */
235         --num;
236         nb = dp->di_ib[indirs[0].in_off];
237         allocib = NULL;
238         allocblk = allociblk;
239         lbns_remfree = lbns;
240         if (nb == 0) {
241                 UFS_LOCK(ump);
242                 pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
243                 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
244                     flags, cred, &newb)) != 0) {
245                         curthread_pflags_restore(saved_inbdflush);
246                         return (error);
247                 }
248                 nb = newb;
                    /* Remember the new block and its lbn for the fail: path. */
249                 *allocblk++ = nb;
250                 *lbns_remfree++ = indirs[1].in_lbn;
251                 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
252                 bp->b_blkno = fsbtodb(fs, nb);
253                 vfs_bio_clrbuf(bp);
254                 if (DOINGSOFTDEP(vp)) {
255                         softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
256                             newb, 0, fs->fs_bsize, 0, bp);
257                         bdwrite(bp);
258                 } else {
259                         /*
260                          * Write synchronously so that indirect blocks
261                          * never point at garbage.
262                          */
263                         if (DOINGASYNC(vp))
264                                 bdwrite(bp);
265                         else if ((error = bwrite(bp)) != 0)
266                                 goto fail;
267                 }
                    /*
                     * Keep a pointer to the inode slot just filled in so the
                     * fail: path can zero it again.
                     */
268                 allocib = &dp->di_ib[indirs[0].in_off];
269                 *allocib = nb;
270                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
271         }
272         /*
273          * Fetch through the indirect blocks, allocating as necessary.
274          */
275         for (i = 1;;) {
276                 error = bread(vp,
277                     indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
278                 if (error) {
279                         brelse(bp);
280                         goto fail;
281                 }
282                 bap = (ufs1_daddr_t *)bp->b_data;
283                 nb = bap[indirs[i].in_off];
                    /*
                     * At the last level, leave the loop with bp still held
                     * and nb holding the data block address (0 if none).
                     */
284                 if (i == num)
285                         break;
                    /*
                     * i is advanced before the allocation code, so below
                     * indirs[i - 1] describes bp (the block just read) and
                     * indirs[i] describes the next level down.
                     */
286                 i += 1;
287                 if (nb != 0) {
288                         bqrelse(bp);
289                         continue;
290                 }
291                 UFS_LOCK(ump);
292                 if (pref == 0)
293                         pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
294                 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
295                     flags, cred, &newb)) != 0) {
296                         brelse(bp);
297                         goto fail;
298                 }
299                 nb = newb;
300                 *allocblk++ = nb;
301                 *lbns_remfree++ = indirs[i].in_lbn;
302                 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
303                 nbp->b_blkno = fsbtodb(fs, nb);
304                 vfs_bio_clrbuf(nbp);
305                 if (DOINGSOFTDEP(vp)) {
306                         softdep_setup_allocindir_meta(nbp, ip, bp,
307                             indirs[i - 1].in_off, nb);
308                         bdwrite(nbp);
309                 } else {
310                         /*
311                          * Write synchronously so that indirect blocks
312                          * never point at garbage.
313                          */
314                         if ((error = bwrite(nbp)) != 0) {
315                                 brelse(bp);
316                                 goto fail;
317                         }
318                 }
                    /* Hook the new indirect block into its parent (bp). */
319                 bap[indirs[i - 1].in_off] = nb;
                    /*
                     * If no inode-level pointer was newly set above, remember
                     * the first pre-existing indirect block that was modified;
                     * the fail: path clears that entry again.
                     */
320                 if (allocib == NULL && unwindidx < 0)
321                         unwindidx = i - 1;
322                 /*
323                  * If required, write synchronously, otherwise use
324                  * delayed write.
325                  */
326                 if (flags & IO_SYNC) {
327                         bwrite(bp);
328                 } else {
329                         if (bp->b_bufsize == fs->fs_bsize)
330                                 bp->b_flags |= B_CLUSTEROK;
331                         bdwrite(bp);
332                 }
333         }
334         /*
335          * If asked only for the indirect block, then return it.
336          */
337         if (flags & BA_METAONLY) {
338                 curthread_pflags_restore(saved_inbdflush);
339                 *bpp = bp;
340                 return (0);
341         }
342         /*
343          * Get the data block, allocating if necessary.
344          */
345         if (nb == 0) {
346                 UFS_LOCK(ump);
347                 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
348                 error = ffs_alloc(ip,
349                     lbn, pref, (int)fs->fs_bsize, flags, cred, &newb);
350                 if (error) {
351                         brelse(bp);
352                         goto fail;
353                 }
354                 nb = newb;
355                 *allocblk++ = nb;
356                 *lbns_remfree++ = lbn;
357                 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
358                 nbp->b_blkno = fsbtodb(fs, nb);
359                 if (flags & BA_CLRBUF)
360                         vfs_bio_clrbuf(nbp);
361                 if (DOINGSOFTDEP(vp))
362                         softdep_setup_allocindir_page(ip, lbn, bp,
363                             indirs[i].in_off, nb, 0, nbp);
                    /* Store the data block address in the last indirect block. */
364                 bap[indirs[i].in_off] = nb;
365                 /*
366                  * If required, write synchronously, otherwise use
367                  * delayed write.
368                  */
369                 if (flags & IO_SYNC) {
370                         bwrite(bp);
371                 } else {
372                         if (bp->b_bufsize == fs->fs_bsize)
373                                 bp->b_flags |= B_CLUSTEROK;
374                         bdwrite(bp);
375                 }
376                 curthread_pflags_restore(saved_inbdflush);
377                 *bpp = nbp;
378                 return (0);
379         }
            /*
             * The data block already exists, so the indirect block was not
             * modified here and can simply be released.
             */
380         brelse(bp);
381         if (flags & BA_CLRBUF) {
                    /*
                     * Read the existing block: via cluster_read() when a
                     * sequential count was passed in flags and the mount does
                     * not have MNT_NOCLUSTERR set, otherwise via bread().
                     */
382                 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
383                 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
384                         error = cluster_read(vp, ip->i_size, lbn,
385                             (int)fs->fs_bsize, NOCRED,
386                             MAXBSIZE, seqcount, &nbp);
387                 } else {
388                         error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
389                 }
390                 if (error) {
391                         brelse(nbp);
392                         goto fail;
393                 }
394         } else {
                    /* Without BA_CLRBUF the block is not read, only mapped. */
395                 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
396                 nbp->b_blkno = fsbtodb(fs, nb);
397         }
398         curthread_pflags_restore(saved_inbdflush);
399         *bpp = nbp;
400         return (0);
401 fail:
402         curthread_pflags_restore(saved_inbdflush);
403         /*
404          * If we have failed to allocate any blocks, simply return the error.
405          * This is the usual case and avoids the need to fsync the file.
406          */
407         if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
408                 return (error);
409         /*
410          * If we have failed part way through block allocation, we
411          * have to deallocate any indirect blocks that we have allocated.
412          * We have to fsync the file before we start to get rid of all
413          * of its dependencies so that we do not leave them dangling.
414          * We have to sync it at the end so that the soft updates code
415          * does not find any untracked changes. Although this is really
416          * slow, running out of disk space is not expected to be a common
417          * occurrence. The error return from fsync is ignored as we already
418          * have an error to return to the user.
419          */
420         (void) ffs_syncvnode(vp, MNT_WAIT);
            /*
             * Walk the recorded allocations: invalidate any buffer still
             * cached for each one and add up the bytes to give back.
             */
421         for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
422              blkp < allocblk; blkp++, lbns_remfree++) {
423                 /*
424                  * We shall not leave the freed blocks on the vnode
425                  * buffer object lists.
426                  */
427                 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
428                 if (bp != NULL) {
429                         bp->b_flags |= (B_INVAL | B_RELBUF);
430                         bp->b_flags &= ~B_ASYNC;
431                         brelse(bp);
432                 }
433                 deallocated += fs->fs_bsize;
434         }
            /*
             * Clear the pointer that first referenced a new block: either
             * the inode slot, or the entry in the pre-existing indirect
             * block recorded in unwindidx.
             */
435         if (allocib != NULL) {
436                 *allocib = 0;
437         } else if (unwindidx >= 0) {
438                 int r;
439
440                 r = bread(vp, indirs[unwindidx].in_lbn, 
441                     (int)fs->fs_bsize, NOCRED, &bp);
442                 if (r) {
                            /*
                             * NOTE(review): the brelse() below looks
                             * unreachable if panic() does not return --
                             * confirm and consider removing it.
                             */
443                         panic("Could not unwind indirect block, error %d", r);
444                         brelse(bp);
445                 } else {
446                         bap = (ufs1_daddr_t *)bp->b_data;
447                         bap[indirs[unwindidx].in_off] = 0;
448                         if (flags & IO_SYNC) {
449                                 bwrite(bp);
450                         } else {
451                                 if (bp->b_bufsize == fs->fs_bsize)
452                                         bp->b_flags |= B_CLUSTEROK;
453                                 bdwrite(bp);
454                         }
455                 }
456         }
457         if (deallocated) {
458 #ifdef QUOTA
459                 /*
460                  * Restore user's disk quota because allocation failed.
461                  */
462                 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
463 #endif
                    /* Take the released space back out of the inode's block count. */
464                 dp->di_blocks -= btodb(deallocated);
465                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
466         }
467         (void) ffs_syncvnode(vp, MNT_WAIT);
468         /*
469          * After the buffers are invalidated and on-disk pointers are
470          * cleared, free the blocks.
471          */
472         for (blkp = allociblk; blkp < allocblk; blkp++) {
473                 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
474                     ip->i_number);
475         }
476         return (error);
477 }
478
479 /*
480  * Balloc defines the structure of file system storage
481  * by allocating the physical blocks on a device given
482  * the inode and the logical block number in a file.
483  * This is the allocation strategy for UFS2. Above is
484  * the allocation strategy for UFS1.
485  */
486 int
487 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
488     struct ucred *cred, int flags, struct buf **bpp)
489 {
490         struct inode *ip;
491         struct ufs2_dinode *dp;
492         ufs_lbn_t lbn, lastlbn;
493         struct fs *fs;
494         struct buf *bp, *nbp;
495         struct ufsmount *ump;
496         struct indir indirs[NIADDR + 2];
497         ufs2_daddr_t nb, newb, *bap, pref;
498         ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
499         ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
500         int deallocated, osize, nsize, num, i, error;
501         int unwindidx = -1;
502         int saved_inbdflush;
503
504         ip = VTOI(vp);
505         dp = ip->i_din2;
506         fs = ip->i_fs;
507         ump = ip->i_ump;
508         lbn = lblkno(fs, startoffset);
509         size = blkoff(fs, startoffset) + size;
510         if (size > fs->fs_bsize)
511                 panic("ffs_balloc_ufs2: blk too big");
512         *bpp = NULL;
513         if (lbn < 0)
514                 return (EFBIG);
515
516         /*
517          * Check for allocating external data.
518          */
519         if (flags & IO_EXT) {
520                 if (lbn >= NXADDR)
521                         return (EFBIG);
522                 /*
523                  * If the next write will extend the data into a new block,
524                  * and the data is currently composed of a fragment
525                  * this fragment has to be extended to be a full block.
526                  */
527                 lastlbn = lblkno(fs, dp->di_extsize);
528                 if (lastlbn < lbn) {
529                         nb = lastlbn;
530                         osize = sblksize(fs, dp->di_extsize, nb);
531                         if (osize < fs->fs_bsize && osize > 0) {
532                                 UFS_LOCK(ump);
533                                 error = ffs_realloccg(ip, -1 - nb,
534                                     dp->di_extb[nb],
535                                     ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
536                                     &dp->di_extb[0]), osize,
537                                     (int)fs->fs_bsize, flags, cred, &bp);
538                                 if (error)
539                                         return (error);
540                                 if (DOINGSOFTDEP(vp))
541                                         softdep_setup_allocext(ip, nb,
542                                             dbtofsb(fs, bp->b_blkno),
543                                             dp->di_extb[nb],
544                                             fs->fs_bsize, osize, bp);
545                                 dp->di_extsize = smalllblktosize(fs, nb + 1);
546                                 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
547                                 bp->b_xflags |= BX_ALTDATA;
548                                 ip->i_flag |= IN_CHANGE;
549                                 if (flags & IO_SYNC)
550                                         bwrite(bp);
551                                 else
552                                         bawrite(bp);
553                         }
554                 }
555                 /*
556                  * All blocks are direct blocks
557                  */
558                 if (flags & BA_METAONLY)
559                         panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
560                 nb = dp->di_extb[lbn];
561                 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
562                         error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
563                         if (error) {
564                                 brelse(bp);
565                                 return (error);
566                         }
567                         bp->b_blkno = fsbtodb(fs, nb);
568                         bp->b_xflags |= BX_ALTDATA;
569                         *bpp = bp;
570                         return (0);
571                 }
572                 if (nb != 0) {
573                         /*
574                          * Consider need to reallocate a fragment.
575                          */
576                         osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
577                         nsize = fragroundup(fs, size);
578                         if (nsize <= osize) {
579                                 error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
580                                 if (error) {
581                                         brelse(bp);
582                                         return (error);
583                                 }
584                                 bp->b_blkno = fsbtodb(fs, nb);
585                                 bp->b_xflags |= BX_ALTDATA;
586                         } else {
587                                 UFS_LOCK(ump);
588                                 error = ffs_realloccg(ip, -1 - lbn,
589                                     dp->di_extb[lbn],
590                                     ffs_blkpref_ufs2(ip, lbn, (int)lbn,
591                                     &dp->di_extb[0]), osize, nsize, flags,
592                                     cred, &bp);
593                                 if (error)
594                                         return (error);
595                                 bp->b_xflags |= BX_ALTDATA;
596                                 if (DOINGSOFTDEP(vp))
597                                         softdep_setup_allocext(ip, lbn,
598                                             dbtofsb(fs, bp->b_blkno), nb,
599                                             nsize, osize, bp);
600                         }
601                 } else {
602                         if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
603                                 nsize = fragroundup(fs, size);
604                         else
605                                 nsize = fs->fs_bsize;
606                         UFS_LOCK(ump);
607                         error = ffs_alloc(ip, lbn,
608                            ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
609                            nsize, flags, cred, &newb);
610                         if (error)
611                                 return (error);
612                         bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0);
613                         bp->b_blkno = fsbtodb(fs, newb);
614                         bp->b_xflags |= BX_ALTDATA;
615                         if (flags & BA_CLRBUF)
616                                 vfs_bio_clrbuf(bp);
617                         if (DOINGSOFTDEP(vp))
618                                 softdep_setup_allocext(ip, lbn, newb, 0,
619                                     nsize, 0, bp);
620                 }
621                 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
622                 ip->i_flag |= IN_CHANGE;
623                 *bpp = bp;
624                 return (0);
625         }
626         /*
627          * If the next write will extend the file into a new block,
628          * and the file is currently composed of a fragment
629          * this fragment has to be extended to be a full block.
630          */
631         lastlbn = lblkno(fs, ip->i_size);
632         if (lastlbn < NDADDR && lastlbn < lbn) {
633                 nb = lastlbn;
634                 osize = blksize(fs, ip, nb);
635                 if (osize < fs->fs_bsize && osize > 0) {
636                         UFS_LOCK(ump);
637                         error = ffs_realloccg(ip, nb, dp->di_db[nb],
638                                 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
639                                     &dp->di_db[0]), osize, (int)fs->fs_bsize,
640                                     flags, cred, &bp);
641                         if (error)
642                                 return (error);
643                         if (DOINGSOFTDEP(vp))
644                                 softdep_setup_allocdirect(ip, nb,
645                                     dbtofsb(fs, bp->b_blkno),
646                                     dp->di_db[nb],
647                                     fs->fs_bsize, osize, bp);
648                         ip->i_size = smalllblktosize(fs, nb + 1);
649                         dp->di_size = ip->i_size;
650                         dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
651                         ip->i_flag |= IN_CHANGE | IN_UPDATE;
652                         if (flags & IO_SYNC)
653                                 bwrite(bp);
654                         else
655                                 bawrite(bp);
656                 }
657         }
658         /*
659          * The first NDADDR blocks are direct blocks
660          */
661         if (lbn < NDADDR) {
662                 if (flags & BA_METAONLY)
663                         panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
664                 nb = dp->di_db[lbn];
665                 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
666                         error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
667                         if (error) {
668                                 brelse(bp);
669                                 return (error);
670                         }
671                         bp->b_blkno = fsbtodb(fs, nb);
672                         *bpp = bp;
673                         return (0);
674                 }
675                 if (nb != 0) {
676                         /*
677                          * Consider need to reallocate a fragment.
678                          */
679                         osize = fragroundup(fs, blkoff(fs, ip->i_size));
680                         nsize = fragroundup(fs, size);
681                         if (nsize <= osize) {
682                                 error = bread(vp, lbn, osize, NOCRED, &bp);
683                                 if (error) {
684                                         brelse(bp);
685                                         return (error);
686                                 }
687                                 bp->b_blkno = fsbtodb(fs, nb);
688                         } else {
689                                 UFS_LOCK(ump);
690                                 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
691                                     ffs_blkpref_ufs2(ip, lbn, (int)lbn,
692                                        &dp->di_db[0]), osize, nsize, flags,
693                                     cred, &bp);
694                                 if (error)
695                                         return (error);
696                                 if (DOINGSOFTDEP(vp))
697                                         softdep_setup_allocdirect(ip, lbn,
698                                             dbtofsb(fs, bp->b_blkno), nb,
699                                             nsize, osize, bp);
700                         }
701                 } else {
702                         if (ip->i_size < smalllblktosize(fs, lbn + 1))
703                                 nsize = fragroundup(fs, size);
704                         else
705                                 nsize = fs->fs_bsize;
706                         UFS_LOCK(ump);
707                         error = ffs_alloc(ip, lbn,
708                             ffs_blkpref_ufs2(ip, lbn, (int)lbn,
709                                 &dp->di_db[0]), nsize, flags, cred, &newb);
710                         if (error)
711                                 return (error);
712                         bp = getblk(vp, lbn, nsize, 0, 0, 0);
713                         bp->b_blkno = fsbtodb(fs, newb);
714                         if (flags & BA_CLRBUF)
715                                 vfs_bio_clrbuf(bp);
716                         if (DOINGSOFTDEP(vp))
717                                 softdep_setup_allocdirect(ip, lbn, newb, 0,
718                                     nsize, 0, bp);
719                 }
720                 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
721                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
722                 *bpp = bp;
723                 return (0);
724         }
725         /*
726          * Determine the number of levels of indirection.
727          */
728         pref = 0;
729         if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
730                 return(error);
731 #ifdef INVARIANTS
732         if (num < 1)
733                 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
734 #endif
735         saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
736         /*
737          * Fetch the first indirect block allocating if necessary.
738          */
739         --num;
740         nb = dp->di_ib[indirs[0].in_off];
741         allocib = NULL;
742         allocblk = allociblk;
743         lbns_remfree = lbns;
744         if (nb == 0) {
745                 UFS_LOCK(ump);
746                 pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
747                 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
748                     flags, cred, &newb)) != 0) {
749                         curthread_pflags_restore(saved_inbdflush);
750                         return (error);
751                 }
752                 nb = newb;
753                 *allocblk++ = nb;
754                 *lbns_remfree++ = indirs[1].in_lbn;
755                 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
756                 bp->b_blkno = fsbtodb(fs, nb);
757                 vfs_bio_clrbuf(bp);
758                 if (DOINGSOFTDEP(vp)) {
759                         softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
760                             newb, 0, fs->fs_bsize, 0, bp);
761                         bdwrite(bp);
762                 } else {
763                         /*
764                          * Write synchronously so that indirect blocks
765                          * never point at garbage.
766                          */
767                         if (DOINGASYNC(vp))
768                                 bdwrite(bp);
769                         else if ((error = bwrite(bp)) != 0)
770                                 goto fail;
771                 }
772                 allocib = &dp->di_ib[indirs[0].in_off];
773                 *allocib = nb;
774                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
775         }
776         /*
777          * Fetch through the indirect blocks, allocating as necessary.
778          */
779         for (i = 1;;) {
780                 error = bread(vp,
781                     indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
782                 if (error) {
783                         brelse(bp);
784                         goto fail;
785                 }
786                 bap = (ufs2_daddr_t *)bp->b_data;
787                 nb = bap[indirs[i].in_off];
788                 if (i == num)
789                         break;
790                 i += 1;
791                 if (nb != 0) {
792                         bqrelse(bp);
793                         continue;
794                 }
795                 UFS_LOCK(ump);
796                 if (pref == 0)
797                         pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
798                 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
799                     flags, cred, &newb)) != 0) {
800                         brelse(bp);
801                         goto fail;
802                 }
803                 nb = newb;
804                 *allocblk++ = nb;
805                 *lbns_remfree++ = indirs[i].in_lbn;
806                 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
807                 nbp->b_blkno = fsbtodb(fs, nb);
808                 vfs_bio_clrbuf(nbp);
809                 if (DOINGSOFTDEP(vp)) {
810                         softdep_setup_allocindir_meta(nbp, ip, bp,
811                             indirs[i - 1].in_off, nb);
812                         bdwrite(nbp);
813                 } else {
814                         /*
815                          * Write synchronously so that indirect blocks
816                          * never point at garbage.
817                          */
818                         if ((error = bwrite(nbp)) != 0) {
819                                 brelse(bp);
820                                 goto fail;
821                         }
822                 }
823                 bap[indirs[i - 1].in_off] = nb;
824                 if (allocib == NULL && unwindidx < 0)
825                         unwindidx = i - 1;
826                 /*
827                  * If required, write synchronously, otherwise use
828                  * delayed write.
829                  */
830                 if (flags & IO_SYNC) {
831                         bwrite(bp);
832                 } else {
833                         if (bp->b_bufsize == fs->fs_bsize)
834                                 bp->b_flags |= B_CLUSTEROK;
835                         bdwrite(bp);
836                 }
837         }
838         /*
839          * If asked only for the indirect block, then return it.
840          */
841         if (flags & BA_METAONLY) {
842                 curthread_pflags_restore(saved_inbdflush);
843                 *bpp = bp;
844                 return (0);
845         }
846         /*
847          * Get the data block, allocating if necessary.
848          */
849         if (nb == 0) {
850                 UFS_LOCK(ump);
851                 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
852                 error = ffs_alloc(ip,
853                     lbn, pref, (int)fs->fs_bsize, flags, cred, &newb);
854                 if (error) {
855                         brelse(bp);
856                         goto fail;
857                 }
858                 nb = newb;
859                 *allocblk++ = nb;
860                 *lbns_remfree++ = lbn;
861                 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
862                 nbp->b_blkno = fsbtodb(fs, nb);
863                 if (flags & BA_CLRBUF)
864                         vfs_bio_clrbuf(nbp);
865                 if (DOINGSOFTDEP(vp))
866                         softdep_setup_allocindir_page(ip, lbn, bp,
867                             indirs[i].in_off, nb, 0, nbp);
868                 bap[indirs[i].in_off] = nb;
869                 /*
870                  * If required, write synchronously, otherwise use
871                  * delayed write.
872                  */
873                 if (flags & IO_SYNC) {
874                         bwrite(bp);
875                 } else {
876                         if (bp->b_bufsize == fs->fs_bsize)
877                                 bp->b_flags |= B_CLUSTEROK;
878                         bdwrite(bp);
879                 }
880                 curthread_pflags_restore(saved_inbdflush);
881                 *bpp = nbp;
882                 return (0);
883         }
884         brelse(bp);
885         /*
886          * If requested clear invalid portions of the buffer.  If we
887          * have to do a read-before-write (typical if BA_CLRBUF is set),
888          * try to do some read-ahead in the sequential case to reduce
889          * the number of I/O transactions.
890          */
891         if (flags & BA_CLRBUF) {
892                 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
893                 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
894                         error = cluster_read(vp, ip->i_size, lbn,
895                             (int)fs->fs_bsize, NOCRED,
896                             MAXBSIZE, seqcount, &nbp);
897                 } else {
898                         error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
899                 }
900                 if (error) {
901                         brelse(nbp);
902                         goto fail;
903                 }
904         } else {
905                 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
906                 nbp->b_blkno = fsbtodb(fs, nb);
907         }
908         curthread_pflags_restore(saved_inbdflush);
909         *bpp = nbp;
910         return (0);
911 fail:
912         curthread_pflags_restore(saved_inbdflush);
913         /*
914          * If we have failed to allocate any blocks, simply return the error.
915          * This is the usual case and avoids the need to fsync the file.
916          */
917         if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
918                 return (error);
919         /*
920          * If we have failed part way through block allocation, we
921          * have to deallocate any indirect blocks that we have allocated.
922          * We have to fsync the file before we start to get rid of all
923          * of its dependencies so that we do not leave them dangling.
924          * We have to sync it at the end so that the soft updates code
925          * does not find any untracked changes. Although this is really
926          * slow, running out of disk space is not expected to be a common
927          * occurrence. The error return from fsync is ignored as we already
928          * have an error to return to the user.
929          */
930         (void) ffs_syncvnode(vp, MNT_WAIT);
931         for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
932              blkp < allocblk; blkp++, lbns_remfree++) {
933                 /*
934                  * We shall not leave the freed blocks on the vnode
935                  * buffer object lists.
936                  */
937                 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
938                 if (bp != NULL) {
939                         bp->b_flags |= (B_INVAL | B_RELBUF);
940                         bp->b_flags &= ~B_ASYNC;
941                         brelse(bp);
942                 }
943                 deallocated += fs->fs_bsize;
944         }
945         if (allocib != NULL) {
946                 *allocib = 0;
947         } else if (unwindidx >= 0) {
948                 int r;
949
950                 r = bread(vp, indirs[unwindidx].in_lbn, 
951                     (int)fs->fs_bsize, NOCRED, &bp);
952                 if (r) {
953                         panic("Could not unwind indirect block, error %d", r);
954                         brelse(bp);
955                 } else {
956                         bap = (ufs2_daddr_t *)bp->b_data;
957                         bap[indirs[unwindidx].in_off] = 0;
958                         if (flags & IO_SYNC) {
959                                 bwrite(bp);
960                         } else {
961                                 if (bp->b_bufsize == fs->fs_bsize)
962                                         bp->b_flags |= B_CLUSTEROK;
963                                 bdwrite(bp);
964                         }
965                 }
966         }
967         if (deallocated) {
968 #ifdef QUOTA
969                 /*
970                  * Restore user's disk quota because allocation failed.
971                  */
972                 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
973 #endif
974                 dp->di_blocks -= btodb(deallocated);
975                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
976         }
977         (void) ffs_syncvnode(vp, MNT_WAIT);
978         /*
979          * After the buffers are invalidated and on-disk pointers are
980          * cleared, free the blocks.
981          */
982         for (blkp = allociblk; blkp < allocblk; blkp++) {
983                 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
984                     ip->i_number);
985         }
986         return (error);
987 }