CyberLeo.Net >> Repos - FreeBSD/releng/8.1.git/blob - sys/ufs/ffs/ffs_balloc.c
Copy stable/8 to releng/8.1 in preparation for 8.1-RC1.
[FreeBSD/releng/8.1.git] / sys / ufs / ffs / ffs_balloc.c
1 /*-
2  * Copyright (c) 2002 Networks Associates Technology, Inc.
3  * All rights reserved.
4  *
5  * This software was developed for the FreeBSD Project by Marshall
6  * Kirk McKusick and Network Associates Laboratories, the Security
7  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9  * research program
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * Copyright (c) 1982, 1986, 1989, 1993
33  *      The Regents of the University of California.  All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 4. Neither the name of the University nor the names of its contributors
44  *    may be used to endorse or promote products derived from this software
45  *    without specific prior written permission.
46  *
47  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57  * SUCH DAMAGE.
58  *
59  *      @(#)ffs_balloc.c        8.8 (Berkeley) 6/16/95
60  */
61
62 #include <sys/cdefs.h>
63 __FBSDID("$FreeBSD$");
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/bio.h>
68 #include <sys/buf.h>
69 #include <sys/lock.h>
70 #include <sys/mount.h>
71 #include <sys/vnode.h>
72
73 #include <ufs/ufs/quota.h>
74 #include <ufs/ufs/inode.h>
75 #include <ufs/ufs/ufs_extern.h>
76 #include <ufs/ufs/extattr.h>
77 #include <ufs/ufs/ufsmount.h>
78
79 #include <ufs/ffs/fs.h>
80 #include <ufs/ffs/ffs_extern.h>
81
82 /*
83  * Balloc defines the structure of filesystem storage
84  * by allocating the physical blocks on a device given
85  * the inode and the logical block number in a file.
86  * This is the allocation strategy for UFS1. Below is
87  * the allocation strategy for UFS2.
    *
    * Arguments, as they are used in the body:
    *   vp          - vnode of the file; the inode is obtained with VTOI(vp).
    *   startoffset - byte offset in the file; lblkno(fs, startoffset) gives
    *                 the logical block that is looked up or allocated.
    *   size        - byte count starting at startoffset.  The request must
    *                 not run past the end of that logical block, otherwise
    *                 the function panics ("blk too big").
    *   cred        - credential handed to ffs_alloc()/ffs_realloccg() and,
    *                 under QUOTA, to chkdq() when a failure is unwound.
    *   flags       - IO_EXT is rejected with EOPNOTSUPP; IO_SYNC selects
    *                 bwrite() for modified blocks; BA_CLRBUF asks for the
    *                 buffer to be cleared (new block) or read (existing
    *                 block); BA_METAONLY returns the last-level indirect
    *                 block instead of the data block; the BA_SEQMASK bits
    *                 carry the sequential count given to cluster_read().
    *   bpp         - set to NULL on entry and to the resulting buffer on
    *                 success.
    *
    * Returns 0 on success, EOPNOTSUPP for IO_EXT, EFBIG for a negative
    * logical block number, or the error reported by ufs_getlbns(), bread(),
    * cluster_read(), bwrite(), ffs_alloc() or ffs_realloccg().
88  */
89 int
90 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
91     struct ucred *cred, int flags, struct buf **bpp)
92 {
93         struct inode *ip;
94         struct ufs1_dinode *dp;
95         ufs_lbn_t lbn, lastlbn;
96         struct fs *fs;
97         ufs1_daddr_t nb;
98         struct buf *bp, *nbp;
99         struct ufsmount *ump;
100         struct indir indirs[NIADDR + 2];
101         int deallocated, osize, nsize, num, i, error;
102         ufs2_daddr_t newb;
103         ufs1_daddr_t *bap, pref;
104         ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
105         ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
106         int unwindidx = -1;
107         int saved_inbdflush;
108
109         ip = VTOI(vp);
110         dp = ip->i_din1;
111         fs = ip->i_fs;
112         ump = ip->i_ump;
            /*
             * lbn is the logical block used for every lookup below.  size is
             * rewritten as blkoff(fs, startoffset) plus the requested length
             * (presumably the distance from the start of the block to the
             * end of the request -- confirm against the blkoff() macro), and
             * that sum must fit in one filesystem block.
             */
113         lbn = lblkno(fs, startoffset);
114         size = blkoff(fs, startoffset) + size;
115         if (size > fs->fs_bsize)
116                 panic("ffs_balloc_ufs1: blk too big");
117         *bpp = NULL;
            /* The UFS1 routine refuses IO_EXT requests outright. */
118         if (flags & IO_EXT)
119                 return (EOPNOTSUPP);
120         if (lbn < 0)
121                 return (EFBIG);
122
123         /*
124          * If the next write will extend the file into a new block,
125          * and the file is currently composed of a fragment
126          * this fragment has to be extended to be a full block.
127          */
128         lastlbn = lblkno(fs, ip->i_size);
129         if (lastlbn < NDADDR && lastlbn < lbn) {
130                 nb = lastlbn;
131                 osize = blksize(fs, ip, nb);
132                 if (osize < fs->fs_bsize && osize > 0) {
                            /*
                             * NOTE(review): UFS_LOCK() is taken before every
                             * ffs_realloccg()/ffs_alloc() call in this
                             * function and no UFS_UNLOCK() appears here;
                             * presumably the callee drops the lock on all
                             * paths -- confirm.
                             */
133                         UFS_LOCK(ump);
134                         error = ffs_realloccg(ip, nb, dp->di_db[nb],
135                            ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
136                            &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
137                            cred, &bp);
138                         if (error)
139                                 return (error);
140                         if (DOINGSOFTDEP(vp))
141                                 softdep_setup_allocdirect(ip, nb,
142                                     dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
143                                     fs->fs_bsize, osize, bp);
                            /*
                             * The file size is advanced to the end of block
                             * nb and the direct pointer is reloaded from the
                             * buffer returned by ffs_realloccg().
                             */
144                         ip->i_size = smalllblktosize(fs, nb + 1);
145                         dp->di_size = ip->i_size;
146                         dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
147                         ip->i_flag |= IN_CHANGE | IN_UPDATE;
148                         if (flags & IO_SYNC)
149                                 bwrite(bp);
150                         else
151                                 bawrite(bp);
152                 }
153         }
154         /*
155          * The first NDADDR blocks are direct blocks
156          */
157         if (lbn < NDADDR) {
158                 if (flags & BA_METAONLY)
159                         panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
160                 nb = dp->di_db[lbn];
                    /*
                     * Case 1: the block exists and the file size already
                     * reaches the end of block lbn -- read it and return.
                     */
161                 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
162                         error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
163                         if (error) {
164                                 brelse(bp);
165                                 return (error);
166                         }
167                         bp->b_blkno = fsbtodb(fs, nb);
168                         *bpp = bp;
169                         return (0);
170                 }
171                 if (nb != 0) {
172                         /*
173                          * Consider need to reallocate a fragment.
174                          */
175                         osize = fragroundup(fs, blkoff(fs, ip->i_size));
176                         nsize = fragroundup(fs, size);
177                         if (nsize <= osize) {
                                    /*
                                     * Case 2a: the current allocation is
                                     * already large enough; read it as is.
                                     */
178                                 error = bread(vp, lbn, osize, NOCRED, &bp);
179                                 if (error) {
180                                         brelse(bp);
181                                         return (error);
182                                 }
183                                 bp->b_blkno = fsbtodb(fs, nb);
184                         } else {
                                    /*
                                     * Case 2b: grow the allocation from osize
                                     * to nsize bytes.
                                     */
185                                 UFS_LOCK(ump);
186                                 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
187                                     ffs_blkpref_ufs1(ip, lbn, (int)lbn,
188                                     &dp->di_db[0]), osize, nsize, flags,
189                                     cred, &bp);
190                                 if (error)
191                                         return (error);
192                                 if (DOINGSOFTDEP(vp))
193                                         softdep_setup_allocdirect(ip, lbn,
194                                             dbtofsb(fs, bp->b_blkno), nb,
195                                             nsize, osize, bp);
196                         }
197                 } else {
                            /*
                             * Case 3: no block yet.  If the file size does
                             * not reach the end of block lbn, allocate only
                             * fragroundup(size) bytes; otherwise allocate a
                             * full block.
                             */
198                         if (ip->i_size < smalllblktosize(fs, lbn + 1))
199                                 nsize = fragroundup(fs, size);
200                         else
201                                 nsize = fs->fs_bsize;
202                         UFS_LOCK(ump);
203                         error = ffs_alloc(ip, lbn,
204                             ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
205                             nsize, flags, cred, &newb);
206                         if (error)
207                                 return (error);
208                         bp = getblk(vp, lbn, nsize, 0, 0, 0);
209                         bp->b_blkno = fsbtodb(fs, newb);
210                         if (flags & BA_CLRBUF)
211                                 vfs_bio_clrbuf(bp);
212                         if (DOINGSOFTDEP(vp))
213                                 softdep_setup_allocdirect(ip, lbn, newb, 0,
214                                     nsize, 0, bp);
215                 }
                    /*
                     * Cases 2 and 3 both end here: the direct pointer is set
                     * from the buffer's block number and the inode is marked
                     * changed.
                     */
216                 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
217                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
218                 *bpp = bp;
219                 return (0);
220         }
221         /*
222          * Determine the number of levels of indirection.
223          */
224         pref = 0;
225         if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
226                 return(error);
227 #ifdef INVARIANTS
228         if (num < 1)
229                 panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
230 #endif
            /*
             * saved_inbdflush is a mask with every bit set except
             * TDP_INBDFLUSH, which keeps the value it has in td_pflags right
             * now.  The flag is then forced on; and-ing td_pflags with the
             * mask on each exit path below puts that one bit back to its
             * entry value without disturbing the other bits.
             */
231         saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags &
232             TDP_INBDFLUSH);
233         curthread->td_pflags |= TDP_INBDFLUSH;
234         /*
235          * Fetch the first indirect block allocating if necessary.
236          */
            /*
             * After the decrement, num is the indirs[] index at which the
             * loop below stops (i == num).  allocblk and lbns_remfree walk
             * allociblk[] and lbns[] in step, recording each block allocated
             * in this call and its logical block number for the fail path.
             */
237         --num;
238         nb = dp->di_ib[indirs[0].in_off];
239         allocib = NULL;
240         allocblk = allociblk;
241         lbns_remfree = lbns;
242         if (nb == 0) {
243                 UFS_LOCK(ump);
244                 pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
245                 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
246                     flags, cred, &newb)) != 0) {
                            /* Nothing was allocated yet, so no unwinding. */
247                         curthread->td_pflags &= saved_inbdflush;
248                         return (error);
249                 }
250                 nb = newb;
251                 *allocblk++ = nb;
252                 *lbns_remfree++ = indirs[1].in_lbn;
253                 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
254                 bp->b_blkno = fsbtodb(fs, nb);
255                 vfs_bio_clrbuf(bp);
256                 if (DOINGSOFTDEP(vp)) {
257                         softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
258                             newb, 0, fs->fs_bsize, 0, bp);
259                         bdwrite(bp);
260                 } else {
261                         /*
262                          * Write synchronously so that indirect blocks
263                          * never point at garbage.
264                          */
265                         if (DOINGASYNC(vp))
266                                 bdwrite(bp);
267                         else if ((error = bwrite(bp)) != 0)
268                                 goto fail;
269                 }
                    /*
                     * allocib remembers the inode slot that now points at the
                     * new indirect block so the fail path can zero it again.
                     */
270                 allocib = &dp->di_ib[indirs[0].in_off];
271                 *allocib = nb;
272                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
273         }
274         /*
275          * Fetch through the indirect blocks, allocating as necessary.
276          */
            /*
             * Each pass reads the indirect block at level i into bp and picks
             * up the pointer for the next level.  The loop exits with bp
             * still held and nb holding the entry read from the last level.
             */
277         for (i = 1;;) {
278                 error = bread(vp,
279                     indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
280                 if (error) {
281                         brelse(bp);
282                         goto fail;
283                 }
284                 bap = (ufs1_daddr_t *)bp->b_data;
285                 nb = bap[indirs[i].in_off];
286                 if (i == num)
287                         break;
                    /*
                     * i is advanced here, so from now on indirs[i] describes
                     * the next-level block and indirs[i - 1].in_off is the
                     * slot inside bp that points to it.
                     */
288                 i += 1;
289                 if (nb != 0) {
290                         bqrelse(bp);
291                         continue;
292                 }
293                 UFS_LOCK(ump);
294                 if (pref == 0)
295                         pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
296                 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
297                     flags, cred, &newb)) != 0) {
298                         brelse(bp);
299                         goto fail;
300                 }
301                 nb = newb;
302                 *allocblk++ = nb;
303                 *lbns_remfree++ = indirs[i].in_lbn;
304                 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
305                 nbp->b_blkno = fsbtodb(fs, nb);
306                 vfs_bio_clrbuf(nbp);
307                 if (DOINGSOFTDEP(vp)) {
308                         softdep_setup_allocindir_meta(nbp, ip, bp,
309                             indirs[i - 1].in_off, nb);
310                         bdwrite(nbp);
311                 } else {
312                         /*
313                          * Write synchronously so that indirect blocks
314                          * never point at garbage.
315                          */
316                         if ((error = bwrite(nbp)) != 0) {
317                                 brelse(bp);
318                                 goto fail;
319                         }
320                 }
321                 bap[indirs[i - 1].in_off] = nb;
                    /*
                     * When the inode's own indirect pointer was not allocated
                     * in this call, remember the first level whose slot was
                     * filled in; the fail path zeroes exactly that slot.
                     */
322                 if (allocib == NULL && unwindidx < 0)
323                         unwindidx = i - 1;
324                 /*
325                  * If required, write synchronously, otherwise use
326                  * delayed write.
327                  */
328                 if (flags & IO_SYNC) {
329                         bwrite(bp);
330                 } else {
331                         if (bp->b_bufsize == fs->fs_bsize)
332                                 bp->b_flags |= B_CLUSTEROK;
333                         bdwrite(bp);
334                 }
335         }
336         /*
337          * If asked only for the indirect block, then return it.
338          */
339         if (flags & BA_METAONLY) {
340                 curthread->td_pflags &= saved_inbdflush;
341                 *bpp = bp;
342                 return (0);
343         }
344         /*
345          * Get the data block, allocating if necessary.
346          */
347         if (nb == 0) {
348                 UFS_LOCK(ump);
349                 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
350                 error = ffs_alloc(ip,
351                     lbn, pref, (int)fs->fs_bsize, flags, cred, &newb);
352                 if (error) {
353                         brelse(bp);
354                         goto fail;
355                 }
356                 nb = newb;
357                 *allocblk++ = nb;
358                 *lbns_remfree++ = lbn;
359                 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
360                 nbp->b_blkno = fsbtodb(fs, nb);
361                 if (flags & BA_CLRBUF)
362                         vfs_bio_clrbuf(nbp);
363                 if (DOINGSOFTDEP(vp))
364                         softdep_setup_allocindir_page(ip, lbn, bp,
365                             indirs[i].in_off, nb, 0, nbp);
                    /* Store the new data block in the last-level indirect block. */
366                 bap[indirs[i].in_off] = nb;
367                 /*
368                  * If required, write synchronously, otherwise use
369                  * delayed write.
370                  */
371                 if (flags & IO_SYNC) {
372                         bwrite(bp);
373                 } else {
374                         if (bp->b_bufsize == fs->fs_bsize)
375                                 bp->b_flags |= B_CLUSTEROK;
376                         bdwrite(bp);
377                 }
378                 curthread->td_pflags &= saved_inbdflush;
379                 *bpp = nbp;
380                 return (0);
381         }
            /*
             * The data block already exists, so the indirect block is
             * released unmodified.  With BA_CLRBUF the existing block is read
             * (through cluster_read() when a sequential count is present and
             * MNT_NOCLUSTERR is not set on the mount); without it a buffer is
             * obtained with getblk() and only its block number is filled in.
             */
382         brelse(bp);
383         if (flags & BA_CLRBUF) {
384                 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
385                 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
386                         error = cluster_read(vp, ip->i_size, lbn,
387                             (int)fs->fs_bsize, NOCRED,
388                             MAXBSIZE, seqcount, &nbp);
389                 } else {
390                         error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
391                 }
392                 if (error) {
393                         brelse(nbp);
394                         goto fail;
395                 }
396         } else {
397                 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
398                 nbp->b_blkno = fsbtodb(fs, nb);
399         }
400         curthread->td_pflags &= saved_inbdflush;
401         *bpp = nbp;
402         return (0);
403 fail:
404         curthread->td_pflags &= saved_inbdflush;
405         /*
406          * If we have failed to allocate any blocks, simply return the error.
407          * This is the usual case and avoids the need to fsync the file.
408          */
409         if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
410                 return (error);
411         /*
412          * If we have failed part way through block allocation, we
413          * have to deallocate any indirect blocks that we have allocated.
414          * We have to fsync the file before we start to get rid of all
415          * of its dependencies so that we do not leave them dangling.
416          * We have to sync it at the end so that the soft updates code
417          * does not find any untracked changes. Although this is really
418          * slow, running out of disk space is not expected to be a common
419          * occurrence. The error return from fsync is ignored as we already
420          * have an error to return to the user.
421          */
422         (void) ffs_syncvnode(vp, MNT_WAIT);
            /*
             * First pass over the recorded allocations: invalidate any buffer
             * still present for each logical block and count the bytes that
             * will be given back.
             */
423         for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
424              blkp < allocblk; blkp++, lbns_remfree++) {
425                 /*
426                  * We shall not leave the freed blocks on the vnode
427                  * buffer object lists.
428                  */
429                 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
430                 if (bp != NULL) {
431                         bp->b_flags |= (B_INVAL | B_RELBUF);
432                         bp->b_flags &= ~B_ASYNC;
433                         brelse(bp);
434                 }
435                 deallocated += fs->fs_bsize;
436         }
            /*
             * Clear the pointer that links the new blocks into the file:
             * either the inode's indirect slot (allocib) or the slot
             * remembered in unwindidx inside an existing indirect block.
             */
437         if (allocib != NULL) {
438                 *allocib = 0;
439         } else if (unwindidx >= 0) {
440                 int r;
441
442                 r = bread(vp, indirs[unwindidx].in_lbn, 
443                     (int)fs->fs_bsize, NOCRED, &bp);
444                 if (r) {
                            /*
                             * NOTE(review): the brelse() below follows
                             * panic(); it only runs if panic() returns.
                             */
445                         panic("Could not unwind indirect block, error %d", r);
446                         brelse(bp);
447                 } else {
448                         bap = (ufs1_daddr_t *)bp->b_data;
449                         bap[indirs[unwindidx].in_off] = 0;
450                         if (flags & IO_SYNC) {
451                                 bwrite(bp);
452                         } else {
453                                 if (bp->b_bufsize == fs->fs_bsize)
454                                         bp->b_flags |= B_CLUSTEROK;
455                                 bdwrite(bp);
456                         }
457                 }
458         }
459         if (deallocated) {
460 #ifdef QUOTA
461                 /*
462                  * Restore user's disk quota because allocation failed.
463                  */
464                 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
465 #endif
                    /* Take the returned blocks back out of the inode's count. */
466                 dp->di_blocks -= btodb(deallocated);
467                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
468         }
469         (void) ffs_syncvnode(vp, MNT_WAIT);
470         /*
471          * After the buffers are invalidated and on-disk pointers are
472          * cleared, free the blocks.
473          */
474         for (blkp = allociblk; blkp < allocblk; blkp++) {
475                 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
476                     ip->i_number);
477         }
478         return (error);
479 }
480
481 /*
482  * Balloc defines the structure of file system storage
483  * by allocating the physical blocks on a device given
484  * the inode and the logical block number in a file.
485  * This is the allocation strategy for UFS2. Above is
486  * the allocation strategy for UFS1.
487  */
488 int
489 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
490     struct ucred *cred, int flags, struct buf **bpp)
491 {
492         struct inode *ip;
493         struct ufs2_dinode *dp;
494         ufs_lbn_t lbn, lastlbn;
495         struct fs *fs;
496         struct buf *bp, *nbp;
497         struct ufsmount *ump;
498         struct indir indirs[NIADDR + 2];
499         ufs2_daddr_t nb, newb, *bap, pref;
500         ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
501         ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
502         int deallocated, osize, nsize, num, i, error;
503         int unwindidx = -1;
504         int saved_inbdflush;
505
506         ip = VTOI(vp);
507         dp = ip->i_din2;
508         fs = ip->i_fs;
509         ump = ip->i_ump;
510         lbn = lblkno(fs, startoffset);
511         size = blkoff(fs, startoffset) + size;
512         if (size > fs->fs_bsize)
513                 panic("ffs_balloc_ufs2: blk too big");
514         *bpp = NULL;
515         if (lbn < 0)
516                 return (EFBIG);
517
518         /*
519          * Check for allocating external data.
520          */
521         if (flags & IO_EXT) {
522                 if (lbn >= NXADDR)
523                         return (EFBIG);
524                 /*
525                  * If the next write will extend the data into a new block,
526                  * and the data is currently composed of a fragment
527                  * this fragment has to be extended to be a full block.
528                  */
529                 lastlbn = lblkno(fs, dp->di_extsize);
530                 if (lastlbn < lbn) {
531                         nb = lastlbn;
532                         osize = sblksize(fs, dp->di_extsize, nb);
533                         if (osize < fs->fs_bsize && osize > 0) {
534                                 UFS_LOCK(ump);
535                                 error = ffs_realloccg(ip, -1 - nb,
536                                     dp->di_extb[nb],
537                                     ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
538                                     &dp->di_extb[0]), osize,
539                                     (int)fs->fs_bsize, flags, cred, &bp);
540                                 if (error)
541                                         return (error);
542                                 if (DOINGSOFTDEP(vp))
543                                         softdep_setup_allocext(ip, nb,
544                                             dbtofsb(fs, bp->b_blkno),
545                                             dp->di_extb[nb],
546                                             fs->fs_bsize, osize, bp);
547                                 dp->di_extsize = smalllblktosize(fs, nb + 1);
548                                 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
549                                 bp->b_xflags |= BX_ALTDATA;
550                                 ip->i_flag |= IN_CHANGE;
551                                 if (flags & IO_SYNC)
552                                         bwrite(bp);
553                                 else
554                                         bawrite(bp);
555                         }
556                 }
557                 /*
558                  * All blocks are direct blocks
559                  */
560                 if (flags & BA_METAONLY)
561                         panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
562                 nb = dp->di_extb[lbn];
563                 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
564                         error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
565                         if (error) {
566                                 brelse(bp);
567                                 return (error);
568                         }
569                         bp->b_blkno = fsbtodb(fs, nb);
570                         bp->b_xflags |= BX_ALTDATA;
571                         *bpp = bp;
572                         return (0);
573                 }
574                 if (nb != 0) {
575                         /*
576                          * Consider need to reallocate a fragment.
577                          */
578                         osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
579                         nsize = fragroundup(fs, size);
580                         if (nsize <= osize) {
581                                 error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
582                                 if (error) {
583                                         brelse(bp);
584                                         return (error);
585                                 }
586                                 bp->b_blkno = fsbtodb(fs, nb);
587                                 bp->b_xflags |= BX_ALTDATA;
588                         } else {
589                                 UFS_LOCK(ump);
590                                 error = ffs_realloccg(ip, -1 - lbn,
591                                     dp->di_extb[lbn],
592                                     ffs_blkpref_ufs2(ip, lbn, (int)lbn,
593                                     &dp->di_extb[0]), osize, nsize, flags,
594                                     cred, &bp);
595                                 if (error)
596                                         return (error);
597                                 bp->b_xflags |= BX_ALTDATA;
598                                 if (DOINGSOFTDEP(vp))
599                                         softdep_setup_allocext(ip, lbn,
600                                             dbtofsb(fs, bp->b_blkno), nb,
601                                             nsize, osize, bp);
602                         }
603                 } else {
604                         if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
605                                 nsize = fragroundup(fs, size);
606                         else
607                                 nsize = fs->fs_bsize;
608                         UFS_LOCK(ump);
609                         error = ffs_alloc(ip, lbn,
610                            ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
611                            nsize, flags, cred, &newb);
612                         if (error)
613                                 return (error);
614                         bp = getblk(vp, -1 - lbn, nsize, 0, 0, 0);
615                         bp->b_blkno = fsbtodb(fs, newb);
616                         bp->b_xflags |= BX_ALTDATA;
617                         if (flags & BA_CLRBUF)
618                                 vfs_bio_clrbuf(bp);
619                         if (DOINGSOFTDEP(vp))
620                                 softdep_setup_allocext(ip, lbn, newb, 0,
621                                     nsize, 0, bp);
622                 }
623                 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
624                 ip->i_flag |= IN_CHANGE;
625                 *bpp = bp;
626                 return (0);
627         }
628         /*
629          * If the next write will extend the file into a new block,
630          * and the file is currently composed of a fragment
631          * this fragment has to be extended to be a full block.
632          */
633         lastlbn = lblkno(fs, ip->i_size);
634         if (lastlbn < NDADDR && lastlbn < lbn) {
635                 nb = lastlbn;
636                 osize = blksize(fs, ip, nb);
637                 if (osize < fs->fs_bsize && osize > 0) {
638                         UFS_LOCK(ump);
639                         error = ffs_realloccg(ip, nb, dp->di_db[nb],
640                                 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
641                                     &dp->di_db[0]), osize, (int)fs->fs_bsize,
642                                     flags, cred, &bp);
643                         if (error)
644                                 return (error);
645                         if (DOINGSOFTDEP(vp))
646                                 softdep_setup_allocdirect(ip, nb,
647                                     dbtofsb(fs, bp->b_blkno),
648                                     dp->di_db[nb],
649                                     fs->fs_bsize, osize, bp);
650                         ip->i_size = smalllblktosize(fs, nb + 1);
651                         dp->di_size = ip->i_size;
652                         dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
653                         ip->i_flag |= IN_CHANGE | IN_UPDATE;
654                         if (flags & IO_SYNC)
655                                 bwrite(bp);
656                         else
657                                 bawrite(bp);
658                 }
659         }
660         /*
661          * The first NDADDR blocks are direct blocks
662          */
663         if (lbn < NDADDR) {
664                 if (flags & BA_METAONLY)
665                         panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
666                 nb = dp->di_db[lbn];
667                 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
668                         error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
669                         if (error) {
670                                 brelse(bp);
671                                 return (error);
672                         }
673                         bp->b_blkno = fsbtodb(fs, nb);
674                         *bpp = bp;
675                         return (0);
676                 }
677                 if (nb != 0) {
678                         /*
679                          * Consider need to reallocate a fragment.
680                          */
681                         osize = fragroundup(fs, blkoff(fs, ip->i_size));
682                         nsize = fragroundup(fs, size);
683                         if (nsize <= osize) {
684                                 error = bread(vp, lbn, osize, NOCRED, &bp);
685                                 if (error) {
686                                         brelse(bp);
687                                         return (error);
688                                 }
689                                 bp->b_blkno = fsbtodb(fs, nb);
690                         } else {
691                                 UFS_LOCK(ump);
692                                 error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
693                                     ffs_blkpref_ufs2(ip, lbn, (int)lbn,
694                                        &dp->di_db[0]), osize, nsize, flags,
695                                     cred, &bp);
696                                 if (error)
697                                         return (error);
698                                 if (DOINGSOFTDEP(vp))
699                                         softdep_setup_allocdirect(ip, lbn,
700                                             dbtofsb(fs, bp->b_blkno), nb,
701                                             nsize, osize, bp);
702                         }
703                 } else {
704                         if (ip->i_size < smalllblktosize(fs, lbn + 1))
705                                 nsize = fragroundup(fs, size);
706                         else
707                                 nsize = fs->fs_bsize;
708                         UFS_LOCK(ump);
709                         error = ffs_alloc(ip, lbn,
710                             ffs_blkpref_ufs2(ip, lbn, (int)lbn,
711                                 &dp->di_db[0]), nsize, flags, cred, &newb);
712                         if (error)
713                                 return (error);
714                         bp = getblk(vp, lbn, nsize, 0, 0, 0);
715                         bp->b_blkno = fsbtodb(fs, newb);
716                         if (flags & BA_CLRBUF)
717                                 vfs_bio_clrbuf(bp);
718                         if (DOINGSOFTDEP(vp))
719                                 softdep_setup_allocdirect(ip, lbn, newb, 0,
720                                     nsize, 0, bp);
721                 }
722                 dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
723                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
724                 *bpp = bp;
725                 return (0);
726         }
727         /*
728          * Determine the number of levels of indirection.
729          */
730         pref = 0;
731         if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
732                 return(error);
733 #ifdef INVARIANTS
734         if (num < 1)
735                 panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
736 #endif
737         saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags &
738             TDP_INBDFLUSH);
739         curthread->td_pflags |= TDP_INBDFLUSH;
740         /*
741          * Fetch the first indirect block allocating if necessary.
742          */
743         --num;
744         nb = dp->di_ib[indirs[0].in_off];
745         allocib = NULL;
746         allocblk = allociblk;
747         lbns_remfree = lbns;
748         if (nb == 0) {
749                 UFS_LOCK(ump);
750                 pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
751                 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
752                     flags, cred, &newb)) != 0) {
753                         curthread->td_pflags &= saved_inbdflush;
754                         return (error);
755                 }
756                 nb = newb;
757                 *allocblk++ = nb;
758                 *lbns_remfree++ = indirs[1].in_lbn;
759                 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
760                 bp->b_blkno = fsbtodb(fs, nb);
761                 vfs_bio_clrbuf(bp);
762                 if (DOINGSOFTDEP(vp)) {
763                         softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
764                             newb, 0, fs->fs_bsize, 0, bp);
765                         bdwrite(bp);
766                 } else {
767                         /*
768                          * Write synchronously so that indirect blocks
769                          * never point at garbage.
770                          */
771                         if (DOINGASYNC(vp))
772                                 bdwrite(bp);
773                         else if ((error = bwrite(bp)) != 0)
774                                 goto fail;
775                 }
776                 allocib = &dp->di_ib[indirs[0].in_off];
777                 *allocib = nb;
778                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
779         }
780         /*
781          * Fetch through the indirect blocks, allocating as necessary.
782          */
783         for (i = 1;;) {
784                 error = bread(vp,
785                     indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
786                 if (error) {
787                         brelse(bp);
788                         goto fail;
789                 }
790                 bap = (ufs2_daddr_t *)bp->b_data;
791                 nb = bap[indirs[i].in_off];
792                 if (i == num)
793                         break;
794                 i += 1;
795                 if (nb != 0) {
796                         bqrelse(bp);
797                         continue;
798                 }
799                 UFS_LOCK(ump);
800                 if (pref == 0)
801                         pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
802                 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
803                     flags, cred, &newb)) != 0) {
804                         brelse(bp);
805                         goto fail;
806                 }
807                 nb = newb;
808                 *allocblk++ = nb;
809                 *lbns_remfree++ = indirs[i].in_lbn;
810                 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
811                 nbp->b_blkno = fsbtodb(fs, nb);
812                 vfs_bio_clrbuf(nbp);
813                 if (DOINGSOFTDEP(vp)) {
814                         softdep_setup_allocindir_meta(nbp, ip, bp,
815                             indirs[i - 1].in_off, nb);
816                         bdwrite(nbp);
817                 } else {
818                         /*
819                          * Write synchronously so that indirect blocks
820                          * never point at garbage.
821                          */
822                         if ((error = bwrite(nbp)) != 0) {
823                                 brelse(bp);
824                                 goto fail;
825                         }
826                 }
827                 bap[indirs[i - 1].in_off] = nb;
828                 if (allocib == NULL && unwindidx < 0)
829                         unwindidx = i - 1;
830                 /*
831                  * If required, write synchronously, otherwise use
832                  * delayed write.
833                  */
834                 if (flags & IO_SYNC) {
835                         bwrite(bp);
836                 } else {
837                         if (bp->b_bufsize == fs->fs_bsize)
838                                 bp->b_flags |= B_CLUSTEROK;
839                         bdwrite(bp);
840                 }
841         }
842         /*
843          * If asked only for the indirect block, then return it.
844          */
845         if (flags & BA_METAONLY) {
846                 curthread->td_pflags &= saved_inbdflush;
847                 *bpp = bp;
848                 return (0);
849         }
850         /*
851          * Get the data block, allocating if necessary.
852          */
853         if (nb == 0) {
854                 UFS_LOCK(ump);
855                 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
856                 error = ffs_alloc(ip,
857                     lbn, pref, (int)fs->fs_bsize, flags, cred, &newb);
858                 if (error) {
859                         brelse(bp);
860                         goto fail;
861                 }
862                 nb = newb;
863                 *allocblk++ = nb;
864                 *lbns_remfree++ = lbn;
865                 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
866                 nbp->b_blkno = fsbtodb(fs, nb);
867                 if (flags & BA_CLRBUF)
868                         vfs_bio_clrbuf(nbp);
869                 if (DOINGSOFTDEP(vp))
870                         softdep_setup_allocindir_page(ip, lbn, bp,
871                             indirs[i].in_off, nb, 0, nbp);
872                 bap[indirs[i].in_off] = nb;
873                 /*
874                  * If required, write synchronously, otherwise use
875                  * delayed write.
876                  */
877                 if (flags & IO_SYNC) {
878                         bwrite(bp);
879                 } else {
880                         if (bp->b_bufsize == fs->fs_bsize)
881                                 bp->b_flags |= B_CLUSTEROK;
882                         bdwrite(bp);
883                 }
884                 curthread->td_pflags &= saved_inbdflush;
885                 *bpp = nbp;
886                 return (0);
887         }
888         brelse(bp);
889         /*
890          * If requested clear invalid portions of the buffer.  If we
891          * have to do a read-before-write (typical if BA_CLRBUF is set),
892          * try to do some read-ahead in the sequential case to reduce
893          * the number of I/O transactions.
894          */
895         if (flags & BA_CLRBUF) {
896                 int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
897                 if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
898                         error = cluster_read(vp, ip->i_size, lbn,
899                             (int)fs->fs_bsize, NOCRED,
900                             MAXBSIZE, seqcount, &nbp);
901                 } else {
902                         error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
903                 }
904                 if (error) {
905                         brelse(nbp);
906                         goto fail;
907                 }
908         } else {
909                 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
910                 nbp->b_blkno = fsbtodb(fs, nb);
911         }
912         curthread->td_pflags &= saved_inbdflush;
913         *bpp = nbp;
914         return (0);
915 fail:
916         curthread->td_pflags &= saved_inbdflush;
917         /*
918          * If we have failed to allocate any blocks, simply return the error.
919          * This is the usual case and avoids the need to fsync the file.
920          */
921         if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
922                 return (error);
923         /*
924          * If we have failed part way through block allocation, we
925          * have to deallocate any indirect blocks that we have allocated.
926          * We have to fsync the file before we start to get rid of all
927          * of its dependencies so that we do not leave them dangling.
928          * We have to sync it at the end so that the soft updates code
929          * does not find any untracked changes. Although this is really
930          * slow, running out of disk space is not expected to be a common
931          * occurrence. The error return from fsync is ignored as we already
932          * have an error to return to the user.
933          */
934         (void) ffs_syncvnode(vp, MNT_WAIT);
935         for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
936              blkp < allocblk; blkp++, lbns_remfree++) {
937                 /*
938                  * We shall not leave the freed blocks on the vnode
939                  * buffer object lists.
940                  */
941                 bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0, GB_NOCREAT);
942                 if (bp != NULL) {
943                         bp->b_flags |= (B_INVAL | B_RELBUF);
944                         bp->b_flags &= ~B_ASYNC;
945                         brelse(bp);
946                 }
947                 deallocated += fs->fs_bsize;
948         }
949         if (allocib != NULL) {
950                 *allocib = 0;
951         } else if (unwindidx >= 0) {
952                 int r;
953
954                 r = bread(vp, indirs[unwindidx].in_lbn, 
955                     (int)fs->fs_bsize, NOCRED, &bp);
956                 if (r) {
957                         panic("Could not unwind indirect block, error %d", r);
958                         brelse(bp);
959                 } else {
960                         bap = (ufs2_daddr_t *)bp->b_data;
961                         bap[indirs[unwindidx].in_off] = 0;
962                         if (flags & IO_SYNC) {
963                                 bwrite(bp);
964                         } else {
965                                 if (bp->b_bufsize == fs->fs_bsize)
966                                         bp->b_flags |= B_CLUSTEROK;
967                                 bdwrite(bp);
968                         }
969                 }
970         }
971         if (deallocated) {
972 #ifdef QUOTA
973                 /*
974                  * Restore user's disk quota because allocation failed.
975                  */
976                 (void) chkdq(ip, -btodb(deallocated), cred, FORCE);
977 #endif
978                 dp->di_blocks -= btodb(deallocated);
979                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
980         }
981         (void) ffs_syncvnode(vp, MNT_WAIT);
982         /*
983          * After the buffers are invalidated and on-disk pointers are
984          * cleared, free the blocks.
985          */
986         for (blkp = allociblk; blkp < allocblk; blkp++) {
987                 ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize,
988                     ip->i_number);
989         }
990         return (error);
991 }