]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/fs/ext2fs/ext2_extents.c
MFV r325605: 8713 Buffer overflow in dsl_dataset_name()
[FreeBSD/FreeBSD.git] / sys / fs / ext2fs / ext2_extents.c
1 /*-
2  * Copyright (c) 2010 Zheng Liu <lz@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/types.h>
32 #include <sys/kernel.h>
33 #include <sys/malloc.h>
34 #include <sys/vnode.h>
35 #include <sys/bio.h>
36 #include <sys/buf.h>
37 #include <sys/conf.h>
38 #include <sys/stat.h>
39
40 #include <fs/ext2fs/ext2_mount.h>
41 #include <fs/ext2fs/fs.h>
42 #include <fs/ext2fs/inode.h>
43 #include <fs/ext2fs/ext2fs.h>
44 #include <fs/ext2fs/ext2_extents.h>
45 #include <fs/ext2fs/ext2_extern.h>
46
47 static MALLOC_DEFINE(M_EXT2EXTENTS, "ext2_extents", "EXT2 extents");
48
49 #ifdef EXT2FS_DEBUG
50 static void
51 ext4_ext_print_extent(struct ext4_extent *ep)
52 {
53
54         printf("    ext %p => (blk %u len %u start %lu)\n",
55             ep, ep->e_blk, ep->e_len,
56             (uint64_t)ep->e_start_hi << 32 | ep->e_start_lo);
57 }
58
59 static void ext4_ext_print_header(struct inode *ip, struct ext4_extent_header *ehp);
60
61 static void
62 ext4_ext_print_index(struct inode *ip, struct ext4_extent_index *ex, int do_walk)
63 {
64         struct m_ext2fs *fs;
65         struct buf *bp;
66         int error;
67
68         fs = ip->i_e2fs;
69
70         printf("    index %p => (blk %u pblk %lu)\n",
71             ex, ex->ei_blk, (uint64_t)ex->ei_leaf_hi << 32 | ex->ei_leaf_lo);
72
73         if(!do_walk)
74                 return;
75
76         if ((error = bread(ip->i_devvp,
77             fsbtodb(fs, ((uint64_t)ex->ei_leaf_hi << 32 | ex->ei_leaf_lo)),
78             (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) {
79                 brelse(bp);
80                 return;
81         }
82
83         ext4_ext_print_header(ip, (struct ext4_extent_header *)bp->b_data);
84
85         brelse(bp);
86
87 }
88
89 static void
90 ext4_ext_print_header(struct inode *ip, struct ext4_extent_header *ehp)
91 {
92         int i;
93
94         printf("header %p => (magic 0x%x entries %d max %d depth %d gen %d)\n",
95             ehp, ehp->eh_magic, ehp->eh_ecount, ehp->eh_max, ehp->eh_depth,
96             ehp->eh_gen);
97
98         for (i = 0; i < ehp->eh_ecount; i++)
99                 if (ehp->eh_depth != 0)
100                         ext4_ext_print_index(ip,
101                             (struct ext4_extent_index *)(ehp + 1 + i), 1);
102                 else
103                         ext4_ext_print_extent((struct ext4_extent *)(ehp + 1 + i));
104 }
105
106 static void
107 ext4_ext_print_path(struct inode *ip, struct ext4_extent_path *path)
108 {
109         int k, l;
110
111         l = path->ep_depth
112
113         printf("ip=%d, Path:\n", ip->i_number);
114         for (k = 0; k <= l; k++, path++) {
115                 if (path->ep_index) {
116                         ext4_ext_print_index(ip, path->ep_index, 0);
117                 } else if (path->ep_ext) {
118                         ext4_ext_print_extent(path->ep_ext);
119                 }
120         }
121 }
122
123 void
124 ext4_ext_print_extent_tree_status(struct inode * ip)
125 {
126         struct m_ext2fs *fs;
127         struct ext4_extent_header *ehp;
128
129         fs = ip->i_e2fs;
130         ehp = (struct ext4_extent_header *)(char *)ip->i_db;
131
132         printf("Extent status:ip=%d\n", ip->i_number);
133         if (!(ip->i_flag & IN_E4EXTENTS))
134                 return;
135
136         ext4_ext_print_header(ip, ehp);
137
138         return;
139 }
140 #endif
141
142 static inline struct ext4_extent_header *
143 ext4_ext_inode_header(struct inode *ip)
144 {
145
146         return ((struct ext4_extent_header *)ip->i_db);
147 }
148
149 static inline struct ext4_extent_header *
150 ext4_ext_block_header(char *bdata)
151 {
152
153         return ((struct ext4_extent_header *)bdata);
154 }
155
156 static inline unsigned short
157 ext4_ext_inode_depth(struct inode *ip)
158 {
159         struct ext4_extent_header *ehp;
160
161         ehp = (struct ext4_extent_header *)ip->i_data;
162         return (ehp->eh_depth);
163 }
164
165 static inline e4fs_daddr_t
166 ext4_ext_index_pblock(struct ext4_extent_index *index)
167 {
168         e4fs_daddr_t blk;
169
170         blk = index->ei_leaf_lo;
171         blk |= (e4fs_daddr_t)index->ei_leaf_hi << 32;
172
173         return (blk);
174 }
175
176 static inline void
177 ext4_index_store_pblock(struct ext4_extent_index *index, e4fs_daddr_t pb)
178 {
179
180         index->ei_leaf_lo = pb & 0xffffffff;
181         index->ei_leaf_hi = (pb >> 32) & 0xffff;
182 }
183
184
185 static inline e4fs_daddr_t
186 ext4_ext_extent_pblock(struct ext4_extent *extent)
187 {
188         e4fs_daddr_t blk;
189
190         blk = extent->e_start_lo;
191         blk |= (e4fs_daddr_t)extent->e_start_hi << 32;
192
193         return (blk);
194 }
195
196 static inline void
197 ext4_ext_store_pblock(struct ext4_extent *ex, e4fs_daddr_t pb)
198 {
199
200         ex->e_start_lo = pb & 0xffffffff;
201         ex->e_start_hi = (pb >> 32) & 0xffff;
202 }
203
204 int
205 ext4_ext_in_cache(struct inode *ip, daddr_t lbn, struct ext4_extent *ep)
206 {
207         struct ext4_extent_cache *ecp;
208         int ret = EXT4_EXT_CACHE_NO;
209
210         ecp = &ip->i_ext_cache;
211         if (ecp->ec_type == EXT4_EXT_CACHE_NO)
212                 return (ret);
213
214         if (lbn >= ecp->ec_blk && lbn < ecp->ec_blk + ecp->ec_len) {
215                 ep->e_blk = ecp->ec_blk;
216                 ep->e_start_lo = ecp->ec_start & 0xffffffff;
217                 ep->e_start_hi = ecp->ec_start >> 32 & 0xffff;
218                 ep->e_len = ecp->ec_len;
219                 ret = ecp->ec_type;
220         }
221         return (ret);
222 }
223
224 static int
225 ext4_ext_check_header(struct inode *ip, struct ext4_extent_header *eh)
226 {
227         struct m_ext2fs *fs;
228         char *error_msg;
229
230         fs = ip->i_e2fs;
231
232         if (eh->eh_magic != EXT4_EXT_MAGIC) {
233                 error_msg = "invalid magic";
234                 goto corrupted;
235         }
236         if (eh->eh_max == 0) {
237                 error_msg = "invalid eh_max";
238                 goto corrupted;
239         }
240         if (eh->eh_ecount > eh->eh_max) {
241                 error_msg = "invalid eh_entries";
242                 goto corrupted;
243         }
244
245         return (0);
246
247 corrupted:
248         ext2_fserr(fs, ip->i_uid, error_msg);
249         return (EIO);
250 }
251
252 static void
253 ext4_ext_binsearch_index(struct ext4_extent_path *path, int blk)
254 {
255         struct ext4_extent_header *eh;
256         struct ext4_extent_index *r, *l, *m;
257
258         eh = path->ep_header;
259
260         KASSERT(eh->eh_ecount <= eh->eh_max && eh->eh_ecount > 0,
261             ("ext4_ext_binsearch_index: bad args"));
262
263         l = EXT_FIRST_INDEX(eh) + 1;
264         r = EXT_FIRST_INDEX(eh) + eh->eh_ecount - 1;
265         while (l <= r) {
266                 m = l + (r - l) / 2;
267                 if (blk < m->ei_blk)
268                         r = m - 1;
269                 else
270                         l = m + 1;
271         }
272
273         path->ep_index = l - 1;
274 }
275
276 static void
277 ext4_ext_binsearch_ext(struct ext4_extent_path *path, int blk)
278 {
279         struct ext4_extent_header *eh;
280         struct ext4_extent *r, *l, *m;
281
282         eh = path->ep_header;
283
284         KASSERT(eh->eh_ecount <= eh->eh_max,
285             ("ext4_ext_binsearch_ext: bad args"));
286
287         if (eh->eh_ecount == 0)
288                 return;
289
290         l = EXT_FIRST_EXTENT(eh) + 1;
291         r = EXT_FIRST_EXTENT(eh) + eh->eh_ecount - 1;
292
293         while (l <= r) {
294                 m = l + (r - l) / 2;
295                 if (blk < m->e_blk)
296                         r = m - 1;
297                 else
298                         l = m + 1;
299         }
300
301         path->ep_ext = l - 1;
302 }
303
304 static int
305 ext4_ext_fill_path_bdata(struct ext4_extent_path *path,
306     struct buf *bp, uint64_t blk)
307 {
308
309         KASSERT(path->ep_data == NULL,
310             ("ext4_ext_fill_path_bdata: bad ep_data"));
311
312         path->ep_data = malloc(bp->b_bufsize, M_EXT2EXTENTS, M_WAITOK);
313         if (!path->ep_data)
314                 return (ENOMEM);
315
316         memcpy(path->ep_data, bp->b_data, bp->b_bufsize);
317         path->ep_blk = blk;
318
319         return (0);
320 }
321
322 static void
323 ext4_ext_fill_path_buf(struct ext4_extent_path *path, struct buf *bp)
324 {
325
326         KASSERT(path->ep_data != NULL,
327             ("ext4_ext_fill_path_buf: bad ep_data"));
328
329         memcpy(bp->b_data, path->ep_data, bp->b_bufsize);
330 }
331
332 static void
333 ext4_ext_drop_refs(struct ext4_extent_path *path)
334 {
335         int depth, i;
336
337         if (!path)
338                 return;
339
340         depth = path->ep_depth;
341         for (i = 0; i <= depth; i++, path++)
342                 if (path->ep_data) {
343                         free(path->ep_data, M_EXT2EXTENTS);
344                         path->ep_data = NULL;
345                 }
346 }
347
348 void
349 ext4_ext_path_free(struct ext4_extent_path *path)
350 {
351
352         if (!path)
353                 return;
354
355         ext4_ext_drop_refs(path);
356         free(path, M_EXT2EXTENTS);
357 }
358
359 int
360 ext4_ext_find_extent(struct inode *ip, daddr_t block,
361     struct ext4_extent_path **ppath)
362 {
363         struct m_ext2fs *fs;
364         struct ext4_extent_header *eh;
365         struct ext4_extent_path *path;
366         struct buf *bp;
367         uint64_t blk;
368         int error, depth, i, ppos, alloc;
369
370         fs = ip->i_e2fs;
371         eh = ext4_ext_inode_header(ip);
372         depth = ext4_ext_inode_depth(ip);
373         ppos = 0;
374         alloc = 0;
375
376         error = ext4_ext_check_header(ip, eh);
377         if (error)
378                 return (error);
379
380         if (ppath == NULL)
381                 return (EINVAL);
382
383         path = *ppath;
384         if (path == NULL) {
385                 path = malloc(EXT4_EXT_DEPTH_MAX *
386                     sizeof(struct ext4_extent_path),
387                     M_EXT2EXTENTS, M_WAITOK | M_ZERO);
388                 if (!path)
389                         return (ENOMEM);
390
391                 *ppath = path;
392                 alloc = 1;
393         }
394
395         path[0].ep_header = eh;
396         path[0].ep_data = NULL;
397
398         /* Walk through the tree. */
399         i = depth;
400         while (i) {
401                 ext4_ext_binsearch_index(&path[ppos], block);
402                 blk = ext4_ext_index_pblock(path[ppos].ep_index);
403                 path[ppos].ep_depth = i;
404                 path[ppos].ep_ext = NULL;
405
406                 error = bread(ip->i_devvp, fsbtodb(ip->i_e2fs, blk),
407                     ip->i_e2fs->e2fs_bsize, NOCRED, &bp);
408                 if (error) {
409                         brelse(bp);
410                         goto error;
411                 }
412
413                 ppos++;
414                 if (ppos > depth) {
415                         ext2_fserr(fs, ip->i_uid,
416                             "ppos > depth => extent corrupted");
417                         error = EIO;
418                         brelse(bp);
419                         goto error;
420                 }
421
422                 ext4_ext_fill_path_bdata(&path[ppos], bp, blk);
423                 bqrelse(bp);
424
425                 eh = ext4_ext_block_header(path[ppos].ep_data);
426                 error = ext4_ext_check_header(ip, eh);
427                 if (error)
428                         goto error;
429
430                 path[ppos].ep_header = eh;
431
432                 i--;
433         }
434
435         error = ext4_ext_check_header(ip, eh);
436         if (error)
437                 goto error;
438
439         /* Find extent. */
440         path[ppos].ep_depth = i;
441         path[ppos].ep_header = eh;
442         path[ppos].ep_ext = NULL;
443         path[ppos].ep_index = NULL;
444         ext4_ext_binsearch_ext(&path[ppos], block);
445         return (0);
446
447 error:
448         ext4_ext_drop_refs(path);
449         if (alloc)
450                 free(path, M_EXT2EXTENTS);
451
452         *ppath = NULL;
453
454         return (error);
455 }
456
457 static inline int
458 ext4_ext_space_root(struct inode *ip)
459 {
460         int size;
461
462         size = sizeof(ip->i_data);
463         size -= sizeof(struct ext4_extent_header);
464         size /= sizeof(struct ext4_extent);
465
466         return (size);
467 }
468
469 static inline int
470 ext4_ext_space_block(struct inode *ip)
471 {
472         struct m_ext2fs *fs;
473         int size;
474
475         fs = ip->i_e2fs;
476
477         size = (fs->e2fs_bsize - sizeof(struct ext4_extent_header)) /
478             sizeof(struct ext4_extent);
479
480         return (size);
481 }
482
483 static inline int
484 ext4_ext_space_block_index(struct inode *ip)
485 {
486         struct m_ext2fs *fs;
487         int size;
488
489         fs = ip->i_e2fs;
490
491         size = (fs->e2fs_bsize - sizeof(struct ext4_extent_header)) /
492             sizeof(struct ext4_extent_index);
493
494         return (size);
495 }
496
497 void
498 ext4_ext_tree_init(struct inode *ip)
499 {
500         struct ext4_extent_header *ehp;
501
502         ip->i_flag |= IN_E4EXTENTS;
503
504         memset(ip->i_data, 0, EXT2_NDADDR + EXT2_NIADDR);
505         ehp = (struct ext4_extent_header *)ip->i_data;
506         ehp->eh_magic = EXT4_EXT_MAGIC;
507         ehp->eh_max = ext4_ext_space_root(ip);
508         ip->i_ext_cache.ec_type = EXT4_EXT_CACHE_NO;
509         ip->i_flag |= IN_CHANGE | IN_UPDATE;
510         ext2_update(ip->i_vnode, 1);
511 }
512
513 static inline void
514 ext4_ext_put_in_cache(struct inode *ip, uint32_t blk,
515                         uint32_t len, uint32_t start, int type)
516 {
517
518         KASSERT(len != 0, ("ext4_ext_put_in_cache: bad input"));
519
520         ip->i_ext_cache.ec_type = type;
521         ip->i_ext_cache.ec_blk = blk;
522         ip->i_ext_cache.ec_len = len;
523         ip->i_ext_cache.ec_start = start;
524 }
525
526 static e4fs_daddr_t
527 ext4_ext_blkpref(struct inode *ip, struct ext4_extent_path *path,
528     e4fs_daddr_t block)
529 {
530         struct m_ext2fs *fs;
531         struct ext4_extent *ex;
532         e4fs_daddr_t bg_start;
533         int depth;
534
535         fs = ip->i_e2fs;
536
537         if (path) {
538                 depth = path->ep_depth;
539                 ex = path[depth].ep_ext;
540                 if (ex) {
541                         e4fs_daddr_t pblk = ext4_ext_extent_pblock(ex);
542                         e2fs_daddr_t blk = ex->e_blk;
543
544                         if (block > blk)
545                                 return (pblk + (block - blk));
546                         else
547                                 return (pblk - (blk - block));
548                 }
549
550                 /* Try to get block from index itself. */
551                 if (path[depth].ep_data)
552                         return (path[depth].ep_blk);
553         }
554
555         /* Use inode's group. */
556         bg_start = (ip->i_block_group * EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) +
557             fs->e2fs->e2fs_first_dblock;
558
559         return (bg_start + block);
560 }
561
562 static int inline
563 ext4_can_extents_be_merged(struct ext4_extent *ex1,
564     struct ext4_extent *ex2)
565 {
566
567         if (ex1->e_blk + ex1->e_len != ex2->e_blk)
568                 return (0);
569
570         if (ex1->e_len + ex2->e_len > EXT4_MAX_LEN)
571                 return (0);
572
573         if (ext4_ext_extent_pblock(ex1) + ex1->e_len ==
574             ext4_ext_extent_pblock(ex2))
575                 return (1);
576
577         return (0);
578 }
579
580 static unsigned
581 ext4_ext_next_leaf_block(struct inode *ip, struct ext4_extent_path *path)
582 {
583         int depth = path->ep_depth;
584
585         /* Empty tree */
586         if (depth == 0)
587                 return (EXT4_MAX_BLOCKS);
588
589         /* Go to indexes. */
590         depth--;
591
592         while (depth >= 0) {
593                 if (path[depth].ep_index !=
594                     EXT_LAST_INDEX(path[depth].ep_header))
595                         return (path[depth].ep_index[1].ei_blk);
596
597                 depth--;
598         }
599
600         return (EXT4_MAX_BLOCKS);
601 }
602
603 static int
604 ext4_ext_dirty(struct inode *ip, struct ext4_extent_path *path)
605 {
606         struct m_ext2fs *fs;
607         struct buf *bp;
608         uint64_t blk;
609         int error;
610
611         fs = ip->i_e2fs;
612
613         if (!path)
614                 return (EINVAL);
615
616         if (path->ep_data) {
617                 blk = path->ep_blk;
618                 bp = getblk(ip->i_devvp, fsbtodb(fs, blk),
619                     fs->e2fs_bsize, 0, 0, 0);
620                 if (!bp)
621                         return (EIO);
622                 ext4_ext_fill_path_buf(path, bp);
623                 error = bwrite(bp);
624         } else {
625                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
626                 error = ext2_update(ip->i_vnode, 1);
627         }
628
629         return (error);
630 }
631
632 static int
633 ext4_ext_insert_index(struct inode *ip, struct ext4_extent_path *path,
634     uint32_t lblk, e4fs_daddr_t blk)
635 {
636         struct m_ext2fs *fs;
637         struct ext4_extent_index *idx;
638         int len;
639
640         fs = ip->i_e2fs;
641
642         if (lblk == path->ep_index->ei_blk) {
643                 ext2_fserr(fs, ip->i_uid,
644                     "lblk == index blk => extent corrupted");
645                 return (EIO);
646         }
647
648         if (path->ep_header->eh_ecount >= path->ep_header->eh_max) {
649                 ext2_fserr(fs, ip->i_uid,
650                     "ecout > maxcount => extent corrupted");
651                 return (EIO);
652         }
653
654         if (lblk > path->ep_index->ei_blk) {
655                 /* Insert after. */
656                 idx = path->ep_index + 1;
657         } else {
658                 /* Insert before. */
659                 idx = path->ep_index;
660         }
661
662         len = EXT_LAST_INDEX(path->ep_header) - idx + 1;
663         if (len > 0)
664                 memmove(idx + 1, idx, len * sizeof(struct ext4_extent_index));
665
666         if (idx > EXT_MAX_INDEX(path->ep_header)) {
667                 ext2_fserr(fs, ip->i_uid,
668                     "index is out of range => extent corrupted");
669                 return (EIO);
670         }
671
672         idx->ei_blk = lblk;
673         ext4_index_store_pblock(idx, blk);
674         path->ep_header->eh_ecount++;
675
676         return (ext4_ext_dirty(ip, path));
677 }
678
679 static e4fs_daddr_t
680 ext4_ext_alloc_meta(struct inode *ip)
681 {
682         e4fs_daddr_t blk = ext2_alloc_meta(ip);
683         if (blk) {
684                 ip->i_blocks += btodb(ip->i_e2fs->e2fs_bsize);
685                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
686                 ext2_update(ip->i_vnode, 1);
687         }
688
689         return (blk);
690 }
691
692 static void
693 ext4_ext_blkfree(struct inode *ip, uint64_t blk, int count, int flags)
694 {
695         struct m_ext2fs *fs;
696         int i, blocksreleased;
697
698         fs = ip->i_e2fs;
699         blocksreleased = count;
700
701         for(i = 0; i < count; i++)
702                 ext2_blkfree(ip, blk + i, fs->e2fs_bsize);
703
704         if (ip->i_blocks >= blocksreleased)
705                 ip->i_blocks -= (btodb(fs->e2fs_bsize)*blocksreleased);
706         else
707                 ip->i_blocks = 0;
708
709         ip->i_flag |= IN_CHANGE | IN_UPDATE;
710         ext2_update(ip->i_vnode, 1);
711 }
712
713 static int
714 ext4_ext_split(struct inode *ip, struct ext4_extent_path *path,
715     struct ext4_extent *newext, int at)
716 {
717         struct m_ext2fs *fs;
718         struct  buf *bp;
719         int depth = ext4_ext_inode_depth(ip);
720         struct ext4_extent_header *neh;
721         struct ext4_extent_index *fidx;
722         struct ext4_extent *ex;
723         int i = at, k, m, a;
724         e4fs_daddr_t newblk, oldblk;
725         uint32_t border;
726         e4fs_daddr_t *ablks = NULL;
727         int error = 0;
728
729         fs = ip->i_e2fs;
730         bp = NULL;
731
732         /*
733          * We will split at current extent for now.
734          */
735         if (path[depth].ep_ext > EXT_MAX_EXTENT(path[depth].ep_header)) {
736                 ext2_fserr(fs, ip->i_uid,
737                     "extent is out of range => extent corrupted");
738                 return (EIO);
739         }
740
741         if (path[depth].ep_ext != EXT_MAX_EXTENT(path[depth].ep_header))
742                 border = path[depth].ep_ext[1].e_blk;
743         else
744                 border = newext->e_blk;
745
746         /* Allocate new blocks. */
747         ablks = malloc(sizeof(e4fs_daddr_t) * depth,
748             M_EXT2EXTENTS, M_WAITOK | M_ZERO);
749         if (!ablks)
750                 return (ENOMEM);
751         for (a = 0; a < depth - at; a++) {
752                 newblk = ext4_ext_alloc_meta(ip);
753                 if (newblk == 0)
754                         goto cleanup;
755                 ablks[a] = newblk;
756         }
757
758         newblk = ablks[--a];
759         bp = getblk(ip->i_devvp, fsbtodb(fs, newblk), fs->e2fs_bsize, 0, 0, 0);
760         if (!bp) {
761                 error = EIO;
762                 goto cleanup;
763         }
764
765         neh = ext4_ext_block_header(bp->b_data);
766         neh->eh_ecount = 0;
767         neh->eh_max = ext4_ext_space_block(ip);
768         neh->eh_magic = EXT4_EXT_MAGIC;
769         neh->eh_depth = 0;
770         ex = EXT_FIRST_EXTENT(neh);
771
772         if (path[depth].ep_header->eh_ecount != path[depth].ep_header->eh_max) {
773                 ext2_fserr(fs, ip->i_uid,
774                     "extents count out of range => extent corrupted");
775                 error = EIO;
776                 goto cleanup;
777         }
778
779         /* Start copy from next extent. */
780         m = 0;
781         path[depth].ep_ext++;
782         while (path[depth].ep_ext <= EXT_MAX_EXTENT(path[depth].ep_header)) {
783                 path[depth].ep_ext++;
784                 m++;
785         }
786         if (m) {
787                 memmove(ex, path[depth].ep_ext - m,
788                     sizeof(struct ext4_extent) * m);
789                 neh->eh_ecount = neh->eh_ecount + m;
790         }
791
792         bwrite(bp);
793         bp = NULL;
794
795         /* Fix old leaf. */
796         if (m) {
797                 path[depth].ep_header->eh_ecount =
798                     path[depth].ep_header->eh_ecount - m;
799                 ext4_ext_dirty(ip, path + depth);
800         }
801
802         /* Create intermediate indexes. */
803         k = depth - at - 1;
804         KASSERT(k >= 0, ("ext4_ext_split: negative k"));
805
806         /* Insert new index into current index block. */
807         i = depth - 1;
808         while (k--) {
809                 oldblk = newblk;
810                 newblk = ablks[--a];
811                 error = bread(ip->i_devvp, fsbtodb(fs, newblk),
812                     (int)fs->e2fs_bsize, NOCRED, &bp);
813                 if (error) {
814                         brelse(bp);
815                         goto cleanup;
816                 }
817
818                 neh = (struct ext4_extent_header *)bp->b_data;
819                 neh->eh_ecount = 1;
820                 neh->eh_magic = EXT4_EXT_MAGIC;
821                 neh->eh_max = ext4_ext_space_block_index(ip);
822                 neh->eh_depth = depth - i;
823                 fidx = EXT_FIRST_INDEX(neh);
824                 fidx->ei_blk = border;
825                 ext4_index_store_pblock(fidx, oldblk);
826
827                 m = 0;
828                 path[i].ep_index++;
829                 while (path[i].ep_index <= EXT_MAX_INDEX(path[i].ep_header)) {
830                         path[i].ep_index++;
831                         m++;
832                 }
833                 if (m) {
834                         memmove(++fidx, path[i].ep_index - m,
835                             sizeof(struct ext4_extent_index) * m);
836                         neh->eh_ecount = neh->eh_ecount + m;
837                 }
838
839                 bwrite(bp);
840                 bp = NULL;
841
842                 /* Fix old index. */
843                 if (m) {
844                         path[i].ep_header->eh_ecount =
845                             path[i].ep_header->eh_ecount - m;
846                         ext4_ext_dirty(ip, path + i);
847                 }
848
849                 i--;
850         }
851
852         error = ext4_ext_insert_index(ip, path + at, border, newblk);
853
854 cleanup:
855         if (bp)
856                 brelse(bp);
857
858         if (error) {
859                 for (i = 0; i < depth; i++) {
860                         if (!ablks[i])
861                                 continue;
862                         ext4_ext_blkfree(ip, ablks[i], 1, 0);
863                 }
864         }
865
866         free(ablks, M_EXT2EXTENTS);
867
868         return (error);
869 }
870
871 static int
872 ext4_ext_grow_indepth(struct inode *ip, struct ext4_extent_path *path,
873     struct ext4_extent *newext)
874 {
875         struct m_ext2fs *fs;
876         struct ext4_extent_path *curpath;
877         struct ext4_extent_header *neh;
878         struct ext4_extent_index *fidx;
879         struct buf *bp;
880         e4fs_daddr_t newblk;
881         int error = 0;
882
883         fs = ip->i_e2fs;
884         curpath = path;
885
886         newblk = ext4_ext_alloc_meta(ip);
887         if (newblk == 0)
888                 return (error);
889
890         bp = getblk(ip->i_devvp, fsbtodb(fs, newblk), fs->e2fs_bsize, 0, 0, 0);
891         if (!bp)
892                 return (EIO);
893
894         /* Move top-level index/leaf into new block. */
895         memmove(bp->b_data, curpath->ep_header, sizeof(ip->i_data));
896
897         /* Set size of new block */
898         neh = ext4_ext_block_header(bp->b_data);
899         neh->eh_magic = EXT4_EXT_MAGIC;
900
901         if (ext4_ext_inode_depth(ip))
902                 neh->eh_max = ext4_ext_space_block_index(ip);
903         else
904                 neh->eh_max = ext4_ext_space_block(ip);
905
906         error = bwrite(bp);
907         if (error)
908                 goto out;
909
910         bp = NULL;
911
912         curpath->ep_header->eh_magic = EXT4_EXT_MAGIC;
913         curpath->ep_header->eh_max = ext4_ext_space_root(ip);
914         curpath->ep_header->eh_ecount = 1;
915         curpath->ep_index = EXT_FIRST_INDEX(curpath->ep_header);
916         curpath->ep_index->ei_blk = EXT_FIRST_EXTENT(path[0].ep_header)->e_blk;
917         ext4_index_store_pblock(curpath->ep_index, newblk);
918
919         neh = ext4_ext_inode_header(ip);
920         fidx = EXT_FIRST_INDEX(neh);
921         neh->eh_depth = path->ep_depth + 1;
922         ext4_ext_dirty(ip, curpath);
923 out:
924         brelse(bp);
925
926         return (error);
927 }
928
929 static int
930 ext4_ext_create_new_leaf(struct inode *ip, struct ext4_extent_path *path,
931     struct ext4_extent *newext)
932 {
933         struct m_ext2fs *fs;
934         struct ext4_extent_path *curpath;
935         int depth, i, error;
936
937         fs = ip->i_e2fs;
938
939 repeat:
940         i = depth = ext4_ext_inode_depth(ip);
941
942         /* Look for free index entry int the tree */
943         curpath = path + depth;
944         while (i > 0 && !EXT_HAS_FREE_INDEX(curpath)) {
945                 i--;
946                 curpath--;
947         }
948
949         /*
950          * We use already allocated block for index block,
951          * so subsequent data blocks should be contiguous.
952          */
953         if (EXT_HAS_FREE_INDEX(curpath)) {
954                 error = ext4_ext_split(ip, path, newext, i);
955                 if (error)
956                         goto out;
957
958                 /* Refill path. */
959                 ext4_ext_drop_refs(path);
960                 error = ext4_ext_find_extent(ip, newext->e_blk, &path);
961                 if (error)
962                         goto out;
963         } else {
964                 /* Tree is full, do grow in depth. */
965                 error = ext4_ext_grow_indepth(ip, path, newext);
966                 if (error)
967                         goto out;
968
969                 /* Refill path. */
970                 ext4_ext_drop_refs(path);
971                 error = ext4_ext_find_extent(ip, newext->e_blk, &path);
972                 if (error)
973                         goto out;
974
975                 /* Check and split tree if required. */
976                 depth = ext4_ext_inode_depth(ip);
977                 if (path[depth].ep_header->eh_ecount ==
978                     path[depth].ep_header->eh_max)
979                         goto repeat;
980         }
981
982 out:
983         return (error);
984 }
985
986 static int
987 ext4_ext_correct_indexes(struct inode *ip, struct ext4_extent_path *path)
988 {
989         struct ext4_extent_header *eh;
990         struct ext4_extent *ex;
991         int32_t border;
992         int depth, k;
993
994         depth = ext4_ext_inode_depth(ip);
995         eh = path[depth].ep_header;
996         ex = path[depth].ep_ext;
997
998         if (ex == NULL || eh == NULL)
999                 return (EIO);
1000
1001         if (!depth)
1002                 return (0);
1003
1004         /* We will correct tree if first leaf got modified only. */
1005         if (ex != EXT_FIRST_EXTENT(eh))
1006                 return (0);
1007
1008         k = depth - 1;
1009         border = path[depth].ep_ext->e_blk;
1010         path[k].ep_index->ei_blk = border;
1011         ext4_ext_dirty(ip, path + k);
1012         while (k--) {
1013                 /* Change all left-side indexes. */
1014                 if (path[k+1].ep_index != EXT_FIRST_INDEX(path[k+1].ep_header))
1015                         break;
1016
1017                 path[k].ep_index->ei_blk = border;
1018                 ext4_ext_dirty(ip, path + k);
1019         }
1020
1021         return (0);
1022 }
1023
1024 static int
1025 ext4_ext_insert_extent(struct inode *ip, struct ext4_extent_path *path,
1026     struct ext4_extent *newext)
1027 {
1028         struct m_ext2fs *fs;
1029         struct ext4_extent_header * eh;
1030         struct ext4_extent *ex, *nex, *nearex;
1031         struct ext4_extent_path *npath;
1032         int depth, len, error, next;
1033
1034         fs = ip->i_e2fs;
1035         depth = ext4_ext_inode_depth(ip);
1036         ex = path[depth].ep_ext;
1037         npath = NULL;
1038
1039         if (newext->e_len == 0 || path[depth].ep_header == NULL)
1040                 return (EINVAL);
1041
1042         /* Insert block into found extent. */
1043         if (ex && ext4_can_extents_be_merged(ex, newext)) {
1044                 ex->e_len = ex->e_len + newext->e_len;
1045                 eh = path[depth].ep_header;
1046                 nearex = ex;
1047                 goto merge;
1048         }
1049
1050 repeat:
1051         depth = ext4_ext_inode_depth(ip);
1052         eh = path[depth].ep_header;
1053         if (eh->eh_ecount < eh->eh_max)
1054                 goto has_space;
1055
1056         /* Try next leaf */
1057         nex = EXT_LAST_EXTENT(eh);
1058         next = ext4_ext_next_leaf_block(ip, path);
1059         if (newext->e_blk > nex->e_blk && next != EXT4_MAX_BLOCKS) {
1060                 KASSERT(npath == NULL,
1061                     ("ext4_ext_insert_extent: bad path"));
1062
1063                 error = ext4_ext_find_extent(ip, next, &npath);
1064                 if (error)
1065                         goto cleanup;
1066
1067                 if (npath->ep_depth != path->ep_depth) {
1068                         error = EIO;
1069                         goto cleanup;
1070                 }
1071
1072                 eh = npath[depth].ep_header;
1073                 if (eh->eh_ecount < eh->eh_max) {
1074                         path = npath;
1075                         goto repeat;
1076                 }
1077         }
1078
1079         /*
1080          * There is no free space in the found leaf,
1081          * try to add a new leaf to the tree.
1082          */
1083         error = ext4_ext_create_new_leaf(ip, path, newext);
1084         if (error)
1085                 goto cleanup;
1086
1087         depth = ext4_ext_inode_depth(ip);
1088         eh = path[depth].ep_header;
1089
1090 has_space:
1091         nearex = path[depth].ep_ext;
1092         if (!nearex) {
1093                 /* Create new extent in the leaf. */
1094                 path[depth].ep_ext = EXT_FIRST_EXTENT(eh);
1095         } else if (newext->e_blk > nearex->e_blk) {
1096                 if (nearex != EXT_LAST_EXTENT(eh)) {
1097                         len = EXT_MAX_EXTENT(eh) - nearex;
1098                         len = (len - 1) * sizeof(struct ext4_extent);
1099                         len = len < 0 ? 0 : len;
1100                         memmove(nearex + 2, nearex + 1, len);
1101                 }
1102                 path[depth].ep_ext = nearex + 1;
1103         } else {
1104                 len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
1105                 len = len < 0 ? 0 : len;
1106                 memmove(nearex + 1, nearex, len);
1107                 path[depth].ep_ext = nearex;
1108         }
1109
1110         eh->eh_ecount = eh->eh_ecount + 1;
1111         nearex = path[depth].ep_ext;
1112         nearex->e_blk = newext->e_blk;
1113         nearex->e_start_lo = newext->e_start_lo;
1114         nearex->e_start_hi = newext->e_start_hi;
1115         nearex->e_len = newext->e_len;
1116
1117 merge:
1118         /* Try to merge extents to the right. */
1119         while (nearex < EXT_LAST_EXTENT(eh)) {
1120                 if (!ext4_can_extents_be_merged(nearex, nearex + 1))
1121                         break;
1122
1123                 /* Merge with next extent. */
1124                 nearex->e_len = nearex->e_len + nearex[1].e_len;
1125                 if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
1126                         len = (EXT_LAST_EXTENT(eh) - nearex - 1) *
1127                             sizeof(struct ext4_extent);
1128                         memmove(nearex + 1, nearex + 2, len);
1129                 }
1130
1131                 eh->eh_ecount = eh->eh_ecount - 1;
1132                 KASSERT(eh->eh_ecount != 0,
1133                     ("ext4_ext_insert_extent: bad ecount"));
1134         }
1135
1136         /*
1137          * Try to merge extents to the left,
1138          * start from inexes correction.
1139          */
1140         error = ext4_ext_correct_indexes(ip, path);
1141         if (error)
1142                 goto cleanup;
1143
1144         ext4_ext_dirty(ip, path + depth);
1145
1146 cleanup:
1147         if (npath) {
1148                 ext4_ext_drop_refs(npath);
1149                 free(npath, M_EXT2EXTENTS);
1150         }
1151
1152         ip->i_ext_cache.ec_type = EXT4_EXT_CACHE_NO;
1153         return (error);
1154 }
1155
1156 static e4fs_daddr_t
1157 ext4_new_blocks(struct inode *ip, daddr_t lbn, e4fs_daddr_t pref,
1158     struct ucred *cred, unsigned long *count, int *perror)
1159 {
1160         struct m_ext2fs *fs;
1161         struct ext2mount *ump;
1162         e4fs_daddr_t newblk;
1163
1164         fs = ip->i_e2fs;
1165         ump = ip->i_ump;
1166
1167         /*
1168          * We will allocate only single block for now.
1169          */
1170         if (*count > 1)
1171                 return (0);
1172
1173         EXT2_LOCK(ip->i_ump);
1174         *perror = ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newblk);
1175         if (*perror)
1176                 return (0);
1177
1178         if (newblk) {
1179                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1180                 ext2_update(ip->i_vnode, 1);
1181         }
1182
1183         return (newblk);
1184 }
1185
1186 int
1187 ext4_ext_get_blocks(struct inode *ip, e4fs_daddr_t iblk,
1188     unsigned long max_blocks, struct ucred *cred, struct buf **bpp,
1189     int *pallocated, uint32_t *nb)
1190 {
1191         struct m_ext2fs *fs;
1192         struct buf *bp = NULL;
1193         struct ext4_extent_path *path;
1194         struct ext4_extent newex, *ex;
1195         e4fs_daddr_t bpref, newblk = 0;
1196         unsigned long allocated = 0;
1197         int error = 0, depth;
1198
1199         fs = ip->i_e2fs;
1200         *pallocated = 0;
1201         path = NULL;
1202         if(bpp)
1203                 *bpp = NULL;
1204
1205         /* Check cache. */
1206         if ((bpref = ext4_ext_in_cache(ip, iblk, &newex))) {
1207                 if (bpref == EXT4_EXT_CACHE_IN) {
1208                         /* Block is already allocated. */
1209                         newblk = iblk - newex.e_blk +
1210                             ext4_ext_extent_pblock(&newex);
1211                         allocated = newex.e_len - (iblk - newex.e_blk);
1212                         goto out;
1213                 } else {
1214                         error = EIO;
1215                         goto out2;
1216                 }
1217         }
1218
1219         error = ext4_ext_find_extent(ip, iblk, &path);
1220         if (error) {
1221                 goto out2;
1222         }
1223
1224         depth = ext4_ext_inode_depth(ip);
1225         if (path[depth].ep_ext == NULL && depth != 0) {
1226                 error = EIO;
1227                 goto out2;
1228         }
1229
1230         if ((ex = path[depth].ep_ext)) {
1231                 uint64_t lblk = ex->e_blk;
1232                 uint16_t e_len  = ex->e_len;
1233                 e4fs_daddr_t e_start = ext4_ext_extent_pblock(ex);
1234
1235                 if (e_len > EXT4_MAX_LEN)
1236                         goto out2;
1237
1238                 /* If we found extent covers block, simply return it. */
1239                 if (iblk >= lblk && iblk < lblk + e_len) {
1240                         newblk = iblk - lblk + e_start;
1241                         allocated = e_len - (iblk - lblk);
1242                         ext4_ext_put_in_cache(ip, lblk, e_len,
1243                             e_start, EXT4_EXT_CACHE_IN);
1244                         goto out;
1245                 }
1246         }
1247
1248         /* Allocate the new block. */
1249         if (S_ISREG(ip->i_mode) && (!ip->i_next_alloc_block)) {
1250                 ip->i_next_alloc_goal = 0;
1251         }
1252
1253         bpref = ext4_ext_blkpref(ip, path, iblk);
1254         allocated = max_blocks;
1255         newblk = ext4_new_blocks(ip, iblk, bpref, cred, &allocated, &error);
1256         if (!newblk)
1257                 goto out2;
1258
1259         /* Try to insert new extent into found leaf and return. */
1260         newex.e_blk = iblk;
1261         ext4_ext_store_pblock(&newex, newblk);
1262         newex.e_len = allocated;
1263         error = ext4_ext_insert_extent(ip, path, &newex);
1264         if (error)
1265                 goto out2;
1266
1267         newblk = ext4_ext_extent_pblock(&newex);
1268         ext4_ext_put_in_cache(ip, iblk, allocated, newblk, EXT4_EXT_CACHE_IN);
1269         *pallocated = 1;
1270
1271 out:
1272         if (allocated > max_blocks)
1273                 allocated = max_blocks;
1274
1275         if (bpp)
1276         {
1277                 error = bread(ip->i_devvp, fsbtodb(fs, newblk),
1278                     fs->e2fs_bsize, cred, &bp);
1279                 if (error) {
1280                         brelse(bp);
1281                 } else {
1282                         *bpp = bp;
1283                 }
1284         }
1285
1286 out2:
1287         if (path) {
1288                 ext4_ext_drop_refs(path);
1289                 free(path, M_EXT2EXTENTS);
1290         }
1291
1292         if (nb)
1293                 *nb = newblk;
1294
1295         return (error);
1296 }
1297
1298 static inline uint16_t
1299 ext4_ext_get_actual_len(struct ext4_extent *ext)
1300 {
1301
1302         return (ext->e_len <= EXT_INIT_MAX_LEN ?
1303             ext->e_len : (ext->e_len - EXT_INIT_MAX_LEN));
1304 }
1305
1306 static inline struct ext4_extent_header *
1307 ext4_ext_header(struct inode *ip)
1308 {
1309
1310         return (struct ext4_extent_header *)ip->i_db;
1311 }
1312
1313 static int
1314 ext4_remove_blocks(struct inode *ip, struct ext4_extent *ex,
1315     unsigned long from, unsigned long to)
1316 {
1317         unsigned long num, start;
1318
1319         if (from >= ex->e_blk &&
1320             to == ex->e_blk + ext4_ext_get_actual_len(ex) - 1) {
1321                 /* Tail cleanup. */
1322                 num = ex->e_blk + ext4_ext_get_actual_len(ex) - from;
1323                 start = ext4_ext_extent_pblock(ex) +
1324                     ext4_ext_get_actual_len(ex) - num;
1325                 ext4_ext_blkfree(ip, start, num, 0);
1326         }
1327
1328         return (0);
1329 }
1330
1331 static int
1332 ext4_ext_rm_index(struct inode *ip, struct ext4_extent_path *path)
1333 {
1334         e4fs_daddr_t leaf;
1335
1336         /* Free index block. */
1337         path--;
1338         leaf = ext4_ext_index_pblock(path->ep_index);
1339         KASSERT(path->ep_header->eh_ecount != 0,
1340             ("ext4_ext_rm_index: bad ecount"));
1341         path->ep_header->eh_ecount--;
1342         ext4_ext_dirty(ip, path);
1343         ext4_ext_blkfree(ip, leaf, 1, 0);
1344         return (0);
1345 }
1346
1347 static int
1348 ext4_ext_rm_leaf(struct inode *ip, struct ext4_extent_path *path,
1349     uint64_t start)
1350 {
1351         struct m_ext2fs *fs;
1352         int depth;
1353         struct ext4_extent_header *eh;
1354         unsigned int a, b, block, num;
1355         unsigned long ex_blk;
1356         unsigned short ex_len;
1357         struct ext4_extent *ex;
1358         int error, correct_index;
1359
1360         fs = ip->i_e2fs;
1361         depth = ext4_ext_inode_depth(ip);
1362         correct_index = 0;
1363
1364         if (!path[depth].ep_header) {
1365                 if (path[depth].ep_data == NULL)
1366                         return (EINVAL);
1367                 path[depth].ep_header =
1368                     (struct ext4_extent_header* )path[depth].ep_data;
1369         }
1370
1371         eh = path[depth].ep_header;
1372         if (!eh) {
1373                 ext2_fserr(fs, ip->i_uid, "bad header => extent corrupted");
1374                 return (EIO);
1375         }
1376
1377         ex = EXT_LAST_EXTENT(eh);
1378         ex_blk = ex->e_blk;
1379         ex_len = ext4_ext_get_actual_len(ex);
1380
1381         while (ex >= EXT_FIRST_EXTENT(eh) && ex_blk + ex_len > start) {
1382                 path[depth].ep_ext = ex;
1383                 a = ex_blk > start ? ex_blk : start;
1384                 b = (uint64_t)ex_blk + ex_len - 1 <
1385                     EXT4_MAX_BLOCKS ? ex_blk + ex_len - 1 : EXT4_MAX_BLOCKS;
1386
1387                 if (a != ex_blk && b != ex_blk + ex_len - 1)
1388                         return (EINVAL);
1389                 else if (a != ex_blk) {
1390                         /* Remove tail of the extent. */
1391                         block = ex_blk;
1392                         num = a - block;
1393                 } else if (b != ex_blk + ex_len - 1) {
1394                         /* Remove head of the extent, not implemented. */
1395                         return (EINVAL);
1396                 } else {
1397                         /* Remove whole extent. */
1398                         block = ex_blk;
1399                         num = 0;
1400                 }
1401
1402                 if (ex == EXT_FIRST_EXTENT(eh))
1403                         correct_index = 1;
1404
1405                 error = ext4_remove_blocks(ip, ex, a, b);
1406                 if (error)
1407                         goto out;
1408
1409                 if (num == 0) {
1410                         ext4_ext_store_pblock(ex, 0);
1411                         eh->eh_ecount--;
1412                 }
1413
1414                 ex->e_blk = block;
1415                 ex->e_len = num;
1416
1417                 ext4_ext_dirty(ip, path + depth);
1418
1419                 ex--;
1420                 ex_blk = ex->e_blk;
1421                 ex_len = ext4_ext_get_actual_len(ex);
1422         };
1423
1424         if (correct_index && eh->eh_ecount)
1425                 error = ext4_ext_correct_indexes(ip, path);
1426
1427         /*
1428          * If this leaf is free, we should
1429          * remove it from index block above.
1430          */
1431         if (error == 0 && eh->eh_ecount == 0 && path[depth].ep_data != NULL)
1432                 error = ext4_ext_rm_index(ip, path + depth);
1433
1434 out:
1435         return (error);
1436 }
1437
1438 static struct buf *
1439 ext4_read_extent_tree_block(struct inode *ip, e4fs_daddr_t pblk,
1440     int depth, int flags)
1441 {
1442         struct m_ext2fs *fs;
1443         struct ext4_extent_header *eh;
1444         struct buf *bp;
1445         int error;
1446
1447         fs = ip->i_e2fs;
1448
1449         error = bread(ip->i_devvp, fsbtodb(fs, pblk),
1450             fs->e2fs_bsize, NOCRED, &bp);
1451         if (error) {
1452                 brelse(bp);
1453                 return (NULL);
1454         }
1455
1456         eh = ext4_ext_block_header(bp->b_data);
1457         if (eh->eh_depth != depth) {
1458                 ext2_fserr(fs, ip->i_uid, "unexpected eh_depth");
1459                 goto err;
1460         }
1461
1462         error = ext4_ext_check_header(ip, eh);
1463         if (error)
1464                 goto err;
1465
1466         return (bp);
1467
1468 err:
1469         brelse(bp);
1470         return (NULL);
1471
1472 }
1473
1474 static int inline
1475 ext4_ext_more_to_rm(struct ext4_extent_path *path)
1476 {
1477
1478         KASSERT(path->ep_index != NULL,
1479             ("ext4_ext_more_to_rm: bad index from path"));
1480
1481         if (path->ep_index < EXT_FIRST_INDEX(path->ep_header))
1482                 return (0);
1483
1484         if (path->ep_header->eh_ecount == path->index_count)
1485                 return (0);
1486
1487         return (1);
1488 }
1489
1490 int
1491 ext4_ext_remove_space(struct inode *ip, off_t length, int flags,
1492     struct ucred *cred, struct thread *td)
1493 {
1494         struct buf *bp;
1495         struct ext4_extent_header *ehp;
1496         struct ext4_extent_path *path;
1497         int depth;
1498         int i, error;
1499
1500         ehp = (struct ext4_extent_header *)ip->i_db;
1501         depth = ext4_ext_inode_depth(ip);
1502
1503         error = ext4_ext_check_header(ip, ehp);
1504         if(error)
1505                 return (error);
1506
1507         path = malloc(sizeof(struct ext4_extent_path) * (depth + 1),
1508             M_EXT2EXTENTS, M_WAITOK | M_ZERO);
1509         if (!path)
1510                 return (ENOMEM);
1511
1512         i = 0;
1513         path[0].ep_header = ehp;
1514         path[0].ep_depth = depth;
1515         while (i >= 0 && error == 0) {
1516                 if (i == depth) {
1517                         /* This is leaf. */
1518                         error = ext4_ext_rm_leaf(ip, path, length);
1519                         if (error)
1520                                 break;
1521                         free(path[i].ep_data, M_EXT2EXTENTS);
1522                         path[i].ep_data = NULL;
1523                         i--;
1524                         continue;
1525                 }
1526
1527                 /* This is index. */
1528                 if (!path[i].ep_header)
1529                         path[i].ep_header =
1530                             (struct ext4_extent_header *)path[i].ep_data;
1531
1532                 if (!path[i].ep_index) {
1533                         /* This level hasn't touched yet. */
1534                         path[i].ep_index = EXT_LAST_INDEX(path[i].ep_header);
1535                         path[i].index_count = path[i].ep_header->eh_ecount + 1;
1536                 } else {
1537                         /* We've already was here, see at next index. */
1538                         path[i].ep_index--;
1539                 }
1540
1541                 if (ext4_ext_more_to_rm(path + i)) {
1542                         memset(path + i + 1, 0, sizeof(*path));
1543                         bp = ext4_read_extent_tree_block(ip,
1544                             ext4_ext_index_pblock(path[i].ep_index),
1545                             path[0].ep_depth - (i + 1), 0);
1546                         if (!bp) {
1547                                 error = EIO;
1548                                 break;
1549                         }
1550
1551                         ext4_ext_fill_path_bdata(&path[i+1], bp,
1552                             ext4_ext_index_pblock(path[i].ep_index));
1553                         brelse(bp);
1554                         path[i].index_count = path[i].ep_header->eh_ecount;
1555                         i++;
1556                 } else {
1557                         if (path[i].ep_header->eh_ecount == 0 && i > 0) {
1558                                 /* Index is empty, remove it. */
1559                                 error = ext4_ext_rm_index(ip, path + i);
1560                         }
1561                         free(path[i].ep_data, M_EXT2EXTENTS);
1562                         path[i].ep_data = NULL;
1563                         i--;
1564                 }
1565         }
1566
1567         if (path->ep_header->eh_ecount == 0) {
1568                 /*
1569                  * Truncate the tree to zero.
1570                  */
1571                  ext4_ext_header(ip)->eh_depth = 0;
1572                  ext4_ext_header(ip)->eh_max = ext4_ext_space_root(ip);
1573                  ext4_ext_dirty(ip, path);
1574
1575         }
1576
1577         ext4_ext_drop_refs(path);
1578         free(path, M_EXT2EXTENTS);
1579
1580         return (error);
1581 }