]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/fs/ext2fs/ext2_extents.c
MFV r337029:
[FreeBSD/FreeBSD.git] / sys / fs / ext2fs / ext2_extents.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2010 Zheng Liu <lz@freebsd.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/types.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/vnode.h>
37 #include <sys/bio.h>
38 #include <sys/buf.h>
39 #include <sys/conf.h>
40 #include <sys/stat.h>
41
42 #include <fs/ext2fs/ext2_mount.h>
43 #include <fs/ext2fs/fs.h>
44 #include <fs/ext2fs/inode.h>
45 #include <fs/ext2fs/ext2fs.h>
46 #include <fs/ext2fs/ext2_extents.h>
47 #include <fs/ext2fs/ext2_extern.h>
48
49 static MALLOC_DEFINE(M_EXT2EXTENTS, "ext2_extents", "EXT2 extents");
50
51 #ifdef EXT2FS_DEBUG
52 static void
53 ext4_ext_print_extent(struct ext4_extent *ep)
54 {
55
56         printf("    ext %p => (blk %u len %u start %ju)\n",
57             ep, ep->e_blk, ep->e_len,
58             (uint64_t)ep->e_start_hi << 32 | ep->e_start_lo);
59 }
60
61 static void ext4_ext_print_header(struct inode *ip, struct ext4_extent_header *ehp);
62
63 static void
64 ext4_ext_print_index(struct inode *ip, struct ext4_extent_index *ex, int do_walk)
65 {
66         struct m_ext2fs *fs;
67         struct buf *bp;
68         int error;
69
70         fs = ip->i_e2fs;
71
72         printf("    index %p => (blk %u pblk %ju)\n",
73             ex, ex->ei_blk, (uint64_t)ex->ei_leaf_hi << 32 | ex->ei_leaf_lo);
74
75         if(!do_walk)
76                 return;
77
78         if ((error = bread(ip->i_devvp,
79             fsbtodb(fs, ((uint64_t)ex->ei_leaf_hi << 32 | ex->ei_leaf_lo)),
80             (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) {
81                 brelse(bp);
82                 return;
83         }
84
85         ext4_ext_print_header(ip, (struct ext4_extent_header *)bp->b_data);
86
87         brelse(bp);
88
89 }
90
91 static void
92 ext4_ext_print_header(struct inode *ip, struct ext4_extent_header *ehp)
93 {
94         int i;
95
96         printf("header %p => (magic 0x%x entries %d max %d depth %d gen %d)\n",
97             ehp, ehp->eh_magic, ehp->eh_ecount, ehp->eh_max, ehp->eh_depth,
98             ehp->eh_gen);
99
100         for (i = 0; i < ehp->eh_ecount; i++)
101                 if (ehp->eh_depth != 0)
102                         ext4_ext_print_index(ip,
103                             (struct ext4_extent_index *)(ehp + 1 + i), 1);
104                 else
105                         ext4_ext_print_extent((struct ext4_extent *)(ehp + 1 + i));
106 }
107
108 static void
109 ext4_ext_print_path(struct inode *ip, struct ext4_extent_path *path)
110 {
111         int k, l;
112
113         l = path->ep_depth;
114
115         printf("ip=%ju, Path:\n", ip->i_number);
116         for (k = 0; k <= l; k++, path++) {
117                 if (path->ep_index) {
118                         ext4_ext_print_index(ip, path->ep_index, 0);
119                 } else if (path->ep_ext) {
120                         ext4_ext_print_extent(path->ep_ext);
121                 }
122         }
123 }
124
125 void
126 ext4_ext_print_extent_tree_status(struct inode *ip)
127 {
128         struct ext4_extent_header *ehp;
129
130         ehp = (struct ext4_extent_header *)(char *)ip->i_db;
131
132         printf("Extent status:ip=%ju\n", ip->i_number);
133         if (!(ip->i_flag & IN_E4EXTENTS))
134                 return;
135
136         ext4_ext_print_header(ip, ehp);
137
138         return;
139 }
140 #endif
141
142 static inline struct ext4_extent_header *
143 ext4_ext_inode_header(struct inode *ip)
144 {
145
146         return ((struct ext4_extent_header *)ip->i_db);
147 }
148
149 static inline struct ext4_extent_header *
150 ext4_ext_block_header(char *bdata)
151 {
152
153         return ((struct ext4_extent_header *)bdata);
154 }
155
156 static inline unsigned short
157 ext4_ext_inode_depth(struct inode *ip)
158 {
159         struct ext4_extent_header *ehp;
160
161         ehp = (struct ext4_extent_header *)ip->i_data;
162         return (ehp->eh_depth);
163 }
164
165 static inline e4fs_daddr_t
166 ext4_ext_index_pblock(struct ext4_extent_index *index)
167 {
168         e4fs_daddr_t blk;
169
170         blk = index->ei_leaf_lo;
171         blk |= (e4fs_daddr_t)index->ei_leaf_hi << 32;
172
173         return (blk);
174 }
175
176 static inline void
177 ext4_index_store_pblock(struct ext4_extent_index *index, e4fs_daddr_t pb)
178 {
179
180         index->ei_leaf_lo = pb & 0xffffffff;
181         index->ei_leaf_hi = (pb >> 32) & 0xffff;
182 }
183
184
185 static inline e4fs_daddr_t
186 ext4_ext_extent_pblock(struct ext4_extent *extent)
187 {
188         e4fs_daddr_t blk;
189
190         blk = extent->e_start_lo;
191         blk |= (e4fs_daddr_t)extent->e_start_hi << 32;
192
193         return (blk);
194 }
195
196 static inline void
197 ext4_ext_store_pblock(struct ext4_extent *ex, e4fs_daddr_t pb)
198 {
199
200         ex->e_start_lo = pb & 0xffffffff;
201         ex->e_start_hi = (pb >> 32) & 0xffff;
202 }
203
204 int
205 ext4_ext_in_cache(struct inode *ip, daddr_t lbn, struct ext4_extent *ep)
206 {
207         struct ext4_extent_cache *ecp;
208         int ret = EXT4_EXT_CACHE_NO;
209
210         ecp = &ip->i_ext_cache;
211         if (ecp->ec_type == EXT4_EXT_CACHE_NO)
212                 return (ret);
213
214         if (lbn >= ecp->ec_blk && lbn < ecp->ec_blk + ecp->ec_len) {
215                 ep->e_blk = ecp->ec_blk;
216                 ep->e_start_lo = ecp->ec_start & 0xffffffff;
217                 ep->e_start_hi = ecp->ec_start >> 32 & 0xffff;
218                 ep->e_len = ecp->ec_len;
219                 ret = ecp->ec_type;
220         }
221         return (ret);
222 }
223
224 static int
225 ext4_ext_check_header(struct inode *ip, struct ext4_extent_header *eh)
226 {
227         struct m_ext2fs *fs;
228         char *error_msg;
229
230         fs = ip->i_e2fs;
231
232         if (eh->eh_magic != EXT4_EXT_MAGIC) {
233                 error_msg = "invalid magic";
234                 goto corrupted;
235         }
236         if (eh->eh_max == 0) {
237                 error_msg = "invalid eh_max";
238                 goto corrupted;
239         }
240         if (eh->eh_ecount > eh->eh_max) {
241                 error_msg = "invalid eh_entries";
242                 goto corrupted;
243         }
244
245         return (0);
246
247 corrupted:
248         ext2_fserr(fs, ip->i_uid, error_msg);
249         return (EIO);
250 }
251
252 static void
253 ext4_ext_binsearch_index(struct ext4_extent_path *path, int blk)
254 {
255         struct ext4_extent_header *eh;
256         struct ext4_extent_index *r, *l, *m;
257
258         eh = path->ep_header;
259
260         KASSERT(eh->eh_ecount <= eh->eh_max && eh->eh_ecount > 0,
261             ("ext4_ext_binsearch_index: bad args"));
262
263         l = EXT_FIRST_INDEX(eh) + 1;
264         r = EXT_FIRST_INDEX(eh) + eh->eh_ecount - 1;
265         while (l <= r) {
266                 m = l + (r - l) / 2;
267                 if (blk < m->ei_blk)
268                         r = m - 1;
269                 else
270                         l = m + 1;
271         }
272
273         path->ep_index = l - 1;
274 }
275
276 static void
277 ext4_ext_binsearch_ext(struct ext4_extent_path *path, int blk)
278 {
279         struct ext4_extent_header *eh;
280         struct ext4_extent *r, *l, *m;
281
282         eh = path->ep_header;
283
284         KASSERT(eh->eh_ecount <= eh->eh_max,
285             ("ext4_ext_binsearch_ext: bad args"));
286
287         if (eh->eh_ecount == 0)
288                 return;
289
290         l = EXT_FIRST_EXTENT(eh) + 1;
291         r = EXT_FIRST_EXTENT(eh) + eh->eh_ecount - 1;
292
293         while (l <= r) {
294                 m = l + (r - l) / 2;
295                 if (blk < m->e_blk)
296                         r = m - 1;
297                 else
298                         l = m + 1;
299         }
300
301         path->ep_ext = l - 1;
302 }
303
304 static int
305 ext4_ext_fill_path_bdata(struct ext4_extent_path *path,
306     struct buf *bp, uint64_t blk)
307 {
308
309         KASSERT(path->ep_data == NULL,
310             ("ext4_ext_fill_path_bdata: bad ep_data"));
311
312         path->ep_data = malloc(bp->b_bufsize, M_EXT2EXTENTS, M_WAITOK);
313         if (!path->ep_data)
314                 return (ENOMEM);
315
316         memcpy(path->ep_data, bp->b_data, bp->b_bufsize);
317         path->ep_blk = blk;
318
319         return (0);
320 }
321
322 static void
323 ext4_ext_fill_path_buf(struct ext4_extent_path *path, struct buf *bp)
324 {
325
326         KASSERT(path->ep_data != NULL,
327             ("ext4_ext_fill_path_buf: bad ep_data"));
328
329         memcpy(bp->b_data, path->ep_data, bp->b_bufsize);
330 }
331
332 static void
333 ext4_ext_drop_refs(struct ext4_extent_path *path)
334 {
335         int depth, i;
336
337         if (!path)
338                 return;
339
340         depth = path->ep_depth;
341         for (i = 0; i <= depth; i++, path++)
342                 if (path->ep_data) {
343                         free(path->ep_data, M_EXT2EXTENTS);
344                         path->ep_data = NULL;
345                 }
346 }
347
348 void
349 ext4_ext_path_free(struct ext4_extent_path *path)
350 {
351
352         if (!path)
353                 return;
354
355         ext4_ext_drop_refs(path);
356         free(path, M_EXT2EXTENTS);
357 }
358
359 int
360 ext4_ext_find_extent(struct inode *ip, daddr_t block,
361     struct ext4_extent_path **ppath)
362 {
363         struct m_ext2fs *fs;
364         struct ext4_extent_header *eh;
365         struct ext4_extent_path *path;
366         struct buf *bp;
367         uint64_t blk;
368         int error, depth, i, ppos, alloc;
369
370         fs = ip->i_e2fs;
371         eh = ext4_ext_inode_header(ip);
372         depth = ext4_ext_inode_depth(ip);
373         ppos = 0;
374         alloc = 0;
375
376         error = ext4_ext_check_header(ip, eh);
377         if (error)
378                 return (error);
379
380         if (ppath == NULL)
381                 return (EINVAL);
382
383         path = *ppath;
384         if (path == NULL) {
385                 path = malloc(EXT4_EXT_DEPTH_MAX *
386                     sizeof(struct ext4_extent_path),
387                     M_EXT2EXTENTS, M_WAITOK | M_ZERO);
388                 if (!path)
389                         return (ENOMEM);
390
391                 *ppath = path;
392                 alloc = 1;
393         }
394
395         path[0].ep_header = eh;
396         path[0].ep_data = NULL;
397
398         /* Walk through the tree. */
399         i = depth;
400         while (i) {
401                 ext4_ext_binsearch_index(&path[ppos], block);
402                 blk = ext4_ext_index_pblock(path[ppos].ep_index);
403                 path[ppos].ep_depth = i;
404                 path[ppos].ep_ext = NULL;
405
406                 error = bread(ip->i_devvp, fsbtodb(ip->i_e2fs, blk),
407                     ip->i_e2fs->e2fs_bsize, NOCRED, &bp);
408                 if (error) {
409                         brelse(bp);
410                         goto error;
411                 }
412
413                 ppos++;
414                 if (ppos > depth) {
415                         ext2_fserr(fs, ip->i_uid,
416                             "ppos > depth => extent corrupted");
417                         error = EIO;
418                         brelse(bp);
419                         goto error;
420                 }
421
422                 ext4_ext_fill_path_bdata(&path[ppos], bp, blk);
423                 bqrelse(bp);
424
425                 eh = ext4_ext_block_header(path[ppos].ep_data);
426                 if (ext4_ext_check_header(ip, eh) ||
427                     ext2_extent_blk_csum_verify(ip, path[ppos].ep_data)) {
428                         error = EIO;
429                         goto error;
430                 }
431
432                 path[ppos].ep_header = eh;
433
434                 i--;
435         }
436
437         error = ext4_ext_check_header(ip, eh);
438         if (error)
439                 goto error;
440
441         /* Find extent. */
442         path[ppos].ep_depth = i;
443         path[ppos].ep_header = eh;
444         path[ppos].ep_ext = NULL;
445         path[ppos].ep_index = NULL;
446         ext4_ext_binsearch_ext(&path[ppos], block);
447         return (0);
448
449 error:
450         ext4_ext_drop_refs(path);
451         if (alloc)
452                 free(path, M_EXT2EXTENTS);
453
454         *ppath = NULL;
455
456         return (error);
457 }
458
459 static inline int
460 ext4_ext_space_root(struct inode *ip)
461 {
462         int size;
463
464         size = sizeof(ip->i_data);
465         size -= sizeof(struct ext4_extent_header);
466         size /= sizeof(struct ext4_extent);
467
468         return (size);
469 }
470
471 static inline int
472 ext4_ext_space_block(struct inode *ip)
473 {
474         struct m_ext2fs *fs;
475         int size;
476
477         fs = ip->i_e2fs;
478
479         size = (fs->e2fs_bsize - sizeof(struct ext4_extent_header)) /
480             sizeof(struct ext4_extent);
481
482         return (size);
483 }
484
485 static inline int
486 ext4_ext_space_block_index(struct inode *ip)
487 {
488         struct m_ext2fs *fs;
489         int size;
490
491         fs = ip->i_e2fs;
492
493         size = (fs->e2fs_bsize - sizeof(struct ext4_extent_header)) /
494             sizeof(struct ext4_extent_index);
495
496         return (size);
497 }
498
499 void
500 ext4_ext_tree_init(struct inode *ip)
501 {
502         struct ext4_extent_header *ehp;
503
504         ip->i_flag |= IN_E4EXTENTS;
505
506         memset(ip->i_data, 0, EXT2_NDADDR + EXT2_NIADDR);
507         ehp = (struct ext4_extent_header *)ip->i_data;
508         ehp->eh_magic = EXT4_EXT_MAGIC;
509         ehp->eh_max = ext4_ext_space_root(ip);
510         ip->i_ext_cache.ec_type = EXT4_EXT_CACHE_NO;
511         ip->i_flag |= IN_CHANGE | IN_UPDATE;
512         ext2_update(ip->i_vnode, 1);
513 }
514
515 static inline void
516 ext4_ext_put_in_cache(struct inode *ip, uint32_t blk,
517                         uint32_t len, uint32_t start, int type)
518 {
519
520         KASSERT(len != 0, ("ext4_ext_put_in_cache: bad input"));
521
522         ip->i_ext_cache.ec_type = type;
523         ip->i_ext_cache.ec_blk = blk;
524         ip->i_ext_cache.ec_len = len;
525         ip->i_ext_cache.ec_start = start;
526 }
527
528 static e4fs_daddr_t
529 ext4_ext_blkpref(struct inode *ip, struct ext4_extent_path *path,
530     e4fs_daddr_t block)
531 {
532         struct m_ext2fs *fs;
533         struct ext4_extent *ex;
534         e4fs_daddr_t bg_start;
535         int depth;
536
537         fs = ip->i_e2fs;
538
539         if (path) {
540                 depth = path->ep_depth;
541                 ex = path[depth].ep_ext;
542                 if (ex) {
543                         e4fs_daddr_t pblk = ext4_ext_extent_pblock(ex);
544                         e2fs_daddr_t blk = ex->e_blk;
545
546                         if (block > blk)
547                                 return (pblk + (block - blk));
548                         else
549                                 return (pblk - (blk - block));
550                 }
551
552                 /* Try to get block from index itself. */
553                 if (path[depth].ep_data)
554                         return (path[depth].ep_blk);
555         }
556
557         /* Use inode's group. */
558         bg_start = (ip->i_block_group * EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) +
559             fs->e2fs->e2fs_first_dblock;
560
561         return (bg_start + block);
562 }
563
564 static int inline
565 ext4_can_extents_be_merged(struct ext4_extent *ex1,
566     struct ext4_extent *ex2)
567 {
568
569         if (ex1->e_blk + ex1->e_len != ex2->e_blk)
570                 return (0);
571
572         if (ex1->e_len + ex2->e_len > EXT4_MAX_LEN)
573                 return (0);
574
575         if (ext4_ext_extent_pblock(ex1) + ex1->e_len ==
576             ext4_ext_extent_pblock(ex2))
577                 return (1);
578
579         return (0);
580 }
581
582 static unsigned
583 ext4_ext_next_leaf_block(struct inode *ip, struct ext4_extent_path *path)
584 {
585         int depth = path->ep_depth;
586
587         /* Empty tree */
588         if (depth == 0)
589                 return (EXT4_MAX_BLOCKS);
590
591         /* Go to indexes. */
592         depth--;
593
594         while (depth >= 0) {
595                 if (path[depth].ep_index !=
596                     EXT_LAST_INDEX(path[depth].ep_header))
597                         return (path[depth].ep_index[1].ei_blk);
598
599                 depth--;
600         }
601
602         return (EXT4_MAX_BLOCKS);
603 }
604
605 static int
606 ext4_ext_dirty(struct inode *ip, struct ext4_extent_path *path)
607 {
608         struct m_ext2fs *fs;
609         struct buf *bp;
610         uint64_t blk;
611         int error;
612
613         fs = ip->i_e2fs;
614
615         if (!path)
616                 return (EINVAL);
617
618         if (path->ep_data) {
619                 blk = path->ep_blk;
620                 bp = getblk(ip->i_devvp, fsbtodb(fs, blk),
621                     fs->e2fs_bsize, 0, 0, 0);
622                 if (!bp)
623                         return (EIO);
624                 ext4_ext_fill_path_buf(path, bp);
625                 ext2_extent_blk_csum_set(ip, bp->b_data);
626                 error = bwrite(bp);
627         } else {
628                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
629                 error = ext2_update(ip->i_vnode, 1);
630         }
631
632         return (error);
633 }
634
635 static int
636 ext4_ext_insert_index(struct inode *ip, struct ext4_extent_path *path,
637     uint32_t lblk, e4fs_daddr_t blk)
638 {
639         struct m_ext2fs *fs;
640         struct ext4_extent_index *idx;
641         int len;
642
643         fs = ip->i_e2fs;
644
645         if (lblk == path->ep_index->ei_blk) {
646                 ext2_fserr(fs, ip->i_uid,
647                     "lblk == index blk => extent corrupted");
648                 return (EIO);
649         }
650
651         if (path->ep_header->eh_ecount >= path->ep_header->eh_max) {
652                 ext2_fserr(fs, ip->i_uid,
653                     "ecout > maxcount => extent corrupted");
654                 return (EIO);
655         }
656
657         if (lblk > path->ep_index->ei_blk) {
658                 /* Insert after. */
659                 idx = path->ep_index + 1;
660         } else {
661                 /* Insert before. */
662                 idx = path->ep_index;
663         }
664
665         len = EXT_LAST_INDEX(path->ep_header) - idx + 1;
666         if (len > 0)
667                 memmove(idx + 1, idx, len * sizeof(struct ext4_extent_index));
668
669         if (idx > EXT_MAX_INDEX(path->ep_header)) {
670                 ext2_fserr(fs, ip->i_uid,
671                     "index is out of range => extent corrupted");
672                 return (EIO);
673         }
674
675         idx->ei_blk = lblk;
676         ext4_index_store_pblock(idx, blk);
677         path->ep_header->eh_ecount++;
678
679         return (ext4_ext_dirty(ip, path));
680 }
681
682 static e4fs_daddr_t
683 ext4_ext_alloc_meta(struct inode *ip)
684 {
685         e4fs_daddr_t blk = ext2_alloc_meta(ip);
686         if (blk) {
687                 ip->i_blocks += btodb(ip->i_e2fs->e2fs_bsize);
688                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
689                 ext2_update(ip->i_vnode, 1);
690         }
691
692         return (blk);
693 }
694
695 static void
696 ext4_ext_blkfree(struct inode *ip, uint64_t blk, int count, int flags)
697 {
698         struct m_ext2fs *fs;
699         int i, blocksreleased;
700
701         fs = ip->i_e2fs;
702         blocksreleased = count;
703
704         for(i = 0; i < count; i++)
705                 ext2_blkfree(ip, blk + i, fs->e2fs_bsize);
706
707         if (ip->i_blocks >= blocksreleased)
708                 ip->i_blocks -= (btodb(fs->e2fs_bsize)*blocksreleased);
709         else
710                 ip->i_blocks = 0;
711
712         ip->i_flag |= IN_CHANGE | IN_UPDATE;
713         ext2_update(ip->i_vnode, 1);
714 }
715
716 static int
717 ext4_ext_split(struct inode *ip, struct ext4_extent_path *path,
718     struct ext4_extent *newext, int at)
719 {
720         struct m_ext2fs *fs;
721         struct  buf *bp;
722         int depth = ext4_ext_inode_depth(ip);
723         struct ext4_extent_header *neh;
724         struct ext4_extent_index *fidx;
725         struct ext4_extent *ex;
726         int i = at, k, m, a;
727         e4fs_daddr_t newblk, oldblk;
728         uint32_t border;
729         e4fs_daddr_t *ablks = NULL;
730         int error = 0;
731
732         fs = ip->i_e2fs;
733         bp = NULL;
734
735         /*
736          * We will split at current extent for now.
737          */
738         if (path[depth].ep_ext > EXT_MAX_EXTENT(path[depth].ep_header)) {
739                 ext2_fserr(fs, ip->i_uid,
740                     "extent is out of range => extent corrupted");
741                 return (EIO);
742         }
743
744         if (path[depth].ep_ext != EXT_MAX_EXTENT(path[depth].ep_header))
745                 border = path[depth].ep_ext[1].e_blk;
746         else
747                 border = newext->e_blk;
748
749         /* Allocate new blocks. */
750         ablks = malloc(sizeof(e4fs_daddr_t) * depth,
751             M_EXT2EXTENTS, M_WAITOK | M_ZERO);
752         if (!ablks)
753                 return (ENOMEM);
754         for (a = 0; a < depth - at; a++) {
755                 newblk = ext4_ext_alloc_meta(ip);
756                 if (newblk == 0)
757                         goto cleanup;
758                 ablks[a] = newblk;
759         }
760
761         newblk = ablks[--a];
762         bp = getblk(ip->i_devvp, fsbtodb(fs, newblk), fs->e2fs_bsize, 0, 0, 0);
763         if (!bp) {
764                 error = EIO;
765                 goto cleanup;
766         }
767
768         neh = ext4_ext_block_header(bp->b_data);
769         neh->eh_ecount = 0;
770         neh->eh_max = ext4_ext_space_block(ip);
771         neh->eh_magic = EXT4_EXT_MAGIC;
772         neh->eh_depth = 0;
773         ex = EXT_FIRST_EXTENT(neh);
774
775         if (path[depth].ep_header->eh_ecount != path[depth].ep_header->eh_max) {
776                 ext2_fserr(fs, ip->i_uid,
777                     "extents count out of range => extent corrupted");
778                 error = EIO;
779                 goto cleanup;
780         }
781
782         /* Start copy from next extent. */
783         m = 0;
784         path[depth].ep_ext++;
785         while (path[depth].ep_ext <= EXT_MAX_EXTENT(path[depth].ep_header)) {
786                 path[depth].ep_ext++;
787                 m++;
788         }
789         if (m) {
790                 memmove(ex, path[depth].ep_ext - m,
791                     sizeof(struct ext4_extent) * m);
792                 neh->eh_ecount = neh->eh_ecount + m;
793         }
794
795         ext2_extent_blk_csum_set(ip, bp->b_data);
796         bwrite(bp);
797         bp = NULL;
798
799         /* Fix old leaf. */
800         if (m) {
801                 path[depth].ep_header->eh_ecount =
802                     path[depth].ep_header->eh_ecount - m;
803                 ext4_ext_dirty(ip, path + depth);
804         }
805
806         /* Create intermediate indexes. */
807         k = depth - at - 1;
808         KASSERT(k >= 0, ("ext4_ext_split: negative k"));
809
810         /* Insert new index into current index block. */
811         i = depth - 1;
812         while (k--) {
813                 oldblk = newblk;
814                 newblk = ablks[--a];
815                 error = bread(ip->i_devvp, fsbtodb(fs, newblk),
816                     (int)fs->e2fs_bsize, NOCRED, &bp);
817                 if (error) {
818                         brelse(bp);
819                         goto cleanup;
820                 }
821
822                 neh = (struct ext4_extent_header *)bp->b_data;
823                 neh->eh_ecount = 1;
824                 neh->eh_magic = EXT4_EXT_MAGIC;
825                 neh->eh_max = ext4_ext_space_block_index(ip);
826                 neh->eh_depth = depth - i;
827                 fidx = EXT_FIRST_INDEX(neh);
828                 fidx->ei_blk = border;
829                 ext4_index_store_pblock(fidx, oldblk);
830
831                 m = 0;
832                 path[i].ep_index++;
833                 while (path[i].ep_index <= EXT_MAX_INDEX(path[i].ep_header)) {
834                         path[i].ep_index++;
835                         m++;
836                 }
837                 if (m) {
838                         memmove(++fidx, path[i].ep_index - m,
839                             sizeof(struct ext4_extent_index) * m);
840                         neh->eh_ecount = neh->eh_ecount + m;
841                 }
842
843                 ext2_extent_blk_csum_set(ip, bp->b_data);
844                 bwrite(bp);
845                 bp = NULL;
846
847                 /* Fix old index. */
848                 if (m) {
849                         path[i].ep_header->eh_ecount =
850                             path[i].ep_header->eh_ecount - m;
851                         ext4_ext_dirty(ip, path + i);
852                 }
853
854                 i--;
855         }
856
857         error = ext4_ext_insert_index(ip, path + at, border, newblk);
858
859 cleanup:
860         if (bp)
861                 brelse(bp);
862
863         if (error) {
864                 for (i = 0; i < depth; i++) {
865                         if (!ablks[i])
866                                 continue;
867                         ext4_ext_blkfree(ip, ablks[i], 1, 0);
868                 }
869         }
870
871         free(ablks, M_EXT2EXTENTS);
872
873         return (error);
874 }
875
876 static int
877 ext4_ext_grow_indepth(struct inode *ip, struct ext4_extent_path *path,
878     struct ext4_extent *newext)
879 {
880         struct m_ext2fs *fs;
881         struct ext4_extent_path *curpath;
882         struct ext4_extent_header *neh;
883         struct buf *bp;
884         e4fs_daddr_t newblk;
885         int error = 0;
886
887         fs = ip->i_e2fs;
888         curpath = path;
889
890         newblk = ext4_ext_alloc_meta(ip);
891         if (newblk == 0)
892                 return (error);
893
894         bp = getblk(ip->i_devvp, fsbtodb(fs, newblk), fs->e2fs_bsize, 0, 0, 0);
895         if (!bp)
896                 return (EIO);
897
898         /* Move top-level index/leaf into new block. */
899         memmove(bp->b_data, curpath->ep_header, sizeof(ip->i_data));
900
901         /* Set size of new block */
902         neh = ext4_ext_block_header(bp->b_data);
903         neh->eh_magic = EXT4_EXT_MAGIC;
904
905         if (ext4_ext_inode_depth(ip))
906                 neh->eh_max = ext4_ext_space_block_index(ip);
907         else
908                 neh->eh_max = ext4_ext_space_block(ip);
909
910         ext2_extent_blk_csum_set(ip, bp->b_data);
911         error = bwrite(bp);
912         if (error)
913                 goto out;
914
915         bp = NULL;
916
917         curpath->ep_header->eh_magic = EXT4_EXT_MAGIC;
918         curpath->ep_header->eh_max = ext4_ext_space_root(ip);
919         curpath->ep_header->eh_ecount = 1;
920         curpath->ep_index = EXT_FIRST_INDEX(curpath->ep_header);
921         curpath->ep_index->ei_blk = EXT_FIRST_EXTENT(path[0].ep_header)->e_blk;
922         ext4_index_store_pblock(curpath->ep_index, newblk);
923
924         neh = ext4_ext_inode_header(ip);
925         neh->eh_depth = path->ep_depth + 1;
926         ext4_ext_dirty(ip, curpath);
927 out:
928         brelse(bp);
929
930         return (error);
931 }
932
933 static int
934 ext4_ext_create_new_leaf(struct inode *ip, struct ext4_extent_path *path,
935     struct ext4_extent *newext)
936 {
937         struct ext4_extent_path *curpath;
938         int depth, i, error;
939
940 repeat:
941         i = depth = ext4_ext_inode_depth(ip);
942
943         /* Look for free index entry int the tree */
944         curpath = path + depth;
945         while (i > 0 && !EXT_HAS_FREE_INDEX(curpath)) {
946                 i--;
947                 curpath--;
948         }
949
950         /*
951          * We use already allocated block for index block,
952          * so subsequent data blocks should be contiguous.
953          */
954         if (EXT_HAS_FREE_INDEX(curpath)) {
955                 error = ext4_ext_split(ip, path, newext, i);
956                 if (error)
957                         goto out;
958
959                 /* Refill path. */
960                 ext4_ext_drop_refs(path);
961                 error = ext4_ext_find_extent(ip, newext->e_blk, &path);
962                 if (error)
963                         goto out;
964         } else {
965                 /* Tree is full, do grow in depth. */
966                 error = ext4_ext_grow_indepth(ip, path, newext);
967                 if (error)
968                         goto out;
969
970                 /* Refill path. */
971                 ext4_ext_drop_refs(path);
972                 error = ext4_ext_find_extent(ip, newext->e_blk, &path);
973                 if (error)
974                         goto out;
975
976                 /* Check and split tree if required. */
977                 depth = ext4_ext_inode_depth(ip);
978                 if (path[depth].ep_header->eh_ecount ==
979                     path[depth].ep_header->eh_max)
980                         goto repeat;
981         }
982
983 out:
984         return (error);
985 }
986
987 static int
988 ext4_ext_correct_indexes(struct inode *ip, struct ext4_extent_path *path)
989 {
990         struct ext4_extent_header *eh;
991         struct ext4_extent *ex;
992         int32_t border;
993         int depth, k;
994
995         depth = ext4_ext_inode_depth(ip);
996         eh = path[depth].ep_header;
997         ex = path[depth].ep_ext;
998
999         if (ex == NULL || eh == NULL)
1000                 return (EIO);
1001
1002         if (!depth)
1003                 return (0);
1004
1005         /* We will correct tree if first leaf got modified only. */
1006         if (ex != EXT_FIRST_EXTENT(eh))
1007                 return (0);
1008
1009         k = depth - 1;
1010         border = path[depth].ep_ext->e_blk;
1011         path[k].ep_index->ei_blk = border;
1012         ext4_ext_dirty(ip, path + k);
1013         while (k--) {
1014                 /* Change all left-side indexes. */
1015                 if (path[k+1].ep_index != EXT_FIRST_INDEX(path[k+1].ep_header))
1016                         break;
1017
1018                 path[k].ep_index->ei_blk = border;
1019                 ext4_ext_dirty(ip, path + k);
1020         }
1021
1022         return (0);
1023 }
1024
1025 static int
1026 ext4_ext_insert_extent(struct inode *ip, struct ext4_extent_path *path,
1027     struct ext4_extent *newext)
1028 {
1029         struct ext4_extent_header * eh;
1030         struct ext4_extent *ex, *nex, *nearex;
1031         struct ext4_extent_path *npath;
1032         int depth, len, error, next;
1033
1034         depth = ext4_ext_inode_depth(ip);
1035         ex = path[depth].ep_ext;
1036         npath = NULL;
1037
1038         if (newext->e_len == 0 || path[depth].ep_header == NULL)
1039                 return (EINVAL);
1040
1041         /* Insert block into found extent. */
1042         if (ex && ext4_can_extents_be_merged(ex, newext)) {
1043                 ex->e_len = ex->e_len + newext->e_len;
1044                 eh = path[depth].ep_header;
1045                 nearex = ex;
1046                 goto merge;
1047         }
1048
1049 repeat:
1050         depth = ext4_ext_inode_depth(ip);
1051         eh = path[depth].ep_header;
1052         if (eh->eh_ecount < eh->eh_max)
1053                 goto has_space;
1054
1055         /* Try next leaf */
1056         nex = EXT_LAST_EXTENT(eh);
1057         next = ext4_ext_next_leaf_block(ip, path);
1058         if (newext->e_blk > nex->e_blk && next != EXT4_MAX_BLOCKS) {
1059                 KASSERT(npath == NULL,
1060                     ("ext4_ext_insert_extent: bad path"));
1061
1062                 error = ext4_ext_find_extent(ip, next, &npath);
1063                 if (error)
1064                         goto cleanup;
1065
1066                 if (npath->ep_depth != path->ep_depth) {
1067                         error = EIO;
1068                         goto cleanup;
1069                 }
1070
1071                 eh = npath[depth].ep_header;
1072                 if (eh->eh_ecount < eh->eh_max) {
1073                         path = npath;
1074                         goto repeat;
1075                 }
1076         }
1077
1078         /*
1079          * There is no free space in the found leaf,
1080          * try to add a new leaf to the tree.
1081          */
1082         error = ext4_ext_create_new_leaf(ip, path, newext);
1083         if (error)
1084                 goto cleanup;
1085
1086         depth = ext4_ext_inode_depth(ip);
1087         eh = path[depth].ep_header;
1088
1089 has_space:
1090         nearex = path[depth].ep_ext;
1091         if (!nearex) {
1092                 /* Create new extent in the leaf. */
1093                 path[depth].ep_ext = EXT_FIRST_EXTENT(eh);
1094         } else if (newext->e_blk > nearex->e_blk) {
1095                 if (nearex != EXT_LAST_EXTENT(eh)) {
1096                         len = EXT_MAX_EXTENT(eh) - nearex;
1097                         len = (len - 1) * sizeof(struct ext4_extent);
1098                         len = len < 0 ? 0 : len;
1099                         memmove(nearex + 2, nearex + 1, len);
1100                 }
1101                 path[depth].ep_ext = nearex + 1;
1102         } else {
1103                 len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
1104                 len = len < 0 ? 0 : len;
1105                 memmove(nearex + 1, nearex, len);
1106                 path[depth].ep_ext = nearex;
1107         }
1108
1109         eh->eh_ecount = eh->eh_ecount + 1;
1110         nearex = path[depth].ep_ext;
1111         nearex->e_blk = newext->e_blk;
1112         nearex->e_start_lo = newext->e_start_lo;
1113         nearex->e_start_hi = newext->e_start_hi;
1114         nearex->e_len = newext->e_len;
1115
1116 merge:
1117         /* Try to merge extents to the right. */
1118         while (nearex < EXT_LAST_EXTENT(eh)) {
1119                 if (!ext4_can_extents_be_merged(nearex, nearex + 1))
1120                         break;
1121
1122                 /* Merge with next extent. */
1123                 nearex->e_len = nearex->e_len + nearex[1].e_len;
1124                 if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
1125                         len = (EXT_LAST_EXTENT(eh) - nearex - 1) *
1126                             sizeof(struct ext4_extent);
1127                         memmove(nearex + 1, nearex + 2, len);
1128                 }
1129
1130                 eh->eh_ecount = eh->eh_ecount - 1;
1131                 KASSERT(eh->eh_ecount != 0,
1132                     ("ext4_ext_insert_extent: bad ecount"));
1133         }
1134
1135         /*
1136          * Try to merge extents to the left,
1137          * start from inexes correction.
1138          */
1139         error = ext4_ext_correct_indexes(ip, path);
1140         if (error)
1141                 goto cleanup;
1142
1143         ext4_ext_dirty(ip, path + depth);
1144
1145 cleanup:
1146         if (npath) {
1147                 ext4_ext_drop_refs(npath);
1148                 free(npath, M_EXT2EXTENTS);
1149         }
1150
1151         ip->i_ext_cache.ec_type = EXT4_EXT_CACHE_NO;
1152         return (error);
1153 }
1154
1155 static e4fs_daddr_t
1156 ext4_new_blocks(struct inode *ip, daddr_t lbn, e4fs_daddr_t pref,
1157     struct ucred *cred, unsigned long *count, int *perror)
1158 {
1159         struct m_ext2fs *fs;
1160         e4fs_daddr_t newblk;
1161
1162         /*
1163          * We will allocate only single block for now.
1164          */
1165         if (*count > 1)
1166                 return (0);
1167
1168         fs = ip->i_e2fs;
1169         EXT2_LOCK(ip->i_ump);
1170         *perror = ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newblk);
1171         if (*perror)
1172                 return (0);
1173
1174         if (newblk) {
1175                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1176                 ext2_update(ip->i_vnode, 1);
1177         }
1178
1179         return (newblk);
1180 }
1181
1182 int
1183 ext4_ext_get_blocks(struct inode *ip, e4fs_daddr_t iblk,
1184     unsigned long max_blocks, struct ucred *cred, struct buf **bpp,
1185     int *pallocated, daddr_t *nb)
1186 {
1187         struct m_ext2fs *fs;
1188         struct buf *bp = NULL;
1189         struct ext4_extent_path *path;
1190         struct ext4_extent newex, *ex;
1191         e4fs_daddr_t bpref, newblk = 0;
1192         unsigned long allocated = 0;
1193         int error = 0, depth;
1194
1195         if(bpp)
1196                 *bpp = NULL;
1197         *pallocated = 0;
1198
1199         /* Check cache. */
1200         path = NULL;
1201         if ((bpref = ext4_ext_in_cache(ip, iblk, &newex))) {
1202                 if (bpref == EXT4_EXT_CACHE_IN) {
1203                         /* Block is already allocated. */
1204                         newblk = iblk - newex.e_blk +
1205                             ext4_ext_extent_pblock(&newex);
1206                         allocated = newex.e_len - (iblk - newex.e_blk);
1207                         goto out;
1208                 } else {
1209                         error = EIO;
1210                         goto out2;
1211                 }
1212         }
1213
1214         error = ext4_ext_find_extent(ip, iblk, &path);
1215         if (error) {
1216                 goto out2;
1217         }
1218
1219         depth = ext4_ext_inode_depth(ip);
1220         if (path[depth].ep_ext == NULL && depth != 0) {
1221                 error = EIO;
1222                 goto out2;
1223         }
1224
1225         if ((ex = path[depth].ep_ext)) {
1226                 uint64_t lblk = ex->e_blk;
1227                 uint16_t e_len  = ex->e_len;
1228                 e4fs_daddr_t e_start = ext4_ext_extent_pblock(ex);
1229
1230                 if (e_len > EXT4_MAX_LEN)
1231                         goto out2;
1232
1233                 /* If we found extent covers block, simply return it. */
1234                 if (iblk >= lblk && iblk < lblk + e_len) {
1235                         newblk = iblk - lblk + e_start;
1236                         allocated = e_len - (iblk - lblk);
1237                         ext4_ext_put_in_cache(ip, lblk, e_len,
1238                             e_start, EXT4_EXT_CACHE_IN);
1239                         goto out;
1240                 }
1241         }
1242
1243         /* Allocate the new block. */
1244         if (S_ISREG(ip->i_mode) && (!ip->i_next_alloc_block)) {
1245                 ip->i_next_alloc_goal = 0;
1246         }
1247
1248         bpref = ext4_ext_blkpref(ip, path, iblk);
1249         allocated = max_blocks;
1250         newblk = ext4_new_blocks(ip, iblk, bpref, cred, &allocated, &error);
1251         if (!newblk)
1252                 goto out2;
1253
1254         /* Try to insert new extent into found leaf and return. */
1255         newex.e_blk = iblk;
1256         ext4_ext_store_pblock(&newex, newblk);
1257         newex.e_len = allocated;
1258         error = ext4_ext_insert_extent(ip, path, &newex);
1259         if (error)
1260                 goto out2;
1261
1262         newblk = ext4_ext_extent_pblock(&newex);
1263         ext4_ext_put_in_cache(ip, iblk, allocated, newblk, EXT4_EXT_CACHE_IN);
1264         *pallocated = 1;
1265
1266 out:
1267         if (allocated > max_blocks)
1268                 allocated = max_blocks;
1269
1270         if (bpp)
1271         {
1272                 fs = ip->i_e2fs;
1273                 error = bread(ip->i_devvp, fsbtodb(fs, newblk),
1274                     fs->e2fs_bsize, cred, &bp);
1275                 if (error) {
1276                         brelse(bp);
1277                 } else {
1278                         *bpp = bp;
1279                 }
1280         }
1281
1282 out2:
1283         if (path) {
1284                 ext4_ext_drop_refs(path);
1285                 free(path, M_EXT2EXTENTS);
1286         }
1287
1288         if (nb)
1289                 *nb = newblk;
1290
1291         return (error);
1292 }
1293
1294 static inline uint16_t
1295 ext4_ext_get_actual_len(struct ext4_extent *ext)
1296 {
1297
1298         return (ext->e_len <= EXT_INIT_MAX_LEN ?
1299             ext->e_len : (ext->e_len - EXT_INIT_MAX_LEN));
1300 }
1301
1302 static inline struct ext4_extent_header *
1303 ext4_ext_header(struct inode *ip)
1304 {
1305
1306         return ((struct ext4_extent_header *)ip->i_db);
1307 }
1308
1309 static int
1310 ext4_remove_blocks(struct inode *ip, struct ext4_extent *ex,
1311     unsigned long from, unsigned long to)
1312 {
1313         unsigned long num, start;
1314
1315         if (from >= ex->e_blk &&
1316             to == ex->e_blk + ext4_ext_get_actual_len(ex) - 1) {
1317                 /* Tail cleanup. */
1318                 num = ex->e_blk + ext4_ext_get_actual_len(ex) - from;
1319                 start = ext4_ext_extent_pblock(ex) +
1320                     ext4_ext_get_actual_len(ex) - num;
1321                 ext4_ext_blkfree(ip, start, num, 0);
1322         }
1323
1324         return (0);
1325 }
1326
1327 static int
1328 ext4_ext_rm_index(struct inode *ip, struct ext4_extent_path *path)
1329 {
1330         e4fs_daddr_t leaf;
1331
1332         /* Free index block. */
1333         path--;
1334         leaf = ext4_ext_index_pblock(path->ep_index);
1335         KASSERT(path->ep_header->eh_ecount != 0,
1336             ("ext4_ext_rm_index: bad ecount"));
1337         path->ep_header->eh_ecount--;
1338         ext4_ext_dirty(ip, path);
1339         ext4_ext_blkfree(ip, leaf, 1, 0);
1340         return (0);
1341 }
1342
1343 static int
1344 ext4_ext_rm_leaf(struct inode *ip, struct ext4_extent_path *path,
1345     uint64_t start)
1346 {
1347         struct ext4_extent_header *eh;
1348         struct ext4_extent *ex;
1349         unsigned int a, b, block, num;
1350         unsigned long ex_blk;
1351         unsigned short ex_len;
1352         int depth;
1353         int error, correct_index;
1354
1355         depth = ext4_ext_inode_depth(ip);
1356         if (!path[depth].ep_header) {
1357                 if (path[depth].ep_data == NULL)
1358                         return (EINVAL);
1359                 path[depth].ep_header =
1360                     (struct ext4_extent_header* )path[depth].ep_data;
1361         }
1362
1363         eh = path[depth].ep_header;
1364         if (!eh) {
1365                 ext2_fserr(ip->i_e2fs, ip->i_uid,
1366                     "bad header => extent corrupted");
1367                 return (EIO);
1368         }
1369
1370         ex = EXT_LAST_EXTENT(eh);
1371         ex_blk = ex->e_blk;
1372         ex_len = ext4_ext_get_actual_len(ex);
1373
1374         error = 0;
1375         correct_index = 0;
1376         while (ex >= EXT_FIRST_EXTENT(eh) && ex_blk + ex_len > start) {
1377                 path[depth].ep_ext = ex;
1378                 a = ex_blk > start ? ex_blk : start;
1379                 b = (uint64_t)ex_blk + ex_len - 1 <
1380                     EXT4_MAX_BLOCKS ? ex_blk + ex_len - 1 : EXT4_MAX_BLOCKS;
1381
1382                 if (a != ex_blk && b != ex_blk + ex_len - 1)
1383                         return (EINVAL);
1384                 else if (a != ex_blk) {
1385                         /* Remove tail of the extent. */
1386                         block = ex_blk;
1387                         num = a - block;
1388                 } else if (b != ex_blk + ex_len - 1) {
1389                         /* Remove head of the extent, not implemented. */
1390                         return (EINVAL);
1391                 } else {
1392                         /* Remove whole extent. */
1393                         block = ex_blk;
1394                         num = 0;
1395                 }
1396
1397                 if (ex == EXT_FIRST_EXTENT(eh))
1398                         correct_index = 1;
1399
1400                 error = ext4_remove_blocks(ip, ex, a, b);
1401                 if (error)
1402                         goto out;
1403
1404                 if (num == 0) {
1405                         ext4_ext_store_pblock(ex, 0);
1406                         eh->eh_ecount--;
1407                 }
1408
1409                 ex->e_blk = block;
1410                 ex->e_len = num;
1411
1412                 ext4_ext_dirty(ip, path + depth);
1413
1414                 ex--;
1415                 ex_blk = ex->e_blk;
1416                 ex_len = ext4_ext_get_actual_len(ex);
1417         };
1418
1419         if (correct_index && eh->eh_ecount)
1420                 error = ext4_ext_correct_indexes(ip, path);
1421
1422         /*
1423          * If this leaf is free, we should
1424          * remove it from index block above.
1425          */
1426         if (error == 0 && eh->eh_ecount == 0 && path[depth].ep_data != NULL)
1427                 error = ext4_ext_rm_index(ip, path + depth);
1428
1429 out:
1430         return (error);
1431 }
1432
1433 static struct buf *
1434 ext4_read_extent_tree_block(struct inode *ip, e4fs_daddr_t pblk,
1435     int depth, int flags)
1436 {
1437         struct m_ext2fs *fs;
1438         struct ext4_extent_header *eh;
1439         struct buf *bp;
1440         int error;
1441
1442         fs = ip->i_e2fs;
1443         error = bread(ip->i_devvp, fsbtodb(fs, pblk),
1444             fs->e2fs_bsize, NOCRED, &bp);
1445         if (error) {
1446                 brelse(bp);
1447                 return (NULL);
1448         }
1449
1450         eh = ext4_ext_block_header(bp->b_data);
1451         if (eh->eh_depth != depth) {
1452                 ext2_fserr(fs, ip->i_uid, "unexpected eh_depth");
1453                 goto err;
1454         }
1455
1456         error = ext4_ext_check_header(ip, eh);
1457         if (error)
1458                 goto err;
1459
1460         return (bp);
1461
1462 err:
1463         brelse(bp);
1464         return (NULL);
1465
1466 }
1467
1468 static int inline
1469 ext4_ext_more_to_rm(struct ext4_extent_path *path)
1470 {
1471
1472         KASSERT(path->ep_index != NULL,
1473             ("ext4_ext_more_to_rm: bad index from path"));
1474
1475         if (path->ep_index < EXT_FIRST_INDEX(path->ep_header))
1476                 return (0);
1477
1478         if (path->ep_header->eh_ecount == path->index_count)
1479                 return (0);
1480
1481         return (1);
1482 }
1483
1484 int
1485 ext4_ext_remove_space(struct inode *ip, off_t length, int flags,
1486     struct ucred *cred, struct thread *td)
1487 {
1488         struct buf *bp;
1489         struct ext4_extent_header *ehp;
1490         struct ext4_extent_path *path;
1491         int depth;
1492         int i, error;
1493
1494         ehp = (struct ext4_extent_header *)ip->i_db;
1495         depth = ext4_ext_inode_depth(ip);
1496
1497         error = ext4_ext_check_header(ip, ehp);
1498         if(error)
1499                 return (error);
1500
1501         path = malloc(sizeof(struct ext4_extent_path) * (depth + 1),
1502             M_EXT2EXTENTS, M_WAITOK | M_ZERO);
1503         if (!path)
1504                 return (ENOMEM);
1505
1506         path[0].ep_header = ehp;
1507         path[0].ep_depth = depth;
1508         i = 0;
1509         while (error == 0 && i >= 0) {
1510                 if (i == depth) {
1511                         /* This is leaf. */
1512                         error = ext4_ext_rm_leaf(ip, path, length);
1513                         if (error)
1514                                 break;
1515                         free(path[i].ep_data, M_EXT2EXTENTS);
1516                         path[i].ep_data = NULL;
1517                         i--;
1518                         continue;
1519                 }
1520
1521                 /* This is index. */
1522                 if (!path[i].ep_header)
1523                         path[i].ep_header =
1524                             (struct ext4_extent_header *)path[i].ep_data;
1525
1526                 if (!path[i].ep_index) {
1527                         /* This level hasn't touched yet. */
1528                         path[i].ep_index = EXT_LAST_INDEX(path[i].ep_header);
1529                         path[i].index_count = path[i].ep_header->eh_ecount + 1;
1530                 } else {
1531                         /* We've already was here, see at next index. */
1532                         path[i].ep_index--;
1533                 }
1534
1535                 if (ext4_ext_more_to_rm(path + i)) {
1536                         memset(path + i + 1, 0, sizeof(*path));
1537                         bp = ext4_read_extent_tree_block(ip,
1538                             ext4_ext_index_pblock(path[i].ep_index),
1539                             path[0].ep_depth - (i + 1), 0);
1540                         if (!bp) {
1541                                 error = EIO;
1542                                 break;
1543                         }
1544
1545                         ext4_ext_fill_path_bdata(&path[i+1], bp,
1546                             ext4_ext_index_pblock(path[i].ep_index));
1547                         brelse(bp);
1548                         path[i].index_count = path[i].ep_header->eh_ecount;
1549                         i++;
1550                 } else {
1551                         if (path[i].ep_header->eh_ecount == 0 && i > 0) {
1552                                 /* Index is empty, remove it. */
1553                                 error = ext4_ext_rm_index(ip, path + i);
1554                         }
1555                         free(path[i].ep_data, M_EXT2EXTENTS);
1556                         path[i].ep_data = NULL;
1557                         i--;
1558                 }
1559         }
1560
1561         if (path->ep_header->eh_ecount == 0) {
1562                 /*
1563                  * Truncate the tree to zero.
1564                  */
1565                  ext4_ext_header(ip)->eh_depth = 0;
1566                  ext4_ext_header(ip)->eh_max = ext4_ext_space_root(ip);
1567                  ext4_ext_dirty(ip, path);
1568         }
1569
1570         ext4_ext_drop_refs(path);
1571         free(path, M_EXT2EXTENTS);
1572
1573         return (error);
1574 }