]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/fs/ext2fs/ext2_extents.c
MFV: r362513
[FreeBSD/FreeBSD.git] / sys / fs / ext2fs / ext2_extents.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2010 Zheng Liu <lz@freebsd.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/types.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/vnode.h>
37 #include <sys/bio.h>
38 #include <sys/buf.h>
39 #include <sys/endian.h>
40 #include <sys/conf.h>
41 #include <sys/sdt.h>
42 #include <sys/stat.h>
43
44 #include <fs/ext2fs/ext2_mount.h>
45 #include <fs/ext2fs/fs.h>
46 #include <fs/ext2fs/inode.h>
47 #include <fs/ext2fs/ext2fs.h>
48 #include <fs/ext2fs/ext2_extents.h>
49 #include <fs/ext2fs/ext2_extern.h>
50
51 SDT_PROVIDER_DECLARE(ext2fs);
52 /*
53  * ext2fs trace probe:
54  * arg0: verbosity. Higher numbers give more verbose messages
55  * arg1: Textual message
56  */
57 SDT_PROBE_DEFINE2(ext2fs, , trace, extents, "int", "char*");
58
59 static MALLOC_DEFINE(M_EXT2EXTENTS, "ext2_extents", "EXT2 extents");
60
61 #ifdef EXT2FS_PRINT_EXTENTS
62 static void
63 ext4_ext_print_extent(struct ext4_extent *ep)
64 {
65
66         printf("    ext %p => (blk %u len %u start %ju)\n",
67             ep, le32toh(ep->e_blk), le16toh(ep->e_len),
68             (uint64_t)le16toh(ep->e_start_hi) << 32 | le32toh(ep->e_start_lo));
69 }
70
71 static void ext4_ext_print_header(struct inode *ip, struct ext4_extent_header *ehp);
72
73 static void
74 ext4_ext_print_index(struct inode *ip, struct ext4_extent_index *ex, int do_walk)
75 {
76         struct m_ext2fs *fs;
77         struct buf *bp;
78         int error;
79
80         fs = ip->i_e2fs;
81
82         printf("    index %p => (blk %u pblk %ju)\n",
83             ex, le32toh(ex->ei_blk), (uint64_t)le16toh(ex->ei_leaf_hi) << 32 |
84             le32toh(ex->ei_leaf_lo));
85
86         if(!do_walk)
87                 return;
88
89         if ((error = bread(ip->i_devvp,
90             fsbtodb(fs, ((uint64_t)le16toh(ex->ei_leaf_hi) << 32 |
91             le32toh(ex->ei_leaf_lo))), (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) {
92                 brelse(bp);
93                 return;
94         }
95
96         ext4_ext_print_header(ip, (struct ext4_extent_header *)bp->b_data);
97
98         brelse(bp);
99
100 }
101
102 static void
103 ext4_ext_print_header(struct inode *ip, struct ext4_extent_header *ehp)
104 {
105         int i;
106
107         printf("header %p => (magic 0x%x entries %d max %d depth %d gen %d)\n",
108             ehp, le16toh(ehp->eh_magic), le16toh(ehp->eh_ecount),
109             le16toh(ehp->eh_max), le16toh(ehp->eh_depth), le32toh(ehp->eh_gen));
110
111         for (i = 0; i < le16toh(ehp->eh_ecount); i++)
112                 if (ehp->eh_depth != 0)
113                         ext4_ext_print_index(ip,
114                             (struct ext4_extent_index *)(ehp + 1 + i), 1);
115                 else
116                         ext4_ext_print_extent((struct ext4_extent *)(ehp + 1 + i));
117 }
118
119 static void
120 ext4_ext_print_path(struct inode *ip, struct ext4_extent_path *path)
121 {
122         int k, l;
123
124         l = path->ep_depth;
125
126         printf("ip=%ju, Path:\n", ip->i_number);
127         for (k = 0; k <= l; k++, path++) {
128                 if (path->ep_index) {
129                         ext4_ext_print_index(ip, path->ep_index, 0);
130                 } else if (path->ep_ext) {
131                         ext4_ext_print_extent(path->ep_ext);
132                 }
133         }
134 }
135
136 void
137 ext4_ext_print_extent_tree_status(struct inode *ip)
138 {
139         struct ext4_extent_header *ehp;
140
141         ehp = (struct ext4_extent_header *)(char *)ip->i_db;
142
143         printf("Extent status:ip=%ju\n", ip->i_number);
144         if (!(ip->i_flag & IN_E4EXTENTS))
145                 return;
146
147         ext4_ext_print_header(ip, ehp);
148
149         return;
150 }
151 #endif
152
153 static inline struct ext4_extent_header *
154 ext4_ext_inode_header(struct inode *ip)
155 {
156
157         return ((struct ext4_extent_header *)ip->i_db);
158 }
159
160 static inline struct ext4_extent_header *
161 ext4_ext_block_header(char *bdata)
162 {
163
164         return ((struct ext4_extent_header *)bdata);
165 }
166
167 static inline unsigned short
168 ext4_ext_inode_depth(struct inode *ip)
169 {
170         struct ext4_extent_header *ehp;
171
172         ehp = (struct ext4_extent_header *)ip->i_data;
173         return (le16toh(ehp->eh_depth));
174 }
175
176 static inline e4fs_daddr_t
177 ext4_ext_index_pblock(struct ext4_extent_index *index)
178 {
179         e4fs_daddr_t blk;
180
181         blk = le32toh(index->ei_leaf_lo);
182         blk |= (e4fs_daddr_t)le16toh(index->ei_leaf_hi) << 32;
183
184         return (blk);
185 }
186
187 static inline void
188 ext4_index_store_pblock(struct ext4_extent_index *index, e4fs_daddr_t pb)
189 {
190
191         index->ei_leaf_lo = htole32(pb & 0xffffffff);
192         index->ei_leaf_hi = htole16((pb >> 32) & 0xffff);
193 }
194
195
196 static inline e4fs_daddr_t
197 ext4_ext_extent_pblock(struct ext4_extent *extent)
198 {
199         e4fs_daddr_t blk;
200
201         blk = le32toh(extent->e_start_lo);
202         blk |= (e4fs_daddr_t)le16toh(extent->e_start_hi) << 32;
203
204         return (blk);
205 }
206
207 static inline void
208 ext4_ext_store_pblock(struct ext4_extent *ex, e4fs_daddr_t pb)
209 {
210
211         ex->e_start_lo = htole32(pb & 0xffffffff);
212         ex->e_start_hi = htole16((pb >> 32) & 0xffff);
213 }
214
215 int
216 ext4_ext_in_cache(struct inode *ip, daddr_t lbn, struct ext4_extent *ep)
217 {
218         struct ext4_extent_cache *ecp;
219         int ret = EXT4_EXT_CACHE_NO;
220
221         ecp = &ip->i_ext_cache;
222         if (ecp->ec_type == EXT4_EXT_CACHE_NO)
223                 return (ret);
224
225         if (lbn >= ecp->ec_blk && lbn < ecp->ec_blk + ecp->ec_len) {
226                 ep->e_blk = htole32(ecp->ec_blk);
227                 ep->e_start_lo = htole32(ecp->ec_start & 0xffffffff);
228                 ep->e_start_hi = htole16(ecp->ec_start >> 32 & 0xffff);
229                 ep->e_len = htole16(ecp->ec_len);
230                 ret = ecp->ec_type;
231         }
232         return (ret);
233 }
234
235 static int
236 ext4_ext_check_header(struct inode *ip, struct ext4_extent_header *eh)
237 {
238         struct m_ext2fs *fs;
239         char *error_msg;
240
241         fs = ip->i_e2fs;
242
243         if (le16toh(eh->eh_magic) != EXT4_EXT_MAGIC) {
244                 error_msg = "header: invalid magic";
245                 goto corrupted;
246         }
247         if (eh->eh_max == 0) {
248                 error_msg = "header: invalid eh_max";
249                 goto corrupted;
250         }
251         if (le16toh(eh->eh_ecount) > le16toh(eh->eh_max)) {
252                 error_msg = "header: invalid eh_entries";
253                 goto corrupted;
254         }
255
256         return (0);
257
258 corrupted:
259         SDT_PROBE2(ext2fs, , trace, extents, 1, error_msg);
260         return (EIO);
261 }
262
263 static void
264 ext4_ext_binsearch_index(struct ext4_extent_path *path, int blk)
265 {
266         struct ext4_extent_header *eh;
267         struct ext4_extent_index *r, *l, *m;
268
269         eh = path->ep_header;
270
271         KASSERT(le16toh(eh->eh_ecount) <= le16toh(eh->eh_max) &&
272             le16toh(eh->eh_ecount) > 0,
273             ("ext4_ext_binsearch_index: bad args"));
274
275         l = EXT_FIRST_INDEX(eh) + 1;
276         r = EXT_FIRST_INDEX(eh) + le16toh(eh->eh_ecount) - 1;
277         while (l <= r) {
278                 m = l + (r - l) / 2;
279                 if (blk < le32toh(m->ei_blk))
280                         r = m - 1;
281                 else
282                         l = m + 1;
283         }
284
285         path->ep_index = l - 1;
286 }
287
288 static void
289 ext4_ext_binsearch_ext(struct ext4_extent_path *path, int blk)
290 {
291         struct ext4_extent_header *eh;
292         struct ext4_extent *r, *l, *m;
293
294         eh = path->ep_header;
295
296         KASSERT(le16toh(eh->eh_ecount) <= le16toh(eh->eh_max),
297             ("ext4_ext_binsearch_ext: bad args"));
298
299         if (eh->eh_ecount == 0)
300                 return;
301
302         l = EXT_FIRST_EXTENT(eh) + 1;
303         r = EXT_FIRST_EXTENT(eh) + le16toh(eh->eh_ecount) - 1;
304
305         while (l <= r) {
306                 m = l + (r - l) / 2;
307                 if (blk < le32toh(m->e_blk))
308                         r = m - 1;
309                 else
310                         l = m + 1;
311         }
312
313         path->ep_ext = l - 1;
314 }
315
316 static int
317 ext4_ext_fill_path_bdata(struct ext4_extent_path *path,
318     struct buf *bp, uint64_t blk)
319 {
320
321         KASSERT(path->ep_data == NULL,
322             ("ext4_ext_fill_path_bdata: bad ep_data"));
323
324         path->ep_data = malloc(bp->b_bufsize, M_EXT2EXTENTS, M_WAITOK);
325         if (!path->ep_data)
326                 return (ENOMEM);
327
328         memcpy(path->ep_data, bp->b_data, bp->b_bufsize);
329         path->ep_blk = blk;
330
331         return (0);
332 }
333
334 static void
335 ext4_ext_fill_path_buf(struct ext4_extent_path *path, struct buf *bp)
336 {
337
338         KASSERT(path->ep_data != NULL,
339             ("ext4_ext_fill_path_buf: bad ep_data"));
340
341         memcpy(bp->b_data, path->ep_data, bp->b_bufsize);
342 }
343
344 static void
345 ext4_ext_drop_refs(struct ext4_extent_path *path)
346 {
347         int depth, i;
348
349         if (!path)
350                 return;
351
352         depth = path->ep_depth;
353         for (i = 0; i <= depth; i++, path++)
354                 if (path->ep_data) {
355                         free(path->ep_data, M_EXT2EXTENTS);
356                         path->ep_data = NULL;
357                 }
358 }
359
360 void
361 ext4_ext_path_free(struct ext4_extent_path *path)
362 {
363
364         if (!path)
365                 return;
366
367         ext4_ext_drop_refs(path);
368         free(path, M_EXT2EXTENTS);
369 }
370
371 int
372 ext4_ext_find_extent(struct inode *ip, daddr_t block,
373     struct ext4_extent_path **ppath)
374 {
375         struct m_ext2fs *fs;
376         struct ext4_extent_header *eh;
377         struct ext4_extent_path *path;
378         struct buf *bp;
379         uint64_t blk;
380         int error, depth, i, ppos, alloc;
381
382         fs = ip->i_e2fs;
383         eh = ext4_ext_inode_header(ip);
384         depth = ext4_ext_inode_depth(ip);
385         ppos = 0;
386         alloc = 0;
387
388         error = ext4_ext_check_header(ip, eh);
389         if (error)
390                 return (error);
391
392         if (ppath == NULL)
393                 return (EINVAL);
394
395         path = *ppath;
396         if (path == NULL) {
397                 path = malloc(EXT4_EXT_DEPTH_MAX *
398                     sizeof(struct ext4_extent_path),
399                     M_EXT2EXTENTS, M_WAITOK | M_ZERO);
400                 if (!path)
401                         return (ENOMEM);
402
403                 *ppath = path;
404                 alloc = 1;
405         }
406
407         path[0].ep_header = eh;
408         path[0].ep_data = NULL;
409
410         /* Walk through the tree. */
411         i = depth;
412         while (i) {
413                 ext4_ext_binsearch_index(&path[ppos], block);
414                 blk = ext4_ext_index_pblock(path[ppos].ep_index);
415                 path[ppos].ep_depth = i;
416                 path[ppos].ep_ext = NULL;
417
418                 error = bread(ip->i_devvp, fsbtodb(ip->i_e2fs, blk),
419                     ip->i_e2fs->e2fs_bsize, NOCRED, &bp);
420                 if (error) {
421                         goto error;
422                 }
423
424                 ppos++;
425                 if (ppos > depth) {
426                         SDT_PROBE2(ext2fs, , trace, extents, 1,
427                             "ppos > depth => extent corrupted");
428                         error = EIO;
429                         brelse(bp);
430                         goto error;
431                 }
432
433                 ext4_ext_fill_path_bdata(&path[ppos], bp, blk);
434                 bqrelse(bp);
435
436                 eh = ext4_ext_block_header(path[ppos].ep_data);
437                 if (ext4_ext_check_header(ip, eh) ||
438                     ext2_extent_blk_csum_verify(ip, path[ppos].ep_data)) {
439                         error = EIO;
440                         goto error;
441                 }
442
443                 path[ppos].ep_header = eh;
444
445                 i--;
446         }
447
448         error = ext4_ext_check_header(ip, eh);
449         if (error)
450                 goto error;
451
452         /* Find extent. */
453         path[ppos].ep_depth = i;
454         path[ppos].ep_header = eh;
455         path[ppos].ep_ext = NULL;
456         path[ppos].ep_index = NULL;
457         ext4_ext_binsearch_ext(&path[ppos], block);
458         return (0);
459
460 error:
461         ext4_ext_drop_refs(path);
462         if (alloc)
463                 free(path, M_EXT2EXTENTS);
464
465         *ppath = NULL;
466
467         return (error);
468 }
469
470 static inline int
471 ext4_ext_space_root(struct inode *ip)
472 {
473         int size;
474
475         size = sizeof(ip->i_data);
476         size -= sizeof(struct ext4_extent_header);
477         size /= sizeof(struct ext4_extent);
478
479         return (size);
480 }
481
482 static inline int
483 ext4_ext_space_block(struct inode *ip)
484 {
485         struct m_ext2fs *fs;
486         int size;
487
488         fs = ip->i_e2fs;
489
490         size = (fs->e2fs_bsize - sizeof(struct ext4_extent_header)) /
491             sizeof(struct ext4_extent);
492
493         return (size);
494 }
495
496 static inline int
497 ext4_ext_space_block_index(struct inode *ip)
498 {
499         struct m_ext2fs *fs;
500         int size;
501
502         fs = ip->i_e2fs;
503
504         size = (fs->e2fs_bsize - sizeof(struct ext4_extent_header)) /
505             sizeof(struct ext4_extent_index);
506
507         return (size);
508 }
509
510 void
511 ext4_ext_tree_init(struct inode *ip)
512 {
513         struct ext4_extent_header *ehp;
514
515         ip->i_flag |= IN_E4EXTENTS;
516
517         memset(ip->i_data, 0, EXT2_NDADDR + EXT2_NIADDR);
518         ehp = (struct ext4_extent_header *)ip->i_data;
519         ehp->eh_magic = htole16(EXT4_EXT_MAGIC);
520         ehp->eh_max = htole16(ext4_ext_space_root(ip));
521         ip->i_ext_cache.ec_type = EXT4_EXT_CACHE_NO;
522         ip->i_flag |= IN_CHANGE | IN_UPDATE;
523         ext2_update(ip->i_vnode, 1);
524 }
525
526 static inline void
527 ext4_ext_put_in_cache(struct inode *ip, uint32_t blk,
528                         uint32_t len, uint32_t start, int type)
529 {
530
531         KASSERT(len != 0, ("ext4_ext_put_in_cache: bad input"));
532
533         ip->i_ext_cache.ec_type = type;
534         ip->i_ext_cache.ec_blk = blk;
535         ip->i_ext_cache.ec_len = len;
536         ip->i_ext_cache.ec_start = start;
537 }
538
539 static e4fs_daddr_t
540 ext4_ext_blkpref(struct inode *ip, struct ext4_extent_path *path,
541     e4fs_daddr_t block)
542 {
543         struct m_ext2fs *fs;
544         struct ext4_extent *ex;
545         e4fs_daddr_t bg_start;
546         int depth;
547
548         fs = ip->i_e2fs;
549
550         if (path) {
551                 depth = path->ep_depth;
552                 ex = path[depth].ep_ext;
553                 if (ex) {
554                         e4fs_daddr_t pblk = ext4_ext_extent_pblock(ex);
555                         e2fs_daddr_t blk = le32toh(ex->e_blk);
556
557                         if (block > blk)
558                                 return (pblk + (block - blk));
559                         else
560                                 return (pblk - (blk - block));
561                 }
562
563                 /* Try to get block from index itself. */
564                 if (path[depth].ep_data)
565                         return (path[depth].ep_blk);
566         }
567
568         /* Use inode's group. */
569         bg_start = (ip->i_block_group * EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) +
570             le32toh(fs->e2fs->e2fs_first_dblock);
571
572         return (bg_start + block);
573 }
574
575 static int inline
576 ext4_can_extents_be_merged(struct ext4_extent *ex1,
577     struct ext4_extent *ex2)
578 {
579
580         if (le32toh(ex1->e_blk) + le16toh(ex1->e_len) != le32toh(ex2->e_blk))
581                 return (0);
582
583         if (le16toh(ex1->e_len) + le16toh(ex2->e_len) > EXT4_MAX_LEN)
584                 return (0);
585
586         if (ext4_ext_extent_pblock(ex1) + le16toh(ex1->e_len) ==
587             ext4_ext_extent_pblock(ex2))
588                 return (1);
589
590         return (0);
591 }
592
593 static unsigned
594 ext4_ext_next_leaf_block(struct inode *ip, struct ext4_extent_path *path)
595 {
596         int depth = path->ep_depth;
597
598         /* Empty tree */
599         if (depth == 0)
600                 return (EXT4_MAX_BLOCKS);
601
602         /* Go to indexes. */
603         depth--;
604
605         while (depth >= 0) {
606                 if (path[depth].ep_index !=
607                     EXT_LAST_INDEX(path[depth].ep_header))
608                         return (le32toh(path[depth].ep_index[1].ei_blk));
609
610                 depth--;
611         }
612
613         return (EXT4_MAX_BLOCKS);
614 }
615
616 static int
617 ext4_ext_dirty(struct inode *ip, struct ext4_extent_path *path)
618 {
619         struct m_ext2fs *fs;
620         struct buf *bp;
621         uint64_t blk;
622         int error;
623
624         fs = ip->i_e2fs;
625
626         if (!path)
627                 return (EINVAL);
628
629         if (path->ep_data) {
630                 blk = path->ep_blk;
631                 bp = getblk(ip->i_devvp, fsbtodb(fs, blk),
632                     fs->e2fs_bsize, 0, 0, 0);
633                 if (!bp)
634                         return (EIO);
635                 ext4_ext_fill_path_buf(path, bp);
636                 ext2_extent_blk_csum_set(ip, bp->b_data);
637                 error = bwrite(bp);
638         } else {
639                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
640                 error = ext2_update(ip->i_vnode, 1);
641         }
642
643         return (error);
644 }
645
646 static int
647 ext4_ext_insert_index(struct inode *ip, struct ext4_extent_path *path,
648     uint32_t lblk, e4fs_daddr_t blk)
649 {
650         struct m_ext2fs *fs;
651         struct ext4_extent_index *idx;
652         int len;
653
654         fs = ip->i_e2fs;
655
656         if (lblk == le32toh(path->ep_index->ei_blk)) {
657                 SDT_PROBE2(ext2fs, , trace, extents, 1,
658                     "lblk == index blk => extent corrupted");
659                 return (EIO);
660         }
661
662         if (le16toh(path->ep_header->eh_ecount) >=
663             le16toh(path->ep_header->eh_max)) {
664                 SDT_PROBE2(ext2fs, , trace, extents, 1,
665                     "ecout > maxcount => extent corrupted");
666                 return (EIO);
667         }
668
669         if (lblk > le32toh(path->ep_index->ei_blk)) {
670                 /* Insert after. */
671                 idx = path->ep_index + 1;
672         } else {
673                 /* Insert before. */
674                 idx = path->ep_index;
675         }
676
677         len = EXT_LAST_INDEX(path->ep_header) - idx + 1;
678         if (len > 0)
679                 memmove(idx + 1, idx, len * sizeof(struct ext4_extent_index));
680
681         if (idx > EXT_MAX_INDEX(path->ep_header)) {
682                 SDT_PROBE2(ext2fs, , trace, extents, 1,
683                     "index is out of range => extent corrupted");
684                 return (EIO);
685         }
686
687         idx->ei_blk = htole32(lblk);
688         ext4_index_store_pblock(idx, blk);
689         path->ep_header->eh_ecount =
690             htole16(le16toh(path->ep_header->eh_ecount) + 1);
691
692         return (ext4_ext_dirty(ip, path));
693 }
694
695 static e4fs_daddr_t
696 ext4_ext_alloc_meta(struct inode *ip)
697 {
698         e4fs_daddr_t blk = ext2_alloc_meta(ip);
699         if (blk) {
700                 ip->i_blocks += btodb(ip->i_e2fs->e2fs_bsize);
701                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
702                 ext2_update(ip->i_vnode, 1);
703         }
704
705         return (blk);
706 }
707
708 static void
709 ext4_ext_blkfree(struct inode *ip, uint64_t blk, int count, int flags)
710 {
711         struct m_ext2fs *fs;
712         int i, blocksreleased;
713
714         fs = ip->i_e2fs;
715         blocksreleased = count;
716
717         for(i = 0; i < count; i++)
718                 ext2_blkfree(ip, blk + i, fs->e2fs_bsize);
719
720         if (ip->i_blocks >= blocksreleased)
721                 ip->i_blocks -= (btodb(fs->e2fs_bsize)*blocksreleased);
722         else
723                 ip->i_blocks = 0;
724
725         ip->i_flag |= IN_CHANGE | IN_UPDATE;
726         ext2_update(ip->i_vnode, 1);
727 }
728
729 static int
730 ext4_ext_split(struct inode *ip, struct ext4_extent_path *path,
731     struct ext4_extent *newext, int at)
732 {
733         struct m_ext2fs *fs;
734         struct  buf *bp;
735         int depth = ext4_ext_inode_depth(ip);
736         struct ext4_extent_header *neh;
737         struct ext4_extent_index *fidx;
738         struct ext4_extent *ex;
739         int i = at, k, m, a;
740         e4fs_daddr_t newblk, oldblk;
741         uint32_t border;
742         e4fs_daddr_t *ablks = NULL;
743         int error = 0;
744
745         fs = ip->i_e2fs;
746         bp = NULL;
747
748         /*
749          * We will split at current extent for now.
750          */
751         if (path[depth].ep_ext > EXT_MAX_EXTENT(path[depth].ep_header)) {
752                 SDT_PROBE2(ext2fs, , trace, extents, 1,
753                     "extent is out of range => extent corrupted");
754                 return (EIO);
755         }
756
757         if (path[depth].ep_ext != EXT_MAX_EXTENT(path[depth].ep_header))
758                 border = le32toh(path[depth].ep_ext[1].e_blk);
759         else
760                 border = le32toh(newext->e_blk);
761
762         /* Allocate new blocks. */
763         ablks = malloc(sizeof(e4fs_daddr_t) * depth,
764             M_EXT2EXTENTS, M_WAITOK | M_ZERO);
765         if (!ablks)
766                 return (ENOMEM);
767         for (a = 0; a < depth - at; a++) {
768                 newblk = ext4_ext_alloc_meta(ip);
769                 if (newblk == 0)
770                         goto cleanup;
771                 ablks[a] = newblk;
772         }
773
774         newblk = ablks[--a];
775         bp = getblk(ip->i_devvp, fsbtodb(fs, newblk), fs->e2fs_bsize, 0, 0, 0);
776         if (!bp) {
777                 error = EIO;
778                 goto cleanup;
779         }
780
781         neh = ext4_ext_block_header(bp->b_data);
782         neh->eh_ecount = 0;
783         neh->eh_max = le16toh(ext4_ext_space_block(ip));
784         neh->eh_magic = le16toh(EXT4_EXT_MAGIC);
785         neh->eh_depth = 0;
786         ex = EXT_FIRST_EXTENT(neh);
787
788         if (le16toh(path[depth].ep_header->eh_ecount) !=
789             le16toh(path[depth].ep_header->eh_max)) {
790                 SDT_PROBE2(ext2fs, , trace, extents, 1,
791                     "extents count out of range => extent corrupted");
792                 error = EIO;
793                 goto cleanup;
794         }
795
796         /* Start copy from next extent. */
797         m = 0;
798         path[depth].ep_ext++;
799         while (path[depth].ep_ext <= EXT_MAX_EXTENT(path[depth].ep_header)) {
800                 path[depth].ep_ext++;
801                 m++;
802         }
803         if (m) {
804                 memmove(ex, path[depth].ep_ext - m,
805                     sizeof(struct ext4_extent) * m);
806                 neh->eh_ecount = htole16(le16toh(neh->eh_ecount) + m);
807         }
808
809         ext2_extent_blk_csum_set(ip, bp->b_data);
810         bwrite(bp);
811         bp = NULL;
812
813         /* Fix old leaf. */
814         if (m) {
815                 path[depth].ep_header->eh_ecount =
816                     htole16(le16toh(path[depth].ep_header->eh_ecount) - m);
817                 ext4_ext_dirty(ip, path + depth);
818         }
819
820         /* Create intermediate indexes. */
821         k = depth - at - 1;
822         KASSERT(k >= 0, ("ext4_ext_split: negative k"));
823
824         /* Insert new index into current index block. */
825         i = depth - 1;
826         while (k--) {
827                 oldblk = newblk;
828                 newblk = ablks[--a];
829                 error = bread(ip->i_devvp, fsbtodb(fs, newblk),
830                     (int)fs->e2fs_bsize, NOCRED, &bp);
831                 if (error) {
832                         goto cleanup;
833                 }
834
835                 neh = (struct ext4_extent_header *)bp->b_data;
836                 neh->eh_ecount = htole16(1);
837                 neh->eh_magic = htole16(EXT4_EXT_MAGIC);
838                 neh->eh_max = htole16(ext4_ext_space_block_index(ip));
839                 neh->eh_depth = htole16(depth - i);
840                 fidx = EXT_FIRST_INDEX(neh);
841                 fidx->ei_blk = htole32(border);
842                 ext4_index_store_pblock(fidx, oldblk);
843
844                 m = 0;
845                 path[i].ep_index++;
846                 while (path[i].ep_index <= EXT_MAX_INDEX(path[i].ep_header)) {
847                         path[i].ep_index++;
848                         m++;
849                 }
850                 if (m) {
851                         memmove(++fidx, path[i].ep_index - m,
852                             sizeof(struct ext4_extent_index) * m);
853                         neh->eh_ecount = htole16(le16toh(neh->eh_ecount) + m);
854                 }
855
856                 ext2_extent_blk_csum_set(ip, bp->b_data);
857                 bwrite(bp);
858                 bp = NULL;
859
860                 /* Fix old index. */
861                 if (m) {
862                         path[i].ep_header->eh_ecount =
863                             htole16(le16toh(path[i].ep_header->eh_ecount) - m);
864                         ext4_ext_dirty(ip, path + i);
865                 }
866
867                 i--;
868         }
869
870         error = ext4_ext_insert_index(ip, path + at, border, newblk);
871
872 cleanup:
873         if (bp)
874                 brelse(bp);
875
876         if (error) {
877                 for (i = 0; i < depth; i++) {
878                         if (!ablks[i])
879                                 continue;
880                         ext4_ext_blkfree(ip, ablks[i], 1, 0);
881                 }
882         }
883
884         free(ablks, M_EXT2EXTENTS);
885
886         return (error);
887 }
888
889 static int
890 ext4_ext_grow_indepth(struct inode *ip, struct ext4_extent_path *path,
891     struct ext4_extent *newext)
892 {
893         struct m_ext2fs *fs;
894         struct ext4_extent_path *curpath;
895         struct ext4_extent_header *neh;
896         struct buf *bp;
897         e4fs_daddr_t newblk;
898         int error = 0;
899
900         fs = ip->i_e2fs;
901         curpath = path;
902
903         newblk = ext4_ext_alloc_meta(ip);
904         if (newblk == 0)
905                 return (error);
906
907         bp = getblk(ip->i_devvp, fsbtodb(fs, newblk), fs->e2fs_bsize, 0, 0, 0);
908         if (!bp)
909                 return (EIO);
910
911         /* Move top-level index/leaf into new block. */
912         memmove(bp->b_data, curpath->ep_header, sizeof(ip->i_data));
913
914         /* Set size of new block */
915         neh = ext4_ext_block_header(bp->b_data);
916         neh->eh_magic = htole16(EXT4_EXT_MAGIC);
917
918         if (ext4_ext_inode_depth(ip))
919                 neh->eh_max = htole16(ext4_ext_space_block_index(ip));
920         else
921                 neh->eh_max = htole16(ext4_ext_space_block(ip));
922
923         ext2_extent_blk_csum_set(ip, bp->b_data);
924         error = bwrite(bp);
925         if (error)
926                 goto out;
927
928         bp = NULL;
929
930         curpath->ep_header->eh_magic = htole16(EXT4_EXT_MAGIC);
931         curpath->ep_header->eh_max = htole16(ext4_ext_space_root(ip));
932         curpath->ep_header->eh_ecount = htole16(1);
933         curpath->ep_index = EXT_FIRST_INDEX(curpath->ep_header);
934         curpath->ep_index->ei_blk = EXT_FIRST_EXTENT(path[0].ep_header)->e_blk;
935         ext4_index_store_pblock(curpath->ep_index, newblk);
936
937         neh = ext4_ext_inode_header(ip);
938         neh->eh_depth = htole16(path->ep_depth + 1);
939         ext4_ext_dirty(ip, curpath);
940 out:
941         brelse(bp);
942
943         return (error);
944 }
945
946 static int
947 ext4_ext_create_new_leaf(struct inode *ip, struct ext4_extent_path *path,
948     struct ext4_extent *newext)
949 {
950         struct ext4_extent_path *curpath;
951         int depth, i, error;
952
953 repeat:
954         i = depth = ext4_ext_inode_depth(ip);
955
956         /* Look for free index entry int the tree */
957         curpath = path + depth;
958         while (i > 0 && !EXT_HAS_FREE_INDEX(curpath)) {
959                 i--;
960                 curpath--;
961         }
962
963         /*
964          * We use already allocated block for index block,
965          * so subsequent data blocks should be contiguous.
966          */
967         if (EXT_HAS_FREE_INDEX(curpath)) {
968                 error = ext4_ext_split(ip, path, newext, i);
969                 if (error)
970                         goto out;
971
972                 /* Refill path. */
973                 ext4_ext_drop_refs(path);
974                 error = ext4_ext_find_extent(ip, le32toh(newext->e_blk), &path);
975                 if (error)
976                         goto out;
977         } else {
978                 /* Tree is full, do grow in depth. */
979                 error = ext4_ext_grow_indepth(ip, path, newext);
980                 if (error)
981                         goto out;
982
983                 /* Refill path. */
984                 ext4_ext_drop_refs(path);
985                 error = ext4_ext_find_extent(ip, le32toh(newext->e_blk), &path);
986                 if (error)
987                         goto out;
988
989                 /* Check and split tree if required. */
990                 depth = ext4_ext_inode_depth(ip);
991                 if (le16toh(path[depth].ep_header->eh_ecount) ==
992                     le16toh(path[depth].ep_header->eh_max))
993                         goto repeat;
994         }
995
996 out:
997         return (error);
998 }
999
1000 static int
1001 ext4_ext_correct_indexes(struct inode *ip, struct ext4_extent_path *path)
1002 {
1003         struct ext4_extent_header *eh;
1004         struct ext4_extent *ex;
1005         int32_t border;
1006         int depth, k;
1007
1008         depth = ext4_ext_inode_depth(ip);
1009         eh = path[depth].ep_header;
1010         ex = path[depth].ep_ext;
1011
1012         if (ex == NULL || eh == NULL)
1013                 return (EIO);
1014
1015         if (!depth)
1016                 return (0);
1017
1018         /* We will correct tree if first leaf got modified only. */
1019         if (ex != EXT_FIRST_EXTENT(eh))
1020                 return (0);
1021
1022         k = depth - 1;
1023         border = le32toh(path[depth].ep_ext->e_blk);
1024         path[k].ep_index->ei_blk = htole32(border);
1025         ext4_ext_dirty(ip, path + k);
1026         while (k--) {
1027                 /* Change all left-side indexes. */
1028                 if (path[k+1].ep_index != EXT_FIRST_INDEX(path[k+1].ep_header))
1029                         break;
1030
1031                 path[k].ep_index->ei_blk = htole32(border);
1032                 ext4_ext_dirty(ip, path + k);
1033         }
1034
1035         return (0);
1036 }
1037
1038 static int
1039 ext4_ext_insert_extent(struct inode *ip, struct ext4_extent_path *path,
1040     struct ext4_extent *newext)
1041 {
1042         struct ext4_extent_header * eh;
1043         struct ext4_extent *ex, *nex, *nearex;
1044         struct ext4_extent_path *npath;
1045         int depth, len, error, next;
1046
1047         depth = ext4_ext_inode_depth(ip);
1048         ex = path[depth].ep_ext;
1049         npath = NULL;
1050
1051         if (htole16(newext->e_len) == 0 || path[depth].ep_header == NULL)
1052                 return (EINVAL);
1053
1054         /* Insert block into found extent. */
1055         if (ex && ext4_can_extents_be_merged(ex, newext)) {
1056                 ex->e_len = htole16(le16toh(ex->e_len) + le16toh(newext->e_len));
1057                 eh = path[depth].ep_header;
1058                 nearex = ex;
1059                 goto merge;
1060         }
1061
1062 repeat:
1063         depth = ext4_ext_inode_depth(ip);
1064         eh = path[depth].ep_header;
1065         if (le16toh(eh->eh_ecount) < le16toh(eh->eh_max))
1066                 goto has_space;
1067
1068         /* Try next leaf */
1069         nex = EXT_LAST_EXTENT(eh);
1070         next = ext4_ext_next_leaf_block(ip, path);
1071         if (le32toh(newext->e_blk) > le32toh(nex->e_blk) && next !=
1072             EXT4_MAX_BLOCKS) {
1073                 KASSERT(npath == NULL,
1074                     ("ext4_ext_insert_extent: bad path"));
1075
1076                 error = ext4_ext_find_extent(ip, next, &npath);
1077                 if (error)
1078                         goto cleanup;
1079
1080                 if (npath->ep_depth != path->ep_depth) {
1081                         error = EIO;
1082                         goto cleanup;
1083                 }
1084
1085                 eh = npath[depth].ep_header;
1086                 if (le16toh(eh->eh_ecount) < le16toh(eh->eh_max)) {
1087                         path = npath;
1088                         goto repeat;
1089                 }
1090         }
1091
1092         /*
1093          * There is no free space in the found leaf,
1094          * try to add a new leaf to the tree.
1095          */
1096         error = ext4_ext_create_new_leaf(ip, path, newext);
1097         if (error)
1098                 goto cleanup;
1099
1100         depth = ext4_ext_inode_depth(ip);
1101         eh = path[depth].ep_header;
1102
1103 has_space:
1104         nearex = path[depth].ep_ext;
1105         if (!nearex) {
1106                 /* Create new extent in the leaf. */
1107                 path[depth].ep_ext = EXT_FIRST_EXTENT(eh);
1108         } else if (le32toh(newext->e_blk) > le32toh(nearex->e_blk)) {
1109                 if (nearex != EXT_LAST_EXTENT(eh)) {
1110                         len = EXT_MAX_EXTENT(eh) - nearex;
1111                         len = (len - 1) * sizeof(struct ext4_extent);
1112                         len = len < 0 ? 0 : len;
1113                         memmove(nearex + 2, nearex + 1, len);
1114                 }
1115                 path[depth].ep_ext = nearex + 1;
1116         } else {
1117                 len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
1118                 len = len < 0 ? 0 : len;
1119                 memmove(nearex + 1, nearex, len);
1120                 path[depth].ep_ext = nearex;
1121         }
1122
1123         eh->eh_ecount = htole16(le16toh(eh->eh_ecount) + 1);
1124         nearex = path[depth].ep_ext;
1125         nearex->e_blk = newext->e_blk;
1126         nearex->e_start_lo = newext->e_start_lo;
1127         nearex->e_start_hi = newext->e_start_hi;
1128         nearex->e_len = newext->e_len;
1129
1130 merge:
1131         /* Try to merge extents to the right. */
1132         while (nearex < EXT_LAST_EXTENT(eh)) {
1133                 if (!ext4_can_extents_be_merged(nearex, nearex + 1))
1134                         break;
1135
1136                 /* Merge with next extent. */
1137                 nearex->e_len = htole16(le16toh(nearex->e_len) +
1138                     le16toh(nearex[1].e_len));
1139                 if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
1140                         len = (EXT_LAST_EXTENT(eh) - nearex - 1) *
1141                             sizeof(struct ext4_extent);
1142                         memmove(nearex + 1, nearex + 2, len);
1143                 }
1144
1145                 eh->eh_ecount = htole16(le16toh(eh->eh_ecount) - 1);
1146                 KASSERT(le16toh(eh->eh_ecount) != 0,
1147                     ("ext4_ext_insert_extent: bad ecount"));
1148         }
1149
1150         /*
1151          * Try to merge extents to the left,
1152          * start from inexes correction.
1153          */
1154         error = ext4_ext_correct_indexes(ip, path);
1155         if (error)
1156                 goto cleanup;
1157
1158         ext4_ext_dirty(ip, path + depth);
1159
1160 cleanup:
1161         if (npath) {
1162                 ext4_ext_drop_refs(npath);
1163                 free(npath, M_EXT2EXTENTS);
1164         }
1165
1166         ip->i_ext_cache.ec_type = EXT4_EXT_CACHE_NO;
1167         return (error);
1168 }
1169
1170 static e4fs_daddr_t
1171 ext4_new_blocks(struct inode *ip, daddr_t lbn, e4fs_daddr_t pref,
1172     struct ucred *cred, unsigned long *count, int *perror)
1173 {
1174         struct m_ext2fs *fs;
1175         e4fs_daddr_t newblk;
1176
1177         /*
1178          * We will allocate only single block for now.
1179          */
1180         if (*count > 1)
1181                 return (0);
1182
1183         fs = ip->i_e2fs;
1184         EXT2_LOCK(ip->i_ump);
1185         *perror = ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newblk);
1186         if (*perror)
1187                 return (0);
1188
1189         if (newblk) {
1190                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1191                 ext2_update(ip->i_vnode, 1);
1192         }
1193
1194         return (newblk);
1195 }
1196
1197 int
1198 ext4_ext_get_blocks(struct inode *ip, e4fs_daddr_t iblk,
1199     unsigned long max_blocks, struct ucred *cred, struct buf **bpp,
1200     int *pallocated, daddr_t *nb)
1201 {
1202         struct m_ext2fs *fs;
1203         struct buf *bp = NULL;
1204         struct ext4_extent_path *path;
1205         struct ext4_extent newex, *ex;
1206         e4fs_daddr_t bpref, newblk = 0;
1207         unsigned long allocated = 0;
1208         int error = 0, depth;
1209
1210         if(bpp)
1211                 *bpp = NULL;
1212         *pallocated = 0;
1213
1214         /* Check cache. */
1215         path = NULL;
1216         if ((bpref = ext4_ext_in_cache(ip, iblk, &newex))) {
1217                 if (bpref == EXT4_EXT_CACHE_IN) {
1218                         /* Block is already allocated. */
1219                         newblk = iblk - le32toh(newex.e_blk) +
1220                             ext4_ext_extent_pblock(&newex);
1221                         allocated = le16toh(newex.e_len) - (iblk - le32toh(newex.e_blk));
1222                         goto out;
1223                 } else {
1224                         error = EIO;
1225                         goto out2;
1226                 }
1227         }
1228
1229         error = ext4_ext_find_extent(ip, iblk, &path);
1230         if (error) {
1231                 goto out2;
1232         }
1233
1234         depth = ext4_ext_inode_depth(ip);
1235         if (path[depth].ep_ext == NULL && depth != 0) {
1236                 error = EIO;
1237                 goto out2;
1238         }
1239
1240         if ((ex = path[depth].ep_ext)) {
1241                 uint64_t lblk = le32toh(ex->e_blk);
1242                 uint16_t e_len  = le16toh(ex->e_len);
1243                 e4fs_daddr_t e_start = ext4_ext_extent_pblock(ex);
1244
1245                 if (e_len > EXT4_MAX_LEN)
1246                         goto out2;
1247
1248                 /* If we found extent covers block, simply return it. */
1249                 if (iblk >= lblk && iblk < lblk + e_len) {
1250                         newblk = iblk - lblk + e_start;
1251                         allocated = e_len - (iblk - lblk);
1252                         ext4_ext_put_in_cache(ip, lblk, e_len,
1253                             e_start, EXT4_EXT_CACHE_IN);
1254                         goto out;
1255                 }
1256         }
1257
1258         /* Allocate the new block. */
1259         if (S_ISREG(ip->i_mode) && (!ip->i_next_alloc_block)) {
1260                 ip->i_next_alloc_goal = 0;
1261         }
1262
1263         bpref = ext4_ext_blkpref(ip, path, iblk);
1264         allocated = max_blocks;
1265         newblk = ext4_new_blocks(ip, iblk, bpref, cred, &allocated, &error);
1266         if (!newblk)
1267                 goto out2;
1268
1269         /* Try to insert new extent into found leaf and return. */
1270         newex.e_blk = htole32(iblk);
1271         ext4_ext_store_pblock(&newex, newblk);
1272         newex.e_len = htole16(allocated);
1273         error = ext4_ext_insert_extent(ip, path, &newex);
1274         if (error)
1275                 goto out2;
1276
1277         newblk = ext4_ext_extent_pblock(&newex);
1278         ext4_ext_put_in_cache(ip, iblk, allocated, newblk, EXT4_EXT_CACHE_IN);
1279         *pallocated = 1;
1280
1281 out:
1282         if (allocated > max_blocks)
1283                 allocated = max_blocks;
1284
1285         if (bpp)
1286         {
1287                 fs = ip->i_e2fs;
1288                 error = bread(ip->i_devvp, fsbtodb(fs, newblk),
1289                     fs->e2fs_bsize, cred, &bp);
1290                 if (error) {
1291                         brelse(bp);
1292                 } else {
1293                         *bpp = bp;
1294                 }
1295         }
1296
1297 out2:
1298         if (path) {
1299                 ext4_ext_drop_refs(path);
1300                 free(path, M_EXT2EXTENTS);
1301         }
1302
1303         if (nb)
1304                 *nb = newblk;
1305
1306         return (error);
1307 }
1308
1309 static inline uint16_t
1310 ext4_ext_get_actual_len(struct ext4_extent *ext)
1311 {
1312
1313         return (le16toh(ext->e_len) <= EXT_INIT_MAX_LEN ?
1314             le16toh(ext->e_len) : (le16toh(ext->e_len) - EXT_INIT_MAX_LEN));
1315 }
1316
1317 static inline struct ext4_extent_header *
1318 ext4_ext_header(struct inode *ip)
1319 {
1320
1321         return ((struct ext4_extent_header *)ip->i_db);
1322 }
1323
1324 static int
1325 ext4_remove_blocks(struct inode *ip, struct ext4_extent *ex,
1326     unsigned long from, unsigned long to)
1327 {
1328         unsigned long num, start;
1329
1330         if (from >= le32toh(ex->e_blk) &&
1331             to == le32toh(ex->e_blk) + ext4_ext_get_actual_len(ex) - 1) {
1332                 /* Tail cleanup. */
1333                 num = le32toh(ex->e_blk) + ext4_ext_get_actual_len(ex) - from;
1334                 start = ext4_ext_extent_pblock(ex) +
1335                     ext4_ext_get_actual_len(ex) - num;
1336                 ext4_ext_blkfree(ip, start, num, 0);
1337         }
1338
1339         return (0);
1340 }
1341
1342 static int
1343 ext4_ext_rm_index(struct inode *ip, struct ext4_extent_path *path)
1344 {
1345         e4fs_daddr_t leaf;
1346
1347         /* Free index block. */
1348         path--;
1349         leaf = ext4_ext_index_pblock(path->ep_index);
1350         KASSERT(path->ep_header->eh_ecount != 0,
1351             ("ext4_ext_rm_index: bad ecount"));
1352         path->ep_header->eh_ecount =
1353             htole16(le16toh(path->ep_header->eh_ecount) - 1);
1354         ext4_ext_dirty(ip, path);
1355         ext4_ext_blkfree(ip, leaf, 1, 0);
1356         return (0);
1357 }
1358
1359 static int
1360 ext4_ext_rm_leaf(struct inode *ip, struct ext4_extent_path *path,
1361     uint64_t start)
1362 {
1363         struct ext4_extent_header *eh;
1364         struct ext4_extent *ex;
1365         unsigned int a, b, block, num;
1366         unsigned long ex_blk;
1367         unsigned short ex_len;
1368         int depth;
1369         int error, correct_index;
1370
1371         depth = ext4_ext_inode_depth(ip);
1372         if (!path[depth].ep_header) {
1373                 if (path[depth].ep_data == NULL)
1374                         return (EINVAL);
1375                 path[depth].ep_header =
1376                     (struct ext4_extent_header* )path[depth].ep_data;
1377         }
1378
1379         eh = path[depth].ep_header;
1380         if (!eh) {
1381                 SDT_PROBE2(ext2fs, , trace, extents, 1,
1382                     "bad header => extent corrupted");
1383                 return (EIO);
1384         }
1385
1386         ex = EXT_LAST_EXTENT(eh);
1387         ex_blk = le32toh(ex->e_blk);
1388         ex_len = ext4_ext_get_actual_len(ex);
1389
1390         error = 0;
1391         correct_index = 0;
1392         while (ex >= EXT_FIRST_EXTENT(eh) && ex_blk + ex_len > start) {
1393                 path[depth].ep_ext = ex;
1394                 a = ex_blk > start ? ex_blk : start;
1395                 b = (uint64_t)ex_blk + ex_len - 1 <
1396                     EXT4_MAX_BLOCKS ? ex_blk + ex_len - 1 : EXT4_MAX_BLOCKS;
1397
1398                 if (a != ex_blk && b != ex_blk + ex_len - 1)
1399                         return (EINVAL);
1400                 else if (a != ex_blk) {
1401                         /* Remove tail of the extent. */
1402                         block = ex_blk;
1403                         num = a - block;
1404                 } else if (b != ex_blk + ex_len - 1) {
1405                         /* Remove head of the extent, not implemented. */
1406                         return (EINVAL);
1407                 } else {
1408                         /* Remove whole extent. */
1409                         block = ex_blk;
1410                         num = 0;
1411                 }
1412
1413                 if (ex == EXT_FIRST_EXTENT(eh))
1414                         correct_index = 1;
1415
1416                 error = ext4_remove_blocks(ip, ex, a, b);
1417                 if (error)
1418                         goto out;
1419
1420                 if (num == 0) {
1421                         ext4_ext_store_pblock(ex, 0);
1422                         eh->eh_ecount = htole16(le16toh(eh->eh_ecount) - 1);
1423                 }
1424
1425                 ex->e_blk = htole32(block);
1426                 ex->e_len = htole16(num);
1427
1428                 ext4_ext_dirty(ip, path + depth);
1429
1430                 ex--;
1431                 ex_blk = htole32(ex->e_blk);
1432                 ex_len = ext4_ext_get_actual_len(ex);
1433         };
1434
1435         if (correct_index && le16toh(eh->eh_ecount))
1436                 error = ext4_ext_correct_indexes(ip, path);
1437
1438         /*
1439          * If this leaf is free, we should
1440          * remove it from index block above.
1441          */
1442         if (error == 0 && eh->eh_ecount == 0 &&
1443             path[depth].ep_data != NULL)
1444                 error = ext4_ext_rm_index(ip, path + depth);
1445
1446 out:
1447         return (error);
1448 }
1449
1450 static struct buf *
1451 ext4_read_extent_tree_block(struct inode *ip, e4fs_daddr_t pblk,
1452     int depth, int flags)
1453 {
1454         struct m_ext2fs *fs;
1455         struct ext4_extent_header *eh;
1456         struct buf *bp;
1457         int error;
1458
1459         fs = ip->i_e2fs;
1460         error = bread(ip->i_devvp, fsbtodb(fs, pblk),
1461             fs->e2fs_bsize, NOCRED, &bp);
1462         if (error) {
1463                 return (NULL);
1464         }
1465
1466         eh = ext4_ext_block_header(bp->b_data);
1467         if (le16toh(eh->eh_depth) != depth) {
1468                 SDT_PROBE2(ext2fs, , trace, extents, 1,
1469                     "unexpected eh_depth");
1470                 goto err;
1471         }
1472
1473         error = ext4_ext_check_header(ip, eh);
1474         if (error)
1475                 goto err;
1476
1477         return (bp);
1478
1479 err:
1480         brelse(bp);
1481         return (NULL);
1482
1483 }
1484
1485 static int inline
1486 ext4_ext_more_to_rm(struct ext4_extent_path *path)
1487 {
1488
1489         KASSERT(path->ep_index != NULL,
1490             ("ext4_ext_more_to_rm: bad index from path"));
1491
1492         if (path->ep_index < EXT_FIRST_INDEX(path->ep_header))
1493                 return (0);
1494
1495         if (le16toh(path->ep_header->eh_ecount) == path->index_count)
1496                 return (0);
1497
1498         return (1);
1499 }
1500
1501 int
1502 ext4_ext_remove_space(struct inode *ip, off_t length, int flags,
1503     struct ucred *cred, struct thread *td)
1504 {
1505         struct buf *bp;
1506         struct ext4_extent_header *ehp;
1507         struct ext4_extent_path *path;
1508         int depth;
1509         int i, error;
1510
1511         ehp = (struct ext4_extent_header *)ip->i_db;
1512         depth = ext4_ext_inode_depth(ip);
1513
1514         error = ext4_ext_check_header(ip, ehp);
1515         if(error)
1516                 return (error);
1517
1518         path = malloc(sizeof(struct ext4_extent_path) * (depth + 1),
1519             M_EXT2EXTENTS, M_WAITOK | M_ZERO);
1520         if (!path)
1521                 return (ENOMEM);
1522
1523         path[0].ep_header = ehp;
1524         path[0].ep_depth = depth;
1525         i = 0;
1526         while (error == 0 && i >= 0) {
1527                 if (i == depth) {
1528                         /* This is leaf. */
1529                         error = ext4_ext_rm_leaf(ip, path, length);
1530                         if (error)
1531                                 break;
1532                         free(path[i].ep_data, M_EXT2EXTENTS);
1533                         path[i].ep_data = NULL;
1534                         i--;
1535                         continue;
1536                 }
1537
1538                 /* This is index. */
1539                 if (!path[i].ep_header)
1540                         path[i].ep_header =
1541                             (struct ext4_extent_header *)path[i].ep_data;
1542
1543                 if (!path[i].ep_index) {
1544                         /* This level hasn't touched yet. */
1545                         path[i].ep_index = EXT_LAST_INDEX(path[i].ep_header);
1546                         path[i].index_count =
1547                             le16toh(path[i].ep_header->eh_ecount) + 1;
1548                 } else {
1549                         /* We've already was here, see at next index. */
1550                         path[i].ep_index--;
1551                 }
1552
1553                 if (ext4_ext_more_to_rm(path + i)) {
1554                         memset(path + i + 1, 0, sizeof(*path));
1555                         bp = ext4_read_extent_tree_block(ip,
1556                             ext4_ext_index_pblock(path[i].ep_index),
1557                             path[0].ep_depth - (i + 1), 0);
1558                         if (!bp) {
1559                                 error = EIO;
1560                                 break;
1561                         }
1562
1563                         ext4_ext_fill_path_bdata(&path[i+1], bp,
1564                             ext4_ext_index_pblock(path[i].ep_index));
1565                         brelse(bp);
1566                         path[i].index_count =
1567                             le16toh(path[i].ep_header->eh_ecount);
1568                         i++;
1569                 } else {
1570                         if (path[i].ep_header->eh_ecount == 0 && i > 0) {
1571                                 /* Index is empty, remove it. */
1572                                 error = ext4_ext_rm_index(ip, path + i);
1573                         }
1574                         free(path[i].ep_data, M_EXT2EXTENTS);
1575                         path[i].ep_data = NULL;
1576                         i--;
1577                 }
1578         }
1579
1580         if (path->ep_header->eh_ecount == 0) {
1581                 /*
1582                  * Truncate the tree to zero.
1583                  */
1584                  ext4_ext_header(ip)->eh_depth = 0;
1585                  ext4_ext_header(ip)->eh_max = htole16(ext4_ext_space_root(ip));
1586                  ext4_ext_dirty(ip, path);
1587         }
1588
1589         ext4_ext_drop_refs(path);
1590         free(path, M_EXT2EXTENTS);
1591
1592         return (error);
1593 }