]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/fs/ext2fs/ext2_extents.c
Import tzdata 2020a
[FreeBSD/FreeBSD.git] / sys / fs / ext2fs / ext2_extents.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2010 Zheng Liu <lz@freebsd.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/types.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/vnode.h>
37 #include <sys/bio.h>
38 #include <sys/buf.h>
39 #include <sys/conf.h>
40 #include <sys/sdt.h>
41 #include <sys/stat.h>
42
43 #include <fs/ext2fs/ext2_mount.h>
44 #include <fs/ext2fs/fs.h>
45 #include <fs/ext2fs/inode.h>
46 #include <fs/ext2fs/ext2fs.h>
47 #include <fs/ext2fs/ext2_extents.h>
48 #include <fs/ext2fs/ext2_extern.h>
49
50 SDT_PROVIDER_DECLARE(ext2fs);
51 /*
52  * ext2fs trace probe:
53  * arg0: verbosity. Higher numbers give more verbose messages
54  * arg1: Textual message
55  */
56 SDT_PROBE_DEFINE2(ext2fs, , trace, extents, "int", "char*");
57
58 static MALLOC_DEFINE(M_EXT2EXTENTS, "ext2_extents", "EXT2 extents");
59
60 #ifdef EXT2FS_PRINT_EXTENTS
61 static void
62 ext4_ext_print_extent(struct ext4_extent *ep)
63 {
64
65         printf("    ext %p => (blk %u len %u start %ju)\n",
66             ep, ep->e_blk, ep->e_len,
67             (uint64_t)ep->e_start_hi << 32 | ep->e_start_lo);
68 }
69
70 static void ext4_ext_print_header(struct inode *ip, struct ext4_extent_header *ehp);
71
72 static void
73 ext4_ext_print_index(struct inode *ip, struct ext4_extent_index *ex, int do_walk)
74 {
75         struct m_ext2fs *fs;
76         struct buf *bp;
77         int error;
78
79         fs = ip->i_e2fs;
80
81         printf("    index %p => (blk %u pblk %ju)\n",
82             ex, ex->ei_blk, (uint64_t)ex->ei_leaf_hi << 32 | ex->ei_leaf_lo);
83
84         if(!do_walk)
85                 return;
86
87         if ((error = bread(ip->i_devvp,
88             fsbtodb(fs, ((uint64_t)ex->ei_leaf_hi << 32 | ex->ei_leaf_lo)),
89             (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) {
90                 brelse(bp);
91                 return;
92         }
93
94         ext4_ext_print_header(ip, (struct ext4_extent_header *)bp->b_data);
95
96         brelse(bp);
97
98 }
99
100 static void
101 ext4_ext_print_header(struct inode *ip, struct ext4_extent_header *ehp)
102 {
103         int i;
104
105         printf("header %p => (magic 0x%x entries %d max %d depth %d gen %d)\n",
106             ehp, ehp->eh_magic, ehp->eh_ecount, ehp->eh_max, ehp->eh_depth,
107             ehp->eh_gen);
108
109         for (i = 0; i < ehp->eh_ecount; i++)
110                 if (ehp->eh_depth != 0)
111                         ext4_ext_print_index(ip,
112                             (struct ext4_extent_index *)(ehp + 1 + i), 1);
113                 else
114                         ext4_ext_print_extent((struct ext4_extent *)(ehp + 1 + i));
115 }
116
117 static void
118 ext4_ext_print_path(struct inode *ip, struct ext4_extent_path *path)
119 {
120         int k, l;
121
122         l = path->ep_depth;
123
124         printf("ip=%ju, Path:\n", ip->i_number);
125         for (k = 0; k <= l; k++, path++) {
126                 if (path->ep_index) {
127                         ext4_ext_print_index(ip, path->ep_index, 0);
128                 } else if (path->ep_ext) {
129                         ext4_ext_print_extent(path->ep_ext);
130                 }
131         }
132 }
133
134 void
135 ext4_ext_print_extent_tree_status(struct inode *ip)
136 {
137         struct ext4_extent_header *ehp;
138
139         ehp = (struct ext4_extent_header *)(char *)ip->i_db;
140
141         printf("Extent status:ip=%ju\n", ip->i_number);
142         if (!(ip->i_flag & IN_E4EXTENTS))
143                 return;
144
145         ext4_ext_print_header(ip, ehp);
146
147         return;
148 }
149 #endif
150
151 static inline struct ext4_extent_header *
152 ext4_ext_inode_header(struct inode *ip)
153 {
154
155         return ((struct ext4_extent_header *)ip->i_db);
156 }
157
158 static inline struct ext4_extent_header *
159 ext4_ext_block_header(char *bdata)
160 {
161
162         return ((struct ext4_extent_header *)bdata);
163 }
164
165 static inline unsigned short
166 ext4_ext_inode_depth(struct inode *ip)
167 {
168         struct ext4_extent_header *ehp;
169
170         ehp = (struct ext4_extent_header *)ip->i_data;
171         return (ehp->eh_depth);
172 }
173
174 static inline e4fs_daddr_t
175 ext4_ext_index_pblock(struct ext4_extent_index *index)
176 {
177         e4fs_daddr_t blk;
178
179         blk = index->ei_leaf_lo;
180         blk |= (e4fs_daddr_t)index->ei_leaf_hi << 32;
181
182         return (blk);
183 }
184
185 static inline void
186 ext4_index_store_pblock(struct ext4_extent_index *index, e4fs_daddr_t pb)
187 {
188
189         index->ei_leaf_lo = pb & 0xffffffff;
190         index->ei_leaf_hi = (pb >> 32) & 0xffff;
191 }
192
193
194 static inline e4fs_daddr_t
195 ext4_ext_extent_pblock(struct ext4_extent *extent)
196 {
197         e4fs_daddr_t blk;
198
199         blk = extent->e_start_lo;
200         blk |= (e4fs_daddr_t)extent->e_start_hi << 32;
201
202         return (blk);
203 }
204
205 static inline void
206 ext4_ext_store_pblock(struct ext4_extent *ex, e4fs_daddr_t pb)
207 {
208
209         ex->e_start_lo = pb & 0xffffffff;
210         ex->e_start_hi = (pb >> 32) & 0xffff;
211 }
212
213 int
214 ext4_ext_in_cache(struct inode *ip, daddr_t lbn, struct ext4_extent *ep)
215 {
216         struct ext4_extent_cache *ecp;
217         int ret = EXT4_EXT_CACHE_NO;
218
219         ecp = &ip->i_ext_cache;
220         if (ecp->ec_type == EXT4_EXT_CACHE_NO)
221                 return (ret);
222
223         if (lbn >= ecp->ec_blk && lbn < ecp->ec_blk + ecp->ec_len) {
224                 ep->e_blk = ecp->ec_blk;
225                 ep->e_start_lo = ecp->ec_start & 0xffffffff;
226                 ep->e_start_hi = ecp->ec_start >> 32 & 0xffff;
227                 ep->e_len = ecp->ec_len;
228                 ret = ecp->ec_type;
229         }
230         return (ret);
231 }
232
233 static int
234 ext4_ext_check_header(struct inode *ip, struct ext4_extent_header *eh)
235 {
236         struct m_ext2fs *fs;
237         char *error_msg;
238
239         fs = ip->i_e2fs;
240
241         if (eh->eh_magic != EXT4_EXT_MAGIC) {
242                 error_msg = "header: invalid magic";
243                 goto corrupted;
244         }
245         if (eh->eh_max == 0) {
246                 error_msg = "header: invalid eh_max";
247                 goto corrupted;
248         }
249         if (eh->eh_ecount > eh->eh_max) {
250                 error_msg = "header: invalid eh_entries";
251                 goto corrupted;
252         }
253
254         return (0);
255
256 corrupted:
257         SDT_PROBE2(ext2fs, , trace, extents, 1, error_msg);
258         return (EIO);
259 }
260
261 static void
262 ext4_ext_binsearch_index(struct ext4_extent_path *path, int blk)
263 {
264         struct ext4_extent_header *eh;
265         struct ext4_extent_index *r, *l, *m;
266
267         eh = path->ep_header;
268
269         KASSERT(eh->eh_ecount <= eh->eh_max && eh->eh_ecount > 0,
270             ("ext4_ext_binsearch_index: bad args"));
271
272         l = EXT_FIRST_INDEX(eh) + 1;
273         r = EXT_FIRST_INDEX(eh) + eh->eh_ecount - 1;
274         while (l <= r) {
275                 m = l + (r - l) / 2;
276                 if (blk < m->ei_blk)
277                         r = m - 1;
278                 else
279                         l = m + 1;
280         }
281
282         path->ep_index = l - 1;
283 }
284
285 static void
286 ext4_ext_binsearch_ext(struct ext4_extent_path *path, int blk)
287 {
288         struct ext4_extent_header *eh;
289         struct ext4_extent *r, *l, *m;
290
291         eh = path->ep_header;
292
293         KASSERT(eh->eh_ecount <= eh->eh_max,
294             ("ext4_ext_binsearch_ext: bad args"));
295
296         if (eh->eh_ecount == 0)
297                 return;
298
299         l = EXT_FIRST_EXTENT(eh) + 1;
300         r = EXT_FIRST_EXTENT(eh) + eh->eh_ecount - 1;
301
302         while (l <= r) {
303                 m = l + (r - l) / 2;
304                 if (blk < m->e_blk)
305                         r = m - 1;
306                 else
307                         l = m + 1;
308         }
309
310         path->ep_ext = l - 1;
311 }
312
313 static int
314 ext4_ext_fill_path_bdata(struct ext4_extent_path *path,
315     struct buf *bp, uint64_t blk)
316 {
317
318         KASSERT(path->ep_data == NULL,
319             ("ext4_ext_fill_path_bdata: bad ep_data"));
320
321         path->ep_data = malloc(bp->b_bufsize, M_EXT2EXTENTS, M_WAITOK);
322         if (!path->ep_data)
323                 return (ENOMEM);
324
325         memcpy(path->ep_data, bp->b_data, bp->b_bufsize);
326         path->ep_blk = blk;
327
328         return (0);
329 }
330
331 static void
332 ext4_ext_fill_path_buf(struct ext4_extent_path *path, struct buf *bp)
333 {
334
335         KASSERT(path->ep_data != NULL,
336             ("ext4_ext_fill_path_buf: bad ep_data"));
337
338         memcpy(bp->b_data, path->ep_data, bp->b_bufsize);
339 }
340
341 static void
342 ext4_ext_drop_refs(struct ext4_extent_path *path)
343 {
344         int depth, i;
345
346         if (!path)
347                 return;
348
349         depth = path->ep_depth;
350         for (i = 0; i <= depth; i++, path++)
351                 if (path->ep_data) {
352                         free(path->ep_data, M_EXT2EXTENTS);
353                         path->ep_data = NULL;
354                 }
355 }
356
357 void
358 ext4_ext_path_free(struct ext4_extent_path *path)
359 {
360
361         if (!path)
362                 return;
363
364         ext4_ext_drop_refs(path);
365         free(path, M_EXT2EXTENTS);
366 }
367
368 int
369 ext4_ext_find_extent(struct inode *ip, daddr_t block,
370     struct ext4_extent_path **ppath)
371 {
372         struct m_ext2fs *fs;
373         struct ext4_extent_header *eh;
374         struct ext4_extent_path *path;
375         struct buf *bp;
376         uint64_t blk;
377         int error, depth, i, ppos, alloc;
378
379         fs = ip->i_e2fs;
380         eh = ext4_ext_inode_header(ip);
381         depth = ext4_ext_inode_depth(ip);
382         ppos = 0;
383         alloc = 0;
384
385         error = ext4_ext_check_header(ip, eh);
386         if (error)
387                 return (error);
388
389         if (ppath == NULL)
390                 return (EINVAL);
391
392         path = *ppath;
393         if (path == NULL) {
394                 path = malloc(EXT4_EXT_DEPTH_MAX *
395                     sizeof(struct ext4_extent_path),
396                     M_EXT2EXTENTS, M_WAITOK | M_ZERO);
397                 if (!path)
398                         return (ENOMEM);
399
400                 *ppath = path;
401                 alloc = 1;
402         }
403
404         path[0].ep_header = eh;
405         path[0].ep_data = NULL;
406
407         /* Walk through the tree. */
408         i = depth;
409         while (i) {
410                 ext4_ext_binsearch_index(&path[ppos], block);
411                 blk = ext4_ext_index_pblock(path[ppos].ep_index);
412                 path[ppos].ep_depth = i;
413                 path[ppos].ep_ext = NULL;
414
415                 error = bread(ip->i_devvp, fsbtodb(ip->i_e2fs, blk),
416                     ip->i_e2fs->e2fs_bsize, NOCRED, &bp);
417                 if (error) {
418                         goto error;
419                 }
420
421                 ppos++;
422                 if (ppos > depth) {
423                         SDT_PROBE2(ext2fs, , trace, extents, 1,
424                             "ppos > depth => extent corrupted");
425                         error = EIO;
426                         brelse(bp);
427                         goto error;
428                 }
429
430                 ext4_ext_fill_path_bdata(&path[ppos], bp, blk);
431                 bqrelse(bp);
432
433                 eh = ext4_ext_block_header(path[ppos].ep_data);
434                 if (ext4_ext_check_header(ip, eh) ||
435                     ext2_extent_blk_csum_verify(ip, path[ppos].ep_data)) {
436                         error = EIO;
437                         goto error;
438                 }
439
440                 path[ppos].ep_header = eh;
441
442                 i--;
443         }
444
445         error = ext4_ext_check_header(ip, eh);
446         if (error)
447                 goto error;
448
449         /* Find extent. */
450         path[ppos].ep_depth = i;
451         path[ppos].ep_header = eh;
452         path[ppos].ep_ext = NULL;
453         path[ppos].ep_index = NULL;
454         ext4_ext_binsearch_ext(&path[ppos], block);
455         return (0);
456
457 error:
458         ext4_ext_drop_refs(path);
459         if (alloc)
460                 free(path, M_EXT2EXTENTS);
461
462         *ppath = NULL;
463
464         return (error);
465 }
466
467 static inline int
468 ext4_ext_space_root(struct inode *ip)
469 {
470         int size;
471
472         size = sizeof(ip->i_data);
473         size -= sizeof(struct ext4_extent_header);
474         size /= sizeof(struct ext4_extent);
475
476         return (size);
477 }
478
479 static inline int
480 ext4_ext_space_block(struct inode *ip)
481 {
482         struct m_ext2fs *fs;
483         int size;
484
485         fs = ip->i_e2fs;
486
487         size = (fs->e2fs_bsize - sizeof(struct ext4_extent_header)) /
488             sizeof(struct ext4_extent);
489
490         return (size);
491 }
492
493 static inline int
494 ext4_ext_space_block_index(struct inode *ip)
495 {
496         struct m_ext2fs *fs;
497         int size;
498
499         fs = ip->i_e2fs;
500
501         size = (fs->e2fs_bsize - sizeof(struct ext4_extent_header)) /
502             sizeof(struct ext4_extent_index);
503
504         return (size);
505 }
506
507 void
508 ext4_ext_tree_init(struct inode *ip)
509 {
510         struct ext4_extent_header *ehp;
511
512         ip->i_flag |= IN_E4EXTENTS;
513
514         memset(ip->i_data, 0, EXT2_NDADDR + EXT2_NIADDR);
515         ehp = (struct ext4_extent_header *)ip->i_data;
516         ehp->eh_magic = EXT4_EXT_MAGIC;
517         ehp->eh_max = ext4_ext_space_root(ip);
518         ip->i_ext_cache.ec_type = EXT4_EXT_CACHE_NO;
519         ip->i_flag |= IN_CHANGE | IN_UPDATE;
520         ext2_update(ip->i_vnode, 1);
521 }
522
523 static inline void
524 ext4_ext_put_in_cache(struct inode *ip, uint32_t blk,
525                         uint32_t len, uint32_t start, int type)
526 {
527
528         KASSERT(len != 0, ("ext4_ext_put_in_cache: bad input"));
529
530         ip->i_ext_cache.ec_type = type;
531         ip->i_ext_cache.ec_blk = blk;
532         ip->i_ext_cache.ec_len = len;
533         ip->i_ext_cache.ec_start = start;
534 }
535
536 static e4fs_daddr_t
537 ext4_ext_blkpref(struct inode *ip, struct ext4_extent_path *path,
538     e4fs_daddr_t block)
539 {
540         struct m_ext2fs *fs;
541         struct ext4_extent *ex;
542         e4fs_daddr_t bg_start;
543         int depth;
544
545         fs = ip->i_e2fs;
546
547         if (path) {
548                 depth = path->ep_depth;
549                 ex = path[depth].ep_ext;
550                 if (ex) {
551                         e4fs_daddr_t pblk = ext4_ext_extent_pblock(ex);
552                         e2fs_daddr_t blk = ex->e_blk;
553
554                         if (block > blk)
555                                 return (pblk + (block - blk));
556                         else
557                                 return (pblk - (blk - block));
558                 }
559
560                 /* Try to get block from index itself. */
561                 if (path[depth].ep_data)
562                         return (path[depth].ep_blk);
563         }
564
565         /* Use inode's group. */
566         bg_start = (ip->i_block_group * EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) +
567             fs->e2fs->e2fs_first_dblock;
568
569         return (bg_start + block);
570 }
571
572 static int inline
573 ext4_can_extents_be_merged(struct ext4_extent *ex1,
574     struct ext4_extent *ex2)
575 {
576
577         if (ex1->e_blk + ex1->e_len != ex2->e_blk)
578                 return (0);
579
580         if (ex1->e_len + ex2->e_len > EXT4_MAX_LEN)
581                 return (0);
582
583         if (ext4_ext_extent_pblock(ex1) + ex1->e_len ==
584             ext4_ext_extent_pblock(ex2))
585                 return (1);
586
587         return (0);
588 }
589
590 static unsigned
591 ext4_ext_next_leaf_block(struct inode *ip, struct ext4_extent_path *path)
592 {
593         int depth = path->ep_depth;
594
595         /* Empty tree */
596         if (depth == 0)
597                 return (EXT4_MAX_BLOCKS);
598
599         /* Go to indexes. */
600         depth--;
601
602         while (depth >= 0) {
603                 if (path[depth].ep_index !=
604                     EXT_LAST_INDEX(path[depth].ep_header))
605                         return (path[depth].ep_index[1].ei_blk);
606
607                 depth--;
608         }
609
610         return (EXT4_MAX_BLOCKS);
611 }
612
613 static int
614 ext4_ext_dirty(struct inode *ip, struct ext4_extent_path *path)
615 {
616         struct m_ext2fs *fs;
617         struct buf *bp;
618         uint64_t blk;
619         int error;
620
621         fs = ip->i_e2fs;
622
623         if (!path)
624                 return (EINVAL);
625
626         if (path->ep_data) {
627                 blk = path->ep_blk;
628                 bp = getblk(ip->i_devvp, fsbtodb(fs, blk),
629                     fs->e2fs_bsize, 0, 0, 0);
630                 if (!bp)
631                         return (EIO);
632                 ext4_ext_fill_path_buf(path, bp);
633                 ext2_extent_blk_csum_set(ip, bp->b_data);
634                 error = bwrite(bp);
635         } else {
636                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
637                 error = ext2_update(ip->i_vnode, 1);
638         }
639
640         return (error);
641 }
642
643 static int
644 ext4_ext_insert_index(struct inode *ip, struct ext4_extent_path *path,
645     uint32_t lblk, e4fs_daddr_t blk)
646 {
647         struct m_ext2fs *fs;
648         struct ext4_extent_index *idx;
649         int len;
650
651         fs = ip->i_e2fs;
652
653         if (lblk == path->ep_index->ei_blk) {
654                 SDT_PROBE2(ext2fs, , trace, extents, 1,
655                     "lblk == index blk => extent corrupted");
656                 return (EIO);
657         }
658
659         if (path->ep_header->eh_ecount >= path->ep_header->eh_max) {
660                 SDT_PROBE2(ext2fs, , trace, extents, 1,
661                     "ecout > maxcount => extent corrupted");
662                 return (EIO);
663         }
664
665         if (lblk > path->ep_index->ei_blk) {
666                 /* Insert after. */
667                 idx = path->ep_index + 1;
668         } else {
669                 /* Insert before. */
670                 idx = path->ep_index;
671         }
672
673         len = EXT_LAST_INDEX(path->ep_header) - idx + 1;
674         if (len > 0)
675                 memmove(idx + 1, idx, len * sizeof(struct ext4_extent_index));
676
677         if (idx > EXT_MAX_INDEX(path->ep_header)) {
678                 SDT_PROBE2(ext2fs, , trace, extents, 1,
679                     "index is out of range => extent corrupted");
680                 return (EIO);
681         }
682
683         idx->ei_blk = lblk;
684         ext4_index_store_pblock(idx, blk);
685         path->ep_header->eh_ecount++;
686
687         return (ext4_ext_dirty(ip, path));
688 }
689
690 static e4fs_daddr_t
691 ext4_ext_alloc_meta(struct inode *ip)
692 {
693         e4fs_daddr_t blk = ext2_alloc_meta(ip);
694         if (blk) {
695                 ip->i_blocks += btodb(ip->i_e2fs->e2fs_bsize);
696                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
697                 ext2_update(ip->i_vnode, 1);
698         }
699
700         return (blk);
701 }
702
703 static void
704 ext4_ext_blkfree(struct inode *ip, uint64_t blk, int count, int flags)
705 {
706         struct m_ext2fs *fs;
707         int i, blocksreleased;
708
709         fs = ip->i_e2fs;
710         blocksreleased = count;
711
712         for(i = 0; i < count; i++)
713                 ext2_blkfree(ip, blk + i, fs->e2fs_bsize);
714
715         if (ip->i_blocks >= blocksreleased)
716                 ip->i_blocks -= (btodb(fs->e2fs_bsize)*blocksreleased);
717         else
718                 ip->i_blocks = 0;
719
720         ip->i_flag |= IN_CHANGE | IN_UPDATE;
721         ext2_update(ip->i_vnode, 1);
722 }
723
724 static int
725 ext4_ext_split(struct inode *ip, struct ext4_extent_path *path,
726     struct ext4_extent *newext, int at)
727 {
728         struct m_ext2fs *fs;
729         struct  buf *bp;
730         int depth = ext4_ext_inode_depth(ip);
731         struct ext4_extent_header *neh;
732         struct ext4_extent_index *fidx;
733         struct ext4_extent *ex;
734         int i = at, k, m, a;
735         e4fs_daddr_t newblk, oldblk;
736         uint32_t border;
737         e4fs_daddr_t *ablks = NULL;
738         int error = 0;
739
740         fs = ip->i_e2fs;
741         bp = NULL;
742
743         /*
744          * We will split at current extent for now.
745          */
746         if (path[depth].ep_ext > EXT_MAX_EXTENT(path[depth].ep_header)) {
747                 SDT_PROBE2(ext2fs, , trace, extents, 1,
748                     "extent is out of range => extent corrupted");
749                 return (EIO);
750         }
751
752         if (path[depth].ep_ext != EXT_MAX_EXTENT(path[depth].ep_header))
753                 border = path[depth].ep_ext[1].e_blk;
754         else
755                 border = newext->e_blk;
756
757         /* Allocate new blocks. */
758         ablks = malloc(sizeof(e4fs_daddr_t) * depth,
759             M_EXT2EXTENTS, M_WAITOK | M_ZERO);
760         if (!ablks)
761                 return (ENOMEM);
762         for (a = 0; a < depth - at; a++) {
763                 newblk = ext4_ext_alloc_meta(ip);
764                 if (newblk == 0)
765                         goto cleanup;
766                 ablks[a] = newblk;
767         }
768
769         newblk = ablks[--a];
770         bp = getblk(ip->i_devvp, fsbtodb(fs, newblk), fs->e2fs_bsize, 0, 0, 0);
771         if (!bp) {
772                 error = EIO;
773                 goto cleanup;
774         }
775
776         neh = ext4_ext_block_header(bp->b_data);
777         neh->eh_ecount = 0;
778         neh->eh_max = ext4_ext_space_block(ip);
779         neh->eh_magic = EXT4_EXT_MAGIC;
780         neh->eh_depth = 0;
781         ex = EXT_FIRST_EXTENT(neh);
782
783         if (path[depth].ep_header->eh_ecount != path[depth].ep_header->eh_max) {
784                 SDT_PROBE2(ext2fs, , trace, extents, 1,
785                     "extents count out of range => extent corrupted");
786                 error = EIO;
787                 goto cleanup;
788         }
789
790         /* Start copy from next extent. */
791         m = 0;
792         path[depth].ep_ext++;
793         while (path[depth].ep_ext <= EXT_MAX_EXTENT(path[depth].ep_header)) {
794                 path[depth].ep_ext++;
795                 m++;
796         }
797         if (m) {
798                 memmove(ex, path[depth].ep_ext - m,
799                     sizeof(struct ext4_extent) * m);
800                 neh->eh_ecount = neh->eh_ecount + m;
801         }
802
803         ext2_extent_blk_csum_set(ip, bp->b_data);
804         bwrite(bp);
805         bp = NULL;
806
807         /* Fix old leaf. */
808         if (m) {
809                 path[depth].ep_header->eh_ecount =
810                     path[depth].ep_header->eh_ecount - m;
811                 ext4_ext_dirty(ip, path + depth);
812         }
813
814         /* Create intermediate indexes. */
815         k = depth - at - 1;
816         KASSERT(k >= 0, ("ext4_ext_split: negative k"));
817
818         /* Insert new index into current index block. */
819         i = depth - 1;
820         while (k--) {
821                 oldblk = newblk;
822                 newblk = ablks[--a];
823                 error = bread(ip->i_devvp, fsbtodb(fs, newblk),
824                     (int)fs->e2fs_bsize, NOCRED, &bp);
825                 if (error) {
826                         goto cleanup;
827                 }
828
829                 neh = (struct ext4_extent_header *)bp->b_data;
830                 neh->eh_ecount = 1;
831                 neh->eh_magic = EXT4_EXT_MAGIC;
832                 neh->eh_max = ext4_ext_space_block_index(ip);
833                 neh->eh_depth = depth - i;
834                 fidx = EXT_FIRST_INDEX(neh);
835                 fidx->ei_blk = border;
836                 ext4_index_store_pblock(fidx, oldblk);
837
838                 m = 0;
839                 path[i].ep_index++;
840                 while (path[i].ep_index <= EXT_MAX_INDEX(path[i].ep_header)) {
841                         path[i].ep_index++;
842                         m++;
843                 }
844                 if (m) {
845                         memmove(++fidx, path[i].ep_index - m,
846                             sizeof(struct ext4_extent_index) * m);
847                         neh->eh_ecount = neh->eh_ecount + m;
848                 }
849
850                 ext2_extent_blk_csum_set(ip, bp->b_data);
851                 bwrite(bp);
852                 bp = NULL;
853
854                 /* Fix old index. */
855                 if (m) {
856                         path[i].ep_header->eh_ecount =
857                             path[i].ep_header->eh_ecount - m;
858                         ext4_ext_dirty(ip, path + i);
859                 }
860
861                 i--;
862         }
863
864         error = ext4_ext_insert_index(ip, path + at, border, newblk);
865
866 cleanup:
867         if (bp)
868                 brelse(bp);
869
870         if (error) {
871                 for (i = 0; i < depth; i++) {
872                         if (!ablks[i])
873                                 continue;
874                         ext4_ext_blkfree(ip, ablks[i], 1, 0);
875                 }
876         }
877
878         free(ablks, M_EXT2EXTENTS);
879
880         return (error);
881 }
882
883 static int
884 ext4_ext_grow_indepth(struct inode *ip, struct ext4_extent_path *path,
885     struct ext4_extent *newext)
886 {
887         struct m_ext2fs *fs;
888         struct ext4_extent_path *curpath;
889         struct ext4_extent_header *neh;
890         struct buf *bp;
891         e4fs_daddr_t newblk;
892         int error = 0;
893
894         fs = ip->i_e2fs;
895         curpath = path;
896
897         newblk = ext4_ext_alloc_meta(ip);
898         if (newblk == 0)
899                 return (error);
900
901         bp = getblk(ip->i_devvp, fsbtodb(fs, newblk), fs->e2fs_bsize, 0, 0, 0);
902         if (!bp)
903                 return (EIO);
904
905         /* Move top-level index/leaf into new block. */
906         memmove(bp->b_data, curpath->ep_header, sizeof(ip->i_data));
907
908         /* Set size of new block */
909         neh = ext4_ext_block_header(bp->b_data);
910         neh->eh_magic = EXT4_EXT_MAGIC;
911
912         if (ext4_ext_inode_depth(ip))
913                 neh->eh_max = ext4_ext_space_block_index(ip);
914         else
915                 neh->eh_max = ext4_ext_space_block(ip);
916
917         ext2_extent_blk_csum_set(ip, bp->b_data);
918         error = bwrite(bp);
919         if (error)
920                 goto out;
921
922         bp = NULL;
923
924         curpath->ep_header->eh_magic = EXT4_EXT_MAGIC;
925         curpath->ep_header->eh_max = ext4_ext_space_root(ip);
926         curpath->ep_header->eh_ecount = 1;
927         curpath->ep_index = EXT_FIRST_INDEX(curpath->ep_header);
928         curpath->ep_index->ei_blk = EXT_FIRST_EXTENT(path[0].ep_header)->e_blk;
929         ext4_index_store_pblock(curpath->ep_index, newblk);
930
931         neh = ext4_ext_inode_header(ip);
932         neh->eh_depth = path->ep_depth + 1;
933         ext4_ext_dirty(ip, curpath);
934 out:
935         brelse(bp);
936
937         return (error);
938 }
939
940 static int
941 ext4_ext_create_new_leaf(struct inode *ip, struct ext4_extent_path *path,
942     struct ext4_extent *newext)
943 {
944         struct ext4_extent_path *curpath;
945         int depth, i, error;
946
947 repeat:
948         i = depth = ext4_ext_inode_depth(ip);
949
950         /* Look for free index entry int the tree */
951         curpath = path + depth;
952         while (i > 0 && !EXT_HAS_FREE_INDEX(curpath)) {
953                 i--;
954                 curpath--;
955         }
956
957         /*
958          * We use already allocated block for index block,
959          * so subsequent data blocks should be contiguous.
960          */
961         if (EXT_HAS_FREE_INDEX(curpath)) {
962                 error = ext4_ext_split(ip, path, newext, i);
963                 if (error)
964                         goto out;
965
966                 /* Refill path. */
967                 ext4_ext_drop_refs(path);
968                 error = ext4_ext_find_extent(ip, newext->e_blk, &path);
969                 if (error)
970                         goto out;
971         } else {
972                 /* Tree is full, do grow in depth. */
973                 error = ext4_ext_grow_indepth(ip, path, newext);
974                 if (error)
975                         goto out;
976
977                 /* Refill path. */
978                 ext4_ext_drop_refs(path);
979                 error = ext4_ext_find_extent(ip, newext->e_blk, &path);
980                 if (error)
981                         goto out;
982
983                 /* Check and split tree if required. */
984                 depth = ext4_ext_inode_depth(ip);
985                 if (path[depth].ep_header->eh_ecount ==
986                     path[depth].ep_header->eh_max)
987                         goto repeat;
988         }
989
990 out:
991         return (error);
992 }
993
994 static int
995 ext4_ext_correct_indexes(struct inode *ip, struct ext4_extent_path *path)
996 {
997         struct ext4_extent_header *eh;
998         struct ext4_extent *ex;
999         int32_t border;
1000         int depth, k;
1001
1002         depth = ext4_ext_inode_depth(ip);
1003         eh = path[depth].ep_header;
1004         ex = path[depth].ep_ext;
1005
1006         if (ex == NULL || eh == NULL)
1007                 return (EIO);
1008
1009         if (!depth)
1010                 return (0);
1011
1012         /* We will correct tree if first leaf got modified only. */
1013         if (ex != EXT_FIRST_EXTENT(eh))
1014                 return (0);
1015
1016         k = depth - 1;
1017         border = path[depth].ep_ext->e_blk;
1018         path[k].ep_index->ei_blk = border;
1019         ext4_ext_dirty(ip, path + k);
1020         while (k--) {
1021                 /* Change all left-side indexes. */
1022                 if (path[k+1].ep_index != EXT_FIRST_INDEX(path[k+1].ep_header))
1023                         break;
1024
1025                 path[k].ep_index->ei_blk = border;
1026                 ext4_ext_dirty(ip, path + k);
1027         }
1028
1029         return (0);
1030 }
1031
1032 static int
1033 ext4_ext_insert_extent(struct inode *ip, struct ext4_extent_path *path,
1034     struct ext4_extent *newext)
1035 {
1036         struct ext4_extent_header * eh;
1037         struct ext4_extent *ex, *nex, *nearex;
1038         struct ext4_extent_path *npath;
1039         int depth, len, error, next;
1040
1041         depth = ext4_ext_inode_depth(ip);
1042         ex = path[depth].ep_ext;
1043         npath = NULL;
1044
1045         if (newext->e_len == 0 || path[depth].ep_header == NULL)
1046                 return (EINVAL);
1047
1048         /* Insert block into found extent. */
1049         if (ex && ext4_can_extents_be_merged(ex, newext)) {
1050                 ex->e_len = ex->e_len + newext->e_len;
1051                 eh = path[depth].ep_header;
1052                 nearex = ex;
1053                 goto merge;
1054         }
1055
1056 repeat:
1057         depth = ext4_ext_inode_depth(ip);
1058         eh = path[depth].ep_header;
1059         if (eh->eh_ecount < eh->eh_max)
1060                 goto has_space;
1061
1062         /* Try next leaf */
1063         nex = EXT_LAST_EXTENT(eh);
1064         next = ext4_ext_next_leaf_block(ip, path);
1065         if (newext->e_blk > nex->e_blk && next != EXT4_MAX_BLOCKS) {
1066                 KASSERT(npath == NULL,
1067                     ("ext4_ext_insert_extent: bad path"));
1068
1069                 error = ext4_ext_find_extent(ip, next, &npath);
1070                 if (error)
1071                         goto cleanup;
1072
1073                 if (npath->ep_depth != path->ep_depth) {
1074                         error = EIO;
1075                         goto cleanup;
1076                 }
1077
1078                 eh = npath[depth].ep_header;
1079                 if (eh->eh_ecount < eh->eh_max) {
1080                         path = npath;
1081                         goto repeat;
1082                 }
1083         }
1084
1085         /*
1086          * There is no free space in the found leaf,
1087          * try to add a new leaf to the tree.
1088          */
1089         error = ext4_ext_create_new_leaf(ip, path, newext);
1090         if (error)
1091                 goto cleanup;
1092
1093         depth = ext4_ext_inode_depth(ip);
1094         eh = path[depth].ep_header;
1095
1096 has_space:
1097         nearex = path[depth].ep_ext;
1098         if (!nearex) {
1099                 /* Create new extent in the leaf. */
1100                 path[depth].ep_ext = EXT_FIRST_EXTENT(eh);
1101         } else if (newext->e_blk > nearex->e_blk) {
1102                 if (nearex != EXT_LAST_EXTENT(eh)) {
1103                         len = EXT_MAX_EXTENT(eh) - nearex;
1104                         len = (len - 1) * sizeof(struct ext4_extent);
1105                         len = len < 0 ? 0 : len;
1106                         memmove(nearex + 2, nearex + 1, len);
1107                 }
1108                 path[depth].ep_ext = nearex + 1;
1109         } else {
1110                 len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
1111                 len = len < 0 ? 0 : len;
1112                 memmove(nearex + 1, nearex, len);
1113                 path[depth].ep_ext = nearex;
1114         }
1115
1116         eh->eh_ecount = eh->eh_ecount + 1;
1117         nearex = path[depth].ep_ext;
1118         nearex->e_blk = newext->e_blk;
1119         nearex->e_start_lo = newext->e_start_lo;
1120         nearex->e_start_hi = newext->e_start_hi;
1121         nearex->e_len = newext->e_len;
1122
1123 merge:
1124         /* Try to merge extents to the right. */
1125         while (nearex < EXT_LAST_EXTENT(eh)) {
1126                 if (!ext4_can_extents_be_merged(nearex, nearex + 1))
1127                         break;
1128
1129                 /* Merge with next extent. */
1130                 nearex->e_len = nearex->e_len + nearex[1].e_len;
1131                 if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
1132                         len = (EXT_LAST_EXTENT(eh) - nearex - 1) *
1133                             sizeof(struct ext4_extent);
1134                         memmove(nearex + 1, nearex + 2, len);
1135                 }
1136
1137                 eh->eh_ecount = eh->eh_ecount - 1;
1138                 KASSERT(eh->eh_ecount != 0,
1139                     ("ext4_ext_insert_extent: bad ecount"));
1140         }
1141
1142         /*
1143          * Try to merge extents to the left,
1144          * start from inexes correction.
1145          */
1146         error = ext4_ext_correct_indexes(ip, path);
1147         if (error)
1148                 goto cleanup;
1149
1150         ext4_ext_dirty(ip, path + depth);
1151
1152 cleanup:
1153         if (npath) {
1154                 ext4_ext_drop_refs(npath);
1155                 free(npath, M_EXT2EXTENTS);
1156         }
1157
1158         ip->i_ext_cache.ec_type = EXT4_EXT_CACHE_NO;
1159         return (error);
1160 }
1161
1162 static e4fs_daddr_t
1163 ext4_new_blocks(struct inode *ip, daddr_t lbn, e4fs_daddr_t pref,
1164     struct ucred *cred, unsigned long *count, int *perror)
1165 {
1166         struct m_ext2fs *fs;
1167         e4fs_daddr_t newblk;
1168
1169         /*
1170          * We will allocate only single block for now.
1171          */
1172         if (*count > 1)
1173                 return (0);
1174
1175         fs = ip->i_e2fs;
1176         EXT2_LOCK(ip->i_ump);
1177         *perror = ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newblk);
1178         if (*perror)
1179                 return (0);
1180
1181         if (newblk) {
1182                 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1183                 ext2_update(ip->i_vnode, 1);
1184         }
1185
1186         return (newblk);
1187 }
1188
1189 int
1190 ext4_ext_get_blocks(struct inode *ip, e4fs_daddr_t iblk,
1191     unsigned long max_blocks, struct ucred *cred, struct buf **bpp,
1192     int *pallocated, daddr_t *nb)
1193 {
1194         struct m_ext2fs *fs;
1195         struct buf *bp = NULL;
1196         struct ext4_extent_path *path;
1197         struct ext4_extent newex, *ex;
1198         e4fs_daddr_t bpref, newblk = 0;
1199         unsigned long allocated = 0;
1200         int error = 0, depth;
1201
1202         if(bpp)
1203                 *bpp = NULL;
1204         *pallocated = 0;
1205
1206         /* Check cache. */
1207         path = NULL;
1208         if ((bpref = ext4_ext_in_cache(ip, iblk, &newex))) {
1209                 if (bpref == EXT4_EXT_CACHE_IN) {
1210                         /* Block is already allocated. */
1211                         newblk = iblk - newex.e_blk +
1212                             ext4_ext_extent_pblock(&newex);
1213                         allocated = newex.e_len - (iblk - newex.e_blk);
1214                         goto out;
1215                 } else {
1216                         error = EIO;
1217                         goto out2;
1218                 }
1219         }
1220
1221         error = ext4_ext_find_extent(ip, iblk, &path);
1222         if (error) {
1223                 goto out2;
1224         }
1225
1226         depth = ext4_ext_inode_depth(ip);
1227         if (path[depth].ep_ext == NULL && depth != 0) {
1228                 error = EIO;
1229                 goto out2;
1230         }
1231
1232         if ((ex = path[depth].ep_ext)) {
1233                 uint64_t lblk = ex->e_blk;
1234                 uint16_t e_len  = ex->e_len;
1235                 e4fs_daddr_t e_start = ext4_ext_extent_pblock(ex);
1236
1237                 if (e_len > EXT4_MAX_LEN)
1238                         goto out2;
1239
1240                 /* If we found extent covers block, simply return it. */
1241                 if (iblk >= lblk && iblk < lblk + e_len) {
1242                         newblk = iblk - lblk + e_start;
1243                         allocated = e_len - (iblk - lblk);
1244                         ext4_ext_put_in_cache(ip, lblk, e_len,
1245                             e_start, EXT4_EXT_CACHE_IN);
1246                         goto out;
1247                 }
1248         }
1249
1250         /* Allocate the new block. */
1251         if (S_ISREG(ip->i_mode) && (!ip->i_next_alloc_block)) {
1252                 ip->i_next_alloc_goal = 0;
1253         }
1254
1255         bpref = ext4_ext_blkpref(ip, path, iblk);
1256         allocated = max_blocks;
1257         newblk = ext4_new_blocks(ip, iblk, bpref, cred, &allocated, &error);
1258         if (!newblk)
1259                 goto out2;
1260
1261         /* Try to insert new extent into found leaf and return. */
1262         newex.e_blk = iblk;
1263         ext4_ext_store_pblock(&newex, newblk);
1264         newex.e_len = allocated;
1265         error = ext4_ext_insert_extent(ip, path, &newex);
1266         if (error)
1267                 goto out2;
1268
1269         newblk = ext4_ext_extent_pblock(&newex);
1270         ext4_ext_put_in_cache(ip, iblk, allocated, newblk, EXT4_EXT_CACHE_IN);
1271         *pallocated = 1;
1272
1273 out:
1274         if (allocated > max_blocks)
1275                 allocated = max_blocks;
1276
1277         if (bpp)
1278         {
1279                 fs = ip->i_e2fs;
1280                 error = bread(ip->i_devvp, fsbtodb(fs, newblk),
1281                     fs->e2fs_bsize, cred, &bp);
1282                 if (error) {
1283                         brelse(bp);
1284                 } else {
1285                         *bpp = bp;
1286                 }
1287         }
1288
1289 out2:
1290         if (path) {
1291                 ext4_ext_drop_refs(path);
1292                 free(path, M_EXT2EXTENTS);
1293         }
1294
1295         if (nb)
1296                 *nb = newblk;
1297
1298         return (error);
1299 }
1300
1301 static inline uint16_t
1302 ext4_ext_get_actual_len(struct ext4_extent *ext)
1303 {
1304
1305         return (ext->e_len <= EXT_INIT_MAX_LEN ?
1306             ext->e_len : (ext->e_len - EXT_INIT_MAX_LEN));
1307 }
1308
1309 static inline struct ext4_extent_header *
1310 ext4_ext_header(struct inode *ip)
1311 {
1312
1313         return ((struct ext4_extent_header *)ip->i_db);
1314 }
1315
1316 static int
1317 ext4_remove_blocks(struct inode *ip, struct ext4_extent *ex,
1318     unsigned long from, unsigned long to)
1319 {
1320         unsigned long num, start;
1321
1322         if (from >= ex->e_blk &&
1323             to == ex->e_blk + ext4_ext_get_actual_len(ex) - 1) {
1324                 /* Tail cleanup. */
1325                 num = ex->e_blk + ext4_ext_get_actual_len(ex) - from;
1326                 start = ext4_ext_extent_pblock(ex) +
1327                     ext4_ext_get_actual_len(ex) - num;
1328                 ext4_ext_blkfree(ip, start, num, 0);
1329         }
1330
1331         return (0);
1332 }
1333
1334 static int
1335 ext4_ext_rm_index(struct inode *ip, struct ext4_extent_path *path)
1336 {
1337         e4fs_daddr_t leaf;
1338
1339         /* Free index block. */
1340         path--;
1341         leaf = ext4_ext_index_pblock(path->ep_index);
1342         KASSERT(path->ep_header->eh_ecount != 0,
1343             ("ext4_ext_rm_index: bad ecount"));
1344         path->ep_header->eh_ecount--;
1345         ext4_ext_dirty(ip, path);
1346         ext4_ext_blkfree(ip, leaf, 1, 0);
1347         return (0);
1348 }
1349
1350 static int
1351 ext4_ext_rm_leaf(struct inode *ip, struct ext4_extent_path *path,
1352     uint64_t start)
1353 {
1354         struct ext4_extent_header *eh;
1355         struct ext4_extent *ex;
1356         unsigned int a, b, block, num;
1357         unsigned long ex_blk;
1358         unsigned short ex_len;
1359         int depth;
1360         int error, correct_index;
1361
1362         depth = ext4_ext_inode_depth(ip);
1363         if (!path[depth].ep_header) {
1364                 if (path[depth].ep_data == NULL)
1365                         return (EINVAL);
1366                 path[depth].ep_header =
1367                     (struct ext4_extent_header* )path[depth].ep_data;
1368         }
1369
1370         eh = path[depth].ep_header;
1371         if (!eh) {
1372                 SDT_PROBE2(ext2fs, , trace, extents, 1,
1373                     "bad header => extent corrupted");
1374                 return (EIO);
1375         }
1376
1377         ex = EXT_LAST_EXTENT(eh);
1378         ex_blk = ex->e_blk;
1379         ex_len = ext4_ext_get_actual_len(ex);
1380
1381         error = 0;
1382         correct_index = 0;
1383         while (ex >= EXT_FIRST_EXTENT(eh) && ex_blk + ex_len > start) {
1384                 path[depth].ep_ext = ex;
1385                 a = ex_blk > start ? ex_blk : start;
1386                 b = (uint64_t)ex_blk + ex_len - 1 <
1387                     EXT4_MAX_BLOCKS ? ex_blk + ex_len - 1 : EXT4_MAX_BLOCKS;
1388
1389                 if (a != ex_blk && b != ex_blk + ex_len - 1)
1390                         return (EINVAL);
1391                 else if (a != ex_blk) {
1392                         /* Remove tail of the extent. */
1393                         block = ex_blk;
1394                         num = a - block;
1395                 } else if (b != ex_blk + ex_len - 1) {
1396                         /* Remove head of the extent, not implemented. */
1397                         return (EINVAL);
1398                 } else {
1399                         /* Remove whole extent. */
1400                         block = ex_blk;
1401                         num = 0;
1402                 }
1403
1404                 if (ex == EXT_FIRST_EXTENT(eh))
1405                         correct_index = 1;
1406
1407                 error = ext4_remove_blocks(ip, ex, a, b);
1408                 if (error)
1409                         goto out;
1410
1411                 if (num == 0) {
1412                         ext4_ext_store_pblock(ex, 0);
1413                         eh->eh_ecount--;
1414                 }
1415
1416                 ex->e_blk = block;
1417                 ex->e_len = num;
1418
1419                 ext4_ext_dirty(ip, path + depth);
1420
1421                 ex--;
1422                 ex_blk = ex->e_blk;
1423                 ex_len = ext4_ext_get_actual_len(ex);
1424         };
1425
1426         if (correct_index && eh->eh_ecount)
1427                 error = ext4_ext_correct_indexes(ip, path);
1428
1429         /*
1430          * If this leaf is free, we should
1431          * remove it from index block above.
1432          */
1433         if (error == 0 && eh->eh_ecount == 0 && path[depth].ep_data != NULL)
1434                 error = ext4_ext_rm_index(ip, path + depth);
1435
1436 out:
1437         return (error);
1438 }
1439
1440 static struct buf *
1441 ext4_read_extent_tree_block(struct inode *ip, e4fs_daddr_t pblk,
1442     int depth, int flags)
1443 {
1444         struct m_ext2fs *fs;
1445         struct ext4_extent_header *eh;
1446         struct buf *bp;
1447         int error;
1448
1449         fs = ip->i_e2fs;
1450         error = bread(ip->i_devvp, fsbtodb(fs, pblk),
1451             fs->e2fs_bsize, NOCRED, &bp);
1452         if (error) {
1453                 return (NULL);
1454         }
1455
1456         eh = ext4_ext_block_header(bp->b_data);
1457         if (eh->eh_depth != depth) {
1458                 SDT_PROBE2(ext2fs, , trace, extents, 1,
1459                     "unexpected eh_depth");
1460                 goto err;
1461         }
1462
1463         error = ext4_ext_check_header(ip, eh);
1464         if (error)
1465                 goto err;
1466
1467         return (bp);
1468
1469 err:
1470         brelse(bp);
1471         return (NULL);
1472
1473 }
1474
1475 static int inline
1476 ext4_ext_more_to_rm(struct ext4_extent_path *path)
1477 {
1478
1479         KASSERT(path->ep_index != NULL,
1480             ("ext4_ext_more_to_rm: bad index from path"));
1481
1482         if (path->ep_index < EXT_FIRST_INDEX(path->ep_header))
1483                 return (0);
1484
1485         if (path->ep_header->eh_ecount == path->index_count)
1486                 return (0);
1487
1488         return (1);
1489 }
1490
1491 int
1492 ext4_ext_remove_space(struct inode *ip, off_t length, int flags,
1493     struct ucred *cred, struct thread *td)
1494 {
1495         struct buf *bp;
1496         struct ext4_extent_header *ehp;
1497         struct ext4_extent_path *path;
1498         int depth;
1499         int i, error;
1500
1501         ehp = (struct ext4_extent_header *)ip->i_db;
1502         depth = ext4_ext_inode_depth(ip);
1503
1504         error = ext4_ext_check_header(ip, ehp);
1505         if(error)
1506                 return (error);
1507
1508         path = malloc(sizeof(struct ext4_extent_path) * (depth + 1),
1509             M_EXT2EXTENTS, M_WAITOK | M_ZERO);
1510         if (!path)
1511                 return (ENOMEM);
1512
1513         path[0].ep_header = ehp;
1514         path[0].ep_depth = depth;
1515         i = 0;
1516         while (error == 0 && i >= 0) {
1517                 if (i == depth) {
1518                         /* This is leaf. */
1519                         error = ext4_ext_rm_leaf(ip, path, length);
1520                         if (error)
1521                                 break;
1522                         free(path[i].ep_data, M_EXT2EXTENTS);
1523                         path[i].ep_data = NULL;
1524                         i--;
1525                         continue;
1526                 }
1527
1528                 /* This is index. */
1529                 if (!path[i].ep_header)
1530                         path[i].ep_header =
1531                             (struct ext4_extent_header *)path[i].ep_data;
1532
1533                 if (!path[i].ep_index) {
1534                         /* This level hasn't touched yet. */
1535                         path[i].ep_index = EXT_LAST_INDEX(path[i].ep_header);
1536                         path[i].index_count = path[i].ep_header->eh_ecount + 1;
1537                 } else {
1538                         /* We've already was here, see at next index. */
1539                         path[i].ep_index--;
1540                 }
1541
1542                 if (ext4_ext_more_to_rm(path + i)) {
1543                         memset(path + i + 1, 0, sizeof(*path));
1544                         bp = ext4_read_extent_tree_block(ip,
1545                             ext4_ext_index_pblock(path[i].ep_index),
1546                             path[0].ep_depth - (i + 1), 0);
1547                         if (!bp) {
1548                                 error = EIO;
1549                                 break;
1550                         }
1551
1552                         ext4_ext_fill_path_bdata(&path[i+1], bp,
1553                             ext4_ext_index_pblock(path[i].ep_index));
1554                         brelse(bp);
1555                         path[i].index_count = path[i].ep_header->eh_ecount;
1556                         i++;
1557                 } else {
1558                         if (path[i].ep_header->eh_ecount == 0 && i > 0) {
1559                                 /* Index is empty, remove it. */
1560                                 error = ext4_ext_rm_index(ip, path + i);
1561                         }
1562                         free(path[i].ep_data, M_EXT2EXTENTS);
1563                         path[i].ep_data = NULL;
1564                         i--;
1565                 }
1566         }
1567
1568         if (path->ep_header->eh_ecount == 0) {
1569                 /*
1570                  * Truncate the tree to zero.
1571                  */
1572                  ext4_ext_header(ip)->eh_depth = 0;
1573                  ext4_ext_header(ip)->eh_max = ext4_ext_space_root(ip);
1574                  ext4_ext_dirty(ip, path);
1575         }
1576
1577         ext4_ext_drop_refs(path);
1578         free(path, M_EXT2EXTENTS);
1579
1580         return (error);
1581 }