/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2013 by Delphix. All rights reserved.
 */

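/*
 * Range trees track segments of 64-bit integer space.  Segments are
 * kept in an AVL tree sorted by start offset; the comparator treats
 * overlapping segments as equal, so avl_find() locates any segment
 * that intersects a query range.  Adjacent segments are merged on
 * insertion, and a histogram of power-of-two segment sizes
 * (rt_histogram) is maintained alongside the tree.  The caller
 * supplies the lock (rt_lock) and must hold it across all operations.
 */
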
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/dnode.h>
#include <sys/zio.h>
#include <sys/range_tree.h>

static kmem_cache_t *range_seg_cache;

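/*
 * Set up / tear down the kmem cache backing all range_seg_t
 * allocations.  range_tree_init() is expected to be called once before
 * any trees are created, and range_tree_fini() once after all trees
 * have been destroyed.
 */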
void
range_tree_init(void)
{
        ASSERT(range_seg_cache == NULL);
        range_seg_cache = kmem_cache_create("range_seg_cache",
            sizeof (range_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
range_tree_fini(void)
{
        kmem_cache_destroy(range_seg_cache);
        range_seg_cache = NULL;
}

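/*
 * Recompute the segment-size histogram from scratch by walking every
 * segment in the tree, and verify that it matches rt_histogram.
 */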
void
range_tree_stat_verify(range_tree_t *rt)
{
        range_seg_t *rs;
        uint64_t hist[RANGE_TREE_HISTOGRAM_SIZE] = { 0 };
        int i;

        for (rs = avl_first(&rt->rt_root); rs != NULL;
            rs = AVL_NEXT(&rt->rt_root, rs)) {
                uint64_t size = rs->rs_end - rs->rs_start;
                int idx = highbit(size) - 1;

                hist[idx]++;
                ASSERT3U(hist[idx], !=, 0);
        }

        for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
                if (hist[i] != rt->rt_histogram[i]) {
                        zfs_dbgmsg("i=%d, hist=%p, hist=%llu, rt_hist=%llu",
                            i, hist, hist[i], rt->rt_histogram[i]);
                }
                VERIFY3U(hist[i], ==, rt->rt_histogram[i]);
        }
}

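/*
 * Bump / drop the histogram bucket for a segment's size class.  The
 * bucket index is the position of the highest set bit in the segment
 * size, so bucket i counts segments of size [2^i, 2^(i+1)).
 */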
static void
range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
{
        uint64_t size = rs->rs_end - rs->rs_start;
        int idx = highbit(size) - 1;

        ASSERT3U(idx, <,
            sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

        ASSERT(MUTEX_HELD(rt->rt_lock));
        rt->rt_histogram[idx]++;
        ASSERT3U(rt->rt_histogram[idx], !=, 0);
}

static void
range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
{
        uint64_t size = rs->rs_end - rs->rs_start;
        int idx = highbit(size) - 1;

        ASSERT3U(idx, <,
            sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));

        ASSERT(MUTEX_HELD(rt->rt_lock));
        ASSERT3U(rt->rt_histogram[idx], !=, 0);
        rt->rt_histogram[idx]--;
}

/*
 * NOTE: caller is responsible for all locking.
 */
static int
range_tree_seg_compare(const void *x1, const void *x2)
{
        const range_seg_t *r1 = x1;
        const range_seg_t *r2 = x2;

        if (r1->rs_start < r2->rs_start) {
                if (r1->rs_end > r2->rs_start)
                        return (0);
                return (-1);
        }
        if (r1->rs_start > r2->rs_start) {
                if (r1->rs_start < r2->rs_end)
                        return (0);
                return (1);
        }
        return (0);
}

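/*
 * Create a new, empty range tree.  The caller supplies the mutex that
 * protects the tree (lp) and, optionally, a set of callback ops that
 * are notified as segments come and go (e.g. to mirror the contents
 * into another data structure); pass ops = NULL if no callbacks are
 * needed.
 */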
range_tree_t *
range_tree_create(range_tree_ops_t *ops, void *arg, kmutex_t *lp)
{
        range_tree_t *rt;

        rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP);

        avl_create(&rt->rt_root, range_tree_seg_compare,
            sizeof (range_seg_t), offsetof(range_seg_t, rs_node));

        rt->rt_lock = lp;
        rt->rt_ops = ops;
        rt->rt_arg = arg;

        if (rt->rt_ops != NULL)
                rt->rt_ops->rtop_create(rt, rt->rt_arg);

        return (rt);
}

void
range_tree_destroy(range_tree_t *rt)
{
        VERIFY0(rt->rt_space);

        if (rt->rt_ops != NULL)
                rt->rt_ops->rtop_destroy(rt, rt->rt_arg);

        avl_destroy(&rt->rt_root);
        kmem_free(rt, sizeof (*rt));
}

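/*
 * Example (an illustrative sketch, not part of the original file; the
 * names "lock" and "rt" are hypothetical): a minimal consumer creates
 * a tree with no callback ops and holds the supplied lock across every
 * operation.
 *
 *      kmutex_t lock;
 *      range_tree_t *rt;
 *
 *      mutex_init(&lock, NULL, MUTEX_DEFAULT, NULL);
 *      mutex_enter(&lock);
 *      rt = range_tree_create(NULL, NULL, &lock);
 *      range_tree_add(rt, 0x1000, 0x2000);     -- tree holds [0x1000, 0x3000)
 *      range_tree_remove(rt, 0x2000, 0x1000);  -- leaves [0x1000, 0x2000)
 *      range_tree_vacate(rt, NULL, NULL);      -- empty the tree
 *      range_tree_destroy(rt);
 *      mutex_exit(&lock);
 *      mutex_destroy(&lock);
 */

/*
 * Add the range [start, start + size) to the tree.  The range must not
 * overlap any existing segment (a double allocation triggers
 * zfs_panic_recover()); if it abuts a neighboring segment on either
 * side, the segments are coalesced so the tree always holds maximal
 * contiguous ranges.
 */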
void
range_tree_add(void *arg, uint64_t start, uint64_t size)
{
        range_tree_t *rt = arg;
        avl_index_t where;
        range_seg_t rsearch, *rs_before, *rs_after, *rs;
        uint64_t end = start + size;
        boolean_t merge_before, merge_after;

        ASSERT(MUTEX_HELD(rt->rt_lock));
        VERIFY(size != 0);

        rsearch.rs_start = start;
        rsearch.rs_end = end;
        rs = avl_find(&rt->rt_root, &rsearch, &where);

        if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end) {
                zfs_panic_recover("zfs: allocating allocated segment "
                    "(offset=%llu size=%llu)\n",
                    (longlong_t)start, (longlong_t)size);
                return;
        }

        /* Make sure we don't overlap with either of our neighbors */
        VERIFY(rs == NULL);

        rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
        rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);

        merge_before = (rs_before != NULL && rs_before->rs_end == start);
        merge_after = (rs_after != NULL && rs_after->rs_start == end);

        if (merge_before && merge_after) {
                avl_remove(&rt->rt_root, rs_before);
                if (rt->rt_ops != NULL) {
                        rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);
                        rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);
                }

                range_tree_stat_decr(rt, rs_before);
                range_tree_stat_decr(rt, rs_after);

                rs_after->rs_start = rs_before->rs_start;
                kmem_cache_free(range_seg_cache, rs_before);
                rs = rs_after;
        } else if (merge_before) {
                if (rt->rt_ops != NULL)
                        rt->rt_ops->rtop_remove(rt, rs_before, rt->rt_arg);

                range_tree_stat_decr(rt, rs_before);

                rs_before->rs_end = end;
                rs = rs_before;
        } else if (merge_after) {
                if (rt->rt_ops != NULL)
                        rt->rt_ops->rtop_remove(rt, rs_after, rt->rt_arg);

                range_tree_stat_decr(rt, rs_after);

                rs_after->rs_start = start;
                rs = rs_after;
        } else {
                rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
                rs->rs_start = start;
                rs->rs_end = end;
                avl_insert(&rt->rt_root, rs, where);
        }

        if (rt->rt_ops != NULL)
                rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);

        range_tree_stat_incr(rt, rs);
        rt->rt_space += size;
}

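/*
 * Remove the range [start, start + size) from the tree.  The range
 * must be fully contained in a single existing segment (freeing an
 * untracked range triggers zfs_panic_recover()); removing from the
 * middle of a segment splits it in two.
 */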
void
range_tree_remove(void *arg, uint64_t start, uint64_t size)
{
        range_tree_t *rt = arg;
        avl_index_t where;
        range_seg_t rsearch, *rs, *newseg;
        uint64_t end = start + size;
        boolean_t left_over, right_over;

        ASSERT(MUTEX_HELD(rt->rt_lock));
        VERIFY3U(size, !=, 0);
        VERIFY3U(size, <=, rt->rt_space);

        rsearch.rs_start = start;
        rsearch.rs_end = end;
        rs = avl_find(&rt->rt_root, &rsearch, &where);

        /* Make sure we completely overlap with someone */
        if (rs == NULL) {
                zfs_panic_recover("zfs: freeing free segment "
                    "(offset=%llu size=%llu)",
                    (longlong_t)start, (longlong_t)size);
                return;
        }
        VERIFY3U(rs->rs_start, <=, start);
        VERIFY3U(rs->rs_end, >=, end);

        left_over = (rs->rs_start != start);
        right_over = (rs->rs_end != end);

        range_tree_stat_decr(rt, rs);

        if (rt->rt_ops != NULL)
                rt->rt_ops->rtop_remove(rt, rs, rt->rt_arg);

        if (left_over && right_over) {
                newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP);
                newseg->rs_start = end;
                newseg->rs_end = rs->rs_end;
                range_tree_stat_incr(rt, newseg);

                rs->rs_end = start;

                avl_insert_here(&rt->rt_root, newseg, rs, AVL_AFTER);
                if (rt->rt_ops != NULL)
                        rt->rt_ops->rtop_add(rt, newseg, rt->rt_arg);
        } else if (left_over) {
                rs->rs_end = start;
        } else if (right_over) {
                rs->rs_start = end;
        } else {
                avl_remove(&rt->rt_root, rs);
                kmem_cache_free(range_seg_cache, rs);
                rs = NULL;
        }

        if (rs != NULL) {
                range_tree_stat_incr(rt, rs);

                if (rt->rt_ops != NULL)
                        rt->rt_ops->rtop_add(rt, rs, rt->rt_arg);
        }

        rt->rt_space -= size;
}

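/*
 * Return the segment that wholly contains [start, start + size), or
 * NULL if no such segment exists.
 */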
static range_seg_t *
range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size,
    avl_index_t *wherep)
{
        range_seg_t rsearch, *rs;
        uint64_t end = start + size;

        ASSERT(MUTEX_HELD(rt->rt_lock));
        VERIFY(size != 0);

        rsearch.rs_start = start;
        rsearch.rs_end = end;
        rs = avl_find(&rt->rt_root, &rsearch, wherep);

        if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end)
                return (rs);
        return (NULL);
}

void
range_tree_verify(range_tree_t *rt, uint64_t off, uint64_t size)
{
        range_seg_t *rs;
        avl_index_t where;

        mutex_enter(rt->rt_lock);
        rs = range_tree_find(rt, off, size, &where);
        if (rs != NULL)
                panic("freeing free block; rs=%p", (void *)rs);
        mutex_exit(rt->rt_lock);
}

boolean_t
range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
{
        avl_index_t where;

        return (range_tree_find(rt, start, size, &where) != NULL);
}

void
range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst)
{
        range_tree_t *rt;

        ASSERT(MUTEX_HELD((*rtsrc)->rt_lock));
        ASSERT0(range_tree_space(*rtdst));
        ASSERT0(avl_numnodes(&(*rtdst)->rt_root));

        rt = *rtsrc;
        *rtsrc = *rtdst;
        *rtdst = rt;
}

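/*
 * Empty the tree, optionally invoking func(arg, start, size) on each
 * segment as it is removed.  All segment memory is returned to the
 * kmem cache, and the histogram and space counters are reset.
 */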
void
range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
        range_seg_t *rs;
        void *cookie = NULL;

        ASSERT(MUTEX_HELD(rt->rt_lock));

        if (rt->rt_ops != NULL)
                rt->rt_ops->rtop_vacate(rt, rt->rt_arg);

        while ((rs = avl_destroy_nodes(&rt->rt_root, &cookie)) != NULL) {
                if (func != NULL)
                        func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
                kmem_cache_free(range_seg_cache, rs);
        }

        bzero(rt->rt_histogram, sizeof (rt->rt_histogram));
        rt->rt_space = 0;
}

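/*
 * Invoke func(arg, start, size) on every segment in the tree, in
 * ascending offset order, without modifying the tree.
 */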
void
range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg)
{
        range_seg_t *rs;

        ASSERT(MUTEX_HELD(rt->rt_lock));

        for (rs = avl_first(&rt->rt_root); rs; rs = AVL_NEXT(&rt->rt_root, rs))
                func(arg, rs->rs_start, rs->rs_end - rs->rs_start);
}

uint64_t
range_tree_space(range_tree_t *rt)
{
        return (rt->rt_space);
}