]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/jemalloc/src/ckh.c
Since contrib/libcxxrt's ancestry was never correct, subversion 1.8 and
[FreeBSD/FreeBSD.git] / contrib / jemalloc / src / ckh.c
1 /*
2  *******************************************************************************
3  * Implementation of (2^1+,2) cuckoo hashing, where 2^1+ indicates that each
4  * hash bucket contains 2^n cells, for n >= 1, and 2 indicates that two hash
5  * functions are employed.  The original cuckoo hashing algorithm was described
6  * in:
7  *
8  *   Pagh, R., F.F. Rodler (2004) Cuckoo Hashing.  Journal of Algorithms
9  *     51(2):122-144.
10  *
11  * Generalization of cuckoo hashing was discussed in:
12  *
13  *   Erlingsson, U., M. Manasse, F. McSherry (2006) A cool and practical
14  *     alternative to traditional hash tables.  In Proceedings of the 7th
15  *     Workshop on Distributed Data and Structures (WDAS'06), Santa Clara, CA,
16  *     January 2006.
17  *
18  * This implementation uses precisely two hash functions because that is the
19  * fewest that can work, and supporting multiple hashes is an implementation
20  * burden.  Here is a reproduction of Figure 1 from Erlingsson et al. (2006)
21  * that shows approximate expected maximum load factors for various
22  * configurations:
23  *
24  *           |         #cells/bucket         |
25  *   #hashes |   1   |   2   |   4   |   8   |
26  *   --------+-------+-------+-------+-------+
27  *         1 | 0.006 | 0.006 | 0.03  | 0.12  |
28  *         2 | 0.49  | 0.86  |>0.93< |>0.96< |
29  *         3 | 0.91  | 0.97  | 0.98  | 0.999 |
30  *         4 | 0.97  | 0.99  | 0.999 |       |
31  *
32  * The number of cells per bucket is chosen such that a bucket fits in one cache
33  * line.  So, on 32- and 64-bit systems, we use (8,2) and (4,2) cuckoo hashing,
34  * respectively.
35  *
36  ******************************************************************************/
37 #define JEMALLOC_CKH_C_
38 #include "jemalloc/internal/jemalloc_internal.h"
39
40 /******************************************************************************/
41 /* Function prototypes for non-inline static functions. */
42
43 static bool     ckh_grow(tsd_t *tsd, ckh_t *ckh);
44 static void     ckh_shrink(tsd_t *tsd, ckh_t *ckh);
45
46 /******************************************************************************/
47
48 /*
49  * Search bucket for key and return the cell number if found; SIZE_T_MAX
50  * otherwise.
51  */
52 JEMALLOC_INLINE_C size_t
53 ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key)
54 {
55         ckhc_t *cell;
56         unsigned i;
57
58         for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
59                 cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
60                 if (cell->key != NULL && ckh->keycomp(key, cell->key))
61                         return ((bucket << LG_CKH_BUCKET_CELLS) + i);
62         }
63
64         return (SIZE_T_MAX);
65 }
66
67 /*
68  * Search table for key and return cell number if found; SIZE_T_MAX otherwise.
69  */
70 JEMALLOC_INLINE_C size_t
71 ckh_isearch(ckh_t *ckh, const void *key)
72 {
73         size_t hashes[2], bucket, cell;
74
75         assert(ckh != NULL);
76
77         ckh->hash(key, hashes);
78
79         /* Search primary bucket. */
80         bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
81         cell = ckh_bucket_search(ckh, bucket, key);
82         if (cell != SIZE_T_MAX)
83                 return (cell);
84
85         /* Search secondary bucket. */
86         bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
87         cell = ckh_bucket_search(ckh, bucket, key);
88         return (cell);
89 }
90
91 JEMALLOC_INLINE_C bool
92 ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
93     const void *data)
94 {
95         ckhc_t *cell;
96         unsigned offset, i;
97
98         /*
99          * Cycle through the cells in the bucket, starting at a random position.
100          * The randomness avoids worst-case search overhead as buckets fill up.
101          */
102         offset = (unsigned)prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS);
103         for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
104                 cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
105                     ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
106                 if (cell->key == NULL) {
107                         cell->key = key;
108                         cell->data = data;
109                         ckh->count++;
110                         return (false);
111                 }
112         }
113
114         return (true);
115 }
116
/*
 * No space is available in bucket.  Randomly evict an item, then try to find an
 * alternate location for that item.  Iteratively repeat this
 * eviction/relocation procedure until either success or detection of an
 * eviction/relocation bucket cycle.
 *
 * Returns false on success.  Returns true when a cycle back to argbucket is
 * detected; in that case the currently homeless (key, data) pair is written
 * back through *argkey/*argdata so the caller can retry after growing.
 */
JEMALLOC_INLINE_C bool
ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
    void const **argdata)
{
	const void *key, *data, *tkey, *tdata;
	ckhc_t *cell;
	size_t hashes[2], bucket, tbucket;
	unsigned i;

	bucket = argbucket;
	key = *argkey;
	data = *argdata;
	while (true) {
		/*
		 * Choose a random item within the bucket to evict.  This is
		 * critical to correct function, because without (eventually)
		 * evicting all items within a bucket during iteration, it
		 * would be possible to get stuck in an infinite loop if there
		 * were an item for which both hashes indicated the same
		 * bucket.
		 */
		i = (unsigned)prng_lg_range(&ckh->prng_state,
		    LG_CKH_BUCKET_CELLS);
		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
		/* Caller guarantees the bucket is full. */
		assert(cell->key != NULL);

		/* Swap cell->{key,data} and {key,data} (evict). */
		tkey = cell->key; tdata = cell->data;
		cell->key = key; cell->data = data;
		key = tkey; data = tdata;

#ifdef CKH_COUNT
		ckh->nrelocs++;
#endif

		/* Find the alternate bucket for the evicted item. */
		ckh->hash(key, hashes);
		tbucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
		if (tbucket == bucket) {
			tbucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets)
			    - 1);
			/*
			 * It may be that (tbucket == bucket) still, if the
			 * item's hashes both indicate this bucket.  However,
			 * we are guaranteed to eventually escape this bucket
			 * during iteration, assuming pseudo-random item
			 * selection (true randomness would make infinite
			 * looping a remote possibility).  The reason we can
			 * never get trapped forever is that there are two
			 * cases:
			 *
			 * 1) This bucket == argbucket, so we will quickly
			 *    detect an eviction cycle and terminate.
			 * 2) An item was evicted to this bucket from another,
			 *    which means that at least one item in this bucket
			 *    has hashes that indicate distinct buckets.
			 */
		}
		/* Check for a cycle. */
		if (tbucket == argbucket) {
			/* Hand the homeless item back to the caller. */
			*argkey = key;
			*argdata = data;
			return (true);
		}

		/* Move to the alternate bucket and try a plain insert there. */
		bucket = tbucket;
		if (!ckh_try_bucket_insert(ckh, bucket, key, data))
			return (false);
	}
}
193
194 JEMALLOC_INLINE_C bool
195 ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata)
196 {
197         size_t hashes[2], bucket;
198         const void *key = *argkey;
199         const void *data = *argdata;
200
201         ckh->hash(key, hashes);
202
203         /* Try to insert in primary bucket. */
204         bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
205         if (!ckh_try_bucket_insert(ckh, bucket, key, data))
206                 return (false);
207
208         /* Try to insert in secondary bucket. */
209         bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
210         if (!ckh_try_bucket_insert(ckh, bucket, key, data))
211                 return (false);
212
213         /*
214          * Try to find a place for this item via iterative eviction/relocation.
215          */
216         return (ckh_evict_reloc_insert(ckh, bucket, argkey, argdata));
217 }
218
219 /*
220  * Try to rebuild the hash table from scratch by inserting all items from the
221  * old table into the new.
222  */
223 JEMALLOC_INLINE_C bool
224 ckh_rebuild(ckh_t *ckh, ckhc_t *aTab)
225 {
226         size_t count, i, nins;
227         const void *key, *data;
228
229         count = ckh->count;
230         ckh->count = 0;
231         for (i = nins = 0; nins < count; i++) {
232                 if (aTab[i].key != NULL) {
233                         key = aTab[i].key;
234                         data = aTab[i].data;
235                         if (ckh_try_insert(ckh, &key, &data)) {
236                                 ckh->count = count;
237                                 return (true);
238                         }
239                         nins++;
240                 }
241         }
242
243         return (false);
244 }
245
/*
 * Double the table size (possibly more than once) and rebuild it, in order to
 * make room for further insertions.  Returns true on failure (size overflow
 * or OOM); the existing table remains usable in that case.
 */
static bool
ckh_grow(tsd_t *tsd, ckh_t *ckh)
{
	bool ret;
	ckhc_t *tab, *ttab;
	unsigned lg_prevbuckets, lg_curcells;

#ifdef CKH_COUNT
	ckh->ngrows++;
#endif

	/*
	 * It is possible (though unlikely, given well behaved hashes) that the
	 * table will have to be doubled more than once in order to create a
	 * usable table.
	 */
	lg_prevbuckets = ckh->lg_curbuckets;
	lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS;
	while (true) {
		size_t usize;

		lg_curcells++;
		usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
		if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) {
			/* Requested size is not representable/allocatable. */
			ret = true;
			goto label_return;
		}
		tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL,
		    true, NULL);
		if (tab == NULL) {
			/* OOM. */
			ret = true;
			goto label_return;
		}
		/* Swap in new table. */
		ttab = ckh->tab;
		ckh->tab = tab;
		tab = ttab;
		ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;

		if (!ckh_rebuild(ckh, tab)) {
			/* Rebuild succeeded; free the old table. */
			idalloctm(tsd, tab, tcache_get(tsd, false), true, true);
			break;
		}

		/* Rebuilding failed, so back out partially rebuilt table. */
		idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true);
		ckh->tab = tab;
		ckh->lg_curbuckets = lg_prevbuckets;
	}

	ret = false;
label_return:
	return (ret);
}
300
/*
 * Halve the table size and rebuild it.  Failures (size overflow, OOM, or a
 * rebuild that cannot fit all items) are silently backed out, leaving the
 * original table in place.
 */
static void
ckh_shrink(tsd_t *tsd, ckh_t *ckh)
{
	ckhc_t *tab, *ttab;
	size_t usize;
	unsigned lg_prevbuckets, lg_curcells;

	/*
	 * It is possible (though unlikely, given well behaved hashes) that the
	 * table rebuild will fail.
	 */
	lg_prevbuckets = ckh->lg_curbuckets;
	lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1;
	usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
	if (unlikely(usize == 0 || usize > HUGE_MAXCLASS))
		return;
	tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true,
	    NULL);
	if (tab == NULL) {
		/*
		 * An OOM error isn't worth propagating, since it doesn't
		 * prevent this or future operations from proceeding.
		 */
		return;
	}
	/* Swap in new table. */
	ttab = ckh->tab;
	ckh->tab = tab;
	tab = ttab;
	ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;

	if (!ckh_rebuild(ckh, tab)) {
		/* Rebuild succeeded; free the old (larger) table. */
		idalloctm(tsd, tab, tcache_get(tsd, false), true, true);
#ifdef CKH_COUNT
		ckh->nshrinks++;
#endif
		return;
	}

	/* Rebuilding failed, so back out partially rebuilt table. */
	idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true);
	ckh->tab = tab;
	ckh->lg_curbuckets = lg_prevbuckets;
#ifdef CKH_COUNT
	ckh->nshrinkfails++;
#endif
}
348
349 bool
350 ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
351     ckh_keycomp_t *keycomp)
352 {
353         bool ret;
354         size_t mincells, usize;
355         unsigned lg_mincells;
356
357         assert(minitems > 0);
358         assert(hash != NULL);
359         assert(keycomp != NULL);
360
361 #ifdef CKH_COUNT
362         ckh->ngrows = 0;
363         ckh->nshrinks = 0;
364         ckh->nshrinkfails = 0;
365         ckh->ninserts = 0;
366         ckh->nrelocs = 0;
367 #endif
368         ckh->prng_state = 42; /* Value doesn't really matter. */
369         ckh->count = 0;
370
371         /*
372          * Find the minimum power of 2 that is large enough to fit minitems
373          * entries.  We are using (2+,2) cuckoo hashing, which has an expected
374          * maximum load factor of at least ~0.86, so 0.75 is a conservative load
375          * factor that will typically allow mincells items to fit without ever
376          * growing the table.
377          */
378         assert(LG_CKH_BUCKET_CELLS > 0);
379         mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2;
380         for (lg_mincells = LG_CKH_BUCKET_CELLS;
381             (ZU(1) << lg_mincells) < mincells;
382             lg_mincells++)
383                 ; /* Do nothing. */
384         ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
385         ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
386         ckh->hash = hash;
387         ckh->keycomp = keycomp;
388
389         usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE);
390         if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) {
391                 ret = true;
392                 goto label_return;
393         }
394         ckh->tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true,
395             NULL);
396         if (ckh->tab == NULL) {
397                 ret = true;
398                 goto label_return;
399         }
400
401         ret = false;
402 label_return:
403         return (ret);
404 }
405
/*
 * Destroy *ckh: optionally dump operation counters, free the cell table, and
 * (in debug builds) poison the ckh_t memory so accidental reuse is caught.
 */
void
ckh_delete(tsd_t *tsd, ckh_t *ckh)
{

	assert(ckh != NULL);

#ifdef CKH_VERBOSE
	malloc_printf(
	    "%s(%p): ngrows: %"FMTu64", nshrinks: %"FMTu64","
	    " nshrinkfails: %"FMTu64", ninserts: %"FMTu64","
	    " nrelocs: %"FMTu64"\n", __func__, ckh,
	    (unsigned long long)ckh->ngrows,
	    (unsigned long long)ckh->nshrinks,
	    (unsigned long long)ckh->nshrinkfails,
	    (unsigned long long)ckh->ninserts,
	    (unsigned long long)ckh->nrelocs);
#endif

	idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true);
	/* Fill with garbage so that a use-after-delete is likely to fault. */
	if (config_debug)
		memset(ckh, 0x5a, sizeof(ckh_t));
}
428
/* Return the number of items currently stored in the table. */
size_t
ckh_count(ckh_t *ckh)
{

	assert(ckh != NULL);

	return (ckh->count);
}
437
438 bool
439 ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data)
440 {
441         size_t i, ncells;
442
443         for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets +
444             LG_CKH_BUCKET_CELLS)); i < ncells; i++) {
445                 if (ckh->tab[i].key != NULL) {
446                         if (key != NULL)
447                                 *key = (void *)ckh->tab[i].key;
448                         if (data != NULL)
449                                 *data = (void *)ckh->tab[i].data;
450                         *tabind = i + 1;
451                         return (false);
452                 }
453         }
454
455         return (true);
456 }
457
458 bool
459 ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data)
460 {
461         bool ret;
462
463         assert(ckh != NULL);
464         assert(ckh_search(ckh, key, NULL, NULL));
465
466 #ifdef CKH_COUNT
467         ckh->ninserts++;
468 #endif
469
470         while (ckh_try_insert(ckh, &key, &data)) {
471                 if (ckh_grow(tsd, ckh)) {
472                         ret = true;
473                         goto label_return;
474                 }
475         }
476
477         ret = false;
478 label_return:
479         return (ret);
480 }
481
482 bool
483 ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key,
484     void **data)
485 {
486         size_t cell;
487
488         assert(ckh != NULL);
489
490         cell = ckh_isearch(ckh, searchkey);
491         if (cell != SIZE_T_MAX) {
492                 if (key != NULL)
493                         *key = (void *)ckh->tab[cell].key;
494                 if (data != NULL)
495                         *data = (void *)ckh->tab[cell].data;
496                 ckh->tab[cell].key = NULL;
497                 ckh->tab[cell].data = NULL; /* Not necessary. */
498
499                 ckh->count--;
500                 /* Try to halve the table if it is less than 1/4 full. */
501                 if (ckh->count < (ZU(1) << (ckh->lg_curbuckets
502                     + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets
503                     > ckh->lg_minbuckets) {
504                         /* Ignore error due to OOM. */
505                         ckh_shrink(tsd, ckh);
506                 }
507
508                 return (false);
509         }
510
511         return (true);
512 }
513
514 bool
515 ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
516 {
517         size_t cell;
518
519         assert(ckh != NULL);
520
521         cell = ckh_isearch(ckh, searchkey);
522         if (cell != SIZE_T_MAX) {
523                 if (key != NULL)
524                         *key = (void *)ckh->tab[cell].key;
525                 if (data != NULL)
526                         *data = (void *)ckh->tab[cell].data;
527                 return (false);
528         }
529
530         return (true);
531 }
532
/* Stock hash function for NUL-terminated string keys. */
void
ckh_string_hash(const void *key, size_t r_hash[2])
{
	const char *s = (const char *)key;

	hash(s, strlen(s), 0x94122f33U, r_hash);
}
539
/*
 * Stock keycomp function for NUL-terminated string keys: keys are equal iff
 * their contents match.
 */
bool
ckh_string_keycomp(const void *k1, const void *k2)
{

	assert(k1 != NULL);
	assert(k2 != NULL);

	/* strcmp() == 0 means the strings are identical. */
	return (strcmp((char *)k1, (char *)k2) == 0);
}
549
/* Stock hash function for pointer keys: hash the pointer's bit pattern. */
void
ckh_pointer_hash(const void *key, size_t r_hash[2])
{
	size_t as_int;

	assert(sizeof(key) == sizeof(as_int));
	/* Type-pun the pointer value into an integer via memcpy. */
	memcpy(&as_int, &key, sizeof(as_int));
	hash(&as_int, sizeof(as_int), 0xd983396eU, r_hash);
}
562
/* Stock keycomp function for pointer keys: identity comparison. */
bool
ckh_pointer_keycomp(const void *k1, const void *k2)
{

	return (k1 == k2);
}