2 Copyright (c) 2003-2013, Troy D. Hanson http://uthash.sourceforge.net
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
11 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
12 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
13 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
14 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
15 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
16 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
17 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
18 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
19 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
20 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 /* $Id: uthash.h 2682 2012-11-23 22:04:22Z kaiwang27 $ */
29 #include <string.h> /* memcmp,strlen */
30 #include <stddef.h> /* ptrdiff_t */
31 #include <stdlib.h> /* exit() */
33 /* These macros use decltype or the earlier __typeof GNU extension.
34 As decltype is only available in newer compilers (VS2010 or gcc 4.3+
35 when compiling c++ source) this code uses whatever method is needed
36 or, for VS2008 where neither is available, uses casting workarounds. */
37 #ifdef _MSC_VER /* MS compiler */
38 #if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */
39 #define DECLTYPE(x) (decltype(x))
40 #else /* VS2008 or older (or VS2010 in C mode) */
44 #else /* GNU, Sun and other compilers */
45 #define DECLTYPE(x) (__typeof(x))
49 #define DECLTYPE_ASSIGN(dst,src) \
51 char **_da_dst = (char**)(&(dst)); \
52 *_da_dst = (char*)(src); \
55 #define DECLTYPE_ASSIGN(dst,src) \
57 (dst) = DECLTYPE(dst)(src); \
61 /* a number of the hash function use uint32_t which isn't defined on win32 */
63 typedef unsigned int uint32_t;
64 typedef unsigned char uint8_t;
66 #include <inttypes.h> /* uint32_t */
69 #define UTHASH_VERSION 1.9.7
72 #define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */
75 #define uthash_malloc(sz) malloc(sz) /* malloc fcn */
78 #define uthash_free(ptr,sz) free(ptr) /* free fcn */
81 #ifndef uthash_noexpand_fyi
82 #define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */
84 #ifndef uthash_expand_fyi
85 #define uthash_expand_fyi(tbl) /* can be defined to log expands */
88 /* initial number of buckets */
89 #define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */
90 #define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */
91 #define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */
93 /* calculate the element whose hash handle address is hhe */
94 #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
96 #define HASH_FIND(hh,head,keyptr,keylen,out) \
98 unsigned _hf_bkt,_hf_hashv; \
101 HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \
102 if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \
103 HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \
104 keyptr,keylen,out); \
110 #define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM)
111 #define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0)
112 #define HASH_BLOOM_MAKE(tbl) \
114 (tbl)->bloom_nbits = HASH_BLOOM; \
115 (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \
116 if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \
117 memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \
118 (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \
121 #define HASH_BLOOM_FREE(tbl) \
123 uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \
126 #define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8)))
127 #define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8)))
129 #define HASH_BLOOM_ADD(tbl,hashv) \
130 HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
132 #define HASH_BLOOM_TEST(tbl,hashv) \
133 HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
136 #define HASH_BLOOM_MAKE(tbl)
137 #define HASH_BLOOM_FREE(tbl)
138 #define HASH_BLOOM_ADD(tbl,hashv)
139 #define HASH_BLOOM_TEST(tbl,hashv) (1)
142 #define HASH_MAKE_TABLE(hh,head) \
144 (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \
145 sizeof(UT_hash_table)); \
146 if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \
147 memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \
148 (head)->hh.tbl->tail = &((head)->hh); \
149 (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \
150 (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \
151 (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \
152 (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \
153 HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
154 if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \
155 memset((head)->hh.tbl->buckets, 0, \
156 HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
157 HASH_BLOOM_MAKE((head)->hh.tbl); \
158 (head)->hh.tbl->signature = HASH_SIGNATURE; \
161 #define HASH_ADD(hh,head,fieldname,keylen_in,add) \
162 HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)
164 #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
167 (add)->hh.next = NULL; \
168 (add)->hh.key = (char*)keyptr; \
169 (add)->hh.keylen = (unsigned)keylen_in; \
172 (head)->hh.prev = NULL; \
173 HASH_MAKE_TABLE(hh,head); \
175 (head)->hh.tbl->tail->next = (add); \
176 (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \
177 (head)->hh.tbl->tail = &((add)->hh); \
179 (head)->hh.tbl->num_items++; \
180 (add)->hh.tbl = (head)->hh.tbl; \
181 HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \
182 (add)->hh.hashv, _ha_bkt); \
183 HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \
184 HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \
185 HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \
186 HASH_FSCK(hh,head); \
189 #define HASH_TO_BKT( hashv, num_bkts, bkt ) \
191 bkt = ((hashv) & ((num_bkts) - 1)); \
194 /* delete "delptr" from the hash table.
195 * "the usual" patch-up process for the app-order doubly-linked-list.
196 * The use of _hd_hh_del below deserves special explanation.
197 * These used to be expressed using (delptr) but that led to a bug
198 * if someone used the same symbol for the head and deletee, like
199 * HASH_DELETE(hh,users,users);
200 * We want that to work, but by changing the head (users) below
201 * we were forfeiting our ability to further refer to the deletee (users)
202 * in the patch-up process. Solution: use scratch space to
203 * copy the deletee pointer, then the latter references are via that
204 * scratch pointer rather than through the repointed (users) symbol.
206 #define HASH_DELETE(hh,head,delptr) \
209 struct UT_hash_handle *_hd_hh_del; \
210 if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \
211 uthash_free((head)->hh.tbl->buckets, \
212 (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
213 HASH_BLOOM_FREE((head)->hh.tbl); \
214 uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
217 _hd_hh_del = &((delptr)->hh); \
218 if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \
219 (head)->hh.tbl->tail = \
220 (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
221 (head)->hh.tbl->hho); \
223 if ((delptr)->hh.prev) { \
224 ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
225 (head)->hh.tbl->hho))->next = (delptr)->hh.next; \
227 DECLTYPE_ASSIGN(head,(delptr)->hh.next); \
229 if (_hd_hh_del->next) { \
230 ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \
231 (head)->hh.tbl->hho))->prev = \
234 HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
235 HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \
236 (head)->hh.tbl->num_items--; \
238 HASH_FSCK(hh,head); \
242 /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
243 #define HASH_FIND_STR(head,findstr,out) \
244 HASH_FIND(hh,head,findstr,strlen(findstr),out)
245 #define HASH_ADD_STR(head,strfield,add) \
246 HASH_ADD(hh,head,strfield,strlen(add->strfield),add)
247 #define HASH_FIND_INT(head,findint,out) \
248 HASH_FIND(hh,head,findint,sizeof(int),out)
249 #define HASH_ADD_INT(head,intfield,add) \
250 HASH_ADD(hh,head,intfield,sizeof(int),add)
251 #define HASH_FIND_PTR(head,findptr,out) \
252 HASH_FIND(hh,head,findptr,sizeof(void *),out)
253 #define HASH_ADD_PTR(head,ptrfield,add) \
254 HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
255 #define HASH_DEL(head,delptr) \
256 HASH_DELETE(hh,head,delptr)
258 /* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
259 * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
262 #define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
263 #define HASH_FSCK(hh,head) \
266 unsigned _count, _bkt_count; \
268 struct UT_hash_handle *_thh; \
271 for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \
273 _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \
276 if (_prev != (char*)(_thh->hh_prev)) { \
277 HASH_OOPS("invalid hh_prev %p, actual %p\n", \
278 _thh->hh_prev, _prev ); \
281 _prev = (char*)(_thh); \
282 _thh = _thh->hh_next; \
284 _count += _bkt_count; \
285 if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \
286 HASH_OOPS("invalid bucket count %d, actual %d\n", \
287 (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \
290 if (_count != (head)->hh.tbl->num_items) { \
291 HASH_OOPS("invalid hh item count %d, actual %d\n", \
292 (head)->hh.tbl->num_items, _count ); \
294 /* traverse hh in app order; check next/prev integrity, count */ \
297 _thh = &(head)->hh; \
300 if (_prev !=(char*)(_thh->prev)) { \
301 HASH_OOPS("invalid prev %p, actual %p\n", \
302 _thh->prev, _prev ); \
304 _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \
305 _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \
306 (head)->hh.tbl->hho) : NULL ); \
308 if (_count != (head)->hh.tbl->num_items) { \
309 HASH_OOPS("invalid app item count %d, actual %d\n", \
310 (head)->hh.tbl->num_items, _count ); \
315 #define HASH_FSCK(hh,head)
318 /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
319 * the descriptor to which this macro is defined for tuning the hash function.
320 * The app can #include <unistd.h> to get the prototype for write(2). */
321 #ifdef HASH_EMIT_KEYS
322 #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \
324 unsigned _klen = fieldlen; \
325 write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
326 write(HASH_EMIT_KEYS, keyptr, fieldlen); \
329 #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
332 /* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */
334 #define HASH_FCN HASH_FUNCTION
336 #define HASH_FCN HASH_JEN
339 /* The Bernstein hash function, used in Perl prior to v5.6 */
340 #define HASH_BER(key,keylen,num_bkts,hashv,bkt) \
342 unsigned _hb_keylen=keylen; \
343 char *_hb_key=(char*)(key); \
345 while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \
346 bkt = (hashv) & (num_bkts-1); \
350 /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
351 * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
352 #define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \
355 char *_hs_key=(char*)(key); \
357 for(_sx_i=0; _sx_i < keylen; _sx_i++) \
358 hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \
359 bkt = hashv & (num_bkts-1); \
362 #define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \
365 char *_hf_key=(char*)(key); \
366 hashv = 2166136261UL; \
367 for(_fn_i=0; _fn_i < keylen; _fn_i++) \
368 hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \
369 bkt = hashv & (num_bkts-1); \
372 #define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \
375 char *_ho_key=(char*)(key); \
377 for(_ho_i=0; _ho_i < keylen; _ho_i++) { \
378 hashv += _ho_key[_ho_i]; \
379 hashv += (hashv << 10); \
380 hashv ^= (hashv >> 6); \
382 hashv += (hashv << 3); \
383 hashv ^= (hashv >> 11); \
384 hashv += (hashv << 15); \
385 bkt = hashv & (num_bkts-1); \
388 #define HASH_JEN_MIX(a,b,c) \
390 a -= b; a -= c; a ^= ( c >> 13 ); \
391 b -= c; b -= a; b ^= ( a << 8 ); \
392 c -= a; c -= b; c ^= ( b >> 13 ); \
393 a -= b; a -= c; a ^= ( c >> 12 ); \
394 b -= c; b -= a; b ^= ( a << 16 ); \
395 c -= a; c -= b; c ^= ( b >> 5 ); \
396 a -= b; a -= c; a ^= ( c >> 3 ); \
397 b -= c; b -= a; b ^= ( a << 10 ); \
398 c -= a; c -= b; c ^= ( b >> 15 ); \
401 #define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \
403 unsigned _hj_i,_hj_j,_hj_k; \
404 char *_hj_key=(char*)(key); \
405 hashv = 0xfeedbeef; \
406 _hj_i = _hj_j = 0x9e3779b9; \
407 _hj_k = (unsigned)keylen; \
408 while (_hj_k >= 12) { \
409 _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \
410 + ( (unsigned)_hj_key[2] << 16 ) \
411 + ( (unsigned)_hj_key[3] << 24 ) ); \
412 _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \
413 + ( (unsigned)_hj_key[6] << 16 ) \
414 + ( (unsigned)_hj_key[7] << 24 ) ); \
415 hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \
416 + ( (unsigned)_hj_key[10] << 16 ) \
417 + ( (unsigned)_hj_key[11] << 24 ) ); \
419 HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
426 case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \
427 case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \
428 case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \
429 case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \
430 case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \
431 case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \
432 case 5: _hj_j += _hj_key[4]; \
433 case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \
434 case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \
435 case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \
436 case 1: _hj_i += _hj_key[0]; \
438 HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
439 bkt = hashv & (num_bkts-1); \
442 /* The Paul Hsieh hash function */
444 #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
445 || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
446 #define get16bits(d) (*((const uint16_t *) (d)))
449 #if !defined (get16bits)
450 #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
451 +(uint32_t)(((const uint8_t *)(d))[0]) )
453 #define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \
455 char *_sfh_key=(char*)(key); \
456 uint32_t _sfh_tmp, _sfh_len = keylen; \
458 int _sfh_rem = _sfh_len & 3; \
460 hashv = 0xcafebabe; \
463 for (;_sfh_len > 0; _sfh_len--) { \
464 hashv += get16bits (_sfh_key); \
465 _sfh_tmp = (get16bits (_sfh_key+2) << 11) ^ hashv; \
466 hashv = (hashv << 16) ^ _sfh_tmp; \
467 _sfh_key += 2*sizeof (uint16_t); \
468 hashv += hashv >> 11; \
471 /* Handle end cases */ \
472 switch (_sfh_rem) { \
473 case 3: hashv += get16bits (_sfh_key); \
474 hashv ^= hashv << 16; \
475 hashv ^= _sfh_key[sizeof (uint16_t)] << 18; \
476 hashv += hashv >> 11; \
478 case 2: hashv += get16bits (_sfh_key); \
479 hashv ^= hashv << 11; \
480 hashv += hashv >> 17; \
482 case 1: hashv += *_sfh_key; \
483 hashv ^= hashv << 10; \
484 hashv += hashv >> 1; \
487 /* Force "avalanching" of final 127 bits */ \
488 hashv ^= hashv << 3; \
489 hashv += hashv >> 5; \
490 hashv ^= hashv << 4; \
491 hashv += hashv >> 17; \
492 hashv ^= hashv << 25; \
493 hashv += hashv >> 6; \
494 bkt = hashv & (num_bkts-1); \
497 #ifdef HASH_USING_NO_STRICT_ALIASING
498 /* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
499 * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
500 * MurmurHash uses the faster approach only on CPU's where we know it's safe.
502 * Note the preprocessor built-in defines can be emitted using:
504 * gcc -m64 -dM -E - < /dev/null (on gcc)
505 * cc -## a.c (where a.c is a simple test file) (Sun Studio)
507 #if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86))
508 #define MUR_GETBLOCK(p,i) p[i]
509 #else /* non intel */
510 #define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0)
511 #define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1)
512 #define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2)
513 #define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3)
514 #define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL))
515 #if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
516 #define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
517 #define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
518 #define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8))
519 #else /* assume little endian non-intel */
520 #define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
521 #define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
522 #define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8))
524 #define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \
525 (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \
526 (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \
529 #define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
530 #define MUR_FMIX(_h) \
539 #define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \
541 const uint8_t *_mur_data = (const uint8_t*)(key); \
542 const int _mur_nblocks = (keylen) / 4; \
543 uint32_t _mur_h1 = 0xf88D5353; \
544 uint32_t _mur_c1 = 0xcc9e2d51; \
545 uint32_t _mur_c2 = 0x1b873593; \
546 uint32_t _mur_k1 = 0; \
547 const uint8_t *_mur_tail; \
548 const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4); \
550 for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \
551 _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \
552 _mur_k1 *= _mur_c1; \
553 _mur_k1 = MUR_ROTL32(_mur_k1,15); \
554 _mur_k1 *= _mur_c2; \
556 _mur_h1 ^= _mur_k1; \
557 _mur_h1 = MUR_ROTL32(_mur_h1,13); \
558 _mur_h1 = _mur_h1*5+0xe6546b64; \
560 _mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4); \
562 switch((keylen) & 3) { \
563 case 3: _mur_k1 ^= _mur_tail[2] << 16; \
564 case 2: _mur_k1 ^= _mur_tail[1] << 8; \
565 case 1: _mur_k1 ^= _mur_tail[0]; \
566 _mur_k1 *= _mur_c1; \
567 _mur_k1 = MUR_ROTL32(_mur_k1,15); \
568 _mur_k1 *= _mur_c2; \
569 _mur_h1 ^= _mur_k1; \
571 _mur_h1 ^= (keylen); \
574 bkt = hashv & (num_bkts-1); \
576 #endif /* HASH_USING_NO_STRICT_ALIASING */
578 /* key comparison function; return 0 if keys equal */
579 #define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
581 /* iterate over items in a known bucket to find desired item */
582 #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
584 if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \
587 if ((out)->hh.keylen == keylen_in) { \
588 if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) break; \
590 if ((out)->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); \
595 /* add an item to a bucket */
596 #define HASH_ADD_TO_BKT(head,addhh) \
599 (addhh)->hh_next = head.hh_head; \
600 (addhh)->hh_prev = NULL; \
601 if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \
602 (head).hh_head=addhh; \
603 if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \
604 && (addhh)->tbl->noexpand != 1) { \
605 HASH_EXPAND_BUCKETS((addhh)->tbl); \
609 /* remove an item from a given bucket */
610 #define HASH_DEL_IN_BKT(hh,head,hh_del) \
612 if ((head).hh_head == hh_del) { \
613 (head).hh_head = hh_del->hh_next; \
615 if (hh_del->hh_prev) { \
616 hh_del->hh_prev->hh_next = hh_del->hh_next; \
618 if (hh_del->hh_next) { \
619 hh_del->hh_next->hh_prev = hh_del->hh_prev; \
622 /* Bucket expansion has the effect of doubling the number of buckets
623 * and redistributing the items into the new buckets. Ideally the
624 * items will distribute more or less evenly into the new buckets
625 * (the extent to which this is true is a measure of the quality of
626 * the hash function as it applies to the key domain).
628 * With the items distributed into more buckets, the chain length
629 * (item count) in each bucket is reduced. Thus by expanding buckets
630 * the hash keeps a bound on the chain length. This bounded chain
631 * length is the essence of how a hash provides constant time lookup.
633 * The calculation of tbl->ideal_chain_maxlen below deserves some
634 * explanation. First, keep in mind that we're calculating the ideal
635 * maximum chain length based on the *new* (doubled) bucket count.
636 * In fractions this is just n/b (n=number of items,b=new num buckets).
637 * Since the ideal chain length is an integer, we want to calculate
638 * ceil(n/b). We don't depend on floating point arithmetic in this
639 * hash, so to calculate ceil(n/b) with integers we could write
641 * ceil(n/b) = (n/b) + ((n%b)?1:0)
643 * and in fact a previous version of this hash did just that.
644 * But now we have improved things a bit by recognizing that b is
645 * always a power of two. We keep its base 2 log handy (call it lb),
646 * so now we can write this with a bit shift and logical AND:
648 * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
651 #define HASH_EXPAND_BUCKETS(tbl) \
654 unsigned _he_bkt_i; \
655 struct UT_hash_handle *_he_thh, *_he_hh_nxt; \
656 UT_hash_bucket *_he_new_buckets, *_he_newbkt; \
657 _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
658 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
659 if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \
660 memset(_he_new_buckets, 0, \
661 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
662 tbl->ideal_chain_maxlen = \
663 (tbl->num_items >> (tbl->log2_num_buckets+1)) + \
664 ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \
665 tbl->nonideal_items = 0; \
666 for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \
668 _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \
670 _he_hh_nxt = _he_thh->hh_next; \
671 HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \
672 _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \
673 if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \
674 tbl->nonideal_items++; \
675 _he_newbkt->expand_mult = _he_newbkt->count / \
676 tbl->ideal_chain_maxlen; \
678 _he_thh->hh_prev = NULL; \
679 _he_thh->hh_next = _he_newbkt->hh_head; \
680 if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \
682 _he_newbkt->hh_head = _he_thh; \
683 _he_thh = _he_hh_nxt; \
686 uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
687 tbl->num_buckets *= 2; \
688 tbl->log2_num_buckets++; \
689 tbl->buckets = _he_new_buckets; \
690 tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \
691 (tbl->ineff_expands+1) : 0; \
692 if (tbl->ineff_expands > 1) { \
694 uthash_noexpand_fyi(tbl); \
696 uthash_expand_fyi(tbl); \
700 /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
701 /* Note that HASH_SORT assumes the hash handle name to be hh.
702 * HASH_SRT was added to allow the hash handle name to be passed in. */
703 #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
704 #define HASH_SRT(hh,head,cmpfcn) \
707 unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \
708 struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \
712 _hs_list = &((head)->hh); \
713 while (_hs_looping) { \
722 for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \
724 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
725 ((void*)((char*)(_hs_q->next) + \
726 (head)->hh.tbl->hho)) : NULL); \
727 if (! (_hs_q) ) break; \
729 _hs_qsize = _hs_insize; \
730 while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \
731 if (_hs_psize == 0) { \
733 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
734 ((void*)((char*)(_hs_q->next) + \
735 (head)->hh.tbl->hho)) : NULL); \
737 } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \
739 _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
740 ((void*)((char*)(_hs_p->next) + \
741 (head)->hh.tbl->hho)) : NULL); \
744 cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
745 DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
748 _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
749 ((void*)((char*)(_hs_p->next) + \
750 (head)->hh.tbl->hho)) : NULL); \
754 _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
755 ((void*)((char*)(_hs_q->next) + \
756 (head)->hh.tbl->hho)) : NULL); \
760 _hs_tail->next = ((_hs_e) ? \
761 ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \
765 _hs_e->prev = ((_hs_tail) ? \
766 ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \
771 _hs_tail->next = NULL; \
772 if ( _hs_nmerges <= 1 ) { \
774 (head)->hh.tbl->tail = _hs_tail; \
775 DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \
779 HASH_FSCK(hh,head); \
783 /* This function selects items from one hash into another hash.
784 * The end result is that the selected items have dual presence
785 * in both hashes. There is no copy of the items made; rather
786 * they are added into the new hash through a secondary hash
787 * hash handle that must be present in the structure. */
788 #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
790 unsigned _src_bkt, _dst_bkt; \
791 void *_last_elt=NULL, *_elt; \
792 UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \
793 ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \
795 for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \
796 for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \
798 _src_hh = _src_hh->hh_next) { \
799 _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \
801 _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \
802 _dst_hh->key = _src_hh->key; \
803 _dst_hh->keylen = _src_hh->keylen; \
804 _dst_hh->hashv = _src_hh->hashv; \
805 _dst_hh->prev = _last_elt; \
806 _dst_hh->next = NULL; \
807 if (_last_elt_hh) { _last_elt_hh->next = _elt; } \
809 DECLTYPE_ASSIGN(dst,_elt); \
810 HASH_MAKE_TABLE(hh_dst,dst); \
812 _dst_hh->tbl = (dst)->hh_dst.tbl; \
814 HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \
815 HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \
816 (dst)->hh_dst.tbl->num_items++; \
818 _last_elt_hh = _dst_hh; \
823 HASH_FSCK(hh_dst,dst); \
826 #define HASH_CLEAR(hh,head) \
829 uthash_free((head)->hh.tbl->buckets, \
830 (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \
831 HASH_BLOOM_FREE((head)->hh.tbl); \
832 uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
838 #define HASH_ITER(hh,head,el,tmp) \
839 for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \
840 el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
842 #define HASH_ITER(hh,head,el,tmp) \
843 for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
844 el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL))
847 /* obtain a count of items in the hash */
848 #define HASH_COUNT(head) HASH_CNT(hh,head)
849 #define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)
851 typedef struct UT_hash_bucket {
852 struct UT_hash_handle *hh_head;
855 /* expand_mult is normally set to 0. In this situation, the max chain length
856 * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
857 * the bucket's chain exceeds this length, bucket expansion is triggered).
858 * However, setting expand_mult to a non-zero value delays bucket expansion
859 * (that would be triggered by additions to this particular bucket)
860 * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
861 * (The multiplier is simply expand_mult+1). The whole idea of this
862 * multiplier is to reduce bucket expansions, since they are expensive, in
863 * situations where we know that a particular bucket tends to be overused.
864 * It is better to let its chain length grow to a longer yet-still-bounded
865 * value, than to do an O(n) bucket expansion too often.
867 unsigned expand_mult;
871 /* random signature used only to find hash tables in external analysis */
872 #define HASH_SIGNATURE 0xa0111fe1
873 #define HASH_BLOOM_SIGNATURE 0xb12220f2
875 typedef struct UT_hash_table {
876 UT_hash_bucket *buckets;
877 unsigned num_buckets, log2_num_buckets;
879 struct UT_hash_handle *tail; /* tail hh in app order, for fast append */
880 ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */
882 /* in an ideal situation (all buckets used equally), no bucket would have
883 * more than ceil(#items/#buckets) items. that's the ideal chain length. */
884 unsigned ideal_chain_maxlen;
886 /* nonideal_items is the number of items in the hash whose chain position
887 * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
888 * hash distribution; reaching them in a chain traversal takes >ideal steps */
889 unsigned nonideal_items;
891 /* ineffective expands occur when a bucket doubling was performed, but
892 * afterward, more than half the items in the hash had nonideal chain
893 * positions. If this happens on two consecutive expansions we inhibit any
894 * further expansion, as it's not helping; this happens when the hash
895 * function isn't a good fit for the key domain. When expansion is inhibited
896 * the hash will still work, albeit no longer in constant time. */
897 unsigned ineff_expands, noexpand;
899 uint32_t signature; /* used only to find hash tables in external analysis */
901 uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
908 typedef struct UT_hash_handle {
909 struct UT_hash_table *tbl;
910 void *prev; /* prev element in app order */
911 void *next; /* next element in app order */
912 struct UT_hash_handle *hh_prev; /* previous hh in bucket order */
913 struct UT_hash_handle *hh_next; /* next hh in bucket order */
914 void *key; /* ptr to enclosing struct's key */
915 unsigned keylen; /* enclosing struct's key len */
916 unsigned hashv; /* result of hash-fcn(key) */
919 #endif /* UTHASH_H */