2 February 2013(Wouter) patch defines for BSD endianness, from Brad Smith.
3 January 2012(Wouter) added randomised initial value, fallout from 28c3.
4 March 2007(Wouter) adapted from lookup3.c original, add config.h include.
5 added #ifdef VALGRIND to remove 298,384,660 'unused variable k8' warnings.
6 added include of lookup3.h to check definitions match declarations.
7 removed include of stdint - config.h takes care of platform independence.
8 url http://burtleburtle.net/bob/hash/index.html.
11 -------------------------------------------------------------------------------
12 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
14 These are functions for producing 32-bit hashes for hash table lookup.
15 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
16 are externally useful functions. Routines to test the hash are included
17 if SELF_TEST is defined. You can use this free for any purpose. It's in
18 the public domain. It has no warranty.
20 You probably want to use hashlittle(). hashlittle() and hashbig()
21 hash byte arrays. hashlittle() is faster than hashbig() on
22 little-endian machines. Intel and AMD are little-endian machines.
23 On second thought, you probably want hashlittle2(), which is identical to
24 hashlittle() except it returns two 32-bit hashes for the price of one.
25 You could implement hashbig2() if you wanted but I haven't bothered here.
27 If you want to find a hash of, say, exactly 7 integers, do
28 a = i1; b = i2; c = i3;
30 a += i4; b += i5; c += i6;
34 then use c as the hash value. If you have a variable length array of
35 4-byte integers to hash, use hashword(). If you have a byte array (like
36 a character string), use hashlittle(). If you have several byte arrays, or
37 a mix of things, see the comments above hashlittle().
39 Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
40 then mix those integers. This is fast (you can do a lot more thorough
41 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
42 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
43 -------------------------------------------------------------------------------
45 /*#define SELF_TEST 1*/
48 #include "util/storage/lookup3.h"
49 #include <stdio.h> /* defines printf for tests */
50 #include <time.h> /* defines time_t for timings in the test */
51 /*#include <stdint.h> defines uint32_t etc (from config.h) */
52 #include <sys/param.h> /* attempt to define endianness */
54 # include <endian.h> /* attempt to define endianness */
56 #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
57 #include <sys/endian.h> /* attempt to define endianness */
60 #include <machine/endian.h> /* attempt to define endianness */
63 /* random initial value mixed into every hash; presumably set once at
 * startup via hash_set_raninit() so hash values are unpredictable to
 * attackers (hashtable-collision DoS hardening, see the 28c3 note in the
 * header) -- TODO confirm the caller sets this before hashing begins */
64 static uint32_t raninit = 0xdeadbeef;
67 hash_set_raninit(uint32_t v)
73 * My best guess at if you are big-endian or little-endian. This may
76 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
77 __BYTE_ORDER == __LITTLE_ENDIAN) || \
78 (defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && \
79 _BYTE_ORDER == _LITTLE_ENDIAN) || \
80 (defined(i386) || defined(__i386__) || defined(__i486__) || \
81 defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
82 # define HASH_LITTLE_ENDIAN 1
83 # define HASH_BIG_ENDIAN 0
84 #elif (!defined(_BYTE_ORDER) && !defined(__BYTE_ORDER) && defined(_BIG_ENDIAN))
85 # define HASH_LITTLE_ENDIAN 0
86 # define HASH_BIG_ENDIAN 1
87 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
88 __BYTE_ORDER == __BIG_ENDIAN) || \
89 (defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) && \
90 _BYTE_ORDER == _BIG_ENDIAN) || \
91 (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
92 # define HASH_LITTLE_ENDIAN 0
93 # define HASH_BIG_ENDIAN 1
95 # define HASH_LITTLE_ENDIAN 0
96 # define HASH_BIG_ENDIAN 0
/* hashsize(n): size of a hash table addressed by an n-bit hash value. */
99 #define hashsize(n) ((uint32_t)1<<(n))
/* hashmask(n): bitmask that reduces a 32-bit hash to its low n bits. */
100 #define hashmask(n) (hashsize(n)-1)
/* rot(x,k): rotate the 32-bit value x left by k bits.
 * NOTE(review): k must be in 1..31 -- the (x)>>(32-(k)) half is undefined
 * behavior for k==0 (shift by the full width); every use visible in this
 * file passes a constant in the 4..25 range. */
101 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
104 -------------------------------------------------------------------------------
105 mix -- mix 3 32-bit values reversibly.
107 This is reversible, so any information in (a,b,c) before mix() is
108 still in (a,b,c) after mix().
110 If four pairs of (a,b,c) inputs are run through mix(), or through
111 mix() in reverse, there are at least 32 bits of the output that
112 are sometimes the same for one pair and different for another pair.
114 * pairs that differed by one bit, by two bits, in any combination
115 of top bits of (a,b,c), or in any combination of bottom bits of
117 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
118 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
119 is commonly produced by subtraction) look like a single 1-bit
121 * the base values were pseudorandom, all zero but one bit set, or
122 all zero plus a counter that starts at zero.
124 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
129 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
130 for "differ" defined as + with a one-bit base and a two-bit delta. I
131 used http://burtleburtle.net/bob/hash/avalanche.html to choose
132 the operations, constants, and arrangements of the variables.
134 This does not achieve avalanche. There are input bits of (a,b,c)
135 that fail to affect some output bits of (a,b,c), especially of a. The
136 most thoroughly mixed value is c, but it doesn't really even achieve
139 This allows some parallelism. Read-after-writes are good at doubling
140 the number of bits affected, so the goal of mixing pulls in the opposite
141 direction as the goal of parallelism. I did what I could. Rotates
142 seem to cost as much as shifts on every machine I could lay my hands
143 on, and rotates are much kinder to the top and bottom bits, so I used
145 -------------------------------------------------------------------------------
149 a -= c; a ^= rot(c, 4); c += b; \
150 b -= a; b ^= rot(a, 6); a += c; \
151 c -= b; c ^= rot(b, 8); b += a; \
152 a -= c; a ^= rot(c,16); c += b; \
153 b -= a; b ^= rot(a,19); a += c; \
154 c -= b; c ^= rot(b, 4); b += a; \
158 -------------------------------------------------------------------------------
159 final -- final mixing of 3 32-bit values (a,b,c) into c
161 Pairs of (a,b,c) values differing in only a few bits will usually
162 produce values of c that look totally different. This was tested for
163 * pairs that differed by one bit, by two bits, in any combination
164 of top bits of (a,b,c), or in any combination of bottom bits of
166 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
167 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
168 is commonly produced by subtraction) look like a single 1-bit
170 * the base values were pseudorandom, all zero but one bit set, or
171 all zero plus a counter that starts at zero.
173 These constants passed:
176 and these came close:
180 -------------------------------------------------------------------------------
182 #define final(a,b,c) \
184 c ^= b; c -= rot(b,14); \
185 a ^= c; a -= rot(c,11); \
186 b ^= a; b -= rot(a,25); \
187 c ^= b; c -= rot(b,16); \
188 a ^= c; a -= rot(c,4); \
189 b ^= a; b -= rot(a,14); \
190 c ^= b; c -= rot(b,24); \
194 --------------------------------------------------------------------
195 This works on all machines. To be useful, it requires
196 -- that the key be an array of uint32_t's, and
197 -- that the length be the number of uint32_t's in the key
199 The function hashword() is identical to hashlittle() on little-endian
200 machines, and identical to hashbig() on big-endian machines,
201 except that the length has to be measured in uint32_ts rather than in
202 bytes. hashlittle() is more complicated than hashword() only because
203 hashlittle() has to dance around fitting the key bytes into registers.
204 --------------------------------------------------------------------
207 const uint32_t *k, /* the key, an array of uint32_t values */
208 size_t length, /* the length of the key, in uint32_ts */
209 uint32_t initval) /* the previous hash, or an arbitrary value */
213 /* Set up the internal state */
214 a = b = c = raninit + (((uint32_t)length)<<2) + initval;
216 /*------------------------------------------------- handle most of the key */
227 /*------------------------------------------- handle the last 3 uint32_t's */
228 switch(length) /* all the case statements fall through */
234 case 0: /* case 0: nothing left to add */
237 /*------------------------------------------------------ report the result */
245 --------------------------------------------------------------------
246 hashword2() -- same as hashword(), but take two seeds and return two
247 32-bit values. pc and pb must both be nonnull, and *pc and *pb must
248 both be initialized with seeds. If you pass in (*pb)==0, the output
249 (*pc) will be the same as the return value from hashword().
250 --------------------------------------------------------------------
253 const uint32_t *k, /* the key, an array of uint32_t values */
254 size_t length, /* the length of the key, in uint32_ts */
255 uint32_t *pc, /* IN: seed OUT: primary hash value */
256 uint32_t *pb) /* IN: more seed OUT: secondary hash value */
260 /* Set up the internal state */
261 a = b = c = raninit + ((uint32_t)(length<<2)) + *pc;
264 /*------------------------------------------------- handle most of the key */
275 /*------------------------------------------- handle the last 3 uint32_t's */
276 switch(length) /* all the case statements fall through */
282 case 0: /* case 0: nothing left to add */
285 /*------------------------------------------------------ report the result */
289 #endif /* SELF_TEST */
292 -------------------------------------------------------------------------------
293 hashlittle() -- hash a variable-length key into a 32-bit value
294 k : the key (the unaligned variable-length array of bytes)
295 length : the length of the key, counting by bytes
296 initval : can be any 4-byte value
297 Returns a 32-bit value. Every bit of the key affects every bit of
298 the return value. Two keys differing by one or two bits will have
299 totally different hash values.
301 The best hash table sizes are powers of 2. There is no need to do
302 mod a prime (mod is sooo slow!). If you need less than 32 bits,
303 use a bitmask. For example, if you need only 10 bits, do
304 h = (h & hashmask(10));
305 In which case, the hash table should have hashsize(10) elements.
307 If you are hashing n strings (uint8_t **)k, do it like this:
308 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
310 By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
311 code any way you wish, private, educational, or commercial. It's free.
313 Use for hash table lookup, or anything where one collision in 2^^32 is
314 acceptable. Do NOT use for cryptographic purposes.
315 -------------------------------------------------------------------------------
318 uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
320 uint32_t a,b,c; /* internal state */
321 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
323 /* Set up the internal state */
324 a = b = c = raninit + ((uint32_t)length) + initval;
327 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
328 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
333 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
344 /*----------------------------- handle the last (probably partial) block */
346 * "k[2]&0xffffff" actually reads beyond the end of the string, but
347 * then masks off the part it's not allowed to read. Because the
348 * string is aligned, the masked-off tail is in the same word as the
349 * rest of the string. Every machine with memory protection I've seen
350 * does it on word boundaries, so is OK with this. But VALGRIND will
351 * still catch it and complain. The masking trick does make the hash
352 * noticeably faster for short strings (like English words).
358 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
359 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
360 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
361 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
362 case 8 : b+=k[1]; a+=k[0]; break;
363 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
364 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
365 case 5 : b+=k[1]&0xff; a+=k[0]; break;
366 case 4 : a+=k[0]; break;
367 case 3 : a+=k[0]&0xffffff; break;
368 case 2 : a+=k[0]&0xffff; break;
369 case 1 : a+=k[0]&0xff; break;
370 case 0 : return c; /* zero length strings require no mixing */
373 #else /* make valgrind happy */
375 k8 = (const uint8_t *)k;
378 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
379 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
380 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
381 case 9 : c+=k8[8]; /* fall through */
382 case 8 : b+=k[1]; a+=k[0]; break;
383 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
384 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
385 case 5 : b+=k8[4]; /* fall through */
386 case 4 : a+=k[0]; break;
387 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
388 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
389 case 1 : a+=k8[0]; break;
393 #endif /* !valgrind */
395 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
396 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
399 /*--------------- all but last block: aligned reads and different mixing */
402 a += k[0] + (((uint32_t)k[1])<<16);
403 b += k[2] + (((uint32_t)k[3])<<16);
404 c += k[4] + (((uint32_t)k[5])<<16);
410 /*----------------------------- handle the last (probably partial) block */
411 k8 = (const uint8_t *)k;
414 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
415 b+=k[2]+(((uint32_t)k[3])<<16);
416 a+=k[0]+(((uint32_t)k[1])<<16);
418 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
420 b+=k[2]+(((uint32_t)k[3])<<16);
421 a+=k[0]+(((uint32_t)k[1])<<16);
423 case 9 : c+=k8[8]; /* fall through */
424 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
425 a+=k[0]+(((uint32_t)k[1])<<16);
427 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
429 a+=k[0]+(((uint32_t)k[1])<<16);
431 case 5 : b+=k8[4]; /* fall through */
432 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
434 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
439 case 0 : return c; /* zero length requires no mixing */
442 } else { /* need to read the key one byte at a time */
443 const uint8_t *k = (const uint8_t *)key;
445 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
449 a += ((uint32_t)k[1])<<8;
450 a += ((uint32_t)k[2])<<16;
451 a += ((uint32_t)k[3])<<24;
453 b += ((uint32_t)k[5])<<8;
454 b += ((uint32_t)k[6])<<16;
455 b += ((uint32_t)k[7])<<24;
457 c += ((uint32_t)k[9])<<8;
458 c += ((uint32_t)k[10])<<16;
459 c += ((uint32_t)k[11])<<24;
465 /*-------------------------------- last block: affect all 32 bits of (c) */
466 switch(length) /* all the case statements fall through */
468 case 12: c+=((uint32_t)k[11])<<24;
469 case 11: c+=((uint32_t)k[10])<<16;
470 case 10: c+=((uint32_t)k[9])<<8;
472 case 8 : b+=((uint32_t)k[7])<<24;
473 case 7 : b+=((uint32_t)k[6])<<16;
474 case 6 : b+=((uint32_t)k[5])<<8;
476 case 4 : a+=((uint32_t)k[3])<<24;
477 case 3 : a+=((uint32_t)k[2])<<16;
478 case 2 : a+=((uint32_t)k[1])<<8;
492 * hashlittle2: return 2 32-bit hash values
494 * This is identical to hashlittle(), except it returns two 32-bit hash
495 * values instead of just one. This is good enough for hash table
496 * lookup with 2^^64 buckets, or if you want a second hash if you're not
497 * happy with the first, or if you want a probably-unique 64-bit ID for
498 * the key. *pc is better mixed than *pb, so use *pc first. If you want
499 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
502 const void *key, /* the key to hash */
503 size_t length, /* length of the key */
504 uint32_t *pc, /* IN: primary initval, OUT: primary hash */
505 uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
507 uint32_t a,b,c; /* internal state */
508 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
510 /* Set up the internal state */
511 a = b = c = raninit + ((uint32_t)length) + *pc;
515 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
516 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
521 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
532 /*----------------------------- handle the last (probably partial) block */
534 * "k[2]&0xffffff" actually reads beyond the end of the string, but
535 * then masks off the part it's not allowed to read. Because the
536 * string is aligned, the masked-off tail is in the same word as the
537 * rest of the string. Every machine with memory protection I've seen
538 * does it on word boundaries, so is OK with this. But VALGRIND will
539 * still catch it and complain. The masking trick does make the hash
540 * noticeably faster for short strings (like English words).
546 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
547 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
548 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
549 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
550 case 8 : b+=k[1]; a+=k[0]; break;
551 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
552 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
553 case 5 : b+=k[1]&0xff; a+=k[0]; break;
554 case 4 : a+=k[0]; break;
555 case 3 : a+=k[0]&0xffffff; break;
556 case 2 : a+=k[0]&0xffff; break;
557 case 1 : a+=k[0]&0xff; break;
558 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
561 #else /* make valgrind happy */
563 k8 = (const uint8_t *)k;
566 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
567 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
568 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
569 case 9 : c+=k8[8]; /* fall through */
570 case 8 : b+=k[1]; a+=k[0]; break;
571 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
572 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
573 case 5 : b+=k8[4]; /* fall through */
574 case 4 : a+=k[0]; break;
575 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
576 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
577 case 1 : a+=k8[0]; break;
578 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
581 #endif /* !valgrind */
583 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
584 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
587 /*--------------- all but last block: aligned reads and different mixing */
590 a += k[0] + (((uint32_t)k[1])<<16);
591 b += k[2] + (((uint32_t)k[3])<<16);
592 c += k[4] + (((uint32_t)k[5])<<16);
598 /*----------------------------- handle the last (probably partial) block */
599 k8 = (const uint8_t *)k;
602 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
603 b+=k[2]+(((uint32_t)k[3])<<16);
604 a+=k[0]+(((uint32_t)k[1])<<16);
606 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
608 b+=k[2]+(((uint32_t)k[3])<<16);
609 a+=k[0]+(((uint32_t)k[1])<<16);
611 case 9 : c+=k8[8]; /* fall through */
612 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
613 a+=k[0]+(((uint32_t)k[1])<<16);
615 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
617 a+=k[0]+(((uint32_t)k[1])<<16);
619 case 5 : b+=k8[4]; /* fall through */
620 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
622 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
627 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
630 } else { /* need to read the key one byte at a time */
631 const uint8_t *k = (const uint8_t *)key;
633 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
637 a += ((uint32_t)k[1])<<8;
638 a += ((uint32_t)k[2])<<16;
639 a += ((uint32_t)k[3])<<24;
641 b += ((uint32_t)k[5])<<8;
642 b += ((uint32_t)k[6])<<16;
643 b += ((uint32_t)k[7])<<24;
645 c += ((uint32_t)k[9])<<8;
646 c += ((uint32_t)k[10])<<16;
647 c += ((uint32_t)k[11])<<24;
653 /*-------------------------------- last block: affect all 32 bits of (c) */
654 switch(length) /* all the case statements fall through */
656 case 12: c+=((uint32_t)k[11])<<24;
657 case 11: c+=((uint32_t)k[10])<<16;
658 case 10: c+=((uint32_t)k[9])<<8;
660 case 8 : b+=((uint32_t)k[7])<<24;
661 case 7 : b+=((uint32_t)k[6])<<16;
662 case 6 : b+=((uint32_t)k[5])<<8;
664 case 4 : a+=((uint32_t)k[3])<<24;
665 case 3 : a+=((uint32_t)k[2])<<16;
666 case 2 : a+=((uint32_t)k[1])<<8;
669 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
677 #endif /* SELF_TEST */
679 #if 0 /* currently not used */
683 * This is the same as hashword() on big-endian machines. It is different
684 * from hashlittle() on all machines. hashbig() takes advantage of
685 * big-endian byte ordering.
687 uint32_t hashbig( const void *key, size_t length, uint32_t initval)
690 union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
692 /* Set up the internal state */
693 a = b = c = raninit + ((uint32_t)length) + initval;
696 if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
697 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
702 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
713 /*----------------------------- handle the last (probably partial) block */
715 * "k[2]<<8" actually reads beyond the end of the string, but
716 * then shifts out the part it's not allowed to read. Because the
717 * string is aligned, the illegal read is in the same word as the
718 * rest of the string. Every machine with memory protection I've seen
719 * does it on word boundaries, so is OK with this. But VALGRIND will
720 * still catch it and complain. The masking trick does make the hash
721 * noticeably faster for short strings (like English words).
727 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
728 case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
729 case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
730 case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
731 case 8 : b+=k[1]; a+=k[0]; break;
732 case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
733 case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
734 case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
735 case 4 : a+=k[0]; break;
736 case 3 : a+=k[0]&0xffffff00; break;
737 case 2 : a+=k[0]&0xffff0000; break;
738 case 1 : a+=k[0]&0xff000000; break;
739 case 0 : return c; /* zero length strings require no mixing */
742 #else /* make valgrind happy */
744 k8 = (const uint8_t *)k;
745 switch(length) /* all the case statements fall through */
747 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
748 case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
749 case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
750 case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
751 case 8 : b+=k[1]; a+=k[0]; break;
752 case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
753 case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
754 case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
755 case 4 : a+=k[0]; break;
756 case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
757 case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
758 case 1 : a+=((uint32_t)k8[0])<<24; break;
762 #endif /* !VALGRIND */
764 } else { /* need to read the key one byte at a time */
765 const uint8_t *k = (const uint8_t *)key;
767 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
770 a += ((uint32_t)k[0])<<24;
771 a += ((uint32_t)k[1])<<16;
772 a += ((uint32_t)k[2])<<8;
773 a += ((uint32_t)k[3]);
774 b += ((uint32_t)k[4])<<24;
775 b += ((uint32_t)k[5])<<16;
776 b += ((uint32_t)k[6])<<8;
777 b += ((uint32_t)k[7]);
778 c += ((uint32_t)k[8])<<24;
779 c += ((uint32_t)k[9])<<16;
780 c += ((uint32_t)k[10])<<8;
781 c += ((uint32_t)k[11]);
787 /*-------------------------------- last block: affect all 32 bits of (c) */
788 switch(length) /* all the case statements fall through */
791 case 11: c+=((uint32_t)k[10])<<8;
792 case 10: c+=((uint32_t)k[9])<<16;
793 case 9 : c+=((uint32_t)k[8])<<24;
795 case 7 : b+=((uint32_t)k[6])<<8;
796 case 6 : b+=((uint32_t)k[5])<<16;
797 case 5 : b+=((uint32_t)k[4])<<24;
799 case 3 : a+=((uint32_t)k[2])<<8;
800 case 2 : a+=((uint32_t)k[1])<<16;
801 case 1 : a+=((uint32_t)k[0])<<24;
811 #endif /* 0 == currently not used */
815 /* used for timings */
824 for (i=0; i<256; ++i) buf[i] = 'x';
827 h = hashlittle(&buf[0],1,h);
830 if (z-a > 0) printf("time %d %.8x\n", z-a, h);
833 /* check that every input bit changes every output bit half the time */
840 uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
841 uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
842 uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
843 uint32_t x[HASHSTATE],y[HASHSTATE];
846 printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
847 for (hlen=0; hlen < MAXLEN; ++hlen)
850 for (i=0; i<hlen; ++i) /*----------------------- for each input byte, */
852 for (j=0; j<8; ++j) /*------------------------ for each input bit, */
854 for (m=1; m<8; ++m) /*------------ for several possible initvals, */
856 for (l=0; l<HASHSTATE; ++l)
857 e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
859 /*---- check that every output bit is affected by that input bit */
860 for (k=0; k<MAXPAIR; k+=2)
863 /* keys have one bit different */
864 for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
865 /* have a and b be two keys differing in only one bit */
868 c[0] = hashlittle(a, hlen, m);
870 b[i] ^= ((k+1)>>(8-j));
871 d[0] = hashlittle(b, hlen, m);
872 /* check every bit is 1, 0, set, and not set at least once */
873 for (l=0; l<HASHSTATE; ++l)
876 f[l] &= ~(c[l]^d[l]);
881 if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
888 printf("Some bit didn't change: ");
889 printf("%.8x %.8x %.8x %.8x %.8x %.8x ",
890 e[0],f[0],g[0],h[0],x[0],y[0]);
891 printf("i %d j %d m %d len %d\n", i, j, m, hlen);
893 if (z==MAXPAIR) goto done;
900 printf("Mix success %2d bytes %2d initvals ",i,m);
901 printf("required %d trials\n", z/2);
907 /* Check for reading beyond the end of the buffer and alignment problems */
910 uint8_t buf[MAXLEN+20], *b;
912 uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
914 uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
916 uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
918 uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
922 printf("Endianness. These lines should all be the same (for values filled in):\n");
923 printf("%.8x %.8x %.8x\n",
924 hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
925 hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
926 hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
928 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
929 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
930 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
931 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
932 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
933 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
934 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
936 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
937 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
938 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
939 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
940 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
941 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
942 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
944 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
945 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
946 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
947 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
948 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
949 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
950 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
952 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
953 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
954 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
955 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
956 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
957 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
958 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
961 /* check that hashlittle2 and hashlittle produce the same results */
963 hashlittle2(q, sizeof(q), &i, &j);
964 if (hashlittle(q, sizeof(q), 47) != i)
965 printf("hashlittle2 and hashlittle mismatch\n");
967 /* check that hashword2 and hashword produce the same results */
970 hashword2(&len, 1, &i, &j);
971 if (hashword(&len, 1, 47) != i)
972 printf("hashword2 and hashword mismatch %x %x\n",
973 i, hashword(&len, 1, 47));
975 /* check hashlittle doesn't read before or after the ends of the string */
976 for (h=0, b=buf+1; h<8; ++h, ++b)
978 for (i=0; i<MAXLEN; ++i)
981 for (j=0; j<i; ++j) *(b+j)=0;
983 /* these should all be equal */
984 ref = hashlittle(b, len, (uint32_t)1);
987 x = hashlittle(b, len, (uint32_t)1);
988 y = hashlittle(b, len, (uint32_t)1);
989 if ((ref != x) || (ref != y))
991 printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
998 /* check for problems with nulls */
1002 uint32_t h,i,state[HASHSTATE];
1006 for (i=0; i<HASHSTATE; ++i) state[i] = 1;
1007 printf("These should all be different\n");
1008 for (i=0, h=0; i<8; ++i)
1010 h = hashlittle(buf, 0, h);
1011 printf("%2ld 0-byte strings, hash is %.8x\n", i, h);
1018 driver1(); /* test that the key is hashed: used for timings */
1019 driver2(); /* test that whole key is hashed thoroughly */
1020 driver3(); /* test that nothing but the key is hashed */
1021 driver4(); /* test hashing multiple buffers (all buffers are null) */
1025 #endif /* SELF_TEST */