2 February 2013(Wouter) patch defines for BSD endianness, from Brad Smith.
3 January 2012(Wouter) added randomised initial value, fallout from 28c3.
4 March 2007(Wouter) adapted from lookup3.c original, add config.h include.
5 added #ifdef VALGRIND to remove 298,384,660 'unused variable k8' warnings.
6 added include of lookup3.h to check definitions match declarations.
7 removed include of stdint - config.h takes care of platform independence.
8 url http://burtleburtle.net/bob/hash/index.html.
11 -------------------------------------------------------------------------------
12 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
14 These are functions for producing 32-bit hashes for hash table lookup.
15 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
16 are externally useful functions. Routines to test the hash are included
17 if SELF_TEST is defined. You can use this free for any purpose. It's in
18 the public domain. It has no warranty.
20 You probably want to use hashlittle(). hashlittle() and hashbig()
21 hash byte arrays. hashlittle() is is faster than hashbig() on
22 little-endian machines. Intel and AMD are little-endian machines.
23 On second thought, you probably want hashlittle2(), which is identical to
24 hashlittle() except it returns two 32-bit hashes for the price of one.
25 You could implement hashbig2() if you wanted but I haven't bothered here.
27 If you want to find a hash of, say, exactly 7 integers, do
28 a = i1; b = i2; c = i3;
30 a += i4; b += i5; c += i6;
34 then use c as the hash value. If you have a variable length array of
35 4-byte integers to hash, use hashword(). If you have a byte array (like
36 a character string), use hashlittle(). If you have several byte arrays, or
37 a mix of things, see the comments above hashlittle().
39 Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
40 then mix those integers. This is fast (you can do a lot more thorough
41 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
42 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
43 -------------------------------------------------------------------------------
45 /*#define SELF_TEST 1*/
48 #include "util/storage/lookup3.h"
49 #include <stdio.h> /* defines printf for tests */
50 #include <time.h> /* defines time_t for timings in the test */
51 /*#include <stdint.h> defines uint32_t etc (from config.h) */
52 #include <sys/param.h> /* attempt to define endianness */
53 #ifdef HAVE_SYS_TYPES_H
54 # include <sys/types.h> /* attempt to define endianness (solaris) */
57 # include <endian.h> /* attempt to define endianness */
59 #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
60 #include <sys/endian.h> /* attempt to define endianness */
63 #include <machine/endian.h> /* attempt to define endianness */
66 /* random initial value */
67 static uint32_t raninit = (uint32_t)0xdeadbeef;
70 hash_set_raninit(uint32_t v)
76 * My best guess at if you are big-endian or little-endian. This may
79 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
80 __BYTE_ORDER == __LITTLE_ENDIAN) || \
81 (defined(i386) || defined(__i386__) || defined(__i486__) || \
82 defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL) || defined(__x86))
83 # define HASH_LITTLE_ENDIAN 1
84 # define HASH_BIG_ENDIAN 0
85 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
86 __BYTE_ORDER == __BIG_ENDIAN) || \
87 (defined(sparc) || defined(__sparc) || defined(__sparc__) || defined(POWERPC) || defined(mc68000) || defined(sel))
88 # define HASH_LITTLE_ENDIAN 0
89 # define HASH_BIG_ENDIAN 1
90 #elif defined(_MACHINE_ENDIAN_H_)
91 /* test for machine_endian_h protects failure if some are empty strings */
92 # if defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) && _BYTE_ORDER == _BIG_ENDIAN
93 # define HASH_LITTLE_ENDIAN 0
94 # define HASH_BIG_ENDIAN 1
96 # if defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && _BYTE_ORDER == _LITTLE_ENDIAN
97 # define HASH_LITTLE_ENDIAN 1
98 # define HASH_BIG_ENDIAN 0
99 # endif /* _MACHINE_ENDIAN_H_ */
101 # define HASH_LITTLE_ENDIAN 0
102 # define HASH_BIG_ENDIAN 0
105 #define hashsize(n) ((uint32_t)1<<(n))
106 #define hashmask(n) (hashsize(n)-1)
107 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
110 -------------------------------------------------------------------------------
111 mix -- mix 3 32-bit values reversibly.
113 This is reversible, so any information in (a,b,c) before mix() is
114 still in (a,b,c) after mix().
116 If four pairs of (a,b,c) inputs are run through mix(), or through
117 mix() in reverse, there are at least 32 bits of the output that
118 are sometimes the same for one pair and different for another pair.
120 * pairs that differed by one bit, by two bits, in any combination
121 of top bits of (a,b,c), or in any combination of bottom bits of
123 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
124 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
125 is commonly produced by subtraction) look like a single 1-bit
127 * the base values were pseudorandom, all zero but one bit set, or
128 all zero plus a counter that starts at zero.
130 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
135 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
136 for "differ" defined as + with a one-bit base and a two-bit delta. I
137 used http://burtleburtle.net/bob/hash/avalanche.html to choose
138 the operations, constants, and arrangements of the variables.
140 This does not achieve avalanche. There are input bits of (a,b,c)
141 that fail to affect some output bits of (a,b,c), especially of a. The
142 most thoroughly mixed value is c, but it doesn't really even achieve
145 This allows some parallelism. Read-after-writes are good at doubling
146 the number of bits affected, so the goal of mixing pulls in the opposite
147 direction as the goal of parallelism. I did what I could. Rotates
148 seem to cost as much as shifts on every machine I could lay my hands
149 on, and rotates are much kinder to the top and bottom bits, so I used
151 -------------------------------------------------------------------------------
155 a -= c; a ^= rot(c, 4); c += b; \
156 b -= a; b ^= rot(a, 6); a += c; \
157 c -= b; c ^= rot(b, 8); b += a; \
158 a -= c; a ^= rot(c,16); c += b; \
159 b -= a; b ^= rot(a,19); a += c; \
160 c -= b; c ^= rot(b, 4); b += a; \
164 -------------------------------------------------------------------------------
165 final -- final mixing of 3 32-bit values (a,b,c) into c
167 Pairs of (a,b,c) values differing in only a few bits will usually
168 produce values of c that look totally different. This was tested for
169 * pairs that differed by one bit, by two bits, in any combination
170 of top bits of (a,b,c), or in any combination of bottom bits of
172 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
173 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
174 is commonly produced by subtraction) look like a single 1-bit
176 * the base values were pseudorandom, all zero but one bit set, or
177 all zero plus a counter that starts at zero.
179 These constants passed:
182 and these came close:
186 -------------------------------------------------------------------------------
188 #define final(a,b,c) \
190 c ^= b; c -= rot(b,14); \
191 a ^= c; a -= rot(c,11); \
192 b ^= a; b -= rot(a,25); \
193 c ^= b; c -= rot(b,16); \
194 a ^= c; a -= rot(c,4); \
195 b ^= a; b -= rot(a,14); \
196 c ^= b; c -= rot(b,24); \
200 --------------------------------------------------------------------
201 This works on all machines. To be useful, it requires
202 -- that the key be an array of uint32_t's, and
203 -- that the length be the number of uint32_t's in the key
205 The function hashword() is identical to hashlittle() on little-endian
206 machines, and identical to hashbig() on big-endian machines,
207 except that the length has to be measured in uint32_ts rather than in
208 bytes. hashlittle() is more complicated than hashword() only because
209 hashlittle() has to dance around fitting the key bytes into registers.
210 --------------------------------------------------------------------
213 const uint32_t *k, /* the key, an array of uint32_t values */
214 size_t length, /* the length of the key, in uint32_ts */
215 uint32_t initval) /* the previous hash, or an arbitrary value */
219 /* Set up the internal state */
220 a = b = c = raninit + (((uint32_t)length)<<2) + initval;
222 /*------------------------------------------------- handle most of the key */
233 /*------------------------------------------- handle the last 3 uint32_t's */
234 switch(length) /* all the case statements fall through */
240 case 0: /* case 0: nothing left to add */
243 /*------------------------------------------------------ report the result */
251 --------------------------------------------------------------------
252 hashword2() -- same as hashword(), but take two seeds and return two
253 32-bit values. pc and pb must both be nonnull, and *pc and *pb must
254 both be initialized with seeds. If you pass in (*pb)==0, the output
255 (*pc) will be the same as the return value from hashword().
256 --------------------------------------------------------------------
259 const uint32_t *k, /* the key, an array of uint32_t values */
260 size_t length, /* the length of the key, in uint32_ts */
261 uint32_t *pc, /* IN: seed OUT: primary hash value */
262 uint32_t *pb) /* IN: more seed OUT: secondary hash value */
266 /* Set up the internal state */
267 a = b = c = raninit + ((uint32_t)(length<<2)) + *pc;
270 /*------------------------------------------------- handle most of the key */
281 /*------------------------------------------- handle the last 3 uint32_t's */
282 switch(length) /* all the case statements fall through */
288 case 0: /* case 0: nothing left to add */
291 /*------------------------------------------------------ report the result */
295 #endif /* SELF_TEST */
298 -------------------------------------------------------------------------------
299 hashlittle() -- hash a variable-length key into a 32-bit value
300 k : the key (the unaligned variable-length array of bytes)
301 length : the length of the key, counting by bytes
302 initval : can be any 4-byte value
303 Returns a 32-bit value. Every bit of the key affects every bit of
304 the return value. Two keys differing by one or two bits will have
305 totally different hash values.
307 The best hash table sizes are powers of 2. There is no need to do
308 mod a prime (mod is sooo slow!). If you need less than 32 bits,
309 use a bitmask. For example, if you need only 10 bits, do
310 h = (h & hashmask(10));
311 In which case, the hash table should have hashsize(10) elements.
313 If you are hashing n strings (uint8_t **)k, do it like this:
314 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
316 By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
317 code any way you wish, private, educational, or commercial. It's free.
319 Use for hash table lookup, or anything where one collision in 2^^32 is
320 acceptable. Do NOT use for cryptographic purposes.
321 -------------------------------------------------------------------------------
324 uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
326 uint32_t a,b,c; /* internal state */
327 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
329 /* Set up the internal state */
330 a = b = c = raninit + ((uint32_t)length) + initval;
333 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
334 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
339 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
350 /*----------------------------- handle the last (probably partial) block */
352 * "k[2]&0xffffff" actually reads beyond the end of the string, but
353 * then masks off the part it's not allowed to read. Because the
354 * string is aligned, the masked-off tail is in the same word as the
355 * rest of the string. Every machine with memory protection I've seen
356 * does it on word boundaries, so is OK with this. But VALGRIND will
357 * still catch it and complain. The masking trick does make the hash
358 * noticably faster for short strings (like English words).
364 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
365 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
366 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
367 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
368 case 8 : b+=k[1]; a+=k[0]; break;
369 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
370 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
371 case 5 : b+=k[1]&0xff; a+=k[0]; break;
372 case 4 : a+=k[0]; break;
373 case 3 : a+=k[0]&0xffffff; break;
374 case 2 : a+=k[0]&0xffff; break;
375 case 1 : a+=k[0]&0xff; break;
376 case 0 : return c; /* zero length strings require no mixing */
379 #else /* make valgrind happy */
381 k8 = (const uint8_t *)k;
384 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
385 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
386 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
387 case 9 : c+=k8[8]; /* fall through */
388 case 8 : b+=k[1]; a+=k[0]; break;
389 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
390 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
391 case 5 : b+=k8[4]; /* fall through */
392 case 4 : a+=k[0]; break;
393 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
394 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
395 case 1 : a+=k8[0]; break;
399 #endif /* !valgrind */
401 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
402 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
405 /*--------------- all but last block: aligned reads and different mixing */
408 a += k[0] + (((uint32_t)k[1])<<16);
409 b += k[2] + (((uint32_t)k[3])<<16);
410 c += k[4] + (((uint32_t)k[5])<<16);
416 /*----------------------------- handle the last (probably partial) block */
417 k8 = (const uint8_t *)k;
420 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
421 b+=k[2]+(((uint32_t)k[3])<<16);
422 a+=k[0]+(((uint32_t)k[1])<<16);
424 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
426 b+=k[2]+(((uint32_t)k[3])<<16);
427 a+=k[0]+(((uint32_t)k[1])<<16);
429 case 9 : c+=k8[8]; /* fall through */
430 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
431 a+=k[0]+(((uint32_t)k[1])<<16);
433 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
435 a+=k[0]+(((uint32_t)k[1])<<16);
437 case 5 : b+=k8[4]; /* fall through */
438 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
440 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
445 case 0 : return c; /* zero length requires no mixing */
448 } else { /* need to read the key one byte at a time */
449 const uint8_t *k = (const uint8_t *)key;
451 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
455 a += ((uint32_t)k[1])<<8;
456 a += ((uint32_t)k[2])<<16;
457 a += ((uint32_t)k[3])<<24;
459 b += ((uint32_t)k[5])<<8;
460 b += ((uint32_t)k[6])<<16;
461 b += ((uint32_t)k[7])<<24;
463 c += ((uint32_t)k[9])<<8;
464 c += ((uint32_t)k[10])<<16;
465 c += ((uint32_t)k[11])<<24;
471 /*-------------------------------- last block: affect all 32 bits of (c) */
472 switch(length) /* all the case statements fall through */
474 case 12: c+=((uint32_t)k[11])<<24;
475 case 11: c+=((uint32_t)k[10])<<16;
476 case 10: c+=((uint32_t)k[9])<<8;
478 case 8 : b+=((uint32_t)k[7])<<24;
479 case 7 : b+=((uint32_t)k[6])<<16;
480 case 6 : b+=((uint32_t)k[5])<<8;
482 case 4 : a+=((uint32_t)k[3])<<24;
483 case 3 : a+=((uint32_t)k[2])<<16;
484 case 2 : a+=((uint32_t)k[1])<<8;
498 * hashlittle2: return 2 32-bit hash values
500 * This is identical to hashlittle(), except it returns two 32-bit hash
501 * values instead of just one. This is good enough for hash table
502 * lookup with 2^^64 buckets, or if you want a second hash if you're not
503 * happy with the first, or if you want a probably-unique 64-bit ID for
504 * the key. *pc is better mixed than *pb, so use *pc first. If you want
505 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
508 const void *key, /* the key to hash */
509 size_t length, /* length of the key */
510 uint32_t *pc, /* IN: primary initval, OUT: primary hash */
511 uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
513 uint32_t a,b,c; /* internal state */
514 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
516 /* Set up the internal state */
517 a = b = c = raninit + ((uint32_t)length) + *pc;
521 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
522 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
527 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
538 /*----------------------------- handle the last (probably partial) block */
540 * "k[2]&0xffffff" actually reads beyond the end of the string, but
541 * then masks off the part it's not allowed to read. Because the
542 * string is aligned, the masked-off tail is in the same word as the
543 * rest of the string. Every machine with memory protection I've seen
544 * does it on word boundaries, so is OK with this. But VALGRIND will
545 * still catch it and complain. The masking trick does make the hash
546 * noticably faster for short strings (like English words).
552 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
553 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
554 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
555 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
556 case 8 : b+=k[1]; a+=k[0]; break;
557 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
558 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
559 case 5 : b+=k[1]&0xff; a+=k[0]; break;
560 case 4 : a+=k[0]; break;
561 case 3 : a+=k[0]&0xffffff; break;
562 case 2 : a+=k[0]&0xffff; break;
563 case 1 : a+=k[0]&0xff; break;
564 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
567 #else /* make valgrind happy */
569 k8 = (const uint8_t *)k;
572 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
573 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
574 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
575 case 9 : c+=k8[8]; /* fall through */
576 case 8 : b+=k[1]; a+=k[0]; break;
577 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
578 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
579 case 5 : b+=k8[4]; /* fall through */
580 case 4 : a+=k[0]; break;
581 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
582 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
583 case 1 : a+=k8[0]; break;
584 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
587 #endif /* !valgrind */
589 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
590 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
593 /*--------------- all but last block: aligned reads and different mixing */
596 a += k[0] + (((uint32_t)k[1])<<16);
597 b += k[2] + (((uint32_t)k[3])<<16);
598 c += k[4] + (((uint32_t)k[5])<<16);
604 /*----------------------------- handle the last (probably partial) block */
605 k8 = (const uint8_t *)k;
608 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
609 b+=k[2]+(((uint32_t)k[3])<<16);
610 a+=k[0]+(((uint32_t)k[1])<<16);
612 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
614 b+=k[2]+(((uint32_t)k[3])<<16);
615 a+=k[0]+(((uint32_t)k[1])<<16);
617 case 9 : c+=k8[8]; /* fall through */
618 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
619 a+=k[0]+(((uint32_t)k[1])<<16);
621 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
623 a+=k[0]+(((uint32_t)k[1])<<16);
625 case 5 : b+=k8[4]; /* fall through */
626 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
628 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
633 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
636 } else { /* need to read the key one byte at a time */
637 const uint8_t *k = (const uint8_t *)key;
639 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
643 a += ((uint32_t)k[1])<<8;
644 a += ((uint32_t)k[2])<<16;
645 a += ((uint32_t)k[3])<<24;
647 b += ((uint32_t)k[5])<<8;
648 b += ((uint32_t)k[6])<<16;
649 b += ((uint32_t)k[7])<<24;
651 c += ((uint32_t)k[9])<<8;
652 c += ((uint32_t)k[10])<<16;
653 c += ((uint32_t)k[11])<<24;
659 /*-------------------------------- last block: affect all 32 bits of (c) */
660 switch(length) /* all the case statements fall through */
662 case 12: c+=((uint32_t)k[11])<<24;
663 case 11: c+=((uint32_t)k[10])<<16;
664 case 10: c+=((uint32_t)k[9])<<8;
666 case 8 : b+=((uint32_t)k[7])<<24;
667 case 7 : b+=((uint32_t)k[6])<<16;
668 case 6 : b+=((uint32_t)k[5])<<8;
670 case 4 : a+=((uint32_t)k[3])<<24;
671 case 3 : a+=((uint32_t)k[2])<<16;
672 case 2 : a+=((uint32_t)k[1])<<8;
675 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
683 #endif /* SELF_TEST */
685 #if 0 /* currently not used */
689 * This is the same as hashword() on big-endian machines. It is different
690 * from hashlittle() on all machines. hashbig() takes advantage of
691 * big-endian byte ordering.
693 uint32_t hashbig( const void *key, size_t length, uint32_t initval)
696 union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
698 /* Set up the internal state */
699 a = b = c = raninit + ((uint32_t)length) + initval;
702 if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
703 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
708 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
719 /*----------------------------- handle the last (probably partial) block */
721 * "k[2]<<8" actually reads beyond the end of the string, but
722 * then shifts out the part it's not allowed to read. Because the
723 * string is aligned, the illegal read is in the same word as the
724 * rest of the string. Every machine with memory protection I've seen
725 * does it on word boundaries, so is OK with this. But VALGRIND will
726 * still catch it and complain. The masking trick does make the hash
727 * noticably faster for short strings (like English words).
733 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
734 case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
735 case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
736 case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
737 case 8 : b+=k[1]; a+=k[0]; break;
738 case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
739 case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
740 case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
741 case 4 : a+=k[0]; break;
742 case 3 : a+=k[0]&0xffffff00; break;
743 case 2 : a+=k[0]&0xffff0000; break;
744 case 1 : a+=k[0]&0xff000000; break;
745 case 0 : return c; /* zero length strings require no mixing */
748 #else /* make valgrind happy */
750 k8 = (const uint8_t *)k;
751 switch(length) /* all the case statements fall through */
753 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
754 case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
755 case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
756 case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
757 case 8 : b+=k[1]; a+=k[0]; break;
758 case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
759 case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
760 case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
761 case 4 : a+=k[0]; break;
762 case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
763 case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
764 case 1 : a+=((uint32_t)k8[0])<<24; break;
768 #endif /* !VALGRIND */
770 } else { /* need to read the key one byte at a time */
771 const uint8_t *k = (const uint8_t *)key;
773 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
776 a += ((uint32_t)k[0])<<24;
777 a += ((uint32_t)k[1])<<16;
778 a += ((uint32_t)k[2])<<8;
779 a += ((uint32_t)k[3]);
780 b += ((uint32_t)k[4])<<24;
781 b += ((uint32_t)k[5])<<16;
782 b += ((uint32_t)k[6])<<8;
783 b += ((uint32_t)k[7]);
784 c += ((uint32_t)k[8])<<24;
785 c += ((uint32_t)k[9])<<16;
786 c += ((uint32_t)k[10])<<8;
787 c += ((uint32_t)k[11]);
793 /*-------------------------------- last block: affect all 32 bits of (c) */
794 switch(length) /* all the case statements fall through */
797 case 11: c+=((uint32_t)k[10])<<8;
798 case 10: c+=((uint32_t)k[9])<<16;
799 case 9 : c+=((uint32_t)k[8])<<24;
801 case 7 : b+=((uint32_t)k[6])<<8;
802 case 6 : b+=((uint32_t)k[5])<<16;
803 case 5 : b+=((uint32_t)k[4])<<24;
805 case 3 : a+=((uint32_t)k[2])<<8;
806 case 2 : a+=((uint32_t)k[1])<<16;
807 case 1 : a+=((uint32_t)k[0])<<24;
817 #endif /* 0 == currently not used */
821 /* used for timings */
830 for (i=0; i<256; ++i) buf[i] = 'x';
833 h = hashlittle(&buf[0],1,h);
836 if (z-a > 0) printf("time %d %.8x\n", z-a, h);
839 /* check that every input bit changes every output bit half the time */
846 uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
847 uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
848 uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
849 uint32_t x[HASHSTATE],y[HASHSTATE];
852 printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
853 for (hlen=0; hlen < MAXLEN; ++hlen)
856 for (i=0; i<hlen; ++i) /*----------------------- for each input byte, */
858 for (j=0; j<8; ++j) /*------------------------ for each input bit, */
860 for (m=1; m<8; ++m) /*------------ for serveral possible initvals, */
862 for (l=0; l<HASHSTATE; ++l)
863 e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
865 /*---- check that every output bit is affected by that input bit */
866 for (k=0; k<MAXPAIR; k+=2)
869 /* keys have one bit different */
870 for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
871 /* have a and b be two keys differing in only one bit */
874 c[0] = hashlittle(a, hlen, m);
876 b[i] ^= ((k+1)>>(8-j));
877 d[0] = hashlittle(b, hlen, m);
878 /* check every bit is 1, 0, set, and not set at least once */
879 for (l=0; l<HASHSTATE; ++l)
882 f[l] &= ~(c[l]^d[l]);
887 if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
894 printf("Some bit didn't change: ");
895 printf("%.8x %.8x %.8x %.8x %.8x %.8x ",
896 e[0],f[0],g[0],h[0],x[0],y[0]);
897 printf("i %d j %d m %d len %d\n", i, j, m, hlen);
899 if (z==MAXPAIR) goto done;
906 printf("Mix success %2d bytes %2d initvals ",i,m);
907 printf("required %d trials\n", z/2);
913 /* Check for reading beyond the end of the buffer and alignment problems */
916 uint8_t buf[MAXLEN+20], *b;
918 uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
920 uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
922 uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
924 uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
928 printf("Endianness. These lines should all be the same (for values filled in):\n");
929 printf("%.8x %.8x %.8x\n",
930 hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
931 hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
932 hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
934 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
935 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
936 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
937 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
938 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
939 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
940 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
942 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
943 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
944 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
945 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
946 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
947 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
948 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
950 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
951 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
952 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
953 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
954 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
955 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
956 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
958 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
959 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
960 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
961 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
962 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
963 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
964 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
967 /* check that hashlittle2 and hashlittle produce the same results */
969 hashlittle2(q, sizeof(q), &i, &j);
970 if (hashlittle(q, sizeof(q), 47) != i)
971 printf("hashlittle2 and hashlittle mismatch\n");
973 /* check that hashword2 and hashword produce the same results */
976 hashword2(&len, 1, &i, &j);
977 if (hashword(&len, 1, 47) != i)
978 printf("hashword2 and hashword mismatch %x %x\n",
979 i, hashword(&len, 1, 47));
981 /* check hashlittle doesn't read before or after the ends of the string */
982 for (h=0, b=buf+1; h<8; ++h, ++b)
984 for (i=0; i<MAXLEN; ++i)
987 for (j=0; j<i; ++j) *(b+j)=0;
989 /* these should all be equal */
990 ref = hashlittle(b, len, (uint32_t)1);
993 x = hashlittle(b, len, (uint32_t)1);
994 y = hashlittle(b, len, (uint32_t)1);
995 if ((ref != x) || (ref != y))
997 printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
1004 /* check for problems with nulls */
1008 uint32_t h,i,state[HASHSTATE];
1012 for (i=0; i<HASHSTATE; ++i) state[i] = 1;
1013 printf("These should all be different\n");
1014 for (i=0, h=0; i<8; ++i)
1016 h = hashlittle(buf, 0, h);
1017 printf("%2ld 0-byte strings, hash is %.8x\n", i, h);
1024 driver1(); /* test that the key is hashed: used for timings */
1025 driver2(); /* test that whole key is hashed thoroughly */
1026 driver3(); /* test that nothing but the key is hashed */
1027 driver4(); /* test hashing multiple buffers (all buffers are null) */
1031 #endif /* SELF_TEST */