/*
May 2019(Wouter) patch to enable the valgrind clean implementation all the
time. This enables better security audit and checks, which is better
than the speedup. Git issue #30. Renamed the define ARRAY_CLEAN_ACCESS.
February 2013(Wouter) patch defines for BSD endianness, from Brad Smith.
January 2012(Wouter) added randomised initial value, fallout from 28c3.
March 2007(Wouter) adapted from lookup3.c original, add config.h include.
added #ifdef VALGRIND to remove 298,384,660 'unused variable k8' warnings.
added include of lookup3.h to check definitions match declarations.
removed include of stdint - config.h takes care of platform independence.
added fallthrough comments for new gcc warning suppression.
url http://burtleburtle.net/bob/hash/index.html.
-------------------------------------------------------------------------------
lookup3.c, by Bob Jenkins, May 2006, Public Domain.

These are functions for producing 32-bit hashes for hash table lookup.
hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
are externally useful functions.  Routines to test the hash are included
if SELF_TEST is defined.  You can use this free for any purpose.  It's in
the public domain.  It has no warranty.

You probably want to use hashlittle().  hashlittle() and hashbig()
hash byte arrays.  hashlittle() is faster than hashbig() on
little-endian machines.  Intel and AMD are little-endian machines.
On second thought, you probably want hashlittle2(), which is identical to
hashlittle() except it returns two 32-bit hashes for the price of one.
You could implement hashbig2() if you wanted but I haven't bothered here.

If you want to find a hash of, say, exactly 7 integers, do
  a = i1;  b = i2;  c = i3;
  mix(a,b,c);
  a += i4; b += i5; c += i6;
  mix(a,b,c);
  a += i7;
  final(a,b,c);
then use c as the hash value.  If you have a variable length array of
4-byte integers to hash, use hashword().  If you have a byte array (like
a character string), use hashlittle().  If you have several byte arrays, or
a mix of things, see the comments above hashlittle().

Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
then mix those integers.  This is fast (you can do a lot more thorough
mixing with 12*3 instructions on 3 integers than you can with 3 instructions
on 1 byte), but shoehorning those bytes into integers efficiently is messy.
-------------------------------------------------------------------------------
*/
49 /*#define SELF_TEST 1*/
50 #define ARRAY_CLEAN_ACCESS 1
53 #include "util/storage/lookup3.h"
54 #include <stdio.h> /* defines printf for tests */
55 #include <time.h> /* defines time_t for timings in the test */
56 /*#include <stdint.h> defines uint32_t etc (from config.h) */
57 #include <sys/param.h> /* attempt to define endianness */
58 #ifdef HAVE_SYS_TYPES_H
59 # include <sys/types.h> /* attempt to define endianness (solaris) */
61 #if defined(linux) || defined(__OpenBSD__)
63 # include <endian.h> /* attempt to define endianness */
65 # include <machine/endian.h> /* on older OpenBSD */
68 #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
69 #include <sys/endian.h> /* attempt to define endianness */
/* random initial value */
static uint32_t raninit = (uint32_t)0xdeadbeef;

/* Set the randomised initial value mixed into every hash.  Randomising
 * the seed protects hash tables against collision-flooding (28c3). */
void
hash_set_raninit(uint32_t v)
{
	raninit = v;
}

/*
 * My best guess at if you are big-endian or little-endian.  This may
 * need adjustment.
 */
#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
     __BYTE_ORDER == __LITTLE_ENDIAN) || \
    (defined(i386) || defined(__i386__) || defined(__i486__) || \
     defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL) || defined(__x86))
# define HASH_LITTLE_ENDIAN 1
# define HASH_BIG_ENDIAN 0
#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
       __BYTE_ORDER == __BIG_ENDIAN) || \
      (defined(sparc) || defined(__sparc) || defined(__sparc__) || defined(POWERPC) || defined(mc68000) || defined(sel))
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 1
#elif defined(_MACHINE_ENDIAN_H_)
/* test for machine_endian_h protects failure if some are empty strings */
# if defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) && _BYTE_ORDER == _BIG_ENDIAN
#  define HASH_LITTLE_ENDIAN 0
#  define HASH_BIG_ENDIAN 1
# endif
# if defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && _BYTE_ORDER == _LITTLE_ENDIAN
#  define HASH_LITTLE_ENDIAN 1
#  define HASH_BIG_ENDIAN 0
# endif /* _MACHINE_ENDIAN_H_ */
#else
/* unknown platform: fall back to the safe byte-at-a-time code path */
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 0
#endif

#define hashsize(n) ((uint32_t)1<<(n))
#define hashmask(n) (hashsize(n)-1)
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))

/*
-------------------------------------------------------------------------------
mix -- mix 3 32-bit values reversibly.

This is reversible, so any information in (a,b,c) before mix() is
still in (a,b,c) after mix().

If four pairs of (a,b,c) inputs are run through mix(), or through
mix() in reverse, there are at least 32 bits of the output that
are sometimes the same for one pair and different for another pair.
This was tested for:
* pairs that differed by one bit, by two bits, in any combination
  of top bits of (a,b,c), or in any combination of bottom bits of
  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) look like a single 1-bit
  difference.
* the base values were pseudorandom, all zero but one bit set, or
  all zero plus a counter that starts at zero.

Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
satisfy this are
    4  6  8 16 19  4
    9 15  3 18 27 15
   14  9  3  7 17  3
Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
for "differ" defined as + with a one-bit base and a two-bit delta.  I
used http://burtleburtle.net/bob/hash/avalanche.html to choose
the operations, constants, and arrangements of the variables.

This does not achieve avalanche.  There are input bits of (a,b,c)
that fail to affect some output bits of (a,b,c), especially of a.  The
most thoroughly mixed value is c, but it doesn't really even achieve
avalanche in c.

This allows some parallelism.  Read-after-writes are good at doubling
the number of bits affected, so the goal of mixing pulls in the opposite
direction as the goal of parallelism.  I did what I could.  Rotates
seem to cost as much as shifts on every machine I could lay my hands
on, and rotates are much kinder to the top and bottom bits, so I used
rotates.
-------------------------------------------------------------------------------
*/
#define mix(a,b,c) \
{ \
  a -= c;  a ^= rot(c, 4);  c += b; \
  b -= a;  b ^= rot(a, 6);  a += c; \
  c -= b;  c ^= rot(b, 8);  b += a; \
  a -= c;  a ^= rot(c,16);  c += b; \
  b -= a;  b ^= rot(a,19);  a += c; \
  c -= b;  c ^= rot(b, 4);  b += a; \
}

/*
-------------------------------------------------------------------------------
final -- final mixing of 3 32-bit values (a,b,c) into c

Pairs of (a,b,c) values differing in only a few bits will usually
produce values of c that look totally different.  This was tested for
* pairs that differed by one bit, by two bits, in any combination
  of top bits of (a,b,c), or in any combination of bottom bits of
  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) look like a single 1-bit
  difference.
* the base values were pseudorandom, all zero but one bit set, or
  all zero plus a counter that starts at zero.

These constants passed:
 14 11 25 16 4 14 24
 12 14 25 16 4 14 24
and these came close:
  4  8 15 26 3 22 24
 10  8 15 26 3 22 24
 11  8 15 26 3 22 24
-------------------------------------------------------------------------------
*/
#define final(a,b,c) \
{ \
  c ^= b; c -= rot(b,14); \
  a ^= c; a -= rot(c,11); \
  b ^= a; b -= rot(a,25); \
  c ^= b; c -= rot(b,16); \
  a ^= c; a -= rot(c,4);  \
  b ^= a; b -= rot(a,14); \
  c ^= b; c -= rot(b,24); \
}

/*
--------------------------------------------------------------------
 This works on all machines.  To be useful, it requires
 -- that the key be an array of uint32_t's, and
 -- that the length be the number of uint32_t's in the key

 The function hashword() is identical to hashlittle() on little-endian
 machines, and identical to hashbig() on big-endian machines,
 except that the length has to be measured in uint32_ts rather than in
 bytes.  hashlittle() is more complicated than hashword() only because
 hashlittle() has to dance around fitting the key bytes into registers.
--------------------------------------------------------------------
*/
uint32_t hashword(
const uint32_t *k,                   /* the key, an array of uint32_t values */
size_t          length,               /* the length of the key, in uint32_ts */
uint32_t        initval)         /* the previous hash, or an arbitrary value */
{
  uint32_t a,b,c;

  /* Set up the internal state */
  a = b = c = raninit + (((uint32_t)length)<<2) + initval;

  /*------------------------------------------------- handle most of the key */
  while (length > 3)
  {
    a += k[0];
    b += k[1];
    c += k[2];
    mix(a,b,c);
    length -= 3;
    k += 3;
  }

  /*------------------------------------------- handle the last 3 uint32_t's */
  switch(length)                     /* all the case statements fall through */
  {
  case 3 : c+=k[2];
	/* fallthrough */
  case 2 : b+=k[1];
	/* fallthrough */
  case 1 : a+=k[0];
    final(a,b,c);
    /* fallthrough */
  case 0:     /* case 0: nothing left to add */
    break;
  }
  /*------------------------------------------------------ report the result */
  return c;
}

#ifdef SELF_TEST

/*
--------------------------------------------------------------------
hashword2() -- same as hashword(), but take two seeds and return two
32-bit values.  pc and pb must both be nonnull, and *pc and *pb must
both be initialized with seeds.  If you pass in (*pb)==0, the output
(*pc) will be the same as the return value from hashword().
--------------------------------------------------------------------
*/
void hashword2 (
const uint32_t *k,                   /* the key, an array of uint32_t values */
size_t          length,               /* the length of the key, in uint32_ts */
uint32_t       *pc,                      /* IN: seed OUT: primary hash value */
uint32_t       *pb)               /* IN: more seed OUT: secondary hash value */
{
  uint32_t a,b,c;

  /* Set up the internal state */
  a = b = c = raninit + ((uint32_t)(length<<2)) + *pc;
  c += *pb;

  /*------------------------------------------------- handle most of the key */
  while (length > 3)
  {
    a += k[0];
    b += k[1];
    c += k[2];
    mix(a,b,c);
    length -= 3;
    k += 3;
  }

  /*------------------------------------------- handle the last 3 uint32_t's */
  switch(length)                     /* all the case statements fall through */
  {
  case 3 : c+=k[2];
	/* fallthrough */
  case 2 : b+=k[1];
	/* fallthrough */
  case 1 : a+=k[0];
    final(a,b,c);
    /* fallthrough */
  case 0:     /* case 0: nothing left to add */
    break;
  }
  /*------------------------------------------------------ report the result */
  *pc=c; *pb=b;
}

#endif /* SELF_TEST */

/*
-------------------------------------------------------------------------------
hashlittle() -- hash a variable-length key into a 32-bit value
  k       : the key (the unaligned variable-length array of bytes)
  length  : the length of the key, counting by bytes
  initval : can be any 4-byte value
Returns a 32-bit value.  Every bit of the key affects every bit of
the return value.  Two keys differing by one or two bits will have
totally different hash values.

The best hash table sizes are powers of 2.  There is no need to do
mod a prime (mod is sooo slow!).  If you need less than 32 bits,
use a bitmask.  For example, if you need only 10 bits, do
  h = (h & hashmask(10));
In which case, the hash table should have hashsize(10) elements.

If you are hashing n strings (uint8_t **)k, do it like this:
  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);

By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
code any way you wish, private, educational, or commercial.  It's free.

Use for hash table lookup, or anything where one collision in 2^^32 is
acceptable.  Do NOT use for cryptographic purposes.
-------------------------------------------------------------------------------
*/

uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
{
  uint32_t a,b,c;                                          /* internal state */
  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */

  /* Set up the internal state */
  a = b = c = raninit + ((uint32_t)length) + initval;

  u.ptr = key;
  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
#ifdef ARRAY_CLEAN_ACCESS
    const uint8_t  *k8;
#endif

    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a,b,c);
      length -= 12;
      k += 3;
    }

    /*----------------------------- handle the last (probably partial) block */
    /*
     * "k[2]&0xffffff" actually reads beyond the end of the string, but
     * then masks off the part it's not allowed to read.  Because the
     * string is aligned, the masked-off tail is in the same word as the
     * rest of the string.  Every machine with memory protection I've seen
     * does it on word boundaries, so is OK with this.  But VALGRIND will
     * still catch it and complain.  The masking trick does make the hash
     * noticeably faster for short strings (like English words).
     */
#ifndef ARRAY_CLEAN_ACCESS

    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
    case 5 : b+=k[1]&0xff; a+=k[0]; break;
    case 4 : a+=k[0]; break;
    case 3 : a+=k[0]&0xffffff; break;
    case 2 : a+=k[0]&0xffff; break;
    case 1 : a+=k[0]&0xff; break;
    case 0 : return c;              /* zero length strings require no mixing */
    }

#else /* make valgrind happy */

    k8 = (const uint8_t *)k;
    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
    case 9 : c+=k8[8];                   /* fall through */
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
    case 5 : b+=k8[4];                   /* fall through */
    case 4 : a+=k[0]; break;
    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
    case 1 : a+=k8[0]; break;
    case 0 : return c;              /* zero length strings require no mixing */
    }

#endif /* !valgrind */

  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
    const uint8_t  *k8;

    /*--------------- all but last block: aligned reads and different mixing */
    while (length > 12)
    {
      a += k[0] + (((uint32_t)k[1])<<16);
      b += k[2] + (((uint32_t)k[3])<<16);
      c += k[4] + (((uint32_t)k[5])<<16);
      mix(a,b,c);
      length -= 12;
      k += 6;
    }

    /*----------------------------- handle the last (probably partial) block */
    k8 = (const uint8_t *)k;
    switch(length)
    {
    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
    case 10: c+=k[4];
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 9 : c+=k8[8];                      /* fall through */
    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
    case 6 : b+=k[2];
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 5 : b+=k8[4];                      /* fall through */
    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
    case 2 : a+=k[0];
             break;
    case 1 : a+=k8[0];
             break;
    case 0 : return c;                     /* zero length requires no mixing */
    }

  } else {                        /* need to read the key one byte at a time */
    const uint8_t *k = (const uint8_t *)key;

    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      a += ((uint32_t)k[1])<<8;
      a += ((uint32_t)k[2])<<16;
      a += ((uint32_t)k[3])<<24;
      b += k[4];
      b += ((uint32_t)k[5])<<8;
      b += ((uint32_t)k[6])<<16;
      b += ((uint32_t)k[7])<<24;
      c += k[8];
      c += ((uint32_t)k[9])<<8;
      c += ((uint32_t)k[10])<<16;
      c += ((uint32_t)k[11])<<24;
      mix(a,b,c);
      length -= 12;
      k += 12;
    }

    /*-------------------------------- last block: affect all 32 bits of (c) */
    switch(length)                   /* all the case statements fall through */
    {
    case 12: c+=((uint32_t)k[11])<<24;
	/* fallthrough */
    case 11: c+=((uint32_t)k[10])<<16;
	/* fallthrough */
    case 10: c+=((uint32_t)k[9])<<8;
	/* fallthrough */
    case 9 : c+=k[8];
	/* fallthrough */
    case 8 : b+=((uint32_t)k[7])<<24;
	/* fallthrough */
    case 7 : b+=((uint32_t)k[6])<<16;
	/* fallthrough */
    case 6 : b+=((uint32_t)k[5])<<8;
	/* fallthrough */
    case 5 : b+=k[4];
	/* fallthrough */
    case 4 : a+=((uint32_t)k[3])<<24;
	/* fallthrough */
    case 3 : a+=((uint32_t)k[2])<<16;
	/* fallthrough */
    case 2 : a+=((uint32_t)k[1])<<8;
	/* fallthrough */
    case 1 : a+=k[0];
      break;
    case 0 : return c;              /* zero length strings require no mixing */
    }
  }

  final(a,b,c);
  return c;
}
#ifdef SELF_TEST

/*
 * hashlittle2: return 2 32-bit hash values
 *
 * This is identical to hashlittle(), except it returns two 32-bit hash
 * values instead of just one.  This is good enough for hash table
 * lookup with 2^^64 buckets, or if you want a second hash if you're not
 * happy with the first, or if you want a probably-unique 64-bit ID for
 * the key.  *pc is better mixed than *pb, so use *pc first.  If you want
 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
 */
void hashlittle2(
  const void *key,       /* the key to hash */
  size_t      length,    /* length of the key */
  uint32_t   *pc,        /* IN: primary initval, OUT: primary hash */
  uint32_t   *pb)        /* IN: secondary initval, OUT: secondary hash */
{
  uint32_t a,b,c;                                          /* internal state */
  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */

  /* Set up the internal state */
  a = b = c = raninit + ((uint32_t)length) + *pc;
  c += *pb;

  u.ptr = key;
  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
#ifdef ARRAY_CLEAN_ACCESS
    const uint8_t  *k8;
#endif

    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a,b,c);
      length -= 12;
      k += 3;
    }

    /*----------------------------- handle the last (probably partial) block */
    /*
     * "k[2]&0xffffff" actually reads beyond the end of the string, but
     * then masks off the part it's not allowed to read.  Because the
     * string is aligned, the masked-off tail is in the same word as the
     * rest of the string.  Every machine with memory protection I've seen
     * does it on word boundaries, so is OK with this.  But VALGRIND will
     * still catch it and complain.  The masking trick does make the hash
     * noticeably faster for short strings (like English words).
     */
#ifndef ARRAY_CLEAN_ACCESS

    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
    case 5 : b+=k[1]&0xff; a+=k[0]; break;
    case 4 : a+=k[0]; break;
    case 3 : a+=k[0]&0xffffff; break;
    case 2 : a+=k[0]&0xffff; break;
    case 1 : a+=k[0]&0xff; break;
    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
    }

#else /* make valgrind happy */

    k8 = (const uint8_t *)k;
    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
    case 9 : c+=k8[8];                   /* fall through */
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
    case 5 : b+=k8[4];                   /* fall through */
    case 4 : a+=k[0]; break;
    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
    case 1 : a+=k8[0]; break;
    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
    }

#endif /* !valgrind */

  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
    const uint8_t  *k8;

    /*--------------- all but last block: aligned reads and different mixing */
    while (length > 12)
    {
      a += k[0] + (((uint32_t)k[1])<<16);
      b += k[2] + (((uint32_t)k[3])<<16);
      c += k[4] + (((uint32_t)k[5])<<16);
      mix(a,b,c);
      length -= 12;
      k += 6;
    }

    /*----------------------------- handle the last (probably partial) block */
    k8 = (const uint8_t *)k;
    switch(length)
    {
    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
    case 10: c+=k[4];
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 9 : c+=k8[8];                      /* fall through */
    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
    case 6 : b+=k[2];
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 5 : b+=k8[4];                      /* fall through */
    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
    case 2 : a+=k[0];
             break;
    case 1 : a+=k8[0];
             break;
    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
    }

  } else {                        /* need to read the key one byte at a time */
    const uint8_t *k = (const uint8_t *)key;

    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      a += ((uint32_t)k[1])<<8;
      a += ((uint32_t)k[2])<<16;
      a += ((uint32_t)k[3])<<24;
      b += k[4];
      b += ((uint32_t)k[5])<<8;
      b += ((uint32_t)k[6])<<16;
      b += ((uint32_t)k[7])<<24;
      c += k[8];
      c += ((uint32_t)k[9])<<8;
      c += ((uint32_t)k[10])<<16;
      c += ((uint32_t)k[11])<<24;
      mix(a,b,c);
      length -= 12;
      k += 12;
    }

    /*-------------------------------- last block: affect all 32 bits of (c) */
    switch(length)                   /* all the case statements fall through */
    {
    case 12: c+=((uint32_t)k[11])<<24;
    case 11: c+=((uint32_t)k[10])<<16;
    case 10: c+=((uint32_t)k[9])<<8;
    case 9 : c+=k[8];
    case 8 : b+=((uint32_t)k[7])<<24;
    case 7 : b+=((uint32_t)k[6])<<16;
    case 6 : b+=((uint32_t)k[5])<<8;
    case 5 : b+=k[4];
    case 4 : a+=((uint32_t)k[3])<<24;
    case 3 : a+=((uint32_t)k[2])<<16;
    case 2 : a+=((uint32_t)k[1])<<8;
    case 1 : a+=k[0];
      break;
    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
    }
  }

  final(a,b,c);
  *pc=c; *pb=b;
}

#endif /* SELF_TEST */
#if 0 /* currently not used */

/*
 * hashbig():
 * This is the same as hashword() on big-endian machines.  It is different
 * from hashlittle() on all machines.  hashbig() takes advantage of
 * big-endian byte ordering.
 */
uint32_t hashbig( const void *key, size_t length, uint32_t initval)
{
  uint32_t a,b,c;
  union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */

  /* Set up the internal state */
  a = b = c = raninit + ((uint32_t)length) + initval;

  u.ptr = key;
  if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
#ifdef VALGRIND
    const uint8_t  *k8;
#endif

    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a,b,c);
      length -= 12;
      k += 3;
    }

    /*----------------------------- handle the last (probably partial) block */
    /*
     * "k[2]<<8" actually reads beyond the end of the string, but
     * then shifts out the part it's not allowed to read.  Because the
     * string is aligned, the illegal read is in the same word as the
     * rest of the string.  Every machine with memory protection I've seen
     * does it on word boundaries, so is OK with this.  But VALGRIND will
     * still catch it and complain.  The masking trick does make the hash
     * noticeably faster for short strings (like English words).
     */
#ifndef VALGRIND

    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
    case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
    case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
    case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
    case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
    case 4 : a+=k[0]; break;
    case 3 : a+=k[0]&0xffffff00; break;
    case 2 : a+=k[0]&0xffff0000; break;
    case 1 : a+=k[0]&0xff000000; break;
    case 0 : return c;              /* zero length strings require no mixing */
    }

#else  /* make valgrind happy */

    k8 = (const uint8_t *)k;
    switch(length)                   /* all the case statements fall through */
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=((uint32_t)k8[10])<<8;  /* fall through */
    case 10: c+=((uint32_t)k8[9])<<16;  /* fall through */
    case 9 : c+=((uint32_t)k8[8])<<24;  /* fall through */
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=((uint32_t)k8[6])<<8;   /* fall through */
    case 6 : b+=((uint32_t)k8[5])<<16;  /* fall through */
    case 5 : b+=((uint32_t)k8[4])<<24;  /* fall through */
    case 4 : a+=k[0]; break;
    case 3 : a+=((uint32_t)k8[2])<<8;   /* fall through */
    case 2 : a+=((uint32_t)k8[1])<<16;  /* fall through */
    case 1 : a+=((uint32_t)k8[0])<<24; break;
    case 0 : return c;
    }

#endif /* !VALGRIND */

  } else {                        /* need to read the key one byte at a time */
    const uint8_t *k = (const uint8_t *)key;

    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += ((uint32_t)k[0])<<24;
      a += ((uint32_t)k[1])<<16;
      a += ((uint32_t)k[2])<<8;
      a += ((uint32_t)k[3]);
      b += ((uint32_t)k[4])<<24;
      b += ((uint32_t)k[5])<<16;
      b += ((uint32_t)k[6])<<8;
      b += ((uint32_t)k[7]);
      c += ((uint32_t)k[8])<<24;
      c += ((uint32_t)k[9])<<16;
      c += ((uint32_t)k[10])<<8;
      c += ((uint32_t)k[11]);
      mix(a,b,c);
      length -= 12;
      k += 12;
    }

    /*-------------------------------- last block: affect all 32 bits of (c) */
    switch(length)                   /* all the case statements fall through */
    {
    case 12: c+=k[11];
    case 11: c+=((uint32_t)k[10])<<8;
    case 10: c+=((uint32_t)k[9])<<16;
    case 9 : c+=((uint32_t)k[8])<<24;
    case 8 : b+=k[7];
    case 7 : b+=((uint32_t)k[6])<<8;
    case 6 : b+=((uint32_t)k[5])<<16;
    case 5 : b+=((uint32_t)k[4])<<24;
    case 4 : a+=k[3];
    case 3 : a+=((uint32_t)k[2])<<8;
    case 2 : a+=((uint32_t)k[1])<<16;
    case 1 : a+=((uint32_t)k[0])<<24;
      break;
    case 0 : return c;
    }
  }

  final(a,b,c);
  return c;
}

#endif /* 0 == currently not used */
#ifdef SELF_TEST

/* NOTE(review): this SELF_TEST section was reconstructed from upstream
 * lookup3.c after lines were lost in extraction; cross-check exact
 * printf spacing against the upstream self-test before relying on it. */

/* used for timings */
void driver1()
{
  uint8_t buf[256];
  uint32_t i;
  uint32_t h=0;
  time_t a,z;

  time(&a);
  for (i=0; i<256; ++i) buf[i] = 'x';
  for (i=0; i<1; ++i)
  {
    h = hashlittle(&buf[0],1,h);
  }
  time(&z);
  if (z-a > 0) printf("time %d %.8x\n", (int)(z-a), h);
}

/* check that every input bit changes every output bit half the time */
#define HASHSTATE 1
#define HASHLEN   1
#define MAXPAIR 60
#define MAXLEN  70
void driver2()
{
  uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
  uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
  uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
  uint32_t x[HASHSTATE],y[HASHSTATE];
  uint32_t hlen;

  printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
  for (hlen=0; hlen < MAXLEN; ++hlen)
  {
    z=0;
    for (i=0; i<hlen; ++i)  /*----------------------- for each input byte, */
    {
      for (j=0; j<8; ++j)   /*------------------------ for each input bit, */
      {
	for (m=1; m<8; ++m) /*------------ for several possible initvals, */
	{
	  for (l=0; l<HASHSTATE; ++l)
	    e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);

	  /*---- check that every output bit is affected by that input bit */
	  for (k=0; k<MAXPAIR; k+=2)
	  {
	    uint32_t finished=1;
	    /* keys have one bit different */
	    for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
	    /* have a and b be two keys differing in only one bit */
	    a[i] ^= (k<<j);
	    a[i] ^= (k>>(8-j));
	     c[0] = hashlittle(a, hlen, m);
	    b[i] ^= ((k+1)<<j);
	    b[i] ^= ((k+1)>>(8-j));
	     d[0] = hashlittle(b, hlen, m);
	    /* check every bit is 1, 0, set, and not set at least once */
	    for (l=0; l<HASHSTATE; ++l)
	    {
	      e[l] &= (c[l]^d[l]);
	      f[l] &= ~(c[l]^d[l]);
	      g[l] &= c[l];
	      h[l] &= ~c[l];
	      x[l] &= d[l];
	      y[l] &= ~d[l];
	      if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
	    }
	    if (finished) break;
	  }
	  if (k>z) z=k;
	  if (k==MAXPAIR)
	  {
	     printf("Some bit didn't change: ");
	     printf("%.8x %.8x %.8x %.8x %.8x %.8x  ",
	            e[0],f[0],g[0],h[0],x[0],y[0]);
	     printf("i %d j %d m %d len %d\n", i, j, m, hlen);
	  }
	  if (z==MAXPAIR) goto done;
	}
      }
    }
   done:
    if (z < MAXPAIR)
    {
      printf("Mix success  %2d bytes  %2d initvals  ",i,m);
      printf("required  %d  trials\n", z/2);
    }
  }
  printf("\n");
}

/* Check for reading beyond the end of the buffer and alignment problems */
void driver3()
{
  uint8_t buf[MAXLEN+20], *b;
  uint32_t len;
  uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
  uint32_t h;
  uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
  uint32_t i;
  uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
  uint32_t j;
  uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
  uint32_t ref,x,y;
  uint8_t *p;

  printf("Endianness.  These lines should all be the same (for values filled in):\n");
  printf("%.8x                            %.8x                            %.8x\n",
         hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
         hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
         hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
  p = q;
  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
  p = &qq[1];
  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
  p = &qqq[2];
  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
  p = &qqqq[3];
  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
  printf("\n");

  /* check that hashlittle2 and hashlittle produce the same results */
  i=47; j=70;
  hashlittle2(q, sizeof(q), &i, &j);
  if (hashlittle(q, sizeof(q), 47) != i)
    printf("hashlittle2 and hashlittle mismatch\n");

  /* check that hashword2 and hashword produce the same results */
  len = 0xdeadbeef;
  i=47, j=70;
  hashword2(&len, 1, &i, &j);
  if (hashword(&len, 1, 47) != i)
    printf("hashword2 and hashword mismatch %x %x\n",
           i, hashword(&len, 1, 47));

  /* check hashlittle doesn't read before or after the ends of the string */
  for (h=0, b=buf+1; h<8; ++h, ++b)
  {
    for (i=0; i<MAXLEN; ++i)
    {
      len = i;
      for (j=0; j<i; ++j) *(b+j)=0;

      /* these should all be equal */
      ref = hashlittle(b, len, (uint32_t)1);
      *(b+i)=(uint8_t)~0;
      *(b-1)=(uint8_t)~0;
      x = hashlittle(b, len, (uint32_t)1);
      y = hashlittle(b, len, (uint32_t)1);
      if ((ref != x) || (ref != y))
      {
	printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
               h, i);
      }
    }
  }
}

/* check for problems with nulls */
void driver4()
{
  uint8_t buf[1];
  uint32_t h,i,state[HASHSTATE];

  buf[0] = ~0;
  for (i=0; i<HASHSTATE; ++i) state[i] = 1;
  printf("These should all be different\n");
  for (i=0, h=0; i<8; ++i)
  {
    h = hashlittle(buf, 0, h);
    printf("%2ld  0-byte strings, hash is  %.8x\n", (long)i, h);
  }
}

int main()
{
  driver1();   /* test that the key is hashed: used for timings */
  driver2();   /* test that whole key is hashed thoroughly */
  driver3();   /* test that nothing but the key is hashed */
  driver4();   /* test hashing multiple buffers (all buffers are null) */
  return 1;
}

#endif  /* SELF_TEST */