contrib/apr/tables/apr_hash.c

   1 /* Licensed to the Apache Software Foundation (ASF) under one or more
   2  * contributor license agreements.  See the NOTICE file distributed with
   3  * this work for additional information regarding copyright ownership.
   4  * The ASF licenses this file to You under the Apache License, Version 2.0
   5  * (the "License"); you may not use this file except in compliance with
   6  * the License.  You may obtain a copy of the License at
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "apr_private.h"
  18
  19 #include "apr_general.h"
  20 #include "apr_pools.h"
  21 #include "apr_time.h"
  22
  23 #include "apr_hash.h"
  24
  25 #if APR_HAVE_STDLIB_H
  26 #include <stdlib.h>
  27 #endif
  28 #if APR_HAVE_STRING_H
  29 #include <string.h>
  30 #endif
  31
  32 #if APR_POOL_DEBUG && APR_HAVE_STDIO_H
  33 #include <stdio.h>
  34 #endif
  35
  36 /*
  37  * The internal form of a hash table.
  38  *
  39  * The table is an array indexed by the hash of the key; collisions
  40  * are resolved by hanging a linked list of hash entries off each
  41  * element of the array. Although this is a really simple design it
  42  * isn't too bad given that pools have a low allocation overhead.
  43  */
  44
  45 typedef struct apr_hash_entry_t apr_hash_entry_t;
  46
  47 struct apr_hash_entry_t {
  48     apr_hash_entry_t *next;
  49     unsigned int      hash;
  50     const void       *key;
  51     apr_ssize_t       klen;
  52     const void       *val;
  53 };
  54
  55 /*
  56  * Data structure for iterating through a hash table.
  57  *
  58  * We keep a pointer to the next hash entry here to allow the current
  59  * hash entry to be freed or otherwise mangled between calls to
  60  * apr_hash_next().
  61  */
  62 struct apr_hash_index_t {
  63     apr_hash_t         *ht;
  64     apr_hash_entry_t   *this, *next;
  65     unsigned int        index;
  66 };
  67
  68 /*
  69  * The size of the array is always a power of two. We use the maximum
  70  * index rather than the size so that we can use bitwise-AND for
  71  * modular arithmetic.
  72  * The count of hash entries may be greater depending on the chosen
  73  * collision rate.
  74  */
  75 struct apr_hash_t {
  76     apr_pool_t          *pool;
  77     apr_hash_entry_t   **array;
  78     apr_hash_index_t     iterator;  /* For apr_hash_first(NULL, ...) */
  79     unsigned int         count, max, seed;
  80     apr_hashfunc_t       hash_func;
  81     apr_hash_entry_t    *free;  /* List of recycled entries */
  82 };
  83
  84 #define INITIAL_MAX 15 /* tunable == 2^n - 1 */
  85
  86
  87 /*
  88  * Hash creation functions.
  89  */
  90
  91 static apr_hash_entry_t **alloc_array(apr_hash_t *ht, unsigned int max)
  92 {
  93    return apr_pcalloc(ht->pool, sizeof(*ht->array) * (max + 1));
  94 }
  95
  96 APR_DECLARE(apr_hash_t *) apr_hash_make(apr_pool_t *pool)
  97 {
  98     apr_hash_t *ht;
  99     apr_time_t now = apr_time_now();
 100
 101     ht = apr_palloc(pool, sizeof(apr_hash_t));
 102     ht->pool = pool;
 103     ht->free = NULL;
 104     ht->count = 0;
 105     ht->max = INITIAL_MAX;
 106     ht->seed = (unsigned int)((now >> 32) ^ now ^ (apr_uintptr_t)pool ^
 107                               (apr_uintptr_t)ht ^ (apr_uintptr_t)&now) - 1;
 108     ht->array = alloc_array(ht, ht->max);
 109     ht->hash_func = NULL;
 110
 111     return ht;
 112 }
 113
 114 APR_DECLARE(apr_hash_t *) apr_hash_make_custom(apr_pool_t *pool,
 115                                                apr_hashfunc_t hash_func)
 116 {
 117     apr_hash_t *ht = apr_hash_make(pool);
 118     ht->hash_func = hash_func;
 119     return ht;
 120 }
 121
 122
 123 /*
 124  * Hash iteration functions.
 125  */
 126
 127 APR_DECLARE(apr_hash_index_t *) apr_hash_next(apr_hash_index_t *hi)
 128 {
 129     hi->this = hi->next;
 130     while (!hi->this) {
 131         if (hi->index > hi->ht->max)
 132             return NULL;
 133
 134         hi->this = hi->ht->array[hi->index++];
 135     }
 136     hi->next = hi->this->next;
 137     return hi;
 138 }
 139
 140 APR_DECLARE(apr_hash_index_t *) apr_hash_first(apr_pool_t *p, apr_hash_t *ht)
 141 {
 142     apr_hash_index_t *hi;
 143     if (p)
 144         hi = apr_palloc(p, sizeof(*hi));
 145     else
 146         hi = &ht->iterator;
 147
 148     hi->ht = ht;
 149     hi->index = 0;
 150     hi->this = NULL;
 151     hi->next = NULL;
 152     return apr_hash_next(hi);
 153 }
 154
 155 APR_DECLARE(void) apr_hash_this(apr_hash_index_t *hi,
 156                                 const void **key,
 157                                 apr_ssize_t *klen,
 158                                 void **val)
 159 {
 160     if (key)  *key  = hi->this->key;
 161     if (klen) *klen = hi->this->klen;
 162     if (val)  *val  = (void *)hi->this->val;
 163 }
 164
 165
 166 /*
 167  * Expanding a hash table
 168  */
 169
 170 static void expand_array(apr_hash_t *ht)
 171 {
 172     apr_hash_index_t *hi;
 173     apr_hash_entry_t **new_array;
 174     unsigned int new_max;
 175
 176     new_max = ht->max * 2 + 1;
 177     new_array = alloc_array(ht, new_max);
 178     for (hi = apr_hash_first(NULL, ht); hi; hi = apr_hash_next(hi)) {
 179         unsigned int i = hi->this->hash & new_max;
 180         hi->this->next = new_array[i];
 181         new_array[i] = hi->this;
 182     }
 183     ht->array = new_array;
 184     ht->max = new_max;
 185 }
 186
 187 static unsigned int hashfunc_default(const char *char_key, apr_ssize_t *klen,
 188                                      unsigned int hash)
 189 {
 190     const unsigned char *key = (const unsigned char *)char_key;
 191     const unsigned char *p;
 192     apr_ssize_t i;
 193
 194     /*
 195      * This is the popular `times 33' hash algorithm which is used by
 196      * perl and also appears in Berkeley DB. This is one of the best
 197      * known hash functions for strings because it is both computed
 198      * very fast and distributes very well.
 199      *
 200      * The originator may be Dan Bernstein but the code in Berkeley DB
 201      * cites Chris Torek as the source. The best citation I have found
 202      * is "Chris Torek, Hash function for text in C, Usenet message
 203      * <27038@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich
 204      * Salz's USENIX 1992 paper about INN which can be found at
 205      * <http://citeseer.nj.nec.com/salz92internetnews.html>.
 206      *
 207      * The magic of number 33, i.e. why it works better than many other
 208      * constants, prime or not, has never been adequately explained by
 209      * anyone. So I try an explanation: if one experimentally tests all
 210      * multipliers between 1 and 256 (as I did while writing a low-level
 211      * data structure library some time ago) one detects that even
 212      * numbers are not useable at all. The remaining 128 odd numbers
 213      * (except for the number 1) work more or less all equally well.
 214      * They all distribute in an acceptable way and this way fill a hash
 215      * table with an average percent of approx. 86%.
 216      *
 217      * If one compares the chi^2 values of the variants (see
 218      * Bob Jenkins ``Hashing Frequently Asked Questions'' at
 219      * http://burtleburtle.net/bob/hash/hashfaq.html for a description
 220      * of chi^2), the number 33 not even has the best value. But the
 221      * number 33 and a few other equally good numbers like 17, 31, 63,
 222      * 127 and 129 have nevertheless a great advantage to the remaining
 223      * numbers in the large set of possible multipliers: their multiply
 224      * operation can be replaced by a faster operation based on just one
 225      * shift plus either a single addition or subtraction operation. And
 226      * because a hash function has to both distribute good _and_ has to
 227      * be very fast to compute, those few numbers should be preferred.
 228      *
 229      *                  -- Ralf S. Engelschall <rse@engelschall.com>
 230      */
 231
 232     if (*klen == APR_HASH_KEY_STRING) {
 233         for (p = key; *p; p++) {
 234             hash = hash * 33 + *p;
 235         }
 236         *klen = p - key;
 237     }
 238     else {
 239         for (p = key, i = *klen; i; i--, p++) {
 240             hash = hash * 33 + *p;
 241         }
 242     }
 243
 244     return hash;
 245 }
 246
 247 APR_DECLARE_NONSTD(unsigned int) apr_hashfunc_default(const char *char_key,
 248                                                       apr_ssize_t *klen)
 249 {
 250     return hashfunc_default(char_key, klen, 0);
 251 }
 252
 253 /*
 254  * This is where we keep the details of the hash function and control
 255  * the maximum collision rate.
 256  *
 257  * If val is non-NULL it creates and initializes a new hash entry if
 258  * there isn't already one there; it returns an updatable pointer so
 259  * that hash entries can be removed.
 260  */
 261
 262 static apr_hash_entry_t **find_entry(apr_hash_t *ht,
 263                                      const void *key,
 264                                      apr_ssize_t klen,
 265                                      const void *val)
 266 {
 267     apr_hash_entry_t **hep, *he;
 268     unsigned int hash;
 269
 270     if (ht->hash_func)
 271         hash = ht->hash_func(key, &klen);
 272     else
 273         hash = hashfunc_default(key, &klen, ht->seed);
 274
 275     /* scan linked list */
 276     for (hep = &ht->array[hash & ht->max], he = *hep;
 277          he; hep = &he->next, he = *hep) {
 278         if (he->hash == hash
 279             && he->klen == klen
 280             && memcmp(he->key, key, klen) == 0)
 281             break;
 282     }
 283     if (he || !val)
 284         return hep;
 285
 286     /* add a new entry for non-NULL values */
 287     if ((he = ht->free) != NULL)
 288         ht->free = he->next;
 289     else
 290         he = apr_palloc(ht->pool, sizeof(*he));
 291     he->next = NULL;
 292     he->hash = hash;
 293     he->key  = key;
 294     he->klen = klen;
 295     he->val  = val;
 296     *hep = he;
 297     ht->count++;
 298     return hep;
 299 }
 300
 301 APR_DECLARE(apr_hash_t *) apr_hash_copy(apr_pool_t *pool,
 302                                         const apr_hash_t *orig)
 303 {
 304     apr_hash_t *ht;
 305     apr_hash_entry_t *new_vals;
 306     unsigned int i, j;
 307
 308     ht = apr_palloc(pool, sizeof(apr_hash_t) +
 309                     sizeof(*ht->array) * (orig->max + 1) +
 310                     sizeof(apr_hash_entry_t) * orig->count);
 311     ht->pool = pool;
 312     ht->free = NULL;
 313     ht->count = orig->count;
 314     ht->max = orig->max;
 315     ht->seed = orig->seed;
 316     ht->hash_func = orig->hash_func;
 317     ht->array = (apr_hash_entry_t **)((char *)ht + sizeof(apr_hash_t));
 318
 319     new_vals = (apr_hash_entry_t *)((char *)(ht) + sizeof(apr_hash_t) +
 320                                     sizeof(*ht->array) * (orig->max + 1));
 321     j = 0;
 322     for (i = 0; i <= ht->max; i++) {
 323         apr_hash_entry_t **new_entry = &(ht->array[i]);
 324         apr_hash_entry_t *orig_entry = orig->array[i];
 325         while (orig_entry) {
 326             *new_entry = &new_vals[j++];
 327             (*new_entry)->hash = orig_entry->hash;
 328             (*new_entry)->key = orig_entry->key;
 329             (*new_entry)->klen = orig_entry->klen;
 330             (*new_entry)->val = orig_entry->val;
 331             new_entry = &((*new_entry)->next);
 332             orig_entry = orig_entry->next;
 333         }
 334         *new_entry = NULL;
 335     }
 336     return ht;
 337 }
 338
 339 APR_DECLARE(void *) apr_hash_get(apr_hash_t *ht,
 340                                  const void *key,
 341                                  apr_ssize_t klen)
 342 {
 343     apr_hash_entry_t *he;
 344     he = *find_entry(ht, key, klen, NULL);
 345     if (he)
 346         return (void *)he->val;
 347     else
 348         return NULL;
 349 }
 350
 351 APR_DECLARE(void) apr_hash_set(apr_hash_t *ht,
 352                                const void *key,
 353                                apr_ssize_t klen,
 354                                const void *val)
 355 {
 356     apr_hash_entry_t **hep;
 357     hep = find_entry(ht, key, klen, val);
 358     if (*hep) {
 359         if (!val) {
 360             /* delete entry */
 361             apr_hash_entry_t *old = *hep;
 362             *hep = (*hep)->next;
 363             old->next = ht->free;
 364             ht->free = old;
 365             --ht->count;
 366         }
 367         else {
 368             /* replace entry */
 369             (*hep)->val = val;
 370             /* check that the collision rate isn't too high */
 371             if (ht->count > ht->max) {
 372                 expand_array(ht);
 373             }
 374         }
 375     }
 376     /* else key not present and val==NULL */
 377 }
 378
 379 APR_DECLARE(unsigned int) apr_hash_count(apr_hash_t *ht)
 380 {
 381     return ht->count;
 382 }
 383
 384 APR_DECLARE(void) apr_hash_clear(apr_hash_t *ht)
 385 {
 386     apr_hash_index_t *hi;
 387     for (hi = apr_hash_first(NULL, ht); hi; hi = apr_hash_next(hi))
 388         apr_hash_set(ht, hi->this->key, hi->this->klen, NULL);
 389 }
 390
 391 APR_DECLARE(apr_hash_t*) apr_hash_overlay(apr_pool_t *p,
 392                                           const apr_hash_t *overlay,
 393                                           const apr_hash_t *base)
 394 {
 395     return apr_hash_merge(p, overlay, base, NULL, NULL);
 396 }
 397
 398 APR_DECLARE(apr_hash_t *) apr_hash_merge(apr_pool_t *p,
 399                                          const apr_hash_t *overlay,
 400                                          const apr_hash_t *base,
 401                                          void * (*merger)(apr_pool_t *p,
 402                                                      const void *key,
 403                                                      apr_ssize_t klen,
 404                                                      const void *h1_val,
 405                                                      const void *h2_val,
 406                                                      const void *data),
 407                                          const void *data)
 408 {
 409     apr_hash_t *res;
 410     apr_hash_entry_t *new_vals = NULL;
 411     apr_hash_entry_t *iter;
 412     apr_hash_entry_t *ent;
 413     unsigned int i, j, k, hash;
 414
 415 #if APR_POOL_DEBUG
 416     /* we don't copy keys and values, so it's necessary that
 417      * overlay->a.pool and base->a.pool have a life span at least
 418      * as long as p
 419      */
 420     if (!apr_pool_is_ancestor(overlay->pool, p)) {
 421         fprintf(stderr,
 422                 "apr_hash_merge: overlay's pool is not an ancestor of p\n");
 423         abort();
 424     }
 425     if (!apr_pool_is_ancestor(base->pool, p)) {
 426         fprintf(stderr,
 427                 "apr_hash_merge: base's pool is not an ancestor of p\n");
 428         abort();
 429     }
 430 #endif
 431
 432     res = apr_palloc(p, sizeof(apr_hash_t));
 433     res->pool = p;
 434     res->free = NULL;
 435     res->hash_func = base->hash_func;
 436     res->count = base->count;
 437     res->max = (overlay->max > base->max) ? overlay->max : base->max;
 438     if (base->count + overlay->count > res->max) {
 439         res->max = res->max * 2 + 1;
 440     }
 441     res->seed = base->seed;
 442     res->array = alloc_array(res, res->max);
 443     if (base->count + overlay->count) {
 444         new_vals = apr_palloc(p, sizeof(apr_hash_entry_t) *
 445                               (base->count + overlay->count));
 446     }
 447     j = 0;
 448     for (k = 0; k <= base->max; k++) {
 449         for (iter = base->array[k]; iter; iter = iter->next) {
 450             i = iter->hash & res->max;
 451             new_vals[j].klen = iter->klen;
 452             new_vals[j].key = iter->key;
 453             new_vals[j].val = iter->val;
 454             new_vals[j].hash = iter->hash;
 455             new_vals[j].next = res->array[i];
 456             res->array[i] = &new_vals[j];
 457             j++;
 458         }
 459     }
 460
 461     for (k = 0; k <= overlay->max; k++) {
 462         for (iter = overlay->array[k]; iter; iter = iter->next) {
 463             if (res->hash_func)
 464                 hash = res->hash_func(iter->key, &iter->klen);
 465             else
 466                 hash = hashfunc_default(iter->key, &iter->klen, res->seed);
 467             i = hash & res->max;
 468             for (ent = res->array[i]; ent; ent = ent->next) {
 469                 if ((ent->klen == iter->klen) &&
 470                     (memcmp(ent->key, iter->key, iter->klen) == 0)) {
 471                     if (merger) {
 472                         ent->val = (*merger)(p, iter->key, iter->klen,
 473                                              iter->val, ent->val, data);
 474                     }
 475                     else {
 476                         ent->val = iter->val;
 477                     }
 478                     break;
 479                 }
 480             }
 481             if (!ent) {
 482                 new_vals[j].klen = iter->klen;
 483                 new_vals[j].key = iter->key;
 484                 new_vals[j].val = iter->val;
 485                 new_vals[j].hash = hash;
 486                 new_vals[j].next = res->array[i];
 487                 res->array[i] = &new_vals[j];
 488                 res->count++;
 489                 j++;
 490             }
 491         }
 492     }
 493     return res;
 494 }
 495
 496 /* This is basically the following...
 497  * for every element in hash table {
 498  *    comp elemeny.key, element.value
 499  * }
 500  *
 501  * Like with apr_table_do, the comp callback is called for each and every
 502  * element of the hash table.
 503  */
 504 APR_DECLARE(int) apr_hash_do(apr_hash_do_callback_fn_t *comp,
 505                              void *rec, const apr_hash_t *ht)
 506 {
 507     apr_hash_index_t  hix;
 508     apr_hash_index_t *hi;
 509     int rv, dorv  = 1;
 510
 511     hix.ht    = (apr_hash_t *)ht;
 512     hix.index = 0;
 513     hix.this  = NULL;
 514     hix.next  = NULL;
 515
 516     if ((hi = apr_hash_next(&hix))) {
 517         /* Scan the entire table */
 518         do {
 519             rv = (*comp)(rec, hi->this->key, hi->this->klen, hi->this->val);
 520         } while (rv && (hi = apr_hash_next(hi)));
 521
 522         if (rv == 0) {
 523             dorv = 0;
 524         }
 525     }
 526     return dorv;
 527 }
 528
 529 APR_POOL_IMPLEMENT_ACCESSOR(hash)