contrib/apr/tables/apr_hash.c

   1 /* Licensed to the Apache Software Foundation (ASF) under one or more
   2  * contributor license agreements.  See the NOTICE file distributed with
   3  * this work for additional information regarding copyright ownership.
   4  * The ASF licenses this file to You under the Apache License, Version 2.0
   5  * (the "License"); you may not use this file except in compliance with
   6  * the License.  You may obtain a copy of the License at
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "apr_private.h"
  18
  19 #include "apr_general.h"
  20 #include "apr_pools.h"
  21 #include "apr_time.h"
  22
  23 #include "apr_hash.h"
  24
  25 #if APR_HAVE_STDLIB_H
  26 #include <stdlib.h>
  27 #endif
  28 #if APR_HAVE_STRING_H
  29 #include <string.h>
  30 #endif
  31
  32 #if APR_POOL_DEBUG && APR_HAVE_STDIO_H
  33 #include <stdio.h>
  34 #endif
  35
  36 /*
  37  * The internal form of a hash table.
  38  *
  39  * The table is an array indexed by the hash of the key; collisions
  40  * are resolved by hanging a linked list of hash entries off each
  41  * element of the array. Although this is a really simple design it
  42  * isn't too bad given that pools have a low allocation overhead.
  43  */
  44
  45 typedef struct apr_hash_entry_t apr_hash_entry_t;
  46
  47 struct apr_hash_entry_t {
  48     apr_hash_entry_t *next;
  49     unsigned int      hash;
  50     const void       *key;
  51     apr_ssize_t       klen;
  52     const void       *val;
  53 };
  54
  55 /*
  56  * Data structure for iterating through a hash table.
  57  *
  58  * We keep a pointer to the next hash entry here to allow the current
  59  * hash entry to be freed or otherwise mangled between calls to
  60  * apr_hash_next().
  61  */
  62 struct apr_hash_index_t {
  63     apr_hash_t         *ht;
  64     apr_hash_entry_t   *this, *next;
  65     unsigned int        index;
  66 };
  67
  68 /*
  69  * The size of the array is always a power of two. We use the maximum
  70  * index rather than the size so that we can use bitwise-AND for
  71  * modular arithmetic.
  72  * The count of hash entries may be greater depending on the chosen
  73  * collision rate.
  74  */
  75 struct apr_hash_t {
  76     apr_pool_t          *pool;
  77     apr_hash_entry_t   **array;
  78     apr_hash_index_t     iterator;  /* For apr_hash_first(NULL, ...) */
  79     unsigned int         count, max, seed;
  80     apr_hashfunc_t       hash_func;
  81     apr_hash_entry_t    *free;  /* List of recycled entries */
  82 };
  83
  84 #define INITIAL_MAX 15 /* tunable == 2^n - 1 */
  85
  86
  87 /*
  88  * Hash creation functions.
  89  */
  90
  91 static apr_hash_entry_t **alloc_array(apr_hash_t *ht, unsigned int max)
  92 {
  93    return apr_pcalloc(ht->pool, sizeof(*ht->array) * (max + 1));
  94 }
  95
  96 APR_DECLARE(apr_hash_t *) apr_hash_make(apr_pool_t *pool)
  97 {
  98     apr_hash_t *ht;
  99     apr_time_t now = apr_time_now();
 100
 101     ht = apr_palloc(pool, sizeof(apr_hash_t));
 102     ht->pool = pool;
 103     ht->free = NULL;
 104     ht->count = 0;
 105     ht->max = INITIAL_MAX;
 106     ht->seed = (unsigned int)((now >> 32) ^ now ^ (apr_uintptr_t)pool ^
 107                               (apr_uintptr_t)ht ^ (apr_uintptr_t)&now) - 1;
 108     ht->array = alloc_array(ht, ht->max);
 109     ht->hash_func = NULL;
 110
 111     return ht;
 112 }
 113
 114 APR_DECLARE(apr_hash_t *) apr_hash_make_custom(apr_pool_t *pool,
 115                                                apr_hashfunc_t hash_func)
 116 {
 117     apr_hash_t *ht = apr_hash_make(pool);
 118     ht->hash_func = hash_func;
 119     return ht;
 120 }
 121
 122
 123 /*
 124  * Hash iteration functions.
 125  */
 126
 127 APR_DECLARE(apr_hash_index_t *) apr_hash_next(apr_hash_index_t *hi)
 128 {
 129     hi->this = hi->next;
 130     while (!hi->this) {
 131         if (hi->index > hi->ht->max)
 132             return NULL;
 133
 134         hi->this = hi->ht->array[hi->index++];
 135     }
 136     hi->next = hi->this->next;
 137     return hi;
 138 }
 139
 140 APR_DECLARE(apr_hash_index_t *) apr_hash_first(apr_pool_t *p, apr_hash_t *ht)
 141 {
 142     apr_hash_index_t *hi;
 143     if (p)
 144         hi = apr_palloc(p, sizeof(*hi));
 145     else
 146         hi = &ht->iterator;
 147
 148     hi->ht = ht;
 149     hi->index = 0;
 150     hi->this = NULL;
 151     hi->next = NULL;
 152     return apr_hash_next(hi);
 153 }
 154
 155 APR_DECLARE(void) apr_hash_this(apr_hash_index_t *hi,
 156                                 const void **key,
 157                                 apr_ssize_t *klen,
 158                                 void **val)
 159 {
 160     if (key)  *key  = hi->this->key;
 161     if (klen) *klen = hi->this->klen;
 162     if (val)  *val  = (void *)hi->this->val;
 163 }
 164
 165 APR_DECLARE(const void *) apr_hash_this_key(apr_hash_index_t *hi)
 166 {
 167     const void *key;
 168
 169     apr_hash_this(hi, &key, NULL, NULL);
 170     return key;
 171 }
 172
 173 APR_DECLARE(apr_ssize_t) apr_hash_this_key_len(apr_hash_index_t *hi)
 174 {
 175     apr_ssize_t klen;
 176
 177     apr_hash_this(hi, NULL, &klen, NULL);
 178     return klen;
 179 }
 180
 181 APR_DECLARE(void *) apr_hash_this_val(apr_hash_index_t *hi)
 182 {
 183     void *val;
 184
 185     apr_hash_this(hi, NULL, NULL, &val);
 186     return val;
 187 }
 188
 189 /*
 190  * Expanding a hash table
 191  */
 192
 193 static void expand_array(apr_hash_t *ht)
 194 {
 195     apr_hash_index_t *hi;
 196     apr_hash_entry_t **new_array;
 197     unsigned int new_max;
 198
 199     new_max = ht->max * 2 + 1;
 200     new_array = alloc_array(ht, new_max);
 201     for (hi = apr_hash_first(NULL, ht); hi; hi = apr_hash_next(hi)) {
 202         unsigned int i = hi->this->hash & new_max;
 203         hi->this->next = new_array[i];
 204         new_array[i] = hi->this;
 205     }
 206     ht->array = new_array;
 207     ht->max = new_max;
 208 }
 209
 210 static unsigned int hashfunc_default(const char *char_key, apr_ssize_t *klen,
 211                                      unsigned int hash)
 212 {
 213     const unsigned char *key = (const unsigned char *)char_key;
 214     const unsigned char *p;
 215     apr_ssize_t i;
 216
 217     /*
 218      * This is the popular `times 33' hash algorithm which is used by
 219      * perl and also appears in Berkeley DB. This is one of the best
 220      * known hash functions for strings because it is both computed
 221      * very fast and distributes very well.
 222      *
 223      * The originator may be Dan Bernstein but the code in Berkeley DB
 224      * cites Chris Torek as the source. The best citation I have found
 225      * is "Chris Torek, Hash function for text in C, Usenet message
 226      * <27038@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich
 227      * Salz's USENIX 1992 paper about INN which can be found at
 228      * <http://citeseer.nj.nec.com/salz92internetnews.html>.
 229      *
 230      * The magic of number 33, i.e. why it works better than many other
 231      * constants, prime or not, has never been adequately explained by
 232      * anyone. So I try an explanation: if one experimentally tests all
 233      * multipliers between 1 and 256 (as I did while writing a low-level
 234      * data structure library some time ago) one detects that even
 235      * numbers are not useable at all. The remaining 128 odd numbers
 236      * (except for the number 1) work more or less all equally well.
 237      * They all distribute in an acceptable way and this way fill a hash
 238      * table with an average percent of approx. 86%.
 239      *
 240      * If one compares the chi^2 values of the variants (see
 241      * Bob Jenkins ``Hashing Frequently Asked Questions'' at
 242      * http://burtleburtle.net/bob/hash/hashfaq.html for a description
 243      * of chi^2), the number 33 not even has the best value. But the
 244      * number 33 and a few other equally good numbers like 17, 31, 63,
 245      * 127 and 129 have nevertheless a great advantage to the remaining
 246      * numbers in the large set of possible multipliers: their multiply
 247      * operation can be replaced by a faster operation based on just one
 248      * shift plus either a single addition or subtraction operation. And
 249      * because a hash function has to both distribute good _and_ has to
 250      * be very fast to compute, those few numbers should be preferred.
 251      *
 252      *                  -- Ralf S. Engelschall <rse@engelschall.com>
 253      */
 254
 255     if (*klen == APR_HASH_KEY_STRING) {
 256         for (p = key; *p; p++) {
 257             hash = hash * 33 + *p;
 258         }
 259         *klen = p - key;
 260     }
 261     else {
 262         for (p = key, i = *klen; i; i--, p++) {
 263             hash = hash * 33 + *p;
 264         }
 265     }
 266
 267     return hash;
 268 }
 269
 270 APR_DECLARE_NONSTD(unsigned int) apr_hashfunc_default(const char *char_key,
 271                                                       apr_ssize_t *klen)
 272 {
 273     return hashfunc_default(char_key, klen, 0);
 274 }
 275
 276 /*
 277  * This is where we keep the details of the hash function and control
 278  * the maximum collision rate.
 279  *
 280  * If val is non-NULL it creates and initializes a new hash entry if
 281  * there isn't already one there; it returns an updatable pointer so
 282  * that hash entries can be removed.
 283  */
 284
 285 static apr_hash_entry_t **find_entry(apr_hash_t *ht,
 286                                      const void *key,
 287                                      apr_ssize_t klen,
 288                                      const void *val)
 289 {
 290     apr_hash_entry_t **hep, *he;
 291     unsigned int hash;
 292
 293     if (ht->hash_func)
 294         hash = ht->hash_func(key, &klen);
 295     else
 296         hash = hashfunc_default(key, &klen, ht->seed);
 297
 298     /* scan linked list */
 299     for (hep = &ht->array[hash & ht->max], he = *hep;
 300          he; hep = &he->next, he = *hep) {
 301         if (he->hash == hash
 302             && he->klen == klen
 303             && memcmp(he->key, key, klen) == 0)
 304             break;
 305     }
 306     if (he || !val)
 307         return hep;
 308
 309     /* add a new entry for non-NULL values */
 310     if ((he = ht->free) != NULL)
 311         ht->free = he->next;
 312     else
 313         he = apr_palloc(ht->pool, sizeof(*he));
 314     he->next = NULL;
 315     he->hash = hash;
 316     he->key  = key;
 317     he->klen = klen;
 318     he->val  = val;
 319     *hep = he;
 320     ht->count++;
 321     return hep;
 322 }
 323
 324 APR_DECLARE(apr_hash_t *) apr_hash_copy(apr_pool_t *pool,
 325                                         const apr_hash_t *orig)
 326 {
 327     apr_hash_t *ht;
 328     apr_hash_entry_t *new_vals;
 329     unsigned int i, j;
 330
 331     ht = apr_palloc(pool, sizeof(apr_hash_t) +
 332                     sizeof(*ht->array) * (orig->max + 1) +
 333                     sizeof(apr_hash_entry_t) * orig->count);
 334     ht->pool = pool;
 335     ht->free = NULL;
 336     ht->count = orig->count;
 337     ht->max = orig->max;
 338     ht->seed = orig->seed;
 339     ht->hash_func = orig->hash_func;
 340     ht->array = (apr_hash_entry_t **)((char *)ht + sizeof(apr_hash_t));
 341
 342     new_vals = (apr_hash_entry_t *)((char *)(ht) + sizeof(apr_hash_t) +
 343                                     sizeof(*ht->array) * (orig->max + 1));
 344     j = 0;
 345     for (i = 0; i <= ht->max; i++) {
 346         apr_hash_entry_t **new_entry = &(ht->array[i]);
 347         apr_hash_entry_t *orig_entry = orig->array[i];
 348         while (orig_entry) {
 349             *new_entry = &new_vals[j++];
 350             (*new_entry)->hash = orig_entry->hash;
 351             (*new_entry)->key = orig_entry->key;
 352             (*new_entry)->klen = orig_entry->klen;
 353             (*new_entry)->val = orig_entry->val;
 354             new_entry = &((*new_entry)->next);
 355             orig_entry = orig_entry->next;
 356         }
 357         *new_entry = NULL;
 358     }
 359     return ht;
 360 }
 361
 362 APR_DECLARE(void *) apr_hash_get(apr_hash_t *ht,
 363                                  const void *key,
 364                                  apr_ssize_t klen)
 365 {
 366     apr_hash_entry_t *he;
 367     he = *find_entry(ht, key, klen, NULL);
 368     if (he)
 369         return (void *)he->val;
 370     else
 371         return NULL;
 372 }
 373
 374 APR_DECLARE(void) apr_hash_set(apr_hash_t *ht,
 375                                const void *key,
 376                                apr_ssize_t klen,
 377                                const void *val)
 378 {
 379     apr_hash_entry_t **hep;
 380     hep = find_entry(ht, key, klen, val);
 381     if (*hep) {
 382         if (!val) {
 383             /* delete entry */
 384             apr_hash_entry_t *old = *hep;
 385             *hep = (*hep)->next;
 386             old->next = ht->free;
 387             ht->free = old;
 388             --ht->count;
 389         }
 390         else {
 391             /* replace entry */
 392             (*hep)->val = val;
 393             /* check that the collision rate isn't too high */
 394             if (ht->count > ht->max) {
 395                 expand_array(ht);
 396             }
 397         }
 398     }
 399     /* else key not present and val==NULL */
 400 }
 401
 402 APR_DECLARE(unsigned int) apr_hash_count(apr_hash_t *ht)
 403 {
 404     return ht->count;
 405 }
 406
 407 APR_DECLARE(void) apr_hash_clear(apr_hash_t *ht)
 408 {
 409     apr_hash_index_t *hi;
 410     for (hi = apr_hash_first(NULL, ht); hi; hi = apr_hash_next(hi))
 411         apr_hash_set(ht, hi->this->key, hi->this->klen, NULL);
 412 }
 413
 414 APR_DECLARE(apr_hash_t*) apr_hash_overlay(apr_pool_t *p,
 415                                           const apr_hash_t *overlay,
 416                                           const apr_hash_t *base)
 417 {
 418     return apr_hash_merge(p, overlay, base, NULL, NULL);
 419 }
 420
 421 APR_DECLARE(apr_hash_t *) apr_hash_merge(apr_pool_t *p,
 422                                          const apr_hash_t *overlay,
 423                                          const apr_hash_t *base,
 424                                          void * (*merger)(apr_pool_t *p,
 425                                                      const void *key,
 426                                                      apr_ssize_t klen,
 427                                                      const void *h1_val,
 428                                                      const void *h2_val,
 429                                                      const void *data),
 430                                          const void *data)
 431 {
 432     apr_hash_t *res;
 433     apr_hash_entry_t *new_vals = NULL;
 434     apr_hash_entry_t *iter;
 435     apr_hash_entry_t *ent;
 436     unsigned int i, j, k, hash;
 437
 438 #if APR_POOL_DEBUG
 439     /* we don't copy keys and values, so it's necessary that
 440      * overlay->a.pool and base->a.pool have a life span at least
 441      * as long as p
 442      */
 443     if (!apr_pool_is_ancestor(overlay->pool, p)) {
 444         fprintf(stderr,
 445                 "apr_hash_merge: overlay's pool is not an ancestor of p\n");
 446         abort();
 447     }
 448     if (!apr_pool_is_ancestor(base->pool, p)) {
 449         fprintf(stderr,
 450                 "apr_hash_merge: base's pool is not an ancestor of p\n");
 451         abort();
 452     }
 453 #endif
 454
 455     res = apr_palloc(p, sizeof(apr_hash_t));
 456     res->pool = p;
 457     res->free = NULL;
 458     res->hash_func = base->hash_func;
 459     res->count = base->count;
 460     res->max = (overlay->max > base->max) ? overlay->max : base->max;
 461     if (base->count + overlay->count > res->max) {
 462         res->max = res->max * 2 + 1;
 463     }
 464     res->seed = base->seed;
 465     res->array = alloc_array(res, res->max);
 466     if (base->count + overlay->count) {
 467         new_vals = apr_palloc(p, sizeof(apr_hash_entry_t) *
 468                               (base->count + overlay->count));
 469     }
 470     j = 0;
 471     for (k = 0; k <= base->max; k++) {
 472         for (iter = base->array[k]; iter; iter = iter->next) {
 473             i = iter->hash & res->max;
 474             new_vals[j].klen = iter->klen;
 475             new_vals[j].key = iter->key;
 476             new_vals[j].val = iter->val;
 477             new_vals[j].hash = iter->hash;
 478             new_vals[j].next = res->array[i];
 479             res->array[i] = &new_vals[j];
 480             j++;
 481         }
 482     }
 483
 484     for (k = 0; k <= overlay->max; k++) {
 485         for (iter = overlay->array[k]; iter; iter = iter->next) {
 486             if (res->hash_func)
 487                 hash = res->hash_func(iter->key, &iter->klen);
 488             else
 489                 hash = hashfunc_default(iter->key, &iter->klen, res->seed);
 490             i = hash & res->max;
 491             for (ent = res->array[i]; ent; ent = ent->next) {
 492                 if ((ent->klen == iter->klen) &&
 493                     (memcmp(ent->key, iter->key, iter->klen) == 0)) {
 494                     if (merger) {
 495                         ent->val = (*merger)(p, iter->key, iter->klen,
 496                                              iter->val, ent->val, data);
 497                     }
 498                     else {
 499                         ent->val = iter->val;
 500                     }
 501                     break;
 502                 }
 503             }
 504             if (!ent) {
 505                 new_vals[j].klen = iter->klen;
 506                 new_vals[j].key = iter->key;
 507                 new_vals[j].val = iter->val;
 508                 new_vals[j].hash = hash;
 509                 new_vals[j].next = res->array[i];
 510                 res->array[i] = &new_vals[j];
 511                 res->count++;
 512                 j++;
 513             }
 514         }
 515     }
 516     return res;
 517 }
 518
 519 /* This is basically the following...
 520  * for every element in hash table {
 521  *    comp elemeny.key, element.value
 522  * }
 523  *
 524  * Like with apr_table_do, the comp callback is called for each and every
 525  * element of the hash table.
 526  */
 527 APR_DECLARE(int) apr_hash_do(apr_hash_do_callback_fn_t *comp,
 528                              void *rec, const apr_hash_t *ht)
 529 {
 530     apr_hash_index_t  hix;
 531     apr_hash_index_t *hi;
 532     int rv, dorv  = 1;
 533
 534     hix.ht    = (apr_hash_t *)ht;
 535     hix.index = 0;
 536     hix.this  = NULL;
 537     hix.next  = NULL;
 538
 539     if ((hi = apr_hash_next(&hix))) {
 540         /* Scan the entire table */
 541         do {
 542             rv = (*comp)(rec, hi->this->key, hi->this->klen, hi->this->val);
 543         } while (rv && (hi = apr_hash_next(hi)));
 544
 545         if (rv == 0) {
 546             dorv = 0;
 547         }
 548     }
 549     return dorv;
 550 }
 551
 552 APR_POOL_IMPLEMENT_ACCESSOR(hash)