]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/libucl/src/ucl_hash.c
MFV: r360512
[FreeBSD/FreeBSD.git] / contrib / libucl / src / ucl_hash.c
1 /* Copyright (c) 2013, Vsevolod Stakhov
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *       * Redistributions of source code must retain the above copyright
7  *         notice, this list of conditions and the following disclaimer.
8  *       * Redistributions in binary form must reproduce the above copyright
9  *         notice, this list of conditions and the following disclaimer in the
10  *         documentation and/or other materials provided with the distribution.
11  *
12  * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15  * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22  */
23
24 #include "ucl_internal.h"
25 #include "ucl_hash.h"
26 #include "khash.h"
27 #include "kvec.h"
28 #include "mum.h"
29
30 #include <time.h>
31 #include <limits.h>
32
33 struct ucl_hash_elt {
34         const ucl_object_t *obj;
35         size_t ar_idx;
36 };
37
38 struct ucl_hash_struct {
39         void *hash;
40         kvec_t(const ucl_object_t *) ar;
41         bool caseless;
42 };
43
44 static uint64_t
45 ucl_hash_seed (void)
46 {
47         static uint64_t seed;
48
49         if (seed == 0) {
50 #ifdef UCL_RANDOM_FUNCTION
51                 seed = UCL_RANDOM_FUNCTION;
52 #else
53                 /* Not very random but can be useful for our purposes */
54                 seed = time (NULL);
55 #endif
56         }
57
58         return seed;
59 }
60
61 static const unsigned char lc_map[256] = {
62                 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
63                 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
64                 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
65                 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
66                 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
67                 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
68                 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
69                 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
70                 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
71                 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
72                 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
73                 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
74                 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
75                 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
76                 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
77                 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
78                 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
79                 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
80                 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
81                 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
82                 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
83                 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
84                 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
85                 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
86                 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
87                 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
88                 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
89                 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
90                 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
91                 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
92                 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
93                 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
94 };
95
96 #if (defined(WORD_BIT) && WORD_BIT == 64) || \
97         (defined(__WORDSIZE) && __WORDSIZE == 64) || \
98         defined(__x86_64__) || \
99         defined(__amd64__)
100 #define UCL64_BIT_HASH 1
101 #endif
102
103 static inline uint32_t
104 ucl_hash_func (const ucl_object_t *o)
105 {
106         return mum_hash (o->key, o->keylen, ucl_hash_seed ());
107 }
108 static inline int
109 ucl_hash_equal (const ucl_object_t *k1, const ucl_object_t *k2)
110 {
111         if (k1->keylen == k2->keylen) {
112                 return memcmp (k1->key, k2->key, k1->keylen) == 0;
113         }
114
115         return 0;
116 }
117
118 KHASH_INIT (ucl_hash_node, const ucl_object_t *, struct ucl_hash_elt, 1,
119                 ucl_hash_func, ucl_hash_equal)
120
121 static inline uint32_t
122 ucl_hash_caseless_func (const ucl_object_t *o)
123 {
124         unsigned len = o->keylen;
125         unsigned leftover = o->keylen % 8;
126         unsigned fp, i;
127         const uint8_t* s = (const uint8_t*)o->key;
128         union {
129                 struct {
130                         unsigned char c1, c2, c3, c4, c5, c6, c7, c8;
131                 } c;
132                 uint64_t pp;
133         } u;
134         uint64_t r;
135
136         fp = len - leftover;
137         r = ucl_hash_seed ();
138
139         for (i = 0; i != fp; i += 8) {
140                 u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3];
141                 u.c.c5 = s[i + 4], u.c.c6 = s[i + 5], u.c.c7 = s[i + 6], u.c.c8 = s[i + 7];
142                 u.c.c1 = lc_map[u.c.c1];
143                 u.c.c2 = lc_map[u.c.c2];
144                 u.c.c3 = lc_map[u.c.c3];
145                 u.c.c4 = lc_map[u.c.c4];
146                 u.c.c1 = lc_map[u.c.c5];
147                 u.c.c2 = lc_map[u.c.c6];
148                 u.c.c3 = lc_map[u.c.c7];
149                 u.c.c4 = lc_map[u.c.c8];
150                 r = mum_hash_step (r, u.pp);
151         }
152
153         u.pp = 0;
154         switch (leftover) {
155         case 7:
156                 u.c.c7 = lc_map[(unsigned char)s[i++]];
157         case 6:
158                 u.c.c6 = lc_map[(unsigned char)s[i++]];
159         case 5:
160                 u.c.c5 = lc_map[(unsigned char)s[i++]];
161         case 4:
162                 u.c.c4 = lc_map[(unsigned char)s[i++]];
163         case 3:
164                 u.c.c3 = lc_map[(unsigned char)s[i++]];
165         case 2:
166                 u.c.c2 = lc_map[(unsigned char)s[i++]];
167         case 1:
168                 u.c.c1 = lc_map[(unsigned char)s[i]];
169                 r = mum_hash_step (r, u.pp);
170                 break;
171         }
172
173         return mum_hash_finish (r);
174 }
175
176 static inline int
177 ucl_hash_caseless_equal (const ucl_object_t *k1, const ucl_object_t *k2)
178 {
179         if (k1->keylen == k2->keylen) {
180                 return memcmp (k1->key, k2->key, k1->keylen) == 0;
181         }
182
183         return 0;
184 }
185
186 KHASH_INIT (ucl_hash_caseless_node, const ucl_object_t *, struct ucl_hash_elt, 1,
187                 ucl_hash_caseless_func, ucl_hash_caseless_equal)
188
189 ucl_hash_t*
190 ucl_hash_create (bool ignore_case)
191 {
192         ucl_hash_t *new;
193
194         new = UCL_ALLOC (sizeof (ucl_hash_t));
195         if (new != NULL) {
196                 kv_init (new->ar);
197
198                 new->caseless = ignore_case;
199                 if (ignore_case) {
200                         khash_t(ucl_hash_caseless_node) *h = kh_init (ucl_hash_caseless_node);
201                         new->hash = (void *)h;
202                 }
203                 else {
204                         khash_t(ucl_hash_node) *h = kh_init (ucl_hash_node);
205                         new->hash = (void *)h;
206                 }
207         }
208         return new;
209 }
210
211 void ucl_hash_destroy (ucl_hash_t* hashlin, ucl_hash_free_func func)
212 {
213         const ucl_object_t *cur, *tmp;
214
215         if (hashlin == NULL) {
216                 return;
217         }
218
219         if (func != NULL) {
220                 /* Iterate over the hash first */
221                 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
222                                 hashlin->hash;
223                 khiter_t k;
224
225                 for (k = kh_begin (h); k != kh_end (h); ++k) {
226                         if (kh_exist (h, k)) {
227                                 cur = (kh_value (h, k)).obj;
228                                 while (cur != NULL) {
229                                         tmp = cur->next;
230                                         func (__DECONST (ucl_object_t *, cur));
231                                         cur = tmp;
232                                 }
233                         }
234                 }
235         }
236
237         if (hashlin->caseless) {
238                 khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *)
239                         hashlin->hash;
240                 kh_destroy (ucl_hash_caseless_node, h);
241         }
242         else {
243                 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
244                         hashlin->hash;
245                 kh_destroy (ucl_hash_node, h);
246         }
247
248         kv_destroy (hashlin->ar);
249         UCL_FREE (sizeof (*hashlin), hashlin);
250 }
251
252 void
253 ucl_hash_insert (ucl_hash_t* hashlin, const ucl_object_t *obj,
254                 const char *key, unsigned keylen)
255 {
256         khiter_t k;
257         int ret;
258         struct ucl_hash_elt *elt;
259
260         if (hashlin == NULL) {
261                 return;
262         }
263
264         if (hashlin->caseless) {
265                 khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *)
266                                 hashlin->hash;
267                 k = kh_put (ucl_hash_caseless_node, h, obj, &ret);
268                 if (ret > 0) {
269                         elt = &kh_value (h, k);
270                         kv_push (const ucl_object_t *, hashlin->ar, obj);
271                         elt->obj = obj;
272                         elt->ar_idx = kv_size (hashlin->ar) - 1;
273                 }
274         }
275         else {
276                 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
277                                 hashlin->hash;
278                 k = kh_put (ucl_hash_node, h, obj, &ret);
279                 if (ret > 0) {
280                         elt = &kh_value (h, k);
281                         kv_push (const ucl_object_t *, hashlin->ar, obj);
282                         elt->obj = obj;
283                         elt->ar_idx = kv_size (hashlin->ar) - 1;
284                 }
285         }
286 }
287
288 void ucl_hash_replace (ucl_hash_t* hashlin, const ucl_object_t *old,
289                 const ucl_object_t *new)
290 {
291         khiter_t k;
292         int ret;
293         struct ucl_hash_elt elt, *pelt;
294
295         if (hashlin == NULL) {
296                 return;
297         }
298
299         if (hashlin->caseless) {
300                 khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *)
301                                 hashlin->hash;
302                 k = kh_put (ucl_hash_caseless_node, h, old, &ret);
303                 if (ret == 0) {
304                         elt = kh_value (h, k);
305                         kh_del (ucl_hash_caseless_node, h, k);
306                         k = kh_put (ucl_hash_caseless_node, h, new, &ret);
307                         pelt = &kh_value (h, k);
308                         pelt->obj = new;
309                         pelt->ar_idx = elt.ar_idx;
310                         kv_A (hashlin->ar, elt.ar_idx) = new;
311                 }
312         }
313         else {
314                 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
315                                 hashlin->hash;
316                 k = kh_put (ucl_hash_node, h, old, &ret);
317                 if (ret == 0) {
318                         elt = kh_value (h, k);
319                         kh_del (ucl_hash_node, h, k);
320                         k = kh_put (ucl_hash_node, h, new, &ret);
321                         pelt = &kh_value (h, k);
322                         pelt->obj = new;
323                         pelt->ar_idx = elt.ar_idx;
324                         kv_A (hashlin->ar, elt.ar_idx) = new;
325                 }
326         }
327 }
328
329 struct ucl_hash_real_iter {
330         const ucl_object_t **cur;
331         const ucl_object_t **end;
332 };
333
334 const void*
335 ucl_hash_iterate (ucl_hash_t *hashlin, ucl_hash_iter_t *iter)
336 {
337         struct ucl_hash_real_iter *it = (struct ucl_hash_real_iter *)(*iter);
338         const ucl_object_t *ret = NULL;
339
340         if (hashlin == NULL) {
341                 return NULL;
342         }
343
344         if (it == NULL) {
345                 it = UCL_ALLOC (sizeof (*it));
346
347                 if (it == NULL) {
348                         return NULL;
349                 }
350
351                 it->cur = &hashlin->ar.a[0];
352                 it->end = it->cur + hashlin->ar.n;
353         }
354
355         if (it->cur < it->end) {
356                 ret = *it->cur++;
357         }
358         else {
359                 UCL_FREE (sizeof (*it), it);
360                 *iter = NULL;
361                 return NULL;
362         }
363
364         *iter = it;
365
366         return ret;
367 }
368
369 bool
370 ucl_hash_iter_has_next (ucl_hash_t *hashlin, ucl_hash_iter_t iter)
371 {
372         struct ucl_hash_real_iter *it = (struct ucl_hash_real_iter *)(iter);
373
374         return it->cur < it->end - 1;
375 }
376
377
378 const ucl_object_t*
379 ucl_hash_search (ucl_hash_t* hashlin, const char *key, unsigned keylen)
380 {
381         khiter_t k;
382         const ucl_object_t *ret = NULL;
383         ucl_object_t search;
384         struct ucl_hash_elt *elt;
385
386         search.key = key;
387         search.keylen = keylen;
388
389         if (hashlin == NULL) {
390                 return NULL;
391         }
392
393         if (hashlin->caseless) {
394                 khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *)
395                                                 hashlin->hash;
396
397                 k = kh_get (ucl_hash_caseless_node, h, &search);
398                 if (k != kh_end (h)) {
399                         elt = &kh_value (h, k);
400                         ret = elt->obj;
401                 }
402         }
403         else {
404                 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
405                                                 hashlin->hash;
406                 k = kh_get (ucl_hash_node, h, &search);
407                 if (k != kh_end (h)) {
408                         elt = &kh_value (h, k);
409                         ret = elt->obj;
410                 }
411         }
412
413         return ret;
414 }
415
416 void
417 ucl_hash_delete (ucl_hash_t* hashlin, const ucl_object_t *obj)
418 {
419         khiter_t k;
420         struct ucl_hash_elt *elt;
421
422         if (hashlin == NULL) {
423                 return;
424         }
425
426         if (hashlin->caseless) {
427                 khash_t(ucl_hash_caseless_node) *h = (khash_t(ucl_hash_caseless_node) *)
428                         hashlin->hash;
429
430                 k = kh_get (ucl_hash_caseless_node, h, obj);
431                 if (k != kh_end (h)) {
432                         elt = &kh_value (h, k);
433                         kv_del (const ucl_object_t *, hashlin->ar, elt->ar_idx);
434                         kh_del (ucl_hash_caseless_node, h, k);
435                 }
436         }
437         else {
438                 khash_t(ucl_hash_node) *h = (khash_t(ucl_hash_node) *)
439                         hashlin->hash;
440                 k = kh_get (ucl_hash_node, h, obj);
441                 if (k != kh_end (h)) {
442                         elt = &kh_value (h, k);
443                         kv_del (const ucl_object_t *, hashlin->ar, elt->ar_idx);
444                         kh_del (ucl_hash_node, h, k);
445                 }
446         }
447 }