2 * hash.c : dumping and reading hash tables to/from files.
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
29 #include <apr_version.h>
30 #include <apr_pools.h>
32 #include <apr_file_io.h>
34 #include "svn_types.h"
35 #include "svn_string.h"
36 #include "svn_error.h"
38 #include "svn_sorts.h"
40 #include "svn_pools.h"
42 #include "private/svn_dep_compat.h"
43 #include "private/svn_sorts_private.h"
44 #include "private/svn_subr_private.h"
46 #include "svn_private_config.h"
52 * The format of a dumped hash table is:
55 * name (a string of <nlength> bytes, followed by a newline)
57 * val (a string of <vlength> bytes, followed by a newline)
62 * (Yes, there is a newline after END.)
73 * A forthright entrance, yet coquettish on the tongue, its deceptively
74 * fruity exterior hides the warm mahagony undercurrent that is the
75 * hallmark of Chateau Fraisant-Pitre. Connoisseurs of the region will
76 * be pleased to note the familiar, subtle hints of mulberries and
77 * carburator fluid. Its confident finish is marred only by a barely
78 * detectable suggestion of rancid squid ink.
90 /*** Dumping and loading hash files. */
92 /* Implements svn_hash_read2 and svn_hash_read_incremental. */
94 svn_hash__read_entry(svn_hash__entry_t *entry,
96 const char *terminator,
97 svn_boolean_t incremental,
100 svn_stringbuf_t *buf;
108 /* Read a key length line. Might be END, though. */
109 SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, pool));
111 /* Check for the end of the hash. */
112 if ((!terminator && eof && buf->len == 0)
113 || (terminator && (strcmp(buf->data, terminator) == 0)))
123 /* Check for unexpected end of stream */
125 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
126 _("Serialized hash missing terminator"));
128 if ((buf->len >= 3) && (buf->data[0] == 'K') && (buf->data[1] == ' '))
130 /* Get the length of the key */
131 err = svn_cstring_strtoui64(&ui64, buf->data + 2,
132 0, APR_SIZE_MAX, 10);
134 return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
135 _("Serialized hash malformed key length"));
136 entry->keylen = (apr_size_t)ui64;
138 /* Now read that much into a buffer. */
139 entry->key = apr_palloc(pool, entry->keylen + 1);
140 SVN_ERR(svn_stream_read_full(stream, entry->key, &entry->keylen));
141 entry->key[entry->keylen] = '\0';
143 /* Suck up extra newline after key data */
145 SVN_ERR(svn_stream_read_full(stream, &c, &len));
147 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
148 _("Serialized hash malformed key data"));
150 /* Read a val length line */
151 SVN_ERR(svn_stream_readline(stream, &buf, "\n", &eof, pool));
153 if ((buf->data[0] == 'V') && (buf->data[1] == ' '))
155 /* Get the length of the val */
156 err = svn_cstring_strtoui64(&ui64, buf->data + 2,
157 0, APR_SIZE_MAX, 10);
159 return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
160 _("Serialized hash malformed value length"));
161 entry->vallen = (apr_size_t)ui64;
163 entry->val = apr_palloc(pool, entry->vallen + 1);
164 SVN_ERR(svn_stream_read_full(stream, entry->val, &entry->vallen));
165 entry->val[entry->vallen] = '\0';
167 /* Suck up extra newline after val data */
169 SVN_ERR(svn_stream_read_full(stream, &c, &len));
171 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
172 _("Serialized hash malformed value data"));
175 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
176 _("Serialized hash malformed"));
178 else if (incremental && (buf->len >= 3)
179 && (buf->data[0] == 'D') && (buf->data[1] == ' '))
181 /* Get the length of the key */
182 err = svn_cstring_strtoui64(&ui64, buf->data + 2,
183 0, APR_SIZE_MAX, 10);
185 return svn_error_create(SVN_ERR_MALFORMED_FILE, err,
186 _("Serialized hash malformed key length"));
187 entry->keylen = (apr_size_t)ui64;
189 /* Now read that much into a buffer. */
190 entry->key = apr_palloc(pool, entry->keylen + 1);
191 SVN_ERR(svn_stream_read_full(stream, entry->key, &entry->keylen));
192 entry->key[entry->keylen] = '\0';
194 /* Suck up extra newline after key data */
196 SVN_ERR(svn_stream_read_full(stream, &c, &len));
198 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
199 _("Serialized hash malformed key data"));
201 /* Remove this hash entry. */
207 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
208 _("Serialized hash malformed"));
215 hash_read(apr_hash_t *hash, svn_stream_t *stream, const char *terminator,
216 svn_boolean_t incremental, apr_pool_t *pool)
218 apr_pool_t *iterpool = svn_pool_create(pool);
222 svn_hash__entry_t entry;
224 svn_pool_clear(iterpool);
225 SVN_ERR(svn_hash__read_entry(&entry, stream, terminator,
226 incremental, iterpool));
229 if (entry.key == NULL)
234 /* Add a new hash entry. */
235 apr_hash_set(hash, apr_pstrmemdup(pool, entry.key, entry.keylen),
237 svn_string_ncreate(entry.val, entry.vallen, pool));
241 /* Remove this hash entry. */
242 apr_hash_set(hash, entry.key, entry.keylen, NULL);
246 svn_pool_destroy(iterpool);
251 /* Implements svn_hash_write2 and svn_hash_write_incremental. */
253 hash_write(apr_hash_t *hash, apr_hash_t *oldhash, svn_stream_t *stream,
254 const char *terminator, apr_pool_t *pool)
258 apr_array_header_t *list;
261 subpool = svn_pool_create(pool);
263 list = svn_sort__hash(hash, svn_sort_compare_items_lexically, pool);
264 for (i = 0; i < list->nelts; i++)
266 svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t);
267 svn_string_t *valstr = item->value;
269 svn_pool_clear(subpool);
271 /* Don't output entries equal to the ones in oldhash, if present. */
274 svn_string_t *oldstr = apr_hash_get(oldhash, item->key, item->klen);
276 if (oldstr && svn_string_compare(valstr, oldstr))
281 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL,
282 _("Cannot serialize negative length"));
285 SVN_ERR(svn_stream_printf(stream, subpool,
286 "K %" APR_SIZE_T_FMT "\n%s\n"
287 "V %" APR_SIZE_T_FMT "\n",
288 (apr_size_t) item->klen,
289 (const char *) item->key,
292 SVN_ERR(svn_stream_write(stream, valstr->data, &len));
293 SVN_ERR(svn_stream_puts(stream, "\n"));
298 /* Output a deletion entry for each property in oldhash but not hash. */
299 list = svn_sort__hash(oldhash, svn_sort_compare_items_lexically,
301 for (i = 0; i < list->nelts; i++)
303 svn_sort__item_t *item = &APR_ARRAY_IDX(list, i, svn_sort__item_t);
305 svn_pool_clear(subpool);
307 /* If it's not present in the new hash, write out a D entry. */
308 if (! apr_hash_get(hash, item->key, item->klen))
309 SVN_ERR(svn_stream_printf(stream, subpool,
310 "D %" APR_SSIZE_T_FMT "\n%s\n",
311 item->klen, (const char *) item->key));
316 SVN_ERR(svn_stream_printf(stream, subpool, "%s\n", terminator));
318 svn_pool_destroy(subpool);
323 svn_error_t *svn_hash_read2(apr_hash_t *hash, svn_stream_t *stream,
324 const char *terminator, apr_pool_t *pool)
326 return hash_read(hash, stream, terminator, FALSE, pool);
330 svn_error_t *svn_hash_read_incremental(apr_hash_t *hash,
331 svn_stream_t *stream,
332 const char *terminator,
335 return hash_read(hash, stream, terminator, TRUE, pool);
340 svn_hash_write2(apr_hash_t *hash, svn_stream_t *stream,
341 const char *terminator, apr_pool_t *pool)
343 return hash_write(hash, NULL, stream, terminator, pool);
348 svn_hash_write_incremental(apr_hash_t *hash, apr_hash_t *oldhash,
349 svn_stream_t *stream, const char *terminator,
352 SVN_ERR_ASSERT(oldhash != NULL);
353 return hash_write(hash, oldhash, stream, terminator, pool);
358 svn_hash_write(apr_hash_t *hash, apr_file_t *destfile, apr_pool_t *pool)
360 return hash_write(hash, NULL, svn_stream_from_aprfile2(destfile, TRUE, pool),
361 SVN_HASH_TERMINATOR, pool);
365 /* There are enough quirks in the deprecated svn_hash_read that we
366 should just preserve its implementation. */
368 svn_hash_read(apr_hash_t *hash,
373 char buf[SVN_KEYLINE_MAXLEN];
381 /* Read a key length line. Might be END, though. */
382 apr_size_t len = sizeof(buf);
384 err = svn_io_read_length_line(srcfile, buf, &len, pool);
385 if (err && APR_STATUS_IS_EOF(err->apr_err) && first_time)
387 /* We got an EOF on our very first attempt to read, which
388 means it's a zero-byte file. No problem, just go home. */
389 svn_error_clear(err);
393 /* Any other circumstance is a genuine error. */
398 if (((len == 3) && (buf[0] == 'E') && (buf[1] == 'N') && (buf[2] == 'D'))
401 && (buf[1] == 'R') /* We formerly used just "END" to */
402 && (buf[2] == 'O') /* end a property hash, but later */
403 && (buf[3] == 'P') /* we added "PROPS-END", so that */
404 && (buf[4] == 'S') /* the fs dump format would be */
405 && (buf[5] == '-') /* more human-readable. That's */
406 && (buf[6] == 'E') /* why we accept either way here. */
410 /* We've reached the end of the dumped hash table, so leave. */
413 else if ((buf[0] == 'K') && (buf[1] == ' '))
419 /* Get the length of the key */
420 SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2));
423 /* Now read that much into a buffer, + 1 byte for null terminator */
424 keybuf = apr_palloc(pool, keylen + 1);
425 SVN_ERR(svn_io_file_read_full2(srcfile,
427 &num_read, NULL, pool));
428 ((char *) keybuf)[keylen] = '\0';
430 /* Suck up extra newline after key data */
431 SVN_ERR(svn_io_file_getc(&c, srcfile, pool));
433 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
435 /* Read a val length line */
437 SVN_ERR(svn_io_read_length_line(srcfile, buf, &len, pool));
439 if ((buf[0] == 'V') && (buf[1] == ' '))
441 svn_string_t *value = apr_palloc(pool, sizeof(*value));
445 /* Get the length of the value */
446 SVN_ERR(svn_cstring_atoi(&parsed_len, buf + 2));
449 /* Again, 1 extra byte for the null termination. */
450 valbuf = apr_palloc(pool, vallen + 1);
451 SVN_ERR(svn_io_file_read_full2(srcfile,
453 &num_read, NULL, pool));
454 ((char *) valbuf)[vallen] = '\0';
456 /* Suck up extra newline after val data */
457 SVN_ERR(svn_io_file_getc(&c, srcfile, pool));
459 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
461 value->data = valbuf;
464 /* The Grand Moment: add a new hash entry! */
465 apr_hash_set(hash, keybuf, keylen, value);
469 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
474 return svn_error_create(SVN_ERR_MALFORMED_FILE, NULL, NULL);
481 /*** Diffing hashes ***/
484 svn_hash_diff(apr_hash_t *hash_a,
486 svn_hash_diff_func_t diff_func,
487 void *diff_func_baton,
490 apr_hash_index_t *hi;
493 for (hi = apr_hash_first(pool, hash_a); hi; hi = apr_hash_next(hi))
498 apr_hash_this(hi, &key, &klen, NULL);
500 if (hash_b && (apr_hash_get(hash_b, key, klen)))
501 SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_both,
504 SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_a,
509 for (hi = apr_hash_first(pool, hash_b); hi; hi = apr_hash_next(hi))
514 apr_hash_this(hi, &key, &klen, NULL);
516 if (! (hash_a && apr_hash_get(hash_a, key, klen)))
517 SVN_ERR((*diff_func)(key, klen, svn_hash_diff_key_b,
525 /*** Misc. hash APIs ***/
528 svn_hash_keys(apr_array_header_t **array,
532 apr_hash_index_t *hi;
534 *array = apr_array_make(pool, apr_hash_count(hash), sizeof(const char *));
536 for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi))
538 APR_ARRAY_PUSH(*array, const char *) = apr_hash_this_key(hi);
546 svn_hash_from_cstring_keys(apr_hash_t **hash_p,
547 const apr_array_header_t *keys,
551 apr_hash_t *hash = svn_hash__make(pool);
552 for (i = 0; i < keys->nelts; i++)
555 apr_pstrdup(pool, APR_ARRAY_IDX(keys, i, const char *));
556 svn_hash_sets(hash, key, key);
564 /*** Specialized getter APIs ***/
567 svn_hash__get_cstring(apr_hash_t *hash,
569 const char *default_value)
573 const char *value = svn_hash_gets(hash, key);
574 return value ? value : default_value;
577 return default_value;
582 svn_hash__get_bool(apr_hash_t *hash, const char *key,
583 svn_boolean_t default_value)
585 const char *tmp_value = svn_hash__get_cstring(hash, key, NULL);
586 svn_tristate_t value = svn_tristate__from_word(tmp_value);
588 if (value == svn_tristate_true)
590 else if (value == svn_tristate_false)
593 return default_value;
598 /*** Optimized hash function ***/
600 /* apr_hashfunc_t optimized for the key that we use in SVN: paths and
601 * property names. Its primary goal is speed for keys of known length.
603 * Since strings tend to spawn large value spaces (usually differ in many
604 * bits with differences spanning a larger section of the key), we can be
605 * quite sloppy extracting a hash value. The more keys there are in a
606 * hash container, the more bits of the value returned by this function
607 * will be used. For a small number of string keys, choosing bits from any
608 * any fix location close to the tail of those keys would usually be good
609 * enough to prevent high collision rates.
612 hashfunc_compatible(const char *char_key, apr_ssize_t *klen)
614 unsigned int hash = 0;
615 const unsigned char *key = (const unsigned char *)char_key;
616 const unsigned char *p;
619 if (*klen == APR_HASH_KEY_STRING)
620 *klen = strlen(char_key);
622 #if SVN_UNALIGNED_ACCESS_IS_OK
623 for (p = key, i = *klen; i >= 4; i-=4, p+=4)
625 apr_uint32_t chunk = *(const apr_uint32_t *)p;
627 /* the ">> 17" part gives upper bits in the chunk a chance to make
628 some impact as well */
629 hash = hash * 33 * 33 * 33 * 33 + chunk + (chunk >> 17);
632 for (p = key, i = *klen; i >= 4; i-=4, p+=4)
634 hash = hash * 33 * 33 * 33 * 33
635 + p[0] * 33 * 33 * 33
642 hash = hash * 33 + *p;
648 svn_hash__make(apr_pool_t *pool)
650 return apr_hash_make_custom(pool, hashfunc_compatible);