1 /* prefix_string.c --- implement strings based on a prefix tree
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
24 #include "private/svn_string_private.h"
26 /* A node in the tree represents a common prefix. The root node is the
27 * empty prefix. Nodes may have up to 256 sub-nodes, each starting with
28 * a different character (possibly '\0').
30 * The nodes in the tree store only up to 8 chars of the respective common
31 * prefix, i.e. longer common prefixes must be drawn out over multiple
32 * hierarchy levels. This is a space <-> efficiency trade-off.
34 * Strings are the leaf nodes in the tree and use a specialized, smaller
35 * data structure. They may add 0 to 7 extra chars to the prefix. Both
36 * data types can be discerned by the last char in the data buffer. This
37 * must be 0 for strings (leaves) and non-0 otherwise. Please note that
38 * ordinary nodes have a length information so that no terminating 0 is
42 /* forward declaration */
43 typedef struct node_t node_t;
45 /* String type and tree leaf.
47 struct svn_prefix_string__t
49 /* mandatory prefix */
52 /* 0 ..7 chars to add the the prefix. NUL-terminated. */
56 /* A node inside the tree, i.e. not a string and not a leaf (unless this is
59 * Note: keep the ordering to minimize size / alignment overhead on 64 bit
64 /* pointer to the parent prefix plus the 1 .. 8 extra chars.
65 * Only the root will provide 0 extra chars. */
66 svn_prefix_string__t key;
68 /* Length of the prefix from the root down to and including this one.
69 * 0 for the root node. Only then will key.prefix be NULL. */
72 /* Number of entries used in SUB_NODES. */
73 apr_uint32_t sub_node_count;
75 /* The sub-nodes, ordered by first char. node_t and svn_prefix_string__t
76 * may be mixed here. May be NULL.
77 * The number of allocated entries is always a power-of-two and only
78 * given implicitly by SUB_NODE_COUNT. */
79 struct node_t **sub_nodes;
82 /* The actual tree structure. */
83 struct svn_prefix_tree__t
85 /* the common tree root (represents the empty prefix). */
88 /* all sub-nodes & strings will be allocated from this pool */
92 /* Return TRUE, iff NODE is a leaf node.
97 return node->key.data[7] == 0;
100 /* Ensure that the sub-nodes array of NODE within TREE has at least one
101 * unused entry. Re-allocate as necessary.
104 auto_realloc_sub_nodes(svn_prefix_tree__t *tree,
107 if (node->sub_node_count & (node->sub_node_count - 1))
110 if (node->sub_node_count == 0)
112 node->sub_nodes = apr_pcalloc(tree->pool, sizeof(*node->sub_nodes));
117 = apr_pcalloc(tree->pool,
118 2 * node->sub_node_count * sizeof(*sub_nodes));
119 memcpy(sub_nodes, node->sub_nodes,
120 node->sub_node_count * sizeof(*sub_nodes));
121 node->sub_nodes = sub_nodes;
125 /* Given the COUNT pointers in the SUB_NODES array, return the location at
126 * which KEY is either located or would be inserted.
129 search_lower_bound(node_t **sub_nodes,
134 int upper = count - 1;
136 /* Binary search for the lowest position at which to insert KEY. */
137 while (lower <= upper)
139 int current = lower + (upper - lower) / 2;
141 if ((unsigned char)sub_nodes[current]->key.data[0] < key)
151 svn_prefix_tree__create(apr_pool_t *pool)
153 svn_prefix_tree__t *tree = apr_pcalloc(pool, sizeof(*tree));
156 tree->root = apr_pcalloc(pool, sizeof(*tree->root));
157 tree->root->key.data[7] = '\xff';
162 svn_prefix_string__t *
163 svn_prefix_string__create(svn_prefix_tree__t *tree,
166 svn_prefix_string__t *new_string;
167 apr_size_t len = strlen(s);
168 node_t *node = tree->root;
172 /* walk the existing tree until we either find S or the node at which S
173 * has to be inserted */
179 /* index of the matching sub-node */
180 idx = node->sub_node_count
181 ? search_lower_bound(node->sub_nodes,
182 (unsigned char)s[node->length],
183 node->sub_node_count)
186 /* any (partially) matching sub-nodes? */
187 if (idx == (int)node->sub_node_count
188 || node->sub_nodes[idx]->key.data[0] != s[node->length])
191 sub_node = node->sub_nodes[idx];
193 /* fully matching sub-node? */
194 if (is_leaf(sub_node))
196 if (strcmp(sub_node->key.data, s + node->length) == 0)
197 return &sub_node->key;
201 apr_size_t sub_node_len = sub_node->length - node->length;
202 if (strncmp(sub_node->key.data, s + node->length,
210 /* partial match -> split */
211 while (sub_node->key.data[match] == s[node->length + match])
214 new_node = apr_pcalloc(tree->pool, sizeof(*new_node));
215 new_node->key = sub_node->key;
216 new_node->length = node->length + match;
217 new_node->key.data[7] = '\xff';
218 new_node->sub_node_count = 1;
219 new_node->sub_nodes = apr_palloc(tree->pool, sizeof(node_t *));
220 new_node->sub_nodes[0] = sub_node;
222 memmove(sub_node->key.data, sub_node->key.data + match, 8 - match);
224 /* replace old sub-node with new one and continue lookup */
225 sub_node->key.prefix = new_node;
226 node->sub_nodes[idx] = new_node;
230 /* add sub-node(s) and final string */
231 while (node->length + 7 < len)
233 new_node = apr_pcalloc(tree->pool, sizeof(*new_node));
234 new_node->key.prefix = node;
235 new_node->length = node->length + 8;
236 memcpy(new_node->key.data, s + node->length, 8);
238 auto_realloc_sub_nodes(tree, node);
239 memmove(node->sub_nodes + idx + 1, node->sub_nodes + idx,
240 (node->sub_node_count - idx) * sizeof(node_t *));
242 /* replace old sub-node with new one and continue lookup */
243 node->sub_nodes[idx] = new_node;
244 node->sub_node_count++;
249 new_string = apr_pcalloc(tree->pool, sizeof(*new_string));
250 new_string->prefix = node;
251 memcpy(new_string->data, s + node->length, len - node->length);
253 auto_realloc_sub_nodes(tree, node);
254 memmove(node->sub_nodes + idx + 1, node->sub_nodes + idx,
255 (node->sub_node_count - idx) * sizeof(node_t *));
257 node->sub_nodes[idx] = (node_t *)new_string;
258 node->sub_node_count++;
263 svn_prefix_string__expand(const svn_prefix_string__t *s,
266 apr_size_t s_len = strlen(s->data);
267 apr_size_t len = s->prefix->length + s_len;
268 char *buffer = apr_palloc(pool, len + 1);
270 svn_string_t *result = apr_pcalloc(pool, sizeof(*result));
271 result->data = buffer;
277 memcpy(buffer + s->prefix->length, s->data, len - s->prefix->length);
278 len = s->prefix->length;
286 svn_prefix_string__compare(const svn_prefix_string__t *lhs,
287 const svn_prefix_string__t *rhs)
289 const node_t *lhs_parent = lhs->prefix;
290 const node_t *rhs_parent = rhs->prefix;
295 /* find the common root */
296 while (lhs_parent != rhs_parent)
298 if (lhs_parent->length <= rhs_parent->length)
300 rhs = &rhs_parent->key;
301 rhs_parent = rhs_parent->key.prefix;
303 else if (rhs_parent->length <= lhs_parent->length)
305 lhs = &lhs_parent->key;
306 lhs_parent = lhs_parent->key.prefix;
310 assert(lhs_parent && rhs_parent);
313 /* at the common root, strings will differ in the first follow-up char */
314 return (int)(unsigned char)lhs->data[0] - (int)(unsigned char)rhs->data[0];