3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
23 * @file svn_string_private.h
24 * @brief Non-public string utility functions.
28 #ifndef SVN_STRING_PRIVATE_H
29 #define SVN_STRING_PRIVATE_H
31 #include "svn_string.h" /* for svn_boolean_t, svn_error_t */
35 #endif /* __cplusplus */
38 * @defgroup svn_string String handling
43 /** Private functions.
45 * @defgroup svn_string_private Private functions
50 /** A self-contained memory buffer of known size.
52 * Intended to be used where a single variable-sized buffer is needed
53 * within an iteration, a scratch pool is available and we want to
54 * avoid the cost of creating another pool just for the iteration.
56 typedef struct svn_membuf_t
58 /** The pool from which this buffer was originally allocated; it is not
59 * necessarily specific to this buffer. This is used only for allocating
60 * more memory when the buffer needs to grow.
64 /** pointer to the memory */
67 /** total size of buffer allocated */
72 /* Initialize a memory buffer of the given size */
74 svn_membuf__create(svn_membuf_t *membuf, apr_size_t size, apr_pool_t *pool);
76 /* Ensure that the given memory buffer has at least the given size */
78 svn_membuf__ensure(svn_membuf_t *membuf, apr_size_t size);
80 /* Resize the given memory buffer, preserving its contents. */
82 svn_membuf__resize(svn_membuf_t *membuf, apr_size_t size);
84 /* Zero-fill the given memory buffer */
86 svn_membuf__zero(svn_membuf_t *membuf);
88 /* Zero-fill the given memory buffer up to the smaller of SIZE and the
89 current buffer size. */
91 svn_membuf__nzero(svn_membuf_t *membuf, apr_size_t size);
93 /* Inline implementation of svn_membuf__zero.
94 * Note that PMEMBUF is evaluated only once.
96 #define SVN_MEMBUF__ZERO(pmembuf) \
99 svn_membuf_t *const _m_b_f_ = (pmembuf); \
100 memset(_m_b_f_->data, 0, _m_b_f_->size); \
104 /* Inline implementation of svn_membuf__nzero.
105 * Note that PMEMBUF and PSIZE are evaluated only once.
107 #define SVN_MEMBUF__NZERO(pmembuf, psize) \
110 svn_membuf_t *const _m_b_f_ = (pmembuf); \
111 const apr_size_t _s_z_ = (psize); \
112 if (_s_z_ > _m_b_f_->size) \
113 memset(_m_b_f_->data, 0, _m_b_f_->size); \
115 memset(_m_b_f_->data, 0, _s_z_); \
120 /* In non-debug mode, just use these inline replacements */
121 #define svn_membuf__zero(B) SVN_MEMBUF__ZERO((B))
122 #define svn_membuf__nzero(B, S) SVN_MEMBUF__NZERO((B), (S))
126 /** Returns the #svn_string_t information contained in the data and
127 * len members of @a strbuf. This is effectively a typecast, converting
128 * @a strbuf into an #svn_string_t. The former becomes invalid and must
129 * not be accessed after this function has returned.
132 svn_stringbuf__morph_into_string(svn_stringbuf_t *strbuf);
134 /** Like strtoul but with a fixed base of 10 and without overflow checks.
135 * This allows the compiler to generate massively faster (4x on 64bit LINUX)
136 * code. Overflow checks may be added on the caller side where you might
137 * want to test for a more specific value range anyway.
140 svn__strtoul(const char *buffer, const char **end);
142 /** Number of chars needed to represent signed (19 places + sign + NUL) or
143 * unsigned (20 places + NUL) integers as strings.
145 #define SVN_INT64_BUFFER_SIZE 21
147 /** Writes the @a number as string into @a dest. The latter must provide
148 * space for at least #SVN_INT64_BUFFER_SIZE characters. Returns the number
149 * of chars written, excluding the terminating NUL.
152 svn__ui64toa(char * dest, apr_uint64_t number);
154 /** Writes the @a number as string into @a dest. The latter must provide
155 * space for at least #SVN_INT64_BUFFER_SIZE characters. Returns the number
156 * of chars written, excluding the terminating NUL.
159 svn__i64toa(char * dest, apr_int64_t number);
161 /** Returns a decimal string for @a number allocated in @a pool. Put in
162 * the @a separator at each third place.
165 svn__ui64toa_sep(apr_uint64_t number, char separator, apr_pool_t *pool);
167 /** Returns a decimal string for @a number allocated in @a pool. Put in
168 * the @a separator at each third place.
171 svn__i64toa_sep(apr_int64_t number, char separator, apr_pool_t *pool);
174 /** Writes the @a number as base36-encoded string into @a dest. The latter
175 * must provide space for at least #SVN_INT64_BUFFER_SIZE characters.
176 * Returns the number of chars written, excluding the terminating NUL.
178 * @note The actual maximum buffer requirement is much shorter than
179 * #SVN_INT64_BUFFER_SIZE but introducing yet another constant is only
180 * marginally useful and may open the door to security issues when e.g.
181 * switching between base10 and base36 encoding.
184 svn__ui64tobase36(char *dest, apr_uint64_t number);
186 /** Returns the value of the base36 encoded unsigned integer starting at
187 * @a source. If @a next is not NULL, @a *next will be set to the first
188 * position after the integer.
190 * The data in @a source will be considered part of the number to parse
191 * as long as the characters are within the base36 range. If there are
192 * no such characters to begin with, 0 is returned. Inputs with more than
193 * #SVN_INT64_BUFFER_SIZE digits will not be fully parsed, i.e. the value
194 * of @a *next as well as the return value are undefined.
197 svn__base36toui64(const char **next, const char *source);
200 * The upper limit of the similarity range returned by
201 * svn_cstring__similarity() and svn_string__similarity().
203 #define SVN_STRING__SIM_RANGE_MAX 1000000
206 * Computes the similarity score of STRA and STRB. Returns the ratio
207 * of the length of their longest common subsequence and the average
208 * length of the strings, normalized to the range
209 * [0..SVN_STRING__SIM_RANGE_MAX]. The result is equivalent to
212 * difflib.SequenceMatcher.ratio
214 * Optionally sets *RLCS to the length of the longest common
215 * subsequence of STRA and STRB. Using BUFFER for temporary storage,
216 * requires memory proportional to the length of the shorter string.
218 * The LCS algorithm used is described in, e.g.,
220 * http://en.wikipedia.org/wiki/Longest_common_subsequence_problem
222 * Q: Why another LCS when we already have one in libsvn_diff?
223 * A: svn_diff__lcs is too heavyweight and too generic for the
224 * purposes of similarity testing. Whilst it would be possible
225 * to use a character-based tokenizer with it, we really only need
226 * the *length* of the LCS for the similarity score, not all the
227 * other information that svn_diff__lcs produces in order to
228 * make printing diffs possible.
230 * Q: Is there a limit on the length of the string parameters?
231 * A: Only available memory. But note that the LCS algorithm used
232 * has O(strlen(STRA) * strlen(STRB)) worst-case performance,
233 * so do keep a rein on your enthusiasm.
236 svn_cstring__similarity(const char *stra, const char *strb,
237 svn_membuf_t *buffer, apr_size_t *rlcs);
240 * Like svn_cstring__similarity, but accepts svn_string_t's instead
241 * of NUL-terminated character strings.
244 svn_string__similarity(const svn_string_t *stringa,
245 const svn_string_t *stringb,
246 svn_membuf_t *buffer, apr_size_t *rlcs);
249 /* Return the lowest position at which A and B differ. If no difference
250 * can be found in the first MAX_LEN characters, MAX_LEN will be returned.
253 svn_cstring__match_length(const char *a,
257 /* Return the number of bytes before A and B that don't differ. If no
258 * difference can be found in the first MAX_LEN characters, MAX_LEN will
259 * be returned. Please note that A-MAX_LEN and B-MAX_LEN must both be
263 svn_cstring__reverse_match_length(const char *a,
271 * Prefix trees allow for a space-efficient representation of a set of path-
272 * like strings, i.e. those that share common prefixes. Any given string
273 * value will be stored only once, i.e. two strings stored in the same tree
274 * are equal if and only if they point to the same #svn_prefix_string__t.
276 * @defgroup svn_prefix_string Strings in prefix trees.
281 * Opaque data type for prefix-tree-based strings.
283 typedef struct svn_prefix_string__t svn_prefix_string__t;
286 * Opaque data type representing a prefix tree
288 typedef struct svn_prefix_tree__t svn_prefix_tree__t;
291 * Return a new prefix tree allocated in @a pool.
294 svn_prefix_tree__create(apr_pool_t *pool);
297 * Return a string with the value @a s stored in @a tree. If no such string
298 * exists yet, add it automatically.
300 svn_prefix_string__t *
301 svn_prefix_string__create(svn_prefix_tree__t *tree,
305 * Return the contents of @a s as a new string object allocated in @a pool.
308 svn_prefix_string__expand(const svn_prefix_string__t *s,
312 * Compare the two strings @a lhs and @a rhs that must be part of the same
316 svn_prefix_string__compare(const svn_prefix_string__t *lhs,
317 const svn_prefix_string__t *rhs);
326 #endif /* __cplusplus */
328 #endif /* SVN_STRING_PRIVATE_H */