2 * similarity.c: Utility functions for finding similar strings in lists
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
24 /* ==================================================================== */
32 #include "svn_string.h"
35 #include "private/svn_string_private.h"
37 #include "svn_private_config.h"
40 /* Context for token similarity checking */
41 struct svn_cl__simcheck_context_t
43 svn_string_t key; /* The token we're comparing with */
44 svn_membuf_t buffer; /* Buffer for similarity testing */
48 /* Similarity test between two property names */
49 static APR_INLINE apr_size_t
50 simcheck_key_diff(const svn_string_t *key, const svn_string_t *ctx,
51 svn_membuf_t *buffer, apr_size_t *diff)
54 const apr_size_t score = svn_string__similarity(key, ctx, buffer, &lcs);
55 if (key->len > ctx->len)
56 *diff = key->len - lcs;
58 *diff = ctx->len - lcs;
63 /* Key comparator for qsort for svn_cl__simcheck_t */
65 simcheck_compare(const void *pkeya, const void *pkeyb)
67 svn_cl__simcheck_t *const keya = *(svn_cl__simcheck_t *const *)pkeya;
68 svn_cl__simcheck_t *const keyb = *(svn_cl__simcheck_t *const *)pkeyb;
69 svn_cl__simcheck_context_t *const context = keya->context;
71 if (keya->score == -1)
72 keya->score = simcheck_key_diff(&keya->token, &context->key,
73 &context->buffer, &keya->diff);
74 if (keyb->score == -1)
75 keyb->score = simcheck_key_diff(&keyb->token, &context->key,
76 &context->buffer, &keyb->diff);
78 return (keya->score < keyb->score ? 1
79 : (keya->score > keyb->score ? -1
80 : (keya->diff > keyb->diff ? 1
81 : (keya->diff < keyb->diff ? -1 : 0))));
85 svn_cl__similarity_check(const char *key,
86 svn_cl__simcheck_t **tokens,
87 apr_size_t token_count,
88 apr_pool_t *scratch_pool)
93 svn_cl__simcheck_context_t context;
94 context.key.data = key;
95 context.key.len = strlen(key);
96 svn_membuf__create(&context.buffer, 0, scratch_pool);
98 /* Populate the score, diff and context members. */
99 for (i = 0; i < token_count; ++i)
101 svn_cl__simcheck_t *const token = tokens[i];
104 token->context = &context;
107 /* Sort the tokens by similarity. */
108 qsort(tokens, token_count, sizeof(*tokens), simcheck_compare);
110 /* Remove references to the context, since it points to the stack,
111 and calculate the number of results that are at least two-thirds
112 similar to the key. */
113 for (i = 0, result = 1; i < token_count; ++i)
115 svn_cl__simcheck_t *const token = tokens[i];
116 token->context = NULL;
117 /* If you update this factor, consider updating
118 * ../libsvn_subr/cmdline.c:most_similar(). */
119 if (token->score >= (2 * SVN_STRING__SIM_RANGE_MAX + 1) / 3)
123 if (0 == tokens[0]->diff)
124 return 0; /* We found an exact match. */