2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2004 Tim J. Robbins.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * "Set of characters" ADT implemented as a splay tree of extents, with
30 * a lookup table cache to simplify looking up the first bunch of
31 * characters (which are presumably more common than others).
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
/*
 * Prototypes for the file-local (static) tree helpers that are defined
 * further down; each one operates on a csnode subtree and a character.
 */
44 static struct csnode * cset_delete(struct csnode *, wchar_t);
45 static __inline int cset_rangecmp(struct csnode *, wchar_t);
46 static struct csnode * cset_splay(struct csnode *, wchar_t);
/*
 * Obtains the set with malloc() and compares the result against NULL
 * (the failure branch itself is not visible here). The visible
 * initialisation gives a new set an empty class list, an invalid lookup
 * cache and no inversion.
 */
50 * Allocate a set of characters.
57 if ((cs = malloc(sizeof(*cs))) == NULL)
60 cs->cs_classes = NULL;
61 cs->cs_havecache = false;
62 cs->cs_invert = false;
/*
 * cset_add: insert the single character ch into the set cs.
 *
 * Tree nodes describe extents (csn_min..csn_max). A character that is not
 * yet covered gets a one-character node, which becomes the root and is
 * then merged with extents that touch it on either side.
 */
68 * Add a character to the set.
71 cset_add(struct cset *cs, wchar_t ch)
73 struct csnode *csn, *ncsn;
/* Membership is about to change, so a previously built cache is stale. */
76 cs->cs_havecache = false;
79 * Inserting into empty tree; new item becomes the root.
81 if (cs->cs_root == NULL) {
82 csn = malloc(sizeof(*cs->cs_root));
/* A childless node covering exactly [ch, ch]. */
85 csn->csn_left = csn->csn_right = NULL;
86 csn->csn_min = csn->csn_max = ch;
92 * Splay to check whether the item already exists, and otherwise,
93 * where we should put it.
/* The splayed tree is stored back as the root; csn aliases that root. */
95 csn = cs->cs_root = cset_splay(cs->cs_root, ch);
98 * Avoid adding duplicate nodes.
/* A comparison result of 0 is treated as "ch is already covered by csn". */
100 if (cset_rangecmp(csn, ch) == 0)
104 * Allocate a new node and make it the new root.
106 ncsn = malloc(sizeof(*ncsn));
109 ncsn->csn_min = ncsn->csn_max = ch;
/*
 * Split the old tree around the new node. On a negative comparison the
 * old root becomes the right child and hands its left subtree to the new
 * node; the other assignments are the mirror image.
 */
110 if (cset_rangecmp(csn, ch) < 0) {
111 ncsn->csn_left = csn->csn_left;
112 ncsn->csn_right = csn;
113 csn->csn_left = NULL;
115 ncsn->csn_right = csn->csn_right;
116 ncsn->csn_left = csn;
117 csn->csn_right = NULL;
122 * Coalesce with left and right neighbours if possible.
/*
 * Left side: splay the left subtree on csn_min - 1. If the resulting
 * subtree root ends exactly at csn_min - 1 the two extents are adjacent:
 * remember its lower bound, remove that node, and extend this node's
 * lower bound down to the remembered value.
 */
124 if (ncsn->csn_left != NULL) {
125 ncsn->csn_left = cset_splay(ncsn->csn_left, ncsn->csn_min - 1);
126 if (ncsn->csn_left->csn_max == ncsn->csn_min - 1) {
127 oval = ncsn->csn_left->csn_min;
128 ncsn->csn_left = cset_delete(ncsn->csn_left,
129 ncsn->csn_left->csn_min);
130 ncsn->csn_min = oval;
/*
 * Right side: same idea for an extent starting exactly at csn_max + 1;
 * its upper bound is saved, the node is removed, and this node's upper
 * bound is raised to the saved value.
 */
133 if (ncsn->csn_right != NULL) {
134 ncsn->csn_right = cset_splay(ncsn->csn_right,
136 if (ncsn->csn_right->csn_min == ncsn->csn_max + 1) {
137 oval = ncsn->csn_right->csn_max;
138 ncsn->csn_right = cset_delete(ncsn->csn_right,
139 ncsn->csn_right->csn_min);
140 ncsn->csn_max = oval;
/*
 * cset_in_hard: membership test for ch that consults the class list and
 * the tree directly instead of the cs_cache table.
 *
 * Every result is XORed with cs_invert, so an inverted set reports the
 * opposite answer. Note that the tree lookup splays and stores a new
 * cs_root, i.e. a query modifies the set's internal tree shape.
 */
149 * Determine whether a character is in the set without using
153 cset_in_hard(struct cset *cs, wchar_t ch)
/*
 * A class matches when iswctype() reports membership and the class is not
 * inverted, or reports non-membership and the class is inverted.
 */
157 for (csc = cs->cs_classes; csc != NULL; csc = csc->csc_next)
158 if (csc->csc_invert ^ (iswctype(ch, csc->csc_type) != 0))
159 return (cs->cs_invert ^ true);
/* No class matched: splay on ch and test the node that ends up as root. */
160 if (cs->cs_root != NULL) {
161 cs->cs_root = cset_splay(cs->cs_root, ch);
162 return (cs->cs_invert ^ (cset_rangecmp(cs->cs_root, ch) == 0));
/* Empty tree and no matching class: ch is not an explicit member. */
164 return (cs->cs_invert ^ false);
/*
 * cset_cache: precompute membership for the character values
 * 0 .. CS_CACHE_SIZE - 1 by calling cset_in_hard() for each one, store
 * the answers in cs_cache[], and then mark the cache as valid.
 */
172 cset_cache(struct cset *cs)
176 for (i = 0; i < CS_CACHE_SIZE; i++)
177 cs->cs_cache[i] = cset_in_hard(cs, i);
179 cs->cs_havecache = true;
/*
 * cset_invert: flip the cs_invert flag (calling it twice restores the
 * original sense) and invalidate the cache, whose entries were computed
 * with the previous flag value.
 */
184 * Invert the character set.
187 cset_invert(struct cset *cs)
190 cs->cs_invert ^= true;
191 cs->cs_havecache = false;
/*
 * cset_addclass: record a character class of the given wctype_t type,
 * with its own invert flag, on the set cs.
 *
 * The new record is pushed onto the head of the cs_classes list and the
 * cache is invalidated. NOTE(review): the handling of a failed malloc()
 * is not visible in this view; confirm that csc is checked before use.
 */
196 * Add a wctype()-style character class to the set, optionally
200 cset_addclass(struct cset *cs, wctype_t type, bool invert)
204 csc = malloc(sizeof(*csc));
207 csc->csc_type = type;
208 csc->csc_invert = invert;
/* Link in front of the existing classes. */
209 csc->csc_next = cs->cs_classes;
210 cs->cs_classes = csc;
211 cs->cs_havecache = false;
/*
 * cset_rangecmp: compare character ch against the node t.
 *
 * Callers in this file test the result for < 0, == 0 and > 0, and treat
 * 0 as "ch is covered by t". NOTE(review): the body is not visible here;
 * presumably negative means ch lies below t's extent and positive means
 * above it, to be confirmed against the definition.
 */
216 cset_rangecmp(struct csnode *t, wchar_t ch)
/*
 * cset_splay: restructure the subtree rooted at t around the character ch
 * and return the new subtree root.
 *
 * Callers rely on the returned root being the node that covers ch when
 * such a node exists (they test it with cset_rangecmp() == 0).
 */
226 static struct csnode *
227 cset_splay(struct csnode *t, wchar_t ch)
/* N is a temporary node on the stack; l, r and y are working pointers. */
229 struct csnode N, *l, *r, *y;
232 * Based on public domain code from Sleator.
237 N.csn_left = N.csn_right = NULL;
/*
 * Negative comparison: continue towards t's left child. When the left
 * child compares negative as well, part of a rotation is visible below
 * (t takes over y's right subtree); the remaining steps are not shown.
 */
240 if (cset_rangecmp(t, ch) < 0) {
241 if (t->csn_left != NULL &&
242 cset_rangecmp(t->csn_left, ch) < 0) {
244 t->csn_left = y->csn_right;
248 if (t->csn_left == NULL)
/* Positive comparison: the mirror image, working on the right child. */
253 } else if (cset_rangecmp(t, ch) > 0) {
254 if (t->csn_right != NULL &&
255 cset_rangecmp(t->csn_right, ch) > 0) {
257 t->csn_right = y->csn_left;
261 if (t->csn_right == NULL)
/*
 * Reassembly: t's remaining subtrees are attached below l and r, then t
 * adopts what was collected under N. N's fields are deliberately crossed
 * over: N.csn_right becomes t's left child, N.csn_left its right child.
 */
269 l->csn_right = t->csn_left;
270 r->csn_left = t->csn_right;
271 t->csn_left = N.csn_right;
272 t->csn_right = N.csn_left;
/*
 * cset_delete: remove the node covering ch from the subtree rooted at t.
 *
 * The caller must pass a ch that is present: after splaying, the root is
 * asserted to cover ch. cset_add() stores the return value as the
 * replacement subtree root. NOTE(review): the end of this function,
 * including what happens to the removed node, is not visible here.
 */
276 static struct csnode *
277 cset_delete(struct csnode *t, wchar_t ch)
/* Bring the node to remove to the root and verify that it covers ch. */
282 t = cset_splay(t, ch);
283 assert(cset_rangecmp(t, ch) == 0);
284 if (t->csn_left == NULL)
/*
 * Otherwise join the two subtrees: splay the left subtree on ch and hang
 * t's right subtree off the resulting root x. This overwrites
 * x->csn_right, so it presumes the splay leaves that link empty
 * (presumably because everything on the left sorts below ch).
 */
287 x = cset_splay(t->csn_left, ch);
288 x->csn_right = t->csn_right;