1 /* ccl - routines for character classes */
3 /* Copyright (c) 1990 The Regents of the University of California. */
4 /* All rights reserved. */
6 /* This code is derived from software contributed to Berkeley by */
9 /* The United States Government has rights in this work pursuant */
10 /* to contract no. DE-AC03-76SF00098 between the United States */
11 /* Department of Energy and the University of California. */
13 /* This file is part of flex. */
15 /* Redistribution and use in source and binary forms, with or without */
16 /* modification, are permitted provided that the following conditions */
19 /* 1. Redistributions of source code must retain the above copyright */
20 /* notice, this list of conditions and the following disclaimer. */
21 /* 2. Redistributions in binary form must reproduce the above copyright */
22 /* notice, this list of conditions and the following disclaimer in the */
23 /* documentation and/or other materials provided with the distribution. */
25 /* Neither the name of the University nor the names of its contributors */
26 /* may be used to endorse or promote products derived from this software */
27 /* without specific prior written permission. */
29 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
36 /* return true if the chr is in the ccl. Takes negation into account. */
38 ccl_contains (const int cclp, const int ch)
45 for (i = 0; i < len; ++i)
46 if (ccltbl[ind + i] == ch)
53 /* ccladd - add a single character to a ccl */
55 void ccladd (int cclp, int ch)
57 int ind, len, newpos, i;
64 /* check to see if the character is already in the ccl */
66 for (i = 0; i < len; ++i)
67 if (ccltbl[ind + i] == ch)
72 ccl_has_nl[cclp] = true;
76 if (newpos >= current_max_ccl_tbl_size) {
77 current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
81 ccltbl = reallocate_Character_array (ccltbl,
82 current_max_ccl_tbl_size);
85 ccllen[cclp] = len + 1;
86 ccltbl[newpos] = (unsigned char) ch;
89 /* dump_cclp - same thing as list_character_set, but for cclps. */
91 static void dump_cclp (FILE* file, int cclp)
97 for (i = 0; i < csize; ++i) {
98 if (ccl_contains(cclp, i)){
103 fputs (readable_form (i), file);
105 while (++i < csize && ccl_contains(cclp,i)) ;
107 if (i - 1 > start_char)
109 fprintf (file, "-%s",
110 readable_form (i - 1));
121 /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
123 ccl_set_diff (int a, int b)
127 /* create new class */
130 /* In order to handle negation, we spin through all possible chars,
131 * addding each char in a that is not in b.
132 * (This could be O(n^2), but n is small and bounded.)
134 for ( ch = 0; ch < csize; ++ch )
135 if (ccl_contains (a, ch) && !ccl_contains(b, ch))
140 fprintf(stderr, "ccl_set_diff (");
141 fprintf(stderr, "\n ");
142 dump_cclp (stderr, a);
143 fprintf(stderr, "\n ");
144 dump_cclp (stderr, b);
145 fprintf(stderr, "\n ");
146 dump_cclp (stderr, d);
147 fprintf(stderr, "\n)\n");
152 /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
154 ccl_set_union (int a, int b)
158 /* create new class */
162 for (i = 0; i < ccllen[a]; ++i)
163 ccladd (d, ccltbl[cclmap[a] + i]);
166 for (i = 0; i < ccllen[b]; ++i)
167 ccladd (d, ccltbl[cclmap[b] + i]);
171 fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
172 fprintf(stderr, "\n ");
173 dump_cclp (stderr, a);
174 fprintf(stderr, "\n ");
175 dump_cclp (stderr, b);
176 fprintf(stderr, "\n ");
177 dump_cclp (stderr, d);
178 fprintf(stderr, "\n)\n");
184 /* cclinit - return an empty ccl */
188 if (++lastccl >= current_maxccls) {
189 current_maxccls += MAX_CCLS_INCREMENT;
194 reallocate_integer_array (cclmap, current_maxccls);
196 reallocate_integer_array (ccllen, current_maxccls);
197 cclng = reallocate_integer_array (cclng, current_maxccls);
199 reallocate_bool_array (ccl_has_nl,
204 /* we're making the first ccl */
208 /* The new pointer is just past the end of the last ccl.
209 * Since the cclmap points to the \first/ character of a
210 * ccl, adding the length of the ccl to the cclmap pointer
211 * will produce a cursor to the first free space.
214 cclmap[lastccl - 1] + ccllen[lastccl - 1];
217 cclng[lastccl] = 0; /* ccl's start out life un-negated */
218 ccl_has_nl[lastccl] = false;
224 /* cclnegate - negate the given ccl */
226 void cclnegate (int cclp)
229 ccl_has_nl[cclp] = !ccl_has_nl[cclp];
233 /* list_character_set - list the members of a set of characters in CCL form
235 * Writes to the given file a character-class representation of those
236 * characters present in the given CCL. A character is present if it
237 * has a non-zero value in the cset array.
240 void list_character_set (FILE *file, int cset[])
246 for (i = 0; i < csize; ++i) {
252 fputs (readable_form (i), file);
254 while (++i < csize && cset[i]) ;
256 if (i - 1 > start_char)
258 fprintf (file, "-%s",
259 readable_form (i - 1));
268 /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
269 * scanner. Specifically, if a lowercase or uppercase character, x, is in the
270 * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
271 * be in the range. If not, then this range is ambiguous, and the function
272 * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that
273 * [a-z] will be labeled ambiguous because it does not include [A-Z].
275 * @param c1 the lower end of the range
276 * @param c2 the upper end of the range
277 * @return true if [c1-c2] is not ambiguous for a caseless scanner.
279 bool range_covers_case (int c1, int c2)
283 for (i = c1; i <= c2; i++) {
285 o = reverse_case (i);
286 if (o < c1 || c2 < o)
293 /** Reverse the case of a character, if possible.
294 * @return c if case-reversal does not apply.
296 int reverse_case (int c)
298 return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
301 /** Return true if c is uppercase or lowercase. */
302 bool has_case (int c)
304 return (isupper (c) || islower (c)) ? true : false;