]> CyberLeo.Net >> Repos - FreeBSD/releng/9.3.git/blob - contrib/bind9/lib/isc/regex.c
Copy stable/9 to releng/9.3 as part of the 9.3-RELEASE cycle.
[FreeBSD/releng/9.3.git] / contrib / bind9 / lib / isc / regex.c
1 /*
2  * Copyright (C) 2013  Internet Systems Consortium, Inc. ("ISC")
3  *
4  * Permission to use, copy, modify, and/or distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
9  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
10  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
11  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
12  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
13  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
14  * PERFORMANCE OF THIS SOFTWARE.
15  */
16
17 #include <config.h>
18
19 #include <isc/file.h>
20 #include <isc/regex.h>
21 #include <isc/string.h>
22
/*
 * FAIL(msg) abandons the current validation by jumping to the 'error'
 * label of the enclosing function.  When VALREGEX_REPORT_REASON is
 * non-zero, 'msg' is first stored in the enclosing function's local
 * variable 'reason'; otherwise 'msg' is discarded.
 */
#if !VALREGEX_REPORT_REASON
#define FAIL(msg) goto error
#else
#define FAIL(msg) \
        do { \
                reason = (msg); \
                goto error; \
        } while (0)
#endif
28
29 /*
30  * Validate the regular expression 'C' locale.
31  */
32 int
33 isc_regex_validate(const char *c) {
34         enum {
35                 none, parse_bracket, parse_bound,
36                 parse_ce, parse_ec, parse_cc
37         } state = none;
38         /* Well known character classes. */
39         const char *cc[] = {
40                 ":alnum:", ":digit:", ":punct:", ":alpha:", ":graph:",
41                 ":space:", ":blank:", ":lower:", ":upper:", ":cntrl:",
42                 ":print:", ":xdigit:"
43         };
44         isc_boolean_t seen_comma = ISC_FALSE;
45         isc_boolean_t seen_high = ISC_FALSE;
46         isc_boolean_t seen_char = ISC_FALSE;
47         isc_boolean_t seen_ec = ISC_FALSE;
48         isc_boolean_t seen_ce = ISC_FALSE;
49         isc_boolean_t have_atom = ISC_FALSE;
50         int group = 0;
51         int range = 0;
52         int sub = 0;
53         isc_boolean_t empty_ok = ISC_FALSE;
54         isc_boolean_t neg = ISC_FALSE;
55         isc_boolean_t was_multiple = ISC_FALSE;
56         unsigned int low = 0;
57         unsigned int high = 0;
58         const char *ccname = NULL;
59         int range_start = 0;
60 #if VALREGEX_REPORT_REASON
61         const char *reason = "";
62 #endif
63
64         if (c == NULL || *c == 0)
65                 FAIL("empty string");
66
67         while (c != NULL && *c != 0) {
68                 switch (state) {
69                 case none:
70                         switch (*c) {
71                         case '\\':      /* make literal */
72                                 ++c;
73                                 switch (*c) {
74                                 case '1': case '2': case '3':
75                                 case '4': case '5': case '6':
76                                 case '7': case '8': case '9':
77                                         if ((*c - '0') > sub)
78                                                 FAIL("bad back reference");
79                                         have_atom = ISC_TRUE;
80                                         was_multiple = ISC_FALSE;
81                                         break;
82                                 case 0:
83                                         FAIL("escaped end-of-string");
84                                 default:
85                                         goto literal;
86                                 }
87                                 ++c;
88                                 break;
89                         case '[':       /* bracket start */
90                                 ++c;
91                                 neg = ISC_FALSE;
92                                 was_multiple = ISC_FALSE;
93                                 seen_char = ISC_FALSE;
94                                 state = parse_bracket;
95                                 break;
96                         case '{':       /* bound start */
97                                 switch (c[1]) {
98                                 case '0': case '1': case '2': case '3':
99                                 case '4': case '5': case '6': case '7':
100                                 case '8': case '9':
101                                         if (!have_atom)
102                                                 FAIL("no atom");
103                                         if (was_multiple)
104                                                 FAIL("was multiple");
105                                         seen_comma = ISC_FALSE;
106                                         seen_high = ISC_FALSE;
107                                         low = high = 0;
108                                         state = parse_bound;
109                                         break;
110                                 default:
111                                         goto literal;
112                                 }
113                                 ++c;
114                                 have_atom = ISC_TRUE;
115                                 was_multiple = ISC_TRUE;
116                                 break;
117                         case '}':
118                                 goto literal;
119                         case '(':       /* group start */
120                                 have_atom = ISC_FALSE;
121                                 was_multiple = ISC_FALSE;
122                                 empty_ok = ISC_TRUE;
123                                 ++group;
124                                 ++sub;
125                                 ++c;
126                                 break;
127                         case ')':       /* group end */
128                                 if (group && !have_atom && !empty_ok)
129                                         FAIL("empty alternative");
130                                 have_atom = ISC_TRUE;
131                                 was_multiple = ISC_FALSE;
132                                 if (group != 0)
133                                         --group;
134                                 ++c;
135                                 break;
136                         case '|':       /* alternative seperator */
137                                 if (!have_atom)
138                                         FAIL("no atom");
139                                 have_atom = ISC_FALSE;
140                                 empty_ok = ISC_FALSE;
141                                 was_multiple = ISC_FALSE;
142                                 ++c;
143                                 break;
144                         case '^':
145                         case '$':
146                                 have_atom = ISC_TRUE;
147                                 was_multiple = ISC_TRUE;
148                                 ++c;
149                                 break;
150                         case '+':
151                         case '*':
152                         case '?':
153                                 if (was_multiple)
154                                         FAIL("was multiple");
155                                 if (!have_atom)
156                                         FAIL("no atom");
157                                 have_atom = ISC_TRUE;
158                                 was_multiple = ISC_TRUE;
159                                 ++c;
160                                 break;
161                         case '.':
162                         default:
163                         literal:
164                                 have_atom = ISC_TRUE;
165                                 was_multiple = ISC_FALSE;
166                                 ++c;
167                                 break;
168                         }
169                         break;
170                 case parse_bound:
171                         switch (*c) {
172                         case '0': case '1': case '2': case '3': case '4':
173                         case '5': case '6': case '7': case '8': case '9':
174                                 if (!seen_comma) {
175                                         low = low * 10 + *c - '0';
176                                         if (low > 255)
177                                                 FAIL("lower bound too big");
178                                 } else {
179                                         seen_high = ISC_TRUE;
180                                         high = high * 10 + *c - '0';
181                                         if (high > 255)
182                                                 FAIL("upper bound too big");
183                                 }
184                                 ++c;
185                                 break;
186                         case ',':
187                                 if (seen_comma)
188                                         FAIL("multiple commas");
189                                 seen_comma = ISC_TRUE;
190                                 ++c;
191                                 break;
192                         default:
193                         case '{':
194                                 FAIL("non digit/comma");
195                         case '}':
196                                 if (seen_high && low > high)
197                                         FAIL("bad parse bound");
198                                 seen_comma = ISC_FALSE;
199                                 state = none;
200                                 ++c;
201                                 break;
202                         }
203                         break;
204                 case parse_bracket:
205                         switch (*c) {
206                         case '^':
207                                 if (seen_char || neg) goto inside;
208                                 neg = ISC_TRUE;
209                                 ++c;
210                                 break;
211                         case '-':
212                                 if (range == 2) goto inside;
213                                 if (!seen_char) goto inside;
214                                 if (range == 1)
215                                         FAIL("bad range");
216                                 range = 2;
217                                 ++c;
218                                 break;
219                         case '[':
220                                 ++c;
221                                 switch (*c) {
222                                 case '.':       /* collating element */
223                                         if (range) --range;
224                                         ++c;
225                                         state = parse_ce;
226                                         seen_ce = ISC_FALSE;
227                                         break;
228                                 case '=':       /* equivalence class */
229                                         if (range == 2)
230                                             FAIL("equivalence class in range");
231                                         ++c;
232                                         state = parse_ec;
233                                         seen_ec = ISC_FALSE;
234                                         break;
235                                 case ':':       /* character class */
236                                         if (range == 2)
237                                               FAIL("character class in range");
238                                         ccname = c;
239                                         ++c;
240                                         state = parse_cc;
241                                         break;
242                                 }
243                                 seen_char = ISC_TRUE;
244                                 break;
245                         case ']':
246                                 if (!c[1] && !seen_char)
247                                         FAIL("unfinished brace");
248                                 if (!seen_char)
249                                         goto inside;
250                                 ++c;
251                                 range = 0;
252                                 have_atom = ISC_TRUE;
253                                 state = none;
254                                 break;
255                         default:
256                         inside:
257                                 seen_char = ISC_TRUE;
258                                 if (range == 2 && *c < range_start)
259                                         FAIL("out of order range");
260                                 if (range != 0)
261                                         --range;
262                                 range_start = *c;
263                                 ++c;
264                                 break;
265                         };
266                         break;
267                 case parse_ce:
268                         switch (*c) {
269                         case '.':
270                                 ++c;
271                                 switch (*c) {
272                                 case ']':
273                                         if (!seen_ce)
274                                                  FAIL("empty ce");
275                                         ++c;
276                                         state = parse_bracket;
277                                         break;
278                                 default:
279                                         if (seen_ce)
280                                                 range_start = 256;
281                                         else
282                                                 range_start = '.';
283                                         seen_ce = ISC_TRUE;
284                                         break;
285                                 }
286                                 break;
287                         default:
288                                 if (seen_ce)
289                                         range_start = 256;
290                                 else
291                                         range_start = *c;
292                                 seen_ce = ISC_TRUE;
293                                 ++c;
294                                 break;
295                         }
296                         break;
297                 case parse_ec:
298                         switch (*c) {
299                         case '=':
300                                 ++c;
301                                 switch (*c) {
302                                 case ']':
303                                         if (!seen_ec)
304                                                 FAIL("no ec");
305                                         ++c;
306                                         state = parse_bracket;
307                                         break;
308                                 default:
309                                         seen_ec = ISC_TRUE;
310                                         break;
311                                 }
312                                 break;
313                         default:
314                                 seen_ec = ISC_TRUE;
315                                 ++c;
316                                 break;
317                         }
318                         break;
319                 case parse_cc:
320                         switch (*c) {
321                         case ':':
322                                 ++c;
323                                 switch (*c) {
324                                 case ']': {
325                                         unsigned int i;
326                                         isc_boolean_t found = ISC_FALSE;
327                                         for (i = 0;
328                                              i < sizeof(cc)/sizeof(*cc);
329                                              i++)
330                                         {
331                                                 unsigned int len;
332                                                 len = strlen(cc[i]);
333                                                 if (len !=
334                                                     (unsigned int)(c - ccname))
335                                                         continue;
336                                                 if (strncmp(cc[i], ccname, len))
337                                                         continue;
338                                                 found = ISC_TRUE;
339                                         }
340                                         if (!found)
341                                                 FAIL("unknown cc");
342                                         ++c;
343                                         state = parse_bracket;
344                                         break;
345                                         }
346                                 default:
347                                         break;
348                                 }
349                                 break;
350                         default:
351                                 ++c;
352                                 break;
353                         }
354                         break;
355                 }
356         }
357         if (group != 0)
358                 FAIL("group open");
359         if (state != none)
360                 FAIL("incomplete");
361         if (!have_atom)
362                 FAIL("no atom");
363         return (sub);
364
365  error:
366 #if VALREGEX_REPORT_REASON
367         fprintf(stderr, "%s\n", reason);
368 #endif
369         return (-1);
370 }