]> CyberLeo.Net >> Repos - FreeBSD/stable/9.git/blob - contrib/bind9/lib/isc/regex.c
Update BIND to 9.9.8
[FreeBSD/stable/9.git] / contrib / bind9 / lib / isc / regex.c
1 /*
2  * Copyright (C) 2013-2015  Internet Systems Consortium, Inc. ("ISC")
3  *
4  * Permission to use, copy, modify, and/or distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
9  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
10  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
11  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
12  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
13  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
14  * PERFORMANCE OF THIS SOFTWARE.
15  */
16
17 #include <config.h>
18
19 #include <isc/file.h>
20 #include <isc/print.h>
21 #include <isc/regex.h>
22 #include <isc/string.h>
23
24 #if VALREGEX_REPORT_REASON
25 #define FAIL(x) do { reason = (x); goto error; } while(0)
26 #else
27 #define FAIL(x) goto error
28 #endif
29
30 /*
31  * Validate the regular expression 'C' locale.
32  */
33 int
34 isc_regex_validate(const char *c) {
35         enum {
36                 none, parse_bracket, parse_bound,
37                 parse_ce, parse_ec, parse_cc
38         } state = none;
39         /* Well known character classes. */
40         const char *cc[] = {
41                 ":alnum:", ":digit:", ":punct:", ":alpha:", ":graph:",
42                 ":space:", ":blank:", ":lower:", ":upper:", ":cntrl:",
43                 ":print:", ":xdigit:"
44         };
45         isc_boolean_t seen_comma = ISC_FALSE;
46         isc_boolean_t seen_high = ISC_FALSE;
47         isc_boolean_t seen_char = ISC_FALSE;
48         isc_boolean_t seen_ec = ISC_FALSE;
49         isc_boolean_t seen_ce = ISC_FALSE;
50         isc_boolean_t have_atom = ISC_FALSE;
51         int group = 0;
52         int range = 0;
53         int sub = 0;
54         isc_boolean_t empty_ok = ISC_FALSE;
55         isc_boolean_t neg = ISC_FALSE;
56         isc_boolean_t was_multiple = ISC_FALSE;
57         unsigned int low = 0;
58         unsigned int high = 0;
59         const char *ccname = NULL;
60         int range_start = 0;
61 #if VALREGEX_REPORT_REASON
62         const char *reason = "";
63 #endif
64
65         if (c == NULL || *c == 0)
66                 FAIL("empty string");
67
68         while (c != NULL && *c != 0) {
69                 switch (state) {
70                 case none:
71                         switch (*c) {
72                         case '\\':      /* make literal */
73                                 ++c;
74                                 switch (*c) {
75                                 case '1': case '2': case '3':
76                                 case '4': case '5': case '6':
77                                 case '7': case '8': case '9':
78                                         if ((*c - '0') > sub)
79                                                 FAIL("bad back reference");
80                                         have_atom = ISC_TRUE;
81                                         was_multiple = ISC_FALSE;
82                                         break;
83                                 case 0:
84                                         FAIL("escaped end-of-string");
85                                 default:
86                                         goto literal;
87                                 }
88                                 ++c;
89                                 break;
90                         case '[':       /* bracket start */
91                                 ++c;
92                                 neg = ISC_FALSE;
93                                 was_multiple = ISC_FALSE;
94                                 seen_char = ISC_FALSE;
95                                 state = parse_bracket;
96                                 break;
97                         case '{':       /* bound start */
98                                 switch (c[1]) {
99                                 case '0': case '1': case '2': case '3':
100                                 case '4': case '5': case '6': case '7':
101                                 case '8': case '9':
102                                         if (!have_atom)
103                                                 FAIL("no atom");
104                                         if (was_multiple)
105                                                 FAIL("was multiple");
106                                         seen_comma = ISC_FALSE;
107                                         seen_high = ISC_FALSE;
108                                         low = high = 0;
109                                         state = parse_bound;
110                                         break;
111                                 default:
112                                         goto literal;
113                                 }
114                                 ++c;
115                                 have_atom = ISC_TRUE;
116                                 was_multiple = ISC_TRUE;
117                                 break;
118                         case '}':
119                                 goto literal;
120                         case '(':       /* group start */
121                                 have_atom = ISC_FALSE;
122                                 was_multiple = ISC_FALSE;
123                                 empty_ok = ISC_TRUE;
124                                 ++group;
125                                 ++sub;
126                                 ++c;
127                                 break;
128                         case ')':       /* group end */
129                                 if (group && !have_atom && !empty_ok)
130                                         FAIL("empty alternative");
131                                 have_atom = ISC_TRUE;
132                                 was_multiple = ISC_FALSE;
133                                 if (group != 0)
134                                         --group;
135                                 ++c;
136                                 break;
137                         case '|':       /* alternative seperator */
138                                 if (!have_atom)
139                                         FAIL("no atom");
140                                 have_atom = ISC_FALSE;
141                                 empty_ok = ISC_FALSE;
142                                 was_multiple = ISC_FALSE;
143                                 ++c;
144                                 break;
145                         case '^':
146                         case '$':
147                                 have_atom = ISC_TRUE;
148                                 was_multiple = ISC_TRUE;
149                                 ++c;
150                                 break;
151                         case '+':
152                         case '*':
153                         case '?':
154                                 if (was_multiple)
155                                         FAIL("was multiple");
156                                 if (!have_atom)
157                                         FAIL("no atom");
158                                 have_atom = ISC_TRUE;
159                                 was_multiple = ISC_TRUE;
160                                 ++c;
161                                 break;
162                         case '.':
163                         default:
164                         literal:
165                                 have_atom = ISC_TRUE;
166                                 was_multiple = ISC_FALSE;
167                                 ++c;
168                                 break;
169                         }
170                         break;
171                 case parse_bound:
172                         switch (*c) {
173                         case '0': case '1': case '2': case '3': case '4':
174                         case '5': case '6': case '7': case '8': case '9':
175                                 if (!seen_comma) {
176                                         low = low * 10 + *c - '0';
177                                         if (low > 255)
178                                                 FAIL("lower bound too big");
179                                 } else {
180                                         seen_high = ISC_TRUE;
181                                         high = high * 10 + *c - '0';
182                                         if (high > 255)
183                                                 FAIL("upper bound too big");
184                                 }
185                                 ++c;
186                                 break;
187                         case ',':
188                                 if (seen_comma)
189                                         FAIL("multiple commas");
190                                 seen_comma = ISC_TRUE;
191                                 ++c;
192                                 break;
193                         default:
194                         case '{':
195                                 FAIL("non digit/comma");
196                         case '}':
197                                 if (seen_high && low > high)
198                                         FAIL("bad parse bound");
199                                 seen_comma = ISC_FALSE;
200                                 state = none;
201                                 ++c;
202                                 break;
203                         }
204                         break;
205                 case parse_bracket:
206                         switch (*c) {
207                         case '^':
208                                 if (seen_char || neg) goto inside;
209                                 neg = ISC_TRUE;
210                                 ++c;
211                                 break;
212                         case '-':
213                                 if (range == 2) goto inside;
214                                 if (!seen_char) goto inside;
215                                 if (range == 1)
216                                         FAIL("bad range");
217                                 range = 2;
218                                 ++c;
219                                 break;
220                         case '[':
221                                 ++c;
222                                 switch (*c) {
223                                 case '.':       /* collating element */
224                                         if (range != 0) --range;
225                                         ++c;
226                                         state = parse_ce;
227                                         seen_ce = ISC_FALSE;
228                                         break;
229                                 case '=':       /* equivalence class */
230                                         if (range == 2)
231                                             FAIL("equivalence class in range");
232                                         ++c;
233                                         state = parse_ec;
234                                         seen_ec = ISC_FALSE;
235                                         break;
236                                 case ':':       /* character class */
237                                         if (range == 2)
238                                               FAIL("character class in range");
239                                         ccname = c;
240                                         ++c;
241                                         state = parse_cc;
242                                         break;
243                                 }
244                                 seen_char = ISC_TRUE;
245                                 break;
246                         case ']':
247                                 if (!c[1] && !seen_char)
248                                         FAIL("unfinished brace");
249                                 if (!seen_char)
250                                         goto inside;
251                                 ++c;
252                                 range = 0;
253                                 have_atom = ISC_TRUE;
254                                 state = none;
255                                 break;
256                         default:
257                         inside:
258                                 seen_char = ISC_TRUE;
259                                 if (range == 2 && (*c & 0xff) < range_start)
260                                         FAIL("out of order range");
261                                 if (range != 0)
262                                         --range;
263                                 range_start = *c & 0xff;
264                                 ++c;
265                                 break;
266                         };
267                         break;
268                 case parse_ce:
269                         switch (*c) {
270                         case '.':
271                                 ++c;
272                                 switch (*c) {
273                                 case ']':
274                                         if (!seen_ce)
275                                                  FAIL("empty ce");
276                                         ++c;
277                                         state = parse_bracket;
278                                         break;
279                                 default:
280                                         if (seen_ce)
281                                                 range_start = 256;
282                                         else
283                                                 range_start = '.';
284                                         seen_ce = ISC_TRUE;
285                                         break;
286                                 }
287                                 break;
288                         default:
289                                 if (seen_ce)
290                                         range_start = 256;
291                                 else
292                                         range_start = *c;
293                                 seen_ce = ISC_TRUE;
294                                 ++c;
295                                 break;
296                         }
297                         break;
298                 case parse_ec:
299                         switch (*c) {
300                         case '=':
301                                 ++c;
302                                 switch (*c) {
303                                 case ']':
304                                         if (!seen_ec)
305                                                 FAIL("no ec");
306                                         ++c;
307                                         state = parse_bracket;
308                                         break;
309                                 default:
310                                         seen_ec = ISC_TRUE;
311                                         break;
312                                 }
313                                 break;
314                         default:
315                                 seen_ec = ISC_TRUE;
316                                 ++c;
317                                 break;
318                         }
319                         break;
320                 case parse_cc:
321                         switch (*c) {
322                         case ':':
323                                 ++c;
324                                 switch (*c) {
325                                 case ']': {
326                                         unsigned int i;
327                                         isc_boolean_t found = ISC_FALSE;
328                                         for (i = 0;
329                                              i < sizeof(cc)/sizeof(*cc);
330                                              i++)
331                                         {
332                                                 unsigned int len;
333                                                 len = strlen(cc[i]);
334                                                 if (len !=
335                                                     (unsigned int)(c - ccname))
336                                                         continue;
337                                                 if (strncmp(cc[i], ccname, len))
338                                                         continue;
339                                                 found = ISC_TRUE;
340                                         }
341                                         if (!found)
342                                                 FAIL("unknown cc");
343                                         ++c;
344                                         state = parse_bracket;
345                                         break;
346                                         }
347                                 default:
348                                         break;
349                                 }
350                                 break;
351                         default:
352                                 ++c;
353                                 break;
354                         }
355                         break;
356                 }
357         }
358         if (group != 0)
359                 FAIL("group open");
360         if (state != none)
361                 FAIL("incomplete");
362         if (!have_atom)
363                 FAIL("no atom");
364         return (sub);
365
366  error:
367 #if VALREGEX_REPORT_REASON
368         fprintf(stderr, "%s\n", reason);
369 #endif
370         return (-1);
371 }