2 * Copyright (c) 1980, 1993
3 * The Regents of the University of California. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 4. Neither the name of the University nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
36 static const char copyright[] =
37 "@(#) Copyright (c) 1980, 1993\n\
38 The Regents of the University of California. All rights reserved.\n";
42 static const char sccsid[] = "@(#)regexp.c 8.1 (Berkeley) 6/6/93";
52 static void expconv(void);
54 bool _escaped; /* true if we are currently x_escaped */
55 char *s_start; /* start of string */
56 bool l_onecase; /* true if upper and lower equivalent */
58 #define makelower(c) (isupper((c)) ? tolower((c)) : (c))
60 /* STRNCMP - like strncmp except that we convert the
61 * first string to lower case before comparing
62 * if l_onecase is set.
66 STRNCMP(register char *s1, register char *s2, register int len)
70 if (*s2 - makelower(*s1))
71 return (*s2 - makelower(*s1));
90 /* The following routine converts an irregular expression to
93 * Either meta symbols (\a \d or \p) or character strings or
94 * operations ( alternation or perenthesizing ) can be
95 * specified. Each starts with a descriptor byte. The descriptor
96 * byte has STR set for strings, META set for meta symbols
97 * and OPER set for operations.
98 * The descriptor byte can also have the OPT bit set if the object
99 * defined is optional. Also ALT can be set to indicate an alternation.
101 * For metasymbols the byte following the descriptor byte identities
102 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For
103 * strings the byte after the descriptor is a character count for
106 * meta symbols := descriptor
109 * strings := descriptor
113 * operatins := descriptor
119 * handy macros for accessing parts of match blocks
121 #define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */
122 #define MNEXT(A) (A+2) /* character following a metasymbol block */
124 #define OSYM(A) (*(A+1)) /* symbol in an operation block */
125 #define OCNT(A) (*(A+2)) /* character count */
126 #define ONEXT(A) (A+3) /* next character after the operation */
127 #define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */
129 #define SCNT(A) (*(A+1)) /* byte count of a string */
130 #define SSTR(A) (A+2) /* address of the string */
131 #define SNEXT(A) (A+2+*(A+1)) /* character following the string */
134 * bit flags in the descriptor
142 static char *ccre; /* pointer to current position in converted exp*/
143 static char *ure; /* pointer current position in unconverted exp */
145 /* re: unconverted irregular expression */
149 register char *cre; /* pointer to converted regular expression */
151 /* allocate room for the converted expression */
156 cre = malloc(4 * strlen(re) + 3);
160 /* start the conversion with a \a */
165 /* start the conversion (its recursive) */
174 register char *cs; /* pointer to current symbol in converted exp */
175 register char c; /* character being processed */
176 register char *acs; /* pinter to last alternate */
179 /* let the conversion begin */
183 switch (c = *ure++) {
186 switch (c = *ure++) {
188 /* escaped characters are just characters */
190 if (cs == NULL || (*cs & STR) == 0) {
200 /* normal(?) metacharacters */
205 if (acs != NULL && acs != cs) {
208 OCNT(acs) = ccre - acs;
221 /* just put the symbol in */
224 if (acs != NULL && acs != cs) {
227 OCNT(acs) = ccre - acs;
238 /* mark the last match sequence as optional */
244 /* recurse and define a subexpression */
246 if (acs != NULL && acs != cs) {
249 OCNT(acs) = ccre - acs;
259 OCNT(cs) = ccre - cs; /* offset to next symbol */
262 /* return from a recursion */
267 OCNT(acs) = ccre - acs;
278 /* mark the last match sequence as having an alternate */
279 /* the third byte will contain an offset to jump over the */
280 /* alternate match in case the first did not fail */
282 if (acs != NULL && acs != cs)
283 OCNT(ccre) = ccre - acs; /* make a back pointer */
291 acs = cs; /* remember that the pointer is to be filles */
294 /* if its not a metasymbol just build a scharacter string */
296 if (cs == NULL || (*cs & STR) == 0) {
310 OCNT(acs) = ccre - acs;
317 /* end of convertre */
321 * The following routine recognises an irregular expression
322 * with the following special characters:
324 * \? - means last match was optional
325 * \a - matches any number of characters
326 * \d - matches any number of spaces and tabs
327 * \p - matches any number of alphanumeric
329 * characters matched will be copied into
330 * the area pointed to by 'name'.
332 * \( \) - grouping used mostly for alternation and
335 * The irregular expression must be translated to internal form
336 * prior to calling this routine
338 * The value returned is the pointer to the first non \a
343 * s: string to check for a match in
344 * re: a converted irregular expression
345 * mstring: where to put whatever matches a \p
348 expmatch (register char *s, register char *re, register char *mstring)
350 register char *cs; /* the current symbol */
351 register char *ptr,*s1; /* temporary pointer */
352 bool matched; /* a temporary bool */
354 /* initial conditions */
360 /* loop till expression string is exhausted (or at least pretty tired) */
362 switch (*cs & (OPER | STR | META)) {
364 /* try to match a string */
366 matched = !STRNCMP (s, SSTR(cs), SCNT(cs));
369 /* hoorah it matches */
372 } else if (*cs & ALT) {
374 /* alternation, skip to next expression */
376 } else if (*cs & OPT) {
378 /* the match is optional */
380 matched = 1; /* indicate a successful match */
383 /* no match, error return */
388 /* an operator, do something fancy */
392 /* this is an alternation */
396 /* last thing in the alternation was a match, skip ahead */
400 /* no match, keep trying */
404 /* this is a grouping, recurse */
406 ptr = expmatch(s, ONEXT(cs), mstring);
409 /* the subexpression matched */
412 } else if (*cs & ALT) {
414 /* alternation, skip to next expression */
416 } else if (*cs & OPT) {
418 /* the match is optional */
419 matched = 1; /* indicate a successful match */
422 /* no match, error return */
430 /* try to match a metasymbol */
434 /* try to match anything and remember what was matched */
437 * This is really the same as trying the match the
438 * remaining parts of the expression to any subset
443 ptr = expmatch(s1, MNEXT(cs), mstring);
444 if (ptr != NULL && s1 != s) {
446 /* we have a match, remember the match */
447 strncpy (mstring, s, s1 - s);
448 mstring[s1 - s] = '\0';
450 } else if (ptr != NULL && (*cs & OPT)) {
452 /* it was aoptional so no match is ok */
454 } else if (ptr != NULL) {
456 /* not optional and we still matched */
459 if (!(isalnum(*s1) || *s1 == '_' ||
462 /* C++ scope operator */
463 (strlen(s1) > 1 && *s1 == ':' && s1[1] == ':' &&
467 _escaped = _escaped ? false : true;
473 /* try to match anything */
476 * This is really the same as trying the match the
477 * remaining parts of the expression to any subset
482 ptr = expmatch(s1, MNEXT(cs), mstring);
483 if (ptr != NULL && s1 != s) {
485 /* we have a match */
487 } else if (ptr != NULL && (*cs & OPT)) {
489 /* it was aoptional so no match is ok */
491 } else if (ptr != NULL) {
493 /* not optional and we still matched */
497 _escaped = _escaped ? false : true;
503 /* fail if we are currently _escaped */
510 /* match any number of tabs and spaces */
513 while (*s == ' ' || *s == '\t')
515 if (s != ptr || s == s_start) {
517 /* match, be happy */
520 } else if (*s == '\n' || *s == '\0') {
522 /* match, be happy */
525 } else if (*cs & ALT) {
527 /* try the next part */
530 } else if (*cs & OPT) {
537 /* no match, error return */
541 /* check for end of line */
543 if (*s == '\0' || *s == '\n') {
545 /* match, be happy */
549 } else if (*cs & ALT) {
551 /* try the next part */
554 } else if (*cs & OPT) {
561 /* no match, error return */
565 /* check for start of line */
569 /* match, be happy */
572 } else if (*cs & ALT) {
574 /* try the next part */
577 } else if (*cs & OPT) {
584 /* no match, error return */
588 /* end of a subexpression, return success */