2 * Copyright (c) 1980, 1993
3 * The Regents of the University of California. All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by the University of
17 * California, Berkeley and its contributors.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 static char copyright[] =
37 "@(#) Copyright (c) 1980, 1993\n\
38 The Regents of the University of California. All rights reserved.\n";
42 static char sccsid[] = "@(#)regexp.c 8.1 (Berkeley) 6/6/93";
54 static void expconv __P((void));
56 boolean _escaped; /* true if we are currently _escaped */
57 char *s_start; /* start of string */
58 boolean l_onecase; /* true if upper and lower equivalent */
60 #define makelower(c) (isupper((c)) ? tolower((c)) : (c))
62 /* STRNCMP - like strncmp except that we convert the
63 * first string to lower case before comparing
64 * if l_onecase is set.
69 register char *s1,*s2;
74 if (*s2 - makelower(*s1))
75 return (*s2 - makelower(*s1));
94 /* The following routine converts an irregular expression to
97 * Either meta symbols (\a \d or \p) or character strings or
98 * operations ( alternation or perenthesizing ) can be
99 * specified. Each starts with a descriptor byte. The descriptor
100 * byte has STR set for strings, META set for meta symbols
101 * and OPER set for operations.
102 * The descriptor byte can also have the OPT bit set if the object
103 * defined is optional. Also ALT can be set to indicate an alternation.
105 * For metasymbols the byte following the descriptor byte identities
106 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For
107 * strings the byte after the descriptor is a character count for
110 * meta symbols := descriptor
113 * strings := descriptor
117 * operatins := descriptor
123 * handy macros for accessing parts of match blocks
125 #define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */
126 #define MNEXT(A) (A+2) /* character following a metasymbol block */
128 #define OSYM(A) (*(A+1)) /* symbol in an operation block */
129 #define OCNT(A) (*(A+2)) /* character count */
130 #define ONEXT(A) (A+3) /* next character after the operation */
131 #define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */
133 #define SCNT(A) (*(A+1)) /* byte count of a string */
134 #define SSTR(A) (A+2) /* address of the string */
135 #define SNEXT(A) (A+2+*(A+1)) /* character following the string */
138 * bit flags in the descriptor
146 static char *ccre; /* pointer to current position in converted exp*/
147 static char *ure; /* pointer current position in unconverted exp */
151 char *re; /* unconverted irregular expression */
153 register char *cre; /* pointer to converted regular expression */
155 /* allocate room for the converted expression */
160 cre = malloc (4 * strlen(re) + 3);
164 /* start the conversion with a \a */
169 /* start the conversion (its recursive) */
178 register char *cs; /* pointer to current symbol in converted exp */
179 register char c; /* character being processed */
180 register char *acs; /* pinter to last alternate */
183 /* let the conversion begin */
186 while (*ure != NIL) {
187 switch (c = *ure++) {
190 switch (c = *ure++) {
192 /* escaped characters are just characters */
194 if (cs == NIL || (*cs & STR) == 0) {
204 /* normal(?) metacharacters */
209 if (acs != NIL && acs != cs) {
212 OCNT(acs) = ccre - acs;
225 /* just put the symbol in */
228 if (acs != NIL && acs != cs) {
231 OCNT(acs) = ccre - acs;
242 /* mark the last match sequence as optional */
248 /* recurse and define a subexpression */
250 if (acs != NIL && acs != cs) {
253 OCNT(acs) = ccre - acs;
263 OCNT(cs) = ccre - cs; /* offset to next symbol */
266 /* return from a recursion */
271 OCNT(acs) = ccre - acs;
282 /* mark the last match sequence as having an alternate */
283 /* the third byte will contain an offset to jump over the */
284 /* alternate match in case the first did not fail */
286 if (acs != NIL && acs != cs)
287 OCNT(ccre) = ccre - acs; /* make a back pointer */
295 acs = cs; /* remember that the pointer is to be filles */
298 /* if its not a metasymbol just build a scharacter string */
300 if (cs == NIL || (*cs & STR) == 0) {
314 OCNT(acs) = ccre - acs;
321 /* end of convertre */
325 * The following routine recognises an irregular expresion
326 * with the following special characters:
328 * \? - means last match was optional
329 * \a - matches any number of characters
330 * \d - matches any number of spaces and tabs
331 * \p - matches any number of alphanumeric
333 * characters matched will be copied into
334 * the area pointed to by 'name'.
336 * \( \) - grouping used mostly for alternation and
339 * The irregular expression must be translated to internal form
340 * prior to calling this routine
342 * The value returned is the pointer to the first non \a
347 expmatch (s, re, mstring)
348 register char *s; /* string to check for a match in */
349 register char *re; /* a converted irregular expression */
350 register char *mstring; /* where to put whatever matches a \p */
352 register char *cs; /* the current symbol */
353 register char *ptr,*s1; /* temporary pointer */
354 boolean matched; /* a temporary boolean */
356 /* initial conditions */
362 /* loop till expression string is exhausted (or at least pretty tired) */
364 switch (*cs & (OPER | STR | META)) {
366 /* try to match a string */
368 matched = !STRNCMP (s, SSTR(cs), SCNT(cs));
371 /* hoorah it matches */
374 } else if (*cs & ALT) {
376 /* alternation, skip to next expression */
378 } else if (*cs & OPT) {
380 /* the match is optional */
382 matched = 1; /* indicate a successful match */
385 /* no match, error return */
390 /* an operator, do something fancy */
394 /* this is an alternation */
398 /* last thing in the alternation was a match, skip ahead */
402 /* no match, keep trying */
406 /* this is a grouping, recurse */
408 ptr = expmatch (s, ONEXT(cs), mstring);
411 /* the subexpression matched */
414 } else if (*cs & ALT) {
416 /* alternation, skip to next expression */
418 } else if (*cs & OPT) {
420 /* the match is optional */
421 matched = 1; /* indicate a successful match */
424 /* no match, error return */
432 /* try to match a metasymbol */
436 /* try to match anything and remember what was matched */
439 * This is really the same as trying the match the
440 * remaining parts of the expression to any subset
445 ptr = expmatch (s1, MNEXT(cs), mstring);
446 if (ptr != NIL && s1 != s) {
448 /* we have a match, remember the match */
449 strncpy (mstring, s, s1 - s);
450 mstring[s1 - s] = '\0';
452 } else if (ptr != NIL && (*cs & OPT)) {
454 /* it was aoptional so no match is ok */
456 } else if (ptr != NIL) {
458 /* not optional and we still matched */
461 if (!(isalnum(*s1) || *s1 == '_' ||
464 /* C++ scope operator */
465 (strlen(s1) > 1 && *s1 == ':' && s1[1] == ':' &&
469 _escaped = _escaped ? FALSE : TRUE;
475 /* try to match anything */
478 * This is really the same as trying the match the
479 * remaining parts of the expression to any subset
484 ptr = expmatch (s1, MNEXT(cs), mstring);
485 if (ptr != NIL && s1 != s) {
487 /* we have a match */
489 } else if (ptr != NIL && (*cs & OPT)) {
491 /* it was aoptional so no match is ok */
493 } else if (ptr != NIL) {
495 /* not optional and we still matched */
499 _escaped = _escaped ? FALSE : TRUE;
505 /* fail if we are currently _escaped */
512 /* match any number of tabs and spaces */
515 while (*s == ' ' || *s == '\t')
517 if (s != ptr || s == s_start) {
519 /* match, be happy */
522 } else if (*s == '\n' || *s == '\0') {
524 /* match, be happy */
527 } else if (*cs & ALT) {
529 /* try the next part */
532 } else if (*cs & OPT) {
539 /* no match, error return */
543 /* check for end of line */
545 if (*s == '\0' || *s == '\n') {
547 /* match, be happy */
551 } else if (*cs & ALT) {
553 /* try the next part */
556 } else if (*cs & OPT) {
563 /* no match, error return */
567 /* check for start of line */
571 /* match, be happy */
574 } else if (*cs & ALT) {
576 /* try the next part */
579 } else if (*cs & OPT) {
586 /* no match, error return */
590 /* end of a subexpression, return success */