1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 ****************************************************************/
25 /* lasciate ogne speranza, voi ch'intrate. */
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
40 #define HAT (NCHARS+2) /* matches ^ in regular expr */
44 #define type(v) (v)->nobj /* badly overloaded here */
45 #define info(v) (v)->ntype /* badly overloaded here */
46 #define left(v) (v)->narg[0]
47 #define right(v) (v)->narg[1]
48 #define parent(v) (v)->nnext
50 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
51 #define ELEAF case EMPTYRE: /* empty string in regexp */
52 #define UNARY case STAR: case PLUS: case QUEST:
54 /* encoding in tree Nodes:
55 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
56 left is index, right contains value or pointer to value
57 unary (STAR, PLUS, QUEST): left is child, right is null
58 binary (CAT, OR): left and right are children
59 parent contains pointer to parent
67 int rtok; /* next token in current re */
69 static uschar *rlxstr;
70 static uschar *prestr; /* current position in current re */
71 static uschar *lastre; /* origin of last re */
72 static uschar *lastatom; /* origin of last Atom */
73 static uschar *starttok;
74 static uschar *basestr; /* starts with original, replaced during
75 repetition processing */
76 static uschar *firstbasestr;
84 #define NFA 20 /* cache this many dynamic fa's */
86 int nfatab = 0; /* entries in fatab */
88 fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
94 if (setvec == NULL) { /* first time through any RE */
96 setvec = (int *) malloc(maxsetvec * sizeof(int));
97 tmpset = (int *) malloc(maxsetvec * sizeof(int));
98 if (setvec == NULL || tmpset == NULL)
99 overflo("out of space initializing makedfa");
102 if (compile_time) /* a constant for sure */
103 return mkdfa(s, anchor);
104 for (i = 0; i < nfatab; i++) /* is it there already? */
105 if (fatab[i]->anchor == anchor
106 && strcmp((const char *) fatab[i]->restr, s) == 0) {
107 fatab[i]->use = now++;
110 pfa = mkdfa(s, anchor);
111 if (nfatab < NFA) { /* room for another */
113 fatab[nfatab]->use = now++;
117 use = fatab[0]->use; /* replace least-recently used */
119 for (i = 1; i < nfatab; i++)
120 if (fatab[i]->use < use) {
130 fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
131 /* anchor = 1 for anchored matches, else 0 */
136 firstbasestr = (uschar *) s;
137 basestr = firstbasestr;
139 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
140 /* put ALL STAR in front of reg. exp. */
141 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
142 /* put FINAL after reg. exp. */
145 penter(p1); /* enter parent pointers and leaf indices */
146 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
147 overflo("out of space for fa");
148 f->accept = poscnt-1; /* penter has computed number of positions in re */
149 cfoll(f, p1); /* set up follow sets */
151 if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
152 overflo("out of space in makedfa");
153 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
154 overflo("out of space in makedfa");
156 f->initstat = makeinit(f, anchor);
158 f->restr = (uschar *) tostring(s);
159 if (firstbasestr != basestr) {
166 int makeinit(fa *f, int anchor)
173 k = *(f->re[0].lfollow);
175 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
176 overflo("out of space in makeinit");
177 for (i=0; i <= k; i++) {
178 (f->posns[2])[i] = (f->re[0].lfollow)[i];
180 if ((f->posns[2])[1] == f->accept)
182 for (i=0; i < NCHARS; i++)
183 f->gototab[2][i] = 0;
184 f->curstat = cgoto(f, 2, HAT);
186 *f->posns[2] = k-1; /* leave out position 0 */
187 for (i=0; i < k; i++) {
188 (f->posns[0])[i] = (f->posns[2])[i];
191 f->out[0] = f->out[2];
193 --(*f->posns[f->curstat]);
198 void penter(Node *p) /* set up parent pointers and leaf indices */
215 parent(right(p)) = p;
217 default: /* can't happen */
218 FATAL("can't happen: unknown type %d in penter", type(p));
223 void freetr(Node *p) /* free parse tree */
240 default: /* can't happen */
241 FATAL("can't happen: unknown type %d in freetr", type(p));
246 /* in the parsing of regular expressions, metacharacters like . have */
247 /* to be seen literally; \056 is not a metacharacter. */
249 int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */
250 { /* only pick up one 8-bit byte (2 chars) */
255 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
257 n = 16 * n + *p - '0';
258 else if (*p >= 'a' && *p <= 'f')
259 n = 16 * n + *p - 'a' + 10;
260 else if (*p >= 'A' && *p <= 'F')
261 n = 16 * n + *p - 'A' + 10;
267 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
269 int quoted(uschar **pp) /* pick up next thing after a \\ */
270 /* and increment *pp */
275 if ((c = *p++) == 't')
287 else if (c == 'x') { /* hexadecimal goo follows */
288 c = hexstr(&p); /* this adds a null if number is invalid */
289 } else if (isoctdigit(c)) { /* \d \dd \ddd */
291 if (isoctdigit(*p)) {
292 n = 8 * n + *p++ - '0';
294 n = 8 * n + *p++ - '0';
303 static int collate_range_cmp(int a, int b)
307 if ((uschar)a == (uschar)b)
311 return (strcoll(s[0], s[1]));
314 char *cclenter(const char *argp) /* add a character class */
318 uschar *p = (uschar *) argp;
320 static uschar *buf = NULL;
321 static int bufsz = 100;
324 if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
325 FATAL("out of space for character class [%.10s...] 1", p);
327 for (i = 0; (c = *p++) != 0; ) {
330 } else if (c == '-' && i > 0 && bp[-1] != 0) {
336 if (collate_range_cmp(c, c2) > 0) {
341 for (j = 0; j < NCHARS; j++) {
342 if ((collate_range_cmp(c, j) > 0) ||
343 collate_range_cmp(j, c2) > 0)
345 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
346 FATAL("out of space for character class [%.10s...] 2", p);
353 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
354 FATAL("out of space for character class [%.10s...] 3", p);
359 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
361 return (char *) tostring((char *) buf);
364 void overflo(const char *s)
366 FATAL("regular expression too big: %.30s...", s);
369 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
377 f->re[info(v)].ltype = type(v);
378 f->re[info(v)].lval.np = right(v);
379 while (f->accept >= maxsetvec) { /* guessing here! */
381 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
382 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
383 if (setvec == NULL || tmpset == NULL)
384 overflo("out of space in cfoll()");
386 for (i = 0; i <= f->accept; i++)
389 follow(v); /* computes setvec and setcnt */
390 if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
391 overflo("out of space building follow set");
392 f->re[info(v)].lfollow = p;
394 for (i = f->accept; i >= 0; i--)
406 default: /* can't happen */
407 FATAL("can't happen: unknown type %d in cfoll", type(v));
411 int first(Node *p) /* collects initially active leaves of p into setvec */
412 /* returns 0 if p matches empty string */
419 lp = info(p); /* look for high-water mark of subscripts */
420 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
422 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
423 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
424 if (setvec == NULL || tmpset == NULL)
425 overflo("out of space in first()");
427 if (type(p) == EMPTYRE) {
431 if (setvec[lp] != 1) {
435 if (type(p) == CCL && (*(char *) right(p)) == '\0')
436 return(0); /* empty CCL */
439 if (first(left(p)) == 0) return(0);
446 if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
450 if (first(left(p)) == 0 || b == 0) return(0);
453 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
457 void follow(Node *v) /* collects leaves that can follow v into setvec */
461 if (type(v) == FINAL)
477 if (v == left(p)) { /* v is left child of p */
478 if (first(right(p)) == 0) {
482 } else /* v is right child */
488 int member(int c, const char *sarg) /* is c in s? */
490 uschar *s = (uschar *) sarg;
498 int match(fa *f, const char *p0) /* shortest match ? */
501 uschar *p = (uschar *) p0;
503 s = f->reset ? makeinit(f,0) : f->initstat;
507 /* assert(*p < NCHARS); */
508 if ((ns = f->gototab[s][*p]) != 0)
518 int pmatch(fa *f, const char *p0) /* longest match, for sub */
521 uschar *p = (uschar *) p0;
525 /* s = f->reset ? makeinit(f,1) : f->initstat; */
527 f->initstat = s = makeinit(f,1);
536 if (f->out[s]) /* final state */
538 /* assert(*q < NCHARS); */
539 if ((ns = f->gototab[s][*q]) != 0)
543 if (s == 1) { /* no transition */
549 goto nextin; /* no match */
553 patlen = q-p-1; /* don't count $ */
561 for (i = 2; i <= f->curstat; i++)
564 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
565 overflo("out of space in pmatch");
566 for (i = 0; i <= k; i++)
567 (f->posns[2])[i] = (f->posns[0])[i];
568 f->initstat = f->curstat = 2;
569 f->out[2] = f->out[0];
570 for (i = 0; i < NCHARS; i++)
571 f->gototab[2][i] = 0;
577 int nematch(fa *f, const char *p0) /* non-empty match, for sub */
580 uschar *p = (uschar *) p0;
584 /* s = f->reset ? makeinit(f,1) : f->initstat; */
586 f->initstat = s = makeinit(f,1);
594 if (f->out[s]) /* final state */
596 /* assert(*q < NCHARS); */
597 if ((ns = f->gototab[s][*q]) != 0)
601 if (s == 1) { /* no transition */
606 goto nnextin; /* no nonempty match */
610 patlen = q-p-1; /* don't count $ */
618 for (i = 2; i <= f->curstat; i++)
621 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
622 overflo("out of state space");
623 for (i = 0; i <= k; i++)
624 (f->posns[2])[i] = (f->posns[0])[i];
625 f->initstat = f->curstat = 2;
626 f->out[2] = f->out[0];
627 for (i = 0; i < NCHARS; i++)
628 f->gototab[2][i] = 0;
635 Node *reparse(const char *p) /* parses regular expression pointed to by p */
636 { /* uses relex() to scan regular expression */
639 dprintf( ("reparse <%s>\n", p) );
640 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
642 /* GNU compatibility: an empty regexp matches anything */
644 /* FATAL("empty regular expression"); previous */
645 return(op2(EMPTYRE, NIL, NIL));
649 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
653 Node *regexp(void) /* top-level parse of reg expr */
655 return (alt(concat(primary())));
666 np = op2(CHAR, NIL, itonp(rlxval));
671 return (unary(op2(ALL, NIL, NIL)));
674 return (unary(op2(EMPTYRE, NIL, NIL)));
678 return (unary(op2(DOT, NIL, NIL)));
680 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
685 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
691 return (unary(op2(CHAR, NIL, itonp(HAT))));
694 return (unary(op2(CHAR, NIL, NIL)));
697 savelastatom = starttok - basestr; /* Retain over recursion */
699 if (rtok == ')') { /* special pleading for () */
701 return unary(op2(CCL, NIL, (Node *) tostring("")));
705 lastatom = basestr + savelastatom; /* Restore */
710 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
712 FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
714 return 0; /*NOTREACHED*/
717 Node *concat(Node *np)
720 case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
721 return (concat(op2(CAT, np, primary())));
724 return (concat(op2(CAT, op2(CCL, NIL, (Node *) tostring("")),
734 return (alt(op2(OR, np, concat(primary()))));
739 Node *unary(Node *np)
744 return (unary(op2(STAR, np, NIL)));
747 return (unary(op2(PLUS, np, NIL)));
750 return (unary(op2(QUEST, np, NIL)));
757 * Character class definitions conformant to the POSIX locale as
758 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
759 * and operating character sets are both ASCII (ISO646) or supersets
762 * Note that to avoid overflowing the temporary buffer used in
763 * relex(), the expanded character class (prior to range expansion)
764 * must be less than twice the size of their full name.
767 /* Because isblank doesn't show up in any of the header files on any
768 * system i use, it's defined here. if some other locale has a richer
769 * definition of "blank", define HAS_ISBLANK and provide your own
771 * the parentheses here are an attempt to find a path through the maze
772 * of macro definition and/or function and/or version provided. thanks
773 * to nelson beebe for the suggestion; let's see if it works everywhere.
776 /* #define HAS_ISBLANK */
779 int (xisblank)(int c)
781 return c==' ' || c=='\t';
791 { "alnum", 5, isalnum },
792 { "alpha", 5, isalpha },
794 { "blank", 5, xisblank },
796 { "blank", 5, isblank },
798 { "cntrl", 5, iscntrl },
799 { "digit", 5, isdigit },
800 { "graph", 5, isgraph },
801 { "lower", 5, islower },
802 { "print", 5, isprint },
803 { "punct", 5, ispunct },
804 { "space", 5, isspace },
805 { "upper", 5, isupper },
806 { "xdigit", 6, isxdigit },
810 #define REPEAT_SIMPLE 0
811 #define REPEAT_PLUS_APPENDED 1
812 #define REPEAT_WITH_Q 2
813 #define REPEAT_ZERO 3
816 replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
817 int atomlen, int firstnum, int secondnum, int special_case)
822 int init_q = (firstnum==0); /* first added char will be ? */
823 int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */
824 int prefix_length = reptok - basestr; /* prefix includes first rep */
825 int suffix_length = strlen((char *) reptok) - reptoklen; /* string after rep specifier */
826 int size = prefix_length + suffix_length;
828 if (firstnum > 1) { /* add room for reps 2 through firstnum */
829 size += atomlen*(firstnum-1);
832 /* Adjust size of buffer for special cases */
833 if (special_case == REPEAT_PLUS_APPENDED) {
834 size++; /* for the final + */
835 } else if (special_case == REPEAT_WITH_Q) {
836 size += init_q + (atomlen+1)* n_q_reps;
837 } else if (special_case == REPEAT_ZERO) {
838 size += 2; /* just a null ERE: () */
840 if ((buf = (uschar *) malloc(size+1)) == NULL)
841 FATAL("out of space in reg expr %.10s..", lastre);
842 memcpy(buf, basestr, prefix_length); /* copy prefix */
844 if (special_case == REPEAT_ZERO) {
849 for (i=1; i < firstnum; i++) { /* copy x reps */
850 memcpy(&buf[j], atom, atomlen);
853 if (special_case == REPEAT_PLUS_APPENDED) {
855 } else if (special_case == REPEAT_WITH_Q) {
856 if (init_q) buf[j++] = '?';
857 for (i=0; i < n_q_reps; i++) { /* copy x? reps */
858 memcpy(&buf[j], atom, atomlen);
863 memcpy(&buf[j], reptok+reptoklen, suffix_length);
864 if (special_case == REPEAT_ZERO) {
865 buf[j+suffix_length] = '\0';
869 /* free old basestr */
870 if (firstbasestr != basestr) {
875 prestr = buf + prefix_length;
876 if (special_case == REPEAT_ZERO) {
883 static int repeat(const uschar *reptok, int reptoklen, const uschar *atom,
884 int atomlen, int firstnum, int secondnum)
887 In general, the repetition specifier or "bound" is replaced here
888 by an equivalent ERE string, repeating the immediately previous atom
889 and appending ? and + as needed. Note that the first copy of the
890 atom is left in place, except in the special_case of a zero-repeat
893 if (secondnum < 0) { /* means {n,} -> repeat n-1 times followed by PLUS */
895 /* 0 or 1: should be handled before you get here */
896 FATAL("internal error");
898 return replace_repeat(reptok, reptoklen, atom, atomlen,
899 firstnum, secondnum, REPEAT_PLUS_APPENDED);
901 } else if (firstnum == secondnum) { /* {n} or {n,n} -> simply repeat n-1 times */
902 if (firstnum == 0) { /* {0} or {0,0} */
903 /* This case is unusual because the resulting
904 replacement string might actually be SMALLER than
906 return replace_repeat(reptok, reptoklen, atom, atomlen,
907 firstnum, secondnum, REPEAT_ZERO);
908 } else { /* (firstnum >= 1) */
909 return replace_repeat(reptok, reptoklen, atom, atomlen,
910 firstnum, secondnum, REPEAT_SIMPLE);
912 } else if (firstnum < secondnum) { /* {n,m} -> repeat n-1 times then alternate */
913 /* x{n,m} => xx...x{1, m-n+1} => xx...x?x?x?..x? */
914 return replace_repeat(reptok, reptoklen, atom, atomlen,
915 firstnum, secondnum, REPEAT_WITH_Q);
916 } else { /* Error - shouldn't be here (n>m) */
917 FATAL("internal error");
922 int relex(void) /* lexical analyzer for reparse */
926 static uschar *buf = NULL;
927 static int bufsz = 100;
929 struct charclass *cc;
931 int num, m, commafound, digitfound;
932 const uschar *startreptok;
937 switch (c = *prestr++) {
939 case '*': return STAR;
940 case '+': return PLUS;
941 case '?': return QUEST;
942 case '.': return DOT;
943 case '\0': prestr--; return '\0';
950 rlxval = quoted(&prestr);
956 if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
957 FATAL("out of space in reg expr %.10s..", lastre);
959 if (*prestr == '^') {
965 n = 2 * strlen((const char *) prestr)+1;
966 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
967 FATAL("out of space for reg expr %.10s...", lastre);
969 if ((c = *prestr++) == '\\') {
971 if ((c = *prestr++) == '\0')
972 FATAL("nonterminated character class %.20s...", lastre);
974 /* } else if (c == '\n') { */
975 /* FATAL("newline in character class %.20s...", lastre); */
976 } else if (c == '[' && *prestr == ':') {
977 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
978 for (cc = charclasses; cc->cc_name; cc++)
979 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
981 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
982 prestr[2 + cc->cc_namelen] == ']') {
983 prestr += cc->cc_namelen + 3;
985 * BUG: We begin at 1, instead of 0, since we
986 * would otherwise prematurely terminate the
987 * string for classes like [[:cntrl:]]. This
988 * means that we can't match the NUL character,
989 * not without first adapting the entire
990 * program to track each string's length.
992 for (i = 1; i <= UCHAR_MAX; i++) {
993 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
994 FATAL("out of space for reg expr %.10s...", lastre);
995 if (cc->cc_func(i)) {
1002 } else if (c == '[' && *prestr == '.') {
1005 collate_char = *prestr++;
1006 if (*prestr == '.' && prestr[1] == ']') {
1008 /* Found it: map via locale TBD: for
1009 now, simply return this char. This
1010 is sufficient to pass conformance
1013 if (*prestr == ']') {
1015 rlxval = collate_char;
1019 } else if (c == '[' && *prestr == '=') {
1022 equiv_char = *prestr++;
1023 if (*prestr == '=' && prestr[1] == ']') {
1025 /* Found it: map via locale TBD: for now
1026 simply return this char. This is
1027 sufficient to pass conformance test
1030 if (*prestr == ']') {
1032 rlxval = equiv_char;
1036 } else if (c == '\0') {
1037 FATAL("nonterminated character class %.20s", lastre);
1038 } else if (bp == buf) { /* 1st char is special */
1040 } else if (c == ']') {
1042 rlxstr = (uschar *) tostring((char *) buf);
1052 if (isdigit(*(prestr))) {
1053 num = 0; /* Process as a repetition */
1057 startreptok = prestr-1;
1058 /* Remember start of previous atom here ? */
1059 } else { /* just a { char, not a repetition */
1064 if ((c = *prestr++) == '}') {
1066 if (digitfound) { /* {n,m} */
1069 FATAL("illegal repetition expression: class %.20s",
1071 if ((n==0) && (m==1)) {
1075 if (n==0) return STAR;
1076 if (n==1) return PLUS;
1079 if (digitfound) { /* {n} same as {n,n} */
1083 FATAL("illegal repetition expression: class %.20s",
1087 if (repeat(starttok, prestr-starttok, lastatom,
1088 startreptok - lastatom, n, m) > 0) {
1089 if ((n==0) && (m==0)) {
1092 /* must rescan input for next token */
1095 /* Failed to replace: eat up {...} characters
1096 and treat like just PLUS */
1098 } else if (c == '\0') {
1099 FATAL("nonterminated character class %.20s",
1101 } else if (isdigit(c)) {
1102 num = 10 * num + c - '0';
1104 } else if (c == ',') {
1106 FATAL("illegal repetition expression: class %.20s",
1108 /* looking for {n,} or {n,m} */
1111 digitfound = 0; /* reset */
1114 FATAL("illegal repetition expression: class %.20s",
1122 int cgoto(fa *f, int s, int c)
1127 assert(c == HAT || c < NCHARS);
1128 while (f->accept >= maxsetvec) { /* guessing here! */
1130 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
1131 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
1132 if (setvec == NULL || tmpset == NULL)
1133 overflo("out of space in cgoto()");
1135 for (i = 0; i <= f->accept; i++)
1138 /* compute positions of gototab[s,c] into setvec */
1140 for (i = 1; i <= *p; i++) {
1141 if ((k = f->re[p[i]].ltype) != FINAL) {
1142 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
1143 || (k == DOT && c != 0 && c != HAT)
1144 || (k == ALL && c != 0)
1145 || (k == EMPTYRE && c != 0)
1146 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
1147 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
1148 q = f->re[p[i]].lfollow;
1149 for (j = 1; j <= *q; j++) {
1150 if (q[j] >= maxsetvec) {
1152 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
1153 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
1154 if (setvec == NULL || tmpset == NULL)
1155 overflo("cgoto overflow");
1157 if (setvec[q[j]] == 0) {
1165 /* determine if setvec is a previous state */
1168 for (i = f->accept; i >= 0; i--)
1172 /* tmpset == previous state? */
1173 for (i = 1; i <= f->curstat; i++) {
1175 if ((k = tmpset[0]) != p[0])
1177 for (j = 1; j <= k; j++)
1178 if (tmpset[j] != p[j])
1180 /* setvec is state i */
1181 f->gototab[s][c] = i;
1186 /* add tmpset to current set of states */
1187 if (f->curstat >= NSTATES-1) {
1190 for (i = 2; i < NSTATES; i++)
1194 for (i = 0; i < NCHARS; i++)
1195 f->gototab[f->curstat][i] = 0;
1196 xfree(f->posns[f->curstat]);
1197 if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
1198 overflo("out of space in cgoto");
1200 f->posns[f->curstat] = p;
1201 f->gototab[s][c] = f->curstat;
1202 for (i = 0; i <= setcnt; i++)
1204 if (setvec[f->accept])
1205 f->out[f->curstat] = 1;
1207 f->out[f->curstat] = 0;
1212 void freefa(fa *f) /* free a finite automaton */
1218 for (i = 0; i <= f->curstat; i++)
1220 for (i = 0; i <= f->accept; i++) {
1221 xfree(f->re[i].lfollow);
1222 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
1223 xfree((f->re[i].lval.np));