1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 ****************************************************************/
25 /* lasciate ogne speranza, voi ch'intrate. */
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
42 #define type(v) (v)->nobj /* badly overloaded here */
43 #define info(v) (v)->ntype /* badly overloaded here */
44 #define left(v) (v)->narg[0]
45 #define right(v) (v)->narg[1]
46 #define parent(v) (v)->nnext
48 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
49 #define ELEAF case EMPTYRE: /* empty string in regexp */
50 #define UNARY case STAR: case PLUS: case QUEST:
52 /* encoding in tree Nodes:
53 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
54 left is index, right contains value or pointer to value
55 unary (STAR, PLUS, QUEST): left is child, right is null
56 binary (CAT, OR): left and right are children
57 parent contains pointer to parent
65 int rtok; /* next token in current re */
67 static uschar *rlxstr;
68 static uschar *prestr; /* current position in current re */
69 static uschar *lastre; /* origin of last re */
70 static uschar *lastatom; /* origin of last Atom */
71 static uschar *starttok;
72 static uschar *basestr; /* starts with original, replaced during
73 repetition processing */
74 static uschar *firstbasestr;
82 #define NFA 20 /* cache this many dynamic fa's */
84 int nfatab = 0; /* entries in fatab */
86 fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
92 if (setvec == NULL) { /* first time through any RE */
94 setvec = (int *) malloc(maxsetvec * sizeof(int));
95 tmpset = (int *) malloc(maxsetvec * sizeof(int));
96 if (setvec == NULL || tmpset == NULL)
97 overflo("out of space initializing makedfa");
100 if (compile_time) /* a constant for sure */
101 return mkdfa(s, anchor);
102 for (i = 0; i < nfatab; i++) /* is it there already? */
103 if (fatab[i]->anchor == anchor
104 && strcmp((const char *) fatab[i]->restr, s) == 0) {
105 fatab[i]->use = now++;
108 pfa = mkdfa(s, anchor);
109 if (nfatab < NFA) { /* room for another */
111 fatab[nfatab]->use = now++;
115 use = fatab[0]->use; /* replace least-recently used */
117 for (i = 1; i < nfatab; i++)
118 if (fatab[i]->use < use) {
128 fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
129 /* anchor = 1 for anchored matches, else 0 */
134 firstbasestr = (uschar *) s;
135 basestr = firstbasestr;
137 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
138 /* put ALL STAR in front of reg. exp. */
139 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
140 /* put FINAL after reg. exp. */
143 penter(p1); /* enter parent pointers and leaf indices */
144 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
145 overflo("out of space for fa");
146 f->accept = poscnt-1; /* penter has computed number of positions in re */
147 cfoll(f, p1); /* set up follow sets */
149 if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
150 overflo("out of space in makedfa");
151 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
152 overflo("out of space in makedfa");
154 f->initstat = makeinit(f, anchor);
156 f->restr = (uschar *) tostring(s);
157 if (firstbasestr != basestr) {
164 int makeinit(fa *f, int anchor)
171 k = *(f->re[0].lfollow);
173 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
174 overflo("out of space in makeinit");
175 for (i=0; i <= k; i++) {
176 (f->posns[2])[i] = (f->re[0].lfollow)[i];
178 if ((f->posns[2])[1] == f->accept)
180 for (i=0; i < NCHARS; i++)
181 f->gototab[2][i] = 0;
182 f->curstat = cgoto(f, 2, HAT);
184 *f->posns[2] = k-1; /* leave out position 0 */
185 for (i=0; i < k; i++) {
186 (f->posns[0])[i] = (f->posns[2])[i];
189 f->out[0] = f->out[2];
191 --(*f->posns[f->curstat]);
196 void penter(Node *p) /* set up parent pointers and leaf indices */
213 parent(right(p)) = p;
215 default: /* can't happen */
216 FATAL("can't happen: unknown type %d in penter", type(p));
221 void freetr(Node *p) /* free parse tree */
238 default: /* can't happen */
239 FATAL("can't happen: unknown type %d in freetr", type(p));
244 /* in the parsing of regular expressions, metacharacters like . have */
245 /* to be seen literally; \056 is not a metacharacter. */
247 int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */
248 { /* only pick up one 8-bit byte (2 chars) */
253 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
255 n = 16 * n + *p - '0';
256 else if (*p >= 'a' && *p <= 'f')
257 n = 16 * n + *p - 'a' + 10;
258 else if (*p >= 'A' && *p <= 'F')
259 n = 16 * n + *p - 'A' + 10;
265 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
267 int quoted(uschar **pp) /* pick up next thing after a \\ */
268 /* and increment *pp */
273 if ((c = *p++) == 't')
285 else if (c == 'x') { /* hexadecimal goo follows */
286 c = hexstr(&p); /* this adds a null if number is invalid */
287 } else if (isoctdigit(c)) { /* \d \dd \ddd */
289 if (isoctdigit(*p)) {
290 n = 8 * n + *p++ - '0';
292 n = 8 * n + *p++ - '0';
301 static int collate_range_cmp(int a, int b)
305 if ((uschar)a == (uschar)b)
309 return (strcoll(s[0], s[1]));
312 char *cclenter(const char *argp) /* add a character class */
316 uschar *p = (uschar *) argp;
318 static uschar *buf = NULL;
319 static int bufsz = 100;
322 if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
323 FATAL("out of space for character class [%.10s...] 1", p);
325 for (i = 0; (c = *p++) != 0; ) {
328 } else if (c == '-' && i > 0 && bp[-1] != 0) {
334 if (collate_range_cmp(c, c2) > 0) {
339 for (j = 0; j < NCHARS; j++) {
340 if ((collate_range_cmp(c, j) > 0) ||
341 collate_range_cmp(j, c2) > 0)
343 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
344 FATAL("out of space for character class [%.10s...] 2", p);
351 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
352 FATAL("out of space for character class [%.10s...] 3", p);
357 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
359 return (char *) tostring((char *) buf);
362 void overflo(const char *s)
364 FATAL("regular expression too big: %.30s...", s);
367 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
375 f->re[info(v)].ltype = type(v);
376 f->re[info(v)].lval.np = right(v);
377 while (f->accept >= maxsetvec) { /* guessing here! */
379 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
380 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
381 if (setvec == NULL || tmpset == NULL)
382 overflo("out of space in cfoll()");
384 for (i = 0; i <= f->accept; i++)
387 follow(v); /* computes setvec and setcnt */
388 if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
389 overflo("out of space building follow set");
390 f->re[info(v)].lfollow = p;
392 for (i = f->accept; i >= 0; i--)
404 default: /* can't happen */
405 FATAL("can't happen: unknown type %d in cfoll", type(v));
409 int first(Node *p) /* collects initially active leaves of p into setvec */
410 /* returns 0 if p matches empty string */
417 lp = info(p); /* look for high-water mark of subscripts */
418 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
420 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
421 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
422 if (setvec == NULL || tmpset == NULL)
423 overflo("out of space in first()");
425 if (type(p) == EMPTYRE) {
429 if (setvec[lp] != 1) {
433 if (type(p) == CCL && (*(char *) right(p)) == '\0')
434 return(0); /* empty CCL */
437 if (first(left(p)) == 0) return(0);
444 if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
448 if (first(left(p)) == 0 || b == 0) return(0);
451 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
455 void follow(Node *v) /* collects leaves that can follow v into setvec */
459 if (type(v) == FINAL)
475 if (v == left(p)) { /* v is left child of p */
476 if (first(right(p)) == 0) {
480 } else /* v is right child */
486 int member(int c, const char *sarg) /* is c in s? */
488 uschar *s = (uschar *) sarg;
496 int match(fa *f, const char *p0) /* shortest match ? */
499 uschar *p = (uschar *) p0;
501 s = f->reset ? makeinit(f,0) : f->initstat;
505 /* assert(*p < NCHARS); */
506 if ((ns = f->gototab[s][*p]) != 0)
516 int pmatch(fa *f, const char *p0) /* longest match, for sub */
519 uschar *p = (uschar *) p0;
523 /* s = f->reset ? makeinit(f,1) : f->initstat; */
525 f->initstat = s = makeinit(f,1);
534 if (f->out[s]) /* final state */
536 /* assert(*q < NCHARS); */
537 if ((ns = f->gototab[s][*q]) != 0)
541 if (s == 1) { /* no transition */
547 goto nextin; /* no match */
551 patlen = q-p-1; /* don't count $ */
559 for (i = 2; i <= f->curstat; i++)
562 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
563 overflo("out of space in pmatch");
564 for (i = 0; i <= k; i++)
565 (f->posns[2])[i] = (f->posns[0])[i];
566 f->initstat = f->curstat = 2;
567 f->out[2] = f->out[0];
568 for (i = 0; i < NCHARS; i++)
569 f->gototab[2][i] = 0;
575 int nematch(fa *f, const char *p0) /* non-empty match, for sub */
578 uschar *p = (uschar *) p0;
582 /* s = f->reset ? makeinit(f,1) : f->initstat; */
584 f->initstat = s = makeinit(f,1);
592 if (f->out[s]) /* final state */
594 /* assert(*q < NCHARS); */
595 if ((ns = f->gototab[s][*q]) != 0)
599 if (s == 1) { /* no transition */
604 goto nnextin; /* no nonempty match */
608 patlen = q-p-1; /* don't count $ */
616 for (i = 2; i <= f->curstat; i++)
619 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
620 overflo("out of state space");
621 for (i = 0; i <= k; i++)
622 (f->posns[2])[i] = (f->posns[0])[i];
623 f->initstat = f->curstat = 2;
624 f->out[2] = f->out[0];
625 for (i = 0; i < NCHARS; i++)
626 f->gototab[2][i] = 0;
633 Node *reparse(const char *p) /* parses regular expression pointed to by p */
634 { /* uses relex() to scan regular expression */
637 dprintf( ("reparse <%s>\n", p) );
638 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
640 /* GNU compatibility: an empty regexp matches anything */
642 /* FATAL("empty regular expression"); previous */
643 return(op2(EMPTYRE, NIL, NIL));
647 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
651 Node *regexp(void) /* top-level parse of reg expr */
653 return (alt(concat(primary())));
664 np = op2(CHAR, NIL, itonp(rlxval));
669 return (unary(op2(ALL, NIL, NIL)));
672 return (unary(op2(EMPTYRE, NIL, NIL)));
676 return (unary(op2(DOT, NIL, NIL)));
678 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
683 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
689 return (unary(op2(CHAR, NIL, itonp(HAT))));
692 return (unary(op2(CHAR, NIL, NIL)));
695 savelastatom = starttok - basestr; /* Retain over recursion */
697 if (rtok == ')') { /* special pleading for () */
699 return unary(op2(CCL, NIL, (Node *) tostring("")));
703 lastatom = basestr + savelastatom; /* Restore */
708 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
710 FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
712 return 0; /*NOTREACHED*/
715 Node *concat(Node *np)
718 case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
719 return (concat(op2(CAT, np, primary())));
722 return (concat(op2(CAT, op2(CCL, NIL, (Node *) tostring("")),
732 return (alt(op2(OR, np, concat(primary()))));
737 Node *unary(Node *np)
742 return (unary(op2(STAR, np, NIL)));
745 return (unary(op2(PLUS, np, NIL)));
748 return (unary(op2(QUEST, np, NIL)));
755 * Character class definitions conformant to the POSIX locale as
756 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
757 * and operating character sets are both ASCII (ISO646) or supersets
760 * Note that to avoid overflowing the temporary buffer used in
761 * relex(), the expanded character class (prior to range expansion)
762 * must be less than twice the size of their full name.
765 /* Because isblank doesn't show up in any of the header files on any
766 * system i use, it's defined here. if some other locale has a richer
767 * definition of "blank", define HAS_ISBLANK and provide your own
769 * the parentheses here are an attempt to find a path through the maze
770 * of macro definition and/or function and/or version provided. thanks
771 * to nelson beebe for the suggestion; let's see if it works everywhere.
774 /* #define HAS_ISBLANK */
777 int (xisblank)(int c)
779 return c==' ' || c=='\t';
789 { "alnum", 5, isalnum },
790 { "alpha", 5, isalpha },
792 { "blank", 5, xisblank },
794 { "blank", 5, isblank },
796 { "cntrl", 5, iscntrl },
797 { "digit", 5, isdigit },
798 { "graph", 5, isgraph },
799 { "lower", 5, islower },
800 { "print", 5, isprint },
801 { "punct", 5, ispunct },
802 { "space", 5, isspace },
803 { "upper", 5, isupper },
804 { "xdigit", 6, isxdigit },
808 #define REPEAT_SIMPLE 0
809 #define REPEAT_PLUS_APPENDED 1
810 #define REPEAT_WITH_Q 2
811 #define REPEAT_ZERO 3
814 replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
815 int atomlen, int firstnum, int secondnum, int special_case)
820 int init_q = (firstnum==0); /* first added char will be ? */
821 int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */
822 int prefix_length = reptok - basestr; /* prefix includes first rep */
823 int suffix_length = strlen((char *) reptok) - reptoklen; /* string after rep specifier */
824 int size = prefix_length + suffix_length;
826 if (firstnum > 1) { /* add room for reps 2 through firstnum */
827 size += atomlen*(firstnum-1);
830 /* Adjust size of buffer for special cases */
831 if (special_case == REPEAT_PLUS_APPENDED) {
832 size++; /* for the final + */
833 } else if (special_case == REPEAT_WITH_Q) {
834 size += init_q + (atomlen+1)* n_q_reps;
835 } else if (special_case == REPEAT_ZERO) {
836 size += 2; /* just a null ERE: () */
838 if ((buf = (uschar *) malloc(size+1)) == NULL)
839 FATAL("out of space in reg expr %.10s..", lastre);
840 memcpy(buf, basestr, prefix_length); /* copy prefix */
842 if (special_case == REPEAT_ZERO) {
847 for (i=1; i < firstnum; i++) { /* copy x reps */
848 memcpy(&buf[j], atom, atomlen);
851 if (special_case == REPEAT_PLUS_APPENDED) {
853 } else if (special_case == REPEAT_WITH_Q) {
854 if (init_q) buf[j++] = '?';
855 for (i=0; i < n_q_reps; i++) { /* copy x? reps */
856 memcpy(&buf[j], atom, atomlen);
861 memcpy(&buf[j], reptok+reptoklen, suffix_length);
862 if (special_case == REPEAT_ZERO) {
863 buf[j+suffix_length] = '\0';
867 /* free old basestr */
868 if (firstbasestr != basestr) {
873 prestr = buf + prefix_length;
874 if (special_case == REPEAT_ZERO) {
881 static int repeat(const uschar *reptok, int reptoklen, const uschar *atom,
882 int atomlen, int firstnum, int secondnum)
885 In general, the repetition specifier or "bound" is replaced here
886 by an equivalent ERE string, repeating the immediately previous atom
887 and appending ? and + as needed. Note that the first copy of the
888 atom is left in place, except in the special_case of a zero-repeat
891 if (secondnum < 0) { /* means {n,} -> repeat n-1 times followed by PLUS */
893 /* 0 or 1: should be handled before you get here */
894 FATAL("internal error");
896 return replace_repeat(reptok, reptoklen, atom, atomlen,
897 firstnum, secondnum, REPEAT_PLUS_APPENDED);
899 } else if (firstnum == secondnum) { /* {n} or {n,n} -> simply repeat n-1 times */
900 if (firstnum == 0) { /* {0} or {0,0} */
901 /* This case is unusual because the resulting
902 replacement string might actually be SMALLER than
904 return replace_repeat(reptok, reptoklen, atom, atomlen,
905 firstnum, secondnum, REPEAT_ZERO);
906 } else { /* (firstnum >= 1) */
907 return replace_repeat(reptok, reptoklen, atom, atomlen,
908 firstnum, secondnum, REPEAT_SIMPLE);
910 } else if (firstnum < secondnum) { /* {n,m} -> repeat n-1 times then alternate */
911 /* x{n,m} => xx...x{1, m-n+1} => xx...x?x?x?..x? */
912 return replace_repeat(reptok, reptoklen, atom, atomlen,
913 firstnum, secondnum, REPEAT_WITH_Q);
914 } else { /* Error - shouldn't be here (n>m) */
915 FATAL("internal error");
920 int relex(void) /* lexical analyzer for reparse */
924 static uschar *buf = NULL;
925 static int bufsz = 100;
927 struct charclass *cc;
929 int num, m, commafound, digitfound;
930 const uschar *startreptok;
935 switch (c = *prestr++) {
937 case '*': return STAR;
938 case '+': return PLUS;
939 case '?': return QUEST;
940 case '.': return DOT;
941 case '\0': prestr--; return '\0';
948 rlxval = quoted(&prestr);
954 if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
955 FATAL("out of space in reg expr %.10s..", lastre);
957 if (*prestr == '^') {
963 n = 2 * strlen((const char *) prestr)+1;
964 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
965 FATAL("out of space for reg expr %.10s...", lastre);
967 if ((c = *prestr++) == '\\') {
969 if ((c = *prestr++) == '\0')
970 FATAL("nonterminated character class %.20s...", lastre);
972 /* } else if (c == '\n') { */
973 /* FATAL("newline in character class %.20s...", lastre); */
974 } else if (c == '[' && *prestr == ':') {
975 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
976 for (cc = charclasses; cc->cc_name; cc++)
977 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
979 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
980 prestr[2 + cc->cc_namelen] == ']') {
981 prestr += cc->cc_namelen + 3;
983 * BUG: We begin at 1, instead of 0, since we
984 * would otherwise prematurely terminate the
985 * string for classes like [[:cntrl:]]. This
986 * means that we can't match the NUL character,
987 * not without first adapting the entire
988 * program to track each string's length.
990 for (i = 1; i <= UCHAR_MAX; i++) {
991 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
992 FATAL("out of space for reg expr %.10s...", lastre);
993 if (cc->cc_func(i)) {
1000 } else if (c == '[' && *prestr == '.') {
1003 collate_char = *prestr++;
1004 if (*prestr == '.' && prestr[1] == ']') {
1006 /* Found it: map via locale TBD: for
1007 now, simply return this char. This
1008 is sufficient to pass conformance
1011 if (*prestr == ']') {
1013 rlxval = collate_char;
1017 } else if (c == '[' && *prestr == '=') {
1020 equiv_char = *prestr++;
1021 if (*prestr == '=' && prestr[1] == ']') {
1023 /* Found it: map via locale TBD: for now
1024 simply return this char. This is
1025 sufficient to pass conformance test
1028 if (*prestr == ']') {
1030 rlxval = equiv_char;
1034 } else if (c == '\0') {
1035 FATAL("nonterminated character class %.20s", lastre);
1036 } else if (bp == buf) { /* 1st char is special */
1038 } else if (c == ']') {
1040 rlxstr = (uschar *) tostring((char *) buf);
1050 if (isdigit(*(prestr))) {
1051 num = 0; /* Process as a repetition */
1055 startreptok = prestr-1;
1056 /* Remember start of previous atom here ? */
1057 } else { /* just a { char, not a repetition */
1062 if ((c = *prestr++) == '}') {
1064 if (digitfound) { /* {n,m} */
1067 FATAL("illegal repetition expression: class %.20s",
1069 if ((n==0) && (m==1)) {
1073 if (n==0) return STAR;
1074 if (n==1) return PLUS;
1077 if (digitfound) { /* {n} same as {n,n} */
1081 FATAL("illegal repetition expression: class %.20s",
1085 if (repeat(starttok, prestr-starttok, lastatom,
1086 startreptok - lastatom, n, m) > 0) {
1087 if ((n==0) && (m==0)) {
1090 /* must rescan input for next token */
1093 /* Failed to replace: eat up {...} characters
1094 and treat like just PLUS */
1096 } else if (c == '\0') {
1097 FATAL("nonterminated character class %.20s",
1099 } else if (isdigit(c)) {
1100 num = 10 * num + c - '0';
1102 } else if (c == ',') {
1104 FATAL("illegal repetition expression: class %.20s",
1106 /* looking for {n,} or {n,m} */
1109 digitfound = 0; /* reset */
1112 FATAL("illegal repetition expression: class %.20s",
1120 int cgoto(fa *f, int s, int c)
1125 assert(c == HAT || c < NCHARS);
1126 while (f->accept >= maxsetvec) { /* guessing here! */
1128 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
1129 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
1130 if (setvec == NULL || tmpset == NULL)
1131 overflo("out of space in cgoto()");
1133 for (i = 0; i <= f->accept; i++)
1136 /* compute positions of gototab[s,c] into setvec */
1138 for (i = 1; i <= *p; i++) {
1139 if ((k = f->re[p[i]].ltype) != FINAL) {
1140 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
1141 || (k == DOT && c != 0 && c != HAT)
1142 || (k == ALL && c != 0)
1143 || (k == EMPTYRE && c != 0)
1144 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
1145 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
1146 q = f->re[p[i]].lfollow;
1147 for (j = 1; j <= *q; j++) {
1148 if (q[j] >= maxsetvec) {
1150 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
1151 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
1152 if (setvec == NULL || tmpset == NULL)
1153 overflo("cgoto overflow");
1155 if (setvec[q[j]] == 0) {
1163 /* determine if setvec is a previous state */
1166 for (i = f->accept; i >= 0; i--)
1170 /* tmpset == previous state? */
1171 for (i = 1; i <= f->curstat; i++) {
1173 if ((k = tmpset[0]) != p[0])
1175 for (j = 1; j <= k; j++)
1176 if (tmpset[j] != p[j])
1178 /* setvec is state i */
1179 f->gototab[s][c] = i;
1184 /* add tmpset to current set of states */
1185 if (f->curstat >= NSTATES-1) {
1188 for (i = 2; i < NSTATES; i++)
1192 for (i = 0; i < NCHARS; i++)
1193 f->gototab[f->curstat][i] = 0;
1194 xfree(f->posns[f->curstat]);
1195 if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
1196 overflo("out of space in cgoto");
1198 f->posns[f->curstat] = p;
1199 f->gototab[s][c] = f->curstat;
1200 for (i = 0; i <= setcnt; i++)
1202 if (setvec[f->accept])
1203 f->out[f->curstat] = 1;
1205 f->out[f->curstat] = 0;
1210 void freefa(fa *f) /* free a finite automaton */
1216 for (i = 0; i <= f->curstat; i++)
1218 for (i = 0; i <= f->accept; i++) {
1219 xfree(f->re[i].lfollow);
1220 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
1221 xfree((f->re[i].lval.np));