1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 ****************************************************************/
25 /* lasciate ogne speranza, voi ch'intrate. */
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
38 #include "awkgram.tab.h"
42 #define type(v) (v)->nobj /* badly overloaded here */
43 #define info(v) (v)->ntype /* badly overloaded here */
44 #define left(v) (v)->narg[0]
45 #define right(v) (v)->narg[1]
46 #define parent(v) (v)->nnext
48 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
49 #define ELEAF case EMPTYRE: /* empty string in regexp */
50 #define UNARY case STAR: case PLUS: case QUEST:
52 /* encoding in tree Nodes:
53 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
54 left is index, right contains value or pointer to value
55 unary (STAR, PLUS, QUEST): left is child, right is null
56 binary (CAT, OR): left and right are children
57 parent contains pointer to parent
65 int rtok; /* next token in current re */
67 static const uschar *rlxstr;
68 static const uschar *prestr; /* current position in current re */
69 static const uschar *lastre; /* origin of last re */
70 static const uschar *lastatom; /* origin of last Atom */
71 static const uschar *starttok;
72 static const uschar *basestr; /* starts with original, replaced during
73 repetition processing */
74 static const uschar *firstbasestr;
82 #define NFA 128 /* cache this many dynamic fa's */
84 int nfatab = 0; /* entries in fatab */
87 intalloc(size_t n, const char *f)
89 int *p = (int *) calloc(n, sizeof(int));
96 resizesetvec(const char *f)
102 setvec = (int *) realloc(setvec, maxsetvec * sizeof(*setvec));
103 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(*tmpset));
104 if (setvec == NULL || tmpset == NULL)
109 resize_state(fa *f, int state)
116 if (++state < f->state_count)
119 new_count = state + 10; /* needs to be tuned */
121 p = (unsigned int **) realloc(f->gototab, new_count * sizeof(f->gototab[0]));
126 p2 = (uschar *) realloc(f->out, new_count * sizeof(f->out[0]));
131 p3 = (int **) realloc(f->posns, new_count * sizeof(f->posns[0]));
136 for (i = f->state_count; i < new_count; ++i) {
137 f->gototab[i] = (unsigned int *) calloc(NCHARS, sizeof(**f->gototab));
138 if (f->gototab[i] == NULL)
143 f->state_count = new_count;
149 fa *makedfa(const char *s, bool anchor) /* returns dfa for reg expr s */
155 if (setvec == NULL) { /* first time through any RE */
156 resizesetvec(__func__);
159 if (compile_time != RUNNING) /* a constant for sure */
160 return mkdfa(s, anchor);
161 for (i = 0; i < nfatab; i++) /* is it there already? */
162 if (fatab[i]->anchor == anchor
163 && strcmp((const char *) fatab[i]->restr, s) == 0) {
164 fatab[i]->use = now++;
167 pfa = mkdfa(s, anchor);
168 if (nfatab < NFA) { /* room for another */
170 fatab[nfatab]->use = now++;
174 use = fatab[0]->use; /* replace least-recently used */
176 for (i = 1; i < nfatab; i++)
177 if (fatab[i]->use < use) {
187 fa *mkdfa(const char *s, bool anchor) /* does the real work of making a dfa */
188 /* anchor = true for anchored matches, else false */
193 firstbasestr = (const uschar *) s;
194 basestr = firstbasestr;
196 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
197 /* put ALL STAR in front of reg. exp. */
198 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
199 /* put FINAL after reg. exp. */
202 penter(p1); /* enter parent pointers and leaf indices */
203 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt * sizeof(rrow))) == NULL)
205 f->accept = poscnt-1; /* penter has computed number of positions in re */
206 cfoll(f, p1); /* set up follow sets */
209 f->posns[0] = intalloc(*(f->re[0].lfollow), __func__);
210 f->posns[1] = intalloc(1, __func__);
212 f->initstat = makeinit(f, anchor);
214 f->restr = (uschar *) tostring(s);
215 if (firstbasestr != basestr) {
222 int makeinit(fa *f, bool anchor)
228 k = *(f->re[0].lfollow);
230 f->posns[2] = intalloc(k + 1, __func__);
231 for (i = 0; i <= k; i++) {
232 (f->posns[2])[i] = (f->re[0].lfollow)[i];
234 if ((f->posns[2])[1] == f->accept)
236 for (i = 0; i < NCHARS; i++)
237 f->gototab[2][i] = 0;
238 f->curstat = cgoto(f, 2, HAT);
240 *f->posns[2] = k-1; /* leave out position 0 */
241 for (i = 0; i < k; i++) {
242 (f->posns[0])[i] = (f->posns[2])[i];
245 f->out[0] = f->out[2];
247 --(*f->posns[f->curstat]);
252 void penter(Node *p) /* set up parent pointers and leaf indices */
269 parent(right(p)) = p;
273 default: /* can't happen */
274 FATAL("can't happen: unknown type %d in penter", type(p));
279 void freetr(Node *p) /* free parse tree */
297 default: /* can't happen */
298 FATAL("can't happen: unknown type %d in freetr", type(p));
303 /* in the parsing of regular expressions, metacharacters like . have */
304 /* to be seen literally; \056 is not a metacharacter. */
306 int hexstr(const uschar **pp) /* find and eval hex string at pp, return new p */
307 { /* only pick up one 8-bit byte (2 chars) */
312 for (i = 0, p = *pp; i < 2 && isxdigit(*p); i++, p++) {
314 n = 16 * n + *p - '0';
315 else if (*p >= 'a' && *p <= 'f')
316 n = 16 * n + *p - 'a' + 10;
317 else if (*p >= 'A' && *p <= 'F')
318 n = 16 * n + *p - 'A' + 10;
324 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
326 int quoted(const uschar **pp) /* pick up next thing after a \\ */
327 /* and increment *pp */
329 const uschar *p = *pp;
332 if ((c = *p++) == 't')
348 else if (c == 'x') { /* hexadecimal goo follows */
349 c = hexstr(&p); /* this adds a null if number is invalid */
350 } else if (isoctdigit(c)) { /* \d \dd \ddd */
352 if (isoctdigit(*p)) {
353 n = 8 * n + *p++ - '0';
355 n = 8 * n + *p++ - '0';
364 static int collate_range_cmp(int a, int b)
368 if ((uschar)a == (uschar)b)
372 return (strcoll(s[0], s[1]));
375 char *cclenter(const char *argp) /* add a character class */
378 const uschar *op, *p = (const uschar *) argp;
380 static uschar *buf = NULL;
381 static int bufsz = 100;
384 if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
385 FATAL("out of space for character class [%.10s...] 1", p);
387 for (i = 0; (c = *p++) != 0; ) {
390 } else if (c == '-' && i > 0 && bp[-1] != 0) {
396 if (collate_range_cmp(c, c2) > 0) {
401 for (j = 0; j < NCHARS; j++) {
402 if ((collate_range_cmp(c, j) > 0) ||
403 collate_range_cmp(j, c2) > 0)
405 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
406 FATAL("out of space for character class [%.10s...] 2", p);
413 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
414 FATAL("out of space for character class [%.10s...] 3", p);
419 DPRINTF("cclenter: in = |%s|, out = |%s|\n", op, buf);
421 return (char *) tostring((char *) buf);
424 void overflo(const char *s)
426 FATAL("regular expression too big: out of space in %.30s...", s);
429 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
437 f->re[info(v)].ltype = type(v);
438 f->re[info(v)].lval.np = right(v);
439 while (f->accept >= maxsetvec) { /* guessing here! */
440 resizesetvec(__func__);
442 for (i = 0; i <= f->accept; i++)
445 follow(v); /* computes setvec and setcnt */
446 p = intalloc(setcnt + 1, __func__);
447 f->re[info(v)].lfollow = p;
449 for (i = f->accept; i >= 0; i--)
463 default: /* can't happen */
464 FATAL("can't happen: unknown type %d in cfoll", type(v));
468 int first(Node *p) /* collects initially active leaves of p into setvec */
469 /* returns 0 if p matches empty string */
476 lp = info(p); /* look for high-water mark of subscripts */
477 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
478 resizesetvec(__func__);
480 if (type(p) == EMPTYRE) {
484 if (setvec[lp] != 1) {
488 if (type(p) == CCL && (*(char *) right(p)) == '\0')
489 return(0); /* empty CCL */
492 if (first(left(p)) == 0)
500 if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
504 if (first(left(p)) == 0 || b == 0) return(0);
509 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
513 void follow(Node *v) /* collects leaves that can follow v into setvec */
517 if (type(v) == FINAL)
533 if (v == left(p)) { /* v is left child of p */
534 if (first(right(p)) == 0) {
538 } else /* v is right child */
544 int member(int c, const char *sarg) /* is c in s? */
546 const uschar *s = (const uschar *) sarg;
554 int match(fa *f, const char *p0) /* shortest match ? */
557 const uschar *p = (const uschar *) p0;
560 assert (s < f->state_count);
565 /* assert(*p < NCHARS); */
566 if ((ns = f->gototab[s][*p]) != 0)
576 int pmatch(fa *f, const char *p0) /* longest match, for sub */
579 const uschar *p = (const uschar *) p0;
583 assert(s < f->state_count);
585 patbeg = (const char *)p;
590 if (f->out[s]) /* final state */
592 /* assert(*q < NCHARS); */
593 if ((ns = f->gototab[s][*q]) != 0)
598 assert(s < f->state_count);
600 if (s == 1) { /* no transition */
602 patbeg = (const char *) p;
606 goto nextin; /* no match */
610 patlen = q-p-1; /* don't count $ */
612 patbeg = (const char *) p;
619 for (i = 2; i <= f->curstat; i++)
620 n xfree(f->posns[i]);
622 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
623 overflo("out of space in pmatch");
624 for (i = 0; i <= k; i++)
625 (f->posns[2])[i] = (f->posns[0])[i];
626 f->initstat = f->curstat = 2;
627 f->out[2] = f->out[0];
628 for (i = 0; i < NCHARS; i++)
629 f->gototab[2][i] = 0;
636 int nematch(fa *f, const char *p0) /* non-empty match, for sub */
639 const uschar *p = (const uschar *) p0;
643 assert(s < f->state_count);
645 patbeg = (const char *)p;
650 if (f->out[s]) /* final state */
652 /* assert(*q < NCHARS); */
653 if ((ns = f->gototab[s][*q]) != 0)
657 if (s == 1) { /* no transition */
659 patbeg = (const char *) p;
662 goto nnextin; /* no nonempty match */
666 patlen = q-p-1; /* don't count $ */
668 patbeg = (const char *) p;
675 for (i = 2; i <= f->curstat; i++)
678 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
679 overflo("out of state space");
680 for (i = 0; i <= k; i++)
681 (f->posns[2])[i] = (f->posns[0])[i];
682 f->initstat = f->curstat = 2;
683 f->out[2] = f->out[0];
684 for (i = 0; i < NCHARS; i++)
685 f->gototab[2][i] = 0;
699 * A stream-fed version of nematch which transfers characters to a
700 * null-terminated buffer. All characters up to and including the last
701 * character of the matching text or EOF are placed in the buffer. If
702 * a match is found, patbeg and patlen are set appropriately.
705 * false No match found.
709 bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
712 int bufsize = *pbufsize;
713 int c, i, j, k, ns, s;
719 * All indices relative to buf.
720 * i <= j <= k <= bufsize
722 * i: origin of active substring
723 * j: current character
724 * k: destination of next getc()
732 if (!adjbuf((char **) &buf, &bufsize, bufsize+1, quantum, 0, "fnematch"))
733 FATAL("stream '%.30s...' too long", buf);
734 buf[k++] = (c = getc(f)) != EOF ? c : 0;
737 /* assert(c < NCHARS); */
739 if ((ns = pfa->gototab[s][c]) != 0)
742 s = cgoto(pfa, s, c);
744 if (pfa->out[s]) { /* final state */
746 if (c == 0) /* don't count $ */
749 } while (buf[j] && s != 1);
751 } while (buf[i] && !patlen);
753 /* adjbuf() may have relocated a resized buffer. Inform the world. */
758 patbeg = (char *) buf + i;
760 * Under no circumstances is the last character fed to
761 * the automaton part of the match. It is EOF's nullbyte,
762 * or it sent the automaton into a state with no further
763 * transitions available (s==1), or both. Room for a
764 * terminating nullbyte is guaranteed.
766 * ungetc any chars after the end of matching text
767 * (except for EOF's nullbyte, if present) and null
768 * terminate the buffer.
771 if (buf[--k] && ungetc(buf[k], f) == EOF)
772 FATAL("unable to ungetc '%c'", buf[k]);
773 while (k > i + patlen);
781 Node *reparse(const char *p) /* parses regular expression pointed to by p */
782 { /* uses relex() to scan regular expression */
785 DPRINTF("reparse <%s>\n", p);
786 lastre = prestr = (const uschar *) p; /* prestr points to string to be parsed */
788 /* GNU compatibility: an empty regexp matches anything */
790 /* FATAL("empty regular expression"); previous */
791 return(op2(EMPTYRE, NIL, NIL));
795 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
799 Node *regexp(void) /* top-level parse of reg expr */
801 return (alt(concat(primary())));
812 np = op2(CHAR, NIL, itonp(rlxval));
817 return (unary(op2(ALL, NIL, NIL)));
820 return (unary(op2(EMPTYRE, NIL, NIL)));
824 return (unary(op2(DOT, NIL, NIL)));
826 np = op2(CCL, NIL, (Node*) cclenter((const char *) rlxstr));
831 np = op2(NCCL, NIL, (Node *) cclenter((const char *) rlxstr));
837 return (unary(op2(CHAR, NIL, itonp(HAT))));
840 return (unary(op2(CHAR, NIL, NIL)));
843 savelastatom = starttok - basestr; /* Retain over recursion */
845 if (rtok == ')') { /* special pleading for () */
847 return unary(op2(CCL, NIL, (Node *) tostring("")));
851 lastatom = basestr + savelastatom; /* Restore */
856 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
858 FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
860 return 0; /*NOTREACHED*/
863 Node *concat(Node *np)
866 case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
867 return (concat(op2(CAT, np, primary())));
870 return (concat(op2(CAT, op2(CCL, NIL, (Node *) tostring("")),
880 return (alt(op2(OR, np, concat(primary()))));
885 Node *unary(Node *np)
890 return (unary(op2(STAR, np, NIL)));
893 return (unary(op2(PLUS, np, NIL)));
896 return (unary(op2(QUEST, np, NIL)));
899 return (unary(op2(ZERO, np, NIL)));
906 * Character class definitions conformant to the POSIX locale as
907 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
908 * and operating character sets are both ASCII (ISO646) or supersets
911 * Note that to avoid overflowing the temporary buffer used in
912 * relex(), the expanded character class (prior to range expansion)
913 * must be less than twice the size of their full name.
916 /* Because isblank doesn't show up in any of the header files on any
917 * system i use, it's defined here. if some other locale has a richer
918 * definition of "blank", define HAS_ISBLANK and provide your own
920 * the parentheses here are an attempt to find a path through the maze
921 * of macro definition and/or function and/or version provided. thanks
922 * to nelson beebe for the suggestion; let's see if it works everywhere.
925 /* #define HAS_ISBLANK */
928 int (xisblank)(int c)
930 return c==' ' || c=='\t';
935 static const struct charclass {
940 { "alnum", 5, isalnum },
941 { "alpha", 5, isalpha },
943 { "blank", 5, xisblank },
945 { "blank", 5, isblank },
947 { "cntrl", 5, iscntrl },
948 { "digit", 5, isdigit },
949 { "graph", 5, isgraph },
950 { "lower", 5, islower },
951 { "print", 5, isprint },
952 { "punct", 5, ispunct },
953 { "space", 5, isspace },
954 { "upper", 5, isupper },
955 { "xdigit", 6, isxdigit },
959 #define REPEAT_SIMPLE 0
960 #define REPEAT_PLUS_APPENDED 1
961 #define REPEAT_WITH_Q 2
962 #define REPEAT_ZERO 3
965 replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom,
966 int atomlen, int firstnum, int secondnum, int special_case)
971 int init_q = (firstnum == 0); /* first added char will be ? */
972 int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */
973 int prefix_length = reptok - basestr; /* prefix includes first rep */
974 int suffix_length = strlen((const char *) reptok) - reptoklen; /* string after rep specifier */
975 int size = prefix_length + suffix_length;
977 if (firstnum > 1) { /* add room for reps 2 through firstnum */
978 size += atomlen*(firstnum-1);
981 /* Adjust size of buffer for special cases */
982 if (special_case == REPEAT_PLUS_APPENDED) {
983 size++; /* for the final + */
984 } else if (special_case == REPEAT_WITH_Q) {
985 size += init_q + (atomlen+1)* n_q_reps;
986 } else if (special_case == REPEAT_ZERO) {
987 size += 2; /* just a null ERE: () */
989 if ((buf = (uschar *) malloc(size + 1)) == NULL)
990 FATAL("out of space in reg expr %.10s..", lastre);
991 memcpy(buf, basestr, prefix_length); /* copy prefix */
993 if (special_case == REPEAT_ZERO) {
998 for (i = 1; i < firstnum; i++) { /* copy x reps */
999 memcpy(&buf[j], atom, atomlen);
1002 if (special_case == REPEAT_PLUS_APPENDED) {
1004 } else if (special_case == REPEAT_WITH_Q) {
1007 for (i = init_q; i < n_q_reps; i++) { /* copy x? reps */
1008 memcpy(&buf[j], atom, atomlen);
1013 memcpy(&buf[j], reptok+reptoklen, suffix_length);
1014 if (special_case == REPEAT_ZERO) {
1015 buf[j+suffix_length] = '\0';
1019 /* free old basestr */
1020 if (firstbasestr != basestr) {
1025 prestr = buf + prefix_length;
1026 if (special_case == REPEAT_ZERO) {
1033 static int repeat(const uschar *reptok, int reptoklen, const uschar *atom,
1034 int atomlen, int firstnum, int secondnum)
1037 In general, the repetition specifier or "bound" is replaced here
1038 by an equivalent ERE string, repeating the immediately previous atom
1039 and appending ? and + as needed. Note that the first copy of the
1040 atom is left in place, except in the special_case of a zero-repeat
1043 if (secondnum < 0) { /* means {n,} -> repeat n-1 times followed by PLUS */
1045 /* 0 or 1: should be handled before you get here */
1046 FATAL("internal error");
1048 return replace_repeat(reptok, reptoklen, atom, atomlen,
1049 firstnum, secondnum, REPEAT_PLUS_APPENDED);
1051 } else if (firstnum == secondnum) { /* {n} or {n,n} -> simply repeat n-1 times */
1052 if (firstnum == 0) { /* {0} or {0,0} */
1053 /* This case is unusual because the resulting
1054 replacement string might actually be SMALLER than
1056 return replace_repeat(reptok, reptoklen, atom, atomlen,
1057 firstnum, secondnum, REPEAT_ZERO);
1058 } else { /* (firstnum >= 1) */
1059 return replace_repeat(reptok, reptoklen, atom, atomlen,
1060 firstnum, secondnum, REPEAT_SIMPLE);
1062 } else if (firstnum < secondnum) { /* {n,m} -> repeat n-1 times then alternate */
1063 /* x{n,m} => xx...x{1, m-n+1} => xx...x?x?x?..x? */
1064 return replace_repeat(reptok, reptoklen, atom, atomlen,
1065 firstnum, secondnum, REPEAT_WITH_Q);
1066 } else { /* Error - shouldn't be here (n>m) */
1067 FATAL("internal error");
1072 int relex(void) /* lexical analyzer for reparse */
1076 static uschar *buf = NULL;
1077 static int bufsz = 100;
1079 const struct charclass *cc;
1082 bool commafound, digitfound;
1083 const uschar *startreptok;
1084 static int parens = 0;
1089 switch (c = *prestr++) {
1090 case '|': return OR;
1091 case '*': return STAR;
1092 case '+': return PLUS;
1093 case '?': return QUEST;
1094 case '.': return DOT;
1095 case '\0': prestr--; return '\0';
1107 /* unmatched close parenthesis; per POSIX, treat as literal */
1111 rlxval = quoted(&prestr);
1117 if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
1118 FATAL("out of space in reg expr %.10s..", lastre);
1120 if (*prestr == '^') {
1126 n = 2 * strlen((const char *) prestr)+1;
1127 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
1128 FATAL("out of space for reg expr %.10s...", lastre);
1130 if ((c = *prestr++) == '\\') {
1132 if ((c = *prestr++) == '\0')
1133 FATAL("nonterminated character class %.20s...", lastre);
1135 /* } else if (c == '\n') { */
1136 /* FATAL("newline in character class %.20s...", lastre); */
1137 } else if (c == '[' && *prestr == ':') {
1138 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
1139 for (cc = charclasses; cc->cc_name; cc++)
1140 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
1142 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
1143 prestr[2 + cc->cc_namelen] == ']') {
1144 prestr += cc->cc_namelen + 3;
1146 * BUG: We begin at 1, instead of 0, since we
1147 * would otherwise prematurely terminate the
1148 * string for classes like [[:cntrl:]]. This
1149 * means that we can't match the NUL character,
1150 * not without first adapting the entire
1151 * program to track each string's length.
1153 for (i = 1; i <= UCHAR_MAX; i++) {
1154 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
1155 FATAL("out of space for reg expr %.10s...", lastre);
1156 if (cc->cc_func(i)) {
1157 /* escape backslash */
1169 } else if (c == '[' && *prestr == '.') {
1172 collate_char = *prestr++;
1173 if (*prestr == '.' && prestr[1] == ']') {
1175 /* Found it: map via locale TBD: for
1176 now, simply return this char. This
1177 is sufficient to pass conformance
1180 if (*prestr == ']') {
1182 rlxval = collate_char;
1186 } else if (c == '[' && *prestr == '=') {
1189 equiv_char = *prestr++;
1190 if (*prestr == '=' && prestr[1] == ']') {
1192 /* Found it: map via locale TBD: for now
1193 simply return this char. This is
1194 sufficient to pass conformance test
1197 if (*prestr == ']') {
1199 rlxval = equiv_char;
1203 } else if (c == '\0') {
1204 FATAL("nonterminated character class %.20s", lastre);
1205 } else if (bp == buf) { /* 1st char is special */
1207 } else if (c == ']') {
1209 rlxstr = (uschar *) tostring((char *) buf);
1219 if (isdigit(*(prestr))) {
1220 num = 0; /* Process as a repetition */
1224 startreptok = prestr-1;
1225 /* Remember start of previous atom here ? */
1226 } else { /* just a { char, not a repetition */
1231 if ((c = *prestr++) == '}') {
1233 if (digitfound) { /* {n,m} */
1236 FATAL("illegal repetition expression: class %.20s",
1238 if (n == 0 && m == 1) {
1248 if (digitfound) { /* {n} same as {n,n} */
1252 FATAL("illegal repetition expression: class %.20s",
1256 if (repeat(starttok, prestr-starttok, lastatom,
1257 startreptok - lastatom, n, m) > 0) {
1258 if (n == 0 && m == 0) {
1261 /* must rescan input for next token */
1264 /* Failed to replace: eat up {...} characters
1265 and treat like just PLUS */
1267 } else if (c == '\0') {
1268 FATAL("nonterminated character class %.20s",
1270 } else if (isdigit(c)) {
1271 num = 10 * num + c - '0';
1273 } else if (c == ',') {
1275 FATAL("illegal repetition expression: class %.20s",
1277 /* looking for {n,} or {n,m} */
1280 digitfound = false; /* reset */
1283 FATAL("illegal repetition expression: class %.20s",
1291 int cgoto(fa *f, int s, int c)
1296 assert(c == HAT || c < NCHARS);
1297 while (f->accept >= maxsetvec) { /* guessing here! */
1298 resizesetvec(__func__);
1300 for (i = 0; i <= f->accept; i++)
1304 /* compute positions of gototab[s,c] into setvec */
1306 for (i = 1; i <= *p; i++) {
1307 if ((k = f->re[p[i]].ltype) != FINAL) {
1308 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
1309 || (k == DOT && c != 0 && c != HAT)
1310 || (k == ALL && c != 0)
1311 || (k == EMPTYRE && c != 0)
1312 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
1313 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
1314 q = f->re[p[i]].lfollow;
1315 for (j = 1; j <= *q; j++) {
1316 if (q[j] >= maxsetvec) {
1317 resizesetvec(__func__);
1319 if (setvec[q[j]] == 0) {
1327 /* determine if setvec is a previous state */
1330 for (i = f->accept; i >= 0; i--)
1334 resize_state(f, f->curstat > s ? f->curstat : s);
1335 /* tmpset == previous state? */
1336 for (i = 1; i <= f->curstat; i++) {
1338 if ((k = tmpset[0]) != p[0])
1340 for (j = 1; j <= k; j++)
1341 if (tmpset[j] != p[j])
1343 /* setvec is state i */
1345 f->gototab[s][c] = i;
1350 /* add tmpset to current set of states */
1352 resize_state(f, f->curstat);
1353 for (i = 0; i < NCHARS; i++)
1354 f->gototab[f->curstat][i] = 0;
1355 xfree(f->posns[f->curstat]);
1356 p = intalloc(setcnt + 1, __func__);
1358 f->posns[f->curstat] = p;
1360 f->gototab[s][c] = f->curstat;
1361 for (i = 0; i <= setcnt; i++)
1363 if (setvec[f->accept])
1364 f->out[f->curstat] = 1;
1366 f->out[f->curstat] = 0;
1371 void freefa(fa *f) /* free a finite automaton */
1377 for (i = 0; i < f->state_count; i++)
1378 xfree(f->gototab[i])
1379 for (i = 0; i <= f->curstat; i++)
1381 for (i = 0; i <= f->accept; i++) {
1382 xfree(f->re[i].lfollow);
1383 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
1384 xfree(f->re[i].lval.np);