1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 ****************************************************************/
30 #include "awkgram.tab.h"
32 extern YYSTYPE yylval;
40 typedef struct Keyword {
46 const Keyword keywords[] = { /* keep sorted: binary searched */
47 { "BEGIN", XBEGIN, XBEGIN },
48 { "END", XEND, XEND },
49 { "NF", VARNF, VARNF },
50 { "and", FAND, BLTIN },
51 { "atan2", FATAN, BLTIN },
52 { "break", BREAK, BREAK },
53 { "close", CLOSE, CLOSE },
54 { "compl", FCOMPL, BLTIN },
55 { "continue", CONTINUE, CONTINUE },
56 { "cos", FCOS, BLTIN },
57 { "delete", DELETE, DELETE },
59 { "else", ELSE, ELSE },
60 { "exit", EXIT, EXIT },
61 { "exp", FEXP, BLTIN },
62 { "fflush", FFLUSH, BLTIN },
64 { "func", FUNC, FUNC },
65 { "function", FUNC, FUNC },
66 { "gensub", GENSUB, GENSUB },
67 { "getline", GETLINE, GETLINE },
68 { "gsub", GSUB, GSUB },
71 { "index", INDEX, INDEX },
72 { "int", FINT, BLTIN },
73 { "length", FLENGTH, BLTIN },
74 { "log", FLOG, BLTIN },
75 { "lshift", FLSHIFT, BLTIN },
76 { "match", MATCHFCN, MATCHFCN },
77 { "next", NEXT, NEXT },
78 { "nextfile", NEXTFILE, NEXTFILE },
79 { "or", FFOR, BLTIN },
80 { "print", PRINT, PRINT },
81 { "printf", PRINTF, PRINTF },
82 { "rand", FRAND, BLTIN },
83 { "return", RETURN, RETURN },
84 { "rshift", FRSHIFT, BLTIN },
85 { "sin", FSIN, BLTIN },
86 { "split", SPLIT, SPLIT },
87 { "sprintf", SPRINTF, SPRINTF },
88 { "sqrt", FSQRT, BLTIN },
89 { "srand", FSRAND, BLTIN },
90 { "strftime", FSTRFTIME, BLTIN },
92 { "substr", SUBSTR, SUBSTR },
93 { "system", FSYSTEM, BLTIN },
94 { "systime", FSYSTIME, BLTIN },
95 { "tolower", FTOLOWER, BLTIN },
96 { "toupper", FTOUPPER, BLTIN },
97 { "while", WHILE, WHILE },
98 { "xor", FXOR, BLTIN },
101 #define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); }
103 static int peek(void)
110 static int gettok(char **pbuf, int *psz) /* get next input token */
122 if (!isalnum(c) && c != '.' && c != '_')
126 if (isalpha(c) || c == '_') { /* it's a varname */
127 for ( ; (c = input()) != 0; ) {
129 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
130 FATAL( "out of space for name %.10s...", buf );
131 if (isalnum(c) || c == '_')
140 retc = 'a'; /* alphanumeric */
141 } else { /* maybe it's a number, but could be . */
143 /* read input until can't be a number */
144 for ( ; (c = input()) != 0; ) {
146 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
147 FATAL( "out of space for number %.10s...", buf );
148 if (isdigit(c) || c == 'e' || c == 'E'
149 || c == '.' || c == '+' || c == '-')
157 strtod(buf, &rem); /* parse the number */
158 if (rem == buf) { /* it wasn't a valid number at all */
159 buf[1] = 0; /* return one character as token */
160 retc = (uschar)buf[0]; /* character is its own type */
161 unputstr(rem+1); /* put rest back for later */
162 } else { /* some prefix was a number */
163 unputstr(rem); /* put rest back for later */
164 rem[0] = 0; /* truncate buf after number part */
165 retc = '0'; /* type is number */
176 bool sc = false; /* true => return a } right now */
177 bool reg = false; /* true => return a REGEXPR now */
182 static char *buf = NULL;
183 static int bufsize = 5; /* BUG: setting this small causes core dump! */
185 if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
186 FATAL( "out of space in yylex" );
196 c = gettok(&buf, &bufsize);
199 if (isalpha(c) || c == '_')
202 char *cp = tostring(buf);
205 if (is_number(cp, & result))
206 yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab);
208 yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab);
210 /* should this also have STR set? */
216 case '\n': /* {EOL} */
219 case '\r': /* assume \n is coming */
220 case ' ': /* {WS}+ */
223 case '#': /* #.* strip comments */
224 while ((c = input()) != '\n' && c != 0)
228 * Next line is a hack, it compensates for
229 * unput's treatment of \n.
236 if (peek() == '\n') {
239 } else if (peek() == '\r') {
240 input(); input(); /* \n */
258 input(); yylval.i = NE; RET(NE);
259 } else if (peek() == '~') {
260 input(); yylval.i = NOTMATCH; RET(MATCHOP);
268 input(); yylval.i = LE; RET(LE);
270 yylval.i = LT; RET(LT);
274 input(); yylval.i = EQ; RET(EQ);
276 yylval.i = ASSIGN; RET(ASGNOP);
280 input(); yylval.i = GE; RET(GE);
281 } else if (peek() == '>') {
282 input(); yylval.i = APPEND; RET(APPEND);
284 yylval.i = GT; RET(GT);
288 input(); yylval.i = INCR; RET(INCR);
289 } else if (peek() == '=') {
290 input(); yylval.i = ADDEQ; RET(ASGNOP);
295 input(); yylval.i = DECR; RET(DECR);
296 } else if (peek() == '=') {
297 input(); yylval.i = SUBEQ; RET(ASGNOP);
301 if (peek() == '=') { /* *= */
302 input(); yylval.i = MULTEQ; RET(ASGNOP);
303 } else if (peek() == '*') { /* ** or **= */
304 input(); /* eat 2nd * */
306 input(); yylval.i = POWEQ; RET(ASGNOP);
316 input(); yylval.i = MODEQ; RET(ASGNOP);
321 input(); yylval.i = POWEQ; RET(ASGNOP);
326 /* BUG: awkward, if not wrong */
327 c = gettok(&buf, &bufsize);
329 if (strcmp(buf, "NF") == 0) { /* very special */
334 if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
338 yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
340 } else if (c == 0) { /* */
341 SYNTAX( "unexpected end of input after $" );
372 return string(); /* BUG: should be like tran.c ? */
384 static char *buf = NULL;
385 static int bufsz = 500;
387 if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
388 FATAL("out of space for strings");
389 for (bp = buf; (c = input()) != '"'; ) {
390 if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
391 FATAL("out of space for string %.10s...", buf);
397 SYNTAX( "non-terminated string %.10s...", buf );
398 if (c == 0) /* hopeless */
399 FATAL( "giving up" );
406 case '"': *bp++ = '"'; break;
407 case 'n': *bp++ = '\n'; break;
408 case 't': *bp++ = '\t'; break;
409 case 'f': *bp++ = '\f'; break;
410 case 'r': *bp++ = '\r'; break;
411 case 'b': *bp++ = '\b'; break;
412 case 'v': *bp++ = '\v'; break;
413 case 'a': *bp++ = '\a'; break;
414 case '\\': *bp++ = '\\'; break;
416 case '0': case '1': case '2': /* octal: \d \dd \ddd */
417 case '3': case '4': case '5': case '6': case '7':
419 if ((c = peek()) >= '0' && c < '8') {
420 n = 8 * n + input() - '0';
421 if ((c = peek()) >= '0' && c < '8')
422 n = 8 * n + input() - '0';
427 case 'x': /* hex \x0-9a-fA-F + */
428 { char xbuf[100], *px;
429 for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
431 || (c >= 'a' && c <= 'f')
432 || (c >= 'A' && c <= 'F'))
439 sscanf(xbuf, "%x", (unsigned int *) &n);
456 *bp++ = ' '; *bp++ = '\0';
457 yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
463 static int binsearch(char *w, const Keyword *kp, int n)
465 int cond, low, mid, high;
469 while (low <= high) {
470 mid = (low + high) / 2;
471 if ((cond = strcmp(w, kp[mid].word)) < 0)
486 n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
487 if (n != -1) { /* found in table */
490 switch (kp->type) { /* special handling */
492 if (kp->sub == FSYSTEM && safe)
493 SYNTAX( "system is unsafe" );
497 SYNTAX( "illegal nested function" );
501 SYNTAX( "return not in function" );
504 yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
510 c = peek(); /* look for '(' */
511 if (c != '(' && infunc && (n=isarg(w)) >= 0) {
515 yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
524 void startreg(void) /* next call to yylex will return a regular expression */
532 static char *buf = NULL;
533 static int bufsz = 500;
536 if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
537 FATAL("out of space for rex expr");
539 for ( ; (c = input()) != '/' && c != 0; ) {
540 if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
541 FATAL("out of space for reg expr %.10s...", buf);
544 SYNTAX( "newline in regular expression %.10s...", buf );
547 } else if (c == '\\') {
556 SYNTAX("non-terminated regular expression %.10s...", buf);
557 yylval.s = tostring(buf);
562 /* low-level lexical stuff, sort of inherited from lex */
566 char yysbuf[100]; /* pushback buffer */
567 char *yysptr = yysbuf;
570 int input(void) /* get next lexical input character */
573 extern char *lexprog;
576 c = (uschar)*--yysptr;
577 else if (lexprog != NULL) { /* awk '...' */
578 if ((c = (uschar)*lexprog) != 0)
580 } else /* awk -f ... */
584 if (ep >= ebuf + sizeof ebuf)
593 void unput(int c) /* put lexical character back on input */
597 if (yysptr >= yysbuf + sizeof(yysbuf))
598 FATAL("pushed back too much: %.20s...", yysbuf);
601 ep = ebuf + sizeof(ebuf) - 1;
604 void unputstr(const char *s) /* put a string back on input */
608 for (i = strlen(s)-1; i >= 0; i--)