]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/one-true-awk/lex.c
Add two missing eventhandler.h headers
[FreeBSD/FreeBSD.git] / contrib / one-true-awk / lex.c
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <ctype.h>
29 #include "awk.h"
30 #include "ytab.h"
31
32 extern YYSTYPE  yylval;
33 extern int      infunc;
34
35 int     lineno  = 1;
36 int     bracecnt = 0;
37 int     brackcnt  = 0;
38 int     parencnt = 0;
39
40 typedef struct Keyword {
41         const char *word;
42         int     sub;
43         int     type;
44 } Keyword;
45
46 Keyword keywords[] ={   /* keep sorted: binary searched */
47         { "BEGIN",      XBEGIN,         XBEGIN },
48         { "END",        XEND,           XEND },
49         { "NF",         VARNF,          VARNF },
50         { "and",        FAND,           BLTIN },
51         { "atan2",      FATAN,          BLTIN },
52         { "break",      BREAK,          BREAK },
53         { "close",      CLOSE,          CLOSE },
54         { "compl",      FCOMPL,         BLTIN },
55         { "continue",   CONTINUE,       CONTINUE },
56         { "cos",        FCOS,           BLTIN },
57         { "delete",     DELETE,         DELETE },
58         { "do",         DO,             DO },
59         { "else",       ELSE,           ELSE },
60         { "exit",       EXIT,           EXIT },
61         { "exp",        FEXP,           BLTIN },
62         { "fflush",     FFLUSH,         BLTIN },
63         { "for",        FOR,            FOR },
64         { "func",       FUNC,           FUNC },
65         { "function",   FUNC,           FUNC },
66         { "getline",    GETLINE,        GETLINE },
67         { "gsub",       GSUB,           GSUB },
68         { "if",         IF,             IF },
69         { "in",         IN,             IN },
70         { "index",      INDEX,          INDEX },
71         { "int",        FINT,           BLTIN },
72         { "length",     FLENGTH,        BLTIN },
73         { "log",        FLOG,           BLTIN },
74         { "lshift",     FLSHIFT,        BLTIN },
75         { "match",      MATCHFCN,       MATCHFCN },
76         { "next",       NEXT,           NEXT },
77         { "nextfile",   NEXTFILE,       NEXTFILE },
78         { "or",         FFOR,           BLTIN },
79         { "print",      PRINT,          PRINT },
80         { "printf",     PRINTF,         PRINTF },
81         { "rand",       FRAND,          BLTIN },
82         { "return",     RETURN,         RETURN },
83         { "rshift",     FRSHIFT,        BLTIN },
84         { "sin",        FSIN,           BLTIN },
85         { "split",      SPLIT,          SPLIT },
86         { "sprintf",    SPRINTF,        SPRINTF },
87         { "sqrt",       FSQRT,          BLTIN },
88         { "srand",      FSRAND,         BLTIN },
89         { "sub",        SUB,            SUB },
90         { "substr",     SUBSTR,         SUBSTR },
91         { "system",     FSYSTEM,        BLTIN },
92         { "tolower",    FTOLOWER,       BLTIN },
93         { "toupper",    FTOUPPER,       BLTIN },
94         { "while",      WHILE,          WHILE },
95         { "xor",        FXOR,           BLTIN },
96 };
97
/* RET: return token x from the lexer, printing its name first when dbg is set. */
#define	RET(x)	do { if (dbg) printf("lex %s\n", tokname(x)); return (x); } while (0)
99
/* peek: return the next input character without consuming it. */
int peek(void)
{
	int next;

	next = input();
	unput(next);	/* push it straight back so the next input() returns it again */
	return next;
}
106
/*
 * gettok: read the next raw token from the input.
 *
 * pbuf/psz describe a growable buffer owned by the caller; the token text is
 * left NUL-terminated in *pbuf, and *pbuf / *psz are updated if the buffer
 * had to be enlarged.  The buffer must hold at least 2 bytes on entry.
 *
 * Returns:
 *   0    at end of input;
 *   'a'  for a name ([A-Za-z_][A-Za-z0-9_]*, as classified by <ctype.h>);
 *   '0'  for a number (longest prefix accepted by strtod);
 *   otherwise the single character read, which is its own token type.
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Grow while there is not room for this character AND
			 * the terminating NUL; checking against sz alone lets
			 * the final "*bp = 0" land one byte past the buffer
			 * when input ends right after a full buffer.
			 */
			if (bp-buf >= sz-1)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* same rule as above: keep one byte spare for the NUL */
			if (bp-buf >= sz-1)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			retc = buf[0];	/* character is its own type */
			/*
			 * Push the rest back BEFORE truncating: rem+1 is
			 * buf+1, so truncating first would push back an
			 * empty string and silently drop those characters.
			 */
			unputstr(rem+1);	/* put rest back for later */
			buf[1] = 0;	/* return one character as token */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return retc;
}
169
/* Forward declarations for the helpers yylex() dispatches to. */
int	word(char *w);
int	string(void);
int	regexpr(void);

/* One-shot flags tested (and cleared) at the top of yylex(). */
int	sc	= 0;	/* 1 => return a } right now */
int	reg	= 0;	/* 1 => return a REGEXPR now */
175
176 int yylex(void)
177 {
178         int c;
179         static char *buf = NULL;
180         static int bufsize = 5; /* BUG: setting this small causes core dump! */
181
182         if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
183                 FATAL( "out of space in yylex" );
184         if (sc) {
185                 sc = 0;
186                 RET('}');
187         }
188         if (reg) {
189                 reg = 0;
190                 return regexpr();
191         }
192         for (;;) {
193                 c = gettok(&buf, &bufsize);
194                 if (c == 0)
195                         return 0;
196                 if (isalpha(c) || c == '_')
197                         return word(buf);
198                 if (isdigit(c)) {
199                         yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
200                         /* should this also have STR set? */
201                         RET(NUMBER);
202                 }
203         
204                 yylval.i = c;
205                 switch (c) {
206                 case '\n':      /* {EOL} */
207                         RET(NL);
208                 case '\r':      /* assume \n is coming */
209                 case ' ':       /* {WS}+ */
210                 case '\t':
211                         break;
212                 case '#':       /* #.* strip comments */
213                         while ((c = input()) != '\n' && c != 0)
214                                 ;
215                         unput(c);
216                         break;
217                 case ';':
218                         RET(';');
219                 case '\\':
220                         if (peek() == '\n') {
221                                 input();
222                         } else if (peek() == '\r') {
223                                 input(); input();       /* \n */
224                                 lineno++;
225                         } else {
226                                 RET(c);
227                         }
228                         break;
229                 case '&':
230                         if (peek() == '&') {
231                                 input(); RET(AND);
232                         } else 
233                                 RET('&');
234                 case '|':
235                         if (peek() == '|') {
236                                 input(); RET(BOR);
237                         } else
238                                 RET('|');
239                 case '!':
240                         if (peek() == '=') {
241                                 input(); yylval.i = NE; RET(NE);
242                         } else if (peek() == '~') {
243                                 input(); yylval.i = NOTMATCH; RET(MATCHOP);
244                         } else
245                                 RET(NOT);
246                 case '~':
247                         yylval.i = MATCH;
248                         RET(MATCHOP);
249                 case '<':
250                         if (peek() == '=') {
251                                 input(); yylval.i = LE; RET(LE);
252                         } else {
253                                 yylval.i = LT; RET(LT);
254                         }
255                 case '=':
256                         if (peek() == '=') {
257                                 input(); yylval.i = EQ; RET(EQ);
258                         } else {
259                                 yylval.i = ASSIGN; RET(ASGNOP);
260                         }
261                 case '>':
262                         if (peek() == '=') {
263                                 input(); yylval.i = GE; RET(GE);
264                         } else if (peek() == '>') {
265                                 input(); yylval.i = APPEND; RET(APPEND);
266                         } else {
267                                 yylval.i = GT; RET(GT);
268                         }
269                 case '+':
270                         if (peek() == '+') {
271                                 input(); yylval.i = INCR; RET(INCR);
272                         } else if (peek() == '=') {
273                                 input(); yylval.i = ADDEQ; RET(ASGNOP);
274                         } else
275                                 RET('+');
276                 case '-':
277                         if (peek() == '-') {
278                                 input(); yylval.i = DECR; RET(DECR);
279                         } else if (peek() == '=') {
280                                 input(); yylval.i = SUBEQ; RET(ASGNOP);
281                         } else
282                                 RET('-');
283                 case '*':
284                         if (peek() == '=') {    /* *= */
285                                 input(); yylval.i = MULTEQ; RET(ASGNOP);
286                         } else if (peek() == '*') {     /* ** or **= */
287                                 input();        /* eat 2nd * */
288                                 if (peek() == '=') {
289                                         input(); yylval.i = POWEQ; RET(ASGNOP);
290                                 } else {
291                                         RET(POWER);
292                                 }
293                         } else
294                                 RET('*');
295                 case '/':
296                         RET('/');
297                 case '%':
298                         if (peek() == '=') {
299                                 input(); yylval.i = MODEQ; RET(ASGNOP);
300                         } else
301                                 RET('%');
302                 case '^':
303                         if (peek() == '=') {
304                                 input(); yylval.i = POWEQ; RET(ASGNOP);
305                         } else
306                                 RET(POWER);
307
308                 case '$':
309                         /* BUG: awkward, if not wrong */
310                         c = gettok(&buf, &bufsize);
311                         if (isalpha(c)) {
312                                 if (strcmp(buf, "NF") == 0) {   /* very special */
313                                         unputstr("(NF)");
314                                         RET(INDIRECT);
315                                 }
316                                 c = peek();
317                                 if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
318                                         unputstr(buf);
319                                         RET(INDIRECT);
320                                 }
321                                 yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
322                                 RET(IVAR);
323                         } else if (c == 0) {    /*  */
324                                 SYNTAX( "unexpected end of input after $" );
325                                 RET(';');
326                         } else {
327                                 unputstr(buf);
328                                 RET(INDIRECT);
329                         }
330         
331                 case '}':
332                         if (--bracecnt < 0)
333                                 SYNTAX( "extra }" );
334                         sc = 1;
335                         RET(';');
336                 case ']':
337                         if (--brackcnt < 0)
338                                 SYNTAX( "extra ]" );
339                         RET(']');
340                 case ')':
341                         if (--parencnt < 0)
342                                 SYNTAX( "extra )" );
343                         RET(')');
344                 case '{':
345                         bracecnt++;
346                         RET('{');
347                 case '[':
348                         brackcnt++;
349                         RET('[');
350                 case '(':
351                         parencnt++;
352                         RET('(');
353         
354                 case '"':
355                         return string();        /* BUG: should be like tran.c ? */
356         
357                 default:
358                         RET(c);
359                 }
360         }
361 }
362
363 int string(void)
364 {
365         int c, n;
366         char *s, *bp;
367         static char *buf = NULL;
368         static int bufsz = 500;
369
370         if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
371                 FATAL("out of space for strings");
372         for (bp = buf; (c = input()) != '"'; ) {
373                 if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
374                         FATAL("out of space for string %.10s...", buf);
375                 switch (c) {
376                 case '\n':
377                 case '\r':
378                 case 0:
379                         SYNTAX( "non-terminated string %.10s...", buf );
380                         lineno++;
381                         if (c == 0)     /* hopeless */
382                                 FATAL( "giving up" );
383                         break;
384                 case '\\':
385                         c = input();
386                         switch (c) {
387                         case '"': *bp++ = '"'; break;
388                         case 'n': *bp++ = '\n'; break;  
389                         case 't': *bp++ = '\t'; break;
390                         case 'f': *bp++ = '\f'; break;
391                         case 'r': *bp++ = '\r'; break;
392                         case 'b': *bp++ = '\b'; break;
393                         case 'v': *bp++ = '\v'; break;
394                         case 'a': *bp++ = '\007'; break;
395                         case '\\': *bp++ = '\\'; break;
396
397                         case '0': case '1': case '2': /* octal: \d \dd \ddd */
398                         case '3': case '4': case '5': case '6': case '7':
399                                 n = c - '0';
400                                 if ((c = peek()) >= '0' && c < '8') {
401                                         n = 8 * n + input() - '0';
402                                         if ((c = peek()) >= '0' && c < '8')
403                                                 n = 8 * n + input() - '0';
404                                 }
405                                 *bp++ = n;
406                                 break;
407
408                         case 'x':       /* hex  \x0-9a-fA-F + */
409                             {   char xbuf[100], *px;
410                                 for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
411                                         if (isdigit(c)
412                                          || (c >= 'a' && c <= 'f')
413                                          || (c >= 'A' && c <= 'F'))
414                                                 *px++ = c;
415                                         else
416                                                 break;
417                                 }
418                                 *px = 0;
419                                 unput(c);
420                                 sscanf(xbuf, "%x", (unsigned int *) &n);
421                                 *bp++ = n;
422                                 break;
423                             }
424
425                         default: 
426                                 *bp++ = c;
427                                 break;
428                         }
429                         break;
430                 default:
431                         *bp++ = c;
432                         break;
433                 }
434         }
435         *bp = 0; 
436         s = tostring(buf);
437         *bp++ = ' '; *bp++ = 0;
438         yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
439         RET(STRING);
440 }
441
442
443 int binsearch(char *w, Keyword *kp, int n)
444 {
445         int cond, low, mid, high;
446
447         low = 0;
448         high = n - 1;
449         while (low <= high) {
450                 mid = (low + high) / 2;
451                 if ((cond = strcmp(w, kp[mid].word)) < 0)
452                         high = mid - 1;
453                 else if (cond > 0)
454                         low = mid + 1;
455                 else
456                         return mid;
457         }
458         return -1;
459 }
460
461 int word(char *w) 
462 {
463         Keyword *kp;
464         int c, n;
465
466         n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
467 /* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */
468         kp = keywords + n;
469         if (n != -1) {  /* found in table */
470                 yylval.i = kp->sub;
471                 switch (kp->type) {     /* special handling */
472                 case BLTIN:
473                         if (kp->sub == FSYSTEM && safe)
474                                 SYNTAX( "system is unsafe" );
475                         RET(kp->type);
476                 case FUNC:
477                         if (infunc)
478                                 SYNTAX( "illegal nested function" );
479                         RET(kp->type);
480                 case RETURN:
481                         if (!infunc)
482                                 SYNTAX( "return not in function" );
483                         RET(kp->type);
484                 case VARNF:
485                         yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
486                         RET(VARNF);
487                 default:
488                         RET(kp->type);
489                 }
490         }
491         c = peek();     /* look for '(' */
492         if (c != '(' && infunc && (n=isarg(w)) >= 0) {
493                 yylval.i = n;
494                 RET(ARG);
495         } else {
496                 yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
497                 if (c == '(') {
498                         RET(CALL);
499                 } else {
500                         RET(VAR);
501                 }
502         }
503 }
504
505 void startreg(void)     /* next call to yylex will return a regular expression */
506 {
507         reg = 1;
508 }
509
510 int regexpr(void)
511 {
512         int c;
513         static char *buf = NULL;
514         static int bufsz = 500;
515         char *bp;
516
517         if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
518                 FATAL("out of space for rex expr");
519         bp = buf;
520         for ( ; (c = input()) != '/' && c != 0; ) {
521                 if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
522                         FATAL("out of space for reg expr %.10s...", buf);
523                 if (c == '\n') {
524                         SYNTAX( "newline in regular expression %.10s...", buf ); 
525                         unput('\n');
526                         break;
527                 } else if (c == '\\') {
528                         *bp++ = '\\'; 
529                         *bp++ = input();
530                 } else {
531                         *bp++ = c;
532                 }
533         }
534         *bp = 0;
535         if (c == 0)
536                 SYNTAX("non-terminated regular expression %.10s...", buf);
537         yylval.s = tostring(buf);
538         unput('/');
539         RET(REGEXPR);
540 }
541
/* low-level lexical stuff, sort of inherited from lex */

/* Record of the characters handed out by input(); ep wraps around when it reaches the end. */
char	ebuf[300];
char	*ep = ebuf;

/* Pushback stack: unput() stores at yysptr, input() takes from it first. */
char	yysbuf[100];
char	*yysptr = yysbuf;

FILE	*yyin = NULL;
549
550 int input(void) /* get next lexical input character */
551 {
552         int c;
553         extern char *lexprog;
554
555         if (yysptr > yysbuf)
556                 c = (uschar)*--yysptr;
557         else if (lexprog != NULL) {     /* awk '...' */
558                 if ((c = (uschar)*lexprog) != 0)
559                         lexprog++;
560         } else                          /* awk -f ... */
561                 c = pgetc();
562         if (c == '\n')
563                 lineno++;
564         else if (c == EOF)
565                 c = 0;
566         if (ep >= ebuf + sizeof ebuf)
567                 ep = ebuf;
568         return *ep++ = c;
569 }
570
571 void unput(int c)       /* put lexical character back on input */
572 {
573         if (c == '\n')
574                 lineno--;
575         if (yysptr >= yysbuf + sizeof(yysbuf))
576                 FATAL("pushed back too much: %.20s...", yysbuf);
577         *yysptr++ = c;
578         if (--ep < ebuf)
579                 ep = ebuf + sizeof(ebuf) - 1;
580 }
581
/*
 * unputstr: put the string s back on the input.  Characters are pushed
 * last-to-first so that input() returns them in their original order.
 */
void unputstr(const char *s)	/* put a string back on input */
{
	const char *p = s + strlen(s);

	while (p > s)
		unput(*--p);
}