]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/one-true-awk/lex.c
sysctl(9): Fix a few mandoc related issues
[FreeBSD/FreeBSD.git] / contrib / one-true-awk / lex.c
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <ctype.h>
29 #include "awk.h"
30 #include "ytab.h"
31
32 extern YYSTYPE  yylval;
33 extern int      infunc;
34
35 int     lineno  = 1;
36 int     bracecnt = 0;
37 int     brackcnt  = 0;
38 int     parencnt = 0;
39
/* One entry of the reserved-word table searched by binsearch(). */
typedef struct Keyword {
	const char *word;	/* spelling of the reserved word */
	int sub;		/* value stored in yylval.i when the word is recognised */
	int type;		/* token code returned to the parser */
} Keyword;
45
46 Keyword keywords[] ={   /* keep sorted: binary searched */
47         { "BEGIN",      XBEGIN,         XBEGIN },
48         { "END",        XEND,           XEND },
49         { "NF",         VARNF,          VARNF },
50         { "and",        FAND,           BLTIN },
51         { "atan2",      FATAN,          BLTIN },
52         { "break",      BREAK,          BREAK },
53         { "close",      CLOSE,          CLOSE },
54         { "compl",      FCOMPL,         BLTIN },
55         { "continue",   CONTINUE,       CONTINUE },
56         { "cos",        FCOS,           BLTIN },
57         { "delete",     DELETE,         DELETE },
58         { "do",         DO,             DO },
59         { "else",       ELSE,           ELSE },
60         { "exit",       EXIT,           EXIT },
61         { "exp",        FEXP,           BLTIN },
62         { "fflush",     FFLUSH,         BLTIN },
63         { "for",        FOR,            FOR },
64         { "func",       FUNC,           FUNC },
65         { "function",   FUNC,           FUNC },
66         { "getline",    GETLINE,        GETLINE },
67         { "gsub",       GSUB,           GSUB },
68         { "if",         IF,             IF },
69         { "in",         IN,             IN },
70         { "index",      INDEX,          INDEX },
71         { "int",        FINT,           BLTIN },
72         { "length",     FLENGTH,        BLTIN },
73         { "log",        FLOG,           BLTIN },
74         { "lshift",     FLSHIFT,        BLTIN },
75         { "match",      MATCHFCN,       MATCHFCN },
76         { "next",       NEXT,           NEXT },
77         { "nextfile",   NEXTFILE,       NEXTFILE },
78         { "or",         FFOR,           BLTIN },
79         { "print",      PRINT,          PRINT },
80         { "printf",     PRINTF,         PRINTF },
81         { "rand",       FRAND,          BLTIN },
82         { "return",     RETURN,         RETURN },
83         { "rshift",     FRSHIFT,        BLTIN },
84         { "sin",        FSIN,           BLTIN },
85         { "split",      SPLIT,          SPLIT },
86         { "sprintf",    SPRINTF,        SPRINTF },
87         { "sqrt",       FSQRT,          BLTIN },
88         { "srand",      FSRAND,         BLTIN },
89         { "sub",        SUB,            SUB },
90         { "substr",     SUBSTR,         SUBSTR },
91         { "system",     FSYSTEM,        BLTIN },
92         { "tolower",    FTOLOWER,       BLTIN },
93         { "toupper",    FTOUPPER,       BLTIN },
94         { "while",      WHILE,          WHILE },
95         { "xor",        FXOR,           BLTIN },
96 };
97
/* Return token x from the enclosing function, tracing its name first when dbg is set. */
#define RET(x)	do { if (dbg) printf("lex %s\n", tokname(x)); return (x); } while (0)
99
/*
 * peek - look at the next input character without consuming it.
 * Reads one character with input() and immediately pushes it back
 * with unput(); the character read is returned.
 */
int peek(void)
{
	int next;

	next = input();
	unput(next);
	return next;
}
106
/*
 * gettok - read the next raw token into the caller's growable buffer.
 *
 * pbuf/psz point at the buffer and its size; both are updated on return
 * when the buffer had to be grown with adjbuf().
 *
 * Return value:
 *   0    end of input
 *   'a'  a name ([A-Za-z_][A-Za-z0-9_]*); text is in *pbuf
 *   '0'  a number (longest prefix accepted by strtod); text is in *pbuf
 *   c    any other character c, returned as itself (buf holds "c")
 *
 * The buffer must have room for at least two bytes on entry.
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Keep room for this character AND the terminating NUL:
			 * the loop can also end on end-of-input, in which case
			 * no further growth check runs before "*bp = 0" below.
			 */
			if (bp-buf+1 >= sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* same reservation as above: character plus NUL */
			if (bp-buf+1 >= sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			/*
			 * Push the lookahead back BEFORE truncating buf;
			 * truncating first would make the rest an empty string
			 * and silently drop the characters already consumed.
			 */
			unputstr(rem+1); /* put rest back for later */
			buf[1] = 0;	/* return one character as token */
			retc = buf[0];	/* character is its own type */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return retc;
}
169
/* Forward declarations for helpers defined later in this file. */
int word(char *w);
int string(void);
int regexpr(void);

/* One-shot flags examined at the top of yylex(). */
int sc = 0;	/* 1 => return a } right now */
int reg = 0;	/* 1 => return a REGEXPR now */
175
176 int yylex(void)
177 {
178         int c;
179         static char *buf = NULL;
180         static int bufsize = 5; /* BUG: setting this small causes core dump! */
181
182         if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
183                 FATAL( "out of space in yylex" );
184         if (sc) {
185                 sc = 0;
186                 RET('}');
187         }
188         if (reg) {
189                 reg = 0;
190                 return regexpr();
191         }
192         for (;;) {
193                 c = gettok(&buf, &bufsize);
194                 if (c == 0)
195                         return 0;
196                 if (isalpha(c) || c == '_')
197                         return word(buf);
198                 if (isdigit(c)) {
199                         yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
200                         /* should this also have STR set? */
201                         RET(NUMBER);
202                 }
203         
204                 yylval.i = c;
205                 switch (c) {
206                 case '\n':      /* {EOL} */
207                         lineno++;
208                         RET(NL);
209                 case '\r':      /* assume \n is coming */
210                 case ' ':       /* {WS}+ */
211                 case '\t':
212                         break;
213                 case '#':       /* #.* strip comments */
214                         while ((c = input()) != '\n' && c != 0)
215                                 ;
216                         unput(c);
217                         break;
218                 case ';':
219                         RET(';');
220                 case '\\':
221                         if (peek() == '\n') {
222                                 input();
223                                 lineno++;
224                         } else if (peek() == '\r') {
225                                 input(); input();       /* \n */
226                                 lineno++;
227                         } else {
228                                 RET(c);
229                         }
230                         break;
231                 case '&':
232                         if (peek() == '&') {
233                                 input(); RET(AND);
234                         } else 
235                                 RET('&');
236                 case '|':
237                         if (peek() == '|') {
238                                 input(); RET(BOR);
239                         } else
240                                 RET('|');
241                 case '!':
242                         if (peek() == '=') {
243                                 input(); yylval.i = NE; RET(NE);
244                         } else if (peek() == '~') {
245                                 input(); yylval.i = NOTMATCH; RET(MATCHOP);
246                         } else
247                                 RET(NOT);
248                 case '~':
249                         yylval.i = MATCH;
250                         RET(MATCHOP);
251                 case '<':
252                         if (peek() == '=') {
253                                 input(); yylval.i = LE; RET(LE);
254                         } else {
255                                 yylval.i = LT; RET(LT);
256                         }
257                 case '=':
258                         if (peek() == '=') {
259                                 input(); yylval.i = EQ; RET(EQ);
260                         } else {
261                                 yylval.i = ASSIGN; RET(ASGNOP);
262                         }
263                 case '>':
264                         if (peek() == '=') {
265                                 input(); yylval.i = GE; RET(GE);
266                         } else if (peek() == '>') {
267                                 input(); yylval.i = APPEND; RET(APPEND);
268                         } else {
269                                 yylval.i = GT; RET(GT);
270                         }
271                 case '+':
272                         if (peek() == '+') {
273                                 input(); yylval.i = INCR; RET(INCR);
274                         } else if (peek() == '=') {
275                                 input(); yylval.i = ADDEQ; RET(ASGNOP);
276                         } else
277                                 RET('+');
278                 case '-':
279                         if (peek() == '-') {
280                                 input(); yylval.i = DECR; RET(DECR);
281                         } else if (peek() == '=') {
282                                 input(); yylval.i = SUBEQ; RET(ASGNOP);
283                         } else
284                                 RET('-');
285                 case '*':
286                         if (peek() == '=') {    /* *= */
287                                 input(); yylval.i = MULTEQ; RET(ASGNOP);
288                         } else if (peek() == '*') {     /* ** or **= */
289                                 input();        /* eat 2nd * */
290                                 if (peek() == '=') {
291                                         input(); yylval.i = POWEQ; RET(ASGNOP);
292                                 } else {
293                                         RET(POWER);
294                                 }
295                         } else
296                                 RET('*');
297                 case '/':
298                         RET('/');
299                 case '%':
300                         if (peek() == '=') {
301                                 input(); yylval.i = MODEQ; RET(ASGNOP);
302                         } else
303                                 RET('%');
304                 case '^':
305                         if (peek() == '=') {
306                                 input(); yylval.i = POWEQ; RET(ASGNOP);
307                         } else
308                                 RET(POWER);
309
310                 case '$':
311                         /* BUG: awkward, if not wrong */
312                         c = gettok(&buf, &bufsize);
313                         if (isalpha(c)) {
314                                 if (strcmp(buf, "NF") == 0) {   /* very special */
315                                         unputstr("(NF)");
316                                         RET(INDIRECT);
317                                 }
318                                 c = peek();
319                                 if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
320                                         unputstr(buf);
321                                         RET(INDIRECT);
322                                 }
323                                 yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
324                                 RET(IVAR);
325                         } else if (c == 0) {    /*  */
326                                 SYNTAX( "unexpected end of input after $" );
327                                 RET(';');
328                         } else {
329                                 unputstr(buf);
330                                 RET(INDIRECT);
331                         }
332         
333                 case '}':
334                         if (--bracecnt < 0)
335                                 SYNTAX( "extra }" );
336                         sc = 1;
337                         RET(';');
338                 case ']':
339                         if (--brackcnt < 0)
340                                 SYNTAX( "extra ]" );
341                         RET(']');
342                 case ')':
343                         if (--parencnt < 0)
344                                 SYNTAX( "extra )" );
345                         RET(')');
346                 case '{':
347                         bracecnt++;
348                         RET('{');
349                 case '[':
350                         brackcnt++;
351                         RET('[');
352                 case '(':
353                         parencnt++;
354                         RET('(');
355         
356                 case '"':
357                         return string();        /* BUG: should be like tran.c ? */
358         
359                 default:
360                         RET(c);
361                 }
362         }
363 }
364
365 int string(void)
366 {
367         int c, n;
368         char *s, *bp;
369         static char *buf = NULL;
370         static int bufsz = 500;
371
372         if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
373                 FATAL("out of space for strings");
374         for (bp = buf; (c = input()) != '"'; ) {
375                 if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
376                         FATAL("out of space for string %.10s...", buf);
377                 switch (c) {
378                 case '\n':
379                 case '\r':
380                 case 0:
381                         *bp = '\0';
382                         SYNTAX( "non-terminated string %.10s...", buf );
383                         if (c == 0)     /* hopeless */
384                                 FATAL( "giving up" );
385                         lineno++;
386                         break;
387                 case '\\':
388                         c = input();
389                         switch (c) {
390                         case '"': *bp++ = '"'; break;
391                         case 'n': *bp++ = '\n'; break;  
392                         case 't': *bp++ = '\t'; break;
393                         case 'f': *bp++ = '\f'; break;
394                         case 'r': *bp++ = '\r'; break;
395                         case 'b': *bp++ = '\b'; break;
396                         case 'v': *bp++ = '\v'; break;
397                         case 'a': *bp++ = '\007'; break;
398                         case '\\': *bp++ = '\\'; break;
399
400                         case '0': case '1': case '2': /* octal: \d \dd \ddd */
401                         case '3': case '4': case '5': case '6': case '7':
402                                 n = c - '0';
403                                 if ((c = peek()) >= '0' && c < '8') {
404                                         n = 8 * n + input() - '0';
405                                         if ((c = peek()) >= '0' && c < '8')
406                                                 n = 8 * n + input() - '0';
407                                 }
408                                 *bp++ = n;
409                                 break;
410
411                         case 'x':       /* hex  \x0-9a-fA-F + */
412                             {   char xbuf[100], *px;
413                                 for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
414                                         if (isdigit(c)
415                                          || (c >= 'a' && c <= 'f')
416                                          || (c >= 'A' && c <= 'F'))
417                                                 *px++ = c;
418                                         else
419                                                 break;
420                                 }
421                                 *px = 0;
422                                 unput(c);
423                                 sscanf(xbuf, "%x", (unsigned int *) &n);
424                                 *bp++ = n;
425                                 break;
426                             }
427
428                         default: 
429                                 *bp++ = c;
430                                 break;
431                         }
432                         break;
433                 default:
434                         *bp++ = c;
435                         break;
436                 }
437         }
438         *bp = 0; 
439         s = tostring(buf);
440         *bp++ = ' '; *bp++ = 0;
441         yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
442         RET(STRING);
443 }
444
445
446 int binsearch(char *w, Keyword *kp, int n)
447 {
448         int cond, low, mid, high;
449
450         low = 0;
451         high = n - 1;
452         while (low <= high) {
453                 mid = (low + high) / 2;
454                 if ((cond = strcmp(w, kp[mid].word)) < 0)
455                         high = mid - 1;
456                 else if (cond > 0)
457                         low = mid + 1;
458                 else
459                         return mid;
460         }
461         return -1;
462 }
463
464 int word(char *w) 
465 {
466         Keyword *kp;
467         int c, n;
468
469         n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
470         if (n != -1) {  /* found in table */
471                 kp = keywords + n;
472                 yylval.i = kp->sub;
473                 switch (kp->type) {     /* special handling */
474                 case BLTIN:
475                         if (kp->sub == FSYSTEM && safe)
476                                 SYNTAX( "system is unsafe" );
477                         RET(kp->type);
478                 case FUNC:
479                         if (infunc)
480                                 SYNTAX( "illegal nested function" );
481                         RET(kp->type);
482                 case RETURN:
483                         if (!infunc)
484                                 SYNTAX( "return not in function" );
485                         RET(kp->type);
486                 case VARNF:
487                         yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
488                         RET(VARNF);
489                 default:
490                         RET(kp->type);
491                 }
492         }
493         c = peek();     /* look for '(' */
494         if (c != '(' && infunc && (n=isarg(w)) >= 0) {
495                 yylval.i = n;
496                 RET(ARG);
497         } else {
498                 yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
499                 if (c == '(') {
500                         RET(CALL);
501                 } else {
502                         RET(VAR);
503                 }
504         }
505 }
506
507 void startreg(void)     /* next call to yylex will return a regular expression */
508 {
509         reg = 1;
510 }
511
512 int regexpr(void)
513 {
514         int c;
515         static char *buf = NULL;
516         static int bufsz = 500;
517         char *bp;
518
519         if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
520                 FATAL("out of space for rex expr");
521         bp = buf;
522         for ( ; (c = input()) != '/' && c != 0; ) {
523                 if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
524                         FATAL("out of space for reg expr %.10s...", buf);
525                 if (c == '\n') {
526                         *bp = '\0';
527                         SYNTAX( "newline in regular expression %.10s...", buf ); 
528                         unput('\n');
529                         break;
530                 } else if (c == '\\') {
531                         *bp++ = '\\'; 
532                         *bp++ = input();
533                 } else {
534                         *bp++ = c;
535                 }
536         }
537         *bp = 0;
538         if (c == 0)
539                 SYNTAX("non-terminated regular expression %.10s...", buf);
540         yylval.s = tostring(buf);
541         unput('/');
542         RET(REGEXPR);
543 }
544
545 /* low-level lexical stuff, sort of inherited from lex */
546
/* Ring of recently read characters, written by input() and stepped back by unput(). */
char ebuf[300];
char *ep = ebuf;		/* next write position in ebuf */

/* Pushback stack: unput() pushes, input() pops. */
char yysbuf[100];
char *yysptr = yysbuf;		/* one past the top of the pushback stack */

FILE *yyin = NULL;
552
553 int input(void) /* get next lexical input character */
554 {
555         int c;
556         extern char *lexprog;
557
558         if (yysptr > yysbuf)
559                 c = (uschar)*--yysptr;
560         else if (lexprog != NULL) {     /* awk '...' */
561                 if ((c = (uschar)*lexprog) != 0)
562                         lexprog++;
563         } else                          /* awk -f ... */
564                 c = pgetc();
565         if (c == EOF)
566                 c = 0;
567         if (ep >= ebuf + sizeof ebuf)
568                 ep = ebuf;
569         *ep = c;
570         if (c != 0) {
571                 ep++;
572         }
573         return (c);
574 }
575
576 void unput(int c)       /* put lexical character back on input */
577 {
578         if (yysptr >= yysbuf + sizeof(yysbuf))
579                 FATAL("pushed back too much: %.20s...", yysbuf);
580         *yysptr++ = c;
581         if (--ep < ebuf)
582                 ep = ebuf + sizeof(ebuf) - 1;
583 }
584
/*
 * unputstr - put the string s back on the input.
 * Characters are pushed last-to-first with unput(), so that input()
 * subsequently yields them in their original order.
 */
void unputstr(const char *s)
{
	const char *p = s + strlen(s);

	while (p > s)
		unput(*--p);
}
591 }