4 * The source code for a simple lexical analyzer.
6 * Written By: Sachin Kamboj
7 * University of Delaware
23 #include "ntp_config.h"
25 #include "ntp_scanner.h"
26 #include "ntp_parser.h"
28 /* ntp_keyword.h declares finite state machine and token text */
29 #include "ntp_keyword.h"
33 /* SCANNER GLOBAL VARIABLES
34 * ------------------------
37 #define MAX_LEXEME (1024 + 1) /* The maximum size of a lexeme */
38 char yytext[MAX_LEXEME]; /* Buffer for storing the input text/lexeme */
39 u_int32 conf_file_sum; /* Simple sum of characters read */
41 static struct FILE_INFO * lex_stack = NULL;
50 /* SCANNER GLOBAL VARIABLES
51 * ------------------------
53 const char special_chars[] = "{}(),;|=";
60 static int is_keyword(char *lexeme, follby *pfollowedby);
64 * keyword() - Return the keyword associated with token T_ identifier.
65 * See also token_name() for the string-ized T_ identifier.
66 * Example: keyword(T_Server) returns "server"
67 * token_name(T_Server) returns "T_Server"
77 i = token - LOWEST_KEYWORD_ID;
79 if (i < COUNTOF(keyword_text))
80 text = keyword_text[i];
86 : "(keyword not found)";
90 /* FILE & STRING BUFFER INTERFACE
91 * ------------------------------
93 * This started out as a couple of wrapper functions around the standard C
94 * fgetc and ungetc functions in order to include positional
95 * bookkeeping. Alas, this is no longer a good solution with nested
96 * input files and the possibility to send configuration commands via
99 * Now there are a few functions to maintain a stack of nested input
100 * sources (though nesting is only allowed for disk files) and from the
101 * scanner / parser point of view there's no difference between both
104 * The 'fgetc()' / 'ungetc()' replacements now operate on a FILE_INFO
105 * structure. Instead of trying different 'ungetc()' strategies for file
106 * and buffer based parsing, we keep the backup char in our own
107 * FILE_INFO structure. This is sufficient, as the parser does *not*
108 * jump around via 'seek' or the like, and there's no need to
109 * check/clear the backup store in other places than 'lex_getch()'.
113 * Allocate an info structure and attach it to a file.
115 * Note: When 'mode' is NULL, then the INFO block will be set up to
116 * contain a NULL file pointer, as suited for remote config command
117 * parsing. Otherwise having a NULL file pointer is considered an error,
118 * and a NULL info block pointer is returned to indicate failure!
120 * Note: We use a variable-sized structure to hold a copy of the file
121 * name (or, more proper, the input source description). This is more
122 * secure than keeping a reference to some other storage that might go
125 static struct FILE_INFO *
131 struct FILE_INFO *stream;
134 nnambuf = strlen(path);
135 stream = emalloc_zero(sizeof(*stream) + nnambuf);
136 stream->curpos.nline = 1;
137 stream->backch = EOF;
138 /* copy name with memcpy -- trailing NUL already there! */
139 memcpy(stream->fname, path, nnambuf);
142 stream->fpi = fopen(path, mode);
143 if (NULL == stream->fpi) {
151 /* get next character from buffer or file. This will return any putback
152 * character first; it will also make sure the last line is at least
153 * virtually terminated with a '\n'.
157 struct FILE_INFO *stream
162 if (NULL == stream || stream->force_eof)
165 if (EOF != stream->backch) {
167 stream->backch = EOF;
170 stream->curpos.ncol++;
171 } else if (stream->fpi) {
172 /* fetch next 7-bit ASCII char (or EOF) from file */
173 while ((ch = fgetc(stream->fpi)) != EOF && ch > SCHAR_MAX)
174 stream->curpos.ncol++;
177 stream->curpos.ncol++;
180 /* fetch next 7-bit ASCII char from buffer */
182 scan = &remote_config.buffer[remote_config.pos];
183 while ((ch = (u_char)*scan) > SCHAR_MAX) {
185 stream->curpos.ncol++;
189 stream->curpos.ncol++;
193 remote_config.pos = (int)(scan - remote_config.buffer);
196 /* If the last line ends without '\n', generate one. This
197 * happens most likely on Windows, where editors often have a
198 * sloppy concept of a line.
200 if (EOF == ch && stream->curpos.ncol != 0)
203 /* update scan position tallies */
205 stream->bakpos = stream->curpos;
206 stream->curpos.nline++;
207 stream->curpos.ncol = 0;
213 /* Note: lex_ungetch will fail to track more than one line of push
214 * back. But since it guarantees only one char of back storage anyway,
215 * this should not be a problem.
220 struct FILE_INFO *stream
223 /* check preconditions */
224 if (NULL == stream || stream->force_eof)
226 if (EOF != stream->backch || EOF == ch)
229 /* keep for later reference and update checksum */
230 stream->backch = (u_char)ch;
232 conf_file_sum -= stream->backch;
234 /* update position */
235 if (stream->backch == '\n') {
236 stream->curpos = stream->bakpos;
237 stream->bakpos.ncol = -1;
239 stream->curpos.ncol--;
240 return stream->backch;
243 /* dispose of an input structure. If the file pointer is not NULL, close
244 * the file. This function does not check the result of 'fclose()'.
248 struct FILE_INFO *stream
251 if (NULL != stream) {
252 if (NULL != stream->fpi)
261 * Nested input sources are a bit tricky at first glance. We deal with
262 * this problem using a stack of input sources, that is, a forward
263 * linked list of FILE_INFO structs.
265 * This stack is never empty during parsing; while an encounter with EOF
266 * can and will remove nested input sources, removing the last element
267 * in the stack will not work during parsing, and the EOF condition of
268 * the outermost input file remains until the parser folds up.
271 static struct FILE_INFO *
273 struct FILE_INFO * head
276 struct FILE_INFO * tail;
277 while (NULL != head) {
278 tail = head->st_next;
287 /* Create a singleton input source on an empty lexer stack. This will
288 * fail if there is already an input source, or if the underlying disk
289 * file cannot be opened.
291 * Returns TRUE if a new input object was successfully created.
299 if (NULL != lex_stack || NULL == path)
302 lex_stack = lex_open(path, mode);
303 return (NULL != lex_stack);
306 /* This removes *all* input sources from the stack, leaving the head
307 * pointer as NULL. Any attempt to parse in that state is likely to bomb
308 * with segmentation faults or the like.
310 * In other words: Use this to clean up after parsing, and do not parse
311 * anything until the next 'lex_init_stack()' succeeded.
316 lex_stack = _drop_stack_do(lex_stack);
319 /* Flush the lexer input stack: This will nip all input objects on the
320 * stack (but keeps the current top-of-stack) and marks the top-of-stack
321 * as inactive. Any further calls to lex_getch yield only EOF, and it's
322 * no longer possible to push something back.
324 * Returns TRUE if there is a head element (top-of-stack) that was not
325 * in the force-eof mode before this call.
332 if (NULL != lex_stack) {
333 retv = !lex_stack->force_eof;
334 lex_stack->force_eof = TRUE;
335 lex_stack->st_next = _drop_stack_do(
341 /* Push another file on the parsing stack. If the mode is NULL, create a
342 * FILE_INFO suitable for in-memory parsing; otherwise, create a
343 * FILE_INFO that is bound to a local/disc file. Note that 'path' must
344 * not be NULL, or the function will fail.
346 * Returns TRUE if a new info record was pushed onto the stack.
348 int/*BOOL*/ lex_push_file(
353 struct FILE_INFO * next = NULL;
356 next = lex_open(path, mode);
358 next->st_next = lex_stack;
362 return (NULL != next);
365 /* Pop, close & free the top of the include stack, unless the stack
366 * contains only a singleton input object. In that case the function
367 * fails, because the parser does not expect the input stack to be
370 * Returns TRUE if an object was successfully popped from the stack.
375 struct FILE_INFO * head = lex_stack;
376 struct FILE_INFO * tail = NULL;
379 tail = head->st_next;
385 return (NULL != tail);
388 /* Get include nesting level. This currently loops over the stack and
389 * counts elements; but since this is of concern only with an include
390 * statement and the nesting depth has a small limit, there's no
391 * bottleneck expected here.
393 * Returns the nesting level of includes, that is, the current depth of
394 * the lexer input stack.
402 struct FILE_INFO *ipf = lex_stack;
404 while (NULL != ipf) {
411 /* check if the current input is from a file */
415 return (NULL != lex_stack) && (NULL != lex_stack->fpi);
421 /* this became so simple, it could be a macro. But then,
422 * lex_stack needed to be global...
440 int curr_s; /* current state index */
444 curr_s = SCANNER_INIT_S;
447 for (i = 0; lexeme[i]; i++) {
448 while (curr_s && (lexeme[i] != SS_CH(sst[curr_s])))
449 curr_s = SS_OTHER_N(sst[curr_s]);
451 if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) {
452 if ('\0' == lexeme[i + 1]
453 && FOLLBY_NON_ACCEPTING
454 != SS_FB(sst[curr_s])) {
455 fb = SS_FB(sst[curr_s]);
460 curr_s = SS_MATCH_N(sst[curr_s]);
481 /* Allow a leading minus sign */
482 if (lexeme[i] == '-') {
489 /* Check that all the remaining characters are digits */
490 for (; lexeme[i] != '\0'; i++) {
491 if (!isdigit((u_char)lexeme[i]))
498 /* Reject numbers that fit in unsigned but not in signed int */
499 if (1 == sscanf(lexeme, "%u", &u_val))
500 return (u_val <= INT_MAX);
506 /* U_int -- assumes is_integer() has returned FALSE */
516 if ('0' == lexeme[i] && 'x' == tolower((u_char)lexeme[i + 1])) {
523 /* Check that all the remaining characters are digits */
524 for (; lexeme[i] != '\0'; i++) {
525 if (is_hex && !isxdigit((u_char)lexeme[i]))
527 if (!is_hex && !isdigit((u_char)lexeme[i]))
541 u_int num_digits = 0; /* Number of digits read */
546 /* Check for an optional '+' or '-' */
547 if ('+' == lexeme[i] || '-' == lexeme[i])
550 /* Read the integer part */
551 for (; lexeme[i] && isdigit((u_char)lexeme[i]); i++)
554 /* Check for the optional decimal point */
555 if ('.' == lexeme[i]) {
557 /* Check for any digits after the decimal point */
558 for (; lexeme[i] && isdigit((u_char)lexeme[i]); i++)
563 * The number of digits in both the decimal part and the
564 * fraction part must not be zero at this point
569 /* Check if we are done */
573 /* There is still more input, read the exponent */
574 if ('e' == tolower((u_char)lexeme[i]))
579 /* Read an optional Sign */
580 if ('+' == lexeme[i] || '-' == lexeme[i])
583 /* Now read the exponent part */
584 while (lexeme[i] && isdigit((u_char)lexeme[i]))
587 /* Check if we are done */
595 /* is_special() - Test whether a character is a token */
601 return strchr(special_chars, ch) != NULL;
610 if ((old_config_style && (ch == '\n')) ||
611 (!old_config_style && (ch == ';')))
618 quote_if_needed(char *str)
625 octets = len + 2 + 1;
626 ret = emalloc(octets);
628 && (strcspn(str, special_chars) < len
629 || strchr(str, ' ') != NULL)) {
630 snprintf(ret, octets, "\"%s\"", str);
632 strlcpy(ret, str, octets);
646 * ignore end of line whitespace
649 while (*pch && isspace((u_char)*pch))
653 yylval.Integer = T_EOC;
654 return yylval.Integer;
657 yylval.String = estrdup(lexeme);
663 * yylex() - function that does the actual scanning.
664 * Bison expects this function to be called yylex and for it to take no
665 * input and return an int.
666 * Conceptually yylex "returns" yylval as well as the actual return
667 * value representing the token or type.
672 static follby followedby = FOLLBY_TOKEN;
677 int token; /* The return value */
681 yylval_was_set = FALSE;
684 /* Ignore whitespace at the beginning */
685 while (EOF != (ch = lex_getch(lex_stack)) &&
689 ; /* Null Statement */
693 if ( ! lex_pop_file())
698 } else if (is_EOC(ch)) {
700 /* end FOLLBY_STRINGS_TO_EOC effect */
701 followedby = FOLLBY_TOKEN;
705 } else if (is_special(ch) && FOLLBY_TOKEN == followedby) {
706 /* special chars are their own token values */
709 * '=' outside simulator configuration implies
710 * a single string following as in:
711 * setvar Owner = "The Boss" default
713 if ('=' == ch && old_config_style)
714 followedby = FOLLBY_STRING;
715 yytext[0] = (char)ch;
719 lex_ungetch(ch, lex_stack);
721 /* save the position of start of the token */
722 lex_stack->tokpos = lex_stack->curpos;
724 /* Read in the lexeme */
726 while (EOF != (ch = lex_getch(lex_stack))) {
728 yytext[i] = (char)ch;
730 /* Break on whitespace or a special character */
731 if (isspace(ch) || is_EOC(ch)
733 || (FOLLBY_TOKEN == followedby
737 /* Read the rest of the line on reading a start
738 of comment character */
740 while (EOF != (ch = lex_getch(lex_stack))
742 ; /* Null Statement */
747 if (i >= COUNTOF(yytext))
750 /* Pick up all of the string inside between " marks, to
751 * end of line. If we make it to EOL without a
752 * terminating " assume it for them.
754 * XXX - HMS: I'm not sure we want to assume the closing "
758 while (EOF != (ch = lex_getch(lex_stack)) &&
759 ch != '"' && ch != '\n') {
760 yytext[i++] = (char)ch;
761 if (i >= COUNTOF(yytext))
765 * yytext[i] will be pushed back as not part of
766 * this lexeme, but any closing quote should
767 * not be pushed back, so we read another char.
770 ch = lex_getch(lex_stack);
772 /* Pushback the last character read that is not a part
773 * of this lexeme. This fails silently if ch is EOF,
774 * but then the EOF condition persists and is handled on
775 * the next turn by the include stack mechanism.
777 lex_ungetch(ch, lex_stack);
782 /* Now return the desired token */
784 /* First make sure that the parser is *not* expecting a string
785 * as the next token (based on the previous token that was
786 * returned) and that we haven't read a string.
789 if (followedby == FOLLBY_TOKEN && !instring) {
790 token = is_keyword(yytext, &followedby);
793 * T_Server is exceptional as it forces the
794 * following token to be a string in the
795 * non-simulator parts of the configuration,
796 * but in the simulator configuration section,
797 * "server" is followed by "=" which must be
798 * recognized as a token not a string.
800 if (T_Server == token && !old_config_style)
801 followedby = FOLLBY_TOKEN;
803 } else if (is_integer(yytext)) {
804 yylval_was_set = TRUE;
806 if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0
807 && ((errno == EINVAL) || (errno == ERANGE))) {
809 "Integer cannot be represented: %s",
811 if (lex_from_file()) {
814 /* force end of parsing */
821 } else if (is_u_int(yytext)) {
822 yylval_was_set = TRUE;
823 if ('0' == yytext[0] &&
824 'x' == tolower((unsigned long)yytext[1]))
825 converted = sscanf(&yytext[2], "%x",
828 converted = sscanf(yytext, "%u",
830 if (1 != converted) {
832 "U_int cannot be represented: %s",
834 if (lex_from_file()) {
837 /* force end of parsing */
844 } else if (is_double(yytext)) {
845 yylval_was_set = TRUE;
847 if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) {
849 "Double too large to represent: %s",
857 /* Default: Everything is a string */
858 yylval_was_set = TRUE;
859 token = create_string_token(yytext);
865 * Either followedby is not FOLLBY_TOKEN or this lexeme is part
866 * of a string. Hence, we need to return T_String.
868 * _Except_ we might have a -4 or -6 flag on an association
869 * configuration line (server, peer, pool, etc.).
871 * This is a terrible hack, but the grammar is ambiguous so we
872 * don't have a choice. [SK]
874 * The ambiguity is in the keyword scanner, not ntp_parser.y.
875 * We do not require server addresses be quoted in ntp.conf,
876 * complicating the scanner's job. To avoid trying (and
877 * failing) to match an IP address or DNS name to a keyword,
878 * the association keywords use FOLLBY_STRING in the keyword
879 * table, which tells the scanner to force the next token to be
880 * a T_String, so it does not try to match a keyword but rather
881 * expects a string when -4/-6 modifiers to server, peer, etc.
883 * restrict -4 and restrict -6 parsing works correctly without
884 * this hack, as restrict uses FOLLBY_TOKEN. [DH]
886 if ('-' == yytext[0]) {
887 if ('4' == yytext[1]) {
890 } else if ('6' == yytext[1]) {
896 if (FOLLBY_STRING == followedby)
897 followedby = FOLLBY_TOKEN;
899 yylval_was_set = TRUE;
900 token = create_string_token(yytext);
904 DPRINTF(4,("\t<end of command>\n"));
906 DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext,
910 yylval.Integer = token;
915 yytext[min(sizeof(yytext) - 1, 50)] = 0;
917 "configuration item on line %d longer than limit of %lu, began with '%s'",
918 lex_stack->curpos.nline, (u_long)min(sizeof(yytext) - 1, 50),
922 * If we hit the length limit reading the startup configuration
926 exit(sizeof(yytext) - 1);
929 * If it's runtime configuration via ntpq :config treat it as
930 * if the configuration text ended before the too-long lexeme,
931 * hostname, or string.