4 * The source code for a simple lexical analyzer.
6 * Written By: Sachin Kamboj
7 * University of Delaware
23 #include "ntp_config.h"
25 #include "ntp_scanner.h"
26 #include "ntp_parser.h"
28 /* ntp_keyword.h declares finite state machine and token text */
29 #include "ntp_keyword.h"
33 /* SCANNER GLOBAL VARIABLES
34 * ------------------------
37 #define MAX_LEXEME (1024 + 1) /* The maximum size of a lexeme */
38 char yytext[MAX_LEXEME]; /* Buffer for storing the input text/lexeme */
39 u_int32 conf_file_sum; /* Simple sum of characters read */
41 static struct FILE_INFO * lex_stack = NULL;
50 /* SCANNER GLOBAL VARIABLES
51 * ------------------------
53 const char special_chars[] = "{}(),;|=";
60 static int is_keyword(char *lexeme, follby *pfollowedby);
64 * keyword() - Return the keyword associated with token T_ identifier.
65 * See also token_name() for the string-ized T_ identifier.
66 * Example: keyword(T_Server) returns "server"
67 * token_name(T_Server) returns "T_Server"
77 i = token - LOWEST_KEYWORD_ID;
79 if (i < COUNTOF(keyword_text))
80 text = keyword_text[i];
86 : "(keyword not found)";
90 /* FILE & STRING BUFFER INTERFACE
91 * ------------------------------
93 * This set out as a couple of wrapper functions around the standard C
94 * fgetc and ungetc functions in order to include positional
95 * bookkeeping. Alas, this is no longer a good solution with nested
96 * input files and the possibility to send configuration commands via
99 * Now there are a few functions to maintain a stack of nested input
100 * sources (though nesting is only allowed for disk files) and from the
101 * scanner / parser point of view there's no difference between both
104 * The 'fgetc()' / 'ungetc()' replacements now operate on a FILE_INFO
105 * structure. Instead of trying different 'ungetc()' strategies for file
106 * and buffer based parsing, we keep the backup char in our own
107 * FILE_INFO structure. This is sufficient, as the parser does *not*
108 * jump around via 'seek' or the like, and there's no need to
109 * check/clear the backup store in other places than 'lex_getch()'.
113 * Allocate an info structure and attach it to a file.
115 * Note: When 'mode' is NULL, then the INFO block will be set up to
116 * contain a NULL file pointer, as suited for remote config command
117 * parsing. Otherwise having a NULL file pointer is considered an error,
118 * and a NULL info block pointer is returned to indicate failure!
120 * Note: We use a variable-sized structure to hold a copy of the file
121 * name (or, more properly, the input source description). This is more
122 * secure than keeping a reference to some other storage that might go
125 static struct FILE_INFO *
131 struct FILE_INFO *stream;
134 nnambuf = strlen(path);
135 stream = emalloc_zero(sizeof(*stream) + nnambuf);
136 stream->curpos.nline = 1;
137 stream->backch = EOF;
138 /* copy name with memcpy -- trailing NUL already there! */
139 memcpy(stream->fname, path, nnambuf);
142 stream->fpi = fopen(path, mode);
143 if (NULL == stream->fpi) {
151 /* get next character from buffer or file. This will return any putback
152 * character first; it will also make sure the last line is at least
153 * virtually terminated with a '\n'.
157 struct FILE_INFO *stream
162 if (NULL == stream || stream->force_eof)
165 if (EOF != stream->backch) {
167 stream->backch = EOF;
170 } else if (stream->fpi) {
171 /* fetch next 7-bit ASCII char (or EOF) from file */
172 while ((ch = fgetc(stream->fpi)) != EOF && ch > SCHAR_MAX)
173 stream->curpos.ncol++;
176 stream->curpos.ncol++;
179 /* fetch next 7-bit ASCII char from buffer */
181 scan = &remote_config.buffer[remote_config.pos];
182 while ((ch = (u_char)*scan) > SCHAR_MAX) {
184 stream->curpos.ncol++;
188 stream->curpos.ncol++;
192 remote_config.pos = (int)(scan - remote_config.buffer);
195 /* If the last line ends without '\n', generate one. This
196 * happens most likely on Windows, where editors often have a
197 * sloppy concept of a line.
199 if (EOF == ch && stream->curpos.ncol != 0)
202 /* update scan position tallies */
204 stream->bakpos = stream->curpos;
205 stream->curpos.nline++;
206 stream->curpos.ncol = 0;
212 /* Note: lex_ungetch will fail to track more than one line of push
213 * back. But since it guarantees only one char of back storage anyway,
214 * this should not be a problem.
219 struct FILE_INFO *stream
222 /* check preconditions */
223 if (NULL == stream || stream->force_eof)
225 if (EOF != stream->backch || EOF == ch)
228 /* keep for later reference and update checksum */
229 stream->backch = (u_char)ch;
231 conf_file_sum -= stream->backch;
233 /* update position */
234 if (stream->backch == '\n') {
235 stream->curpos = stream->bakpos;
236 stream->bakpos.ncol = -1;
238 stream->curpos.ncol--;
239 return stream->backch;
242 /* dispose of an input structure. If the file pointer is not NULL, close
243 * the file. This function does not check the result of 'fclose()'.
247 struct FILE_INFO *stream
250 if (NULL != stream) {
251 if (NULL != stream->fpi)
260 * Nested input sources are a bit tricky at first glance. We deal with
261 * this problem using a stack of input sources, that is, a forward
262 * linked list of FILE_INFO structs.
264 * This stack is never empty during parsing; while an encounter with EOF
265 * can and will remove nested input sources, removing the last element
266 * in the stack will not work during parsing, and the EOF condition of
267 * the outermost input file remains until the parser folds up.
270 static struct FILE_INFO *
272 struct FILE_INFO * head
275 struct FILE_INFO * tail;
276 while (NULL != head) {
277 tail = head->st_next;
286 /* Create a singleton input source on an empty lexer stack. This will
287 * fail if there is already an input source, or if the underlying disk
288 * file cannot be opened.
290 * Returns TRUE if a new input object was successfully created.
298 if (NULL != lex_stack || NULL == path)
301 lex_stack = lex_open(path, mode);
302 return (NULL != lex_stack);
305 /* This removes *all* input sources from the stack, leaving the head
306 * pointer as NULL. Any attempt to parse in that state is likely to bomb
307 * with segmentation faults or the like.
309 * In other words: Use this to clean up after parsing, and do not parse
310 * anything until the next 'lex_init_stack()' succeeded.
315 lex_stack = _drop_stack_do(lex_stack);
318 /* Flush the lexer input stack: This will nip all input objects on the
319 * stack (but keeps the current top-of-stack) and marks the top-of-stack
320 * as inactive. Any further calls to lex_getch yield only EOF, and it's
321 * no longer possible to push something back.
323 * Returns TRUE if there is a head element (top-of-stack) that was not
324 * in the force-eof mode before this call.
331 if (NULL != lex_stack) {
332 retv = !lex_stack->force_eof;
333 lex_stack->force_eof = TRUE;
334 lex_stack->st_next = _drop_stack_do(
340 /* Push another file on the parsing stack. If the mode is NULL, create a
341 * FILE_INFO suitable for in-memory parsing; otherwise, create a
342 * FILE_INFO that is bound to a local/disc file. Note that 'path' must
343 * not be NULL, or the function will fail.
345 * Returns TRUE if a new info record was pushed onto the stack.
347 int/*BOOL*/ lex_push_file(
352 struct FILE_INFO * next = NULL;
355 next = lex_open(path, mode);
357 next->st_next = lex_stack;
361 return (NULL != next);
364 /* Pop, close & free the top of the include stack, unless the stack
365 * contains only a singleton input object. In that case the function
366 * fails, because the parser does not expect the input stack to be
369 * Returns TRUE if an object was successfully popped from the stack.
374 struct FILE_INFO * head = lex_stack;
375 struct FILE_INFO * tail = NULL;
378 tail = head->st_next;
384 return (NULL != tail);
387 /* Get include nesting level. This currently loops over the stack and
388 * counts elements; but since this is of concern only with an include
389 * statement and the nesting depth has a small limit, there's no
390 * bottleneck expected here.
392 * Returns the nesting level of includes, that is, the current depth of
393 * the lexer input stack.
401 struct FILE_INFO *ipf = lex_stack;
403 while (NULL != ipf) {
410 /* check if the current input is from a file */
414 return (NULL != lex_stack) && (NULL != lex_stack->fpi);
420 /* this became so simple, it could be a macro. But then,
421 * lex_stack needed to be global...
439 int curr_s; /* current state index */
443 curr_s = SCANNER_INIT_S;
446 for (i = 0; lexeme[i]; i++) {
447 while (curr_s && (lexeme[i] != SS_CH(sst[curr_s])))
448 curr_s = SS_OTHER_N(sst[curr_s]);
450 if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) {
451 if ('\0' == lexeme[i + 1]
452 && FOLLBY_NON_ACCEPTING
453 != SS_FB(sst[curr_s])) {
454 fb = SS_FB(sst[curr_s]);
459 curr_s = SS_MATCH_N(sst[curr_s]);
480 /* Allow a leading minus sign */
481 if (lexeme[i] == '-') {
488 /* Check that all the remaining characters are digits */
489 for (; lexeme[i] != '\0'; i++) {
490 if (!isdigit((u_char)lexeme[i]))
497 /* Reject numbers that fit in unsigned but not in signed int */
498 if (1 == sscanf(lexeme, "%u", &u_val))
499 return (u_val <= INT_MAX);
505 /* U_int -- assumes is_integer() has returned FALSE */
515 if ('0' == lexeme[i] && 'x' == tolower((u_char)lexeme[i + 1])) {
522 /* Check that all the remaining characters are digits (hex digits when is_hex is set) */
523 for (; lexeme[i] != '\0'; i++) {
524 if (is_hex && !isxdigit((u_char)lexeme[i]))
526 if (!is_hex && !isdigit((u_char)lexeme[i]))
540 u_int num_digits = 0; /* Number of digits read */
545 /* Check for an optional '+' or '-' */
546 if ('+' == lexeme[i] || '-' == lexeme[i])
549 /* Read the integer part */
550 for (; lexeme[i] && isdigit((u_char)lexeme[i]); i++)
553 /* Check for the optional decimal point */
554 if ('.' == lexeme[i]) {
556 /* Check for any digits after the decimal point */
557 for (; lexeme[i] && isdigit((u_char)lexeme[i]); i++)
562 * The number of digits in both the integer part and the
563 * fraction part must not be zero at this point
568 /* Check if we are done */
572 /* There is still more input, read the exponent */
573 if ('e' == tolower((u_char)lexeme[i]))
578 /* Read an optional Sign */
579 if ('+' == lexeme[i] || '-' == lexeme[i])
582 /* Now read the exponent part */
583 while (lexeme[i] && isdigit((u_char)lexeme[i]))
586 /* Check if we are done */
594 /* is_special() - Test whether a character is a token */
600 return strchr(special_chars, ch) != NULL;
609 if ((old_config_style && (ch == '\n')) ||
610 (!old_config_style && (ch == ';')))
617 quote_if_needed(char *str)
624 octets = len + 2 + 1;
625 ret = emalloc(octets);
627 && (strcspn(str, special_chars) < len
628 || strchr(str, ' ') != NULL)) {
629 snprintf(ret, octets, "\"%s\"", str);
631 strlcpy(ret, str, octets);
645 * ignore end of line whitespace
648 while (*pch && isspace((u_char)*pch))
652 yylval.Integer = T_EOC;
653 return yylval.Integer;
656 yylval.String = estrdup(lexeme);
662 * yylex() - function that does the actual scanning.
663 * Bison expects this function to be called yylex and for it to take no
664 * input and return an int.
665 * Conceptually yylex "returns" yylval as well as the actual return
666 * value representing the token or type.
671 static follby followedby = FOLLBY_TOKEN;
676 int token; /* The return value */
680 yylval_was_set = FALSE;
683 /* Ignore whitespace at the beginning */
684 while (EOF != (ch = lex_getch(lex_stack)) &&
688 ; /* Null Statement */
692 if ( ! lex_pop_file())
697 } else if (is_EOC(ch)) {
699 /* end FOLLBY_STRINGS_TO_EOC effect */
700 followedby = FOLLBY_TOKEN;
704 } else if (is_special(ch) && FOLLBY_TOKEN == followedby) {
705 /* special chars are their own token values */
708 * '=' outside simulator configuration implies
709 * a single string following as in:
710 * setvar Owner = "The Boss" default
712 if ('=' == ch && old_config_style)
713 followedby = FOLLBY_STRING;
714 yytext[0] = (char)ch;
718 lex_ungetch(ch, lex_stack);
720 /* save the position of start of the token */
721 lex_stack->tokpos = lex_stack->curpos;
723 /* Read in the lexeme */
725 while (EOF != (ch = lex_getch(lex_stack))) {
727 yytext[i] = (char)ch;
729 /* Break on whitespace or a special character */
730 if (isspace(ch) || is_EOC(ch)
732 || (FOLLBY_TOKEN == followedby
736 /* Read the rest of the line on reading a start
737 of comment character */
739 while (EOF != (ch = lex_getch(lex_stack))
741 ; /* Null Statement */
746 if (i >= COUNTOF(yytext))
749 /* Pick up all of the string inside between " marks, to
750 * end of line. If we make it to EOL without a
751 * terminating " assume it for them.
753 * XXX - HMS: I'm not sure we want to assume the closing "
757 while (EOF != (ch = lex_getch(lex_stack)) &&
758 ch != '"' && ch != '\n') {
759 yytext[i++] = (char)ch;
760 if (i >= COUNTOF(yytext))
764 * yytext[i] will be pushed back as not part of
765 * this lexeme, but any closing quote should
766 * not be pushed back, so we read another char.
769 ch = lex_getch(lex_stack);
771 /* Pushback the last character read that is not a part
772 * of this lexeme. This fails silently if ch is EOF,
773 * but then the EOF condition persists and is handled on
774 * the next turn by the include stack mechanism.
776 lex_ungetch(ch, lex_stack);
781 /* Now return the desired token */
783 /* First make sure that the parser is *not* expecting a string
784 * as the next token (based on the previous token that was
785 * returned) and that we haven't read a string.
788 if (followedby == FOLLBY_TOKEN && !instring) {
789 token = is_keyword(yytext, &followedby);
792 * T_Server is exceptional as it forces the
793 * following token to be a string in the
794 * non-simulator parts of the configuration,
795 * but in the simulator configuration section,
796 * "server" is followed by "=" which must be
797 * recognized as a token not a string.
799 if (T_Server == token && !old_config_style)
800 followedby = FOLLBY_TOKEN;
802 } else if (is_integer(yytext)) {
803 yylval_was_set = TRUE;
805 if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0
806 && ((errno == EINVAL) || (errno == ERANGE))) {
808 "Integer cannot be represented: %s",
810 if (lex_from_file()) {
813 /* force end of parsing */
820 } else if (is_u_int(yytext)) {
821 yylval_was_set = TRUE;
822 if ('0' == yytext[0] &&
823 'x' == tolower((unsigned long)yytext[1]))
824 converted = sscanf(&yytext[2], "%x",
827 converted = sscanf(yytext, "%u",
829 if (1 != converted) {
831 "U_int cannot be represented: %s",
833 if (lex_from_file()) {
836 /* force end of parsing */
843 } else if (is_double(yytext)) {
844 yylval_was_set = TRUE;
846 if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) {
848 "Double too large to represent: %s",
856 /* Default: Everything is a string */
857 yylval_was_set = TRUE;
858 token = create_string_token(yytext);
864 * Either followedby is not FOLLBY_TOKEN or this lexeme is part
865 * of a string. Hence, we need to return T_String.
867 * _Except_ we might have a -4 or -6 flag on an association
868 * configuration line (server, peer, pool, etc.).
870 * This is a terrible hack, but the grammar is ambiguous so we
871 * don't have a choice. [SK]
873 * The ambiguity is in the keyword scanner, not ntp_parser.y.
874 * We do not require server addresses be quoted in ntp.conf,
875 * complicating the scanner's job. To avoid trying (and
876 * failing) to match an IP address or DNS name to a keyword,
877 * the association keywords use FOLLBY_STRING in the keyword
878 * table, which tells the scanner to force the next token to be
879 * a T_String, so it does not try to match a keyword but rather
880 * expects a string when -4/-6 modifiers to server, peer, etc.
882 * restrict -4 and restrict -6 parsing works correctly without
883 * this hack, as restrict uses FOLLBY_TOKEN. [DH]
885 if ('-' == yytext[0]) {
886 if ('4' == yytext[1]) {
889 } else if ('6' == yytext[1]) {
896 if (FOLLBY_STRING == followedby)
897 followedby = FOLLBY_TOKEN;
899 yylval_was_set = TRUE;
900 token = create_string_token(yytext);
904 DPRINTF(4,("\t<end of command>\n"));
906 DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext,
910 yylval.Integer = token;
915 yytext[min(sizeof(yytext) - 1, 50)] = 0;
917 "configuration item on line %d longer than limit of %lu, began with '%s'",
918 lex_stack->curpos.nline, (u_long)min(sizeof(yytext) - 1, 50),
922 * If we hit the length limit reading the startup configuration
926 exit(sizeof(yytext) - 1);
929 * If it's runtime configuration via ntpq :config treat it as
930 * if the configuration text ended before the too-long lexeme,
931 * hostname, or string.