4 * The source code for a simple lexical analyzer.
6 * Written By: Sachin Kamboj
7 * University of Delaware
23 #include "ntp_config.h"
25 #include "ntp_scanner.h"
26 #include "ntp_parser.h"
28 /* ntp_keyword.h declares finite state machine and token text */
29 #include "ntp_keyword.h"
33 /* SCANNER GLOBAL VARIABLES
34 * ------------------------
37 #define MAX_LEXEME (1024 + 1) /* The maximum size of a lexeme */
38 char yytext[MAX_LEXEME]; /* Buffer for storing the input text/lexeme */
39 u_int32 conf_file_sum; /* Simple sum of characters read */
41 static struct FILE_INFO * lex_stack = NULL;
50 /* SCANNER GLOBAL VARIABLES
51 * ------------------------
53 const char special_chars[] = "{}(),;|=";
60 static int is_keyword(char *lexeme, follby *pfollowedby);
64 * keyword() - Return the keyword associated with token T_ identifier.
65 * See also token_name() for the string-ized T_ identifier.
66 * Example: keyword(T_Server) returns "server"
67 * token_name(T_Server) returns "T_Server"
78 i = token - LOWEST_KEYWORD_ID;
81 case T_ServerresponseFuzz:
82 text = "serverresponse fuzz";
86 if (i < COUNTOF(keyword_text)) {
87 text = keyword_text[i];
89 snprintf(sbuf, sizeof sbuf,
90 "(keyword #%u not found)", token);
99 /* FILE & STRING BUFFER INTERFACE
100 * ------------------------------
102 * This set out as a couple of wrapper functions around the standard C
103 * fgetc and ungetc functions in order to include positional
104 * bookkeeping. Alas, this is no longer a good solution with nested
105 * input files and the possibility to send configuration commands via
106 * 'ntpdc' and 'ntpq'.
108 * Now there are a few functions to maintain a stack of nested input
109 * sources (though nesting is only allowed for disk files) and from the
110 * scanner / parser point of view there's no difference between both
113 * The 'fgetc()' / 'ungetc()' replacements now operate on a FILE_INFO
114 * structure. Instead of trying different 'ungetc()' strategies for file
115 * and buffer based parsing, we keep the backup char in our own
116 * FILE_INFO structure. This is sufficient, as the parser does *not*
117 * jump around via 'seek' or the like, and there's no need to
118 * check/clear the backup store in other places than 'lex_getch()'.
122 * Allocate an info structure and attach it to a file.
124 * Note: When 'mode' is NULL, then the INFO block will be set up to
125 * contain a NULL file pointer, as suited for remote config command
126 * parsing. Otherwise having a NULL file pointer is considered an error,
127 * and a NULL info block pointer is returned to indicate failure!
129 * Note: We use a variable-sized structure to hold a copy of the file
130 * name (or, more proper, the input source description). This is more
131 * secure than keeping a reference to some other storage that might go
134 static struct FILE_INFO *
140 struct FILE_INFO *stream;
143 nnambuf = strlen(path);
144 stream = emalloc_zero(sizeof(*stream) + nnambuf);
145 stream->curpos.nline = 1;
146 stream->backch = EOF;
147 /* copy name with memcpy -- trailing NUL already there! */
148 memcpy(stream->fname, path, nnambuf);
151 stream->fpi = fopen(path, mode);
152 if (NULL == stream->fpi) {
160 /* get next character from buffer or file. This will return any putback
161 * character first; it will also make sure the last line is at least
162 * virtually terminated with a '\n'.
166 struct FILE_INFO *stream
171 if (NULL == stream || stream->force_eof)
174 if (EOF != stream->backch) {
176 stream->backch = EOF;
179 stream->curpos.ncol++;
180 } else if (stream->fpi) {
181 /* fetch next 7-bit ASCII char (or EOF) from file */
182 while ((ch = fgetc(stream->fpi)) != EOF && ch > SCHAR_MAX)
183 stream->curpos.ncol++;
186 stream->curpos.ncol++;
189 /* fetch next 7-bit ASCII char from buffer */
191 scan = &remote_config.buffer[remote_config.pos];
192 while ((ch = (u_char)*scan) > SCHAR_MAX) {
194 stream->curpos.ncol++;
198 stream->curpos.ncol++;
202 remote_config.pos = (int)(scan - remote_config.buffer);
205 /* If the last line ends without '\n', generate one. This
206 * happens most likely on Windows, where editors often have a
207 * sloppy concept of a line.
209 if (EOF == ch && stream->curpos.ncol != 0)
212 /* update scan position tallies */
214 stream->bakpos = stream->curpos;
215 stream->curpos.nline++;
216 stream->curpos.ncol = 0;
222 /* Note: lex_ungetch will fail to track more than one line of push
223 * back. But since it guarantees only one char of back storage anyway,
224 * this should not be a problem.
229 struct FILE_INFO *stream
232 /* check preconditions */
233 if (NULL == stream || stream->force_eof)
235 if (EOF != stream->backch || EOF == ch)
238 /* keep for later reference and update checksum */
239 stream->backch = (u_char)ch;
241 conf_file_sum -= stream->backch;
243 /* update position */
244 if (stream->backch == '\n') {
245 stream->curpos = stream->bakpos;
246 stream->bakpos.ncol = -1;
248 stream->curpos.ncol--;
249 return stream->backch;
252 /* dispose of an input structure. If the file pointer is not NULL, close
253 * the file. This function does not check the result of 'fclose()'.
257 struct FILE_INFO *stream
260 if (NULL != stream) {
261 if (NULL != stream->fpi)
270 * Nested input sources are a bit tricky at first glance. We deal with
271 * this problem using a stack of input sources, that is, a forward
272 * linked list of FILE_INFO structs.
274 * This stack is never empty during parsing; while an encounter with EOF
275 * can and will remove nested input sources, removing the last element
276 * in the stack will not work during parsing, and the EOF condition of
277 * the outermost input file remains until the parser folds up.
280 static struct FILE_INFO *
282 struct FILE_INFO * head
285 struct FILE_INFO * tail;
286 while (NULL != head) {
287 tail = head->st_next;
296 /* Create a singleton input source on an empty lexer stack. This will
297 * fail if there is already an input source, or if the underlying disk
298 * file cannot be opened.
300 * Returns TRUE if a new input object was successfully created.
308 if (NULL != lex_stack || NULL == path)
311 lex_stack = lex_open(path, mode);
312 return (NULL != lex_stack);
315 /* This removes *all* input sources from the stack, leaving the head
316 * pointer as NULL. Any attempt to parse in that state is likely to bomb
317 * with segmentation faults or the like.
319 * In other words: Use this to clean up after parsing, and do not parse
320 * anything until the next 'lex_init_stack()' succeeded.
325 lex_stack = _drop_stack_do(lex_stack);
328 /* Flush the lexer input stack: This will nip all input objects on the
329 * stack (but keeps the current top-of-stack) and marks the top-of-stack
330 * as inactive. Any further calls to lex_getch yield only EOF, and it's
331 * no longer possible to push something back.
333 * Returns TRUE if there is a head element (top-of-stack) that was not
334 * in the force-eof mode before this call.
341 if (NULL != lex_stack) {
342 retv = !lex_stack->force_eof;
343 lex_stack->force_eof = TRUE;
344 lex_stack->st_next = _drop_stack_do(
350 /* Push another file on the parsing stack. If the mode is NULL, create a
351 * FILE_INFO suitable for in-memory parsing; otherwise, create a
352 * FILE_INFO that is bound to a local/disc file. Note that 'path' must
353 * not be NULL, or the function will fail.
355 * Returns TRUE if a new info record was pushed onto the stack.
357 int/*BOOL*/ lex_push_file(
362 struct FILE_INFO * next = NULL;
365 next = lex_open(path, mode);
367 next->st_next = lex_stack;
371 return (NULL != next);
374 /* Pop, close & free the top of the include stack, unless the stack
375 * contains only a singleton input object. In that case the function
376 * fails, because the parser does not expect the input stack to be
379 * Returns TRUE if an object was successfully popped from the stack.
384 struct FILE_INFO * head = lex_stack;
385 struct FILE_INFO * tail = NULL;
388 tail = head->st_next;
394 return (NULL != tail);
397 /* Get include nesting level. This currently loops over the stack and
398 * counts elements; but since this is of concern only with an include
399 * statement and the nesting depth has a small limit, there's no
400 * bottleneck expected here.
402 * Returns the nesting level of includes, that is, the current depth of
403 * the lexer input stack.
411 struct FILE_INFO *ipf = lex_stack;
413 while (NULL != ipf) {
420 /* check if the current input is from a file */
424 return (NULL != lex_stack) && (NULL != lex_stack->fpi);
430 /* this became so simple, it could be a macro. But then,
431 * lex_stack needed to be global...
449 int curr_s; /* current state index */
453 curr_s = SCANNER_INIT_S;
456 for (i = 0; lexeme[i]; i++) {
457 while (curr_s && (lexeme[i] != SS_CH(sst[curr_s])))
458 curr_s = SS_OTHER_N(sst[curr_s]);
460 if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) {
461 if ('\0' == lexeme[i + 1]
462 && FOLLBY_NON_ACCEPTING
463 != SS_FB(sst[curr_s])) {
464 fb = SS_FB(sst[curr_s]);
469 curr_s = SS_MATCH_N(sst[curr_s]);
490 /* Allow a leading minus sign */
491 if (lexeme[i] == '-') {
498 /* Check that all the remaining characters are digits */
499 for (; lexeme[i] != '\0'; i++) {
500 if (!isdigit((u_char)lexeme[i]))
507 /* Reject numbers that fit in unsigned but not in signed int */
508 if (1 == sscanf(lexeme, "%u", &u_val))
509 return (u_val <= INT_MAX);
515 /* U_int -- assumes is_integer() has returned FALSE */
525 if ('0' == lexeme[i] && 'x' == tolower((u_char)lexeme[i + 1])) {
532 /* Check that all the remaining characters are digits */
533 for (; lexeme[i] != '\0'; i++) {
534 if (is_hex && !isxdigit((u_char)lexeme[i]))
536 if (!is_hex && !isdigit((u_char)lexeme[i]))
550 u_int num_digits = 0; /* Number of digits read */
555 /* Check for an optional '+' or '-' */
556 if ('+' == lexeme[i] || '-' == lexeme[i])
559 /* Read the integer part */
560 for (; lexeme[i] && isdigit((u_char)lexeme[i]); i++)
563 /* Check for the optional decimal point */
564 if ('.' == lexeme[i]) {
566 /* Check for any digits after the decimal point */
567 for (; lexeme[i] && isdigit((u_char)lexeme[i]); i++)
572 * The number of digits in both the decimal part and the
573 * fraction part must not be zero at this point
578 /* Check if we are done */
582 /* There is still more input, read the exponent */
583 if ('e' == tolower((u_char)lexeme[i]))
588 /* Read an optional Sign */
589 if ('+' == lexeme[i] || '-' == lexeme[i])
592 /* Now read the exponent part */
593 while (lexeme[i] && isdigit((u_char)lexeme[i]))
596 /* Check if we are done */
604 /* is_special() - Test whether a character is a token */
610 return strchr(special_chars, ch) != NULL;
619 if ((old_config_style && (ch == '\n')) ||
620 (!old_config_style && (ch == ';')))
627 quote_if_needed(char *str)
634 octets = len + 2 + 1;
635 ret = emalloc(octets);
637 && (strcspn(str, special_chars) < len
638 || strchr(str, ' ') != NULL)) {
639 snprintf(ret, octets, "\"%s\"", str);
641 strlcpy(ret, str, octets);
655 * ignore end of line whitespace
658 while (*pch && isspace((u_char)*pch))
662 yylval.Integer = T_EOC;
663 return yylval.Integer;
666 yylval.String = estrdup(lexeme);
672 * yylex() - function that does the actual scanning.
673 * Bison expects this function to be called yylex and for it to take no
674 * input and return an int.
675 * Conceptually yylex "returns" yylval as well as the actual return
676 * value representing the token or type.
681 static follby followedby = FOLLBY_TOKEN;
686 int token; /* The return value */
690 yylval_was_set = FALSE;
693 /* Ignore whitespace at the beginning */
694 while (EOF != (ch = lex_getch(lex_stack)) &&
698 ; /* Null Statement */
702 if ( ! lex_pop_file())
707 } else if (is_EOC(ch)) {
709 /* end FOLLBY_STRINGS_TO_EOC effect */
710 followedby = FOLLBY_TOKEN;
714 } else if (is_special(ch) && FOLLBY_TOKEN == followedby) {
715 /* special chars are their own token values */
718 * '=' outside simulator configuration implies
719 * a single string following as in:
720 * setvar Owner = "The Boss" default
722 if ('=' == ch && old_config_style)
723 followedby = FOLLBY_STRING;
724 yytext[0] = (char)ch;
728 lex_ungetch(ch, lex_stack);
730 /* save the position of start of the token */
731 lex_stack->tokpos = lex_stack->curpos;
733 /* Read in the lexeme */
735 while (EOF != (ch = lex_getch(lex_stack))) {
737 yytext[i] = (char)ch;
739 /* Break on whitespace or a special character */
740 if (isspace(ch) || is_EOC(ch)
742 || (FOLLBY_TOKEN == followedby
746 /* Read the rest of the line on reading a start
747 of comment character */
749 while (EOF != (ch = lex_getch(lex_stack))
751 ; /* Null Statement */
756 if (i >= COUNTOF(yytext))
759 /* Pick up all of the string inside between " marks, to
760 * end of line. If we make it to EOL without a
761 * terminating " assume it for them.
763 * XXX - HMS: I'm not sure we want to assume the closing "
767 while (EOF != (ch = lex_getch(lex_stack)) &&
768 ch != '"' && ch != '\n') {
769 yytext[i++] = (char)ch;
770 if (i >= COUNTOF(yytext))
774 * yytext[i] will be pushed back as not part of
775 * this lexeme, but any closing quote should
776 * not be pushed back, so we read another char.
779 ch = lex_getch(lex_stack);
781 /* Pushback the last character read that is not a part
782 * of this lexeme. This fails silently if ch is EOF,
783 * but then the EOF condition persists and is handled on
784 * the next turn by the include stack mechanism.
786 lex_ungetch(ch, lex_stack);
791 /* Now return the desired token */
793 /* First make sure that the parser is *not* expecting a string
794 * as the next token (based on the previous token that was
795 * returned) and that we haven't read a string.
798 if (followedby == FOLLBY_TOKEN && !instring) {
799 token = is_keyword(yytext, &followedby);
802 * T_Server is exceptional as it forces the
803 * following token to be a string in the
804 * non-simulator parts of the configuration,
805 * but in the simulator configuration section,
806 * "server" is followed by "=" which must be
807 * recognized as a token not a string.
809 if (T_Server == token && !old_config_style)
810 followedby = FOLLBY_TOKEN;
812 } else if (is_integer(yytext)) {
813 yylval_was_set = TRUE;
815 if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0
816 && ((errno == EINVAL) || (errno == ERANGE))) {
818 "Integer cannot be represented: %s",
820 if (lex_from_file()) {
823 /* force end of parsing */
830 } else if (is_u_int(yytext)) {
831 yylval_was_set = TRUE;
832 if ('0' == yytext[0] &&
833 'x' == tolower((unsigned long)yytext[1]))
834 converted = sscanf(&yytext[2], "%x",
837 converted = sscanf(yytext, "%u",
839 if (1 != converted) {
841 "U_int cannot be represented: %s",
843 if (lex_from_file()) {
846 /* force end of parsing */
853 } else if (is_double(yytext)) {
854 yylval_was_set = TRUE;
856 if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) {
858 "Double too large to represent: %s",
866 /* Default: Everything is a string */
867 yylval_was_set = TRUE;
868 token = create_string_token(yytext);
874 * Either followedby is not FOLLBY_TOKEN or this lexeme is part
875 * of a string. Hence, we need to return T_String.
877 * _Except_ we might have a -4 or -6 flag on an association
878 * configuration line (server, peer, pool, etc.).
880 * This is a terrible hack, but the grammar is ambiguous so we
881 * don't have a choice. [SK]
883 * The ambiguity is in the keyword scanner, not ntp_parser.y.
884 * We do not require server addresses be quoted in ntp.conf,
885 * complicating the scanner's job. To avoid trying (and
886 * failing) to match an IP address or DNS name to a keyword,
887 * the association keywords use FOLLBY_STRING in the keyword
888 * table, which tells the scanner to force the next token to be
889 * a T_String, so it does not try to match a keyword but rather
890 * expects a string when -4/-6 modifiers to server, peer, etc.
892 * restrict -4 and restrict -6 parsing works correctly without
893 * this hack, as restrict uses FOLLBY_TOKEN. [DH]
895 if ('-' == yytext[0]) {
896 if ('4' == yytext[1]) {
899 } else if ('6' == yytext[1]) {
905 if (FOLLBY_STRING == followedby)
906 followedby = FOLLBY_TOKEN;
908 yylval_was_set = TRUE;
909 token = create_string_token(yytext);
913 DPRINTF(4,("\t<end of command>\n"));
915 DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext,
919 yylval.Integer = token;
924 yytext[min(sizeof(yytext) - 1, 50)] = 0;
926 "configuration item on line %d longer than limit of %lu, began with '%s'",
927 lex_stack->curpos.nline, (u_long)min(sizeof(yytext) - 1, 50),
931 * If we hit the length limit reading the startup configuration
935 exit(sizeof(yytext) - 1);
938 * If it's runtime configuration via ntpq :config treat it as
939 * if the configuration text ended before the too-long lexeme,
940 * hostname, or string.