4 * The source code for a simple lexical analyzer.
6 * Written By: Sachin Kamboj
7 * University of Delaware
23 #include "ntp_config.h"
25 #include "ntp_scanner.h"
26 #include "ntp_parser.h"
28 /* ntp_keyword.h declares finite state machine and token text */
29 #include "ntp_keyword.h"
33 /* SCANNER GLOBAL VARIABLES
34 * ------------------------
37 #define MAX_LEXEME (1024 + 1) /* The maximum size of a lexeme */
38 char yytext[MAX_LEXEME]; /* Buffer for storing the input text/lexeme */
39 u_int32 conf_file_sum; /* Simple sum of characters read */
49 /* CONSTANTS
50 * ---------
52 const char special_chars[] = "{}(),;|=";
59 static int is_keyword(char *lexeme, follby *pfollowedby);
63 * keyword() - Return the keyword associated with token T_ identifier.
64 * See also token_name() for the string-ized T_ identifier.
65 * Example: keyword(T_Server) returns "server"
66 * token_name(T_Server) returns "T_Server"
76 i = token - LOWEST_KEYWORD_ID;
78 if (i < COUNTOF(keyword_text))
79 text = keyword_text[i];
85 : "(keyword not found)";
91 * We define a couple of wrapper functions around the standard C fgetc
92 * and ungetc functions in order to include positional bookkeeping
101 struct FILE_INFO *my_info;
103 my_info = emalloc(sizeof *my_info);
105 my_info->line_no = 1;
107 my_info->prev_line_col_no = 0;
108 my_info->prev_token_col_no = 0;
109 my_info->fname = path;
111 my_info->fd = fopen(path, mode);
112 if (NULL == my_info->fd) {
121 struct FILE_INFO *stream
127 ch = fgetc(stream->fd);
128 while (EOF != ch && (CHAR_MIN > ch || ch > CHAR_MAX));
132 conf_file_sum += (u_char)ch;
135 stream->prev_line_col_no = stream->col_no;
144 /* BUGS: 1. Function will fail on more than one line of pushback
145 * 2. No error checking is done to see if ungetc fails
146 * SK: I don't think it's worth fixing these bugs for our purposes ;-)
151 struct FILE_INFO *stream
155 conf_file_sum -= (u_char)ch;
157 stream->col_no = stream->prev_line_col_no;
158 stream->prev_line_col_no = -1;
162 return ungetc(ch, stream->fd);
167 struct FILE_INFO *stream
170 int ret_val = fclose(stream->fd);
179 * Provide a wrapper for the stream functions so that the
180 * stream can either read from a file or from a character
182 * NOTE: This is not very efficient for reading from character
183 * arrays, but needed to allow remote configuration where the
184 * configuration command is provided through ntpq.
186 * The behavior of these two functions is determined by the
187 * input_from_file flag.
192 struct FILE_INFO *ip_file
198 return FGETC(ip_file);
200 if (remote_config.buffer[remote_config.pos] == '\0')
204 ch = remote_config.buffer[remote_config.pos++];
206 ip_file->prev_line_col_no = ip_file->col_no;
217 struct FILE_INFO *ip_file,
225 ip_file->col_no = ip_file->prev_line_col_no;
226 ip_file->prev_line_col_no = -1;
249 int curr_s; /* current state index */
253 curr_s = SCANNER_INIT_S;
256 for (i = 0; lexeme[i]; i++) {
257 while (curr_s && (lexeme[i] != SS_CH(sst[curr_s])))
258 curr_s = SS_OTHER_N(sst[curr_s]);
260 if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) {
261 if ('\0' == lexeme[i + 1]
262 && FOLLBY_NON_ACCEPTING
263 != SS_FB(sst[curr_s])) {
264 fb = SS_FB(sst[curr_s]);
269 curr_s = SS_MATCH_N(sst[curr_s]);
290 /* Allow a leading minus sign */
291 if (lexeme[i] == '-') {
298 /* Check that all the remaining characters are digits */
299 for (; lexeme[i] != '\0'; i++) {
300 if (!isdigit((unsigned char)lexeme[i]))
307 /* Reject numbers that fit in unsigned but not in signed int */
308 if (1 == sscanf(lexeme, "%u", &u_val))
309 return (u_val <= INT_MAX);
315 /* U_int -- assumes is_integer() has returned FALSE */
325 if ('0' == lexeme[i] && 'x' == tolower((unsigned char)lexeme[i + 1])) {
332 /* Check that all the remaining characters are digits (hex digits after a 0x prefix) */
333 for (; lexeme[i] != '\0'; i++) {
334 if (is_hex && !isxdigit((unsigned char)lexeme[i]))
336 if (!is_hex && !isdigit((unsigned char)lexeme[i]))
350 u_int num_digits = 0; /* Number of digits read */
355 /* Check for an optional '+' or '-' */
356 if ('+' == lexeme[i] || '-' == lexeme[i])
359 /* Read the integer part */
360 for (; lexeme[i] && isdigit((unsigned char)lexeme[i]); i++)
363 /* Check for the optional decimal point */
364 if ('.' == lexeme[i]) {
366 /* Check for any digits after the decimal point */
367 for (; lexeme[i] && isdigit((unsigned char)lexeme[i]); i++)
372 * The number of digits in both the integer part and the
373 * fraction part must not be zero at this point
378 /* Check if we are done */
382 /* There is still more input, read the exponent */
383 if ('e' == tolower((unsigned char)lexeme[i]))
388 /* Read an optional Sign */
389 if ('+' == lexeme[i] || '-' == lexeme[i])
392 /* Now read the exponent part */
393 while (lexeme[i] && isdigit((unsigned char)lexeme[i]))
396 /* Check if we are done */
404 /* is_special() - Test whether a character is a token */
410 return strchr(special_chars, ch) != NULL;
419 if ((old_config_style && (ch == '\n')) ||
420 (!old_config_style && (ch == ';')))
427 quote_if_needed(char *str)
434 octets = len + 2 + 1;
435 ret = emalloc(octets);
437 && (strcspn(str, special_chars) < len
438 || strchr(str, ' ') != NULL)) {
439 snprintf(ret, octets, "\"%s\"", str);
441 strlcpy(ret, str, octets);
455 * ignore end of line whitespace
458 while (*pch && isspace((unsigned char)*pch))
462 yylval.Integer = T_EOC;
463 return yylval.Integer;
466 yylval.String = estrdup(lexeme);
472 * yylex() - function that does the actual scanning.
473 * Bison expects this function to be called yylex and for it to take no
474 * input and return an int.
475 * Conceptually yylex "returns" yylval as well as the actual return
476 * value representing the token or type.
480 struct FILE_INFO *ip_file
483 static follby followedby = FOLLBY_TOKEN;
488 int token; /* The return value */
492 ip_file = fp[curr_include_level];
494 yylval_was_set = FALSE;
497 /* Ignore whitespace at the beginning */
498 while (EOF != (ch = get_next_char(ip_file)) &&
501 ; /* Null Statement */
505 if (!input_from_file || curr_include_level <= 0)
508 FCLOSE(fp[curr_include_level]);
509 ip_file = fp[--curr_include_level];
513 } else if (is_EOC(ch)) {
515 /* end FOLLBY_STRINGS_TO_EOC effect */
516 followedby = FOLLBY_TOKEN;
520 } else if (is_special(ch) && FOLLBY_TOKEN == followedby) {
521 /* special chars are their own token values */
524 * '=' outside simulator configuration implies
525 * a single string following as in:
526 * setvar Owner = "The Boss" default
528 if ('=' == ch && old_config_style)
529 followedby = FOLLBY_STRING;
530 yytext[0] = (char)ch;
534 push_back_char(ip_file, ch);
536 /* save the position of start of the token */
537 ip_file->prev_token_line_no = ip_file->line_no;
538 ip_file->prev_token_col_no = ip_file->col_no;
540 /* Read in the lexeme */
542 while (EOF != (ch = get_next_char(ip_file))) {
544 yytext[i] = (char)ch;
546 /* Break on whitespace or a special character */
547 if (isspace(ch) || is_EOC(ch)
549 || (FOLLBY_TOKEN == followedby
553 /* Read the rest of the line on reading a start
554 of comment character */
556 while (EOF != (ch = get_next_char(ip_file))
558 ; /* Null Statement */
563 if (i >= COUNTOF(yytext))
566 /* Pick up all of the string between the " marks, to
567 * end of line. If we make it to EOL without a
568 * terminating " assume it for them.
570 * XXX - HMS: I'm not sure we want to assume the closing "
574 while (EOF != (ch = get_next_char(ip_file)) &&
575 ch != '"' && ch != '\n') {
576 yytext[i++] = (char)ch;
577 if (i >= COUNTOF(yytext))
581 * yytext[i] will be pushed back as not part of
582 * this lexeme, but any closing quote should
583 * not be pushed back, so we read another char.
586 ch = get_next_char(ip_file);
588 /* Push back the last character read that is not a part
590 * If the last character read was an EOF, push back a
591 * newline character. This is to prevent a parse error
592 * when there is no newline at the end of a file.
595 push_back_char(ip_file, '\n');
597 push_back_char(ip_file, ch);
601 /* Now return the desired token */
603 /* First make sure that the parser is *not* expecting a string
604 * as the next token (based on the previous token that was
605 * returned) and that we haven't read a string.
608 if (followedby == FOLLBY_TOKEN && !instring) {
609 token = is_keyword(yytext, &followedby);
612 * T_Server is exceptional as it forces the
613 * following token to be a string in the
614 * non-simulator parts of the configuration,
615 * but in the simulator configuration section,
616 * "server" is followed by "=" which must be
617 * recognized as a token not a string.
619 if (T_Server == token && !old_config_style)
620 followedby = FOLLBY_TOKEN;
622 } else if (is_integer(yytext)) {
623 yylval_was_set = TRUE;
625 if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0
626 && ((errno == EINVAL) || (errno == ERANGE))) {
628 "Integer cannot be represented: %s",
630 if (input_from_file) {
633 /* force end of parsing */
640 } else if (is_u_int(yytext)) {
641 yylval_was_set = TRUE;
642 if ('0' == yytext[0] &&
643 'x' == tolower((unsigned char)yytext[1]))
644 converted = sscanf(&yytext[2], "%x",
647 converted = sscanf(yytext, "%u",
649 if (1 != converted) {
651 "U_int cannot be represented: %s",
653 if (input_from_file) {
656 /* force end of parsing */
663 } else if (is_double(yytext)) {
664 yylval_was_set = TRUE;
666 if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) {
668 "Double too large to represent: %s",
676 /* Default: Everything is a string */
677 yylval_was_set = TRUE;
678 token = create_string_token(yytext);
684 * Either followedby is not FOLLBY_TOKEN or this lexeme is part
685 * of a string. Hence, we need to return T_String.
687 * _Except_ we might have a -4 or -6 flag on an association
688 * configuration line (server, peer, pool, etc.).
690 * This is a terrible hack, but the grammar is ambiguous so we
691 * don't have a choice. [SK]
693 * The ambiguity is in the keyword scanner, not ntp_parser.y.
694 * We do not require server addresses be quoted in ntp.conf,
695 * complicating the scanner's job. To avoid trying (and
696 * failing) to match an IP address or DNS name to a keyword,
697 * the association keywords use FOLLBY_STRING in the keyword
698 * table, which tells the scanner to force the next token to be
699 * a T_String, so it does not try to match a keyword but rather
700 * expects a string when -4/-6 modifiers to server, peer, etc.
702 * restrict -4 and restrict -6 parsing works correctly without
703 * this hack, as restrict uses FOLLBY_TOKEN. [DH]
705 if ('-' == yytext[0]) {
706 if ('4' == yytext[1]) {
709 } else if ('6' == yytext[1]) {
716 if (FOLLBY_STRING == followedby)
717 followedby = FOLLBY_TOKEN;
719 yylval_was_set = TRUE;
720 token = create_string_token(yytext);
724 DPRINTF(4,("\t<end of command>\n"));
726 DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext,
730 yylval.Integer = token;
735 yytext[min(sizeof(yytext) - 1, 50)] = 0;
737 "configuration item on line %d longer than limit of %lu, began with '%s'",
738 ip_file->line_no, (u_long)min(sizeof(yytext) - 1, 50),
742 * If we hit the length limit reading the startup configuration
746 exit(sizeof(yytext) - 1);
749 * If it's runtime configuration via ntpq :config treat it as
750 * if the configuration text ended before the too-long lexeme,
751 * hostname, or string.