1 /* $Id: reader.c,v 1.36 2012/05/26 16:05:41 tom Exp $ */
5 /* The line size must be a positive integer. One hundred was chosen */
6 /* because few lines in Yacc input grammars exceed 100 characters. */
7 /* Note that if a line exceeds LINESIZE characters, the line buffer */
8 /* will be expanded to accommodate it. */
15 static void start_rule(bucket *bp, int s_lineno);
18 static int cinc, cache_size;
22 static char **tag_table;
32 static char last_was_action;
35 static bucket **pitem;
40 static size_t name_pool_size;
41 static char *name_pool;
43 char line_format[] = "#line %d \"%s\"\n";
52 if (cinc >= cache_size)
55 cache = TREALLOC(char, cache, cache_size);
58 cache[cinc] = (char)c;
69 if (saw_eof || (c = getc(f)) == EOF)
81 if (line == 0 || linesize != (LINESIZE + 1))
85 linesize = LINESIZE + 1;
86 line = TMALLOC(char, linesize);
102 linesize += LINESIZE;
103 line = TREALLOC(char, line, linesize);
127 p = TMALLOC(char, s - line + 1);
132 while ((*t++ = *s++) != '\n')
142 int st_lineno = lineno;
143 char *st_line = dup_line();
144 char *st_cptr = st_line + (cptr - line);
149 if (*s == '*' && s[1] == '/')
159 unterminated_comment(st_lineno, st_line, st_cptr);
213 else if (s[1] == '/')
231 * Compare keyword to cached token, treating '_' and '-' the same. Some
232 * grammars rely upon this misfeature.
235 matchec(const char *name)
237 const char *p = cache;
238 const char *q = name;
239 int code = 0; /* assume mismatch */
241 while (*p != '\0' && *q != '\0')
251 if (*p == '\0' && *q == '\0')
294 if (matchec("token") || matchec("term"))
300 if (matchec("right"))
302 if (matchec("nonassoc") || matchec("binary"))
304 if (matchec("start"))
306 if (matchec("union"))
308 if (matchec("ident"))
310 if (matchec("expect"))
312 if (matchec("expect-rr"))
314 if (matchec("pure-parser"))
315 return (PURE_PARSER);
316 if (matchec("parse-param"))
317 return (PARSE_PARAM);
318 if (matchec("lex-param"))
328 if (c == '%' || c == '\\')
339 syntax_error(lineno, line, t_cptr);
347 FILE *f = output_file;
353 syntax_error(lineno, line, cptr);
355 fprintf(f, "#ident \"");
380 int need_newline = 0;
381 int t_lineno = lineno;
382 char *t_line = dup_line();
383 char *t_cptr = t_line + (cptr - line - 2);
389 unterminated_text(t_lineno, t_line, t_cptr);
392 fprintf(f, line_format, lineno, input_file_name);
405 unterminated_text(t_lineno, t_line, t_cptr);
410 int s_lineno = lineno;
411 char *s_line = dup_line();
412 char *s_cptr = s_line + (cptr - line - 1);
427 unterminated_string(s_lineno, s_line, s_cptr);
436 unterminated_string(s_lineno, s_line, s_cptr);
449 while ((c = *++cptr) != '\n')
451 if (c == '*' && cptr[1] == '/')
461 int c_lineno = lineno;
462 char *c_line = dup_line();
463 char *c_cptr = c_line + (cptr - line - 1);
471 if (c == '*' && *cptr == '/')
482 unterminated_comment(c_lineno, c_line, c_cptr);
509 puts_both(const char *s)
513 fputs(s, union_file);
530 int u_lineno = lineno;
531 char *u_line = dup_line();
532 char *u_cptr = u_line + (cptr - line - 6);
535 over_unionized(cptr - 6);
539 fprintf(text_file, line_format, lineno, input_file_name);
541 puts_both("#ifdef YYSTYPE\n");
542 puts_both("#undef YYSTYPE_IS_DECLARED\n");
543 puts_both("#define YYSTYPE_IS_DECLARED 1\n");
544 puts_both("#endif\n");
545 puts_both("#ifndef YYSTYPE_IS_DECLARED\n");
546 puts_both("#define YYSTYPE_IS_DECLARED 1\n");
547 puts_both("typedef union");
559 unterminated_union(u_lineno, u_line, u_cptr);
569 puts_both(" YYSTYPE;\n");
570 puts_both("#endif /* !YYSTYPE_IS_DECLARED */\n");
579 int s_lineno = lineno;
580 char *s_line = dup_line();
581 char *s_cptr = s_line + (cptr - line - 1);
594 unterminated_string(s_lineno, s_line, s_cptr);
603 unterminated_string(s_lineno, s_line, s_cptr);
614 while ((c = *++cptr) != '\n')
616 if (c == '*' && cptr[1] == '/')
630 int c_lineno = lineno;
631 char *c_line = dup_line();
632 char *c_cptr = c_line + (cptr - line - 1);
640 if (c == '*' && *cptr == '/')
651 unterminated_comment(c_lineno, c_line, c_cptr);
663 * Keep a linked list of parameters
687 buf = TMALLOC(char, linesize);
690 for (i = 0; (c = *cptr++) != '}'; i++)
703 while (i >= 0 && isspace(UCH(buf[i])))
709 while (i >= 0 && level > 0 && buf[i] != '[')
713 else if (buf[i] == '[')
726 while (i >= 0 && (isalnum(UCH(buf[i])) ||
730 if (!isspace(UCH(buf[i])) && buf[i] != '*')
735 p = TMALLOC(param, 1);
738 p->type2 = strdup(buf + type2);
743 p->name = strdup(buf + name);
771 syntax_error(lineno, line, cptr);
777 if (c >= '0' && c <= '9')
779 if (c >= 'A' && c <= 'F')
780 return (c - 'A' + 10);
781 if (c >= 'a' && c <= 'f')
782 return (c - 'a' + 10);
794 int s_lineno = lineno;
795 char *s_line = dup_line();
796 char *s_cptr = s_line + (cptr - line);
806 unterminated_string(s_lineno, s_line, s_cptr);
809 char *c_cptr = cptr - 1;
817 unterminated_string(s_lineno, s_line, s_cptr);
832 n = (n << 3) + (c - '0');
836 n = (n << 3) + (c - '0');
841 illegal_character(c_cptr);
848 if (n < 0 || n >= 16)
849 illegal_character(c_cptr);
854 if (i < 0 || i >= 16)
859 illegal_character(c_cptr);
892 s = TMALLOC(char, n);
895 for (i = 0; i < n; ++i)
904 for (i = 0; i < n; ++i)
907 if (c == '\\' || c == cache[0])
941 cachec(((c >> 6) & 7) + '0');
942 cachec(((c >> 3) & 7) + '0');
943 cachec((c & 7) + '0');
957 if (n == 1 && bp->value == UNDEFINED)
965 is_reserved(char *name)
969 if (strcmp(name, ".") == 0 ||
970 strcmp(name, "$accept") == 0 ||
971 strcmp(name, "$end") == 0)
974 if (name[0] == '$' && name[1] == '$' && isdigit(UCH(name[2])))
977 while (isdigit(UCH(*s)))
992 for (c = *cptr; IS_IDENT(c); c = *++cptr)
996 if (is_reserved(cache))
997 used_reserved(cache);
999 return (lookup(cache));
1009 for (c = *cptr; isdigit(c); c = *++cptr)
1010 n = (Value_t) (10 * n + (c - '0'));
1021 int t_lineno = lineno;
1022 char *t_line = dup_line();
1023 char *t_cptr = t_line + (cptr - line);
1029 if (!isalpha(c) && c != '_' && c != '$')
1030 illegal_tag(t_lineno, t_line, t_cptr);
1038 while (IS_IDENT(c));
1045 illegal_tag(t_lineno, t_line, t_cptr);
1048 for (i = 0; i < ntags; ++i)
1050 if (strcmp(cache, tag_table[i]) == 0)
1053 return (tag_table[i]);
1057 if (ntags >= tagmax)
1062 ? TREALLOC(char *, tag_table, tagmax)
1063 : TMALLOC(char *, tagmax));
1064 NO_SPACE(tag_table);
1067 s = TMALLOC(char, cinc);
1071 tag_table[ntags] = s;
1078 declare_tokens(int assoc)
1101 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1103 else if (c == '\'' || c == '"')
1109 tokenized_start(bp->name);
1114 if (bp->tag && tag != bp->tag)
1115 retyped_warning(bp->name);
1121 if (bp->prec && prec != bp->prec)
1122 reprec_warning(bp->name);
1123 bp->assoc = (Assoc_t) assoc;
1133 value = get_number();
1134 if (bp->value != UNDEFINED && value != bp->value)
1135 revalued_warning(bp->name);
1145 * %expect requires special handling
1146 * as it really isn't part of the yacc
1147 * grammar, only a flag for yacc proper.
1150 declare_expect(int assoc)
1154 if (assoc != EXPECT && assoc != EXPECT_RR)
1158 * Stay away from nextc - doesn't
1159 * detect EOL and will read to EOF.
1169 if (assoc == EXPECT)
1170 SRexpect = get_number();
1172 RRexpect = get_number();
1176 * Looking for number before EOL.
1177 * Spaces, tabs, and numbers are ok,
1178 * words, punc., etc. are syntax errors.
1180 else if (c == '\n' || isalpha(c) || !isspace(c))
1182 syntax_error(lineno, line, cptr);
1204 syntax_error(lineno, line, cptr);
1210 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1212 else if (c == '\'' || c == '"')
1217 if (bp->tag && tag != bp->tag)
1218 retyped_warning(bp->name);
1232 if (!isalpha(c) && c != '_' && c != '.' && c != '$')
1233 syntax_error(lineno, line, cptr);
1235 if (bp->class == TERM)
1236 terminal_start(bp->name);
1237 if (goal && goal != bp)
1238 restarted_warning();
1243 read_declarations(void)
1248 cache = TMALLOC(char, cache_size);
1257 syntax_error(lineno, line, cptr);
1258 switch (k = keyword())
1305 /* noop for bison compatibility. byacc is already designed to be posix
1306 * yacc compatible. */
1313 initialize_grammar(void)
1318 pitem = TMALLOC(bucket *, maxitems);
1329 plhs = TMALLOC(bucket *, maxrules);
1336 rprec = TMALLOC(Value_t, maxrules);
1343 rassoc = TMALLOC(Assoc_t, maxrules);
1355 pitem = TREALLOC(bucket *, pitem, maxitems);
1364 plhs = TREALLOC(bucket *, plhs, maxrules);
1367 rprec = TREALLOC(Value_t, rprec, maxrules);
1370 rassoc = TREALLOC(Assoc_t, rassoc, maxrules);
1375 advance_to_start(void)
1402 syntax_error(lineno, line, s_cptr);
1407 if (!isalpha(c) && c != '_' && c != '.' && c != '$') /* a name may also begin with '$', as in the other name-start checks in this file; '_' was previously tested twice */
1408 syntax_error(lineno, line, cptr);
1412 if (bp->class == TERM)
1413 terminal_start(bp->name);
1422 syntax_error(lineno, line, cptr);
1423 start_rule(bp, s_lineno);
1428 start_rule(bucket *bp, int s_lineno)
1430 if (bp->class == TERM)
1431 terminal_lhs(s_lineno);
1432 bp->class = NONTERM;
1433 if (nrules >= maxrules)
1436 rprec[nrules] = UNDEFINED;
1437 rassoc[nrules] = TOKEN;
1445 if (!last_was_action && plhs[nrules]->tag)
1447 if (pitem[nitems - 1])
1449 for (i = nitems - 1; (i > 0) && pitem[i]; --i)
1451 if (pitem[i + 1] == 0 || pitem[i + 1]->tag != plhs[nrules]->tag)
1452 default_action_warning();
1456 default_action_warning();
1460 last_was_action = 0;
1461 if (nitems >= maxitems)
1469 insert_empty_rule(void)
1474 sprintf(cache, "$$%d", ++gensym);
1475 bp = make_bucket(cache);
1476 last_symbol->next = bp;
1478 bp->tag = plhs[nrules]->tag;
1479 bp->class = NONTERM;
1481 if ((nitems += 2) > maxitems)
1483 bpp = pitem + nitems - 1;
1485 while ((bpp[0] = bpp[-1]) != 0)
1488 if (++nrules >= maxrules)
1490 plhs[nrules] = plhs[nrules - 1];
1491 plhs[nrules - 1] = bp;
1492 rprec[nrules] = rprec[nrules - 1];
1493 rprec[nrules - 1] = 0;
1494 rassoc[nrules] = rassoc[nrules - 1];
1495 rassoc[nrules - 1] = TOKEN;
1503 int s_lineno = lineno;
1506 if (c == '\'' || c == '"')
1515 start_rule(bp, s_lineno);
1520 if (last_was_action)
1521 insert_empty_rule();
1522 last_was_action = 0;
1524 if (++nitems > maxitems)
1526 pitem[nitems - 1] = bp;
1530 after_blanks(char *s)
1532 while (*s != '\0' && isspace(UCH(*s)))
1545 FILE *f = action_file;
1546 int a_lineno = lineno;
1547 char *a_line = dup_line();
1548 char *a_cptr = a_line + (cptr - line);
1550 if (last_was_action)
1551 insert_empty_rule();
1552 last_was_action = 1;
1554 fprintf(f, "case %d:\n", nrules - 2);
1556 fprintf(f, line_format, lineno, input_file_name);
1560 /* avoid putting curly-braces in first column, to ease editing */
1561 if (*after_blanks(cptr) == L_CURL)
1564 cptr = after_blanks(cptr);
1568 for (i = nitems - 1; pitem[i]; --i)
1578 int d_lineno = lineno;
1579 char *d_line = dup_line();
1580 char *d_cptr = d_line + (cptr - line);
1587 fprintf(f, "yyval.%s", tag);
1592 else if (isdigit(c))
1596 dollar_warning(d_lineno, i);
1597 fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
1601 else if (c == '-' && isdigit(UCH(cptr[1])))
1604 i = -get_number() - n;
1605 fprintf(f, "yystack.l_mark[%d].%s", i, tag);
1610 dollar_error(d_lineno, d_line, d_cptr);
1612 else if (cptr[1] == '$')
1616 tag = plhs[nrules]->tag;
1619 fprintf(f, "yyval.%s", tag);
1622 fprintf(f, "yyval");
1626 else if (isdigit(UCH(cptr[1])))
1632 if (i <= 0 || i > n)
1634 tag = pitem[nitems + i - n - 1]->tag;
1636 untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1637 fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
1642 dollar_warning(lineno, i);
1643 fprintf(f, "yystack.l_mark[%d]", i - n);
1647 else if (cptr[1] == '-')
1653 fprintf(f, "yystack.l_mark[%d]", -i - n);
1657 if (isalpha(c) || c == '_' || c == '$')
1664 while (isalnum(c) || c == '_' || c == '$');
1676 unterminated_action(a_lineno, a_line, a_cptr);
1681 fprintf(f, "\nbreak;\n");
1692 fprintf(f, "\nbreak;\n");
1699 int s_lineno = lineno;
1700 char *s_line = dup_line();
1701 char *s_cptr = s_line + (cptr - line - 1);
1714 unterminated_string(s_lineno, s_line, s_cptr);
1723 unterminated_string(s_lineno, s_line, s_cptr);
1734 while ((c = *++cptr) != '\n')
1736 if (c == '*' && cptr[1] == '/')
1746 int c_lineno = lineno;
1747 char *c_line = dup_line();
1748 char *c_cptr = c_line + (cptr - line - 1);
1756 if (c == '*' && *cptr == '/')
1767 unterminated_comment(c_lineno, c_line, c_cptr);
1785 if (c == '%' || c == '\\')
1793 else if ((c == 'p' || c == 'P') &&
1794 ((c = cptr[2]) == 'r' || c == 'R') &&
1795 ((c = cptr[3]) == 'e' || c == 'E') &&
1796 ((c = cptr[4]) == 'c' || c == 'C') &&
1797 ((c = cptr[5], !IS_IDENT(c))))
1800 syntax_error(lineno, line, cptr);
1803 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1805 else if (c == '\'' || c == '"')
1809 syntax_error(lineno, line, cptr);
1813 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1816 rprec[nrules] = bp->prec;
1817 rassoc[nrules] = bp->assoc;
1826 initialize_grammar();
1841 else if (c == L_CURL || c == '=')
1846 start_rule(plhs[nrules - 1], 0);
1855 syntax_error(lineno, line, cptr);
1868 for (i = 0; i < ntags; ++i)
1870 assert(tag_table[i]);
1882 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1883 for (bp = first_symbol; bp; bp = bp->next)
1884 name_pool_size += strlen(bp->name) + 1;
1886 name_pool = TMALLOC(char, name_pool_size);
1887 NO_SPACE(name_pool);
1889 strcpy(name_pool, "$accept");
1890 strcpy(name_pool + 8, "$end");
1892 for (bp = first_symbol; bp; bp = bp->next)
1896 while ((*t++ = *s++) != 0)
1908 if (goal->class == UNKNOWN)
1909 undefined_goal(goal->name);
1911 for (bp = first_symbol; bp; bp = bp->next)
1913 if (bp->class == UNKNOWN)
1915 undefined_symbol_warning(bp->name);
1922 protect_string(char *src, char **des)
1935 if ('\\' == *s || '"' == *s)
1941 *des = d = TMALLOC(char, len);
1947 if ('\\' == *s || '"' == *s)
1964 for (bp = first_symbol; bp; bp = bp->next)
1967 if (bp->class == TERM)
1970 start_symbol = (Value_t) ntokens;
1971 nvars = nsyms - ntokens;
1973 symbol_name = TMALLOC(char *, nsyms);
1974 NO_SPACE(symbol_name);
1976 symbol_value = TMALLOC(Value_t, nsyms);
1977 NO_SPACE(symbol_value);
1979 symbol_prec = TMALLOC(short, nsyms);
1980 NO_SPACE(symbol_prec);
1982 symbol_assoc = TMALLOC(char, nsyms);
1983 NO_SPACE(symbol_assoc);
1985 v = TMALLOC(bucket *, nsyms);
1989 v[start_symbol] = 0;
1992 j = (Value_t) (start_symbol + 1);
1993 for (bp = first_symbol; bp; bp = bp->next)
1995 if (bp->class == TERM)
2000 assert(i == ntokens && j == nsyms);
2002 for (i = 1; i < ntokens; ++i)
2005 goal->index = (Index_t) (start_symbol + 1);
2006 k = (Value_t) (start_symbol + 2);
2016 for (i = (Value_t) (start_symbol + 1); i < nsyms; ++i)
2026 for (i = 1; i < ntokens; ++i)
2031 for (j = k++; j > 0 && symbol_value[j - 1] > n; --j)
2032 symbol_value[j] = symbol_value[j - 1];
2033 symbol_value[j] = n;
2039 if (v[1]->value == UNDEFINED)
2044 for (i = 2; i < ntokens; ++i)
2046 if (v[i]->value == UNDEFINED)
2048 while (j < k && n == symbol_value[j])
2050 while (++j < k && n == symbol_value[j])
2059 symbol_name[0] = name_pool + 8;
2060 symbol_value[0] = 0;
2062 symbol_assoc[0] = TOKEN;
2063 for (i = 1; i < ntokens; ++i)
2065 symbol_name[i] = v[i]->name;
2066 symbol_value[i] = v[i]->value;
2067 symbol_prec[i] = v[i]->prec;
2068 symbol_assoc[i] = v[i]->assoc;
2070 symbol_name[start_symbol] = name_pool;
2071 symbol_value[start_symbol] = -1;
2072 symbol_prec[start_symbol] = 0;
2073 symbol_assoc[start_symbol] = TOKEN;
2074 for (++i; i < nsyms; ++i)
2077 symbol_name[k] = v[i]->name;
2078 symbol_value[k] = v[i]->value;
2079 symbol_prec[k] = v[i]->prec;
2080 symbol_assoc[k] = v[i]->assoc;
2085 symbol_pname = TMALLOC(char *, nsyms);
2086 NO_SPACE(symbol_pname);
2088 for (i = 0; i < nsyms; ++i)
2089 protect_string(symbol_name[i], &(symbol_pname[i]));
2103 ritem = TMALLOC(Value_t, nitems);
2106 rlhs = TMALLOC(Value_t, nrules);
2109 rrhs = TMALLOC(Value_t, nrules + 1);
2112 rprec = TREALLOC(Value_t, rprec, nrules);
2115 rassoc = TREALLOC(Assoc_t, rassoc, nrules);
2119 ritem[1] = goal->index;
2124 rlhs[2] = start_symbol;
2130 for (i = 3; i < nrules; ++i)
2132 rlhs[i] = plhs[i]->index;
2138 ritem[j] = pitem[j]->index;
2139 if (pitem[j]->class == TERM)
2141 prec2 = pitem[j]->prec;
2142 assoc = pitem[j]->assoc;
2146 ritem[j] = (Value_t) - i;
2148 if (rprec[i] == UNDEFINED)
2164 size_t j, spacing = 0;
2165 FILE *f = verbose_file;
2171 for (i = 2; i < nrules; ++i)
2173 if (rlhs[i] != rlhs[i - 1])
2177 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
2178 spacing = strlen(symbol_name[rlhs[i]]) + 1;
2182 fprintf(f, "%4d ", i - 2);
2189 while (ritem[k] >= 0)
2191 fprintf(f, " %s", symbol_name[ritem[k]]);
2202 write_section(code_file, banner);
2203 create_symbol_table();
2204 read_declarations();
2206 free_symbol_table();
2218 free_declarations(param * list)
2222 param *next = list->next;
2235 lex_param = free_declarations(lex_param);
2236 parse_param = free_declarations(parse_param);
2246 DO_FREE(symbol_name);
2247 DO_FREE(symbol_prec);
2248 DO_FREE(symbol_assoc);
2249 DO_FREE(symbol_value);