1 /* $Id: reader.c,v 1.37 2013/09/25 23:46:18 tom Exp $ */
5 /* The line size must be a positive integer. One hundred was chosen */
6 /* because few lines in Yacc input grammars exceed 100 characters. */
7 /* Note that if a line exceeds LINESIZE characters, the line buffer */
8 /* will be expanded to accommodate it. */
15 static void start_rule(bucket *bp, int s_lineno);
18 static int cinc, cache_size;
22 static char **tag_table;
32 static char last_was_action;
35 static bucket **pitem;
40 static size_t name_pool_size;
41 static char *name_pool;
43 char line_format[] = "#line %d \"%s\"\n";
52 if (cinc >= cache_size)
55 cache = TREALLOC(char, cache, cache_size);
58 cache[cinc] = (char)c;
69 if (saw_eof || (c = getc(f)) == EOF)
81 if (line == 0 || linesize != (LINESIZE + 1))
85 linesize = LINESIZE + 1;
86 line = TMALLOC(char, linesize);
102 linesize += LINESIZE;
103 line = TREALLOC(char, line, linesize);
127 p = TMALLOC(char, s - line + 1);
132 while ((*t++ = *s++) != '\n')
142 int st_lineno = lineno;
143 char *st_line = dup_line();
144 char *st_cptr = st_line + (cptr - line);
149 if (*s == '*' && s[1] == '/')
159 unterminated_comment(st_lineno, st_line, st_cptr);
213 else if (s[1] == '/')
231 * Compare keyword to cached token, treating '_' and '-' the same. Some
232 * grammars rely upon this misfeature.
235 matchec(const char *name)
237 const char *p = cache;
238 const char *q = name;
239 int code = 0; /* assume mismatch */
241 while (*p != '\0' && *q != '\0')
251 if (*p == '\0' && *q == '\0')
294 if (matchec("token") || matchec("term"))
300 if (matchec("right"))
302 if (matchec("nonassoc") || matchec("binary"))
304 if (matchec("start"))
306 if (matchec("union"))
308 if (matchec("ident"))
310 if (matchec("expect"))
312 if (matchec("expect-rr"))
314 if (matchec("pure-parser"))
315 return (PURE_PARSER);
316 if (matchec("parse-param"))
317 return (PARSE_PARAM);
318 if (matchec("lex-param"))
328 if (c == '%' || c == '\\')
339 syntax_error(lineno, line, t_cptr);
348 FILE *f = output_file;
354 syntax_error(lineno, line, cptr);
356 fprintf(f, "#ident \"");
381 int need_newline = 0;
382 int t_lineno = lineno;
383 char *t_line = dup_line();
384 char *t_cptr = t_line + (cptr - line - 2);
390 unterminated_text(t_lineno, t_line, t_cptr);
393 fprintf(f, line_format, lineno, input_file_name);
406 unterminated_text(t_lineno, t_line, t_cptr);
411 int s_lineno = lineno;
412 char *s_line = dup_line();
413 char *s_cptr = s_line + (cptr - line - 1);
428 unterminated_string(s_lineno, s_line, s_cptr);
437 unterminated_string(s_lineno, s_line, s_cptr);
450 while ((c = *++cptr) != '\n')
452 if (c == '*' && cptr[1] == '/')
462 int c_lineno = lineno;
463 char *c_line = dup_line();
464 char *c_cptr = c_line + (cptr - line - 1);
472 if (c == '*' && *cptr == '/')
483 unterminated_comment(c_lineno, c_line, c_cptr);
510 puts_both(const char *s)
514 fputs(s, union_file);
531 int u_lineno = lineno;
532 char *u_line = dup_line();
533 char *u_cptr = u_line + (cptr - line - 6);
536 over_unionized(cptr - 6);
540 fprintf(text_file, line_format, lineno, input_file_name);
542 puts_both("#ifdef YYSTYPE\n");
543 puts_both("#undef YYSTYPE_IS_DECLARED\n");
544 puts_both("#define YYSTYPE_IS_DECLARED 1\n");
545 puts_both("#endif\n");
546 puts_both("#ifndef YYSTYPE_IS_DECLARED\n");
547 puts_both("#define YYSTYPE_IS_DECLARED 1\n");
548 puts_both("typedef union");
560 unterminated_union(u_lineno, u_line, u_cptr);
570 puts_both(" YYSTYPE;\n");
571 puts_both("#endif /* !YYSTYPE_IS_DECLARED */\n");
580 int s_lineno = lineno;
581 char *s_line = dup_line();
582 char *s_cptr = s_line + (cptr - line - 1);
595 unterminated_string(s_lineno, s_line, s_cptr);
604 unterminated_string(s_lineno, s_line, s_cptr);
615 while ((c = *++cptr) != '\n')
617 if (c == '*' && cptr[1] == '/')
631 int c_lineno = lineno;
632 char *c_line = dup_line();
633 char *c_cptr = c_line + (cptr - line - 1);
641 if (c == '*' && *cptr == '/')
652 unterminated_comment(c_lineno, c_line, c_cptr);
664 * Keep a linked list of parameters
688 buf = TMALLOC(char, linesize);
691 for (i = 0; (c = *cptr++) != '}'; i++)
704 while (i > 0 && isspace(UCH(buf[i])))
710 while (i >= 0 && level > 0 && buf[i] != '[')
714 else if (buf[i] == '[')
727 while (i > 0 && (isalnum(UCH(buf[i])) ||
731 if (!isspace(UCH(buf[i])) && buf[i] != '*')
736 p = TMALLOC(param, 1);
739 p->type2 = strdup(buf + type2);
744 p->name = strdup(buf + name);
772 syntax_error(lineno, line, cptr);
778 if (c >= '0' && c <= '9')
780 if (c >= 'A' && c <= 'F')
781 return (c - 'A' + 10);
782 if (c >= 'a' && c <= 'f')
783 return (c - 'a' + 10);
795 int s_lineno = lineno;
796 char *s_line = dup_line();
797 char *s_cptr = s_line + (cptr - line);
807 unterminated_string(s_lineno, s_line, s_cptr);
810 char *c_cptr = cptr - 1;
818 unterminated_string(s_lineno, s_line, s_cptr);
833 n = (n << 3) + (c - '0');
837 n = (n << 3) + (c - '0');
842 illegal_character(c_cptr);
849 if (n < 0 || n >= 16)
850 illegal_character(c_cptr);
855 if (i < 0 || i >= 16)
860 illegal_character(c_cptr);
893 s = TMALLOC(char, n);
896 for (i = 0; i < n; ++i)
905 for (i = 0; i < n; ++i)
908 if (c == '\\' || c == cache[0])
942 cachec(((c >> 6) & 7) + '0');
943 cachec(((c >> 3) & 7) + '0');
944 cachec((c & 7) + '0');
958 if (n == 1 && bp->value == UNDEFINED)
966 is_reserved(char *name)
970 if (strcmp(name, ".") == 0 ||
971 strcmp(name, "$accept") == 0 ||
972 strcmp(name, "$end") == 0)
975 if (name[0] == '$' && name[1] == '$' && isdigit(UCH(name[2])))
978 while (isdigit(UCH(*s)))
993 for (c = *cptr; IS_IDENT(c); c = *++cptr)
997 if (is_reserved(cache))
998 used_reserved(cache);
1000 return (lookup(cache));
1010 for (c = *cptr; isdigit(c); c = *++cptr)
1011 n = (Value_t) (10 * n + (c - '0'));
1022 int t_lineno = lineno;
1023 char *t_line = dup_line();
1024 char *t_cptr = t_line + (cptr - line);
1030 if (!isalpha(c) && c != '_' && c != '$')
1031 illegal_tag(t_lineno, t_line, t_cptr);
1039 while (IS_IDENT(c));
1046 illegal_tag(t_lineno, t_line, t_cptr);
1049 for (i = 0; i < ntags; ++i)
1051 if (strcmp(cache, tag_table[i]) == 0)
1054 return (tag_table[i]);
1058 if (ntags >= tagmax)
1063 ? TREALLOC(char *, tag_table, tagmax)
1064 : TMALLOC(char *, tagmax));
1065 NO_SPACE(tag_table);
1068 s = TMALLOC(char, cinc);
1072 tag_table[ntags] = s;
1079 declare_tokens(int assoc)
1102 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1104 else if (c == '\'' || c == '"')
1110 tokenized_start(bp->name);
1115 if (bp->tag && tag != bp->tag)
1116 retyped_warning(bp->name);
1122 if (bp->prec && prec != bp->prec)
1123 reprec_warning(bp->name);
1124 bp->assoc = (Assoc_t) assoc;
1134 value = get_number();
1135 if (bp->value != UNDEFINED && value != bp->value)
1136 revalued_warning(bp->name);
1146 * %expect requires special handling
1147 * as it really isn't part of the yacc
1148 * grammar, only a flag for yacc proper.
1151 declare_expect(int assoc)
1155 if (assoc != EXPECT && assoc != EXPECT_RR)
1159 * Stay away from nextc - doesn't
1160 * detect EOL and will read to EOF.
1170 if (assoc == EXPECT)
1171 SRexpect = get_number();
1173 RRexpect = get_number();
1177 * Looking for number before EOL.
1178 * Spaces, tabs, and numbers are ok,
1179 * words, punc., etc. are syntax errors.
1181 else if (c == '\n' || isalpha(c) || !isspace(c))
1183 syntax_error(lineno, line, cptr);
1205 syntax_error(lineno, line, cptr);
1211 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1213 else if (c == '\'' || c == '"')
1218 if (bp->tag && tag != bp->tag)
1219 retyped_warning(bp->name);
1233 if (!isalpha(c) && c != '_' && c != '.' && c != '$')
1234 syntax_error(lineno, line, cptr);
1236 if (bp->class == TERM)
1237 terminal_start(bp->name);
1238 if (goal && goal != bp)
1239 restarted_warning();
1244 read_declarations(void)
1249 cache = TMALLOC(char, cache_size);
1258 syntax_error(lineno, line, cptr);
1259 switch (k = keyword())
1306 /* noop for bison compatibility. byacc is already designed to be posix
1307 * yacc compatible. */
1314 initialize_grammar(void)
1319 pitem = TMALLOC(bucket *, maxitems);
1330 plhs = TMALLOC(bucket *, maxrules);
1337 rprec = TMALLOC(Value_t, maxrules);
1344 rassoc = TMALLOC(Assoc_t, maxrules);
1356 pitem = TREALLOC(bucket *, pitem, maxitems);
1365 plhs = TREALLOC(bucket *, plhs, maxrules);
1368 rprec = TREALLOC(Value_t, rprec, maxrules);
1371 rassoc = TREALLOC(Assoc_t, rassoc, maxrules);
1376 advance_to_start(void)
1403 syntax_error(lineno, line, s_cptr);
1408 if (!isalpha(c) && c != '_' && c != '.' && c != '_')
1409 syntax_error(lineno, line, cptr);
1413 if (bp->class == TERM)
1414 terminal_start(bp->name);
1423 syntax_error(lineno, line, cptr);
1424 start_rule(bp, s_lineno);
1429 start_rule(bucket *bp, int s_lineno)
1431 if (bp->class == TERM)
1432 terminal_lhs(s_lineno);
1433 bp->class = NONTERM;
1434 if (nrules >= maxrules)
1437 rprec[nrules] = UNDEFINED;
1438 rassoc[nrules] = TOKEN;
1446 if (!last_was_action && plhs[nrules]->tag)
1448 if (pitem[nitems - 1])
1450 for (i = nitems - 1; (i > 0) && pitem[i]; --i)
1452 if (pitem[i + 1] == 0 || pitem[i + 1]->tag != plhs[nrules]->tag)
1453 default_action_warning();
1457 default_action_warning();
1461 last_was_action = 0;
1462 if (nitems >= maxitems)
1470 insert_empty_rule(void)
1475 sprintf(cache, "$$%d", ++gensym);
1476 bp = make_bucket(cache);
1477 last_symbol->next = bp;
1479 bp->tag = plhs[nrules]->tag;
1480 bp->class = NONTERM;
1482 if ((nitems += 2) > maxitems)
1484 bpp = pitem + nitems - 1;
1486 while ((bpp[0] = bpp[-1]) != 0)
1489 if (++nrules >= maxrules)
1491 plhs[nrules] = plhs[nrules - 1];
1492 plhs[nrules - 1] = bp;
1493 rprec[nrules] = rprec[nrules - 1];
1494 rprec[nrules - 1] = 0;
1495 rassoc[nrules] = rassoc[nrules - 1];
1496 rassoc[nrules - 1] = TOKEN;
1504 int s_lineno = lineno;
1507 if (c == '\'' || c == '"')
1516 start_rule(bp, s_lineno);
1521 if (last_was_action)
1522 insert_empty_rule();
1523 last_was_action = 0;
1525 if (++nitems > maxitems)
1527 pitem[nitems - 1] = bp;
1531 after_blanks(char *s)
1533 while (*s != '\0' && isspace(UCH(*s)))
1546 FILE *f = action_file;
1547 int a_lineno = lineno;
1548 char *a_line = dup_line();
1549 char *a_cptr = a_line + (cptr - line);
1551 if (last_was_action)
1552 insert_empty_rule();
1553 last_was_action = 1;
1555 fprintf(f, "case %d:\n", nrules - 2);
1557 fprintf(f, line_format, lineno, input_file_name);
1561 /* avoid putting curly-braces in first column, to ease editing */
1562 if (*after_blanks(cptr) == L_CURL)
1565 cptr = after_blanks(cptr);
1569 for (i = nitems - 1; pitem[i]; --i)
1579 int d_lineno = lineno;
1580 char *d_line = dup_line();
1581 char *d_cptr = d_line + (cptr - line);
1588 fprintf(f, "yyval.%s", tag);
1593 else if (isdigit(c))
1597 dollar_warning(d_lineno, i);
1598 fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
1602 else if (c == '-' && isdigit(UCH(cptr[1])))
1605 i = -get_number() - n;
1606 fprintf(f, "yystack.l_mark[%d].%s", i, tag);
1611 dollar_error(d_lineno, d_line, d_cptr);
1613 else if (cptr[1] == '$')
1617 tag = plhs[nrules]->tag;
1620 fprintf(f, "yyval.%s", tag);
1623 fprintf(f, "yyval");
1627 else if (isdigit(UCH(cptr[1])))
1633 if (i <= 0 || i > n)
1635 tag = pitem[nitems + i - n - 1]->tag;
1637 untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1638 fprintf(f, "yystack.l_mark[%d].%s", i - n, tag);
1643 dollar_warning(lineno, i);
1644 fprintf(f, "yystack.l_mark[%d]", i - n);
1648 else if (cptr[1] == '-')
1654 fprintf(f, "yystack.l_mark[%d]", -i - n);
1658 if (isalpha(c) || c == '_' || c == '$')
1665 while (isalnum(c) || c == '_' || c == '$');
1677 unterminated_action(a_lineno, a_line, a_cptr);
1682 fprintf(f, "\nbreak;\n");
1693 fprintf(f, "\nbreak;\n");
1700 int s_lineno = lineno;
1701 char *s_line = dup_line();
1702 char *s_cptr = s_line + (cptr - line - 1);
1715 unterminated_string(s_lineno, s_line, s_cptr);
1724 unterminated_string(s_lineno, s_line, s_cptr);
1735 while ((c = *++cptr) != '\n')
1737 if (c == '*' && cptr[1] == '/')
1747 int c_lineno = lineno;
1748 char *c_line = dup_line();
1749 char *c_cptr = c_line + (cptr - line - 1);
1757 if (c == '*' && *cptr == '/')
1768 unterminated_comment(c_lineno, c_line, c_cptr);
1786 if (c == '%' || c == '\\')
1794 else if ((c == 'p' || c == 'P') &&
1795 ((c = cptr[2]) == 'r' || c == 'R') &&
1796 ((c = cptr[3]) == 'e' || c == 'E') &&
1797 ((c = cptr[4]) == 'c' || c == 'C') &&
1798 ((c = cptr[5], !IS_IDENT(c))))
1801 syntax_error(lineno, line, cptr);
1804 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1806 else if (c == '\'' || c == '"')
1810 syntax_error(lineno, line, cptr);
1814 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1817 rprec[nrules] = bp->prec;
1818 rassoc[nrules] = bp->assoc;
1827 initialize_grammar();
1842 else if (c == L_CURL || c == '=')
1847 start_rule(plhs[nrules - 1], 0);
1856 syntax_error(lineno, line, cptr);
1869 for (i = 0; i < ntags; ++i)
1871 assert(tag_table[i]);
1883 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1884 for (bp = first_symbol; bp; bp = bp->next)
1885 name_pool_size += strlen(bp->name) + 1;
1887 name_pool = TMALLOC(char, name_pool_size);
1888 NO_SPACE(name_pool);
1890 strcpy(name_pool, "$accept");
1891 strcpy(name_pool + 8, "$end");
1893 for (bp = first_symbol; bp; bp = bp->next)
1897 while ((*t++ = *s++) != 0)
1909 if (goal->class == UNKNOWN)
1910 undefined_goal(goal->name);
1912 for (bp = first_symbol; bp; bp = bp->next)
1914 if (bp->class == UNKNOWN)
1916 undefined_symbol_warning(bp->name);
1923 protect_string(char *src, char **des)
1936 if ('\\' == *s || '"' == *s)
1942 *des = d = TMALLOC(char, len);
1948 if ('\\' == *s || '"' == *s)
1965 for (bp = first_symbol; bp; bp = bp->next)
1968 if (bp->class == TERM)
1971 start_symbol = (Value_t) ntokens;
1972 nvars = nsyms - ntokens;
1974 symbol_name = TMALLOC(char *, nsyms);
1975 NO_SPACE(symbol_name);
1977 symbol_value = TMALLOC(Value_t, nsyms);
1978 NO_SPACE(symbol_value);
1980 symbol_prec = TMALLOC(short, nsyms);
1981 NO_SPACE(symbol_prec);
1983 symbol_assoc = TMALLOC(char, nsyms);
1984 NO_SPACE(symbol_assoc);
1986 v = TMALLOC(bucket *, nsyms);
1990 v[start_symbol] = 0;
1993 j = (Value_t) (start_symbol + 1);
1994 for (bp = first_symbol; bp; bp = bp->next)
1996 if (bp->class == TERM)
2001 assert(i == ntokens && j == nsyms);
2003 for (i = 1; i < ntokens; ++i)
2006 goal->index = (Index_t) (start_symbol + 1);
2007 k = (Value_t) (start_symbol + 2);
2017 for (i = (Value_t) (start_symbol + 1); i < nsyms; ++i)
2027 for (i = 1; i < ntokens; ++i)
2032 for (j = k++; j > 0 && symbol_value[j - 1] > n; --j)
2033 symbol_value[j] = symbol_value[j - 1];
2034 symbol_value[j] = n;
2040 if (v[1]->value == UNDEFINED)
2045 for (i = 2; i < ntokens; ++i)
2047 if (v[i]->value == UNDEFINED)
2049 while (j < k && n == symbol_value[j])
2051 while (++j < k && n == symbol_value[j])
2060 symbol_name[0] = name_pool + 8;
2061 symbol_value[0] = 0;
2063 symbol_assoc[0] = TOKEN;
2064 for (i = 1; i < ntokens; ++i)
2066 symbol_name[i] = v[i]->name;
2067 symbol_value[i] = v[i]->value;
2068 symbol_prec[i] = v[i]->prec;
2069 symbol_assoc[i] = v[i]->assoc;
2071 symbol_name[start_symbol] = name_pool;
2072 symbol_value[start_symbol] = -1;
2073 symbol_prec[start_symbol] = 0;
2074 symbol_assoc[start_symbol] = TOKEN;
2075 for (++i; i < nsyms; ++i)
2078 symbol_name[k] = v[i]->name;
2079 symbol_value[k] = v[i]->value;
2080 symbol_prec[k] = v[i]->prec;
2081 symbol_assoc[k] = v[i]->assoc;
2086 symbol_pname = TMALLOC(char *, nsyms);
2087 NO_SPACE(symbol_pname);
2089 for (i = 0; i < nsyms; ++i)
2090 protect_string(symbol_name[i], &(symbol_pname[i]));
2104 ritem = TMALLOC(Value_t, nitems);
2107 rlhs = TMALLOC(Value_t, nrules);
2110 rrhs = TMALLOC(Value_t, nrules + 1);
2113 rprec = TREALLOC(Value_t, rprec, nrules);
2116 rassoc = TREALLOC(Assoc_t, rassoc, nrules);
2120 ritem[1] = goal->index;
2125 rlhs[2] = start_symbol;
2131 for (i = 3; i < nrules; ++i)
2133 rlhs[i] = plhs[i]->index;
2139 ritem[j] = pitem[j]->index;
2140 if (pitem[j]->class == TERM)
2142 prec2 = pitem[j]->prec;
2143 assoc = pitem[j]->assoc;
2147 ritem[j] = (Value_t) - i;
2149 if (rprec[i] == UNDEFINED)
2165 size_t j, spacing = 0;
2166 FILE *f = verbose_file;
2172 for (i = 2; i < nrules; ++i)
2174 if (rlhs[i] != rlhs[i - 1])
2178 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
2179 spacing = strlen(symbol_name[rlhs[i]]) + 1;
2183 fprintf(f, "%4d ", i - 2);
2190 while (ritem[k] >= 0)
2192 fprintf(f, " %s", symbol_name[ritem[k]]);
2203 write_section(code_file, banner);
2204 create_symbol_table();
2205 read_declarations();
2207 free_symbol_table();
2219 free_declarations(param * list)
2223 param *next = list->next;
2236 lex_param = free_declarations(lex_param);
2237 parse_param = free_declarations(parse_param);
2247 DO_FREE(symbol_name);
2248 DO_FREE(symbol_prec);
2249 DO_FREE(symbol_assoc);
2250 DO_FREE(symbol_value);