1 /* Input routines.
2 Copyright (C) 1989-1998, 2002-2004 Free Software Foundation, Inc.
3 Written by Douglas C. Schmidt <schmidt@ics.uci.edu>
4 and Bruno Haible <bruno@clisp.org>.
6 This file is part of GNU GPERF.
8 GNU GPERF is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GNU GPERF is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING.
20 If not, write to the Free Software Foundation, Inc.,
21 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
27 #include <stdlib.h> /* declares exit() */
28 #include <string.h> /* declares strncpy(), strchr() */
29 #include <limits.h> /* defines UCHAR_MAX etc. */
/* Constructor: stores the given stream and keyword factory in the _stream
   and _factory members.
   NOTE(review): the constructor body is not visible in this listing (the
   embedded line numbers jump from 34 to 38). */
33 Input::Input (FILE *stream, Keyword_Factory *keyword_factory)
34 : _stream (stream), _factory (keyword_factory)
38 /* Returns a pretty representation of the input file name, for error and
39 warning messages. */
/* Takes no arguments.  Yields option.get_input_file_name () when that is
   non-null, and the fixed text "(standard input)" otherwise.
   NOTE(review): the return-type line and the braces are not visible in this
   listing (the embedded numbers skip 40, 42 and 45). */
41 pretty_input_file_name ()
43 if (option.get_input_file_name ())
44 return option.get_input_file_name ();
/* No input file name is set: fall back to a fixed placeholder. */
46 return "(standard input)";
49 /* Returns true if the given line contains a "%DECL" declaration. */
/* Visible parameters:
     line, line_end - start and end of the text to examine;
     lineno         - line number, used only in the diagnostic below.
   decl, used in the loop below, is the directive name to look for.
   NOTE(review): the rest of the parameter list and several short statements
   (increments, returns, braces) are missing from this listing; the gaps in
   the embedded line numbers show where. */
51 is_declaration (const char *line, const char *line_end, unsigned int lineno,
/* Compare the input with decl, one character at a time. */
58 for (const char *d = decl; *d; d++)
/* The input ran out before all of decl was matched. */
60 if (!(line < line_end))
/* A '-' in decl also matches a '_' in the input. */
62 if (!(*line == *d || (*d == '-' && *line == '_')))
/* Tests whether the character after the match is a letter, '-' or '_',
   i.e. whether the word in the input continues beyond decl.  The statement
   that acts on this is not visible; presumably it rejects the match. */
67 && ((*line >= 'A' && *line <= 'Z')
68 || (*line >= 'a' && *line <= 'z')
69 || *line == '-' || *line == '_'))
74 /* Skip whitespace. */
75 while (line < line_end && (*line == ' ' || *line == '\t'))
78 /* Expect end of line. */
79 if (line < line_end && *line != '\n')
/* Anything other than a newline at this point is reported on stderr. */
81 fprintf (stderr, "%s:%u: junk after declaration\n",
82 pretty_input_file_name (), lineno);
89 /* Tests if the given line contains a "%DECL=ARG" declaration.
90 If yes, it sets *ARGP to the argument, and returns true.
91 Otherwise, it returns false. */
/* NOTE(review): one line of the parameter list and several short statements
   (increments, returns, the store into *argp, braces) are missing from this
   listing; the gaps in the embedded line numbers show where. */
93 is_declaration_with_arg (const char *line, const char *line_end,
95 const char *decl, char **argp)
/* Compare the input with decl, one character at a time. */
101 for (const char *d = decl; *d; d++)
/* The input ran out before all of decl was matched. */
103 if (!(line < line_end))
/* A '-' in decl also matches a '_' in the input. */
105 if (!(*line == *d || (*d == '-' && *line == '_')))
/* Tests whether the character after the match is a letter, '-' or '_',
   i.e. whether the word in the input continues beyond decl. */
110 && ((*line >= 'A' && *line <= 'Z')
111 || (*line >= 'a' && *line <= 'z')
112 || *line == '-' || *line == '_'))
115 /* OK, found DECL. */
/* decl has to be followed directly by '='; if not, the argument is reported
   as missing. */
118 if (!(line < line_end && *line == '='))
120 fprintf (stderr, "%s:%u: missing argument in %%%s=ARG declaration.\n",
121 pretty_input_file_name (), lineno, decl);
126 /* The next word is the argument. */
/* The buffer has room for everything up to line_end plus one more char.
   NOTE(review): nothing visible here frees it; presumably ownership passes
   to the caller through *argp - confirm. */
127 char *arg = new char[line_end - line + 1];
/* The word ends at the first space, tab or newline. */
129 while (line < line_end && !(*line == ' ' || *line == '\t' || *line == '\n'))
133 /* Skip whitespace. */
134 while (line < line_end && (*line == ' ' || *line == '\t'))
137 /* Expect end of line. */
138 if (line < line_end && *line != '\n')
140 fprintf (stderr, "%s:%u: junk after declaration\n",
141 pretty_input_file_name (), lineno);
149 /* Tests if the given line contains a "%define DECL ARG" declaration.
150 If yes, it sets *ARGP to the argument, and returns true.
151 Otherwise, it returns false. */
/* NOTE(review): one line of the parameter list and several short statements
   (increments, returns, the store into *argp, braces) are missing from this
   listing; the gaps in the embedded line numbers show where. */
153 is_define_declaration (const char *line, const char *line_end,
155 const char *decl, char **argp)
/* First the fixed word "define" has to be present. */
162 for (const char *d = "define"; *d; d++)
164 if (!(line < line_end))
/* "define" has to be followed by at least one space or tab. */
170 if (!(line < line_end && (*line == ' ' || *line == '\t')))
174 /* Skip whitespace. */
175 while (line < line_end && (*line == ' ' || *line == '\t'))
/* Now compare the input with decl, one character at a time. */
179 for (const char *d = decl; *d; d++)
181 if (!(line < line_end))
/* A '-' in decl also matches a '_' in the input. */
183 if (!(*line == *d || (*d == '-' && *line == '_')))
/* Tests whether the character after the match is a letter, '-' or '_',
   i.e. whether the word in the input continues beyond decl. */
188 && ((*line >= 'A' && *line <= 'Z')
189 || (*line >= 'a' && *line <= 'z')
190 || *line == '-' || *line == '_'))
193 /* OK, found DECL. */
195 /* Skip whitespace. */
/* If decl is not followed by a space or tab, the argument is reported as
   missing. */
196 if (!(line < line_end && (*line == ' ' || *line == '\t')))
198 fprintf (stderr, "%s:%u:"
199 " missing argument in %%define %s ARG declaration.\n",
200 pretty_input_file_name (), lineno, decl);
/* NOTE(review): the trailing ';' suggests this is the closing line of a
   do ... while loop whose "do" line and body are not visible here (embedded
   numbers 203-204 are missing). */
205 while (line < line_end && (*line == ' ' || *line == '\t'));
207 /* The next word is the argument. */
/* The buffer has room for everything up to line_end plus one more char.
   NOTE(review): nothing visible here frees it; presumably ownership passes
   to the caller through *argp - confirm. */
208 char *arg = new char[line_end - line + 1];
/* The word ends at the first space, tab or newline. */
210 while (line < line_end && !(*line == ' ' || *line == '\t' || *line == '\n'))
214 /* Skip whitespace. */
215 while (line < line_end && (*line == ' ' || *line == '\t'))
218 /* Expect end of line. */
219 if (line < line_end && *line != '\n')
221 fprintf (stderr, "%s:%u: junk after declaration\n",
222 pretty_input_file_name (), lineno);
230 /* Reads the entire input file. */
/* NOTE(review): the line that names this function is not visible in this
   listing (the embedded numbers jump from 230 to 234), and neither are many
   short lines inside it (braces, "else", single statements). */
234 /* The input file has the following structure:
       DECLARATIONS
       %%
       KEYWORDS
       %%
       ADDITIONAL_CODE
240 Since the DECLARATIONS and the ADDITIONAL_CODE sections are optional,
241 we have to read the entire file in the case there is only one %%
242 separator line, in order to determine whether the structure is
       DECLARATIONS
       %%
       KEYWORDS
   or
       KEYWORDS
       %%
       ADDITIONAL_CODE
250 When the option -t is given or when the first section contains
251 declaration lines starting with %, we go for the first interpretation,
252 otherwise for the second interpretation. */
/* get_delim is called with EOF as the delimiter, which presumably makes it
   read everything up to the end of the stream. */
255 size_t input_size = 0;
256 int input_length = get_delim (&input, &input_size, EOF, _stream);
/* A negative length is treated as failure; the two messages below tell a
   read error (ferror) apart from an empty input. */
257 if (input_length < 0)
259 if (ferror (_stream))
260 fprintf (stderr, "%s: error while reading input file\n",
261 pretty_input_file_name ());
263 fprintf (stderr, "%s: The input file is empty!\n",
264 pretty_input_file_name ());
268 /* We use input_end as a limit, in order to cope with NUL bytes in the
269 input. But note that one trailing NUL byte has been added after
270 input_end, for convenience. */
271 char *input_end = input + input_length;
/* Bounds of the declarations and keywords sections, and the line number at
   which the keywords start; all filled in below. */
273 const char *declarations;
274 const char *declarations_end;
275 const char *keywords;
276 const char *keywords_end;
277 unsigned int keywords_lineno;
279 /* Break up the input into the three sections. */
/* At most two separators are recorded, each with its position and its line
   number. */
281 const char *separator[2] = { NULL, NULL };
282 unsigned int separator_lineno[2] = { 0, 0 };
285 unsigned int lineno = 1;
286 for (const char *p = input; p < input_end; )
/* A separator is "%%" at p.  Reading p[1] stays within the buffer even at
   the last input byte because of the trailing NUL mentioned above. */
288 if (p[0] == '%' && p[1] == '%')
290 separator[separators] = p;
291 separator_lineno[separators] = lineno;
/* Two is the maximum: the arrays above have two slots. */
292 if (++separators == 2)
/* Look for the end of the current line. */
296 p = (const char *) memchr (p, '\n', input_end - p);
/* Decide whether the input starts with a declarations section.  The
   conditions that select between the assignments below are not visible in
   this listing; see the structure comment at the top for the rule. */
304 bool has_declarations;
308 has_declarations = true;
311 has_declarations = false;
/* Scan the text in front of the first separator, line by line. */
312 for (const char *p = input; p < separator[0]; )
316 has_declarations = true;
319 p = (const char *) memchr (p, '\n', separator[0] - p);
328 has_declarations = (separators > 0);
330 if (has_declarations)
/* The declarations run from the start of the input to the first separator. */
332 declarations = input;
333 declarations_end = separator[0];
334 /* Give a warning if the separator line is nonempty. */
335 bool nonempty_line = false;
/* Start right behind the two '%' characters of the separator. */
337 for (p = declarations_end + 2; p < input_end; )
/* Anything other than a space or a tab counts as junk. */
344 if (!(*p == ' ' || *p == '\t'))
345 nonempty_line = true;
349 fprintf (stderr, "%s:%u: warning: junk after %%%% is ignored\n",
350 pretty_input_file_name (), separator_lineno[0]);
/* The keywords begin on the line after the first separator. */
352 keywords_lineno = separator_lineno[0] + 1;
357 declarations_end = NULL;
/* Is there a separator left that terminates the keywords section? */
362 if (separators > (has_declarations ? 1 : 0))
364 keywords_end = separator[separators-1];
/* The verbatim code starts right behind the "%%" of the last separator and
   extends to the end of the input. */
365 _verbatim_code = separator[separators-1] + 2;
366 _verbatim_code_end = input_end;
367 _verbatim_code_lineno = separator_lineno[separators-1];
/* Otherwise the keywords extend to the end of the input and there is no
   verbatim code. */
371 keywords_end = input_end;
372 _verbatim_code = NULL;
373 _verbatim_code_end = NULL;
374 _verbatim_code_lineno = 0;
378 /* Parse the declarations section. */
/* NOTE(review): this is the interior of a larger function; many short lines
   (braces, "else", "if (line[0] == '%')" style tests, single statements) are
   missing from this listing, as the gaps in the embedded numbers show. */
/* Nothing has been seen yet: no %{...%} section, no struct declaration. */
380 _verbatim_declarations = NULL;
381 _verbatim_declarations_end = NULL;
382 _verbatim_declarations_lineno = 0;
384 _struct_decl_lineno = 0;
388 unsigned int lineno = 1;
/* struct_decl accumulates the text of plain lines (see the "Append" code
   further down); struct_decl_linenos holds the input line number of each
   accumulated line, and struct_decl_linecount counts them. */
389 char *struct_decl = NULL;
390 unsigned int *struct_decl_linenos = NULL;
391 unsigned int struct_decl_linecount = 0;
/* Walk through the declarations one line at a time. */
392 for (const char *line = declarations; line < declarations_end; )
394 const char *line_end;
395 line_end = (const char *) memchr (line, '\n', declarations_end - line);
396 if (line_end != NULL)
/* Presumably the branch for "no newline found": the line then extends to
   the end of the section. */
399 line_end = declarations_end;
/* A %{ opener while one was already recorded: report both line numbers. */
406 if (_verbatim_declarations != NULL)
408 fprintf (stderr, "%s:%u:\n%s:%u:"
409 " only one %%{...%%} section is allowed\n",
410 pretty_input_file_name (),
411 _verbatim_declarations_lineno,
412 pretty_input_file_name (), lineno);
/* The verbatim text starts two characters into this line. */
415 _verbatim_declarations = line + 2;
416 _verbatim_declarations_lineno = lineno;
/* A closing %} marker. */
418 else if (line[1] == '}')
421 if (_verbatim_declarations == NULL)
423 fprintf (stderr, "%s:%u:"
424 " %%} outside of %%{...%%} section\n",
425 pretty_input_file_name (), lineno);
428 if (_verbatim_declarations_end != NULL)
430 fprintf (stderr, "%s:%u:"
431 " %%{...%%} section already closed\n",
432 pretty_input_file_name (), lineno);
/* The verbatim text ends where this line begins. */
435 _verbatim_declarations_end = line;
436 /* Give a warning if the rest of the line is nonempty. */
437 bool nonempty_line = false;
439 for (q = line + 2; q < line_end; q++)
/* Anything other than a space or a tab counts as junk. */
446 if (!(*q == ' ' || *q == '\t'))
447 nonempty_line = true;
450 fprintf (stderr, "%s:%u:"
451 " warning: junk after %%} is ignored\n",
452 pretty_input_file_name (), lineno);
/* Inside an open (not yet closed) %{...%} section. */
454 else if (_verbatim_declarations != NULL
455 && _verbatim_declarations_end == NULL)
457 fprintf (stderr, "%s:%u:"
458 " warning: %% directives are ignored"
459 " inside the %%{...%%} section\n",
460 pretty_input_file_name (), lineno);
/* The known directives are tried one after the other; on a match the
   corresponding option is set.  For some of them the directive name or the
   action is on a line that is missing from this listing. */
466 if (is_declaration_with_arg (line, line_end, lineno,
468 option.set_delimiters (arg);
471 if (is_declaration (line, line_end, lineno, "struct-type"))
475 if (is_declaration (line, line_end, lineno, "ignore-case"))
476 option.set (UPPERLOWER);
479 if (is_declaration_with_arg (line, line_end, lineno,
481 option.set_language (arg);
484 if (is_define_declaration (line, line_end, lineno,
486 option.set_slot_name (arg);
489 if (is_define_declaration (line, line_end, lineno,
490 "initializer-suffix", &arg))
491 option.set_initializer_suffix (arg);
494 if (is_define_declaration (line, line_end, lineno,
495 "hash-function-name", &arg))
496 option.set_hash_name (arg);
499 if (is_define_declaration (line, line_end, lineno,
500 "lookup-function-name", &arg))
501 option.set_function_name (arg);
504 if (is_define_declaration (line, line_end, lineno,
506 option.set_class_name (arg);
509 if (is_declaration (line, line_end, lineno, "7bit"))
510 option.set (SEVENBIT);
513 if (is_declaration (line, line_end, lineno, "compare-lengths"))
514 option.set (LENTABLE);
517 if (is_declaration (line, line_end, lineno, "compare-strncmp"))
521 if (is_declaration (line, line_end, lineno, "readonly-tables"))
525 if (is_declaration (line, line_end, lineno, "enum"))
529 if (is_declaration (line, line_end, lineno, "includes"))
530 option.set (INCLUDE);
533 if (is_declaration (line, line_end, lineno, "global-table"))
537 if (is_declaration (line, line_end, lineno, "pic"))
538 option.set (SHAREDLIB);
541 if (is_define_declaration (line, line_end, lineno,
542 "string-pool-name", &arg))
543 option.set_stringpool_name (arg);
546 if (is_declaration (line, line_end, lineno, "null-strings"))
547 option.set (NULLSTRINGS);
550 if (is_define_declaration (line, line_end, lineno,
551 "word-array-name", &arg))
552 option.set_wordlist_name (arg);
555 if (is_define_declaration (line, line_end, lineno,
556 "length-table-name", &arg))
557 option.set_lengthtable_name (arg);
560 if (is_declaration_with_arg (line, line_end, lineno,
/* The argument is converted with atoi; a result that is not positive is
   reported as an error. */
563 option.set_total_switches (atoi (arg));
564 if (option.get_total_switches () <= 0)
566 fprintf (stderr, "%s:%u: number of switches %s"
567 " must be a positive number\n",
568 pretty_input_file_name (), lineno, arg);
574 if (is_declaration (line, line_end, lineno, "omit-struct-type"))
/* None of the directives above matched. */
579 fprintf (stderr, "%s:%u: unrecognized %% directive\n",
580 pretty_input_file_name (), lineno);
/* A line that is not inside an open %{...%} section. */
585 else if (!(_verbatim_declarations != NULL
586 && _verbatim_declarations_end == NULL))
588 /* Append the line to struct_decl. */
/* A new buffer is built that holds the old text, this line and a
   terminating NUL; it then replaces the old buffer. */
589 size_t old_len = (struct_decl ? strlen (struct_decl) : 0);
590 size_t line_len = line_end - line;
591 size_t new_len = old_len + line_len + 1;
592 char *new_struct_decl = new char[new_len];
594 memcpy (new_struct_decl, struct_decl, old_len);
595 memcpy (new_struct_decl + old_len, line, line_len);
596 new_struct_decl[old_len + line_len] = '\0';
598 delete[] struct_decl;
599 struct_decl = new_struct_decl;
600 /* Append the lineno to struct_decl_linenos. */
/* Same scheme: allocate an array with one more slot, copy, replace. */
601 unsigned int *new_struct_decl_linenos =
602 new unsigned int[struct_decl_linecount + 1];
603 if (struct_decl_linecount > 0)
604 memcpy (new_struct_decl_linenos, struct_decl_linenos,
605 struct_decl_linecount * sizeof (unsigned int));
606 new_struct_decl_linenos[struct_decl_linecount] = lineno;
607 if (struct_decl_linenos)
608 delete[] struct_decl_linenos;
609 struct_decl_linenos = new_struct_decl_linenos;
610 /* Increment struct_decl_linecount. */
611 struct_decl_linecount++;
/* A %{ section that was opened but never closed. */
616 if (_verbatim_declarations != NULL && _verbatim_declarations_end == NULL)
618 fprintf (stderr, "%s:%u: unterminated %%{ section\n",
619 pretty_input_file_name (), _verbatim_declarations_lineno);
623 /* Determine _struct_decl, _return_type, _struct_tag. */
/* NOTE(review): this is the interior of a larger function; the condition
   that guards this part and many short lines (braces, increments, "else")
   are missing from this listing. */
628 /* Drop leading whitespace and comments. */
/* p scans the collected text; l points into the array of line numbers.
   Presumably l is advanced whenever p passes a newline - the statements
   that move them are not visible here. */
630 char *p = struct_decl;
631 unsigned int *l = struct_decl_linenos;
634 if (p[0] == ' ' || p[0] == '\t')
649 /* Skip over ANSI C style comment. */
/* This test recognizes the closing star-slash pair of such a comment. */
653 if (p[0] == '*' && p[1] == '/')
666 /* Skip over ISO C99 or C++ style comment. */
/* Such a comment extends to the end of the line or of the text. */
668 while (p[0] != '\0' && p[0] != '\n')
/* If p moved, replace struct_decl by a copy that starts at p; len + 1 bytes
   are copied so that the terminating NUL comes along. */
680 if (p != struct_decl)
682 size_t len = strlen (p);
683 char *new_struct_decl = new char[len + 1];
684 memcpy (new_struct_decl, p, len + 1);
685 delete[] struct_decl;
686 struct_decl = new_struct_decl;
/* Remember the line number that l ended up pointing at. */
688 _struct_decl_lineno = *l;
690 /* Drop trailing whitespace. */
/* Works backwards from the end of the text over newlines, spaces and tabs. */
691 for (char *p = struct_decl + strlen (struct_decl); p > struct_decl;)
692 if (p[-1] == '\n' || p[-1] == ' ' || p[-1] == '\t')
/* Nothing is left (or nothing was collected at all). */
697 if (struct_decl == NULL || struct_decl[0] == '\0')
699 fprintf (stderr, "%s: missing struct declaration"
700 " for option --struct-type\n",
701 pretty_input_file_name ());
705 /* Ensure trailing semicolon. */
706 size_t old_len = strlen (struct_decl);
707 if (struct_decl[old_len - 1] != ';')
/* Two extra bytes: one for the ';' and one for the terminating NUL. */
709 char *new_struct_decl = new char[old_len + 2];
710 memcpy (new_struct_decl, struct_decl, old_len);
711 new_struct_decl[old_len] = ';';
712 new_struct_decl[old_len + 1] = '\0';
713 delete[] struct_decl;
714 struct_decl = new_struct_decl;
717 /* Set _struct_decl to the entire declaration. */
718 _struct_decl = struct_decl;
719 /* Set _struct_tag to the naked "struct something". */
/* The tag is the text in front of the first '{', ';' or newline ... */
721 for (p = struct_decl; *p && *p != '{' && *p != ';' && *p != '\n'; p++)
/* ... with newlines, spaces and tabs at its end removed. */
723 for (; p > struct_decl;)
724 if (p[-1] == '\n' || p[-1] == ' ' || p[-1] == '\t')
728 size_t struct_tag_length = p - struct_decl;
729 char *struct_tag = new char[struct_tag_length + 1];
730 memcpy (struct_tag, struct_decl, struct_tag_length);
731 struct_tag[struct_tag_length] = '\0';
732 _struct_tag = struct_tag;
733 /* The return type of the lookup function is "struct something *".
734 No "const" here, because if !option[CONST], some user code might
735 want to modify the structure. */
/* Three extra bytes: the space, the '*' and the terminating NUL. */
736 char *return_type = new char[struct_tag_length + 3];
737 memcpy (return_type, struct_decl, struct_tag_length);
738 return_type[struct_tag_length] = ' ';
739 return_type[struct_tag_length + 1] = '*';
740 return_type[struct_tag_length + 2] = '\0';
741 _return_type = return_type;
/* The array of line numbers is not needed any more. */
744 if (struct_decl_linenos)
745 delete[] struct_decl_linenos;
748 /* Parse the keywords section. */
/* NOTE(review): this is the interior of a larger function; many short lines
   (braces, "else", "switch"/"case" labels, increments, "break") are missing
   from this listing, as the gaps in the embedded numbers show. */
/* New list nodes are attached through list_tail, which starts out pointing
   at _head. */
750 Keyword_List **list_tail = &_head;
751 const char *delimiters = option.get_delimiters ();
752 unsigned int lineno = keywords_lineno;
753 bool charset_dependent = false;
/* Walk through the keywords section one line at a time. */
754 for (const char *line = keywords; line < keywords_end; )
756 const char *line_end;
757 line_end = (const char *) memchr (line, '\n', keywords_end - line);
758 if (line_end != NULL)
/* Presumably the branch for "no newline found": the line then extends to
   the end of the section. */
761 line_end = keywords_end;
764 ; /* Comment line. */
765 else if (line[0] == '%')
767 fprintf (stderr, "%s:%u:"
768 " declarations are not allowed in the keywords section.\n"
769 "To declare a keyword starting with %%, enclose it in"
771 pretty_input_file_name (), lineno);
776 /* An input line carrying a keyword. */
778 size_t keyword_length;
783 /* Parse a string in ANSI C syntax. */
/* The output buffer is sized after the length of the input line; kp is the
   write position in it. */
784 char *kp = new char[line_end-line];
/* lp is the read position; it starts one character into the line,
   presumably just behind the opening quote. */
786 const char *lp = line + 1;
792 fprintf (stderr, "%s:%u: unterminated string\n",
793 pretty_input_file_name (), lineno);
/* Octal escape: at most three octal digits are accumulated. */
803 case '0': case '1': case '2': case '3':
804 case '4': case '5': case '6': case '7':
808 while (count < 3 && *lp >= '0' && *lp <= '7')
810 code = (code << 3) + (*lp - '0');
814 if (code > UCHAR_MAX)
816 "%s:%u: octal escape out of range\n",
817 pretty_input_file_name (), lineno);
/* The value is stored as a single char. */
818 *kp = static_cast<char>(code);
/* Hexadecimal escape: digits are consumed as long as there are any. */
826 while ((*lp >= '0' && *lp <= '9')
827 || (*lp >= 'A' && *lp <= 'F')
828 || (*lp >= 'a' && *lp <= 'f'))
831 + (*lp >= 'A' && *lp <= 'F'
833 *lp >= 'a' && *lp <= 'f'
840 fprintf (stderr, "%s:%u: hexadecimal escape"
841 " without any hex digits\n",
842 pretty_input_file_name (), lineno);
843 if (code > UCHAR_MAX)
844 fprintf (stderr, "%s:%u: hexadecimal escape"
846 pretty_input_file_name (), lineno);
847 *kp = static_cast<char>(code);
/* The remaining escapes and ordinary characters set charset_dependent; the
   case labels that belong to most of these assignments are not visible. */
850 case '\\': case '\'': case '"':
853 charset_dependent = true;
858 charset_dependent = true;
863 charset_dependent = true;
868 charset_dependent = true;
873 charset_dependent = true;
878 charset_dependent = true;
883 charset_dependent = true;
888 charset_dependent = true;
891 fprintf (stderr, "%s:%u: invalid escape sequence"
893 pretty_input_file_name (), lineno);
903 charset_dependent = true;
/* Behind the string, a character that is not the end of the line has to be
   one of the delimiters. */
908 if (lp < line_end && *lp != '\n')
910 if (strchr (delimiters, *lp) == NULL)
912 fprintf (stderr, "%s:%u: string not followed"
914 pretty_input_file_name (), lineno);
/* The keyword length is the number of characters written to the buffer. */
919 keyword_length = kp - keyword;
/* Copy the remainder of the line; the index expression below places the
   terminator so that a trailing newline is cut off. */
922 char *line_rest = new char[line_end - lp + 1];
923 memcpy (line_rest, lp, line_end - lp);
924 line_rest[line_end - lp -
925 (line_end > lp && line_end[-1] == '\n' ? 1 : 0)]
934 /* Not a string. Look for the delimiter. */
935 const char *lp = line;
/* End of line reached without a delimiter: the keyword is everything seen
   so far. */
938 if (!(lp < line_end && *lp != '\n'))
941 keyword_length = lp - line;
/* Delimiter found: the keyword ends in front of it. */
945 if (strchr (delimiters, *lp) != NULL)
948 keyword_length = lp - line;
/* Copy the remainder of the line, as in the string case above. */
952 char *line_rest = new char[line_end - lp + 1];
953 memcpy (line_rest, lp, line_end - lp);
954 line_rest[line_end - lp -
955 (line_end > lp && line_end[-1] == '\n'
/* A nonempty keyword that was not written as a string sets the flag. */
966 if (keyword_length > 0)
967 charset_dependent = true;
970 /* Allocate Keyword and add it to the list. */
971 Keyword *new_kw = _factory->create_keyword (keyword, keyword_length,
973 new_kw->_lineno = lineno;
/* Attach the new node and move list_tail to its rest() link. */
974 *list_tail = new Keyword_List (new_kw);
975 list_tail = &(*list_tail)->rest();
985 fprintf (stderr, "%s: No keywords in input file!\n",
986 pretty_input_file_name ());
990 _charset_dependent = charset_dependent;
993 /* To be freed in the destructor. */
995 _input_end = input_end;
/* NOTE(review): the header line of this definition is not visible in this
   listing; presumably it is the destructor, since the code above refers to
   memory "to be freed in the destructor". */
1000 /* Free allocated memory. */
/* const_cast strips the const from the member pointers so that delete[] can
   be applied.  The reader code above assigns buffers obtained from new[] to
   these three members. */
1001 delete[] const_cast<char*>(_return_type);
1002 delete[] const_cast<char*>(_struct_tag);
1003 delete[] const_cast<char*>(_struct_decl);