1 /* parse.y - parser for flex input */
3 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
4 %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS
6 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
7 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
11 * Copyright (c) 1990 The Regents of the University of California.
12 * All rights reserved.
14 * This code is derived from software contributed to Berkeley by
17 * The United States Government has rights in this work pursuant
18 * to contract no. DE-AC03-76SF00098 between the United States
19 * Department of Energy and the University of California.
21 * Redistribution and use in source and binary forms are permitted provided
22 * that: (1) source distributions retain this entire copyright notice and
23 * comment, and (2) distributions including binaries display the following
24 * acknowledgement: ``This product includes software developed by the
25 * University of California, Berkeley and its contributors'' in the
26 * documentation or other materials provided with the distribution and in
27 * all advertising materials mentioning features or use of this software.
28 * Neither the name of the University nor the names of its contributors may
29 * be used to endorse or promote products derived from this software without
30 * specific prior written permission.
31 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
32 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
33 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
36 /* $Header: /home/daffy/u0/vern/flex/RCS/parse.y,v 2.28 95/04/21 11:51:51 vern Exp $ */
39 /* Some versions of bison are broken in that they use alloca() but don't
40 * declare it properly. The following is the patented (just kidding!)
41 * #ifdef chud to fix the problem, courtesy of Francois Pinard.
44 /* AIX requires this to be the first thing in the file. What a piece. */
52 /* The remainder of the alloca() cruft has to come after including flexdef.h,
53 * so HAVE_ALLOCA_H is (possibly) defined.
58 # define alloca __builtin_alloca
77 /* Bletch, ^^^^ that was ugly! */
/* Scratch state shared by the grammar actions.
 *
 *   i                    - loop counter reused by many actions
 *   trlcontxt, varlength,
 *   variable_trail_rule  - per-rule flags, set to false when the parse of
 *                          a rule is initialized
 *   headcnt, trailcnt,
 *   rulelen              - per-rule counters, zeroed at the same point
 *   currccl              - character class being built; receives the
 *                          result of cclinit() and is the target of
 *                          ccladd()
 *   anyccl               - the character class created for '.'
 *   cclsorted, lastchar  - track whether characters are being added to
 *                          the current class in increasing order
 *   xcluflg              - passed to scinstal() as its second argument
 *   scnum                - result of sclookup() for a start condition name
 *   eps                  - holds a state made with mkstate( SYM_EPSILON )
 *   pat                  - passed to finish_rule() as its first argument
 *                          and, negated, to mkstate()
 */
80 int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, rulelen;
81 int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
86 static int madeany = false; /* whether we've made the '.' character class */
87 int previous_continued_action; /* whether the previous rule's action was '|' */
89 /* Expand a POSIX character class expression: every character code c in
 * [0, csize) that is ASCII (per isascii) and satisfies the predicate func
 * is added to the class currccl with ccladd().
 */
90 #define CCL_EXPR(func) \
93 for ( c = 0; c < csize; ++c ) \
94 if ( isascii(c) && func(c) ) \
95 ccladd( currccl, c ); \
98 /* While POSIX defines isblank(), it's not ANSI C, so supply our own test:
 * true only for a space or a horizontal tab.  The argument may be
 * evaluated twice, so it must not have side effects.
 */
99 #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
101 /* On some over-ambitious machines, such as DEC Alpha's, the default
102 * token type is "long" instead of "int"; this leads to problems with
103 * declaring yylval in flexdef.h. But so far, all the yacc's I've seen
104 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
105 * following should ensure that the default token type is "int".
112 goal : initlex sect1 sect1end sect2 initforrule
113 { /* add default rule */
119 def_rule = mkstate( -pat );
121 /* Remember the number of the default rule so we
122 * don't generate "can't match" warnings for it.
124 default_rule = num_rules;
126 finish_rule( def_rule, false, 0, 0 );
128 for ( i = 1; i <= lastsc; ++i )
129 scset[i] = mkbranch( scset[i], def_rule );
133 "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
135 add_action( "ECHO" );
137 add_action( ";\n\tYY_BREAK\n" );
142 { /* initialize for processing rules */
144 /* Create default DFA start condition. */
145 scinstal( "INITIAL", false );
149 sect1 : sect1 startconddecl namelist1
153 { synerr( "unknown error processing section 1" ); }
159 scon_stk = allocate_integer_array( lastsc + 1 );
164 startconddecl : SCDECL
171 namelist1 : namelist1 NAME
172 { scinstal( nmstr, xcluflg ); }
175 { scinstal( nmstr, xcluflg ); }
178 { synerr( "bad start condition list" ); }
181 options : OPTION_OP optionlist
184 optionlist : optionlist option
188 option : OPT_OUTFILE '=' NAME
190 outfilename = copy_string( nmstr );
193 | OPT_PREFIX '=' NAME
194 { prefix = copy_string( nmstr ); }
195 | OPT_YYCLASS '=' NAME
196 { yyclass = copy_string( nmstr ); }
/* Both alternatives - a single rule ending in a newline, or a '{' ... '}'
 * group of rules - finish by setting scon_stk_ptr back to the value
 * carried by the leading scon ($2).
 */
199 sect2 : sect2 scon initforrule flexrule '\n'
200 { scon_stk_ptr = $2; }
201 | sect2 scon '{' sect2 '}'
202 { scon_stk_ptr = $2; }
208 /* Initialize for a parse of one rule. */
209 trlcontxt = variable_trail_rule = varlength = false;
210 trailcnt = headcnt = rulelen = 0;
211 current_state_type = STATE_NORMAL;
212 previous_continued_action = continued_action;
222 finish_rule( pat, variable_trail_rule,
225 if ( scon_stk_ptr > 0 )
227 for ( i = 1; i <= scon_stk_ptr; ++i )
229 mkbranch( scbol[scon_stk[i]],
235 /* Add to all non-exclusive start conditions,
236 * including the default (0) start condition.
239 for ( i = 1; i <= lastsc; ++i )
241 scbol[i] = mkbranch( scbol[i],
249 if ( performance_report > 1 )
251 "'^' operator results in sub-optimal performance" );
258 finish_rule( pat, variable_trail_rule,
261 if ( scon_stk_ptr > 0 )
263 for ( i = 1; i <= scon_stk_ptr; ++i )
265 mkbranch( scset[scon_stk[i]],
271 for ( i = 1; i <= lastsc; ++i )
281 if ( scon_stk_ptr > 0 )
286 /* This EOF applies to all start conditions
287 * which don't already have EOF actions.
289 for ( i = 1; i <= lastsc; ++i )
291 scon_stk[++scon_stk_ptr] = i;
293 if ( scon_stk_ptr == 0 )
295 "all start conditions already have <<EOF>> rules" );
303 { synerr( "unrecognized rule" ); }
307 { $$ = scon_stk_ptr; }
310 scon : '<' scon_stk_ptr namelist2 '>'
317 for ( i = 1; i <= lastsc; ++i )
321 for ( j = 1; j <= scon_stk_ptr; ++j )
322 if ( scon_stk[j] == i )
325 if ( j > scon_stk_ptr )
326 scon_stk[++scon_stk_ptr] = i;
331 { $$ = scon_stk_ptr; }
334 namelist2 : namelist2 ',' sconname
339 { synerr( "bad start condition list" ); }
344 if ( (scnum = sclookup( nmstr )) == 0 )
345 format_pinpoint_message(
346 "undeclared start condition %s",
350 for ( i = 1; i <= scon_stk_ptr; ++i )
351 if ( scon_stk[i] == scnum )
354 "<%s> specified twice",
359 if ( i > scon_stk_ptr )
360 scon_stk[++scon_stk_ptr] = scnum;
367 if ( transchar[lastst[$2]] != SYM_EPSILON )
368 /* Provide final transition \now/ so it
369 * will be marked as a trailing context
372 $2 = link_machines( $2,
373 mkstate( SYM_EPSILON ) );
375 mark_beginning_as_normal( $2 );
376 current_state_type = STATE_NORMAL;
378 if ( previous_continued_action )
380 /* We need to treat this as variable trailing
381 * context so that the backup does not happen
382 * in the action but before the action switch
383 * statement. If the backup happens in the
384 * action, then the rules "falling into" this
385 * one's action will *also* do the backup,
388 if ( ! varlength || headcnt != 0 )
390 "trailing context made variable due to preceding '|' action" );
392 /* Mark as variable. */
397 if ( lex_compat || (varlength && headcnt == 0) )
398 { /* variable trailing context rule */
399 /* Mark the first part of the rule as the
400 * accepting "head" part of a trailing
403 * By the way, we didn't do this at the
404 * beginning of this production because back
405 * then current_state_type was set up for a
406 * trail rule, and add_accept() can create
410 num_rules | YY_TRAILING_HEAD_MASK );
411 variable_trail_rule = true;
417 $$ = link_machines( $1, $2 );
421 { synerr( "trailing context used twice" ); }
430 current_state_type = STATE_TRAILING_CONTEXT;
434 synerr( "trailing context used twice" );
435 $$ = mkstate( SYM_EPSILON );
438 else if ( previous_continued_action )
440 /* See the comment in the rule for "re2 re"
444 "trailing context made variable due to preceding '|' action" );
449 if ( lex_compat || varlength )
451 /* Again, see the comment in the rule for
455 num_rules | YY_TRAILING_HEAD_MASK );
456 variable_trail_rule = true;
461 eps = mkstate( SYM_EPSILON );
462 $$ = link_machines( $1,
463 link_machines( eps, mkstate( '\n' ) ) );
472 if ( lex_compat || (varlength && headcnt == 0) )
473 /* Both head and trail are
476 variable_trail_rule = true;
497 /* This rule is written separately so the
498 * reduction will occur before the trailing
503 synerr( "trailing context used twice" );
508 /* We hope the trailing context is
517 current_state_type = STATE_TRAILING_CONTEXT;
522 series : series singleton
524 /* This is where concatenation of adjacent patterns
527 $$ = link_machines( $1, $2 );
534 singleton : singleton '*'
553 | singleton '{' NUMBER ',' NUMBER '}'
557 if ( $3 > $5 || $3 < 0 )
559 synerr( "bad iteration values" );
569 "bad iteration values" );
574 mkrep( $1, 1, $5 ) );
577 $$ = mkrep( $1, $3, $5 );
581 | singleton '{' NUMBER ',' '}'
587 synerr( "iteration value must be positive" );
592 $$ = mkrep( $1, $3, INFINITY );
595 | singleton '{' NUMBER '}'
597 /* The singleton could be something like "(foo)",
598 * in which case we have no idea what its length
599 * is, so we punt here.
605 synerr( "iteration value must be positive" );
610 $$ = link_machines( $1,
611 copysingl( $1, $3 - 1 ) );
618 /* Create the '.' character class. */
620 ccladd( anyccl, '\n' );
624 mkeccl( ccltbl + cclmap[anyccl],
625 ccllen[anyccl], nextecm,
626 ecgroup, csize, csize );
633 $$ = mkstate( -anyccl );
639 /* Sort characters for fast searching. We
640 * use a shell sort since this list could
643 cshell( ccltbl + cclmap[$1], ccllen[$1], true );
646 mkeccl( ccltbl + cclmap[$1], ccllen[$1],
647 nextecm, ecgroup, csize, csize );
671 if ( caseins && $1 >= 'A' && $1 <= 'Z' )
678 fullccl : '[' ccl ']'
688 ccl : ccl CHAR '-' CHAR
692 if ( $2 >= 'A' && $2 <= 'Z' )
694 if ( $4 >= 'A' && $4 <= 'Z' )
699 synerr( "negative range in character class" );
703 for ( i = $2; i <= $4; ++i )
706 /* Keep track if this ccl is staying in
707 * alphabetical order.
709 cclsorted = cclsorted && ($2 > lastchar);
718 if ( caseins && $2 >= 'A' && $2 <= 'Z' )
722 cclsorted = cclsorted && ($2 > lastchar);
729 /* Too hard to properly maintain cclsorted. */
738 currccl = $$ = cclinit();
/* One alternative per POSIX character class token.  Each action expands
 * CCL_EXPR() with the matching <ctype.h> predicate (IS_BLANK for
 * CCE_BLANK), which adds the qualifying characters to currccl.
 */
742 ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum) }
743 | CCE_ALPHA { CCL_EXPR(isalpha) }
744 | CCE_BLANK { CCL_EXPR(IS_BLANK) }
745 | CCE_CNTRL { CCL_EXPR(iscntrl) }
746 | CCE_DIGIT { CCL_EXPR(isdigit) }
747 | CCE_GRAPH { CCL_EXPR(isgraph) }
748 | CCE_LOWER { CCL_EXPR(islower) }
749 | CCE_PRINT { CCL_EXPR(isprint) }
750 | CCE_PUNCT { CCL_EXPR(ispunct) }
751 | CCE_SPACE { CCL_EXPR(isspace) }
758 | CCE_XDIGIT { CCL_EXPR(isxdigit) }
763 if ( caseins && $2 >= 'A' && $2 <= 'Z' )
768 $$ = link_machines( $1, mkstate( $2 ) );
772 { $$ = mkstate( SYM_EPSILON ); }
778 /* build_eof_action - build the "<<EOF>>" action for the active start
782 void build_eof_action()
785 char action_text[MAXLINE];
787 for ( i = 1; i <= scon_stk_ptr; ++i )
789 if ( sceof[scon_stk[i]] )
790 format_pinpoint_message(
791 "multiple <<EOF>> rules for start condition %s",
792 scname[scon_stk[i]] );
796 sceof[scon_stk[i]] = true;
797 sprintf( action_text, "case YY_STATE_EOF(%s):\n",
798 scname[scon_stk[i]] );
799 add_action( action_text );
803 line_directive_out( (FILE *) 0, 1 );
/* NOTE(review): each label emitted in the loop above has the form
 * "case YY_STATE_EOF(<name>):" and is built with an unbounded sprintf()
 * into action_text, which holds MAXLINE bytes; a sufficiently long
 * scname[] entry could overrun it - TODO confirm that start condition
 * names are length-limited elsewhere.
 */
805 /* This isn't a normal rule after all - don't count it as
806 * such, so we don't have any holes in the rule numbering
807 * (which make generating "rule can never match" warnings
815 /* format_synerr - write out formatted syntax error
 *
 * msg is used as a printf-style format with arg as its single argument;
 * the expansion is built in a local buffer of MAXLINE bytes.
 *
 * NOTE(review): sprintf() does not bound the write, so a long enough
 * expansion overruns errmsg, and msg must be a trusted format string.
 * A length-limited formatter would be safer - TODO confirm callers.
 */
817 void format_synerr( msg, arg )
820 char errmsg[MAXLINE];
822 (void) sprintf( errmsg, msg, arg );
827 /* synerr - report a syntax error */
833 pinpoint_message( str );
837 /* format_warn - write out formatted warning
 *
 * msg is used as a printf-style format with arg as its single argument;
 * the expansion is built in a local buffer of MAXLINE bytes.
 *
 * NOTE(review): sprintf() does not bound the write, so a long enough
 * expansion overruns warn_msg, and msg must be a trusted format string.
 * A length-limited formatter would be safer - TODO confirm callers.
 */
839 void format_warn( msg, arg )
842 char warn_msg[MAXLINE];
844 (void) sprintf( warn_msg, msg, arg );
849 /* warn - report a warning, unless -w was given */
854 line_warning( str, linenum );
857 /* format_pinpoint_message - write out a message formatted with one string,
858 * pinpointing its location
 *
 * msg is used as a printf-style format with arg as its single argument;
 * the expansion is built in a local buffer of MAXLINE bytes and then
 * handed to pinpoint_message().
 *
 * NOTE(review): the sprintf() is unbounded, so a long enough expansion
 * overruns errmsg, and msg must be a trusted format string - TODO
 * confirm callers.
861 void format_pinpoint_message( msg, arg )
864 char errmsg[MAXLINE];
866 (void) sprintf( errmsg, msg, arg );
867 pinpoint_message( errmsg );
871 /* pinpoint_message - write out a message, pinpointing its location
 *
 * Forwards str to line_pinpoint() together with linenum, so the message
 * is attributed to whatever line number linenum holds at the time of the
 * call.
 */
873 void pinpoint_message( str )
876 line_pinpoint( str, linenum );
880 /* line_warning - report a warning at a given line, unless -w was given
 *
 * The text "warning, " is prepended to str in a local buffer of MAXLINE
 * bytes, and the result is passed to line_pinpoint() with the given line.
 *
 * NOTE(review): sprintf() does not bound the write; a str of
 * MAXLINE - 9 characters or more overruns warning[].
 */
882 void line_warning( str, line )
886 char warning[MAXLINE];
890 sprintf( warning, "warning, %s", str );
891 line_pinpoint( warning, line );
896 /* line_pinpoint - write out a message, pinpointing it at the given line
 *
 * Prints to stderr in the form
 *	"<infilename>", line <line>: <str>
 * followed by a newline.
 */
898 void line_pinpoint( str, line )
902 fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str );
906 /* yyerror - eat up an error message from the parser;
907 * currently, messages are ignored