1 /* parse.y - parser for flex input */
3 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
4 %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS
6 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
7 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
11 * Copyright (c) 1990 The Regents of the University of California.
12 * All rights reserved.
14 * This code is derived from software contributed to Berkeley by
17 * The United States Government has rights in this work pursuant
18 * to contract no. DE-AC03-76SF00098 between the United States
19 * Department of Energy and the University of California.
21 * Redistribution and use in source and binary forms are permitted provided
22 * that: (1) source distributions retain this entire copyright notice and
23 * comment, and (2) distributions including binaries display the following
24 * acknowledgement: ``This product includes software developed by the
25 * University of California, Berkeley and its contributors'' in the
26 * documentation or other materials provided with the distribution and in
27 * all advertising materials mentioning features or use of this software.
28 * Neither the name of the University nor the names of its contributors may
29 * be used to endorse or promote products derived from this software without
30 * specific prior written permission.
31 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
32 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
33 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
36 /* $Header: /home/daffy/u0/vern/flex/RCS/parse.y,v 2.28 95/04/21 11:51:51 vern Exp $ */
40 /* Some versions of bison are broken in that they use alloca() but don't
41 * declare it properly. The following is the patented (just kidding!)
42 * #ifdef chud to fix the problem, courtesy of Francois Pinard.
45 /* AIX requires this to be the first thing in the file. What a piece. */
53 /* The remainder of the alloca() cruft has to come after including flexdef.h,
54 * so HAVE_ALLOCA_H is (possibly) defined.
59 # define alloca __builtin_alloca
78 /* Bletch, ^^^^ that was ugly! */
81 int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, rulelen;
82 int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
87 static int madeany = false; /* whether we've made the '.' character class */
88 int previous_continued_action; /* whether the previous rule's action was '|' */
90 /* Expand a POSIX character class expression. */
91 #define CCL_EXPR(func) \
94 for ( c = 0; c < csize; ++c ) \
95 if ( isascii(c) && func(c) ) \
96 ccladd( currccl, c ); \
99 /* While POSIX defines isblank(), it's not ANSI C, so spell the test out
 * by hand: a character counts as "blank" if it is a space or a horizontal
 * tab. The argument appears twice in the expansion, so it must not have
 * side effects.
 */
100 #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
102 /* On some over-ambitious machines, such as DEC Alphas, the default
103 * token type is "long" instead of "int"; this leads to problems with
104 * declaring yylval in flexdef.h. But so far, all the yacc's I've seen
105 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
106 * following should ensure that the default token type is "int".
113 goal : initlex sect1 sect1end sect2 initforrule
114 { /* add default rule */
120 def_rule = mkstate( -pat );
122 /* Remember the number of the default rule so we
123 * don't generate "can't match" warnings for it.
125 default_rule = num_rules;
127 finish_rule( def_rule, false, 0, 0 );
129 for ( i = 1; i <= lastsc; ++i )
130 scset[i] = mkbranch( scset[i], def_rule );
134 "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
136 add_action( "ECHO" );
138 add_action( ";\n\tYY_BREAK\n" );
143 { /* initialize for processing rules */
145 /* Create default DFA start condition. */
146 scinstal( "INITIAL", false );
150 sect1 : sect1 startconddecl namelist1
154 { synerr( "unknown error processing section 1" ); }
160 scon_stk = allocate_integer_array( lastsc + 1 );
165 startconddecl : SCDECL
172 namelist1 : namelist1 NAME
173 { scinstal( nmstr, xcluflg ); }
176 { scinstal( nmstr, xcluflg ); }
179 { synerr( "bad start condition list" ); }
182 options : OPTION_OP optionlist
185 optionlist : optionlist option
189 option : OPT_OUTFILE '=' NAME
191 outfilename = copy_string( nmstr );
194 | OPT_PREFIX '=' NAME
195 { prefix = copy_string( nmstr ); }
196 | OPT_YYCLASS '=' NAME
197 { yyclass = copy_string( nmstr ); }
200 sect2 : sect2 scon initforrule flexrule '\n'
201 { scon_stk_ptr = $2; }
202 | sect2 scon '{' sect2 '}'
203 { scon_stk_ptr = $2; }
209 /* Initialize for a parse of one rule. */
210 trlcontxt = variable_trail_rule = varlength = false;
211 trailcnt = headcnt = rulelen = 0;
212 current_state_type = STATE_NORMAL;
213 previous_continued_action = continued_action;
223 finish_rule( pat, variable_trail_rule,
226 if ( scon_stk_ptr > 0 )
228 for ( i = 1; i <= scon_stk_ptr; ++i )
230 mkbranch( scbol[scon_stk[i]],
236 /* Add to all non-exclusive start conditions,
237 * including the default (0) start condition.
240 for ( i = 1; i <= lastsc; ++i )
242 scbol[i] = mkbranch( scbol[i],
250 if ( performance_report > 1 )
252 "'^' operator results in sub-optimal performance" );
259 finish_rule( pat, variable_trail_rule,
262 if ( scon_stk_ptr > 0 )
264 for ( i = 1; i <= scon_stk_ptr; ++i )
266 mkbranch( scset[scon_stk[i]],
272 for ( i = 1; i <= lastsc; ++i )
282 if ( scon_stk_ptr > 0 )
287 /* This EOF applies to all start conditions
288 * which don't already have EOF actions.
290 for ( i = 1; i <= lastsc; ++i )
292 scon_stk[++scon_stk_ptr] = i;
294 if ( scon_stk_ptr == 0 )
296 "all start conditions already have <<EOF>> rules" );
304 { synerr( "unrecognized rule" ); }
308 { $$ = scon_stk_ptr; }
311 scon : '<' scon_stk_ptr namelist2 '>'
318 for ( i = 1; i <= lastsc; ++i )
322 for ( j = 1; j <= scon_stk_ptr; ++j )
323 if ( scon_stk[j] == i )
326 if ( j > scon_stk_ptr )
327 scon_stk[++scon_stk_ptr] = i;
332 { $$ = scon_stk_ptr; }
335 namelist2 : namelist2 ',' sconname
340 { synerr( "bad start condition list" ); }
345 if ( (scnum = sclookup( nmstr )) == 0 )
346 format_pinpoint_message(
347 "undeclared start condition %s",
351 for ( i = 1; i <= scon_stk_ptr; ++i )
352 if ( scon_stk[i] == scnum )
355 "<%s> specified twice",
360 if ( i > scon_stk_ptr )
361 scon_stk[++scon_stk_ptr] = scnum;
368 if ( transchar[lastst[$2]] != SYM_EPSILON )
369 /* Provide final transition \now/ so it
370 * will be marked as a trailing context
373 $2 = link_machines( $2,
374 mkstate( SYM_EPSILON ) );
376 mark_beginning_as_normal( $2 );
377 current_state_type = STATE_NORMAL;
379 if ( previous_continued_action )
381 /* We need to treat this as variable trailing
382 * context so that the backup does not happen
383 * in the action but before the action switch
384 * statement. If the backup happens in the
385 * action, then the rules "falling into" this
386 * one's action will *also* do the backup,
389 if ( ! varlength || headcnt != 0 )
391 "trailing context made variable due to preceding '|' action" );
393 /* Mark as variable. */
398 if ( lex_compat || (varlength && headcnt == 0) )
399 { /* variable trailing context rule */
400 /* Mark the first part of the rule as the
401 * accepting "head" part of a trailing
404 * By the way, we didn't do this at the
405 * beginning of this production because back
406 * then current_state_type was set up for a
407 * trail rule, and add_accept() can create
411 num_rules | YY_TRAILING_HEAD_MASK );
412 variable_trail_rule = true;
418 $$ = link_machines( $1, $2 );
422 { synerr( "trailing context used twice" ); }
431 current_state_type = STATE_TRAILING_CONTEXT;
435 synerr( "trailing context used twice" );
436 $$ = mkstate( SYM_EPSILON );
439 else if ( previous_continued_action )
441 /* See the comment in the rule for "re2 re"
445 "trailing context made variable due to preceding '|' action" );
450 if ( lex_compat || varlength )
452 /* Again, see the comment in the rule for
456 num_rules | YY_TRAILING_HEAD_MASK );
457 variable_trail_rule = true;
462 eps = mkstate( SYM_EPSILON );
463 $$ = link_machines( $1,
464 link_machines( eps, mkstate( '\n' ) ) );
473 if ( lex_compat || (varlength && headcnt == 0) )
474 /* Both head and trail are
477 variable_trail_rule = true;
498 /* This rule is written separately so the
499 * reduction will occur before the trailing
504 synerr( "trailing context used twice" );
509 /* We hope the trailing context is
518 current_state_type = STATE_TRAILING_CONTEXT;
523 series : series singleton
525 /* This is where concatenation of adjacent patterns
528 $$ = link_machines( $1, $2 );
535 singleton : singleton '*'
554 | singleton '{' NUMBER ',' NUMBER '}'
558 if ( $3 > $5 || $3 < 0 )
560 synerr( "bad iteration values" );
570 "bad iteration values" );
575 mkrep( $1, 1, $5 ) );
578 $$ = mkrep( $1, $3, $5 );
582 | singleton '{' NUMBER ',' '}'
588 synerr( "iteration value must be positive" );
593 $$ = mkrep( $1, $3, INFINITY );
596 | singleton '{' NUMBER '}'
598 /* The singleton could be something like "(foo)",
599 * in which case we have no idea what its length
600 * is, so we punt here.
606 synerr( "iteration value must be positive" );
611 $$ = link_machines( $1,
612 copysingl( $1, $3 - 1 ) );
619 /* Create the '.' character class. */
621 ccladd( anyccl, '\n' );
625 mkeccl( ccltbl + cclmap[anyccl],
626 ccllen[anyccl], nextecm,
627 ecgroup, csize, csize );
634 $$ = mkstate( -anyccl );
640 /* Sort characters for fast searching. We
641 * use a shell sort since this list could
644 cshell( ccltbl + cclmap[$1], ccllen[$1], true );
647 mkeccl( ccltbl + cclmap[$1], ccllen[$1],
648 nextecm, ecgroup, csize, csize );
672 if ( caseins && $1 >= 'A' && $1 <= 'Z' )
679 fullccl : '[' ccl ']'
689 ccl : ccl CHAR '-' CHAR
693 if ( $2 >= 'A' && $2 <= 'Z' )
695 if ( $4 >= 'A' && $4 <= 'Z' )
700 synerr( "negative range in character class" );
704 for ( i = $2; i <= $4; ++i )
707 /* Keep track if this ccl is staying in
708 * alphabetical order.
710 cclsorted = cclsorted && ($2 > lastchar);
719 if ( caseins && $2 >= 'A' && $2 <= 'Z' )
723 cclsorted = cclsorted && ($2 > lastchar);
730 /* Too hard to properly maintain cclsorted. */
739 currccl = $$ = cclinit();
743 ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum) }
744 | CCE_ALPHA { CCL_EXPR(isalpha) }
745 | CCE_BLANK { CCL_EXPR(IS_BLANK) }
746 | CCE_CNTRL { CCL_EXPR(iscntrl) }
747 | CCE_DIGIT { CCL_EXPR(isdigit) }
748 | CCE_GRAPH { CCL_EXPR(isgraph) }
749 | CCE_LOWER { CCL_EXPR(islower) }
750 | CCE_PRINT { CCL_EXPR(isprint) }
751 | CCE_PUNCT { CCL_EXPR(ispunct) }
752 | CCE_SPACE { CCL_EXPR(isspace) }
759 | CCE_XDIGIT { CCL_EXPR(isxdigit) }
764 if ( caseins && $2 >= 'A' && $2 <= 'Z' )
769 $$ = link_machines( $1, mkstate( $2 ) );
773 { $$ = mkstate( SYM_EPSILON ); }
779 /* build_eof_action - build the "<<EOF>>" action for the active start
783 void build_eof_action()
786 char action_text[MAXLINE];
788 for ( i = 1; i <= scon_stk_ptr; ++i )
790 if ( sceof[scon_stk[i]] )
791 format_pinpoint_message(
792 "multiple <<EOF>> rules for start condition %s",
793 scname[scon_stk[i]] );
797 sceof[scon_stk[i]] = true;
798 sprintf( action_text, "case YY_STATE_EOF(%s):\n",
799 scname[scon_stk[i]] );
800 add_action( action_text );
804 line_directive_out( (FILE *) 0, 1 );
806 /* This isn't a normal rule after all - don't count it as
807 * such, so we don't have any holes in the rule numbering
808 * (which make generating "rule can never match" warnings
816 /* format_synerr - write out formatted syntax error */
818 void format_synerr( msg, arg )
821 char errmsg[MAXLINE];
823 (void) sprintf( errmsg, msg, arg );
828 /* synerr - report a syntax error */
834 pinpoint_message( str );
838 /* format_warn - write out formatted warning */
840 void format_warn( msg, arg )
843 char warn_msg[MAXLINE];
845 (void) sprintf( warn_msg, msg, arg );
850 /* warn - report a warning, unless -w was given */
855 line_warning( str, linenum );
858 /* format_pinpoint_message - write out a message formatted with one string,
859 * pinpointing its location
862 void format_pinpoint_message( msg, arg )
865 char errmsg[MAXLINE];
867 (void) sprintf( errmsg, msg, arg );
868 pinpoint_message( errmsg );
872 /* pinpoint_message - write out a message, pinpointing its location */
874 void pinpoint_message( str )
877 line_pinpoint( str, linenum );
881 /* line_warning - report a warning at a given line, unless -w was given */
883 void line_warning( str, line )
887 char warning[MAXLINE];
891 sprintf( warning, "warning, %s", str );
892 line_pinpoint( warning, line );
897 /* line_pinpoint - write out a message, pinpointing it at the given line */
899 void line_pinpoint( str, line )
903 fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str );
907 /* yyerror - eat up an error message from the parser;
908 * currently, messages are ignored