]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/flex/src/parse.y
MFV: r362286
[FreeBSD/FreeBSD.git] / contrib / flex / src / parse.y
1 /* parse.y - parser for flex input */
2
3 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
4 %token TOK_OPTION TOK_OUTFILE TOK_PREFIX TOK_YYCLASS TOK_HEADER_FILE TOK_EXTRA_TYPE
5 %token TOK_TABLES_FILE
6
7 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
8 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
9
10 %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
11 %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
12
13 %left CCL_OP_DIFF CCL_OP_UNION
14
15 /*
16  * POSIX and AT&T lex place the
17  * precedence of the repeat operator, {}, below that of concatenation.
18  * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
19  * Regular Expression (ERE) precedence that has the repeat operator
20  * higher than concatenation.  This causes ab{3} to yield abbb.
21  *
22  * In order to support the POSIX and AT&T precedence and the flex
23  * precedence we define two token sets for the begin and end tokens of
24  * the repeat operator, '{' and '}'.  The lexical scanner chooses
25  * which tokens to return based on whether posix_compat or lex_compat
26  * are specified. Specifying either posix_compat or lex_compat will
27  * cause flex to parse scanner files as per the AT&T and
28  * POSIX-mandated behavior.
29  */
30
31 %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
32
33
34 %{
35 /*  Copyright (c) 1990 The Regents of the University of California. */
36 /*  All rights reserved. */
37
38 /*  This code is derived from software contributed to Berkeley by */
39 /*  Vern Paxson. */
40
41 /*  The United States Government has rights in this work pursuant */
42 /*  to contract no. DE-AC03-76SF00098 between the United States */
43 /*  Department of Energy and the University of California. */
44
45 /*  This file is part of flex. */
46
47 /*  Redistribution and use in source and binary forms, with or without */
48 /*  modification, are permitted provided that the following conditions */
49 /*  are met: */
50
51 /*  1. Redistributions of source code must retain the above copyright */
52 /*     notice, this list of conditions and the following disclaimer. */
53 /*  2. Redistributions in binary form must reproduce the above copyright */
54 /*     notice, this list of conditions and the following disclaimer in the */
55 /*     documentation and/or other materials provided with the distribution. */
56
57 /*  Neither the name of the University nor the names of its contributors */
58 /*  may be used to endorse or promote products derived from this software */
59 /*  without specific prior written permission. */
60
61 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
62 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
63 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
64 /*  PURPOSE. */
65
66 #include "flexdef.h"
67 #include "tables.h"
68
69 int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;  /* scratch and per-rule state shared by the grammar actions */
70 int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;  /* trlcontxt, varlength and variable_trail_rule are reset by initforrule */
71 /* Start conditions in effect for the rule being parsed: entries scon_stk[1..scon_stk_ptr]. */
72 int *scon_stk;
73 int scon_stk_ptr;
74
75 static int madeany = false;  /* whether we've made the '.' character class */
76 static int ccldot, cclany;  /* ccl for '.' (negation of {'\n'}) and the ccl used instead when sf_dot_all() holds (negation of the empty set) */
77 int previous_continued_action;  /* whether the previous rule's action was '|' */
78
79 #define format_warn3(fmt, a1, a2) \
80         do { /* format a two-argument warning, then pass the text to lwarn() */ \
81                 char fw3_text[MAXLINE]; \
82                 snprintf( fw3_text, sizeof fw3_text, (fmt), (a1), (a2) ); \
83                 lwarn( fw3_text ); \
84         } while (0)
85
86 /* Add to currccl every ASCII character code below csize that func() accepts. */
87 #define CCL_EXPR(func) \
88         do { \
89                 int ch = 0; \
90                 while ( ch < csize ) { \
91                         if ( isascii(ch) && func(ch) ) ccladd( currccl, ch ); \
92                         ++ch; } \
93         } while (0)
94
95 /* Negated class: add to currccl every character code below csize that CCL_EXPR(func) would NOT add, so the two are exact complements. */
96 #define CCL_NEG_EXPR(func) \
97         do{ \
98         int c; \
99         for ( c = 0; c < csize; ++c ) \
100                 if ( !(isascii(c) && func(c)) ) /* non-ASCII codes are never in the positive class, whatever func() says in the current locale */ \
101                         ccladd( currccl, c ); \
102         }while(0)
103
104 /* Stand-in for isblank(), which POSIX has but ANSI C lacks: true for space and tab only. */
105 #define IS_BLANK(c) ( ' ' == (c) || '\t' == (c) )
106
107 /* On some over-ambitious machines, such as DEC Alphas, the default
108  * token type is "long" instead of "int"; this leads to problems with
109  * declaring yylval in flexdef.h.  But so far, all the yaccs I've seen
110  * wrap their definitions of YYSTYPE in "#ifndef YYSTYPE", so the
111  * following should ensure that the default token type is "int".
112  */
113 #define YYSTYPE int
114
115 %}
116
117 %%
118 goal            :  initlex sect1 sect1end sect2 initforrule
119                         { /* add default rule */
120                         int def_rule;
121                         /* Pattern of the default rule: a new ccl, negated before any character is added to it. */
122                         pat = cclinit();
123                         cclnegate( pat );
124
125                         def_rule = mkstate( -pat );
126
127                         /* Remember the number of the default rule so we
128                          * don't generate "can't match" warnings for it.
129                          */
130                         default_rule = num_rules;
131
132                         finish_rule( def_rule, false, 0, 0, 0);
133                         /* Branch the default rule into every start condition, 1..lastsc. */
134                         for ( i = 1; i <= lastsc; ++i )
135                                 scset[i] = mkbranch( scset[i], def_rule );
136                         /* Action text: a fatal "jammed" error when spprdflt is set, otherwise ECHO. */
137                         if ( spprdflt )
138                                 add_action(
139                                 "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
140                         else
141                                 add_action( "ECHO" );
142                         /* Finish the statement and append YY_BREAK. */
143                         add_action( ";\n\tYY_BREAK]]\n" );
144                         }
145                 ;
146
147 initlex         :  /* empty: exists only for its action, which goal runs before sect1 */
148                         { /* initialize for processing rules */
149
150                         /* Create default DFA start condition. */
151                         scinstal( "INITIAL", false );
152                         }
153                 ;
154
155 sect1           :  sect1 startconddecl namelist1  /* a start-condition declaration and its list of names */
156                 |  sect1 options  /* a TOK_OPTION directive */
157                 |  /* empty */
158                 |  error
159                         { synerr( _("unknown error processing section 1") ); }
160                 ;
161
162 sect1end        :  SECTEND
163                         { /* section 1 is complete: validate options and set up the start-condition stack */
164                         check_options();
165                         scon_stk = allocate_integer_array( lastsc + 1 );  /* the rules below only use slots 1..scon_stk_ptr */
166                         scon_stk_ptr = 0;  /* stack starts empty */
167                         }
168                 ;
169
170 startconddecl   :  SCDECL
171                         { xcluflg = false; }  /* namelist1 passes this flag to scinstal() for each name */
172
173                 |  XSCDECL
174                         { xcluflg = true; }
175                 ;
176
177 namelist1       :  namelist1 NAME  /* each NAME (text in nmstr) is installed with the flag set by startconddecl */
178                         { scinstal( nmstr, xcluflg ); }
179
180                 |  NAME
181                         { scinstal( nmstr, xcluflg ); }
182
183                 |  error
184                         { synerr( _("bad start condition list") ); }
185                 ;
186
187 options         :  TOK_OPTION optionlist  /* one option directive: the TOK_OPTION token, then zero or more option items */
188                 ;
189
190 optionlist      :  optionlist option  /* left-recursive list of option items */
191                 |  /* empty */
192                 ;
193
194 option          :  TOK_OUTFILE '=' NAME  /* every alternative stores a private copy of the NAME text (nmstr) */
195                         {
196                         outfilename = xstrdup( nmstr );
197                         did_outfilename = 1;
198                         }
199                 |  TOK_EXTRA_TYPE '=' NAME
200                         { extra_type = xstrdup( nmstr ); }
201                 |  TOK_PREFIX '=' NAME
202                         { prefix = xstrdup( nmstr );
203                           /* A prefix containing either bracket character is rejected. */
204                           if ( strpbrk( prefix, "[]" ) != NULL ) flexerror( _("Prefix must not contain [ or ]") ); }
205                 |  TOK_YYCLASS '=' NAME
206                         { yyclass = xstrdup( nmstr ); }
207                 |  TOK_HEADER_FILE '=' NAME
208                         { headerfilename = xstrdup( nmstr ); }
209                 |  TOK_TABLES_FILE '=' NAME
210                         { tablesext = true; tablesfilename = xstrdup( nmstr ); }
211                 ;
212
213 sect2           :  sect2 scon initforrule flexrule '\n'  /* one rule, optionally prefixed by a start-condition list */
214                         { scon_stk_ptr = $2; }  /* restore the stack depth that scon returned in $2 */
215                 |  sect2 scon '{' sect2 '}'  /* a start-condition scope enclosing nested rules */
216                         { scon_stk_ptr = $2; }
217                 |  /* empty */
218                 ;
219
220 initforrule     :  /* empty: exists only for its action */
221                         {
222                         /* Initialize for a parse of one rule. */
223                         trlcontxt = variable_trail_rule = varlength = false;
224                         trailcnt = headcnt = rulelen = 0;
225                         current_state_type = STATE_NORMAL;
226                         previous_continued_action = continued_action;  /* copy taken before this rule's own pattern is parsed */
227                         in_rule = true;
228
229                         new_rule();
230                         }
231                 ;
232
233 flexrule        :  '^' rule  /* '^'-prefixed rule: branched into the scbol[] machines */
234                         {
235                         pat = $2;
236                         finish_rule( pat, variable_trail_rule,
237                                 headcnt, trailcnt , previous_continued_action);
238                         /* A non-empty stack means only the stacked start conditions get the rule. */
239                         if ( scon_stk_ptr > 0 )
240                                 {
241                                 for ( i = 1; i <= scon_stk_ptr; ++i )
242                                         scbol[scon_stk[i]] =
243                                                 mkbranch( scbol[scon_stk[i]],
244                                                                 pat );
245                                 }
246
247                         else
248                                 {
249                                 /* Add to all non-exclusive start conditions,
250                                  * including the default (0) start condition.
251                                  */
252
253                                 for ( i = 1; i <= lastsc; ++i )
254                                         if ( ! scxclu[i] )
255                                                 scbol[i] = mkbranch( scbol[i],
256                                                                         pat );
257                                 }
258                         /* First '^' rule seen: set bol_needed, with a message when performance_report > 1. */
259                         if ( ! bol_needed )
260                                 {
261                                 bol_needed = true;
262
263                                 if ( performance_report > 1 )
264                                         pinpoint_message(
265                         "'^' operator results in sub-optimal performance" );
266                                 }
267                         }
268
269                 |  rule  /* rule without '^': same as above, but branched into scset[] */
270                         {
271                         pat = $1;
272                         finish_rule( pat, variable_trail_rule,
273                                 headcnt, trailcnt , previous_continued_action);
274
275                         if ( scon_stk_ptr > 0 )
276                                 {
277                                 for ( i = 1; i <= scon_stk_ptr; ++i )
278                                         scset[scon_stk[i]] =
279                                                 mkbranch( scset[scon_stk[i]],
280                                                                 pat );
281                                 }
282
283                         else
284                                 {
285                                 for ( i = 1; i <= lastsc; ++i )
286                                         if ( ! scxclu[i] )
287                                                 scset[i] =
288                                                         mkbranch( scset[i],
289                                                                 pat );
290                                 }
291                         }
292
293                 |  EOF_OP  /* an <<EOF>> rule */
294                         {
295                         if ( scon_stk_ptr > 0 )
296                                 build_eof_action();
297         
298                         else
299                                 {
300                                 /* This EOF applies to all start conditions
301                                  * which don't already have EOF actions.
302                                  */
303                                 for ( i = 1; i <= lastsc; ++i )
304                                         if ( ! sceof[i] )
305                                                 scon_stk[++scon_stk_ptr] = i;
306
307                                 if ( scon_stk_ptr == 0 )
308                                         lwarn(
309                         "all start conditions already have <<EOF>> rules" );
310
311                                 else
312                                         build_eof_action();
313                                 }
314                         }
315
316                 |  error
317                         { synerr( _("unrecognized rule") ); }
318                 ;
319
320 scon_stk_ptr    :  /* empty marker: its value is the stack depth at the point where it is reduced */
321                         { $$ = scon_stk_ptr; }
322                 ;
323
324 scon            :  '<' scon_stk_ptr namelist2 '>'  /* value: the depth saved by the marker, i.e. before namelist2 pushed anything */
325                         { $$ = $2; }
326
327                 |  '<' '*' '>'  /* every start condition */
328                         {
329                         $$ = scon_stk_ptr;
330                         /* Push each start condition 1..lastsc that is not already on the stack. */
331                         for ( i = 1; i <= lastsc; ++i )
332                                 {
333                                 int j;
334
335                                 for ( j = 1; j <= scon_stk_ptr; ++j )
336                                         if ( scon_stk[j] == i )
337                                                 break;
338                                 /* j ran past the top of the stack, so i was not found. */
339                                 if ( j > scon_stk_ptr )
340                                         scon_stk[++scon_stk_ptr] = i;
341                                 }
342                         }
343
344                 |  /* empty: no start-condition prefix, nothing is pushed */
345                         { $$ = scon_stk_ptr; }
346                 ;
347
348 namelist2       :  namelist2 ',' sconname  /* comma-separated start-condition names; sconname does the work */
349
350                 |  sconname
351
352                 |  error
353                         { synerr( _("bad start condition list") ); }
354                 ;
355
356 sconname        :  NAME
357                         {
358                         /* A result of 0 from sclookup() is reported as an undeclared name. */
359                         scnum = sclookup( nmstr );
360                         if ( scnum != 0 )
361                                 {
362                                 /* Scan the live stack entries; i stops on a
363                                  * match or at scon_stk_ptr + 1.
364                                  */
365                                 i = 1;
366                                 while ( i <= scon_stk_ptr && scon_stk[i] != scnum )
367                                         ++i;
368
369                                 if ( i <= scon_stk_ptr )
370                                         { format_warn( "<%s> specified twice",
371                                                 scname[scnum] ); }
372                                 else
373                                         scon_stk[++scon_stk_ptr] = scnum;
374                                 }
375                         else format_pinpoint_message( "undeclared start condition %s", nmstr );
376                         }
377                 ;
378
379 rule            :  re2 re  /* $1 is the head (re2 is "re '/'"), $2 the trailing context */
380                         {
381                         if ( transchar[lastst[$2]] != SYM_EPSILON )
382                                 /* Provide final transition \now/ so it
383                                  * will be marked as a trailing context
384                                  * state.
385                                  */
386                                 $2 = link_machines( $2,
387                                                 mkstate( SYM_EPSILON ) );
388
389                         mark_beginning_as_normal( $2 );
390                         current_state_type = STATE_NORMAL;
391
392                         if ( previous_continued_action )
393                                 {
394                                 /* We need to treat this as variable trailing
395                                  * context so that the backup does not happen
396                                  * in the action but before the action switch
397                                  * statement.  If the backup happens in the
398                                  * action, then the rules "falling into" this
399                                  * one's action will *also* do the backup,
400                                  * erroneously.
401                                  */
402                                 if ( ! varlength || headcnt != 0 )
403                                         lwarn(
404                 "trailing context made variable due to preceding '|' action" );
405
406                                 /* Mark as variable. */
407                                 varlength = true;
408                                 headcnt = 0;
409
410                                 }
411                         /* re2 cleared varlength, so here it describes the trail; headcnt == 0 means re2 recorded no fixed head length. */
412                         if ( lex_compat || (varlength && headcnt == 0) )
413                                 { /* variable trailing context rule */
414                                 /* Mark the first part of the rule as the
415                                  * accepting "head" part of a trailing
416                                  * context rule.
417                                  *
418                                  * By the way, we didn't do this at the
419                                  * beginning of this production because back
420                                  * then current_state_type was set up for a
421                                  * trail rule, and add_accept() can create
422                                  * a new state ...
423                                  */
424                                 add_accept( $1,
425                                         num_rules | YY_TRAILING_HEAD_MASK );
426                                 variable_trail_rule = true;
427                                 }
428                         
429                         else
430                                 trailcnt = rulelen;
431
432                         $$ = link_machines( $1, $2 );
433                         }
434
435                 |  re2 re '$'  /* '/' and '$' in the same rule */
436                         { synerr( _("trailing context used twice") ); }
437
438                 |  re '$'  /* '$' is built as a trailing context consisting of one '\n' */
439                         {
440                         headcnt = 0;
441                         trailcnt = 1;
442                         rulelen = 1;
443                         varlength = false;
444
445                         current_state_type = STATE_TRAILING_CONTEXT;
446
447                         if ( trlcontxt )
448                                 {
449                                 synerr( _("trailing context used twice") );
450                                 $$ = mkstate( SYM_EPSILON );
451                                 }
452                         /* NOTE(review): the $$ assigned in the error branch above is overwritten by the assignment at the end of this action. */
453                         else if ( previous_continued_action )
454                                 {
455                                 /* See the comment in the rule for "re2 re"
456                                  * above.
457                                  */
458                                 lwarn(
459                 "trailing context made variable due to preceding '|' action" );
460
461                                 varlength = true;
462                                 }
463
464                         if ( lex_compat || varlength )
465                                 {
466                                 /* Again, see the comment in the rule for
467                                  * "re2 re" above.
468                                  */
469                                 add_accept( $1,
470                                         num_rules | YY_TRAILING_HEAD_MASK );
471                                 variable_trail_rule = true;
472                                 }
473
474                         trlcontxt = true;
475                         /* Result: $1, then an epsilon state, then a state on '\n'. */
476                         eps = mkstate( SYM_EPSILON );
477                         $$ = link_machines( $1,
478                                 link_machines( eps, mkstate( '\n' ) ) );
479                         }
480
481                 |  re  /* pattern with neither re2 nor '$' in this alternative */
482                         {
483                         $$ = $1;
484
485                         if ( trlcontxt )
486                                 {
487                                 if ( lex_compat || (varlength && headcnt == 0) )
488                                         /* Both head and trail are
489                                          * variable-length.
490                                          */
491                                         variable_trail_rule = true;
492                                 else
493                                         trailcnt = rulelen;
494                                 }
495                         }
496                 ;
497
498
499 re              :  re '|' series  /* alternation */
500                         {
501                         varlength = true;  /* any alternation marks the pattern as variable-length */
502                         $$ = mkor( $1, $3 );
503                         }
504
505                 |  series
506                         { $$ = $1; }
507                 ;
508
509
510 re2             :  re '/'  /* head of a trailing-context rule, up to and including the '/' */
511                         {
512                         /* This rule is written separately so the
513                          * reduction will occur before the trailing
514                          * series is parsed.
515                          */
516
517                         if ( trlcontxt )
518                                 synerr( _("trailing context used twice") );
519                         else
520                                 trlcontxt = true;
521                         /* Variable-length head: headcnt is left unchanged and varlength is cleared for the trail; otherwise the head length is recorded. */
522                         if ( varlength )
523                                 /* We hope the trailing context is
524                                  * fixed-length.
525                                  */
526                                 varlength = false;
527                         else
528                                 headcnt = rulelen;
529                         /* rulelen is restarted so that it counts only what follows the '/'. */
530                         rulelen = 0;
531
532                         current_state_type = STATE_TRAILING_CONTEXT;
533                         $$ = $1;
534                         }
535                 ;
536
537 series          :  series singleton
538                         {
539                         /* This is where concatenation of adjacent patterns
540                          * gets done.
541                          */
542                         $$ = link_machines( $1, $2 );
543                         }
544
545                 |  singleton
546                         { $$ = $1; }
547
548                 |  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX  /* {m,n} applied to the whole series; $3 = m, $5 = n */
549                         {
550                         varlength = true;
551
552                         if ( $3 > $5 || $3 < 0 )
553                                 {
554                                 synerr( _("bad iteration values") );
555                                 $$ = $1;
556                                 }
557                         else
558                                 {
559                                 if ( $3 == 0 )
560                                         { /* {0,n}: built as an optional {1,n}; {0,0} is rejected */
561                                         if ( $5 <= 0 )
562                                                 {
563                                                 synerr(
564                                                 _("bad iteration values") );
565                                                 $$ = $1;
566                                                 }
567                                         else
568                                                 $$ = mkopt(
569                                                         mkrep( $1, 1, $5 ) );
570                                         }
571                                 else
572                                         $$ = mkrep( $1, $3, $5 );
573                                 }
574                         }
575
576                 |  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX  /* {m,}: at least m repetitions, no upper bound */
577                         {
578                         varlength = true;
579
580                         if ( $3 <= 0 )
581                                 {
582                                 synerr( _("iteration value must be positive") );
583                                 $$ = $1;
584                                 }
585
586                         else
587                                 $$ = mkrep( $1, $3, INFINITE_REPEAT );
588                         }
589
590                 |  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX  /* {m}: $1 followed by copysingl( $1, m - 1 ) */
591                         {
592                         /* The series could be something like "(foo)",
593                          * in which case we have no idea what its length
594                          * is, so we punt here.
595                          */
596                         varlength = true;
597
598                         if ( $3 <= 0 )
599                                 {
600                                   synerr( _("iteration value must be positive")
601                                           );
602                                 $$ = $1;
603                                 }
604
605                         else
606                                 $$ = link_machines( $1,
607                                                 copysingl( $1, $3 - 1 ) );
608                         }
609
610                 ;
611
612 singleton       :  singleton '*'  /* every postfix operator below marks the pattern variable-length */
613                         {
614                         varlength = true;
615
616                         $$ = mkclos( $1 );
617                         }
618
619                 |  singleton '+'
620                         {
621                         varlength = true;
622                         $$ = mkposcl( $1 );
623                         }
624
625                 |  singleton '?'
626                         {
627                         varlength = true;
628                         $$ = mkopt( $1 );
629                         }
630
631                 |  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX  /* {m,n} applied to the preceding singleton only; $3 = m, $5 = n */
632                         {
633                         varlength = true;
634
635                         if ( $3 > $5 || $3 < 0 )
636                                 {
637                                 synerr( _("bad iteration values") );
638                                 $$ = $1;
639                                 }
640                         else
641                                 {
642                                 if ( $3 == 0 )
643                                         { /* {0,n}: built as an optional {1,n}; {0,0} is rejected */
644                                         if ( $5 <= 0 )
645                                                 {
646                                                 synerr(
647                                                 _("bad iteration values") );
648                                                 $$ = $1;
649                                                 }
650                                         else
651                                                 $$ = mkopt(
652                                                         mkrep( $1, 1, $5 ) );
653                                         }
654                                 else
655                                         $$ = mkrep( $1, $3, $5 );
656                                 }
657                         }
658
659                 |  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX  /* {m,}: at least m repetitions, no upper bound */
660                         {
661                         varlength = true;
662
663                         if ( $3 <= 0 )
664                                 {
665                                 synerr( _("iteration value must be positive") );
666                                 $$ = $1;
667                                 }
668
669                         else
670                                 $$ = mkrep( $1, $3, INFINITE_REPEAT );
671                         }
672
673                 |  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX  /* {m}: $1 followed by copysingl( $1, m - 1 ) */
674                         {
675                         /* The singleton could be something like "(foo)",
676                          * in which case we have no idea what its length
677                          * is, so we punt here.
678                          */
679                         varlength = true;
680
681                         if ( $3 <= 0 )
682                                 {
683                                 synerr( _("iteration value must be positive") );
684                                 $$ = $1;
685                                 }
686
687                         else
688                                 $$ = link_machines( $1,
689                                                 copysingl( $1, $3 - 1 ) );
690                         }
691
692                 |  '.'  /* both '.' classes are built once, on first use, and cached in ccldot / cclany */
693                         {
694                         if ( ! madeany )
695                                 {
696                                 /* Create the '.' character class. */
697                     ccldot = cclinit();
698                     ccladd( ccldot, '\n' );
699                     cclnegate( ccldot );
700
701                     if ( useecs )
702                         mkeccl( ccltbl + cclmap[ccldot],
703                             ccllen[ccldot], nextecm,
704                             ecgroup, csize, csize );
705
706                                 /* Create the (?s:'.') character class. */
707                     cclany = cclinit();
708                     cclnegate( cclany );
709
710                     if ( useecs )
711                         mkeccl( ccltbl + cclmap[cclany],
712                             ccllen[cclany], nextecm,
713                             ecgroup, csize, csize );
714
715                                 madeany = true;
716                                 }
717
718                         ++rulelen;
719                         /* Choose the class according to sf_dot_all(). */
720             if (sf_dot_all())
721                 $$ = mkstate( -cclany );
722             else
723                 $$ = mkstate( -ccldot );
724                         }
725
726                 |  fullccl  /* a bracketed character class, possibly combined with set operators */
727                         {
728                                 /* Sort characters for fast searching.
729                                  */
730                                 qsort( ccltbl + cclmap[$1], (size_t) ccllen[$1], sizeof (*ccltbl), cclcmp );
731
732                         if ( useecs )
733                                 mkeccl( ccltbl + cclmap[$1], ccllen[$1],
734                                         nextecm, ecgroup, csize, csize );
735
736                         ++rulelen;
737
738                         if (ccl_has_nl[$1])
739                                 rule_has_nl[num_rules] = true;
740
741                         $$ = mkstate( -$1 );
742                         }
743
744                 |  PREVCCL  /* $1 is a ccl number supplied by the scanner; it is used as-is, with no sort or mkeccl() call here */
745                         {
746                         ++rulelen;
747
748                         if (ccl_has_nl[$1])
749                                 rule_has_nl[num_rules] = true;
750
751                         $$ = mkstate( -$1 );
752                         }
753
754                 |  '"' string '"'
755                         { $$ = $2; }
756
757                 |  '(' re ')'
758                         { $$ = $2; }
759
760                 |  CHAR  /* a single literal character */
761                         {
762                         ++rulelen;
763
764                         if ($1 == nlch)
765                                 rule_has_nl[num_rules] = true;
766
767             if (sf_case_ins() && has_case($1))
768                 /* create an alternation, as in (a|A) */
769                 $$ = mkor (mkstate($1), mkstate(reverse_case($1)));
770             else
771                 $$ = mkstate( $1 );
772                         }
773                 ;
/* fullccl - one or more bracketed character classes, combined through
 * ccl_set_diff() and ccl_set_union().  Both operators are declared
 * %left, so a chain of them groups from the left.
 */
fullccl         :  fullccl CCL_OP_DIFF braceccl
                        { $$ = ccl_set_diff( $1, $3 ); }

                |  fullccl CCL_OP_UNION braceccl
                        { $$ = ccl_set_union( $1, $3 ); }

                |  braceccl
                        { $$ = $1; }
                ;
779
/* braceccl - a single bracketed character class.  With a leading '^'
 * the class built by the ccl rule is passed through cclnegate() before
 * being returned.
 */
braceccl        :  '[' ccl ']'
                        { $$ = $2; }

                |  '[' '^' ccl ']'
                        {
                        $$ = $3;
                        cclnegate( $$ );
                        }
                ;
790
/* ccl - the contents of a bracketed character class, built up one
 * element at a time in the class created by the empty alternative.
 * cclsorted and lastchar record whether every character so far was
 * added in strictly increasing order.
 */
ccl             :  ccl CHAR '-' CHAR
                        {
                        /* In a case-insensitive scanner, warn when it is
                         * not clear which characters the user means the
                         * range to cover.  The same warning is issued in
                         * two situations:
                         *
                         *  - only one end of the range has case, or both
                         *    do but their cases differ.
                         *    Examples: [@-z] or [S-t]
                         *
                         *  - neither end has case and
                         *    range_covers_case() returns false, leaving
                         *    open whether the other case should be
                         *    included automatically.
                         *    Example: [@-_] spans [A-Z] but not [a-z]
                         */
                        if ( sf_case_ins()
                             && ( has_case( $2 ) != has_case( $4 )
                                  || ( has_case( $2 )
                                       && ( b_islower( $2 ) != b_islower( $4 ) ) )
                                  || ( has_case( $2 )
                                       && ( b_isupper( $2 ) != b_isupper( $4 ) ) )
                                  || ( ! has_case( $2 ) && ! has_case( $4 )
                                       && ! range_covers_case( $2, $4 ) ) ) )
                                format_warn3(
                                        _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
                                        $2, $4 );

                        if ( $2 <= $4 )
                                {
                                for ( i = $2; i <= $4; ++i )
                                        ccladd( $1, i );

                                /* The class remains in order only if this
                                 * range starts above the last character
                                 * recorded so far.
                                 */
                                cclsorted = cclsorted && ($2 > lastchar);
                                lastchar = $4;

                                /* Case-insensitive: add the same range a
                                 * second time with both ends case-flipped.
                                 */
                                if ( sf_case_ins() && has_case( $2 ) && has_case( $4 ) )
                                        {
                                        $2 = reverse_case( $2 );
                                        $4 = reverse_case( $4 );

                                        for ( i = $2; i <= $4; ++i )
                                                ccladd( $1, i );

                                        cclsorted = cclsorted && ($2 > lastchar);
                                        lastchar = $4;
                                        }
                                }

                        else
                                synerr( _("negative range in character class") );

                        $$ = $1;
                        }

                |  ccl CHAR
                        {
                        ccladd( $1, $2 );
                        cclsorted = cclsorted && ($2 > lastchar);
                        lastchar = $2;

                        /* Case-insensitive: also add the character in
                         * its other case.
                         */
                        if ( sf_case_ins() && has_case( $2 ) )
                                {
                                $2 = reverse_case( $2 );
                                ccladd( $1, $2 );

                                cclsorted = cclsorted && ($2 > lastchar);
                                lastchar = $2;
                                }

                        $$ = $1;
                        }

                |  ccl ccl_expr
                        {
                        /* No attempt is made to track ordering across a
                         * class expression; just mark the class unsorted.
                         */
                        cclsorted = false;
                        $$ = $1;
                        }

                |
                        {
                        /* Start of a new class: nothing added yet. */
                        cclsorted = true;
                        lastchar = 0;
                        $$ = cclinit();
                        currccl = $$;
                        }
                ;
883
/* ccl_expr - a character class expression token appearing inside a
 * bracketed class.  Each CCE_* token hands the matching <ctype.h>
 * predicate to CCL_EXPR, and each CCE_NEG_* token hands it to
 * CCL_NEG_EXPR (both macros are defined elsewhere in this file;
 * presumably they add the matching / non-matching characters to the
 * class being built -- confirm against their definitions).
 *
 * In a case-insensitive scanner the lower and upper expressions each
 * apply both islower and isupper, while their negated forms only
 * produce a warning and leave the class untouched.
 */
ccl_expr        :  CCE_ALNUM            { CCL_EXPR(isalnum); }
                |  CCE_ALPHA            { CCL_EXPR(isalpha); }
                |  CCE_BLANK            { CCL_EXPR(IS_BLANK); }
                |  CCE_CNTRL            { CCL_EXPR(iscntrl); }
                |  CCE_DIGIT            { CCL_EXPR(isdigit); }
                |  CCE_GRAPH            { CCL_EXPR(isgraph); }
                |  CCE_LOWER
                        {
                        CCL_EXPR(islower);
                        if ( sf_case_ins() )
                                CCL_EXPR(isupper);
                        }
                |  CCE_PRINT            { CCL_EXPR(isprint); }
                |  CCE_PUNCT            { CCL_EXPR(ispunct); }
                |  CCE_SPACE            { CCL_EXPR(isspace); }
                |  CCE_XDIGIT           { CCL_EXPR(isxdigit); }
                |  CCE_UPPER
                        {
                        CCL_EXPR(isupper);
                        if ( sf_case_ins() )
                                CCL_EXPR(islower);
                        }

                |  CCE_NEG_ALNUM        { CCL_NEG_EXPR(isalnum); }
                |  CCE_NEG_ALPHA        { CCL_NEG_EXPR(isalpha); }
                |  CCE_NEG_BLANK        { CCL_NEG_EXPR(IS_BLANK); }
                |  CCE_NEG_CNTRL        { CCL_NEG_EXPR(iscntrl); }
                |  CCE_NEG_DIGIT        { CCL_NEG_EXPR(isdigit); }
                |  CCE_NEG_GRAPH        { CCL_NEG_EXPR(isgraph); }
                |  CCE_NEG_PRINT        { CCL_NEG_EXPR(isprint); }
                |  CCE_NEG_PUNCT        { CCL_NEG_EXPR(ispunct); }
                |  CCE_NEG_SPACE        { CCL_NEG_EXPR(isspace); }
                |  CCE_NEG_XDIGIT       { CCL_NEG_EXPR(isxdigit); }
                |  CCE_NEG_LOWER
                        {
                        if ( sf_case_ins() )
                                lwarn(_("[:^lower:] is ambiguous in case insensitive scanner"));
                        else
                                CCL_NEG_EXPR(islower);
                        }
                |  CCE_NEG_UPPER
                        {
                        /* Wording kept parallel to the [:^lower:]
                         * warning above.
                         */
                        if ( sf_case_ins() )
                                lwarn(_("[:^upper:] is ambiguous in case insensitive scanner"));
                        else
                                CCL_NEG_EXPR(isupper);
                        }
                ;
929                 
/* string - the characters of a quoted string.  Each CHAR becomes a
 * machine that is linked onto the end of the machine for the
 * characters before it; the empty string is an epsilon state.
 */
string          :  string CHAR
                        {
                        if ( $2 == nlch )
                                rule_has_nl[num_rules] = true;

                        ++rulelen;

                        /* In a case-insensitive scanner a cased character
                         * matches either case, as in (a|A).
                         */
                        $$ = link_machines( $1,
                                ( sf_case_ins() && has_case( $2 ) )
                                ? mkor( mkstate( $2 ), mkstate( reverse_case( $2 ) ) )
                                : mkstate( $2 ) );
                        }

                |
                        { $$ = mkstate( SYM_EPSILON ); }
                ;
948
949 %%
950
951
952 /* build_eof_action - build the "<<EOF>>" action for the active start
953  *                    conditions
954  */
955
956 void build_eof_action(void)
957         {
958         int i;
959         char action_text[MAXLINE];
960
961         for ( i = 1; i <= scon_stk_ptr; ++i )
962                 {
963                 if ( sceof[scon_stk[i]] )
964                         format_pinpoint_message(
965                                 "multiple <<EOF>> rules for start condition %s",
966                                 scname[scon_stk[i]] );
967
968                 else
969                         {
970                         sceof[scon_stk[i]] = true;
971
972                         if (previous_continued_action /* && previous action was regular */)
973                                 add_action("YY_RULE_SETUP\n");
974
975                         snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
976                                 scname[scon_stk[i]] );
977                         add_action( action_text );
978                         }
979                 }
980
981         line_directive_out(NULL, 1);
982         add_action("[[");
983
984         /* This isn't a normal rule after all - don't count it as
985          * such, so we don't have any holes in the rule numbering
986          * (which make generating "rule can never match" warnings
987          * more difficult.
988          */
989         --num_rules;
990         ++num_eof_rules;
991         }
992
993
994 /* format_synerr - write out formatted syntax error */
995
996 void format_synerr( const char *msg, const char arg[] )
997         {
998         char errmsg[MAXLINE];
999
1000         (void) snprintf( errmsg, sizeof(errmsg), msg, arg );
1001         synerr( errmsg );
1002         }
1003
1004
1005 /* synerr - report a syntax error */
1006
1007 void synerr( const char *str )
1008         {
1009         syntaxerror = true;
1010         pinpoint_message( str );
1011         }
1012
1013
1014 /* format_warn - write out formatted warning */
1015
1016 void format_warn( const char *msg, const char arg[] )
1017         {
1018         char warn_msg[MAXLINE];
1019
1020         snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1021         lwarn( warn_msg );
1022         }
1023
1024
1025 /* lwarn - report a warning, unless -w was given */
1026
1027 void lwarn( const char *str )
1028         {
1029         line_warning( str, linenum );
1030         }
1031
1032 /* format_pinpoint_message - write out a message formatted with one string,
1033  *                           pinpointing its location
1034  */
1035
1036 void format_pinpoint_message( const char *msg, const char arg[] )
1037         {
1038         char errmsg[MAXLINE];
1039
1040         snprintf( errmsg, sizeof(errmsg), msg, arg );
1041         pinpoint_message( errmsg );
1042         }
1043
1044
1045 /* pinpoint_message - write out a message, pinpointing its location */
1046
1047 void pinpoint_message( const char *str )
1048         {
1049         line_pinpoint( str, linenum );
1050         }
1051
1052
1053 /* line_warning - report a warning at a given line, unless -w was given */
1054
1055 void line_warning( const char *str, int line )
1056         {
1057         char warning[MAXLINE];
1058
1059         if ( ! nowarn )
1060                 {
1061                 snprintf( warning, sizeof(warning), "warning, %s", str );
1062                 line_pinpoint( warning, line );
1063                 }
1064         }
1065
1066
1067 /* line_pinpoint - write out a message, pinpointing it at the given line */
1068
1069 void line_pinpoint( const char *str, int line )
1070         {
1071         fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1072         }
1073
1074
/* yyerror - eat up an error message from the parser;
 *           currently, messages are ignored
 */

void yyerror( const char *msg )
{
    /* Deliberately discarded; the cast only marks msg as used. */
    (void) msg;
}