contrib/binutils/gas/app.c

   1 /* This is the Assembler Pre-Processor
   2    Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 1997
   3    Free Software Foundation, Inc.
   4
   5    This file is part of GAS, the GNU Assembler.
   6
   7    GAS is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    GAS is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with GAS; see the file COPYING.  If not, write to the Free
  19    Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20    02111-1307, USA.  */
  21
  22 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
  23 /* App, the assembler pre-processor.  This pre-processor strips out excess
  24    spaces, turns single-quoted characters into a decimal constant, and turns
  25    # <number> <filename> <garbage> into a .line <number>\n.file <filename>
  26    pair.  This needs better error-handling.  */
  27
  28 #include <stdio.h>
  29 #include "as.h"                 /* For BAD_CASE() only */
  30
  31 #if (__STDC__ != 1)
  32 #ifndef const
  33 #define const  /* empty */
  34 #endif
  35 #endif
  36
  37 /* Whether we are scrubbing in m68k MRI mode.  This is different from
  38    flag_m68k_mri, because the two flags will be affected by the .mri
  39    pseudo-op at different times.  */
  40 static int scrub_m68k_mri;
  41
  42 /* The pseudo-op which switches in and out of MRI mode.  See the
  43    comment in do_scrub_chars.  */
  44 static const char mri_pseudo[] = ".mri 0";
  45
  46 static char lex[256];            /* Class of each character, indexed by character value; filled in by do_scrub_begin.  Zero means no special class.  */
  47 static const char symbol_chars[] =        /* Characters classified as LEX_IS_SYMBOL_COMPONENT.  */
  48 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
  49
  50 #define LEX_IS_SYMBOL_COMPONENT         1         /* Member of symbol_chars.  */
  51 #define LEX_IS_WHITESPACE               2         /* Space, tab or carriage return.  */
  52 #define LEX_IS_LINE_SEPARATOR           3         /* Semicolon by default, plus line_separator_chars.  */
  53 #define LEX_IS_COMMENT_START            4         /* Member of tc_comment_chars.  */
  54 #define LEX_IS_LINE_COMMENT_START       5         /* Member of line_comment_chars.  */
  55 #define LEX_IS_TWOCHAR_COMMENT_1ST      6         /* Slash, when it has no other class.  */
  56 #define LEX_IS_STRINGQUOTE              8         /* Opens and closes a string.  Note that value 7 is unused.  */
  57 #define LEX_IS_COLON                    9         /* The colon character.  */
  58 #define LEX_IS_NEWLINE                  10        /* The newline character.  */
  59 #define LEX_IS_ONECHAR_QUOTE            11        /* Introduces a one-character constant.  */
  60 #ifdef TC_V850
  61 #define LEX_IS_DOUBLEDASH_1ST           12        /* Dash; a second dash starts a comment.  */
  62 #endif
  63 #ifdef TC_M32R
  64 #define LEX_IS_DOUBLEBAR_1ST            13        /* Vertical bar; a second bar separates parallel instructions.  */
  65 #endif
  66 #define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)    /* These predicates index lex directly, so C must be a valid index (0..255).  */
  67 #define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)
  68 #define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)
  69 #define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)
  70 #define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
  71 #define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)
  72
  73 static int process_escape PARAMS ((int));
  74
  75 /* FIXME-soon: The entire lexer/parser thingy should be
  76    built statically at compile time rather than dynamically
  77    each and every time the assembler is run.  xoxorich. */
  78
  79 void
  80 do_scrub_begin (m68k_mri)
  81      int m68k_mri;
  82 {
  83   const char *p;
  84
  85   scrub_m68k_mri = m68k_mri;
  86
  87   lex[' '] = LEX_IS_WHITESPACE;
  88   lex['\t'] = LEX_IS_WHITESPACE;
  89   lex['\r'] = LEX_IS_WHITESPACE;
  90   lex['\n'] = LEX_IS_NEWLINE;
  91   lex[';'] = LEX_IS_LINE_SEPARATOR;
  92   lex[':'] = LEX_IS_COLON;
  93
  94   if (! m68k_mri)
  95     {
  96       lex['"'] = LEX_IS_STRINGQUOTE;
  97
  98 #ifndef TC_HPPA
  99       lex['\''] = LEX_IS_ONECHAR_QUOTE;
 100 #endif
 101
 102 #ifdef SINGLE_QUOTE_STRINGS
 103       lex['\''] = LEX_IS_STRINGQUOTE;
 104 #endif
 105     }
 106
 107   /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
 108      in state 5 of do_scrub_chars must be changed.  */
 109
 110   /* Note that these override the previous defaults, e.g. if ';' is a
 111      comment char, then it isn't a line separator.  */
 112   for (p = symbol_chars; *p; ++p)
 113     {
 114       lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
 115     }                           /* declare symbol characters */
 116
 117   /* The m68k backend wants to be able to change comment_chars.  */
 118 #ifndef tc_comment_chars
 119 #define tc_comment_chars comment_chars
 120 #endif
 121   for (p = tc_comment_chars; *p; p++)
 122     {
 123       lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
 124     }                           /* declare comment chars */
 125
 126   for (p = line_comment_chars; *p; p++)
 127     {
 128       lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
 129     }                           /* declare line comment chars */
 130
 131   for (p = line_separator_chars; *p; p++)
 132     {
 133       lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
 134     }                           /* declare line separators */
 135
 136   /* Only allow slash-star comments if slash is not in use.
 137      FIXME: This isn't right.  We should always permit them.  */
 138   if (lex['/'] == 0)
 139     {
 140       lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
 141     }
 142
 143   if (m68k_mri)
 144     {
 145       lex['\''] = LEX_IS_STRINGQUOTE;
 146       lex[';'] = LEX_IS_COMMENT_START;
 147       lex['*'] = LEX_IS_LINE_COMMENT_START;
 148       /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
 149          then it can't be used in an expression.  */
 150       lex['!'] = LEX_IS_LINE_COMMENT_START;
 151     }
 152
 153 #ifdef TC_V850
 154   lex['-'] = LEX_IS_DOUBLEDASH_1ST;
 155 #endif
 156 #ifdef TC_M32R
 157   lex['|'] = LEX_IS_DOUBLEBAR_1ST;
 158 #endif
 159 }                               /* do_scrub_begin() */
 160
 161 /* Saved state of the scrubber; app_push and app_pop save and restore all of it.  */
 162 static int state;                /* Current state number; see the list in do_scrub_chars.  */
 163 static int old_state;            /* State to resume once states -1, -2 or 5 finish.  */
 164 static char *out_string;         /* Text still to be emitted while in state -1.  */
 165 static char out_buf[20];         /* Holds the decimal text of a one-character quote constant.  */
 166 static int add_newlines;         /* Newlines swallowed in comments or continued strings, still to be emitted.  */
 167 static char *saved_input;        /* Input read first by the next do_scrub_chars call, or NULL; freed by GET.  */
 168 static int saved_input_len;      /* Number of bytes available at saved_input.  */
 169 static const char *mri_state;    /* Next character of mri_pseudo to match, or NULL when not matching.  */
 170 static char mri_last_ch;         /* Last character matched against mri_pseudo.  */
 171
 172 /* Data structure for saving the state of app across #include's.  Note that
 173    app is called asynchronously to the parsing of the .include's, so our
 174    state at the time .include is interpreted is completely unrelated.
 175    That's why we have to save it all.  Filled by app_push, consumed by app_pop.  */
 176
 177 struct app_save
 178   {
 179     int state;                          /* Copy of the scrubber state number.  */
 180     int old_state;                      /* Copy of the state to resume later.  */
 181     char *out_string;                   /* Copy of the pending output pointer.  */
 182     char out_buf[sizeof (out_buf)];     /* Copy of the file-scope out_buf contents.  */
 183     int add_newlines;                   /* Copy of the count of newlines still owed.  */
 184     char *saved_input;                  /* Copy of the held-over input pointer; the pointer is copied, not the data.  */
 185     int saved_input_len;                /* Copy of the held-over input length.  */
 186     int scrub_m68k_mri;                 /* Copy of the MRI-mode flag of the scrubber.  */
 187     const char *mri_state;              /* Copy of the position in the .mri recognizer.  */
 188     char mri_last_ch;                   /* Copy of the last character the recognizer matched.  */
 189   };
 190
 191 char *
 192 app_push ()
 193 {
 194   register struct app_save *saved;
 195
 196   saved = (struct app_save *) xmalloc (sizeof (*saved));
 197   saved->state = state;
 198   saved->old_state = old_state;
 199   saved->out_string = out_string;
 200   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
 201   saved->add_newlines = add_newlines;
 202   saved->saved_input = saved_input;
 203   saved->saved_input_len = saved_input_len;
 204   saved->scrub_m68k_mri = scrub_m68k_mri;
 205   saved->mri_state = mri_state;
 206   saved->mri_last_ch = mri_last_ch;
 207
 208   /* do_scrub_begin() is not useful, just wastes time. */
 209
 210   state = 0;
 211   saved_input = NULL;
 212
 213   return (char *) saved;
 214 }
 215
 216 void
 217 app_pop (arg)
 218      char *arg;
 219 {
 220   register struct app_save *saved = (struct app_save *) arg;
 221
 222   /* There is no do_scrub_end (). */
 223   state = saved->state;
 224   old_state = saved->old_state;
 225   out_string = saved->out_string;
 226   memcpy (out_buf, saved->out_buf, sizeof (out_buf));
 227   add_newlines = saved->add_newlines;
 228   saved_input = saved->saved_input;
 229   saved_input_len = saved->saved_input_len;
 230   scrub_m68k_mri = saved->scrub_m68k_mri;
 231   mri_state = saved->mri_state;
 232   mri_last_ch = saved->mri_last_ch;
 233
 234   free (arg);
 235 }                               /* app_pop() */
 236
 237 /* @@ This assumes that \n &c are the same on host and target.  This is not
 238    necessarily true.  */
 239 static int
 240 process_escape (ch)
 241      int ch;
 242 {
 243   switch (ch)
 244     {
 245     case 'b':
 246       return '\b';
 247     case 'f':
 248       return '\f';
 249     case 'n':
 250       return '\n';
 251     case 'r':
 252       return '\r';
 253     case 't':
 254       return '\t';
 255     case '\'':
 256       return '\'';
 257     case '"':
 258       return '\"';
 259     default:
 260       return ch;
 261     }
 262 }
 263
 264 /* This function is called to process input characters.  The GET
 265    parameter is used to retrieve more input characters.  GET should
 266    set its parameter to point to a buffer, and return the length of
 267    the buffer; it should return 0 at end of file.  The scrubbed output
 268    characters are put into the buffer starting at TOSTART; the TOSTART
 269    buffer is TOLEN bytes in length.  The function returns the number
 270    of scrubbed characters put into TOSTART.  This will be TOLEN unless
 271    end of file was seen.  This function is arranged as a state
 272    machine, and saves its state so that it may return at any point.
 273    This is the way the old code used to work.  */
 274
 275 int
 276 do_scrub_chars (get, tostart, tolen)
 277      int (*get) PARAMS ((char **));
 278      char *tostart;
 279      int tolen;
 280 {
 281   char *to = tostart;
 282   char *toend = tostart + tolen;
 283   char *from;
 284   char *fromend;
 285   int fromlen;
 286   register int ch, ch2 = 0;
 287
 288   /*State 0: beginning of normal line
 289           1: After first whitespace on line (flush more white)
 290           2: After first non-white (opcode) on line (keep 1white)
 291           3: after second white on line (into operands) (flush white)
 292           4: after putting out a .line, put out digits
 293           5: parsing a string, then go to old-state
 294           6: putting out \ escape in a "d string.
 295           7: After putting out a .appfile, put out string.
 296           8: After putting out a .appfile string, flush until newline.
 297           9: After seeing symbol char in state 3 (keep 1white after symchar)
 298          10: After seeing whitespace in state 9 (keep white before symchar)
 299          11: After seeing a symbol character in state 0 (eg a label definition)
 300          -1: output string in out_string and go to the state in old_state
 301          -2: flush text until a '*' '/' is seen, then go to state old_state
 302 #ifdef TC_V850
 303          12: After seeing a dash, looking for a second dash as a start of comment.
 304 #endif
 305 #ifdef TC_M32R
 306          13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression separator.
 307 #endif
 308           */
 309
 310   /* I added states 9 and 10 because the MIPS ECOFF assembler uses
 311      constructs like ``.loc 1 20''.  This was turning into ``.loc
 312      120''.  States 9 and 10 ensure that a space is never dropped in
 313      between characters which could appear in a identifier.  Ian
 314      Taylor, ian@cygnus.com.
 315
 316      I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
 317      correctly on the PA (and any other target where colons are optional).
 318      Jeff Law, law@cs.utah.edu.
 319
 320      I added state 13 so that something like "cmp r1, r2 || trap #1" does not
 321      get squashed into "cmp r1,r2||trap#1", with the all important space
 322      between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
 323
 324   /* This macro gets the next input character.  */
 325
 326 #define GET()                           \
 327   (from < fromend                       \
 328    ? *from++                            \
 329    : ((saved_input != NULL              \
 330        ? (free (saved_input),           \
 331           saved_input = NULL,           \
 332           0)                            \
 333        : 0),                            \
 334       fromlen = (*get) (&from),         \
 335       fromend = from + fromlen,         \
 336       (fromlen == 0                     \
 337        ? EOF                            \
 338        : *from++)))
 339
 340   /* This macro pushes a character back on the input stream.  */
 341
 342 #define UNGET(uch) (*--from = (uch))
 343
 344   /* This macro puts a character into the output buffer.  If this
 345      character fills the output buffer, this macro jumps to the label
 346      TOFULL.  We use this rather ugly approach because we need to
 347      handle two different termination conditions: EOF on the input
 348      stream, and a full output buffer.  It would be simpler if we
 349      always read in the entire input stream before processing it, but
 350      I don't want to make such a significant change to the assembler's
 351      memory usage.  */
 352
 353 #define PUT(pch)                        \
 354   do                                    \
 355     {                                   \
 356       *to++ = (pch);                    \
 357       if (to >= toend)                  \
 358         goto tofull;                    \
 359     }                                   \
 360   while (0)
 361
 362   if (saved_input != NULL)
 363     {
 364       from = saved_input;
 365       fromend = from + saved_input_len;
 366     }
 367   else
 368     {
 369       fromlen = (*get) (&from);
 370       if (fromlen == 0)
 371         return 0;
 372       fromend = from + fromlen;
 373     }
 374
 375   while (1)
 376     {
 377       /* The cases in this switch end with continue, in order to
 378          branch back to the top of this while loop and generate the
 379          next output character in the appropriate state.  */
 380       switch (state)
 381         {
 382         case -1:
 383           ch = *out_string++;
 384           if (*out_string == '\0')
 385             {
 386               state = old_state;
 387               old_state = 3;
 388             }
 389           PUT (ch);
 390           continue;
 391
 392         case -2:
 393           for (;;)
 394             {
 395               do
 396                 {
 397                   ch = GET ();
 398
 399                   if (ch == EOF)
 400                     {
 401                       as_warn ("end of file in comment");
 402                       goto fromeof;
 403                     }
 404
 405                   if (ch == '\n')
 406                     PUT ('\n');
 407                 }
 408               while (ch != '*');
 409
 410               while ((ch = GET ()) == '*')
 411                 ;
 412
 413               if (ch == EOF)
 414                 {
 415                   as_warn ("end of file in comment");
 416                   goto fromeof;
 417                 }
 418
 419               if (ch == '/')
 420                 break;
 421
 422               UNGET (ch);
 423             }
 424
 425           state = old_state;
 426           UNGET (' ');
 427           continue;
 428
 429         case 4:
 430           ch = GET ();
 431           if (ch == EOF)
 432             goto fromeof;
 433           else if (ch >= '0' && ch <= '9')
 434             PUT (ch);
 435           else
 436             {
 437               while (ch != EOF && IS_WHITESPACE (ch))
 438                 ch = GET ();
 439               if (ch == '"')
 440                 {
 441                   UNGET (ch);
 442                   if (scrub_m68k_mri)
 443                     out_string = "\n\tappfile ";
 444                   else
 445                     out_string = "\n\t.appfile ";
 446                   old_state = 7;
 447                   state = -1;
 448                   PUT (*out_string++);
 449                 }
 450               else
 451                 {
 452                   while (ch != EOF && ch != '\n')
 453                     ch = GET ();
 454                   state = 0;
 455                   PUT (ch);
 456                 }
 457             }
 458           continue;
 459
 460         case 5:
 461           /* We are going to copy everything up to a quote character,
 462              with special handling for a backslash.  We try to
 463              optimize the copying in the simple case without using the
 464              GET and PUT macros.  */
 465           {
 466             char *s;
 467             int len;
 468
 469             for (s = from; s < fromend; s++)
 470               {
 471                 ch = *s;
 472                 /* This condition must be changed if the type of any
 473                    other character can be LEX_IS_STRINGQUOTE.  */
 474                 if (ch == '\\'
 475                     || ch == '"'
 476                     || ch == '\''
 477                     || ch == '\n')
 478                   break;
 479               }
 480             len = s - from;
 481             if (len > toend - to)
 482               len = toend - to;
 483             if (len > 0)
 484               {
 485                 memcpy (to, from, len);
 486                 to += len;
 487                 from += len;
 488               }
 489           }
 490
 491           ch = GET ();
 492           if (ch == EOF)
 493             {
 494               as_warn ("end of file in string: inserted '\"'");
 495               state = old_state;
 496               UNGET ('\n');
 497               PUT ('"');
 498             }
 499           else if (lex[ch] == LEX_IS_STRINGQUOTE)
 500             {
 501               state = old_state;
 502               PUT (ch);
 503             }
 504 #ifndef NO_STRING_ESCAPES
 505           else if (ch == '\\')
 506             {
 507               state = 6;
 508               PUT (ch);
 509             }
 510 #endif
 511           else if (scrub_m68k_mri && ch == '\n')
 512             {
 513               /* Just quietly terminate the string.  This permits lines like
 514                    bne  label   loop if we haven't reach end yet
 515                  */
 516               state = old_state;
 517               UNGET (ch);
 518               PUT ('\'');
 519             }
 520           else
 521             {
 522               PUT (ch);
 523             }
 524           continue;
 525
 526         case 6:
 527           state = 5;
 528           ch = GET ();
 529           switch (ch)
 530             {
 531               /* Handle strings broken across lines, by turning '\n' into
 532                  '\\' and 'n'.  */
 533             case '\n':
 534               UNGET ('n');
 535               add_newlines++;
 536               PUT ('\\');
 537               continue;
 538
 539             case '"':
 540             case '\\':
 541             case 'b':
 542             case 'f':
 543             case 'n':
 544             case 'r':
 545             case 't':
 546             case 'v':
 547             case 'x':
 548             case 'X':
 549             case '0':
 550             case '1':
 551             case '2':
 552             case '3':
 553             case '4':
 554             case '5':
 555             case '6':
 556             case '7':
 557               break;
 558 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
 559             default:
 560               as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
 561               break;
 562 #else  /* ONLY_STANDARD_ESCAPES */
 563             default:
 564               /* Accept \x as x for any x */
 565               break;
 566 #endif /* ONLY_STANDARD_ESCAPES */
 567
 568             case EOF:
 569               as_warn ("End of file in string: '\"' inserted");
 570               PUT ('"');
 571               continue;
 572             }
 573           PUT (ch);
 574           continue;
 575
 576         case 7:
 577           ch = GET ();
 578           state = 5;
 579           old_state = 8;
 580           if (ch == EOF)
 581             goto fromeof;
 582           PUT (ch);
 583           continue;
 584
 585         case 8:
 586           do
 587             ch = GET ();
 588           while (ch != '\n' && ch != EOF);
 589           if (ch == EOF)
 590             goto fromeof;
 591           state = 0;
 592           PUT (ch);
 593           continue;
 594         }
 595
 596       /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
 597
 598       /* flushchar: */
 599       ch = GET ();
 600
 601     recycle:
 602
 603 #ifdef TC_M68K
 604       /* We want to have pseudo-ops which control whether we are in
 605          MRI mode or not.  Unfortunately, since m68k MRI mode affects
 606          the scrubber, that means that we need a special purpose
 607          recognizer here.  */
 608       if (mri_state == NULL)
 609         {
 610           if ((state == 0 || state == 1)
 611               && ch == mri_pseudo[0])
 612             mri_state = mri_pseudo + 1;
 613         }
 614       else
 615         {
 616           /* We advance to the next state if we find the right
 617              character, or if we need a space character and we get any
 618              whitespace character, or if we need a '0' and we get a
 619              '1' (this is so that we only need one state to handle
 620              ``.mri 0'' and ``.mri 1'').  */
 621           if (ch != '\0'
 622               && (*mri_state == ch
 623                   || (*mri_state == ' '
 624                       && lex[ch] == LEX_IS_WHITESPACE)
 625                   || (*mri_state == '0'
 626                       && ch == '1')))
 627             {
 628               mri_last_ch = ch;
 629               ++mri_state;
 630             }
 631           else if (*mri_state != '\0'
 632                    || (lex[ch] != LEX_IS_WHITESPACE
 633                        && lex[ch] != LEX_IS_NEWLINE))
 634             {
 635               /* We did not get the expected character, or we didn't
 636                  get a valid terminating character after seeing the
 637                  entire pseudo-op, so we must go back to the
 638                  beginning.  */
 639               mri_state = NULL;
 640             }
 641           else
 642             {
 643               /* We've read the entire pseudo-op.  mri_last_ch is
 644                  either '1' or '0', indicating whether to enter or
 645                  leave MRI mode.  */
 646               do_scrub_begin (mri_last_ch == '1');
 647               mri_state = NULL;
 648
 649               /* We continue handling the character as usual.  The
 650                  main gas reader must also handle the .mri pseudo-op
 651                  to control expression parsing and the like.  */
 652             }
 653         }
 654 #endif
 655
 656       if (ch == EOF)
 657         {
 658           if (state != 0)
 659             {
 660               as_warn ("end of file not at end of a line; newline inserted");
 661               state = 0;
 662               PUT ('\n');
 663             }
 664           goto fromeof;
 665         }
 666
 667       switch (lex[ch])
 668         {
 669         case LEX_IS_WHITESPACE:
 670           do
 671             {
 672               ch = GET ();
 673             }
 674           while (ch != EOF && IS_WHITESPACE (ch));
 675           if (ch == EOF)
 676             goto fromeof;
 677
 678           if (state == 0)
 679             {
 680               /* Preserve a single whitespace character at the
 681                  beginning of a line.  */
 682               state = 1;
 683               UNGET (ch);
 684               PUT (' ');
 685               break;
 686             }
 687
 688           if (IS_COMMENT (ch)
 689               || ch == '/'
 690               || IS_LINE_SEPARATOR (ch))
 691             {
 692               if (scrub_m68k_mri)
 693                 {
 694                   /* In MRI mode, we keep these spaces.  */
 695                   UNGET (ch);
 696                   PUT (' ');
 697                   break;
 698                 }
 699               goto recycle;
 700             }
 701
 702           /* If we're in state 2 or 11, we've seen a non-white
 703              character followed by whitespace.  If the next character
 704              is ':', this is whitespace after a label name which we
 705              normally must ignore.  In MRI mode, though, spaces are
 706              not permitted between the label and the colon.  */
 707           if ((state == 2 || state == 11)
 708               && lex[ch] == LEX_IS_COLON
 709               && ! scrub_m68k_mri)
 710             {
 711               state = 1;
 712               PUT (ch);
 713               break;
 714             }
 715
 716           switch (state)
 717             {
 718             case 0:
 719               state++;
 720               goto recycle;     /* Punted leading sp */
 721             case 1:
 722               /* We can arrive here if we leave a leading whitespace
 723                  character at the beginning of a line.  */
 724               goto recycle;
 725             case 2:
 726               state = 3;
 727               if (to + 1 < toend)
 728                 {
 729                   /* Optimize common case by skipping UNGET/GET.  */
 730                   PUT (' ');    /* Sp after opco */
 731                   goto recycle;
 732                 }
 733               UNGET (ch);
 734               PUT (' ');
 735               break;
 736             case 3:
 737               if (scrub_m68k_mri)
 738                 {
 739                   /* In MRI mode, we keep these spaces.  */
 740                   UNGET (ch);
 741                   PUT (' ');
 742                   break;
 743                 }
 744               goto recycle;     /* Sp in operands */
 745             case 9:
 746             case 10:
 747               if (scrub_m68k_mri)
 748                 {
 749                   /* In MRI mode, we keep these spaces.  */
 750                   state = 3;
 751                   UNGET (ch);
 752                   PUT (' ');
 753                   break;
 754                 }
 755               state = 10;       /* Sp after symbol char */
 756               goto recycle;
 757             case 11:
 758               if (flag_m68k_mri
 759 #ifdef LABELS_WITHOUT_COLONS
 760                   || 1
 761 #endif
 762                   )
 763                 state = 1;
 764               else
 765                 {
 766                   /* We know that ch is not ':', since we tested that
 767                      case above.  Therefore this is not a label, so it
 768                      must be the opcode, and we've just seen the
 769                      whitespace after it.  */
 770                   state = 3;
 771                 }
 772               UNGET (ch);
 773               PUT (' ');        /* Sp after label definition.  */
 774               break;
 775             default:
 776               BAD_CASE (state);
 777             }
 778           break;
 779
 780         case LEX_IS_TWOCHAR_COMMENT_1ST:
 781           ch2 = GET ();
 782           if (ch2 == '*')
 783             {
 784               for (;;)
 785                 {
 786                   do
 787                     {
 788                       ch2 = GET ();
 789                       if (ch2 != EOF && IS_NEWLINE (ch2))
 790                         add_newlines++;
 791                     }
 792                   while (ch2 != EOF && ch2 != '*');
 793
 794                   while (ch2 == '*')
 795                     ch2 = GET ();
 796
 797                   if (ch2 == EOF || ch2 == '/')
 798                     break;
 799
 800                   /* This UNGET will ensure that we count newlines
 801                      correctly.  */
 802                   UNGET (ch2);
 803                 }
 804
 805               if (ch2 == EOF)
 806                 as_warn ("end of file in multiline comment");
 807
 808               ch = ' ';
 809               goto recycle;
 810             }
 811           else
 812             {
 813               if (ch2 != EOF)
 814                 UNGET (ch2);
 815               if (state == 9 || state == 10)
 816                 state = 3;
 817               PUT (ch);
 818             }
 819           break;
 820
 821         case LEX_IS_STRINGQUOTE:
 822           if (state == 10)
 823             {
 824               /* Preserve the whitespace in foo "bar" */
 825               UNGET (ch);
 826               state = 3;
 827               PUT (' ');
 828
 829               /* PUT didn't jump out.  We could just break, but we
 830                  know what will happen, so optimize a bit.  */
 831               ch = GET ();
 832               old_state = 3;
 833             }
 834           else if (state == 9)
 835             old_state = 3;
 836           else
 837             old_state = state;
 838           state = 5;
 839           PUT (ch);
 840           break;
 841
 842 #ifndef IEEE_STYLE
 843         case LEX_IS_ONECHAR_QUOTE:
 844           if (state == 10)
 845             {
 846               /* Preserve the whitespace in foo 'b' */
 847               UNGET (ch);
 848               state = 3;
 849               PUT (' ');
 850               break;
 851             }
 852           ch = GET ();
 853           if (ch == EOF)
 854             {
 855               as_warn ("end of file after a one-character quote; \\0 inserted");
 856               ch = 0;
 857             }
 858           if (ch == '\\')
 859             {
 860               ch = GET ();
 861               if (ch == EOF)
 862                 {
 863                   as_warn ("end of file in escape character");
 864                   ch = '\\';
 865                 }
 866               else
 867                 ch = process_escape (ch);
 868             }
 869           sprintf (out_buf, "%d", (int) (unsigned char) ch);
 870
 871           /* None of these 'x constants for us.  We want 'x'.  */
 872           if ((ch = GET ()) != '\'')
 873             {
 874 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
 875               as_warn ("Missing close quote: (assumed)");
 876 #else
 877               if (ch != EOF)
 878                 UNGET (ch);
 879 #endif
 880             }
 881           if (strlen (out_buf) == 1)
 882             {
 883               PUT (out_buf[0]);
 884               break;
 885             }
 886           if (state == 9)
 887             old_state = 3;
 888           else
 889             old_state = state;
 890           state = -1;
 891           out_string = out_buf;
 892           PUT (*out_string++);
 893           break;
 894 #endif
 895
 896         case LEX_IS_COLON:
 897           if (state == 9 || state == 10)
 898             state = 3;
 899           else if (state != 3)
 900             state = 1;
 901           PUT (ch);
 902           break;
 903
 904         case LEX_IS_NEWLINE:
 905           /* Roll out a bunch of newlines from inside comments, etc.  */
 906           if (add_newlines)
 907             {
 908               --add_newlines;
 909               UNGET (ch);
 910             }
 911           /* fall thru into... */
 912
 913         case LEX_IS_LINE_SEPARATOR:
 914           state = 0;
 915           PUT (ch);
 916           break;
 917
 918 #ifdef TC_V850
 919         case LEX_IS_DOUBLEDASH_1ST:
 920           ch2 = GET();
 921           if (ch2 != '-')
 922             {
 923               UNGET (ch2);
 924               goto de_fault;
 925             }
 926           /* read and skip to end of line */
 927           do
 928             {
 929               ch = GET ();
 930             }
 931           while (ch != EOF && ch != '\n');
 932           if (ch == EOF)
 933             {
 934               as_warn ("end of file in comment; newline inserted");
 935             }
 936           state = 0;
 937           PUT ('\n');
 938           break;
 939 #endif
 940 #ifdef TC_M32R
 941         case LEX_IS_DOUBLEBAR_1ST:
 942           ch2 = GET();
 943           if (ch2 != '|')
 944             {
 945               UNGET (ch2);
 946               goto de_fault;
 947             }
 948           /* Reset back to state 1 and pretend that we are parsing a line from
 949              just after the first white space.  */
 950           state = 1;
 951           PUT ('|');
 952           PUT ('|');
 953           break;
 954 #endif
 955         case LEX_IS_LINE_COMMENT_START:
 956           /* FIXME-someday: The two character comment stuff was badly
 957              thought out.  On i386, we want '/' as line comment start
 958              AND we want C style comments.  hence this hack.  The
 959              whole lexical process should be reworked.  xoxorich.  */
 960           if (ch == '/')
 961             {
 962               ch2 = GET ();
 963               if (ch2 == '*')
 964                 {
 965                   old_state = 3;
 966                   state = -2;
 967                   break;
 968                 }
 969               else
 970                 {
 971                   UNGET (ch2);
 972                 }
 973             } /* bad hack */
 974
 975           if (state == 0 || state == 1) /* Only comment at start of line.  */
 976             {
 977               int startch;
 978
 979               startch = ch;
 980
 981               do
 982                 {
 983                   ch = GET ();
 984                 }
 985               while (ch != EOF && IS_WHITESPACE (ch));
 986               if (ch == EOF)
 987                 {
 988                   as_warn ("end of file in comment; newline inserted");
 989                   PUT ('\n');
 990                   break;
 991                 }
 992               if (ch < '0' || ch > '9' || state != 0 || startch != '#')
 993                 {
 994                   /* Not a cpp line.  */
 995                   while (ch != EOF && !IS_NEWLINE (ch))
 996                     ch = GET ();
 997                   if (ch == EOF)
 998                     as_warn ("EOF in Comment: Newline inserted");
 999                   state = 0;
1000                   PUT ('\n');
1001                   break;
1002                 }
1003               /* Looks like `# 123 "filename"' from cpp.  */
1004               UNGET (ch);
1005               old_state = 4;
1006               state = -1;
1007               if (scrub_m68k_mri)
1008                 out_string = "\tappline ";
1009               else
1010                 out_string = "\t.appline ";
1011               PUT (*out_string++);
1012               break;
1013             }
1014
1015 #ifdef TC_D10V
1016           /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1017              Trap is the only short insn that has a first operand that is
1018              neither register nor label.
1019              We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1020              We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is already
1021              LEX_IS_LINE_COMMENT_START.  However, it is the only character in
1022              line_comment_chars for d10v, hence we can recognize it as such.  */
1023           /* An alternative approach would be to reset the state to 1 when
1024              we see '||', '<-' or '->', but that seems to be overkill.  */
1025           if (state == 10) PUT (' ');
1026 #endif
1027           /* We have a line comment character which is not at the
1028              start of a line.  If this is also a normal comment
1029              character, fall through.  Otherwise treat it as a default
1030              character.  */
1031           if (strchr (tc_comment_chars, ch) == NULL
1032               && (! scrub_m68k_mri
1033                   || (ch != '!' && ch != '*')))
1034             goto de_fault;
1035           if (scrub_m68k_mri
1036               && (ch == '!' || ch == '*' || ch == '#')
1037               && state != 1
1038               && state != 10)
1039             goto de_fault;
1040           /* Fall through.  */
1041         case LEX_IS_COMMENT_START:
1042           do
1043             {
1044               ch = GET ();
1045             }
1046           while (ch != EOF && !IS_NEWLINE (ch));
1047           if (ch == EOF)
1048             as_warn ("end of file in comment; newline inserted");
1049           state = 0;
1050           PUT ('\n');
1051           break;
1052
1053         case LEX_IS_SYMBOL_COMPONENT:
1054           if (state == 10)
1055             {
1056               /* This is a symbol character following another symbol
1057                  character, with whitespace in between.  We skipped
1058                  the whitespace earlier, so output it now.  */
1059               UNGET (ch);
1060               state = 3;
1061               PUT (' ');
1062               break;
1063             }
1064
1065           if (state == 3)
1066             state = 9;
1067
1068           /* This is a common case.  Quickly copy CH and all the
1069              following symbol component or normal characters.  */
1070           if (to + 1 < toend && mri_state == NULL)
1071             {
1072               char *s;
1073               int len;
1074
1075               for (s = from; s < fromend; s++)
1076                 {
1077                   int type;
1078
1079                   ch2 = *s;
1080                   type = lex[ch2];
1081                   if (type != 0
1082                       && type != LEX_IS_SYMBOL_COMPONENT)
1083                     break;
1084                 }
1085               if (s > from)
1086                 {
1087                   /* Handle the last character normally, for
1088                      simplicity.  */
1089                   --s;
1090                 }
1091               len = s - from;
1092               if (len > (toend - to) - 1)
1093                 len = (toend - to) - 1;
1094               if (len > 0)
1095                 {
1096                   PUT (ch);
1097                   if (len > 8)
1098                     {
1099                       memcpy (to, from, len);
1100                       to += len;
1101                       from += len;
1102                     }
1103                   else
1104                     {
1105                       switch (len)
1106                         {
1107                         case 8: *to++ = *from++;
1108                         case 7: *to++ = *from++;
1109                         case 6: *to++ = *from++;
1110                         case 5: *to++ = *from++;
1111                         case 4: *to++ = *from++;
1112                         case 3: *to++ = *from++;
1113                         case 2: *to++ = *from++;
1114                         case 1: *to++ = *from++;
1115                         }
1116                     }
1117                   ch = GET ();
1118                 }
1119             }
1120
1121           /* Fall through.  */
1122         default:
1123         de_fault:
1124           /* Some relatively `normal' character.  */
1125           if (state == 0)
1126             {
1127               state = 11;       /* Now seeing label definition */
1128             }
1129           else if (state == 1)
1130             {
1131               state = 2;        /* Ditto */
1132             }
1133           else if (state == 9)
1134             {
1135               if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
1136                 state = 3;
1137             }
1138           else if (state == 10)
1139             {
1140               state = 3;
1141             }
1142           PUT (ch);
1143           break;
1144         }
1145     }
1146
1147   /*NOTREACHED*/
1148
1149  fromeof:
1150   /* We have reached the end of the input.  */
1151   return to - tostart;
1152
1153  tofull:
1154   /* The output buffer is full.  Save any input we have not yet
1155      processed.  */
1156   if (fromend > from)
1157     {
1158       char *save;
1159
1160       save = (char *) xmalloc (fromend - from);
1161       memcpy (save, from, fromend - from);
1162       if (saved_input != NULL)
1163         free (saved_input);
1164       saved_input = save;
1165       saved_input_len = fromend - from;
1166     }
1167   else
1168     {
1169       if (saved_input != NULL)
1170         {
1171           free (saved_input);
1172           saved_input = NULL;
1173         }
1174     }
1175   return to - tostart;
1176 }
1177
1178 /* end of app.c */