contrib/ncurses/ncurses/tinfo/comp_scan.c

   1 /****************************************************************************
   2  * Copyright (c) 1998,1999,2000,2001 Free Software Foundation, Inc.         *
   3  *                                                                          *
   4  * Permission is hereby granted, free of charge, to any person obtaining a  *
   5  * copy of this software and associated documentation files (the            *
   6  * "Software"), to deal in the Software without restriction, including      *
   7  * without limitation the rights to use, copy, modify, merge, publish,      *
   8  * distribute, distribute with modifications, sublicense, and/or sell       *
   9  * copies of the Software, and to permit persons to whom the Software is    *
  10  * furnished to do so, subject to the following conditions:                 *
  11  *                                                                          *
  12  * The above copyright notice and this permission notice shall be included  *
  13  * in all copies or substantial portions of the Software.                   *
  14  *                                                                          *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS  *
  16  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF               *
  17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.   *
  18  * IN NO EVENT SHALL THE ABOVE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,   *
  19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR    *
  20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR    *
  21  * THE USE OR OTHER DEALINGS IN THE SOFTWARE.                               *
  22  *                                                                          *
  23  * Except as contained in this notice, the name(s) of the above copyright   *
  24  * holders shall not be used in advertising or otherwise to promote the     *
  25  * sale, use or other dealings in this Software without prior written       *
  26  * authorization.                                                           *
  27  ****************************************************************************/
  28
  29 /****************************************************************************
  30  *  Author: Zeyd M. Ben-Halim <zmbenhal@netcom.com> 1992,1995               *
  31  *     and: Eric S. Raymond <esr@snark.thyrsus.com>                         *
  32  ****************************************************************************/
  33
  34 /* $FreeBSD$ */
  35
  36 /*
  37  *      comp_scan.c --- Lexical scanner for terminfo compiler.
  38  *
  39  *      _nc_reset_input()
  40  *      _nc_get_token()
  41  *      _nc_panic_mode()
  42  *      int _nc_syntax;
  43  *      int _nc_curr_line;
  44  *      long _nc_curr_file_pos;
  45  *      long _nc_comment_start;
  46  *      long _nc_comment_end;
  47  */
  48
  49 #include <curses.priv.h>
  50
  51 #include <ctype.h>
  52 #include <term_entry.h>
  53 #include <tic.h>
  54
  55 MODULE_ID("$Id: comp_scan.c,v 1.59 2001/09/23 00:56:29 tom Exp $")
  56
  57 /*
  58  * Maximum length of string capability we'll accept before raising an error.
  59  * Yes, there is a real capability in /etc/termcap this long, an "is".
  60  */
  61 #define MAXCAPLEN       600
  62
  63 #define iswhite(ch)     (ch == ' '  ||  ch == '\t')
  64
  65 NCURSES_EXPORT_VAR(int)
  66 _nc_syntax = 0;                 /* termcap or terminfo? */
  67 NCURSES_EXPORT_VAR(long)
  68 _nc_curr_file_pos = 0;          /* file offset of current line */
  69 NCURSES_EXPORT_VAR(long)
  70 _nc_comment_start = 0;          /* start of comment range before name */
  71 NCURSES_EXPORT_VAR(long)
  72 _nc_comment_end = 0;            /* end of comment range before name */
  73 NCURSES_EXPORT_VAR(long)
  74 _nc_start_line = 0;             /* start line of current entry */
  75
  76 NCURSES_EXPORT_VAR(struct token)
  77 _nc_curr_token =
  78 {
  79     0, 0, 0
  80 };
  81
  82 /*****************************************************************************
  83  *
  84  * Token-grabbing machinery
  85  *
  86  *****************************************************************************/
  87
  88 static bool first_column;       /* See 'next_char()' below */
  89 static char separator;          /* capability separator */
  90 static int pushtype;            /* type of pushback token */
  91 static char *pushname;
  92
  93 #if NCURSES_EXT_FUNCS
  94 NCURSES_EXPORT_VAR(bool)
  95 _nc_disable_period = FALSE;     /* used by tic -a option */
  96 #endif
  97
  98 static int last_char(void);
  99 static int next_char(void);
 100 static long stream_pos(void);
 101 static bool end_of_stream(void);
 102 static void push_back(char c);
 103
 104 /* Assume we may be looking at a termcap-style continuation */
 105 static inline int
 106 eat_escaped_newline(int ch)
 107 {
 108     if (ch == '\\')
 109         while ((ch = next_char()) == '\n' || iswhite(ch))
 110             continue;
 111     return ch;
 112 }
 113
 114 /*
 115  *      int
 116  *      get_token()
 117  *
 118  *      Scans the input for the next token, storing the specifics in the
 119  *      global structure 'curr_token' and returning one of the following:
 120  *
 121  *              NAMES           A line beginning in column 1.  'name'
 122  *                              will be set to point to everything up to but
 123  *                              not including the first separator on the line.
 124  *              BOOLEAN         An entry consisting of a name followed by
 125  *                              a separator.  'name' will be set to point to
 126  *                              the name of the capability.
 127  *              NUMBER          An entry of the form
 128  *                                      name#digits,
 129  *                              'name' will be set to point to the capability
 130  *                              name and 'valnumber' to the number given.
 131  *              STRING          An entry of the form
 132  *                                      name=characters,
 133  *                              'name' is set to the capability name and
 134  *                              'valstring' to the string of characters, with
 135  *                              input translations done.
 136  *              CANCEL          An entry of the form
 137  *                                      name@,
 138  *                              'name' is set to the capability name and
 139  *                              'valnumber' to -1.
 140  *              EOF             The end of the file has been reached.
 141  *
 142  *      A `separator' is either a comma or a semicolon, depending on whether
 143  *      we are in termcap or terminfo mode.
 144  *
 145  */
 146
 147 NCURSES_EXPORT(int)
 148 _nc_get_token(bool silent)
 149 {
 150     static const char terminfo_punct[] = "@%&*!#";
 151     static char *buffer;
 152
 153     char *numchk;
 154     char *ptr;
 155     char numbuf[80];
 156     int ch;
 157     int dot_flag = FALSE;
 158     int type;
 159     long number;
 160     long token_start;
 161     unsigned found;
 162
 163     if (pushtype != NO_PUSHBACK) {
 164         int retval = pushtype;
 165
 166         _nc_set_type(pushname != 0 ? pushname : "");
 167         DEBUG(3, ("pushed-back token: `%s', class %d",
 168                   _nc_curr_token.tk_name, pushtype));
 169
 170         pushtype = NO_PUSHBACK;
 171         if (pushname != 0)
 172             pushname[0] = '\0';
 173
 174         /* currtok wasn't altered by _nc_push_token() */
 175         return (retval);
 176     }
 177
 178     if (end_of_stream())
 179         return (EOF);
 180
 181   start_token:
 182     token_start = stream_pos();
 183     while ((ch = next_char()) == '\n' || iswhite(ch))
 184         continue;
 185
 186     ch = eat_escaped_newline(ch);
 187
 188     if (ch == EOF)
 189         type = EOF;
 190     else {
 191         /* if this is a termcap entry, skip a leading separator */
 192         if (separator == ':' && ch == ':')
 193             ch = next_char();
 194
 195         if (ch == '.'
 196 #if NCURSES_EXT_FUNCS
 197             && !_nc_disable_period
 198 #endif
 199             ) {
 200             dot_flag = TRUE;
 201             DEBUG(8, ("dot-flag set"));
 202
 203             while ((ch = next_char()) == '.' || iswhite(ch))
 204                 continue;
 205         }
 206
 207         if (ch == EOF) {
 208             type = EOF;
 209             goto end_of_token;
 210         }
 211
 212         /* have to make some punctuation chars legal for terminfo */
 213         if (!isalnum(ch)
 214 #if NCURSES_EXT_FUNCS
 215             && !(ch == '.' && _nc_disable_period)
 216 #endif
 217             && !strchr(terminfo_punct, (char) ch)) {
 218             if (!silent)
 219                 _nc_warning("Illegal character (expected alphanumeric or %s) - %s",
 220                             terminfo_punct, unctrl((chtype) ch));
 221             _nc_panic_mode(separator);
 222             goto start_token;
 223         }
 224
 225         if (buffer == 0)
 226             buffer = _nc_doalloc(buffer, MAX_ENTRY_SIZE);
 227
 228         ptr = buffer;
 229         *(ptr++) = ch;
 230
 231         if (first_column) {
 232             char *desc;
 233
 234             _nc_comment_start = token_start;
 235             _nc_comment_end = _nc_curr_file_pos;
 236             _nc_start_line = _nc_curr_line;
 237
 238             _nc_syntax = ERR;
 239             while ((ch = next_char()) != '\n') {
 240                 if (ch == EOF)
 241                     _nc_err_abort("premature EOF");
 242                 else if (ch == ':' && last_char() != ',') {
 243                     _nc_syntax = SYN_TERMCAP;
 244                     separator = ':';
 245                     break;
 246                 } else if (ch == ',') {
 247                     _nc_syntax = SYN_TERMINFO;
 248                     separator = ',';
 249                     /*
 250                      * Fall-through here is not an accident.  The idea is that
 251                      * if we see a comma, we figure this is terminfo unless we
 252                      * subsequently run into a colon -- but we don't stop
 253                      * looking for that colon until hitting a newline.  This
 254                      * allows commas to be embedded in description fields of
 255                      * either syntax.
 256                      */
 257                     /* FALLTHRU */
 258                 } else
 259                     ch = eat_escaped_newline(ch);
 260
 261                 *ptr++ = ch;
 262             }
 263             ptr[0] = '\0';
 264             if (_nc_syntax == ERR) {
 265                 /*
 266                  * Grrr...what we ought to do here is barf, complaining that
 267                  * the entry is malformed.  But because a couple of name fields
 268                  * in the 8.2 termcap file end with |\, we just have to assume
 269                  * it's termcap syntax.
 270                  */
 271                 _nc_syntax = SYN_TERMCAP;
 272                 separator = ':';
 273             } else if (_nc_syntax == SYN_TERMINFO) {
 274                 /* throw away trailing /, *$/ */
 275                 for (--ptr; iswhite(*ptr) || *ptr == ','; ptr--)
 276                     continue;
 277                 ptr[1] = '\0';
 278             }
 279
 280             /*
 281              * This is the soonest we have the terminal name fetched.  Set up
 282              * for following warning messages.
 283              */
 284             ptr = strchr(buffer, '|');
 285             if (ptr == (char *) NULL)
 286                 ptr = buffer + strlen(buffer);
 287             ch = *ptr;
 288             *ptr = '\0';
 289             _nc_set_type(buffer);
 290             *ptr = ch;
 291
 292             /*
 293              * Compute the boundary between the aliases and the description
 294              * field for syntax-checking purposes.
 295              */
 296             desc = strrchr(buffer, '|');
 297             if (!silent && desc) {
 298                 if (*desc == '\0')
 299                     _nc_warning("empty longname field");
 300 #ifndef FREEBSD_NATIVE
 301                 else if (strchr(desc, ' ') == (char *) NULL)
 302                     _nc_warning("older tic versions may treat the description field as an alias");
 303 #endif
 304             }
 305             if (!desc)
 306                 desc = buffer + strlen(buffer);
 307
 308             /*
 309              * Whitespace in a name field other than the long name can confuse
 310              * rdist and some termcap tools.  Slashes are a no-no.  Other
 311              * special characters can be dangerous due to shell expansion.
 312              */
 313             for (ptr = buffer; ptr < desc; ptr++) {
 314                 if (isspace(UChar(*ptr))) {
 315                     if (!silent)
 316                         _nc_warning("whitespace in name or alias field");
 317                     break;
 318                 } else if (*ptr == '/') {
 319                     if (!silent)
 320                         _nc_warning("slashes aren't allowed in names or aliases");
 321                     break;
 322                 } else if (strchr("$[]!*?", *ptr)) {
 323                     if (!silent)
 324                         _nc_warning("dubious character `%c' in name or alias field", *ptr);
 325                     break;
 326                 }
 327             }
 328
 329             ptr = buffer;
 330
 331             _nc_curr_token.tk_name = buffer;
 332             type = NAMES;
 333         } else {
 334             while ((ch = next_char()) != EOF) {
 335                 if (!isalnum(ch)) {
 336                     if (_nc_syntax == SYN_TERMINFO) {
 337                         if (ch != '_')
 338                             break;
 339                     } else {    /* allow ';' for "k;" */
 340                         if (ch != ';')
 341                             break;
 342                     }
 343                 }
 344                 *(ptr++) = ch;
 345             }
 346
 347             *ptr++ = '\0';
 348             switch (ch) {
 349             case ',':
 350             case ':':
 351                 if (ch != separator)
 352                     _nc_err_abort("Separator inconsistent with syntax");
 353                 _nc_curr_token.tk_name = buffer;
 354                 type = BOOLEAN;
 355                 break;
 356             case '@':
 357                 if ((ch = next_char()) != separator && !silent)
 358                     _nc_warning("Missing separator after `%s', have %s",
 359                                 buffer, unctrl((chtype) ch));
 360                 _nc_curr_token.tk_name = buffer;
 361                 type = CANCEL;
 362                 break;
 363
 364             case '#':
 365                 found = 0;
 366                 while (isalnum(ch = next_char())) {
 367                     numbuf[found++] = ch;
 368                     if (found >= sizeof(numbuf) - 1)
 369                         break;
 370                 }
 371                 numbuf[found] = '\0';
 372                 number = strtol(numbuf, &numchk, 0);
 373                 if (!silent) {
 374                     if (numchk == numbuf)
 375                         _nc_warning("no value given for `%s'", buffer);
 376                     if ((*numchk != '\0') || (ch != separator))
 377                         _nc_warning("Missing separator");
 378                 }
 379                 _nc_curr_token.tk_name = buffer;
 380                 _nc_curr_token.tk_valnumber = number;
 381                 type = NUMBER;
 382                 break;
 383
 384             case '=':
 385                 ch = _nc_trans_string(ptr, buffer + MAX_ENTRY_SIZE);
 386                 if (!silent && ch != separator)
 387                     _nc_warning("Missing separator");
 388                 _nc_curr_token.tk_name = buffer;
 389                 _nc_curr_token.tk_valstring = ptr;
 390                 type = STRING;
 391                 break;
 392
 393             case EOF:
 394                 type = EOF;
 395                 break;
 396             default:
 397                 /* just to get rid of the compiler warning */
 398                 type = UNDEF;
 399                 if (!silent)
 400                     _nc_warning("Illegal character - %s", unctrl((chtype) ch));
 401             }
 402         }                       /* end else (first_column == FALSE) */
 403     }                           /* end else (ch != EOF) */
 404
 405   end_of_token:
 406
 407 #ifdef TRACE
 408     if (dot_flag == TRUE)
 409         DEBUG(8, ("Commented out "));
 410
 411     if (_nc_tracing >= DEBUG_LEVEL(7)) {
 412         switch (type) {
 413         case BOOLEAN:
 414             _tracef("Token: Boolean; name='%s'",
 415                     _nc_curr_token.tk_name);
 416             break;
 417
 418         case NUMBER:
 419             _tracef("Token: Number;  name='%s', value=%d",
 420                     _nc_curr_token.tk_name,
 421                     _nc_curr_token.tk_valnumber);
 422             break;
 423
 424         case STRING:
 425             _tracef("Token: String;  name='%s', value=%s",
 426                     _nc_curr_token.tk_name,
 427                     _nc_visbuf(_nc_curr_token.tk_valstring));
 428             break;
 429
 430         case CANCEL:
 431             _tracef("Token: Cancel; name='%s'",
 432                     _nc_curr_token.tk_name);
 433             break;
 434
 435         case NAMES:
 436
 437             _tracef("Token: Names; value='%s'",
 438                     _nc_curr_token.tk_name);
 439             break;
 440
 441         case EOF:
 442             _tracef("Token: End of file");
 443             break;
 444
 445         default:
 446             _nc_warning("Bad token type");
 447         }
 448     }
 449 #endif
 450
 451     if (dot_flag == TRUE)       /* if commented out, use the next one */
 452         type = _nc_get_token(silent);
 453
 454     DEBUG(3, ("token: `%s', class %d",
 455               _nc_curr_token.tk_name != 0 ? _nc_curr_token.tk_name :
 456               "<null>",
 457               type));
 458
 459     return (type);
 460 }
 461
 462 /*
 463  *      char
 464  *      trans_string(ptr)
 465  *
 466  *      Reads characters using next_char() until encountering a separator, nl,
 467  *      or end-of-file.  The returned value is the character which caused
 468  *      reading to stop.  The following translations are done on the input:
 469  *
 470  *              ^X  goes to  ctrl-X (i.e. X & 037)
 471  *              {\E,\n,\r,\b,\t,\f}  go to
 472  *                      {ESCAPE,newline,carriage-return,backspace,tab,formfeed}
 473  *              {\^,\\}  go to  {carat,backslash}
 474  *              \ddd (for ddd = up to three octal digits)  goes to the character ddd
 475  *
 476  *              \e == \E
 477  *              \0 == \200
 478  *
 479  */
 480
 481 NCURSES_EXPORT(char)
 482 _nc_trans_string(char *ptr, char *last)
 483 {
 484     int count = 0;
 485     int number = 0;
 486     int i, c;
 487     chtype ch, last_ch = '\0';
 488     bool ignored = FALSE;
 489     bool long_warning = FALSE;
 490
 491     while ((ch = c = next_char()) != (chtype) separator && c != EOF) {
 492         if (ptr == (last - 1))
 493             break;
 494         if ((_nc_syntax == SYN_TERMCAP) && c == '\n')
 495             break;
 496         if (ch == '^' && last_ch != '%') {
 497             ch = c = next_char();
 498             if (c == EOF)
 499                 _nc_err_abort("Premature EOF");
 500
 501             if (!(is7bits(ch) && isprint(ch))) {
 502                 _nc_warning("Illegal ^ character - %s", unctrl(ch));
 503             }
 504             if (ch == '?') {
 505                 *(ptr++) = '\177';
 506                 if (_nc_tracing)
 507                     _nc_warning("Allow ^? as synonym for \\177");
 508             } else {
 509                 if ((ch &= 037) == 0)
 510                     ch = 128;
 511                 *(ptr++) = (char) (ch);
 512             }
 513         } else if (ch == '\\') {
 514             ch = c = next_char();
 515             if (c == EOF)
 516                 _nc_err_abort("Premature EOF");
 517
 518             if (ch >= '0' && ch <= '7') {
 519                 number = ch - '0';
 520                 for (i = 0; i < 2; i++) {
 521                     ch = c = next_char();
 522                     if (c == EOF)
 523                         _nc_err_abort("Premature EOF");
 524
 525                     if (c < '0' || c > '7') {
 526                         if (isdigit(c)) {
 527                             _nc_warning("Non-octal digit `%c' in \\ sequence", c);
 528                             /* allow the digit; it'll do less harm */
 529                         } else {
 530                             push_back((char) c);
 531                             break;
 532                         }
 533                     }
 534
 535                     number = number * 8 + c - '0';
 536                 }
 537
 538                 if (number == 0)
 539                     number = 0200;
 540                 *(ptr++) = (char) number;
 541             } else {
 542                 switch (c) {
 543                 case 'E':
 544                 case 'e':
 545                     *(ptr++) = '\033';
 546                     break;
 547
 548                 case 'a':
 549                     *(ptr++) = '\007';
 550                     break;
 551
 552                 case 'l':
 553                 case 'n':
 554                     *(ptr++) = '\n';
 555                     break;
 556
 557                 case 'r':
 558                     *(ptr++) = '\r';
 559                     break;
 560
 561                 case 'b':
 562                     *(ptr++) = '\010';
 563                     break;
 564
 565                 case 's':
 566                     *(ptr++) = ' ';
 567                     break;
 568
 569                 case 'f':
 570                     *(ptr++) = '\014';
 571                     break;
 572
 573                 case 't':
 574                     *(ptr++) = '\t';
 575                     break;
 576
 577                 case '\\':
 578                     *(ptr++) = '\\';
 579                     break;
 580
 581                 case '^':
 582                     *(ptr++) = '^';
 583                     break;
 584
 585                 case ',':
 586                     *(ptr++) = ',';
 587                     break;
 588
 589                 case ':':
 590                     *(ptr++) = ':';
 591                     break;
 592
 593                 case '\n':
 594                     continue;
 595
 596                 default:
 597                     _nc_warning("Illegal character %s in \\ sequence",
 598                                 unctrl(ch));
 599                     *(ptr++) = (char) ch;
 600                 }               /* endswitch (ch) */
 601             }                   /* endelse (ch < '0' ||  ch > '7') */
 602         }
 603         /* end else if (ch == '\\') */
 604         else if (ch == '\n' && (_nc_syntax == SYN_TERMINFO)) {
 605             /* newlines embedded in a terminfo string are ignored */
 606             ignored = TRUE;
 607         } else {
 608             *(ptr++) = (char) ch;
 609         }
 610
 611         if (!ignored) {
 612             last_ch = ch;
 613             count++;
 614         }
 615         ignored = FALSE;
 616
 617         if (count > MAXCAPLEN && !long_warning) {
 618             _nc_warning("Very long string found.  Missing separator?");
 619             long_warning = TRUE;
 620         }
 621     }                           /* end while */
 622
 623     *ptr = '\0';
 624
 625     return (ch);
 626 }
 627
 628 /*
 629  *      _nc_push_token()
 630  *
 631  *      Push a token of given type so that it will be reread by the next
 632  *      get_token() call.
 633  */
 634
 635 NCURSES_EXPORT(void)
 636 _nc_push_token(int tokclass)
 637 {
 638     /*
 639      * This implementation is kind of bogus, it will fail if we ever do more
 640      * than one pushback at a time between get_token() calls.  It relies on the
 641      * fact that _nc_curr_token is static storage that nothing but
 642      * _nc_get_token() touches.
 643      */
 644     pushtype = tokclass;
 645     if (pushname == 0)
 646         pushname = _nc_doalloc(pushname, MAX_NAME_SIZE + 1);
 647     _nc_get_type(pushname);
 648
 649     DEBUG(3, ("pushing token: `%s', class %d",
 650               _nc_curr_token.tk_name, pushtype));
 651 }
 652
 653 /*
 654  * Panic mode error recovery - skip everything until a "ch" is found.
 655  */
 656 NCURSES_EXPORT(void)
 657 _nc_panic_mode(char ch)
 658 {
 659     int c;
 660
 661     for (;;) {
 662         c = next_char();
 663         if (c == ch)
 664             return;
 665         if (c == EOF)
 666             return;
 667     }
 668 }
 669
 670 /*****************************************************************************
 671  *
 672  * Character-stream handling
 673  *
 674  *****************************************************************************/
 675
 676 #define LEXBUFSIZ       1024
 677
 678 static char *bufptr;            /* otherwise, the input buffer pointer */
 679 static char *bufstart;          /* start of buffer so we can compute offsets */
 680 static FILE *yyin;              /* scanner's input file descriptor */
 681
 682 /*
 683  *      _nc_reset_input()
 684  *
 685  *      Resets the input-reading routines.  Used on initialization,
 686  *      or after a seek has been done.  Exactly one argument must be
 687  *      non-null.
 688  */
 689
 690 NCURSES_EXPORT(void)
 691 _nc_reset_input(FILE * fp, char *buf)
 692 {
 693     pushtype = NO_PUSHBACK;
 694     if (pushname != 0)
 695         pushname[0] = '\0';
 696     yyin = fp;
 697     bufstart = bufptr = buf;
 698     _nc_curr_file_pos = 0L;
 699     if (fp != 0)
 700         _nc_curr_line = 0;
 701     _nc_curr_col = 0;
 702 }
 703
 704 /*
 705  *      int last_char()
 706  *
 707  *      Returns the final nonblank character on the current input buffer
 708  */
 709 static int
 710 last_char(void)
 711 {
 712     size_t len = strlen(bufptr);
 713     while (len--) {
 714         if (!isspace(UChar(bufptr[len])))
 715             return bufptr[len];
 716     }
 717     return 0;
 718 }
 719
 720 /*
 721  *      int next_char()
 722  *
 723  *      Returns the next character in the input stream.  Comments and leading
 724  *      white space are stripped.
 725  *
 726  *      The global state variable 'firstcolumn' is set TRUE if the character
 727  *      returned is from the first column of the input line.
 728  *
 729  *      The global variable _nc_curr_line is incremented for each new line.
 730  *      The global variable _nc_curr_file_pos is set to the file offset of the
 731  *      beginning of each line.
 732  */
 733
 734 static int
 735 next_char(void)
 736 {
 737     if (!yyin) {
 738         /*
 739          * An string with an embedded null will truncate the input.  This is
 740          * intentional (we don't read binary files here).
 741          */
 742         if (*bufptr == '\0')
 743             return (EOF);
 744         if (*bufptr == '\n') {
 745             _nc_curr_line++;
 746             _nc_curr_col = 0;
 747         }
 748     } else if (!bufptr || !*bufptr) {
 749         /*
 750          * In theory this could be recoded to do its I/O one character at a
 751          * time, saving the buffer space.  In practice, this turns out to be
 752          * quite hard to get completely right.  Try it and see.  If you
 753          * succeed, don't forget to hack push_back() correspondingly.
 754          */
 755         static char *result;
 756         static size_t allocated;
 757         size_t used;
 758         size_t len;
 759
 760         do {
 761             bufstart = 0;
 762             used = 0;
 763             do {
 764                 if (used + (LEXBUFSIZ / 4) >= allocated) {
 765                     allocated += (allocated + LEXBUFSIZ);
 766                     result = _nc_doalloc(result, allocated);
 767                     if (result == 0)
 768                         return (EOF);
 769                 }
 770                 if (used == 0)
 771                     _nc_curr_file_pos = ftell(yyin);
 772
 773                 if (fgets(result + used, allocated - used, yyin) != NULL) {
 774                     bufstart = result;
 775                     if (used == 0) {
 776                         _nc_curr_line++;
 777                         _nc_curr_col = 0;
 778                     }
 779                 } else {
 780                     if (used != 0)
 781                         strcat(result, "\n");
 782                 }
 783                 if ((bufptr = bufstart) != 0) {
 784                     used = strlen(bufptr);
 785                     while (iswhite(*bufptr))
 786                         bufptr++;
 787
 788                     /*
 789                      * Treat a trailing <cr><lf> the same as a <newline> so we
 790                      * can read files on OS/2, etc.
 791                      */
 792                     if ((len = strlen(bufptr)) > 1) {
 793                         if (bufptr[len - 1] == '\n'
 794                             && bufptr[len - 2] == '\r') {
 795                             len--;
 796                             bufptr[len - 1] = '\n';
 797                             bufptr[len] = '\0';
 798                         }
 799                     }
 800                 } else {
 801                     return (EOF);
 802                 }
 803             } while (bufptr[len - 1] != '\n');  /* complete a line */
 804         } while (result[0] == '#');     /* ignore comments */
 805     }
 806
 807     first_column = (bufptr == bufstart);
 808
 809     _nc_curr_col++;
 810     return (*bufptr++);
 811 }
 812
 813 static void
 814 push_back(char c)
 815 /* push a character back onto the input stream */
 816 {
 817     if (bufptr == bufstart)
 818         _nc_syserr_abort("Can't backspace off beginning of line");
 819     *--bufptr = c;
 820 }
 821
 822 static long
 823 stream_pos(void)
 824 /* return our current character position in the input stream */
 825 {
 826     return (yyin ? ftell(yyin) : (bufptr ? bufptr - bufstart : 0));
 827 }
 828
 829 static bool
 830 end_of_stream(void)
 831 /* are we at end of input? */
 832 {
 833     return ((yyin ? feof(yyin) : (bufptr && *bufptr == '\0'))
 834             ? TRUE : FALSE);
 835 }