contrib/bc/include/lex.h

   1 /*
   2  * *****************************************************************************
   3  *
   4  * SPDX-License-Identifier: BSD-2-Clause
   5  *
   6  * Copyright (c) 2018-2023 Gavin D. Howard and contributors.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions are met:
  10  *
  11  * * Redistributions of source code must retain the above copyright notice, this
  12  *   list of conditions and the following disclaimer.
  13  *
  14  * * Redistributions in binary form must reproduce the above copyright notice,
  15  *   this list of conditions and the following disclaimer in the documentation
  16  *   and/or other materials provided with the distribution.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28  * POSSIBILITY OF SUCH DAMAGE.
  29  *
  30  * *****************************************************************************
  31  *
  32  * Definitions for bc's lexer.
  33  *
  34  */
  35
  36 #ifndef BC_LEX_H
  37 #define BC_LEX_H
  38
  39 #include <stdbool.h>
  40 #include <stddef.h>
  41
  42 #include <status.h>
  43 #include <vector.h>
  44 #include <lang.h>
  45
  46 /**
  47  * A convenience macro for throwing errors in lex code. This takes care of
  48  * plumbing like passing in the current line the lexer is on.
  49  * @param l  The lexer.
  50  * @param e  The error.
  51  */
  52 #if BC_DEBUG
  53 #define bc_lex_err(l, e) (bc_vm_handleError((e), __FILE__, __LINE__, (l)->line))
  54 #else // BC_DEBUG
  55 #define bc_lex_err(l, e) (bc_vm_handleError((e), (l)->line))
  56 #endif // BC_DEBUG
  57
  58 /**
  59  * A convenience macro for throwing errors in lex code. This takes care of
  60  * plumbing like passing in the current line the lexer is on.
  61  * @param l  The lexer.
  62  * @param e  The error.
  63  */
  64 #if BC_DEBUG
  65 #define bc_lex_verr(l, e, ...) \
  66         (bc_vm_handleError((e), __FILE__, __LINE__, (l)->line, __VA_ARGS__))
  67 #else // BC_DEBUG
  68 #define bc_lex_verr(l, e, ...) (bc_vm_handleError((e), (l)->line, __VA_ARGS__))
  69 #endif // BC_DEBUG
  70
  71 // BC_LEX_NEG_CHAR returns the char that corresponds to negative for the
  72 // current calculator.
  73 //
  74 // BC_LEX_LAST_NUM_CHAR returns the char that corresponds to the last valid
  75 // char for numbers. In bc and dc, capital letters are part of numbers, to a
  76 // point. (dc only goes up to hex, so its last valid char is 'F'.)
  77 #if BC_ENABLED
  78
  79 #if DC_ENABLED
  80 #define BC_LEX_NEG_CHAR (BC_IS_BC ? '-' : '_')
  81 #define BC_LEX_LAST_NUM_CHAR (BC_IS_BC ? 'Z' : 'F')
  82 #else // DC_ENABLED
  83 #define BC_LEX_NEG_CHAR ('-')
  84 #define BC_LEX_LAST_NUM_CHAR ('Z')
  85 #endif // DC_ENABLED
  86
  87 #else // BC_ENABLED
  88
  89 #define BC_LEX_NEG_CHAR ('_')
  90 #define BC_LEX_LAST_NUM_CHAR ('F')
  91
  92 #endif // BC_ENABLED
  93
  94 /**
  95  * Returns true if c is a valid number character.
  96  * @param c         The char to check.
  97  * @param pt        If a decimal point has already been seen.
  98  * @param int_only  True if the number is expected to be an int only, false if
  99  *                  non-integers are allowed.
 100  * @return          True if @a c is a valid number character.
 101  */
 102 #define BC_LEX_NUM_CHAR(c, pt, int_only)                               \
 103         (isdigit(c) != 0 || ((c) >= 'A' && (c) <= BC_LEX_LAST_NUM_CHAR) || \
 104          ((c) == '.' && !(pt) && !(int_only)))
 105
 106 /// An enum of lex token types.
 107 typedef enum BcLexType
 108 {
 109         /// End of file.
 110         BC_LEX_EOF,
 111
 112         /// Marker for invalid tokens, used by bc and dc for const data.
 113         BC_LEX_INVALID,
 114
 115 #if BC_ENABLED
 116
 117         /// Increment operator.
 118         BC_LEX_OP_INC,
 119
 120         /// Decrement operator.
 121         BC_LEX_OP_DEC,
 122
 123 #endif // BC_ENABLED
 124
 125         /// BC_LEX_NEG is not used in lexing; it is only for parsing. The lexer
 126         /// marks all '-' characters as BC_LEX_OP_MINUS, but the parser needs to be
 127         /// able to distinguish them.
 128         BC_LEX_NEG,
 129
 130         /// Boolean not.
 131         BC_LEX_OP_BOOL_NOT,
 132
 133 #if BC_ENABLE_EXTRA_MATH
 134
 135         /// Truncation operator.
 136         BC_LEX_OP_TRUNC,
 137
 138 #endif // BC_ENABLE_EXTRA_MATH
 139
 140         /// Power operator.
 141         BC_LEX_OP_POWER,
 142
 143         /// Multiplication operator.
 144         BC_LEX_OP_MULTIPLY,
 145
 146         /// Division operator.
 147         BC_LEX_OP_DIVIDE,
 148
 149         /// Modulus operator.
 150         BC_LEX_OP_MODULUS,
 151
 152         /// Addition operator.
 153         BC_LEX_OP_PLUS,
 154
 155         /// Subtraction operator.
 156         BC_LEX_OP_MINUS,
 157
 158 #if BC_ENABLE_EXTRA_MATH
 159
 160         /// Places (truncate or extend) operator.
 161         BC_LEX_OP_PLACES,
 162
 163         /// Left (decimal) shift operator.
 164         BC_LEX_OP_LSHIFT,
 165
 166         /// Right (decimal) shift operator.
 167         BC_LEX_OP_RSHIFT,
 168
 169 #endif // BC_ENABLE_EXTRA_MATH
 170
 171         /// Equal operator.
 172         BC_LEX_OP_REL_EQ,
 173
 174         /// Less than or equal operator.
 175         BC_LEX_OP_REL_LE,
 176
 177         /// Greater than or equal operator.
 178         BC_LEX_OP_REL_GE,
 179
 180         /// Not equal operator.
 181         BC_LEX_OP_REL_NE,
 182
 183         /// Less than operator.
 184         BC_LEX_OP_REL_LT,
 185
 186         /// Greater than operator.
 187         BC_LEX_OP_REL_GT,
 188
 189         /// Boolean or operator.
 190         BC_LEX_OP_BOOL_OR,
 191
 192         /// Boolean and operator.
 193         BC_LEX_OP_BOOL_AND,
 194
 195 #if BC_ENABLED
 196
 197         /// Power assignment operator.
 198         BC_LEX_OP_ASSIGN_POWER,
 199
 200         /// Multiplication assignment operator.
 201         BC_LEX_OP_ASSIGN_MULTIPLY,
 202
 203         /// Division assignment operator.
 204         BC_LEX_OP_ASSIGN_DIVIDE,
 205
 206         /// Modulus assignment operator.
 207         BC_LEX_OP_ASSIGN_MODULUS,
 208
 209         /// Addition assignment operator.
 210         BC_LEX_OP_ASSIGN_PLUS,
 211
 212         /// Subtraction assignment operator.
 213         BC_LEX_OP_ASSIGN_MINUS,
 214
 215 #if BC_ENABLE_EXTRA_MATH
 216
 217         /// Places (truncate or extend) assignment operator.
 218         BC_LEX_OP_ASSIGN_PLACES,
 219
 220         /// Left (decimal) shift assignment operator.
 221         BC_LEX_OP_ASSIGN_LSHIFT,
 222
 223         /// Right (decimal) shift assignment operator.
 224         BC_LEX_OP_ASSIGN_RSHIFT,
 225
 226 #endif // BC_ENABLE_EXTRA_MATH
 227 #endif // BC_ENABLED
 228
 229         /// Assignment operator.
 230         BC_LEX_OP_ASSIGN,
 231
 232         /// Newline.
 233         BC_LEX_NLINE,
 234
 235         /// Whitespace.
 236         BC_LEX_WHITESPACE,
 237
 238         /// Left parenthesis.
 239         BC_LEX_LPAREN,
 240
 241         /// Right parenthesis.
 242         BC_LEX_RPAREN,
 243
 244         /// Left bracket.
 245         BC_LEX_LBRACKET,
 246
 247         /// Comma.
 248         BC_LEX_COMMA,
 249
 250         /// Right bracket.
 251         BC_LEX_RBRACKET,
 252
 253         /// Left brace.
 254         BC_LEX_LBRACE,
 255
 256         /// Semicolon.
 257         BC_LEX_SCOLON,
 258
 259         /// Right brace.
 260         BC_LEX_RBRACE,
 261
 262         /// String.
 263         BC_LEX_STR,
 264
 265         /// Identifier/name.
 266         BC_LEX_NAME,
 267
 268         /// Constant number.
 269         BC_LEX_NUMBER,
 270
 271         // These keywords are in the order they are in for a reason. Don't change
 272         // the order unless you want a bunch of weird failures in the test suite.
 273         // In fact, almost all of these tokens are in a specific order for a reason.
 274
 275 #if BC_ENABLED
 276
 277         /// bc auto keyword.
 278         BC_LEX_KW_AUTO,
 279
 280         /// bc break keyword.
 281         BC_LEX_KW_BREAK,
 282
 283         /// bc continue keyword.
 284         BC_LEX_KW_CONTINUE,
 285
 286         /// bc define keyword.
 287         BC_LEX_KW_DEFINE,
 288
 289         /// bc for keyword.
 290         BC_LEX_KW_FOR,
 291
 292         /// bc if keyword.
 293         BC_LEX_KW_IF,
 294
 295         /// bc limits keyword.
 296         BC_LEX_KW_LIMITS,
 297
 298         /// bc return keyword.
 299         BC_LEX_KW_RETURN,
 300
 301         /// bc while keyword.
 302         BC_LEX_KW_WHILE,
 303
 304         /// bc halt keyword.
 305         BC_LEX_KW_HALT,
 306
 307         /// bc last keyword.
 308         BC_LEX_KW_LAST,
 309
 310 #endif // BC_ENABLED
 311
 312         /// bc ibase keyword.
 313         BC_LEX_KW_IBASE,
 314
 315         /// bc obase keyword.
 316         BC_LEX_KW_OBASE,
 317
 318         /// bc scale keyword.
 319         BC_LEX_KW_SCALE,
 320
 321 #if BC_ENABLE_EXTRA_MATH
 322
 323         /// bc seed keyword.
 324         BC_LEX_KW_SEED,
 325
 326 #endif // BC_ENABLE_EXTRA_MATH
 327
 328         /// bc length keyword.
 329         BC_LEX_KW_LENGTH,
 330
 331         /// bc print keyword.
 332         BC_LEX_KW_PRINT,
 333
 334         /// bc sqrt keyword.
 335         BC_LEX_KW_SQRT,
 336
 337         /// bc abs keyword.
 338         BC_LEX_KW_ABS,
 339
 340         /// bc is_number keyword.
 341         BC_LEX_KW_IS_NUMBER,
 342
 343         /// bc is_string keyword.
 344         BC_LEX_KW_IS_STRING,
 345
 346 #if BC_ENABLE_EXTRA_MATH
 347
 348         /// bc irand keyword.
 349         BC_LEX_KW_IRAND,
 350
 351 #endif // BC_ENABLE_EXTRA_MATH
 352
 353         /// bc asciffy keyword.
 354         BC_LEX_KW_ASCIIFY,
 355
 356         /// bc modexp keyword.
 357         BC_LEX_KW_MODEXP,
 358
 359         /// bc divmod keyword.
 360         BC_LEX_KW_DIVMOD,
 361
 362         /// bc quit keyword.
 363         BC_LEX_KW_QUIT,
 364
 365         /// bc read keyword.
 366         BC_LEX_KW_READ,
 367
 368 #if BC_ENABLE_EXTRA_MATH
 369
 370         /// bc rand keyword.
 371         BC_LEX_KW_RAND,
 372
 373 #endif // BC_ENABLE_EXTRA_MATH
 374
 375         /// bc maxibase keyword.
 376         BC_LEX_KW_MAXIBASE,
 377
 378         /// bc maxobase keyword.
 379         BC_LEX_KW_MAXOBASE,
 380
 381         /// bc maxscale keyword.
 382         BC_LEX_KW_MAXSCALE,
 383
 384 #if BC_ENABLE_EXTRA_MATH
 385
 386         /// bc maxrand keyword.
 387         BC_LEX_KW_MAXRAND,
 388
 389 #endif // BC_ENABLE_EXTRA_MATH
 390
 391         /// bc line_length keyword.
 392         BC_LEX_KW_LINE_LENGTH,
 393
 394 #if BC_ENABLED
 395
 396         /// bc global_stacks keyword.
 397         BC_LEX_KW_GLOBAL_STACKS,
 398
 399 #endif // BC_ENABLED
 400
 401         /// bc leading_zero keyword.
 402         BC_LEX_KW_LEADING_ZERO,
 403
 404         /// bc stream keyword.
 405         BC_LEX_KW_STREAM,
 406
 407         /// bc else keyword.
 408         BC_LEX_KW_ELSE,
 409
 410 #if DC_ENABLED
 411
 412         /// dc extended registers keyword.
 413         BC_LEX_EXTENDED_REGISTERS,
 414
 415         /// A special token for dc to calculate equal without a register.
 416         BC_LEX_EQ_NO_REG,
 417
 418         /// Colon (array) operator.
 419         BC_LEX_COLON,
 420
 421         /// Execute command.
 422         BC_LEX_EXECUTE,
 423
 424         /// Print stack command.
 425         BC_LEX_PRINT_STACK,
 426
 427         /// Clear stack command.
 428         BC_LEX_CLEAR_STACK,
 429
 430         /// Register stack level command.
 431         BC_LEX_REG_STACK_LEVEL,
 432
 433         /// Main stack level command.
 434         BC_LEX_STACK_LEVEL,
 435
 436         /// Duplicate command.
 437         BC_LEX_DUPLICATE,
 438
 439         /// Swap (reverse) command.
 440         BC_LEX_SWAP,
 441
 442         /// Pop (remove) command.
 443         BC_LEX_POP,
 444
 445         /// Store ibase command.
 446         BC_LEX_STORE_IBASE,
 447
 448         /// Store obase command.
 449         BC_LEX_STORE_OBASE,
 450
 451         /// Store scale command.
 452         BC_LEX_STORE_SCALE,
 453
 454 #if BC_ENABLE_EXTRA_MATH
 455
 456         /// Store seed command.
 457         BC_LEX_STORE_SEED,
 458
 459 #endif // BC_ENABLE_EXTRA_MATH
 460
 461         /// Load variable onto stack command.
 462         BC_LEX_LOAD,
 463
 464         /// Pop off of variable stack onto results stack command.
 465         BC_LEX_LOAD_POP,
 466
 467         /// Push onto variable stack command.
 468         BC_LEX_STORE_PUSH,
 469
 470         /// Print with pop command.
 471         BC_LEX_PRINT_POP,
 472
 473         /// Parameterized quit command.
 474         BC_LEX_NQUIT,
 475
 476         /// Execution stack depth command.
 477         BC_LEX_EXEC_STACK_LENGTH,
 478
 479         /// Scale of number command. This is needed specifically for dc because bc
 480         /// parses the scale function in parts.
 481         BC_LEX_SCALE_FACTOR,
 482
 483         /// Array length command. This is needed specifically for dc because bc
 484         /// just reuses its length keyword.
 485         BC_LEX_ARRAY_LENGTH,
 486
 487 #endif // DC_ENABLED
 488
 489 } BcLexType;
 490
 491 struct BcLex;
 492
 493 /**
 494  * A function pointer to call when another token is needed. Mostly called by the
 495  * parser.
 496  * @param l  The lexer.
 497  */
 498 typedef void (*BcLexNext)(struct BcLex* l);
 499
 500 /// The lexer.
 501 typedef struct BcLex
 502 {
 503         /// A pointer to the text to lex.
 504         const char* buf;
 505
 506         /// The current index into buf.
 507         size_t i;
 508
 509         /// The current line.
 510         size_t line;
 511
 512         /// The length of buf.
 513         size_t len;
 514
 515         /// The current token.
 516         BcLexType t;
 517
 518         /// The previous token.
 519         BcLexType last;
 520
 521         /// A string to store extra data for tokens. For example, the @a BC_LEX_STR
 522         /// token really needs to store the actual string, and numbers also need the
 523         /// string.
 524         BcVec str;
 525
 526         /// The mode the lexer is in.
 527         BcMode mode;
 528
 529 } BcLex;
 530
 531 /**
 532  * Initializes a lexer.
 533  * @param l  The lexer to initialize.
 534  */
 535 void
 536 bc_lex_init(BcLex* l);
 537
 538 /**
 539  * Frees a lexer. This is not guarded by #if BC_DEBUG because a separate
 540  * parser is created at runtime to parse read() expressions and dc strings, and
 541  * that parser needs a lexer.
 542  * @param l  The lexer to free.
 543  */
 544 void
 545 bc_lex_free(BcLex* l);
 546
 547 /**
 548  * Sets the filename that the lexer will be lexing.
 549  * @param l     The lexer.
 550  * @param file  The filename that the lexer will lex.
 551  */
 552 void
 553 bc_lex_file(BcLex* l, const char* file);
 554
 555 /**
 556  * Sets the text the lexer will lex.
 557  * @param l     The lexer.
 558  * @param text  The text to lex.
 559  * @param mode  The mode to lex in.
 560  */
 561 void
 562 bc_lex_text(BcLex* l, const char* text, BcMode mode);
 563
 564 /**
 565  * Generic next function for the parser to call. It takes care of calling the
 566  * correct @a BcLexNext function and consuming whitespace.
 567  * @param l  The lexer.
 568  */
 569 void
 570 bc_lex_next(BcLex* l);
 571
 572 /**
 573  * Lexes a line comment (one beginning with '#' and going to a newline).
 574  * @param l  The lexer.
 575  */
 576 void
 577 bc_lex_lineComment(BcLex* l);
 578
 579 /**
 580  * Lexes a general comment (C-style comment).
 581  * @param l  The lexer.
 582  */
 583 void
 584 bc_lex_comment(BcLex* l);
 585
 586 /**
 587  * Lexes whitespace, finding as much as possible.
 588  * @param l  The lexer.
 589  */
 590 void
 591 bc_lex_whitespace(BcLex* l);
 592
 593 /**
 594  * Lexes a number that begins with char @a start. This takes care of parsing
 595  * numbers in scientific and engineering notations.
 596  * @param l      The lexer.
 597  * @param start  The starting char of the number. To detect a number and call
 598  *               this function, the lexer had to eat the first char. It fixes
 599  *               that by passing it in.
 600  */
 601 void
 602 bc_lex_number(BcLex* l, char start);
 603
 604 /**
 605  * Lexes a name/identifier.
 606  * @param l  The lexer.
 607  */
 608 void
 609 bc_lex_name(BcLex* l);
 610
 611 /**
 612  * Lexes common whitespace characters.
 613  * @param l  The lexer.
 614  * @param c  The character to lex.
 615  */
 616 void
 617 bc_lex_commonTokens(BcLex* l, char c);
 618
 619 /**
 620  * Throws a parse error because char @a c was invalid.
 621  * @param l  The lexer.
 622  * @param c  The problem character.
 623  */
 624 void
 625 bc_lex_invalidChar(BcLex* l, char c);
 626
 627 /**
 628  * Reads a line from stdin and puts it into the lexer's buffer.
 629  * @param l  The lexer.
 630  */
 631 bool
 632 bc_lex_readLine(BcLex* l);
 633
 634 #endif // BC_LEX_H