2 * *****************************************************************************
4 * SPDX-License-Identifier: BSD-2-Clause
6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
11 * * Redistributions of source code must retain the above copyright notice, this
12 * list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
30 * *****************************************************************************
32 * Definitions for bc's lexer.
// Two convenience macros for throwing errors in lex code. They take care of
// plumbing like passing in the current line the lexer is on.
//
// bc_lex_err(l, e) hands error e, together with the line stored in lexer l, to
// bc_vm_handleError(). bc_lex_verr(l, e, ...) does the same and forwards the
// extra arguments as well; because of the comma before __VA_ARGS__, at least
// one extra argument must be supplied.
#define bc_lex_err(l, e) (bc_vm_handleError((e), (l)->line))
#define bc_lex_verr(l, e, ...) \
	(bc_vm_handleError((e), (l)->line, __VA_ARGS__))
51 // BC_LEX_NEG_CHAR returns the char that corresponds to negative for the
52 // current calculator.
54 // BC_LEX_LAST_NUM_CHAR returns the char that corresponds to the last valid
55 // char for numbers. In bc and dc, capital letters are part of numbers, to a
56 // point. (dc only goes up to hex, so its last valid char is 'F'.)
60 #define BC_LEX_NEG_CHAR (BC_IS_BC ? '-' : '_')
61 #define BC_LEX_LAST_NUM_CHAR (BC_IS_BC ? 'Z' : 'F')
63 #define BC_LEX_NEG_CHAR ('-')
64 #define BC_LEX_LAST_NUM_CHAR ('Z')
69 #define BC_LEX_NEG_CHAR ('_')
70 #define BC_LEX_LAST_NUM_CHAR ('F')
/**
 * Returns true if c is a valid number character.
 *
 * Digits are always valid, capital letters are valid from 'A' up to and
 * including BC_LEX_LAST_NUM_CHAR, and '.' is valid only when no decimal point
 * has been seen yet and non-integers are allowed.
 *
 * This is a macro and @a c is evaluated more than once, so the argument must
 * not have side effects.
 * @param c         The char to check.
 * @param pt        If a decimal point has already been seen.
 * @param int_only  True if the number is expected to be an int only, false if
 *                  non-integers are allowed.
 * @return          True if @a c is a valid number character.
 */
// The conversion to unsigned char is required: passing a negative plain char
// to isdigit() is undefined behavior.
#define BC_LEX_NUM_CHAR(c, pt, int_only)                \
	(isdigit((unsigned char) (c)) != 0 ||               \
	 ((c) >= 'A' && (c) <= BC_LEX_LAST_NUM_CHAR) ||     \
	 ((c) == '.' && !(pt) && !(int_only)))
86 /// An enum of lex token types.
87 typedef enum BcLexType {
92 /// Marker for invalid tokens, used by bc and dc for const data.
97 /// Increment operator.
100 /// Decrement operator.
105 /// BC_LEX_NEG is not used in lexing; it is only for parsing. The lexer
106 /// marks all '-' characters as BC_LEX_OP_MINUS, but the parser needs to be
107 /// able to distinguish them.
113 #if BC_ENABLE_EXTRA_MATH
115 /// Truncation operator.
118 #endif // BC_ENABLE_EXTRA_MATH
123 /// Multiplication operator.
126 /// Division operator.
129 /// Modulus operator.
132 /// Addition operator.
135 /// Subtraction operator.
138 #if BC_ENABLE_EXTRA_MATH
139 /// Places (truncate or extend) operator.
142 /// Left (decimal) shift operator.
145 /// Right (decimal) shift operator.
147 #endif // BC_ENABLE_EXTRA_MATH
152 /// Less than or equal operator.
155 /// Greater than or equal operator.
158 /// Not equal operator.
161 /// Less than operator.
164 /// Greater than operator.
167 /// Boolean or operator.
170 /// Boolean and operator.
174 /// Power assignment operator.
175 BC_LEX_OP_ASSIGN_POWER,
177 /// Multiplication assignment operator.
178 BC_LEX_OP_ASSIGN_MULTIPLY,
180 /// Division assignment operator.
181 BC_LEX_OP_ASSIGN_DIVIDE,
183 /// Modulus assignment operator.
184 BC_LEX_OP_ASSIGN_MODULUS,
186 /// Addition assignment operator.
187 BC_LEX_OP_ASSIGN_PLUS,
189 /// Subtraction assignment operator.
190 BC_LEX_OP_ASSIGN_MINUS,
192 #if BC_ENABLE_EXTRA_MATH
194 /// Places (truncate or extend) assignment operator.
195 BC_LEX_OP_ASSIGN_PLACES,
197 /// Left (decimal) shift assignment operator.
198 BC_LEX_OP_ASSIGN_LSHIFT,
200 /// Right (decimal) shift assignment operator.
201 BC_LEX_OP_ASSIGN_RSHIFT,
203 #endif // BC_ENABLE_EXTRA_MATH
206 /// Assignment operator.
215 /// Left parenthesis.
218 /// Right parenthesis.
248 // These keywords are in the order they are in for a reason. Don't change
249 // the order unless you want a bunch of weird failures in the test suite.
250 // In fact, almost all of these tokens are in a specific order for a reason.
257 /// bc break keyword.
260 /// bc continue keyword.
263 /// bc define keyword.
272 /// bc limits keyword.
275 /// bc return keyword.
278 /// bc while keyword.
289 /// bc ibase keyword.
292 /// bc obase keyword.
295 /// bc scale keyword.
298 #if BC_ENABLE_EXTRA_MATH
303 #endif // BC_ENABLE_EXTRA_MATH
305 /// bc length keyword.
308 /// bc print keyword.
317 #if BC_ENABLE_EXTRA_MATH
319 /// bc irand keyword.
322 #endif // BC_ENABLE_EXTRA_MATH
324 /// bc asciify keyword.
327 /// bc modexp keyword.
330 /// bc divmod keyword.
339 #if BC_ENABLE_EXTRA_MATH
344 #endif // BC_ENABLE_EXTRA_MATH
346 /// bc maxibase keyword.
349 /// bc maxobase keyword.
352 /// bc maxscale keyword.
355 #if BC_ENABLE_EXTRA_MATH
356 /// bc maxrand keyword.
358 #endif // BC_ENABLE_EXTRA_MATH
360 /// bc line_length keyword.
361 BC_LEX_KW_LINE_LENGTH,
365 /// bc global_stacks keyword.
366 BC_LEX_KW_GLOBAL_STACKS,
370 /// bc leading_zero keyword.
371 BC_LEX_KW_LEADING_ZERO,
373 /// bc stream keyword.
381 /// A special token for dc to calculate equal without a register.
384 /// Colon (array) operator.
390 /// Print stack command.
393 /// Clear stack command.
396 /// Register stack level command.
397 BC_LEX_REG_STACK_LEVEL,
399 /// Main stack level command.
402 /// Duplicate command.
405 /// Swap (reverse) command.
408 /// Pop (remove) command.
411 /// Store ibase command.
414 /// Store obase command.
417 /// Store scale command.
420 #if BC_ENABLE_EXTRA_MATH
421 /// Store seed command.
423 #endif // BC_ENABLE_EXTRA_MATH
425 /// Load variable onto stack command.
428 /// Pop off of variable stack onto results stack command.
431 /// Push onto variable stack command.
434 /// Print with pop command.
437 /// Parameterized quit command.
440 /// Execution stack depth command.
441 BC_LEX_EXEC_STACK_LENGTH,
443 /// Scale of number command. This is needed specifically for dc because bc
444 /// parses the scale function in parts.
447 /// Array length command. This is needed specifically for dc because bc
448 /// just reuses its length keyword.
// Forward declaration so that the struct tag named in the function pointer
// type below has file scope and refers to the lexer struct itself, instead of
// a new type that is only visible inside the parameter list.
struct BcLex;

/**
 * A function pointer to call when another token is needed. Mostly called by the
 * parser.
 * @param l  The lexer.
 */
typedef void (*BcLexNext)(struct BcLex *l);
465 typedef struct BcLex {
467 /// A pointer to the text to lex.
470 /// The current index into buf.
473 /// The current line.
476 /// The length of buf.
479 /// The current token.
482 /// The previous token.
485 /// A string to store extra data for tokens. For example, the @a BC_LEX_STR
486 /// token really needs to store the actual string, and numbers also need the string.
490 /// If this is true, the lexer is processing stdin and can ask for more data
491 /// if a string or comment are not properly terminated.
497 * Initializes a lexer.
498 * @param l The lexer to initialize.
500 void bc_lex_init(BcLex *l);
503 * Frees a lexer. This is not guarded by #ifndef NDEBUG because a separate
504 * parser is created at runtime to parse read() expressions and dc strings, and
505 * that parser needs a lexer.
506 * @param l The lexer to free.
508 void bc_lex_free(BcLex *l);
511 * Sets the filename that the lexer will be lexing.
512 * @param l The lexer.
513 * @param file The filename that the lexer will lex.
515 void bc_lex_file(BcLex *l, const char *file);
518 * Sets the text the lexer will lex.
519 * @param l The lexer.
520 * @param text The text to lex.
521 * @param is_stdin True if the text is from stdin, false otherwise.
523 void bc_lex_text(BcLex *l, const char *text, bool is_stdin);
526 * Generic next function for the parser to call. It takes care of calling the
527 * correct @a BcLexNext function and consuming whitespace.
528 * @param l The lexer.
530 void bc_lex_next(BcLex *l);
533 * Lexes a line comment (one beginning with '#' and going to a newline).
534 * @param l The lexer.
536 void bc_lex_lineComment(BcLex *l);
539 * Lexes a general comment (C-style comment).
540 * @param l The lexer.
542 void bc_lex_comment(BcLex *l);
545 * Lexes whitespace, finding as much as possible.
546 * @param l The lexer.
548 void bc_lex_whitespace(BcLex *l);
551 * Lexes a number that begins with char @a start. This takes care of parsing
552 * numbers in scientific and engineering notations.
553 * @param l The lexer.
554 * @param start The starting char of the number. To detect a number and call
555 * this function, the lexer had to eat the first char. It fixes
556 * that by passing it in.
558 void bc_lex_number(BcLex *l, char start);
561 * Lexes a name/identifier.
562 * @param l The lexer.
564 void bc_lex_name(BcLex *l);
567 * Lexes common whitespace characters.
568 * @param l The lexer.
569 * @param c The character to lex.
571 void bc_lex_commonTokens(BcLex *l, char c);
574 * Throws a parse error because char @a c was invalid.
575 * @param l The lexer.
576 * @param c The problem character.
578 void bc_lex_invalidChar(BcLex *l, char c);
581 * Reads a line from stdin and puts it into the lexer's buffer.
582 * @param l The lexer.
584 bool bc_lex_readLine(BcLex *l);