2 * *****************************************************************************
4 * SPDX-License-Identifier: BSD-2-Clause
6 * Copyright (c) 2018-2023 Gavin D. Howard and contributors.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
11 * * Redistributions of source code must retain the above copyright notice, this
12 * list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
30 * *****************************************************************************
32 * Definitions for bc's lexer.
47 * A convenience macro for throwing errors in lex code. This takes care of
48 * plumbing like passing in the current line the lexer is on.
53 #define bc_lex_err(l, e) (bc_vm_handleError((e), __FILE__, __LINE__, (l)->line))
55 #define bc_lex_err(l, e) (bc_vm_handleError((e), (l)->line))
59 * A variadic convenience macro for throwing errors in lex code. This passes in
60 * the current line the lexer is on and forwards any extra arguments as well.
65 #define bc_lex_verr(l, e, ...) \
66 (bc_vm_handleError((e), __FILE__, __LINE__, (l)->line, __VA_ARGS__))
68 #define bc_lex_verr(l, e, ...) (bc_vm_handleError((e), (l)->line, __VA_ARGS__))
71 // BC_LEX_NEG_CHAR returns the char that corresponds to negative for the
72 // current calculator.
74 // BC_LEX_LAST_NUM_CHAR returns the char that corresponds to the last valid
75 // char for numbers. In bc and dc, capital letters are part of numbers, to a
76 // point. (dc only goes up to hex, so its last valid char is 'F'.)
80 #define BC_LEX_NEG_CHAR (BC_IS_BC ? '-' : '_')
81 #define BC_LEX_LAST_NUM_CHAR (BC_IS_BC ? 'Z' : 'F')
83 #define BC_LEX_NEG_CHAR ('-')
84 #define BC_LEX_LAST_NUM_CHAR ('Z')
89 #define BC_LEX_NEG_CHAR ('_')
90 #define BC_LEX_LAST_NUM_CHAR ('F')
95 * Returns true if c is a valid number character.
96 * @param c The char to check.
97 * @param pt If a decimal point has already been seen.
98 * @param int_only True if the number is expected to be an int only, false if
99 * non-integers are allowed.
100 * @return True if @a c is a valid number character.
// Returns nonzero if c may appear in a number: a decimal digit, a capital
// letter from 'A' through BC_LEX_LAST_NUM_CHAR, or a '.' when no decimal point
// has been seen yet (pt is false) and non-integers are allowed (int_only is
// false).
//
// The argument to isdigit() is converted to unsigned char first because passing
// a negative value other than EOF to a <ctype.h> function is undefined
// behavior, and a plain char holding a byte >= 0x80 is negative wherever char
// is signed. Note that c is still evaluated more than once, so it must not have
// side effects.
#define BC_LEX_NUM_CHAR(c, pt, int_only)              \
	(isdigit((unsigned char) (c)) != 0 ||             \
	 ((c) >= 'A' && (c) <= BC_LEX_LAST_NUM_CHAR) ||   \
	 ((c) == '.' && !(pt) && !(int_only)))
106 /// An enum of lex token types.
107 typedef enum BcLexType
112 /// Marker for invalid tokens, used by bc and dc for const data.
117 /// Increment operator.
120 /// Decrement operator.
125 /// BC_LEX_NEG is not used in lexing; it is only for parsing. The lexer
126 /// marks all '-' characters as BC_LEX_OP_MINUS, but the parser needs to be
127 /// able to distinguish them.
133 #if BC_ENABLE_EXTRA_MATH
135 /// Truncation operator.
138 #endif // BC_ENABLE_EXTRA_MATH
143 /// Multiplication operator.
146 /// Division operator.
149 /// Modulus operator.
152 /// Addition operator.
155 /// Subtraction operator.
158 #if BC_ENABLE_EXTRA_MATH
160 /// Places (truncate or extend) operator.
163 /// Left (decimal) shift operator.
166 /// Right (decimal) shift operator.
169 #endif // BC_ENABLE_EXTRA_MATH
174 /// Less than or equal operator.
177 /// Greater than or equal operator.
180 /// Not equal operator.
183 /// Less than operator.
186 /// Greater than operator.
189 /// Boolean or operator.
192 /// Boolean and operator.
197 /// Power assignment operator.
198 BC_LEX_OP_ASSIGN_POWER,
200 /// Multiplication assignment operator.
201 BC_LEX_OP_ASSIGN_MULTIPLY,
203 /// Division assignment operator.
204 BC_LEX_OP_ASSIGN_DIVIDE,
206 /// Modulus assignment operator.
207 BC_LEX_OP_ASSIGN_MODULUS,
209 /// Addition assignment operator.
210 BC_LEX_OP_ASSIGN_PLUS,
212 /// Subtraction assignment operator.
213 BC_LEX_OP_ASSIGN_MINUS,
215 #if BC_ENABLE_EXTRA_MATH
217 /// Places (truncate or extend) assignment operator.
218 BC_LEX_OP_ASSIGN_PLACES,
220 /// Left (decimal) shift assignment operator.
221 BC_LEX_OP_ASSIGN_LSHIFT,
223 /// Right (decimal) shift assignment operator.
224 BC_LEX_OP_ASSIGN_RSHIFT,
226 #endif // BC_ENABLE_EXTRA_MATH
229 /// Assignment operator.
238 /// Left parenthesis.
241 /// Right parenthesis.
271 // These keywords are in the order they are in for a reason. Don't change
272 // the order unless you want a bunch of weird failures in the test suite.
273 // In fact, almost all of these tokens are in a specific order for a reason.
280 /// bc break keyword.
283 /// bc continue keyword.
286 /// bc define keyword.
295 /// bc limits keyword.
298 /// bc return keyword.
301 /// bc while keyword.
312 /// bc ibase keyword.
315 /// bc obase keyword.
318 /// bc scale keyword.
321 #if BC_ENABLE_EXTRA_MATH
326 #endif // BC_ENABLE_EXTRA_MATH
328 /// bc length keyword.
331 /// bc print keyword.
340 /// bc is_number keyword.
343 /// bc is_string keyword.
346 #if BC_ENABLE_EXTRA_MATH
348 /// bc irand keyword.
351 #endif // BC_ENABLE_EXTRA_MATH
353 /// bc asciify keyword.
356 /// bc modexp keyword.
359 /// bc divmod keyword.
368 #if BC_ENABLE_EXTRA_MATH
373 #endif // BC_ENABLE_EXTRA_MATH
375 /// bc maxibase keyword.
378 /// bc maxobase keyword.
381 /// bc maxscale keyword.
384 #if BC_ENABLE_EXTRA_MATH
386 /// bc maxrand keyword.
389 #endif // BC_ENABLE_EXTRA_MATH
391 /// bc line_length keyword.
392 BC_LEX_KW_LINE_LENGTH,
396 /// bc global_stacks keyword.
397 BC_LEX_KW_GLOBAL_STACKS,
401 /// bc leading_zero keyword.
402 BC_LEX_KW_LEADING_ZERO,
404 /// bc stream keyword.
412 /// dc extended registers keyword.
413 BC_LEX_EXTENDED_REGISTERS,
415 /// A special token for dc to calculate equal without a register.
418 /// Colon (array) operator.
424 /// Print stack command.
427 /// Clear stack command.
430 /// Register stack level command.
431 BC_LEX_REG_STACK_LEVEL,
433 /// Main stack level command.
436 /// Duplicate command.
439 /// Swap (reverse) command.
442 /// Pop (remove) command.
445 /// Store ibase command.
448 /// Store obase command.
451 /// Store scale command.
454 #if BC_ENABLE_EXTRA_MATH
456 /// Store seed command.
459 #endif // BC_ENABLE_EXTRA_MATH
461 /// Load variable onto stack command.
464 /// Pop off of variable stack onto results stack command.
467 /// Push onto variable stack command.
470 /// Print with pop command.
473 /// Parameterized quit command.
476 /// Execution stack depth command.
477 BC_LEX_EXEC_STACK_LENGTH,
479 /// Scale of number command. This is needed specifically for dc because bc
480 /// parses the scale function in parts.
483 /// Array length command. This is needed specifically for dc because bc
484 /// just reuses its length keyword.
494 * A function pointer to call when another token is needed. Mostly called by the
495 * parser.
496 * @param l The lexer.
498 typedef void (*BcLexNext)(struct BcLex* l);
503 /// A pointer to the text to lex.
506 /// The current index into buf.
509 /// The current line.
512 /// The length of buf.
515 /// The current token.
518 /// The previous token.
521 /// A string to store extra data for tokens. For example, the @a BC_LEX_STR
522 /// token really needs to store the actual string, and numbers also need the
523 /// string.
526 /// The mode the lexer is in.
532 * Initializes a lexer.
533 * @param l The lexer to initialize.
536 bc_lex_init(BcLex* l);
539 * Frees a lexer. This is not guarded by #if BC_DEBUG because a separate
540 * parser is created at runtime to parse read() expressions and dc strings, and
541 * that parser needs a lexer.
542 * @param l The lexer to free.
545 bc_lex_free(BcLex* l);
548 * Sets the filename that the lexer will be lexing.
549 * @param l The lexer.
550 * @param file The filename that the lexer will lex.
553 bc_lex_file(BcLex* l, const char* file);
556 * Sets the text the lexer will lex.
557 * @param l The lexer.
558 * @param text The text to lex.
559 * @param mode The mode to lex in.
562 bc_lex_text(BcLex* l, const char* text, BcMode mode);
565 * Generic next function for the parser to call. It takes care of calling the
566 * correct @a BcLexNext function and consuming whitespace.
567 * @param l The lexer.
570 bc_lex_next(BcLex* l);
573 * Lexes a line comment (one beginning with '#' and going to a newline).
574 * @param l The lexer.
577 bc_lex_lineComment(BcLex* l);
580 * Lexes a general comment (C-style comment).
581 * @param l The lexer.
584 bc_lex_comment(BcLex* l);
587 * Lexes whitespace, finding as much as possible.
588 * @param l The lexer.
591 bc_lex_whitespace(BcLex* l);
594 * Lexes a number that begins with char @a start. This takes care of parsing
595 * numbers in scientific and engineering notations.
596 * @param l The lexer.
597 * @param start The starting char of the number. To detect a number and call
598 * this function, the lexer had to eat the first char. It fixes
599 * that by passing it in.
602 bc_lex_number(BcLex* l, char start);
605 * Lexes a name/identifier.
606 * @param l The lexer.
609 bc_lex_name(BcLex* l);
612 * Lexes common whitespace characters.
613 * @param l The lexer.
614 * @param c The character to lex.
617 bc_lex_commonTokens(BcLex* l, char c);
620 * Throws a parse error because char @a c was invalid.
621 * @param l The lexer.
622 * @param c The problem character.
625 bc_lex_invalidChar(BcLex* l, char c);
628 * Reads a line from stdin and puts it into the lexer's buffer.
629 * @param l The lexer.
632 bc_lex_readLine(BcLex* l);