2 * *****************************************************************************
4 * SPDX-License-Identifier: BSD-2-Clause
6 * Copyright (c) 2018-2023 Gavin D. Howard and contributors.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
11 * * Redistributions of source code must retain the above copyright notice, this
12 * list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
30 * *****************************************************************************
46 * Lexes an identifier, which may be a keyword.
// Lexes an identifier whose first character sits one position before l->i.
// The text is first compared against every entry in bc_lex_kws; on a match
// l->t is set to the corresponding keyword token.
50 bc_lex_identifier(BcLex* l)
52 // We already passed the first character, so we need to be sure to include
// it: buf starts one character before the current index.
54 const char* buf = l->buf + l->i - 1;
57 // This loop is simply checking for keywords.
58 for (i = 0; i < bc_lex_kws_len; ++i)
60 const BcLexKeyword* kw = bc_lex_kws + i;
61 size_t n = BC_LEX_KW_LEN(kw);
// A keyword matches only when its n characters are a prefix of buf AND the
// character right after them cannot continue an identifier (it is neither
// alphanumeric nor an underscore).
// NOTE(review): isalnum() receives a plain char here; the C standard requires
// the argument to be representable as unsigned char (or EOF), so a negative
// char value is undefined behavior. Consider casting to unsigned char.
63 if (!strncmp(buf, kw->name, n) && !isalnum(buf[n]) && buf[n] != '_')
65 // If the keyword has been redefined, and redefinition is allowed
66 // (it is not allowed for builtin libraries), break out of the loop
67 // and use it as a name. This depends on the argument parser to
68 // ensure that only non-POSIX keywords get redefined.
69 if (!vm->no_redefine && vm->redefined_kws[i]) break;
// The keyword token is the table index i added to BC_LEX_KW_AUTO.
// NOTE(review): this presumably relies on bc_lex_kws being in the same order
// as the keyword tokens that follow BC_LEX_KW_AUTO; confirm against the enum.
71 l->t = BC_LEX_KW_AUTO + (BcLexType) i;
73 // Warn or error, as appropriate for the mode, if the keyword is not
74 // in the POSIX standard.
75 if (!BC_LEX_KW_POSIX(kw)) bc_lex_verr(l, BC_ERR_POSIX_KW, kw->name);
77 // We subtract 1 because the index has already been incremented.
80 // Already have the token; bail.
85 // If not a keyword, parse the name.
88 // POSIX doesn't allow identifiers that are more than one character, so we
89 // might have to warn or error here too.
// NOTE(review): the "- 1" suggests l->str.len counts one extra element
// (presumably a terminating NUL); confirm against the vector implementation.
90 if (BC_ERR(l->str.len - 1 > 1))
92 bc_lex_verr(l, BC_ERR_POSIX_NAME_LEN, l->str.v);
97 * Parses a bc string. This is separate from dc strings because dc strings need
// Lexes a bc string literal starting at l->i and stores its text in l->str.
// Reports BC_ERR_PARSE_STRING when the string does not end properly.
102 bc_lex_string(BcLex* l)
104 // We need to keep track of newlines to increment them properly.
105 size_t len, nlines, i;
// Sanity check: when the VM is reading from stdin, the buffer being scanned
// must be the VM's own buffer.
118 assert(vm->mode != BC_MODE_STDIN || buf == vm->buffer.v);
120 // Fortunately for us, bc doesn't escape quotes. Instead, the equivalent
121 // is '\q', which makes this loop simpler.
// Scan forward until the closing double quote or the NUL terminator,
// counting every newline that is passed.
122 for (i = l->i; (c = buf[i]) && c != '"'; ++i)
124 nlines += (c == '\n');
// The buffer ended before a closing quote was found. Unless the VM is at
// EOF or the lexer is in file mode, call bc_lex_readLine() and record its
// result in got_more (presumably whether more input was obtained).
127 if (BC_ERR(c == '\0') && !vm->eof && l->mode != BC_MODE_FILE)
129 got_more = bc_lex_readLine(l);
// Repeat while got_more is set and the closing quote is still missing.
132 while (got_more && c != '"');
134 // If the string did not end properly, report a string parse error.
138 bc_lex_err(l, BC_ERR_PARSE_STRING);
141 // Set the temp string to the parsed string.
143 bc_vec_string(&l->str, len, l->buf + l->i);
150 * This function takes a lexed operator and checks to see if it's the assignment
151 * version, setting the token appropriately.
152 * @param l The lexer.
153 * @param with The token to assign if it is an assignment operator.
154 * @param without The token to assign if it is not an assignment operator.
157 bc_lex_assign(BcLex* l, BcLexType with, BcLexType without)
// The character at the current index decides which token applies: an equals
// sign there means the operator is in its "with" (assignment) form.
159 if (l->buf[l->i] == '=')
// Lexes the next bc token. One character (c) is consumed up front and the
// token type is chosen from it, for some operators together with a second
// character (c2); the result is stored in l->t.
168 bc_lex_token(BcLex* l)
170 // We increment here. This means that all lexing needs to take that into
171 // account, such as when parsing an identifier. If we don't, the first
172 // character of every identifier would be missing.
173 char c = l->buf[l->i++], c2;
175 BC_SIG_ASSERT_LOCKED;
177 // This is the workhorse of the lexer.
188 bc_lex_commonTokens(l, c);
194 // Even though "!=" is not an assignment, bc_lex_assign() still fits: the
// two candidate tokens differ only by a trailing equals sign.
195 bc_lex_assign(l, BC_LEX_OP_REL_NE, BC_LEX_OP_BOOL_NOT);
197 // POSIX doesn't allow boolean not.
198 if (l->t == BC_LEX_OP_BOOL_NOT)
200 bc_lex_verr(l, BC_ERR_POSIX_BOOL, "!");
214 // POSIX does not allow line comments.
215 bc_lex_err(l, BC_ERR_POSIX_COMMENT);
216 bc_lex_lineComment(l);
222 bc_lex_assign(l, BC_LEX_OP_ASSIGN_MODULUS, BC_LEX_OP_MODULUS);
230 // Either we have boolean and or an error. And boolean and is not
// allowed by POSIX, so it is reported with BC_ERR_POSIX_BOOL.
232 if (BC_NO_ERR(c2 == '&'))
234 bc_lex_verr(l, BC_ERR_POSIX_BOOL, "&&");
237 l->t = BC_LEX_OP_BOOL_AND;
239 else bc_lex_invalidChar(l, c);
243 #if BC_ENABLE_EXTRA_MATH
246 l->t = BC_LEX_OP_TRUNC;
252 bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLACES, BC_LEX_OP_PLACES);
255 #endif // BC_ENABLE_EXTRA_MATH
// The token is the distance of c from the opening parenthesis character,
// added to BC_LEX_LPAREN. The bracket and brace cases further down use the
// same offset arithmetic with their own base character and base token.
259 l->t = (BcLexType) (c - '(' + BC_LEX_LPAREN);
265 bc_lex_assign(l, BC_LEX_OP_ASSIGN_MULTIPLY, BC_LEX_OP_MULTIPLY);
273 // Have to check for increment first.
277 l->t = BC_LEX_OP_INC;
279 else bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLUS, BC_LEX_OP_PLUS);
293 // Have to check for decrement first.
297 l->t = BC_LEX_OP_DEC;
299 else bc_lex_assign(l, BC_LEX_OP_ASSIGN_MINUS, BC_LEX_OP_MINUS);
307 // If it's alone, it's an alias for last.
308 if (BC_LEX_NUM_CHAR(c2, true, false)) bc_lex_number(l, c);
311 l->t = BC_LEX_KW_LAST;
// Using the alias is reported with BC_ERR_POSIX_DOT.
312 bc_lex_err(l, BC_ERR_POSIX_DOT);
// A star right after the slash opens a block comment; otherwise the slash
// is a divide or divide-assign operator.
321 if (c2 == '*') bc_lex_comment(l);
322 else bc_lex_assign(l, BC_LEX_OP_ASSIGN_DIVIDE, BC_LEX_OP_DIVIDE);
342 // Apparently, GNU bc (and maybe others) allows any uppercase letter as
343 // a number. When single digits, they act like the ones above. When
344 // multi-digit, any letter above the input base is automatically set to
345 // the biggest allowable digit in the input base.
373 l->t = BC_LEX_SCOLON;
379 #if BC_ENABLE_EXTRA_MATH
386 bc_lex_assign(l, BC_LEX_OP_ASSIGN_LSHIFT, BC_LEX_OP_LSHIFT);
389 #endif // BC_ENABLE_EXTRA_MATH
// Less-than-or-equal versus plain less-than.
390 bc_lex_assign(l, BC_LEX_OP_REL_LE, BC_LEX_OP_REL_LT);
// Here the roles flip: the two-character form is the equality comparison
// and the single character is plain assignment.
396 bc_lex_assign(l, BC_LEX_OP_REL_EQ, BC_LEX_OP_ASSIGN);
402 #if BC_ENABLE_EXTRA_MATH
409 bc_lex_assign(l, BC_LEX_OP_ASSIGN_RSHIFT, BC_LEX_OP_RSHIFT);
412 #endif // BC_ENABLE_EXTRA_MATH
// Greater-than-or-equal versus plain greater-than.
413 bc_lex_assign(l, BC_LEX_OP_REL_GE, BC_LEX_OP_REL_GT);
420 l->t = (BcLexType) (c - '[' + BC_LEX_LBRACKET);
426 // In bc, a backslash+newline is whitespace.
427 if (BC_NO_ERR(l->buf[l->i] == '\n'))
430 l->t = BC_LEX_WHITESPACE;
432 else bc_lex_invalidChar(l, c);
438 bc_lex_assign(l, BC_LEX_OP_ASSIGN_POWER, BC_LEX_OP_POWER);
469 bc_lex_identifier(l);
476 l->t = (BcLexType) (c - '{' + BC_LEX_LBRACE);
484 // Once again, boolean or is not allowed by POSIX.
485 if (BC_NO_ERR(c2 == '|'))
487 bc_lex_verr(l, BC_ERR_POSIX_BOOL, "||");
490 l->t = BC_LEX_OP_BOOL_OR;
492 else bc_lex_invalidChar(l, c);
// Any character that reaches this call is reported as invalid.
499 bc_lex_invalidChar(l, c);