2 * *****************************************************************************
4 * SPDX-License-Identifier: BSD-2-Clause
6 * Copyright (c) 2018-2023 Gavin D. Howard and contributors.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
11 * * Redistributions of source code must retain the above copyright notice, this
12 * list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
30 * *****************************************************************************
32 * Common code for the lexers.
// Flags the current token as invalid and raises BC_ERR_PARSE_CHAR, passing
// along the offending character.
46 bc_lex_invalidChar(BcLex* l, char c)
// Record the invalid token type first, then hand c to the error routine.
48 l->t = BC_LEX_INVALID;
49 bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
// Lexes a line comment. The comment is reported as a whitespace token.
53 bc_lex_lineComment(BcLex* l)
55 l->t = BC_LEX_WHITESPACE;
// Scan while still inside the buffer and not yet at a newline; the loop stops
// on the newline rather than past it.
56 while (l->i < l->len && l->buf[l->i] != '\n')
// Lexes a block comment, requesting more input when the comment is not closed
// in the current buffer. The comment is reported as a whitespace token, and
// BC_ERR_PARSE_COMMENT is raised if the end is never found.
63 bc_lex_comment(BcLex* l)
67 bool end = false, got_more;
71 l->t = BC_LEX_WHITESPACE;
73 // This loop is complex because it might need to request more data from
74 // stdin if the comment is not ended. This loop is taken until the comment
75 // is finished or we have EOF.
81 // If we are in stdin mode, the buffer must be the one used for stdin.
82 assert(vm->mode != BC_MODE_STDIN || buf == vm->buffer.v);
84 // Find the end of the comment.
// i is advanced only when the end was not found (i += !end), so a match
// leaves i on the asterisk.
85 for (i = l->i; !end; i += !end)
87 // While we don't have an asterisk, eat, but increment nlines.
88 for (; (c = buf[i]) && c != '*'; ++i)
90 nlines += (c == '\n');
93 // If this is true, we need to request more data.
// Either the nul byte was hit before any asterisk, or the asterisk is the
// last character in the buffer and the character after it is not available.
94 if (BC_ERR(!c || buf[i + 1] == '\0'))
96 // Read more, if possible.
// No read is attempted when vm->eof is set or the lexer is in BC_MODE_FILE.
97 if (!vm->eof && l->mode != BC_MODE_FILE)
99 got_more = bc_lex_readLine(l);
105 // If this turns true, we found the end. Yay!
106 end = (buf[i + 1] == '/');
// Repeat only while the last read produced data and the comment is still
// open.
109 while (got_more && !end);
111 // If we didn't find the end, barf.
115 bc_lex_err(l, BC_ERR_PARSE_COMMENT);
// Lexes a run of whitespace that does not include a newline and reports it as
// a whitespace token.
123 bc_lex_whitespace(BcLex* l)
127 l->t = BC_LEX_WHITESPACE;
129 // Eat. We don't eat newlines because they can be special.
// NOTE(review): if c is a plain char (as the buffer elements are), passing a
// negative value to isspace() is undefined behavior; a cast to unsigned char
// would make this safe -- confirm.
130 for (c = l->buf[l->i]; c != '\n' && isspace(c); c = l->buf[++l->i])
// Handles the tokens common to the lexers, based on the character c: a nul
// byte is end of input, a newline is its own token, and anything else is
// handed to bc_lex_whitespace().
137 bc_lex_commonTokens(BcLex* l, char c)
139 if (!c) l->t = BC_LEX_EOF;
140 else if (c == '\n') l->t = BC_LEX_NLINE;
// No check is made here that c is actually whitespace; presumably callers
// only pass characters for which that holds -- verify against the callers.
141 else bc_lex_whitespace(l);
146 * @param l The lexer.
147 * @param start The start character.
148 * @param int_only Whether this function should only look for an integer. This
149 * is used to implement the exponent of scientific notation.
// Scans the characters of a number starting at the lexer's current index and
// appends them to l->str. bc_lex_number() adds the return value to l->i.
152 bc_lex_num(BcLex* l, char start, bool int_only)
// Scan relative to the current index; i below is an offset from this pointer.
154 const char* buf = l->buf + l->i;
// pt tracks whether a radix point has been seen; a leading '.' counts.
157 bool last_pt, pt = (start == '.');
159 // This loop looks complex. It is not. It is asking if the character is not
160 // a nul byte and if it is a valid num character based on what we have found
161 // thus far, or whether it is a backslash followed by a newline. I can do
162 // i+1 on the buffer because the buffer must have a nul byte.
163 for (i = 0; (c = buf[i]) && (BC_LEX_NUM_CHAR(c, pt, int_only) ||
164 (c == '\\' && buf[i + 1] == '\n'));
167 // I don't need to test that the next character is a newline because
168 // the loop condition above ensures that.
173 // Make sure to eat whitespace at the beginning of the line.
// NOTE(review): buf[i] is a plain char; a negative value passed to isspace()
// is undefined behavior. Consider casting to unsigned char -- confirm.
174 while (isspace(buf[i]) && buf[i] != '\n')
181 // If the next character is not a number character, bail.
182 if (!BC_LEX_NUM_CHAR(c, pt, int_only)) break;
185 // Did we find the radix point?
186 last_pt = (c == '.');
188 // If we did, and we already have one, then break because it's not part
// of the number being lexed.
190 if (pt && last_pt) break;
192 // Set whether we have found a radix point.
// Keep the accepted character as part of the number's text.
195 bc_vec_push(&l->str, &c);
// Lexes a number token whose first character is start and stores its text,
// nul-terminated, in l->str. With BC_ENABLE_EXTRA_MATH, an exponent in
// scientific notation is lexed as well.
202 bc_lex_number(BcLex* l, char start)
204 l->t = BC_LEX_NUMBER;
206 // Make sure the string is clear.
207 bc_vec_popAll(&l->str);
// The start character is pushed here; bc_lex_num() appends the rest.
208 bc_vec_push(&l->str, &start);
// Advance past the characters that bc_lex_num() consumed.
211 l->i += bc_lex_num(l, start, false);
213 #if BC_ENABLE_EXTRA_MATH
215 char c = l->buf[l->i];
217 // Do we have a number in scientific notation?
// Exponents are reported as an error (BC_ERR_POSIX_EXP_NUM) when BC_IS_POSIX
// holds.
222 if (BC_IS_POSIX) bc_lex_err(l, BC_ERR_POSIX_EXP_NUM);
226 bc_vec_push(&l->str, &c);
230 // Check for negative specifically because bc_lex_num() does not.
231 if (c == BC_LEX_NEG_CHAR)
233 bc_vec_push(&l->str, &c);
238 // We must have a number character, so barf if not.
// The check is made in integer-only mode with no radix point seen.
239 if (BC_ERR(!BC_LEX_NUM_CHAR(c, false, true)))
241 bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
244 // Parse the exponent.
// int_only is true here, so only an integer is looked for.
245 l->i += bc_lex_num(l, 0, true);
248 #endif // BC_ENABLE_EXTRA_MATH
// Terminate the collected text so l->str can be used as a C string.
250 bc_vec_pushByte(&l->str, '\0');
// Lexes an identifier and stores it in l->str.
254 bc_lex_name(BcLex* l)
// Start one character back: the index has already been incremented past the
// first character of the name.
257 const char* buf = l->buf + l->i - 1;
262 // Should be obvious. It's looking for valid characters.
// Valid characters are lowercase letters ('a'-'z'), digits, and underscore.
// NOTE(review): if c is a plain char, a negative value passed to isdigit() is
// undefined behavior; a cast to unsigned char would make this safe -- confirm.
263 while ((c >= 'a' && c <= 'z') || isdigit(c) || c == '_')
268 // Set the string to the identifier.
269 bc_vec_string(&l->str, i, buf);
271 // Advance the index. We subtract 1 because it has already been incremented.
// Initializes the lexer's string vector. Presumably the caller is expected to
// hold the signal lock, which BC_SIG_ASSERT_LOCKED checks.
276 bc_lex_init(BcLex* l)
278 BC_SIG_ASSERT_LOCKED;
// l->str holds chars and is created with BC_DTOR_NONE as its destructor.
280 bc_vec_init(&l->str, sizeof(char), BC_DTOR_NONE);
// Frees the lexer's string vector, the counterpart of bc_lex_init().
// Presumably the caller is expected to hold the signal lock, which
// BC_SIG_ASSERT_LOCKED checks.
284 bc_lex_free(BcLex* l)
286 BC_SIG_ASSERT_LOCKED;
288 bc_vec_free(&l->str);
// Presumably records the given file name on the lexer -- confirm against the
// full definition.
292 bc_lex_file(BcLex* l, const char* file)
// Both the lexer and the file name are required.
294 assert(l != NULL && file != NULL);
// Advances the lexer to the next token, looping past whitespace tokens so the
// parser does not see them. Raises BC_ERR_PARSE_EOF if the previous token was
// already EOF.
300 bc_lex_next(BcLex* l)
302 BC_SIG_ASSERT_LOCKED;
308 // If this wasn't here, the line number would be off.
// If the previous character was a newline, bump the line count now; the
// l->i != 0 guard keeps the l->i - 1 read inside the buffer.
309 l->line += (l->i != 0 && l->buf[l->i - 1] == '\n');
311 // If the last token was EOF, someone called this one too many times.
312 if (BC_ERR(l->last == BC_LEX_EOF)) bc_lex_err(l, BC_ERR_PARSE_EOF);
316 // We are done if this is true.
// The index has reached the end of the buffer, so there is nothing to lex.
317 if (l->i == l->len) return;
319 // Loop until failure or we don't have whitespace. This
320 // is so the parser doesn't get inundated with whitespace.
325 while (l->t == BC_LEX_WHITESPACE);
329 * Updates the buffer and len so that they are not invalidated when the stdin
330 * buffer grows.
331 * @param l The lexer.
332 * @param text The text.
333 * @param len The length of the text.
// Called by bc_lex_readLine() with the VM's buffer after a read, and by
// bc_lex_text() with the caller's text and its strlen().
336 bc_lex_fixText(BcLex* l, const char* text, size_t len)
// Reads more input through bc_vm_readBuf() or bc_vm_readLine() and then points
// the lexer at the VM's buffer. bc_lex_comment() uses the return value to
// decide whether more data arrived.
343 bc_lex_readLine(BcLex* l)
347 // These are reversed because they should be already locked, but
348 // bc_vm_readLine() needs them to be unlocked.
351 // Make sure we read from the appropriate place.
356 good = bc_vm_readBuf(false);
368 good = bc_vm_readLine(false);
376 // We should never get here.
// Refresh the lexer's view of the VM buffer after the read.
// NOTE(review): the length passed is vm->buffer.len - 1, presumably to leave
// out a trailing nul byte counted by the vector -- confirm.
385 bc_lex_fixText(l, vm->buffer.v, vm->buffer.len - 1);
// Points the lexer at a new piece of text and resets its token state.
// Presumably the caller is expected to hold the signal lock, which
// BC_SIG_ASSERT_LOCKED checks.
391 bc_lex_text(BcLex* l, const char* text, BcMode mode)
393 BC_SIG_ASSERT_LOCKED;
395 assert(l != NULL && text != NULL);
// text must be nul-terminated, since its length is taken with strlen().
397 bc_lex_fixText(l, text, strlen(text));
// Neither a current nor a previous token exists yet for the new text.
399 l->t = l->last = BC_LEX_INVALID;