2 * *****************************************************************************
4 * SPDX-License-Identifier: BSD-2-Clause
6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
11 * * Redistributions of source code must retain the above copyright notice, this
12 * list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
30 * *****************************************************************************
49 // Before you embark on trying to understand this code, have you read the
50 // Development manual (manuals/development.md) and the comment in include/bc.h
51 // yet? No? Do that first. I'm serious.
53 // The reason is that this file holds the most sensitive and finicky code in
54 // the entire codebase. Even getting history to work on Windows was nothing
55 // compared to this. This is where dreams go to die, where dragons live, and
56 // from which Ken Thompson himself would flee.
58 static void bc_parse_else(BcParse *p);
59 static void bc_parse_stmt(BcParse *p);
60 static BcParseStatus bc_parse_expr_err(BcParse *p, uint8_t flags,
62 static void bc_parse_expr_status(BcParse *p, uint8_t flags, BcParseNext next);
65 * Returns true if an instruction could only have come from a "leaf" expression.
66 * For more on what leaf expressions are, read the comment for BC_PARSE_LEAF().
67 * @param t The instruction to test.
69 static bool bc_parse_inst_isLeaf(BcInst t) {
// Any instruction in the inclusive range BC_INST_NUM..BC_INST_MAXSCALE counts
// as a leaf; the trailing || chains on further alternatives.
70 return (t >= BC_INST_NUM && t <= BC_INST_MAXSCALE) ||
// NOTE(review): the alternative compiled in only when BC_ENABLE_EXTRA_MATH is
// set is not visible between these directives; confirm what it adds.
71 #if BC_ENABLE_EXTRA_MATH
73 #endif // BC_ENABLE_EXTRA_MATH
78 * Returns true if the *previous* token was a delimiter. A delimiter is anything
79 * that can legally end a statement. In bc's case, it could be a newline, a
80 * semicolon, and a brace in certain cases.
81 * @param p The parser.
83 static bool bc_parse_isDelimiter(const BcParse *p) {
88 // If it's an obvious delimiter, say so.
89 if (BC_PARSE_DELIMITER(t)) return true;
93 // If the current token is the else keyword, then...beware. That means that we need
94 // to check for a "dangling" else, where there was no brace-delimited block
95 // on the previous if.
96 if (t == BC_LEX_KW_ELSE) {
// flags is seeded with BC_PARSE_FLAG_ELSE before the loop below tests it with
// BC_PARSE_BLOCK_STMT(); presumably so the test holds on entry (confirm
// against the macro definition).
99 uint16_t *fptr = NULL, flags = BC_PARSE_FLAG_ELSE;
101 // As long as going up the stack is valid for a dangling else, keep on.
102 for (i = 0; i < p->flags.len && BC_PARSE_BLOCK_STMT(flags); ++i) {
104 fptr = bc_vec_item_rev(&p->flags, i);
107 // If we need a brace and don't have one, then we don't have a delimiter.
// The brace requirement is checked against the *last* token (p->l.last).
109 if ((flags & BC_PARSE_FLAG_BRACE) && p->l.last != BC_LEX_RBRACE)
113 // Oh, and we had also better have an if statement somewhere.
114 good = ((flags & BC_PARSE_FLAG_IF) != 0);
116 else if (t == BC_LEX_RBRACE) {
120 // Since we have a brace, we need to just check if a brace was needed.
// The scan ends as soon as one flags entry with BC_PARSE_FLAG_BRACE is found.
121 for (i = 0; !good && i < p->flags.len; ++i) {
122 uint16_t *fptr = bc_vec_item_rev(&p->flags, i);
123 good = (((*fptr) & BC_PARSE_FLAG_BRACE) != 0);
131 * Sets a previously defined exit label. What are labels? See the bc Parsing
132 * section of the Development manual (manuals/development.md).
133 * @param p The parser.
135 static void bc_parse_setLabel(BcParse *p) {
137 BcFunc *func = p->func;
// The exit being resolved is the one on top of the exits stack.
138 BcInstPtr *ip = bc_vec_top(&p->exits);
// Sanity check: the cached function pointer must match the entry for p->fidx.
141 assert(func == bc_vec_item(&p->prog->fns, p->fidx));
143 // Set the preallocated label to the correct index.
// ip->idx selects the label slot; its new value is the current length of the
// function's code, i.e. the position of whatever is emitted next.
144 label = bc_vec_item(&func->labels, ip->idx);
145 *label = func->code.len;
147 // Now, we don't need the exit label; it is done.
148 bc_vec_pop(&p->exits);
152 * Creates a label and sets it to idx. If this is an exit label, then idx is
153 * actually invalid, but it doesn't matter because it will be fixed by
154 * bc_parse_setLabel() later.
155 * @param p The parser.
156 * @param idx The index of the label.
158 static void bc_parse_createLabel(BcParse *p, size_t idx) {
// Pushes idx as a new entry onto the current function's label vector.
159 bc_vec_push(&p->func->labels, &idx);
163 * Creates a conditional label. Unlike an exit label, this label is set at
164 * creation time because it comes *before* the code that will target it.
165 * @param p The parser.
166 * @param idx The index of the label.
168 static void bc_parse_createCondLabel(BcParse *p, size_t idx) {
// The label's value is the current code length, so it refers to the code that
// is emitted right after this call.
169 bc_parse_createLabel(p, p->func->code.len);
// idx is remembered on the conds stack; bc_parse_endBody() reads the top of
// that stack to emit the jump at the end of a loop body.
170 bc_vec_push(&p->conds, &idx);
174 * Creates an exit label to be filled in later by bc_parse_setLabel(). Also, why
175 * create a label to be filled in later? Because exit labels are meant to be
176 * targeted by code that comes *before* the label. Since we have to parse that
177 * code first, and don't know how long it will be, we need to just make sure to
178 * reserve a slot to be filled in later when we know.
180 * By the way, this uses BcInstPtr because it was convenient. The field idx
181 * holds the index, and the field func holds the loop boolean.
183 * @param p The parser.
184 * @param idx The index of the label's position.
185 * @param loop True if the exit label is for a loop, false otherwise.
187 static void bc_parse_createExitLabel(BcParse *p, size_t idx, bool loop) {
// Sanity check: the cached function pointer must match the entry for p->fidx.
191 assert(p->func == bc_vec_item(&p->prog->fns, p->fidx));
// NOTE(review): the code that fills in ip is not visible here; per the doc
// comment, idx and loop are what it should carry. Confirm.
197 bc_vec_push(&p->exits, &ip);
// SIZE_MAX is only a placeholder; bc_parse_setLabel() overwrites the slot
// with the real code position later.
198 bc_parse_createLabel(p, SIZE_MAX);
202 * Pops the correct operators off of the operator stack based on the current
203 * operator. This is because of the Shunting-Yard algorithm. Lower prec means higher precedence.
205 * @param p The parser.
206 * @param type The operator.
207 * @param start The previous start of the operator stack. For more
208 * information, see the bc Parsing section of the Development
209 * manual (manuals/development.md).
210 * @param nexprs A pointer to the current number of expressions that have not
211 * been consumed yet. This is an IN and OUT parameter.
213 static void bc_parse_operator(BcParse *p, BcLexType type,
214 size_t start, size_t *nexprs)
// r and left describe the incoming operator; l (set in the loop) describes
// the operator currently on top of the stack.
217 uchar l, r = BC_PARSE_OP_PREC(type);
218 uchar left = BC_PARSE_OP_LEFT(type);
220 // While we haven't hit the stop point yet.
221 while (p->ops.len > start) {
223 // Get the top operator.
224 t = BC_PARSE_TOP_OP(p);
226 // If it's a left paren, we have reached the end of whatever expression
227 // this is no matter what.
228 if (t == BC_LEX_LPAREN) break;
230 // Break for precedence. Precedence operates differently on left and
231 // right associativity, by the way. A left associative operator that
232 // matches the current precedence should take priority, but a right
233 // associative operator should not.
234 l = BC_PARSE_OP_PREC(t);
// In other words: keep popping only while l < r, or while l == r and the
// incoming operator is left associative.
235 if (l >= r && (l != r || !left)) break;
237 // Do the housekeeping. In particular, make sure to note that one
238 // expression was consumed. (Two were, but another was added.)
239 bc_parse_push(p, BC_PARSE_TOKEN_INST(t));
// The count is left unchanged when the popped operator is a prefix operator.
241 *nexprs -= !BC_PARSE_OP_PREFIX(t);
// Finally, the incoming operator itself goes onto the operator stack.
244 bc_vec_push(&p->ops, &type);
248 * Parses a right paren. In the Shunting-Yard algorithm, it needs to be put on
249 * the operator stack. But before that, it needs to consume whatever operators
250 * there are until it hits a left paren.
251 * @param p The parser.
252 * @param nexprs A pointer to the current number of expressions that have not
253 * been consumed yet. This is an IN and OUT parameter.
255 static void bc_parse_rightParen(BcParse *p, size_t *nexprs) {
259 // Consume operators until a left paren.
260 while ((top = BC_PARSE_TOP_OP(p)) != BC_LEX_LPAREN) {
// Each operator taken off the stack is emitted as its instruction.
261 bc_parse_push(p, BC_PARSE_TOKEN_INST(top));
// The count is left unchanged when the operator is a prefix operator.
263 *nexprs -= !BC_PARSE_OP_PREFIX(top);
266 // We need to pop the left paren as well.
269 // Oh, and we also want the next token.
274 * Parses function arguments.
275 * @param p The parser.
276 * @param flags Flags restricting what kind of expressions the arguments can be.
279 static void bc_parse_args(BcParse *p, uint8_t flags) {
286 // Print and comparison operators not allowed. Well, comparison operators
287 // only for POSIX. But we do allow arrays, and we *must* get a value.
288 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL);
289 flags |= (BC_PARSE_ARRAY | BC_PARSE_NEEDVAL);
291 // Count the arguments and parse them.
292 for (nargs = 0; p->l.t != BC_LEX_RPAREN; ++nargs) {
294 bc_parse_expr_status(p, flags, bc_parse_next_arg);
// Remember whether a comma followed this argument, and step past it if so.
296 comma = (p->l.t == BC_LEX_COMMA);
297 if (comma) bc_lex_next(&p->l);
300 // An ending comma is FAIL.
// comma still being set here means the loop ended right after a comma.
301 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
303 // Now do the call with the number of arguments.
304 bc_parse_push(p, BC_INST_CALL);
305 bc_parse_pushIndex(p, nargs);
309 * Parses a function call.
310 * @param p The parser.
311 * @param flags Flags restricting what kind of expressions the arguments can be.
314 static void bc_parse_call(BcParse *p, const char *name, uint8_t flags) {
// bc_parse_args() emits BC_INST_CALL and the argument count; the function
// index pushed near the end of this function follows them.
318 bc_parse_args(p, flags);
320 // We just assert this because bc_parse_args() should
321 // ensure that the next token is what it should be.
322 assert(p->l.t == BC_LEX_RPAREN);
324 // We cannot use bc_program_insertFunc() here
325 // because it will overwrite an existing function.
326 idx = bc_map_index(&p->prog->fn_map, name);
328 // The function does not exist yet. Create a space for it. If the user does
329 // not define it, it's a *runtime* error, not a parse error.
330 if (idx == BC_VEC_INVALID_IDX) {
334 idx = bc_program_insertFunc(p->prog, name);
338 assert(idx != BC_VEC_INVALID_IDX);
340 // Make sure that this pointer was not invalidated.
// The pointer is re-fetched from the function vector because the insertion
// above may have changed where the functions are stored.
341 p->func = bc_vec_item(&p->prog->fns, p->fidx);
343 // The function exists, so set the right function index.
// Here idx was a position in fn_map; it is replaced by the idx field stored
// in that map entry.
344 else idx = ((BcId*) bc_vec_item(&p->prog->fn_map, idx))->idx;
346 bc_parse_pushIndex(p, idx);
348 // Make sure to get the next token.
353 * Parses a name/identifier-based expression. It could be a variable, an array
354 * element, an array itself (for function arguments), a function call, etc.
357 static void bc_parse_name(BcParse *p, BcInst *type,
358 bool *can_assign, uint8_t flags)
364 // We want a copy of the name since the lexer might overwrite its copy.
365 name = bc_vm_strdup(p->l.str.v);
// NOTE(review): the code at the err label is not visible here; presumably it
// releases the copy made above (see the last comment in this function).
367 BC_SETJMP_LOCKED(err);
371 // We need the next token to see if it's just a variable or something more.
374 // Array element or array.
375 if (p->l.t == BC_LEX_LBRACKET) {
379 // Array only. This has to be a function parameter.
380 if (p->l.t == BC_LEX_RBRACKET) {
382 // Error if arrays are not allowed.
383 if (BC_ERR(!(flags & BC_PARSE_ARRAY)))
384 bc_parse_err(p, BC_ERR_PARSE_EXPR);
386 *type = BC_INST_ARRAY;
391 // If we are here, we have an array element. We need to set the
392 // expression parsing flags.
// The index expression may not use print or relational operators.
393 uint8_t flags2 = (flags & ~(BC_PARSE_PRINT | BC_PARSE_REL)) |
396 bc_parse_expr_status(p, flags2, bc_parse_next_elem);
398 // The next token *must* be a right bracket.
399 if (BC_ERR(p->l.t != BC_LEX_RBRACKET))
400 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
402 *type = BC_INST_ARRAY_ELEM;
406 // Make sure to get the next token.
409 // Push the instruction and the name of the identifier.
// *type is either BC_INST_ARRAY or BC_INST_ARRAY_ELEM at this point.
410 bc_parse_push(p, *type);
411 bc_parse_pushName(p, name, false);
413 else if (p->l.t == BC_LEX_LPAREN) {
415 // We are parsing a function call; error if not allowed.
416 if (BC_ERR(flags & BC_PARSE_NOCALL))
417 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
419 *type = BC_INST_CALL;
422 bc_parse_call(p, name, flags);
// Neither a bracket nor a paren followed the name: emit a variable reference.
428 bc_parse_push(p, BC_INST_VAR);
429 bc_parse_pushName(p, name, true);
433 // Need to make sure to unallocate the name.
440 * Parses a builtin function that takes no arguments. This includes read(),
441 * rand(), maxibase(), maxobase(), maxscale(), and maxrand().
442 * @param p The parser.
443 * @param inst The instruction corresponding to the builtin.
445 static void bc_parse_noArgBuiltin(BcParse *p, BcInst inst) {
447 // Must have a left paren.
449 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
451 // Must have a right paren.
// NOTE(review): unlike the left-paren check above, this condition is not
// wrapped in BC_ERR(); confirm whether the omission is intentional.
453 if ((p->l.t != BC_LEX_RPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
// With both parens verified, the builtin is just its single instruction.
455 bc_parse_push(p, inst);
461 * Parses a builtin function that takes 1 argument. This includes length(),
462 * sqrt(), abs(), scale(), and irand().
463 * @param p The parser.
464 * @param type The lex token.
465 * @param flags The expression parsing flags for parsing the argument.
466 * @param prev An out parameter; the previous instruction pointer.
468 static void bc_parse_builtin(BcParse *p, BcLexType type,
469 uint8_t flags, BcInst *prev)
471 // Must have a left paren.
473 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
474 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
478 // Change the flags as needed for parsing the argument.
// No print or relational operators in the argument, and it must yield a value.
479 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL);
480 flags |= BC_PARSE_NEEDVAL;
482 // Since length can take arrays, we need to specially add that flag.
483 if (type == BC_LEX_KW_LENGTH) flags |= BC_PARSE_ARRAY;
485 bc_parse_expr_status(p, flags, bc_parse_next_rel);
487 // Must have a right paren.
488 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
489 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
491 // Adjust previous based on the token and push it.
// The instruction is computed from the token's offset from BC_LEX_KW_LENGTH,
// so this relies on the keyword tokens and the instructions starting at
// BC_INST_LENGTH being declared in matching order.
492 *prev = type - BC_LEX_KW_LENGTH + BC_INST_LENGTH;
493 bc_parse_push(p, *prev);
499 * Parses a builtin function that takes 3 arguments. This includes modexp() and divmod().
502 static void bc_parse_builtin3(BcParse *p, BcLexType type,
503 uint8_t flags, BcInst *prev)
505 assert(type == BC_LEX_KW_MODEXP || type == BC_LEX_KW_DIVMOD);
507 // Must have a left paren.
509 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
510 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
514 // Change the flags as needed for parsing the argument.
515 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL);
516 flags |= BC_PARSE_NEEDVAL;
// First argument.
518 bc_parse_expr_status(p, flags, bc_parse_next_builtin);
520 // Must have a comma.
521 if (BC_ERR(p->l.t != BC_LEX_COMMA))
522 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
// Second argument.
526 bc_parse_expr_status(p, flags, bc_parse_next_builtin);
528 // Must have a comma.
529 if (BC_ERR(p->l.t != BC_LEX_COMMA))
530 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
534 // If it is a divmod, parse an array name. Otherwise, just parse another
// expression (the third argument of modexp).
536 if (type == BC_LEX_KW_DIVMOD) {
// The array argument must be written as a name followed by "[" and "]".
539 if (BC_ERR(p->l.t != BC_LEX_NAME)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
541 // This is safe because the next token should not overwrite the name.
544 // Must have a left bracket.
545 if (BC_ERR(p->l.t != BC_LEX_LBRACKET))
546 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
548 // This is safe because the next token should not overwrite the name.
551 // Must have a right bracket.
552 if (BC_ERR(p->l.t != BC_LEX_RBRACKET))
553 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
555 // This is safe because the next token should not overwrite the name.
558 else bc_parse_expr_status(p, flags, bc_parse_next_rel);
560 // Must have a right paren.
561 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
562 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
564 // Adjust previous based on the token and push it.
// Computed from the token's offset from BC_LEX_KW_MODEXP, so the keyword
// tokens and the instructions must be declared in matching order.
565 *prev = type - BC_LEX_KW_MODEXP + BC_INST_MODEXP;
566 bc_parse_push(p, *prev);
568 // If we have divmod, we need to assign the modulus to the array element, so
569 // we need to push the instructions for doing so.
570 if (type == BC_LEX_KW_DIVMOD) {
572 // The zeroth element.
573 bc_parse_push(p, BC_INST_ZERO);
574 bc_parse_push(p, BC_INST_ARRAY_ELEM);
// The array name is read straight from the lexer's string buffer, which is
// why the comments above insist that later tokens do not overwrite it.
577 bc_parse_pushName(p, p->l.str.v, false);
579 // Swap them and assign. After this, the top item on the stack should
// NOTE(review): the end of the sentence above is missing; presumably the
// quotient is what remains on top. Confirm.
581 bc_parse_push(p, BC_INST_SWAP);
582 bc_parse_push(p, BC_INST_ASSIGN_NO_VAL);
589 * Parses the scale keyword. This is special because scale can be a value or a builtin function.
591 * @param p The parser.
592 * @param type An out parameter; the instruction for the parse.
593 * @param can_assign An out parameter; whether the expression can be assigned to.
595 * @param flags The expression parsing flags for parsing a scale() arg.
597 static void bc_parse_scale(BcParse *p, BcInst *type,
598 bool *can_assign, uint8_t flags)
602 // Without the left paren, it's just the keyword.
603 if (p->l.t != BC_LEX_LPAREN) {
605 // Set, push, and return.
606 *type = BC_INST_SCALE;
608 bc_parse_push(p, BC_INST_SCALE);
612 // Handle the scale function.
613 *type = BC_INST_SCALE_FUNC;
616 // Once again, adjust the flags.
// No print or relational operators in the argument, and it must yield a value.
617 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL);
618 flags |= BC_PARSE_NEEDVAL;
622 bc_parse_expr_status(p, flags, bc_parse_next_rel);
624 // Must have a right paren.
625 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
626 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
// The argument's code has been emitted; the function instruction follows it.
628 bc_parse_push(p, BC_INST_SCALE_FUNC);
634 * Parses an increment or decrement operator. This is a bit complex.
635 * @param p The parser.
636 * @param prev An out parameter; the previous instruction pointer.
637 * @param can_assign An out parameter; whether the expression can be assigned to.
639 * @param nexs An in/out parameter; the number of expressions in the
640 * parse tree that are not used.
641 * @param flags The expression parsing flags; used when the operand of a prefix operator is a name.
643 static void bc_parse_incdec(BcParse *p, BcInst *prev, bool *can_assign,
644 size_t *nexs, uint8_t flags)
// NOTE(review): *prev is read here, before the assert below checks that prev
// is not NULL; the assert would be more useful ahead of this line.
648 BcInst etype = *prev;
649 BcLexType last = p->l.last;
651 assert(prev != NULL && can_assign != NULL);
653 // If we can't assign to the previous token, then we have an error.
654 if (BC_ERR(last == BC_LEX_OP_INC || last == BC_LEX_OP_DEC ||
655 last == BC_LEX_RPAREN))
657 bc_parse_err(p, BC_ERR_PARSE_ASSIGN);
660 // Is the previous instruction for a variable?
661 if (BC_PARSE_INST_VAR(etype)) {
663 // If so, this is a postfix operator.
664 if (!*can_assign) bc_parse_err(p, BC_ERR_PARSE_ASSIGN);
666 // Only postfix uses BC_INST_INC and BC_INST_DEC.
// The comparison adds 0 for ++ and 1 for anything else, selecting either
// BC_INST_INC or the instruction declared right after it.
667 *prev = inst = BC_INST_INC + (p->l.t != BC_LEX_OP_INC);
668 bc_parse_push(p, inst);
674 // This is a prefix operator. In that case, we just convert it to
675 // an assignment instruction.
// Same selection trick, starting from BC_INST_ASSIGN_PLUS.
676 *prev = inst = BC_INST_ASSIGN_PLUS + (p->l.t != BC_LEX_OP_INC);
681 // Because we parse the next part of the expression
682 // right here, we need to increment this.
685 // Is the next token a normal identifier?
686 if (type == BC_LEX_NAME) {
// The operand may be neither a whole array nor a function call.
689 uint8_t flags2 = flags & ~BC_PARSE_ARRAY;
690 bc_parse_name(p, prev, can_assign, flags2 | BC_PARSE_NOCALL);
692 // Is the next token a global?
693 else if (type >= BC_LEX_KW_LAST && type <= BC_LEX_KW_OBASE) {
// Token-to-instruction mapping by offset from BC_LEX_KW_LAST.
694 bc_parse_push(p, type - BC_LEX_KW_LAST + BC_INST_LAST);
697 // Is the next token specifically scale, which needs special treatment?
698 else if (BC_NO_ERR(type == BC_LEX_KW_SCALE)) {
702 // Check that scale() was not used.
703 if (BC_ERR(p->l.t == BC_LEX_LPAREN))
704 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
705 else bc_parse_push(p, BC_INST_SCALE);
707 // Now we know we have an error.
708 else bc_parse_err(p, BC_ERR_PARSE_TOKEN);
// After the operand: push the constant one, then the assignment instruction
// chosen above.
712 bc_parse_push(p, BC_INST_ONE);
713 bc_parse_push(p, inst);
718 * Parses the minus operator. This needs special treatment because it is either
719 * subtract or negation.
720 * @param p The parser.
721 * @param prev An in/out parameter; the previous instruction.
722 * @param ops_bgn The size of the operator stack.
723 * @param rparen True if the last token was a right paren.
724 * @param binlast True if the last token was a binary operator.
725 * @param nexprs An in/out parameter; the number of unused expressions.
727 static void bc_parse_minus(BcParse *p, BcInst *prev, size_t ops_bgn,
728 bool rparen, bool binlast, size_t *nexprs)
734 // Figure out if it's a minus or a negation.
// When BC_PARSE_LEAF() holds for what came before, this is a binary minus;
// otherwise it is a negation.
735 type = BC_PARSE_LEAF(*prev, binlast, rparen) ? BC_LEX_OP_MINUS : BC_LEX_NEG;
736 *prev = BC_PARSE_TOKEN_INST(type);
738 // We can just push onto the op stack because this is the largest
739 // precedence operator that gets pushed. Inc/dec does not.
// Negation goes straight onto the operator stack; a binary minus goes through
// the regular precedence handling in bc_parse_operator().
740 if (type != BC_LEX_OP_MINUS) bc_vec_push(&p->ops, &type);
741 else bc_parse_operator(p, type, ops_bgn, nexprs);
746 * @param p The parser.
747 * @param inst The instruction corresponding to how the string was found and
748 * how it should be printed.
750 static void bc_parse_str(BcParse *p, BcInst inst) {
// Register the string via bc_parse_addString(), then emit the instruction
// that the caller chose for it.
751 bc_parse_addString(p);
752 bc_parse_push(p, inst);
757 * Parses a print statement.
758 * @param p The parser.
760 static void bc_parse_print(BcParse *p, BcLexType type) {
// The stream keyword prints with BC_INST_PRINT_STREAM; any other type handled
// here prints with BC_INST_PRINT_POP.
764 BcInst inst = type == BC_LEX_KW_STREAM ?
765 BC_INST_PRINT_STREAM : BC_INST_PRINT_POP;
771 // A print or stream statement has to have *something*.
772 if (bc_parse_isDelimiter(p)) bc_parse_err(p, BC_ERR_PARSE_PRINT);
776 // If the token is a string, then print it with escapes.
777 // BC_INST_PRINT_POP plays that role for bc.
778 if (t == BC_LEX_STR) bc_parse_str(p, inst);
780 // We have an actual number; parse and add a print instruction.
781 bc_parse_expr_status(p, BC_PARSE_NEEDVAL, bc_parse_next_print);
782 bc_parse_push(p, inst);
785 // Is the next token a comma?
786 comma = (p->l.t == BC_LEX_COMMA);
788 // Get the next token if we have a comma.
789 if (comma) bc_lex_next(&p->l);
792 // If we don't have a comma, the statement needs to end.
793 if (!bc_parse_isDelimiter(p))
794 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
802 // If we have a comma but no token, that's bad.
// comma still being set here means the list ended right after a comma.
803 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
807 * Parses a return statement.
808 * @param p The parser.
810 static void bc_parse_return(BcParse *p) {
// inst is what gets emitted when the return carries no expression.
814 uchar inst = BC_INST_RET0;
816 // If we are not in a function, that's an error.
817 if (BC_ERR(!BC_PARSE_FUNC(p))) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
819 // If we are in a void function, make sure to return void.
820 if (p->func->voidfn) inst = BC_INST_RET_VOID;
// Remember whether the expression starts with a left paren; the POSIX check
// below needs it.
825 paren = (t == BC_LEX_LPAREN);
827 // An empty return statement just needs to push the selected instruction.
828 if (bc_parse_isDelimiter(p)) bc_parse_push(p, inst);
833 // Need to parse the expression whose value will be returned.
834 s = bc_parse_expr_err(p, BC_PARSE_NEEDVAL, bc_parse_next_expr);
836 // If the expression was empty, just push the selected instruction.
837 if (s == BC_PARSE_STATUS_EMPTY_EXPR) {
838 bc_parse_push(p, inst);
842 // POSIX requires parentheses.
// The expression must start with "(" and the last token must be ")".
843 if (!paren || p->l.last != BC_LEX_RPAREN) {
844 bc_parse_err(p, BC_ERR_POSIX_RET);
847 // Void functions require an empty expression.
848 if (BC_ERR(p->func->voidfn)) {
849 if (s != BC_PARSE_STATUS_EMPTY_EXPR)
850 bc_parse_verr(p, BC_ERR_PARSE_RET_VOID, p->func->name);
852 // If we got here, we want to be sure to end the function with a real
853 // return instruction, just in case.
854 else bc_parse_push(p, BC_INST_RET);
859 * Clears flags that indicate the end of an if statement and its block and sets the pending exit label.
861 * @param p The parser.
863 static void bc_parse_noElse(BcParse *p) {
864 uint16_t *flag_ptr = BC_PARSE_TOP_FLAG_PTR(p);
// Clear only BC_PARSE_FLAG_IF_END on the top flags entry; other bits are kept.
865 *flag_ptr = (*flag_ptr & ~(BC_PARSE_FLAG_IF_END));
// Resolve the exit label on top of the exits stack to the current code position.
866 bc_parse_setLabel(p);
870 * Ends (finishes parsing) the body of a control statement or a function.
871 * @param p The parser.
872 * @param brace True if the body was ended by a brace, false otherwise.
874 static void bc_parse_endBody(BcParse *p, bool brace) {
876 bool has_brace, new_else = false;
878 // We cannot be ending a body if there are no bodies to end.
879 if (BC_ERR(p->flags.len <= 1)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
883 // The brace was already gotten; make sure that the caller did not lie.
884 // We check for the requirement of braces later.
885 assert(p->l.t == BC_LEX_RBRACE);
889 // If the next token is not a delimiter, that is a problem.
890 if (BC_ERR(!bc_parse_isDelimiter(p)))
891 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
894 // Do we have a brace flag?
895 has_brace = (BC_PARSE_BRACE(p) != 0);
// Snapshot of the flags depth; the if handling below compares against it to
// tell whether an entry was already popped in this iteration.
898 size_t len = p->flags.len;
901 // If we have a brace flag but not a brace, that's a problem.
902 if (has_brace && !brace) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
904 // Are we inside a loop?
905 loop = (BC_PARSE_LOOP_INNER(p) != 0);
907 // If we are ending a loop or an else...
908 if (loop || BC_PARSE_ELSE(p)) {
910 // Loops have condition labels that we have to take care of as well.
// Emit a jump to the label index on top of the conds stack, then drop it.
913 size_t *label = bc_vec_top(&p->conds);
915 bc_parse_push(p, BC_INST_JUMP);
916 bc_parse_pushIndex(p, *label);
918 bc_vec_pop(&p->conds);
// Resolve the pending exit label to the current code position.
921 bc_parse_setLabel(p);
922 bc_vec_pop(&p->flags);
924 // If we are ending a function...
925 else if (BC_PARSE_FUNC_INNER(p)) {
// Every function body is terminated with a return; which one depends on
// whether the function is void.
926 BcInst inst = (p->func->voidfn ? BC_INST_RET_VOID : BC_INST_RET0);
927 bc_parse_push(p, inst);
928 bc_parse_updateFunc(p, BC_PROG_MAIN);
929 bc_vec_pop(&p->flags);
931 // If we have a brace flag and not an if statement, we can pop the top
932 // of the flags stack because they have been taken care of above.
933 else if (has_brace && !BC_PARSE_IF(p)) bc_vec_pop(&p->flags);
935 // This needs to be last to parse nested if's properly.
936 if (BC_PARSE_IF(p) && (len == p->flags.len || !BC_PARSE_BRACE(p))) {
// Newlines are skipped before the else check a few lines below.
939 while (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l);
941 // *Now* we can pop the flags.
942 bc_vec_pop(&p->flags);
944 // If we are allowed non-POSIX stuff...
947 // Have we found yet another dangling else?
948 *(BC_PARSE_TOP_FLAG_PTR(p)) |= BC_PARSE_FLAG_IF_END;
949 new_else = (p->l.t == BC_LEX_KW_ELSE);
951 // Parse the else or end the if statement body.
952 if (new_else) bc_parse_else(p);
953 else if (!has_brace && (!BC_PARSE_IF_END(p) || brace))
956 // POSIX requires us to do the bare minimum only.
957 else bc_parse_noElse(p);
960 // If these are both true, we have "used" the braces that we found.
961 if (brace && has_brace) brace = false;
963 // This condition was perhaps the hardest single part of the parser. If the
964 // flags stack does not have enough, we should stop. If we have a new else
965 // statement, we should stop. If we do have the end of an if statement and
966 // we have eaten the brace, we should stop. If we do have a brace flag, we should stop.
// Note that has_brace is refreshed as a side effect of evaluating the
// condition (the assignment in its last operand).
968 } while (p->flags.len > 1 && !new_else && (!BC_PARSE_IF_END(p) || brace) &&
969 !(has_brace = (BC_PARSE_BRACE(p) != 0)));
971 // If we have a brace, yet no body for it, that's a problem.
972 if (BC_ERR(p->flags.len == 1 && brace))
973 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
974 else if (brace && BC_PARSE_BRACE(p)) {
976 // If we make it here, we have a brace and a flag for it.
977 uint16_t flags = BC_PARSE_TOP_FLAG(p);
979 // This condition ensures that the *last* body is correctly finished by
980 // popping its flags.
// The pop happens only when the top entry carries none of the function-inner,
// loop-inner, if, else, or if-end flags.
981 if (!(flags & (BC_PARSE_FLAG_FUNC_INNER | BC_PARSE_FLAG_LOOP_INNER)) &&
982 !(flags & (BC_PARSE_FLAG_IF | BC_PARSE_FLAG_ELSE)) &&
983 !(flags & (BC_PARSE_FLAG_IF_END)))
985 bc_vec_pop(&p->flags);
991 * Starts the body of a control statement or function.
992 * @param p The parser.
993 * @param flags The current flags (will be edited).
995 static void bc_parse_startBody(BcParse *p, uint16_t flags) {
// The new entry inherits the FUNC and LOOP bits from the current top entry.
997 flags |= (BC_PARSE_TOP_FLAG(p) & (BC_PARSE_FLAG_FUNC | BC_PARSE_FLAG_LOOP));
// Every entry pushed here is marked as a body.
998 flags |= BC_PARSE_FLAG_BODY;
999 bc_vec_push(&p->flags, &flags);
1003 * Parses an if statement.
1004 * @param p The parser.
1006 static void bc_parse_if(BcParse *p) {
1008 // We are allowed relational operators, and we must have a value.
1010 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL);
1012 // Get the left paren and barf if necessary.
1014 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
1015 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1017 // Parse the condition.
1019 bc_parse_expr_status(p, flags, bc_parse_next_rel);
1021 // Must have a right paren.
1022 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
1023 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1027 // Insert the conditional jump instruction.
1028 bc_parse_push(p, BC_INST_JUMP_ZERO);
// idx is the slot the exit label is about to occupy: the current length of
// the label vector.
1030 idx = p->func->labels.len;
1032 // Push the index for the instruction and create an exit label for an else statement.
1034 bc_parse_pushIndex(p, idx);
// false: this exit does not belong to a loop.
1035 bc_parse_createExitLabel(p, idx, false);
1037 bc_parse_startBody(p, BC_PARSE_FLAG_IF);
1041 * Parses an else statement.
1042 * @param p The parser.
1044 static void bc_parse_else(BcParse *p) {
// idx is the slot that the exit label created below will occupy.
1046 size_t idx = p->func->labels.len;
1048 // We must be at the end of an if statement.
1049 if (BC_ERR(!BC_PARSE_IF_END(p)))
1050 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1052 // Push an unconditional jump to make bc jump over the else statement if it
1053 // executed the original if statement.
1054 bc_parse_push(p, BC_INST_JUMP);
1055 bc_parse_pushIndex(p, idx);
1057 // Clear the else stuff. Yes, that function is misnamed for its use here,
1058 // but deal with it.
// NOTE(review): the call this comment refers to is not visible here;
// bc_parse_noElse() is the likely candidate. Confirm.
1061 // Create the exit label and parse the body.
// false: this exit does not belong to a loop.
1062 bc_parse_createExitLabel(p, idx, false);
1063 bc_parse_startBody(p, BC_PARSE_FLAG_ELSE);
1069 * Parse a while loop.
1070 * @param p The parser.
1072 static void bc_parse_while(BcParse *p) {
1074 // We are allowed relational operators, and we must have a value.
1076 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL);
1078 // Get the left paren and barf if necessary.
1080 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
1081 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1084 // Create the labels. Loops need both.
// The condition label is created before the condition is parsed, so it refers
// to the start of the condition code.
1085 bc_parse_createCondLabel(p, p->func->labels.len);
// idx is the slot the exit label is about to occupy; true marks it as a loop
// exit.
1086 idx = p->func->labels.len;
1087 bc_parse_createExitLabel(p, idx, true);
1089 // Parse the actual condition and barf on non-right paren.
1090 bc_parse_expr_status(p, flags, bc_parse_next_rel);
1091 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
1092 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1095 // Now we can push the conditional jump and start the body.
1096 bc_parse_push(p, BC_INST_JUMP_ZERO);
1097 bc_parse_pushIndex(p, idx);
1098 bc_parse_startBody(p, BC_PARSE_FLAG_LOOP | BC_PARSE_FLAG_LOOP_INNER);
1103 * @param p The parser.
1105 static void bc_parse_for(BcParse *p) {
1107 size_t cond_idx, exit_idx, body_idx, update_idx;
1109 // Barf on the missing left paren.
1111 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
1112 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1115 // The first statement can be empty, but if it is, check for error in POSIX
1116 // mode. Otherwise, parse it.
1117 if (p->l.t != BC_LEX_SCOLON)
1118 bc_parse_expr_status(p, 0, bc_parse_next_for);
1119 else bc_parse_err(p, BC_ERR_POSIX_FOR);
1121 // Must have a semicolon.
1122 if (BC_ERR(p->l.t != BC_LEX_SCOLON)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1125 // These are indices for labels. There are so many of them because the end
1126 // of the loop must unconditionally jump to the update code. Then the update
1127 // code must unconditionally jump to the condition code. Then the condition
1128 // code must *conditionally* jump to the exit.
// The four labels are created below in this same order (condition, update,
// body, exit), so these precomputed indices match the slots they end up in,
// as long as nothing in between adds other labels.
1129 cond_idx = p->func->labels.len;
1130 update_idx = cond_idx + 1;
1131 body_idx = update_idx + 1;
1132 exit_idx = body_idx + 1;
1134 // This creates the condition label.
1135 bc_parse_createLabel(p, p->func->code.len);
1137 // Parse an expression if it exists.
1138 if (p->l.t != BC_LEX_SCOLON) {
1139 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL);
1140 bc_parse_expr_status(p, flags, bc_parse_next_for);
1144 // Set this for the next call to bc_parse_number because an empty
1145 // condition means that it is an infinite loop, so the condition must be
1146 // non-zero. This is safe to set because the current token is a
1147 // semicolon, which has no string requirement.
// The lexer's string buffer is overwritten with bc_parse_one; the length
// passed is sizeof minus one, presumably to leave out the terminator.
1148 bc_vec_string(&p->l.str, sizeof(bc_parse_one) - 1, bc_parse_one);
1151 // An empty condition makes POSIX mad.
1152 bc_parse_err(p, BC_ERR_POSIX_FOR);
1155 // Must have a semicolon.
1156 if (BC_ERR(p->l.t != BC_LEX_SCOLON))
1157 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1160 // Now we can set up the conditional jump to the exit and an unconditional
1161 // jump to the body right after. The unconditional jump to the body is
1162 // because there is update code coming right after the condition, so we need
1163 // to skip it to get to the body.
1164 bc_parse_push(p, BC_INST_JUMP_ZERO);
1165 bc_parse_pushIndex(p, exit_idx);
1166 bc_parse_push(p, BC_INST_JUMP);
1167 bc_parse_pushIndex(p, body_idx);
1169 // Now create the label for the update code.
// Recording update_idx on the conds stack is what makes the end of the body
// jump to the update code (see bc_parse_endBody()).
1170 bc_parse_createCondLabel(p, update_idx);
1172 // Parse if not empty, and if it is, let POSIX yell if necessary.
1173 if (p->l.t != BC_LEX_RPAREN)
1174 bc_parse_expr_status(p, 0, bc_parse_next_rel);
1175 else bc_parse_err(p, BC_ERR_POSIX_FOR);
1177 // Must have a right paren.
1178 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
1179 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1181 // Set up a jump to the condition right after the update code.
1182 bc_parse_push(p, BC_INST_JUMP);
1183 bc_parse_pushIndex(p, cond_idx);
// This label (slot body_idx) marks where the loop body's code starts.
1184 bc_parse_createLabel(p, p->func->code.len);
1186 // Create an exit label for the body and start the body.
1187 bc_parse_createExitLabel(p, exit_idx, true);
1189 bc_parse_startBody(p, BC_PARSE_FLAG_LOOP | BC_PARSE_FLAG_LOOP_INNER);
1193 * Parse a statement or token that indicates a loop exit. This includes an
1194 * actual loop exit, the break keyword, or the continue keyword.
1195 * @param p The parser.
1196 * @param type The type of exit.
1198 static void bc_parse_loopExit(BcParse *p, BcLexType type) {
// NOTE(review): the embedded numbering skips lines in this listing (for
// example 1199-1202 and 1224-1225), so the declarations of `i` and `ip`, the
// statement announced by "Set the index for the exit", and the closing
// braces are not visible here.
1203 // Must have a loop. If we don't, that's an error.
1204 if (BC_ERR(!BC_PARSE_LOOP(p))) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1206 // If we have a break statement...
1207 if (type == BC_LEX_KW_BREAK) {
1209 // If there are no exits, something went wrong somewhere.
1210 if (BC_ERR(!p->exits.len)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
// Start the search at the top (last) entry of the exits vector.
1213 i = p->exits.len - 1;
1214 ip = bc_vec_item(&p->exits, i);
1216 // The condition !ip->func is true if the exit is not for a loop, so we
1217 // need to find the first actual loop exit.
// The first pass re-reads the top entry (index i was already fetched above)
// and then post-decrements i, so the walk goes from the top of p->exits
// toward index 0.
// NOTE(review): if `i` is an unsigned type (its declaration is not visible
// here), decrementing past 0 wraps around and `i < p->exits.len` becomes
// false, which is what would stop the walk at the bottom -- confirm.
1218 while (!ip->func && i < p->exits.len) ip = bc_vec_item(&p->exits, i--);
1220 // Make sure everything is hunky dory.
1221 assert(ip != NULL && (i < p->exits.len || ip->func));
1223 // Set the index for the exit.
1226 // If we have a continue statement or just the loop end, jump to the
1227 // condition (or update for a for loop).
1228 else i = *((size_t*) bc_vec_top(&p->conds));
1230 // Add the unconditional jump.
// `i` is the label index chosen above: the exit label for break, or the top
// of p->conds otherwise.
1231 bc_parse_push(p, BC_INST_JUMP);
1232 bc_parse_pushIndex(p, i);
1238 * Parse a function (header).
1239 * @param p The parser.
1241 static void bc_parse_func(BcParse *p) {
// NOTE(review): the embedded numbering skips lines in this listing (for
// example 1244-1248, 1257-1259 and 1286-1288), so the remaining local
// declarations, the token-advancing calls and the closing braces are not
// visible here.
1243 bool comma = false, voidfn;
1249 // Must have a name.
1250 if (BC_ERR(p->l.t != BC_LEX_NAME)) bc_parse_err(p, BC_ERR_PARSE_FUNC);
1252 // If the name is "void", and POSIX is not on, mark as void.
// At this point voidfn only means "the first name spelled `void`"; it is
// narrowed further below once the following token is known.
1253 voidfn = (!BC_IS_POSIX && p->l.t == BC_LEX_NAME &&
1254 !strcmp(p->l.str.v, "void"));
1256 // We can safely do this because the expected token should not overwrite the
1260 // If we *don't* have another name, then void is the name of the function.
// So voidfn stays true only when `void` is followed by a second name.
1261 voidfn = (voidfn && p->l.t == BC_LEX_NAME);
1263 // With a void function, allow POSIX to complain and get a new token.
1266 bc_parse_err(p, BC_ERR_POSIX_VOID);
1268 // We can safely do this because the expected token should not overwrite
1269 // the function name.
1273 // Must have a left paren.
1274 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
1275 bc_parse_err(p, BC_ERR_PARSE_FUNC);
1277 // Make sure the functions map and vector are synchronized.
1278 assert(p->prog->fns.len == p->prog->fn_map.len);
1280 // Must lock signals because vectors are changed, and the vector functions
1281 // expect signals to be locked.
1284 // Insert the function by name into the map and vector.
// The name is read from the lexer's string buffer (p->l.str.v).
1285 idx = bc_program_insertFunc(p->prog, p->l.str.v);
1289 // Make sure the insert worked.
1292 // Update the function pointer and stuff in the parser and set its void.
1293 bc_parse_updateFunc(p, idx);
1294 p->func->voidfn = voidfn;
1298 // While we do not have a right paren, we are still parsing arguments.
1299 while (p->l.t != BC_LEX_RPAREN) {
// Each parameter starts out as a plain variable; the checks below may
// change the type to an array (and presumably a reference; that assignment
// is not visible in this listing).
1301 BcType t = BC_TYPE_VAR;
1303 // If we have an asterisk, we are parsing a reference argument.
1304 if (p->l.t == BC_LEX_OP_MULTIPLY) {
1309 // Let POSIX complain if necessary.
1310 bc_parse_err(p, BC_ERR_POSIX_REF);
1313 // If we don't have a name, the argument will not have a name. Barf.
1314 if (BC_ERR(p->l.t != BC_LEX_NAME))
1315 bc_parse_err(p, BC_ERR_PARSE_FUNC);
1317 // Increment the number of parameters.
1318 p->func->nparams += 1;
1320 // Copy the string in the lexer so that we can use the lexer again.
// NOTE(review): bc_parse_auto() passes p->l.str.len - 1 to bc_vec_string()
// for the same kind of copy, while this call passes p->l.str.len; confirm
// which length bc_vec_string() expects.
1321 bc_vec_string(&p->buf, p->l.str.len, p->l.str.v);
1325 // We are parsing an array parameter if this is true.
1326 if (p->l.t == BC_LEX_LBRACKET) {
1328 // Set the array type, unless we are already parsing a reference.
1329 if (t == BC_TYPE_VAR) t = BC_TYPE_ARRAY;
1333 // The brackets *must* be empty.
1334 if (BC_ERR(p->l.t != BC_LEX_RBRACKET))
1335 bc_parse_err(p, BC_ERR_PARSE_FUNC);
1339 // If we did *not* get a bracket, but we are expecting a reference, we
1341 else if (BC_ERR(t == BC_TYPE_REF))
1342 bc_parse_verr(p, BC_ERR_PARSE_REF_VAR, p->buf.v);
1344 // Test for comma and get the next token if it exists.
1345 comma = (p->l.t == BC_LEX_COMMA);
1346 if (comma) bc_lex_next(&p->l);
1348 // Insert the parameter into the function.
// The name comes from p->buf (the copy made above), not from the lexer,
// because the lexer has already moved on to later tokens.
1349 bc_func_insert(p->func, p->prog, p->buf.v, t, p->l.line);
1352 // If we have a comma, but no parameter, barf.
// `comma` still being true after the loop means the last thing seen before
// the right paren was a comma.
1353 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_FUNC);
1356 flags = BC_PARSE_FLAG_FUNC | BC_PARSE_FLAG_FUNC_INNER;
1357 bc_parse_startBody(p, flags);
1361 // POSIX requires that a brace be on the same line as the function header.
1362 // If we don't have a brace, let POSIX throw an error.
1363 if (p->l.t != BC_LEX_LBRACE) bc_parse_err(p, BC_ERR_POSIX_BRACE);
1367 * Parse an auto list.
1368 * @param p The parser.
1370 static void bc_parse_auto(BcParse *p) {
// NOTE(review): the embedded numbering skips lines in this listing (for
// example 1371-1373, 1385-1387 and 1395-1399), so the local declarations,
// the token-advancing calls and the closing braces are not visible here.
1374 // Error if the auto keyword appeared in the wrong place.
1375 if (BC_ERR(!p->auto_part)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
// Clear auto_part so that a second auto list fails the check above, and
// reset the trailing-comma tracker.
1378 p->auto_part = comma = false;
1380 // We need at least one variable or array.
// `one` is sampled once, before the loop: it records whether the list
// starts with a name at all.
1381 one = (p->l.t == BC_LEX_NAME);
1383 // While we have a variable or array.
1384 while (p->l.t == BC_LEX_NAME) {
1388 // Copy the name from the lexer, so we can use it again.
// NOTE(review): bc_parse_func() passes p->l.str.len (without the - 1) to
// bc_vec_string() for the same kind of copy; confirm which length
// bc_vec_string() expects.
1389 bc_vec_string(&p->buf, p->l.str.len - 1, p->l.str.v);
1393 // If we are parsing an array...
1394 if (p->l.t == BC_LEX_LBRACKET) {
1400 // The brackets *must* be empty.
1401 if (BC_ERR(p->l.t != BC_LEX_RBRACKET))
1402 bc_parse_err(p, BC_ERR_PARSE_FUNC);
1406 else t = BC_TYPE_VAR;
1408 // Test for comma and get the next token if it exists.
1409 comma = (p->l.t == BC_LEX_COMMA);
1410 if (comma) bc_lex_next(&p->l);
1412 // Insert the auto into the function.
// The name comes from p->buf (the copy made above) because the lexer has
// already moved on to later tokens.
1413 bc_func_insert(p->func, p->prog, p->buf.v, t, p->l.line);
1416 // If we have a comma, but no auto, barf.
1417 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_FUNC);
1419 // If we don't have any variables or arrays, barf.
1420 if (BC_ERR(!one)) bc_parse_err(p, BC_ERR_PARSE_NO_AUTO);
1422 // The auto statement should be all that's in the statement.
1423 if (BC_ERR(!bc_parse_isDelimiter(p)))
1424 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1429 * @param p The parser.
1430 * @param brace True if a brace was encountered, false otherwise.
1432 static void bc_parse_body(BcParse *p, bool brace) {
// NOTE(review): the embedded numbering skips lines in this listing (for
// example 1456-1461 and 1468-1473), so the calls that actually parse the
// auto list and the statement, and the closing braces, are not visible
// here.
1434 uint16_t *flag_ptr = BC_PARSE_TOP_FLAG_PTR(p);
1436 assert(flag_ptr != NULL);
1437 assert(p->flags.len >= 2);
1439 // The body flag is for when we expect a body. We got a body, so clear the
1441 *flag_ptr &= ~(BC_PARSE_FLAG_BODY);
1443 // If we are inside a function, that means we just barely entered it, and
1444 // we can expect an auto list.
1445 if (*flag_ptr & BC_PARSE_FLAG_FUNC_INNER) {
1447 // We *must* have a brace in this case.
1448 if (BC_ERR(!brace)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
// auto_part becomes false exactly when the current token is `auto`; in
// that case the branch below sets it back to true before the auto list is
// parsed, because bc_parse_auto() raises an error when auto_part is false.
1450 p->auto_part = (p->l.t != BC_LEX_KW_AUTO);
1452 if (!p->auto_part) {
1454 // Make sure this is true to not get a parse error.
1455 p->auto_part = true;
1457 // Since we already have the auto keyword, parse.
1462 if (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l);
1466 // This is the easy part.
// Remember the flag-stack depth so the check below can tell whether parsing
// the statement popped any flags.
1467 size_t len = p->flags.len;
1471 // Parse a statement.
1474 // This is a very important condition to get right. If there is no
1475 // brace, and no body flag, and the flags len hasn't shrunk, then we
1476 // have a body that was not delimited by braces, so we need to end it
1477 // now, after just one statement.
1478 if (!brace && !BC_PARSE_BODY(p) && len <= p->flags.len)
1479 bc_parse_endBody(p, false);
1484 * Parses a statement. This is the entry point for just about everything, except
1485 * function definitions.
1486 * @param p The parser.
1488 static void bc_parse_stmt(BcParse *p) {
// NOTE(review): the embedded numbering skips many lines in this listing
// (for example 1489-1491, 1496-1500, 1601-1614 and 1693-1705), so the
// declarations of `len` and `flags`, several case labels, the per-case
// bodies, and the break/return statements are not visible here.
1492 BcLexType type = p->l.t;
1495 if (type == BC_LEX_NLINE) {
1501 if (type == BC_LEX_KW_AUTO) {
1506 // If we reach this point, no auto list is allowed.
1507 p->auto_part = false;
1509 // Everything but an else needs to be taken care of here, but else is
1511 if (type != BC_LEX_KW_ELSE) {
1513 // After an if, no else found.
1514 if (BC_PARSE_IF_END(p)) {
1516 // Clear the expectation for else, end body, and return. Returning
1517 // gives us a clean slate for parsing again.
1519 if (p->flags.len > 1 && !BC_PARSE_BRACE(p))
1520 bc_parse_endBody(p, false);
1523 // With a left brace, we are parsing a body.
1524 else if (type == BC_LEX_LBRACE) {
1526 // We need to start a body if we are not expecting one yet.
1527 if (!BC_PARSE_BODY(p)) {
1528 bc_parse_startBody(p, BC_PARSE_FLAG_BRACE);
1531 // If we *are* expecting a body, that body should get a brace. This
1532 // takes care of braces being on a different line than if and loop
1535 *(BC_PARSE_TOP_FLAG_PTR(p)) |= BC_PARSE_FLAG_BRACE;
1537 bc_parse_body(p, true);
1540 // If we have reached this point, we need to return for a clean
1544 // This happens when we are expecting a body and get a single statement,
1545 // i.e., a body with no braces surrounding it. Returns after for a clean
1547 else if (BC_PARSE_BODY(p) && !BC_PARSE_BRACE(p)) {
1548 bc_parse_body(p, false);
// Snapshot the top flags; the delimiter check at the end of the function
// compares against this value.
1554 flags = BC_PARSE_TOP_FLAG(p);
1558 // All of these are valid for expressions.
1561 case BC_LEX_OP_MINUS:
1562 case BC_LEX_OP_BOOL_NOT:
1566 case BC_LEX_KW_IBASE:
1567 case BC_LEX_KW_LAST:
1568 case BC_LEX_KW_LENGTH:
1569 case BC_LEX_KW_OBASE:
1570 case BC_LEX_KW_SCALE:
1571 #if BC_ENABLE_EXTRA_MATH
1572 case BC_LEX_KW_SEED:
1573 #endif // BC_ENABLE_EXTRA_MATH
1574 case BC_LEX_KW_SQRT:
1576 #if BC_ENABLE_EXTRA_MATH
1577 case BC_LEX_KW_IRAND:
1578 #endif // BC_ENABLE_EXTRA_MATH
1579 case BC_LEX_KW_ASCIIFY:
1580 case BC_LEX_KW_MODEXP:
1581 case BC_LEX_KW_DIVMOD:
1582 case BC_LEX_KW_READ:
1583 #if BC_ENABLE_EXTRA_MATH
1584 case BC_LEX_KW_RAND:
1585 #endif // BC_ENABLE_EXTRA_MATH
1586 case BC_LEX_KW_MAXIBASE:
1587 case BC_LEX_KW_MAXOBASE:
1588 case BC_LEX_KW_MAXSCALE:
1589 #if BC_ENABLE_EXTRA_MATH
1590 case BC_LEX_KW_MAXRAND:
1591 #endif // BC_ENABLE_EXTRA_MATH
1592 case BC_LEX_KW_LINE_LENGTH:
1593 case BC_LEX_KW_GLOBAL_STACKS:
1594 case BC_LEX_KW_LEADING_ZERO:
// An expression used as a statement is parsed with BC_PARSE_PRINT set.
1596 bc_parse_expr_status(p, BC_PARSE_PRINT, bc_parse_next_expr);
1600 case BC_LEX_KW_ELSE:
1615 bc_parse_endBody(p, true);
1621 bc_parse_str(p, BC_INST_PRINT_STR);
1625 case BC_LEX_KW_BREAK:
1626 case BC_LEX_KW_CONTINUE:
1628 bc_parse_loopExit(p, p->l.t);
1638 case BC_LEX_KW_HALT:
1640 bc_parse_push(p, BC_INST_HALT);
1651 case BC_LEX_KW_LIMITS:
1653 // `limits` is a compile-time command, so execute it right away.
// The first four values are cast to ulong to match the %lu conversion.
1654 bc_vm_printf("BC_LONG_BIT = %lu\n", (ulong) BC_LONG_BIT);
1655 bc_vm_printf("BC_BASE_DIGS = %lu\n", (ulong) BC_BASE_DIGS);
1656 bc_vm_printf("BC_BASE_POW = %lu\n", (ulong) BC_BASE_POW);
1657 bc_vm_printf("BC_OVERFLOW_MAX = %lu\n", (ulong) BC_NUM_BIGDIG_MAX);
// NOTE(review): the BC_MAX_* values below are passed to %lu without a cast;
// presumably they already expand to unsigned long expressions -- confirm
// against their definitions.
1659 bc_vm_printf("BC_BASE_MAX = %lu\n", BC_MAX_OBASE);
1660 bc_vm_printf("BC_DIM_MAX = %lu\n", BC_MAX_DIM);
1661 bc_vm_printf("BC_SCALE_MAX = %lu\n", BC_MAX_SCALE);
1662 bc_vm_printf("BC_STRING_MAX = %lu\n", BC_MAX_STRING);
1663 bc_vm_printf("BC_NAME_MAX = %lu\n", BC_MAX_NAME);
1664 bc_vm_printf("BC_NUM_MAX = %lu\n", BC_MAX_NUM);
1665 #if BC_ENABLE_EXTRA_MATH
1666 bc_vm_printf("BC_RAND_MAX = %lu\n", BC_MAX_RAND);
1667 #endif // BC_ENABLE_EXTRA_MATH
1668 bc_vm_printf("MAX Exponent = %lu\n", BC_MAX_EXP);
1669 bc_vm_printf("Number of vars = %lu\n", BC_MAX_VARS);
1676 case BC_LEX_KW_STREAM:
1677 case BC_LEX_KW_PRINT:
1679 bc_parse_print(p, type);
1683 case BC_LEX_KW_QUIT:
1685 // Quit is a compile-time command. We don't exit directly, so the vm
1687 vm.status = BC_STATUS_QUIT;
1692 case BC_LEX_KW_RETURN:
1698 case BC_LEX_KW_WHILE:
1706 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1710 // If the flags did not change, we expect a delimiter.
// NOTE(review): `len` is presumably p->flags.len captured before the switch
// (the assignment is not visible in this listing) -- confirm.
1711 if (len == p->flags.len && flags == BC_PARSE_TOP_FLAG(p)) {
1712 if (BC_ERR(!bc_parse_isDelimiter(p)))
1713 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1716 // Make sure semicolons are eaten.
1717 while (p->l.t == BC_LEX_SCOLON) bc_lex_next(&p->l);
1720 void bc_parse_parse(BcParse *p) {
// Top-level parse step: rejects EOF, handles `define` specially, and hands
// everything else to bc_parse_stmt().
// NOTE(review): the embedded numbering skips lines in this listing (for
// example 1721-1725, 1735-1737 and 1752-1757), so the setup code, the
// bodies of the `define` branch and of the error-reset branch, and the
// closing braces are not visible here.
1726 // We should not let an EOF get here unless some partial parse was not
1727 // completed, in which case, it's the user's fault.
1728 if (BC_ERR(p->l.t == BC_LEX_EOF)) bc_parse_err(p, BC_ERR_PARSE_EOF);
1730 // Functions need special parsing.
1731 else if (p->l.t == BC_LEX_KW_DEFINE) {
// A `define` seen while BC_PARSE_NO_EXEC(p) holds is a token error, except
// in the one case tested here: a single flag entry equal to
// BC_PARSE_FLAG_IF_END (the handling of that case is not visible in this
// listing).
1732 if (BC_ERR(BC_PARSE_NO_EXEC(p))) {
1733 if (p->flags.len == 1 &&
1734 BC_PARSE_TOP_FLAG(p) == BC_PARSE_FLAG_IF_END)
1738 else bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1743 // Otherwise, parse a normal statement.
1744 else bc_parse_stmt(p);
1750 // We need to reset on error.
// BC_STATUS_QUIT is deliberately excluded: a quit status is not treated as
// an error here, but a pending signal (vm.sig) is.
1751 if (BC_ERR(((vm.status && vm.status != BC_STATUS_QUIT) || vm.sig)))
1758 * Parse an expression. This is the actual implementation of the Shunting-Yard
1760 * @param p The parser.
1761 * @param flags The flags for what is valid in the expression.
1762 * @param next A set of tokens for what is valid *after* the expression.
1763 * @return A parse status. In some places, an empty expression is an
1764 * error, and sometimes, it is required. This allows this function
1765 * to tell the caller if the expression was empty and let the
1768 static BcParseStatus bc_parse_expr_err(BcParse *p, uint8_t flags,
1771 BcInst prev = BC_INST_PRINT;
1772 uchar inst = BC_INST_INVALID;
1774 size_t nexprs, ops_bgn;
1775 uint32_t i, nparens, nrelops;
1776 bool pfirst, rprn, done, get_token, assign, bin_last, incdec, can_assign;
// NOTE(review): the embedded numbering skips many lines in this listing
// (for example 1769-1770, 1815-1820, 1968-1976 and 2026-2030), so the rest
// of the signature, the declarations of `t` and `top`, the switch header,
// several case labels, the break statements and the closing braces are not
// visible here.
1778 // One of these *must* be true.
// In other words, BC_PARSE_PRINT and BC_PARSE_NEEDVAL must not both be set.
1779 assert(!(flags & BC_PARSE_PRINT) || !(flags & BC_PARSE_NEEDVAL));
1781 // These are set very carefully. In fact, controlling the values of these
1782 // locals is the biggest part of making this work. ops_bgn especially is
1783 // important because it marks where the operator stack begins for *this*
1784 // invocation of this function. That's because bc_parse_expr_err() is
1785 // recursive (the Shunting-Yard Algorithm is most easily expressed
1786 // recursively when parsing subexpressions), and each invocation needs to
1787 // know where to stop.
1789 // - nparens is the number of left parens without matches.
1790 // - nrelops is the number of relational operators that appear in the expr.
1791 // - nexprs is the number of unused expressions.
1792 // - rprn is a right paren encountered last.
1793 // - done means the expression has been fully parsed.
1794 // - get_token is true when a token is needed at the end of an iteration.
1795 // - assign is true when an assignment statement was parsed last.
1796 // - incdec is true when the previous operator was an inc or dec operator.
1797 // - can_assign is true when an assignment is valid.
1798 // - bin_last is true when the previous instruction was a binary operator.
1800 pfirst = (p->l.t == BC_LEX_LPAREN);
1801 nparens = nrelops = 0;
1803 ops_bgn = p->ops.len;
1804 rprn = done = get_token = assign = incdec = can_assign = false;
1807 // We want to eat newlines if newlines are not a valid ending token.
1808 // This is for spacing in things like for loop headers.
1809 if (!(flags & BC_PARSE_NOREAD)) {
1810 while ((t = p->l.t) == BC_LEX_NLINE) bc_lex_next(&p->l);
1813 // This is the Shunting-Yard algorithm loop.
// Each iteration handles the token `t` and then reloads `t` from the lexer;
// the loop stops when `done` is set or `t` cannot be part of an expression.
1814 for (; !done && BC_PARSE_EXPR(t); t = p->l.t)
1821 // These operators can only be used with items that can be
1823 if (BC_ERR(incdec)) bc_parse_err(p, BC_ERR_PARSE_ASSIGN);
1825 bc_parse_incdec(p, &prev, &can_assign, &nexprs, flags);
1827 rprn = get_token = bin_last = false;
1829 flags &= ~(BC_PARSE_ARRAY);
1834 #if BC_ENABLE_EXTRA_MATH
1835 case BC_LEX_OP_TRUNC:
1837 // The previous token must have been a leaf expression, or the
1838 // operator is in the wrong place.
1839 if (BC_ERR(!BC_PARSE_LEAF(prev, bin_last, rprn)))
1840 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1842 // I can just add the instruction because
1843 // negative will already be taken care of.
1844 bc_parse_push(p, BC_INST_TRUNC);
1846 rprn = can_assign = incdec = false;
1848 flags &= ~(BC_PARSE_ARRAY);
1852 #endif // BC_ENABLE_EXTRA_MATH
1854 case BC_LEX_OP_MINUS:
1856 bc_parse_minus(p, &prev, ops_bgn, rprn, bin_last, &nexprs);
1858 rprn = get_token = can_assign = false;
1860 // This is true if it was a binary operator last.
1861 bin_last = (prev == BC_INST_MINUS);
1862 if (bin_last) incdec = false;
1864 flags &= ~(BC_PARSE_ARRAY);
1869 // All of this group, including the fallthrough, is to parse binary
1871 case BC_LEX_OP_ASSIGN_POWER:
1872 case BC_LEX_OP_ASSIGN_MULTIPLY:
1873 case BC_LEX_OP_ASSIGN_DIVIDE:
1874 case BC_LEX_OP_ASSIGN_MODULUS:
1875 case BC_LEX_OP_ASSIGN_PLUS:
1876 case BC_LEX_OP_ASSIGN_MINUS:
1877 #if BC_ENABLE_EXTRA_MATH
1878 case BC_LEX_OP_ASSIGN_PLACES:
1879 case BC_LEX_OP_ASSIGN_LSHIFT:
1880 case BC_LEX_OP_ASSIGN_RSHIFT:
1881 #endif // BC_ENABLE_EXTRA_MATH
1882 case BC_LEX_OP_ASSIGN:
1884 // We need to make sure the assignment is valid.
1885 if (!BC_PARSE_INST_VAR(prev))
1886 bc_parse_err(p, BC_ERR_PARSE_ASSIGN);
1891 case BC_LEX_OP_POWER:
1892 case BC_LEX_OP_MULTIPLY:
1893 case BC_LEX_OP_DIVIDE:
1894 case BC_LEX_OP_MODULUS:
1895 case BC_LEX_OP_PLUS:
1896 #if BC_ENABLE_EXTRA_MATH
1897 case BC_LEX_OP_PLACES:
1898 case BC_LEX_OP_LSHIFT:
1899 case BC_LEX_OP_RSHIFT:
1900 #endif // BC_ENABLE_EXTRA_MATH
1901 case BC_LEX_OP_REL_EQ:
1902 case BC_LEX_OP_REL_LE:
1903 case BC_LEX_OP_REL_GE:
1904 case BC_LEX_OP_REL_NE:
1905 case BC_LEX_OP_REL_LT:
1906 case BC_LEX_OP_REL_GT:
1907 case BC_LEX_OP_BOOL_NOT:
1908 case BC_LEX_OP_BOOL_OR:
1909 case BC_LEX_OP_BOOL_AND:
1911 // This is true if the token is a prefix
1912 // operator. This is only for boolean not.
1913 if (BC_PARSE_OP_PREFIX(t)) {
1915 // Prefix operators are only allowed after binary operators
1916 // or prefix operators.
1917 if (BC_ERR(!bin_last && !BC_PARSE_OP_PREFIX(p->l.last)))
1918 bc_parse_err(p, BC_ERR_PARSE_EXPR);
1920 // If we execute the else, that means we have a binary operator.
1921 // If the previous operator was a prefix or a binary operator,
1922 // then a binary operator is not allowed.
1923 else if (BC_ERR(BC_PARSE_PREV_PREFIX(prev) || bin_last))
1924 bc_parse_err(p, BC_ERR_PARSE_EXPR);
// The range test evaluates to 0 or 1, so this counts relational operators.
1926 nrelops += (t >= BC_LEX_OP_REL_EQ && t <= BC_LEX_OP_REL_GT);
1927 prev = BC_PARSE_TOKEN_INST(t);
1929 bc_parse_operator(p, t, ops_bgn, &nexprs);
1931 rprn = incdec = can_assign = false;
1933 bin_last = !BC_PARSE_OP_PREFIX(t);
1934 flags &= ~(BC_PARSE_ARRAY);
1941 // A left paren is *not* allowed right after a leaf expr.
1942 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
1943 bc_parse_err(p, BC_ERR_PARSE_EXPR);
1946 rprn = incdec = can_assign = false;
1949 // Push the paren onto the operator stack.
1950 bc_vec_push(&p->ops, &t);
1957 // This needs to be a status. The error is handled in
1958 // bc_parse_expr_status().
// This is the one visible path that returns a non-success status: a right
// paren immediately following a left paren.
1959 if (BC_ERR(p->l.last == BC_LEX_LPAREN))
1960 return BC_PARSE_STATUS_EMPTY_EXPR;
1962 // The right paren must not come after a prefix or binary
1964 if (BC_ERR(bin_last || BC_PARSE_PREV_PREFIX(prev)))
1965 bc_parse_err(p, BC_ERR_PARSE_EXPR);
1967 // If there are no parens left, we are done, but we need another
1977 get_token = bin_last = incdec = false;
1979 bc_parse_rightParen(p, &nexprs);
1986 // POSIX only allows strings alone.
1987 if (BC_IS_POSIX) bc_parse_err(p, BC_ERR_POSIX_EXPR_STRING);
1989 // A string is a leaf and cannot come right after a leaf.
1990 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
1991 bc_parse_err(p, BC_ERR_PARSE_EXPR);
1993 bc_parse_addString(p);
1996 bin_last = rprn = false;
2004 // A name is a leaf and cannot come right after a leaf.
2005 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2006 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2008 get_token = bin_last = false;
// BC_PARSE_NOCALL is masked out of the flags passed down for this name.
2010 bc_parse_name(p, &prev, &can_assign, flags & ~BC_PARSE_NOCALL);
2012 rprn = (prev == BC_INST_CALL);
2014 flags &= ~(BC_PARSE_ARRAY);
2021 // A number is a leaf and cannot come right after a leaf.
2022 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2023 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2025 // The number instruction is pushed in here.
2031 rprn = bin_last = can_assign = false;
2032 flags &= ~(BC_PARSE_ARRAY);
2037 case BC_LEX_KW_IBASE:
2038 case BC_LEX_KW_LAST:
2039 case BC_LEX_KW_OBASE:
2040 #if BC_ENABLE_EXTRA_MATH
2041 case BC_LEX_KW_SEED:
2042 #endif // BC_ENABLE_EXTRA_MATH
2044 // All of these are leaves and cannot come right after a leaf.
2045 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2046 bc_parse_err(p, BC_ERR_PARSE_EXPR);
// The instruction is derived from the token by offset arithmetic.
// NOTE(review): this relies on the BC_LEX_KW_* tokens in this group and the
// matching BC_INST_* values being declared in the same relative order --
// confirm against the enum definitions.
2048 prev = t - BC_LEX_KW_LAST + BC_INST_LAST;
2049 bc_parse_push(p, prev);
2051 get_token = can_assign = true;
2052 rprn = bin_last = false;
2054 flags &= ~(BC_PARSE_ARRAY);
2059 case BC_LEX_KW_LENGTH:
2060 case BC_LEX_KW_SQRT:
2062 #if BC_ENABLE_EXTRA_MATH
2063 case BC_LEX_KW_IRAND:
2064 #endif // BC_ENABLE_EXTRA_MATH
2065 case BC_LEX_KW_ASCIIFY:
2067 // All of these are leaves and cannot come right after a leaf.
2068 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2069 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2071 bc_parse_builtin(p, t, flags, &prev);
2073 rprn = get_token = bin_last = incdec = can_assign = false;
2075 flags &= ~(BC_PARSE_ARRAY);
2080 case BC_LEX_KW_READ:
2081 #if BC_ENABLE_EXTRA_MATH
2082 case BC_LEX_KW_RAND:
2083 #endif // BC_ENABLE_EXTRA_MATH
2084 case BC_LEX_KW_MAXIBASE:
2085 case BC_LEX_KW_MAXOBASE:
2086 case BC_LEX_KW_MAXSCALE:
2087 #if BC_ENABLE_EXTRA_MATH
2088 case BC_LEX_KW_MAXRAND:
2089 #endif // BC_ENABLE_EXTRA_MATH
2090 case BC_LEX_KW_LINE_LENGTH:
2091 case BC_LEX_KW_GLOBAL_STACKS:
2092 case BC_LEX_KW_LEADING_ZERO:
2094 // All of these are leaves and cannot come right after a leaf.
2095 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2096 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2098 // Error if we have read and it's not allowed.
2099 else if (t == BC_LEX_KW_READ && BC_ERR(flags & BC_PARSE_NOREAD))
2100 bc_parse_err(p, BC_ERR_EXEC_REC_READ);
// Same token-to-instruction offset arithmetic as for the ibase/last/obase
// group, anchored at READ this time.
// NOTE(review): relies on matching enum order -- confirm.
2102 prev = t - BC_LEX_KW_READ + BC_INST_READ;
2103 bc_parse_noArgBuiltin(p, prev);
2105 rprn = get_token = bin_last = incdec = can_assign = false;
2107 flags &= ~(BC_PARSE_ARRAY);
2112 case BC_LEX_KW_SCALE:
2114 // This is a leaf and cannot come right after a leaf.
2115 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2116 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2118 // Scale needs special work because it can be a variable *or* a
2120 bc_parse_scale(p, &prev, &can_assign, flags);
2122 rprn = get_token = bin_last = false;
2124 flags &= ~(BC_PARSE_ARRAY);
2129 case BC_LEX_KW_MODEXP:
2130 case BC_LEX_KW_DIVMOD:
2132 // This is a leaf and cannot come right after a leaf.
2133 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2134 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2136 bc_parse_builtin3(p, t, flags, &prev);
2138 rprn = get_token = bin_last = incdec = can_assign = false;
2140 flags &= ~(BC_PARSE_ARRAY);
2148 // We should never get here, even in debug builds.
2149 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
2155 if (get_token) bc_lex_next(&p->l);
2158 // Now that we have parsed the expression, we need to empty the operator
// Only operators pushed by this invocation are popped: the loop stops at
// ops_bgn, the stack depth recorded on entry.
2160 while (p->ops.len > ops_bgn) {
2162 top = BC_PARSE_TOP_OP(p);
2163 assign = top >= BC_LEX_OP_ASSIGN_POWER && top <= BC_LEX_OP_ASSIGN;
2165 // There should not be *any* parens on the stack anymore.
2166 if (BC_ERR(top == BC_LEX_LPAREN || top == BC_LEX_RPAREN))
2167 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2169 bc_parse_push(p, BC_PARSE_TOKEN_INST(top));
2171 // Adjust the number of unused expressions.
// The negation yields 0 or 1, so one is subtracted for every operator that
// is not a prefix operator.
2172 nexprs -= !BC_PARSE_OP_PREFIX(top);
2173 bc_vec_pop(&p->ops);
2178 // There must be only one expression at the top.
2179 if (BC_ERR(nexprs != 1)) bc_parse_err(p, BC_ERR_PARSE_EXPR);
2181 // Check that the next token is correct.
// The for loop has an intentionally empty body: it only advances i until t
// is found in next.tokens, so i == next.len afterwards means "not found".
2182 for (i = 0; i < next.len && t != next.tokens[i]; ++i);
2183 if (BC_ERR(i == next.len && !bc_parse_isDelimiter(p)))
2184 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2186 // Check that POSIX would be happy with the number of relational operators.
2187 if (!(flags & BC_PARSE_REL) && nrelops)
2188 bc_parse_err(p, BC_ERR_POSIX_REL_POS);
2189 else if ((flags & BC_PARSE_REL) && nrelops > 1)
2190 bc_parse_err(p, BC_ERR_POSIX_MULTIREL);
2192 // If this is true, then we might be in a situation where we don't print.
2193 // We would want to have the increment/decrement operator not make an extra
2194 // copy if it's not necessary.
2195 if (!(flags & BC_PARSE_NEEDVAL) && !pfirst) {
2197 // We have the easy case if the last operator was an assignment
// Shift the last emitted assignment instruction to its *_NO_VAL counterpart
// by adding the distance between the two instruction groups.
2200 inst = *((uchar*) bc_vec_top(&p->func->code));
2201 inst += (BC_INST_ASSIGN_POWER_NO_VAL - BC_INST_ASSIGN_POWER);
2204 // If we have an inc/dec operator and we are *not* printing, implement
2205 // the optimization to get rid of the extra copy.
2206 else if (incdec && !(flags & BC_PARSE_PRINT)) {
2207 inst = *((uchar*) bc_vec_top(&p->func->code));
// incdec is narrowed to whether the last instruction is at or below
// BC_INST_DEC; the replacement is ASSIGN_PLUS_NO_VAL, or the instruction
// right after it when the original was neither INC nor ASSIGN_PLUS.
2208 incdec = (inst <= BC_INST_DEC);
2209 inst = BC_INST_ASSIGN_PLUS_NO_VAL + (inst != BC_INST_INC &&
2210 inst != BC_INST_ASSIGN_PLUS);
2213 // This condition allows us to change the previous assignment
2214 // instruction (which does a copy) for a NO_VAL version, which does not.
2215 // This condition is set if either of the above if statements ends up
2217 if (inst >= BC_INST_ASSIGN_POWER_NO_VAL &&
2218 inst <= BC_INST_ASSIGN_NO_VAL)
2220 // Pop the previous assignment instruction and push a new one.
2221 // Inc/dec needs the extra instruction because it is now a binary
2222 // operator and needs a second operand.
2223 bc_vec_pop(&p->func->code);
2224 if (incdec) bc_parse_push(p, BC_INST_ONE);
2225 bc_parse_push(p, inst);
2229 // If we might have to print...
2230 if ((flags & BC_PARSE_PRINT)) {
2232 // With a paren first or the last operator not being an assignment, we
2233 // *do* want to print.
2234 if (pfirst || !assign) bc_parse_push(p, BC_INST_PRINT);
2236 // We need to make sure to push a pop instruction for assignment statements
2237 // that will not print. The print will pop, but without it, we need to pop.
// The pop is skipped when inst is already one of the *_NO_VAL assignment
// instructions.
2238 else if (!(flags & BC_PARSE_NEEDVAL) &&
2239 (inst < BC_INST_ASSIGN_POWER_NO_VAL ||
2240 inst > BC_INST_ASSIGN_NO_VAL))
2242 bc_parse_push(p, BC_INST_POP);
2245 // We want to eat newlines if newlines are not a valid ending token.
2246 // This is for spacing in things like for loop headers.
2248 // Yes, this is one case where I reuse a variable for a different purpose;
2249 // in this case, incdec being true now means that newlines are not valid.
2250 for (incdec = true, i = 0; i < next.len && incdec; ++i)
2251 incdec = (next.tokens[i] != BC_LEX_NLINE);
2253 while (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l);
2256 return BC_PARSE_STATUS_SUCCESS;
2260 * Parses an expression with bc_parse_expr_err(), but throws an error if it gets
2261 * an empty expression.
2262 * @param p The parser.
2263 * @param flags The flags for what is valid in the expression.
2264 * @param next A set of tokens for what is valid *after* the expression.
2266 static void bc_parse_expr_status(BcParse *p, uint8_t flags, BcParseNext next) {
// Thin wrapper: forwards all three arguments unchanged and turns the
// EMPTY_EXPR status into a BC_ERR_PARSE_EMPTY_EXPR parse error. Any other
// status is ignored here.
2268 BcParseStatus s = bc_parse_expr_err(p, flags, next);
2270 if (BC_ERR(s == BC_PARSE_STATUS_EMPTY_EXPR))
2271 bc_parse_err(p, BC_ERR_PARSE_EMPTY_EXPR);
2274 void bc_parse_expr(BcParse *p, uint8_t flags) {
// Non-static entry point: parses one expression with the caller's flags,
// using bc_parse_next_read as the set of tokens allowed after it. An empty
// expression is reported as an error by bc_parse_expr_status().
// NOTE(review): the embedded numbering skips 2275 and 2277 in this listing,
// so any statement on those lines and the closing brace are not visible
// here.
2276 bc_parse_expr_status(p, flags, bc_parse_next_read);
2278 #endif // BC_ENABLED