2 * *****************************************************************************
4 * SPDX-License-Identifier: BSD-2-Clause
6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
11 * * Redistributions of source code must retain the above copyright notice, this
12 * list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
30 * *****************************************************************************
// Decides based on the character at the lexer's current index; the index
// itself is only read here, not advanced.
44 dc_lex_negCommand(BcLex* l)
46 char c = l->buf[l->i];
// The result is the logical negation of BC_LEX_NUM_CHAR(c, false, false).
// NOTE(review): BC_LEX_NUM_CHAR is defined outside this view; presumably it
// tests whether c can be part of a number -- confirm at its definition.
47 return !BC_LEX_NUM_CHAR(c, false, false);
51 * Processes a dc command that needs a register. This is where the
52 * extended-register extension is implemented.
56 dc_lex_register(BcLex* l)
// The visible checks and the final push all read l->buf[l->i - 1], the byte
// just before the lexer's current index.
58 // If extended register is enabled and the character is whitespace...
// NOTE(review): the buffer element is handed to isspace() without a cast,
// while the push further down casts the same expression to uchar. If buf
// holds plain (possibly signed) char, a negative value is outside what the
// <ctype.h> functions accept -- confirm the element type.
59 if (DC_X && isspace(l->buf[l->i - 1]))
63 // Eat the whitespace.
67 // Check for a letter or underscore.
// c is declared and assigned on lines not shown in this view. Anything that
// is neither alphabetic nor '_' is reported via bc_lex_verr with
// BC_ERR_PARSE_CHAR and the offending character.
// NOTE(review): the same <ctype.h> caveat applies to isalpha(c) if c is a
// plain char -- TODO confirm.
68 if (BC_ERR(!isalpha(c) && c != '_'))
70 bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
73 // Parse a normal identifier.
79 // I don't allow newlines because newlines are used for controlling when
80 // execution happens, and allowing newlines would just be complex.
// A newline as the register character is rejected with the same
// BC_ERR_PARSE_CHAR error, passing the newline itself as the argument.
81 if (BC_ERR(l->buf[l->i - 1] == '\n'))
83 bc_lex_verr(l, BC_ERR_PARSE_CHAR, l->buf[l->i - 1]);
86 // Set the lexer string and token.
// The lexer string is emptied, then filled with exactly the one register
// byte followed by a NUL terminator.
87 bc_vec_popAll(&l->str);
88 bc_vec_pushByte(&l->str, (uchar) l->buf[l->i - 1]);
89 bc_vec_pushByte(&l->str, '\0');
95 * Parses a dc string. Since dc's strings need to check for balanced brackets,
96 * we can't just parse bc and dc strings with different start and end
97 * characters. Oh, and dc strings need to check for escaped brackets.
101 dc_lex_string(BcLex* l)
// depth, nls and i are initialized on lines not shown in this view; c and
// got_more are likewise declared elsewhere.
103 size_t depth, nls, i;
107 // Set the token and clear the string.
109 bc_vec_popAll(&l->str);
// Sanity check: whenever the lexer is reading stdin, its buffer must be the
// VM's buffer (vm.buffer.v).
117 assert(!l->is_stdin || l->buf == vm.buffer.v);
119 // This is the meat. As long as we don't run into the NUL byte, and we
120 // have "depth", which means we haven't completely balanced brackets
121 // yet, we continue eating the string.
// Scanning starts at the lexer's current index; c is assigned inside the
// loop condition, so after the loop it holds the byte that stopped the scan.
122 for (i = l->i; (c = l->buf[i]) && depth; ++i)
124 // Check for escaped brackets and set the depths as appropriate.
136 // We want to adjust the line in the lexer as necessary.
// The character is appended only while depth is still nonzero.
// NOTE(review): presumably this keeps the bracket that finally balances the
// string out of the result; the depth updates are on lines not shown.
139 if (depth) bc_vec_push(&l->str, &c);
// Ran into the NUL byte with brackets still open: more input may be needed.
142 if (BC_ERR(c == '\0' && depth))
// Another line is requested only when the VM is not at EOF and the lexer is
// reading stdin or expressions.
144 if (!vm.eof && (l->is_stdin || l->is_exprs))
146 got_more = bc_lex_readLine(l);
// When more input arrived, the partially collected string is discarded.
// NOTE(review): presumably the scan then restarts over the refreshed buffer;
// the start of the enclosing loop is not visible here.
148 if (got_more) bc_vec_popAll(&l->str);
// Tail of a loop whose opening is not shown: repeat while new input was
// obtained and the brackets are still unbalanced.
151 while (got_more && depth);
153 // Obviously, if we didn't balance, that's an error.
154 if (BC_ERR(c == '\0' && depth))
157 bc_lex_err(l, BC_ERR_PARSE_STRING);
// NUL-terminate the collected string.
160 bc_vec_pushByte(&l->str, '\0');
167 * Lexes a dc token. This is the dc implementation of BcLexNext.
168 * @param l The lexer.
171 dc_lex_token(BcLex* l)
// Fetch the current character and advance the lexer index past it; c2 is
// filled in later by the branches that need to look at a following character.
173 char c = l->buf[l->i++], c2;
// NOTE(review): BC_SIG_ASSERT_LOCKED is defined elsewhere; presumably it
// asserts that signals are locked on entry -- confirm.
176 BC_SIG_ASSERT_LOCKED;
178 // If the last token was a command that needs a register, we need to parse a
179 // register, so do so.
// The previous token (l->last) is compared against each of the
// dc_lex_regs_len entries of dc_lex_regs.
180 for (i = 0; i < dc_lex_regs_len; ++i)
182 // If the token is a register token, take care of it and return.
183 if (l->last == dc_lex_regs[i])
190 // These lines are for tokens that easily correspond to one character. We
191 // just set the token.
// dc_lex_tokens is indexed by (c - '"') for c in the range '"' through '~'.
// Note that l->t is assigned as part of the condition, so it is written even
// when the table entry turns out to be BC_LEX_INVALID.
192 if (c >= '"' && c <= '~' &&
193 (l->t = dc_lex_tokens[(c - '"')]) != BC_LEX_INVALID)
198 // This is the workhorse of the lexer when more complicated things are
210 bc_lex_commonTokens(l, c);
214 // We don't have the ! command, so we always expect certain things
215 // after the exclamation point.
// Accepted followers: '=' gives BC_LEX_OP_REL_NE, '<' gives BC_LEX_OP_REL_LE,
// '>' gives BC_LEX_OP_REL_GE. Anything else is an invalid character, and the
// error is reported against c (the first character), not c2.
220 if (c2 == '=') l->t = BC_LEX_OP_REL_NE;
221 else if (c2 == '<') l->t = BC_LEX_OP_REL_LE;
222 else if (c2 == '>') l->t = BC_LEX_OP_REL_GE;
223 else bc_lex_invalidChar(l, c);
232 bc_lex_lineComment(l);
240 // If the character after is a number, this dot is part of a number.
241 // Otherwise, the dot is rejected as an invalid character.
242 if (BC_NO_ERR(BC_LEX_NUM_CHAR(c2, true, false)))
246 else bc_lex_invalidChar(l, c);
// Two-character keywords chosen by the second character: 'l' gives
// BC_LEX_KW_LINE_LENGTH and 'z' gives BC_LEX_KW_LEADING_ZERO. Here the error
// path reports c2, the second character.
276 if (c2 == 'l') l->t = BC_LEX_KW_LINE_LENGTH;
277 else if (c2 == 'z') l->t = BC_LEX_KW_LEADING_ZERO;
278 else bc_lex_invalidChar(l, c2);
// Fallback: the character itself is reported as invalid.
293 bc_lex_invalidChar(l, c);