contrib/bc/src/dc_lex.c

   1 /*
   2  * *****************************************************************************
   3  *
   4  * SPDX-License-Identifier: BSD-2-Clause
   5  *
   6  * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions are met:
  10  *
  11  * * Redistributions of source code must retain the above copyright notice, this
  12  *   list of conditions and the following disclaimer.
  13  *
  14  * * Redistributions in binary form must reproduce the above copyright notice,
  15  *   this list of conditions and the following disclaimer in the documentation
  16  *   and/or other materials provided with the distribution.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28  * POSSIBILITY OF SUCH DAMAGE.
  29  *
  30  * *****************************************************************************
  31  *
  32  * The lexer for dc.
  33  *
  34  */
  35
  36 #if DC_ENABLED
  37
  38 #include <ctype.h>
  39
  40 #include <dc.h>
  41 #include <vm.h>
  42
  43 bool dc_lex_negCommand(BcLex *l) {
  44         char c = l->buf[l->i];
  45         return !BC_LEX_NUM_CHAR(c, false, false);
  46 }
  47
  48 /**
  49  * Processes a dc command that needs a register. This is where the
  50  * extended-register extension is implemented.
  51  * @param l  The lexer.
  52  */
  53 static void dc_lex_register(BcLex *l) {
  54
  55         // If extended register is enabled and the character is whitespace...
  56         if (DC_X && isspace(l->buf[l->i - 1])) {
  57
  58                 char c;
  59
  60                 // Eat the whitespace.
  61                 bc_lex_whitespace(l);
  62                 c = l->buf[l->i];
  63
  64                 // Check for a letter or underscore.
  65                 if (BC_ERR(!isalpha(c) && c != '_'))
  66                         bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
  67
  68                 // Parse a normal identifier.
  69                 l->i += 1;
  70                 bc_lex_name(l);
  71         }
  72         else {
  73
  74                 // I don't allow newlines because newlines are used for controlling when
  75                 // execution happens, and allowing newlines would just be complex.
  76                 if (BC_ERR(l->buf[l->i - 1] == '\n'))
  77                         bc_lex_verr(l, BC_ERR_PARSE_CHAR, l->buf[l->i - 1]);
  78
  79                 // Set the lexer string and token.
  80                 bc_vec_popAll(&l->str);
  81                 bc_vec_pushByte(&l->str, (uchar) l->buf[l->i - 1]);
  82                 bc_vec_pushByte(&l->str, '\0');
  83                 l->t = BC_LEX_NAME;
  84         }
  85 }
  86
  87 /**
  88  * Parses a dc string. Since dc's strings need to check for balanced brackets,
  89  * we can't just parse bc and dc strings with different start and end
  90  * characters. Oh, and dc strings need to check for escaped brackets.
  91  * @param l  The lexer.
  92  */
  93 static void dc_lex_string(BcLex *l) {
  94
  95         size_t depth, nls, i;
  96         char c;
  97         bool got_more;
  98
  99         // Set the token and clear the string.
 100         l->t = BC_LEX_STR;
 101         bc_vec_popAll(&l->str);
 102
 103         do {
 104
 105                 depth = 1;
 106                 nls = 0;
 107                 got_more = false;
 108
 109                 assert(!l->is_stdin || l->buf == vm.buffer.v);
 110
 111                 // This is the meat. As long as we don't run into the NUL byte, and we
 112                 // have "depth", which means we haven't completely balanced brackets
 113                 // yet, we continue eating the string.
 114                 for (i = l->i; (c = l->buf[i]) && depth; ++i) {
 115
 116                         // Check for escaped brackets and set the depths as appropriate.
 117                         if (c == '\\') {
 118                                 c = l->buf[++i];
 119                                 if (!c) break;
 120                         }
 121                         else {
 122                                 depth += (c == '[');
 123                                 depth -= (c == ']');
 124                         }
 125
 126                         // We want to adjust the line in the lexer as necessary.
 127                         nls += (c == '\n');
 128
 129                         if (depth) bc_vec_push(&l->str, &c);
 130                 }
 131
 132                 if (BC_ERR(c == '\0' && depth)) {
 133                         if (!vm.eof && l->is_stdin) got_more = bc_lex_readLine(l);
 134                         if (got_more) bc_vec_popAll(&l->str);
 135                 }
 136
 137         } while (got_more && depth);
 138
 139         // Obviously, if we didn't balance, that's an error.
 140         if (BC_ERR(c == '\0' && depth)) {
 141                 l->i = i;
 142                 bc_lex_err(l, BC_ERR_PARSE_STRING);
 143         }
 144
 145         bc_vec_pushByte(&l->str, '\0');
 146
 147         l->i = i;
 148         l->line += nls;
 149 }
 150
 151 /**
 152  * Lexes a dc token. This is the dc implementation of BcLexNext.
 153  * @param l  The lexer.
 154  */
 155 void dc_lex_token(BcLex *l) {
 156
 157         char c = l->buf[l->i++], c2;
 158         size_t i;
 159
 160         // If the last token was a command that needs a register, we need to parse a
 161         // register, so do so.
 162         for (i = 0; i < dc_lex_regs_len; ++i) {
 163
 164                 // If the token is a register token, take care of it and return.
 165                 if (l->last == dc_lex_regs[i]) {
 166                         dc_lex_register(l);
 167                         return;
 168                 }
 169         }
 170
 171         // These lines are for tokens that easily correspond to one character. We
 172         // just set the token.
 173         if (c >= '"' && c <= '~' &&
 174             (l->t = dc_lex_tokens[(c - '"')]) != BC_LEX_INVALID)
 175         {
 176                 return;
 177         }
 178
 179         // This is the workhorse of the lexer when more complicated things are
 180         // needed.
 181         switch (c) {
 182
 183                 case '\0':
 184                 case '\n':
 185                 case '\t':
 186                 case '\v':
 187                 case '\f':
 188                 case '\r':
 189                 case ' ':
 190                 {
 191                         bc_lex_commonTokens(l, c);
 192                         break;
 193                 }
 194
 195                 // We don't have the ! command, so we always expect certain things
 196                 // after the exclamation point.
 197                 case '!':
 198                 {
 199                         c2 = l->buf[l->i];
 200
 201                         if (c2 == '=') l->t = BC_LEX_OP_REL_NE;
 202                         else if (c2 == '<') l->t = BC_LEX_OP_REL_LE;
 203                         else if (c2 == '>') l->t = BC_LEX_OP_REL_GE;
 204                         else bc_lex_invalidChar(l, c);
 205
 206                         l->i += 1;
 207
 208                         break;
 209                 }
 210
 211                 case '#':
 212                 {
 213                         bc_lex_lineComment(l);
 214                         break;
 215                 }
 216
 217                 case '.':
 218                 {
 219                         c2 = l->buf[l->i];
 220
 221                         // If the character after is a number, this dot is part of a number.
 222                         // Otherwise, it's the BSD dot (equivalent to last).
 223                         if (BC_NO_ERR(BC_LEX_NUM_CHAR(c2, true, false)))
 224                                 bc_lex_number(l, c);
 225                         else bc_lex_invalidChar(l, c);
 226
 227                         break;
 228                 }
 229
 230                 case '0':
 231                 case '1':
 232                 case '2':
 233                 case '3':
 234                 case '4':
 235                 case '5':
 236                 case '6':
 237                 case '7':
 238                 case '8':
 239                 case '9':
 240                 case 'A':
 241                 case 'B':
 242                 case 'C':
 243                 case 'D':
 244                 case 'E':
 245                 case 'F':
 246                 {
 247                         bc_lex_number(l, c);
 248                         break;
 249                 }
 250
 251                 case 'g':
 252                 {
 253                         c2 = l->buf[l->i];
 254
 255                         if (c2 == 'l') l->t = BC_LEX_KW_LINE_LENGTH;
 256                         else if (c2 == 'z') l->t = BC_LEX_KW_LEADING_ZERO;
 257                         else bc_lex_invalidChar(l, c2);
 258
 259                         l->i += 1;
 260
 261                         break;
 262                 }
 263
 264                 case '[':
 265                 {
 266                         dc_lex_string(l);
 267                         break;
 268                 }
 269
 270                 default:
 271                 {
 272                         bc_lex_invalidChar(l, c);
 273                 }
 274         }
 275 }
 276 #endif // DC_ENABLED