contrib/bc/src/dc_lex.c

   1 /*
   2  * *****************************************************************************
   3  *
   4  * SPDX-License-Identifier: BSD-2-Clause
   5  *
   6  * Copyright (c) 2018-2023 Gavin D. Howard and contributors.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions are met:
  10  *
  11  * * Redistributions of source code must retain the above copyright notice, this
  12  *   list of conditions and the following disclaimer.
  13  *
  14  * * Redistributions in binary form must reproduce the above copyright notice,
  15  *   this list of conditions and the following disclaimer in the documentation
  16  *   and/or other materials provided with the distribution.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28  * POSSIBILITY OF SUCH DAMAGE.
  29  *
  30  * *****************************************************************************
  31  *
  32  * The lexer for dc.
  33  *
  34  */
  35
  36 #if DC_ENABLED
  37
  38 #include <ctype.h>
  39
  40 #include <dc.h>
  41 #include <vm.h>
  42
  43 bool
  44 dc_lex_negCommand(BcLex* l)
  45 {
  46         char c = l->buf[l->i];
  47         return !BC_LEX_NUM_CHAR(c, false, false);
  48 }
  49
  50 /**
  51  * Processes a dc command that needs a register. This is where the
  52  * extended-register extension is implemented.
  53  * @param l  The lexer.
  54  */
  55 static void
  56 dc_lex_register(BcLex* l)
  57 {
  58         // If extended register is enabled and the character is whitespace...
  59         if (DC_X && isspace(l->buf[l->i - 1]))
  60         {
  61                 char c;
  62
  63                 // Eat the whitespace.
  64                 bc_lex_whitespace(l);
  65                 c = l->buf[l->i];
  66
  67                 // Check for a letter or underscore.
  68                 if (BC_ERR(!isalpha(c) && c != '_'))
  69                 {
  70                         bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
  71                 }
  72
  73                 // Parse a normal identifier.
  74                 l->i += 1;
  75                 bc_lex_name(l);
  76         }
  77         else
  78         {
  79                 // I don't allow newlines because newlines are used for controlling when
  80                 // execution happens, and allowing newlines would just be complex.
  81                 if (BC_ERR(l->buf[l->i - 1] == '\n'))
  82                 {
  83                         bc_lex_verr(l, BC_ERR_PARSE_CHAR, l->buf[l->i - 1]);
  84                 }
  85
  86                 // Set the lexer string and token.
  87                 bc_vec_popAll(&l->str);
  88                 bc_vec_pushByte(&l->str, (uchar) l->buf[l->i - 1]);
  89                 bc_vec_pushByte(&l->str, '\0');
  90                 l->t = BC_LEX_NAME;
  91         }
  92 }
  93
  94 /**
  95  * Parses a dc string. Since dc's strings need to check for balanced brackets,
  96  * we can't just parse bc and dc strings with different start and end
  97  * characters. Oh, and dc strings need to check for escaped brackets.
  98  * @param l  The lexer.
  99  */
 100 static void
 101 dc_lex_string(BcLex* l)
 102 {
 103         size_t depth, nls, i;
 104         char c;
 105         bool got_more;
 106
 107         // Set the token and clear the string.
 108         l->t = BC_LEX_STR;
 109         bc_vec_popAll(&l->str);
 110
 111         do
 112         {
 113                 depth = 1;
 114                 nls = 0;
 115                 got_more = false;
 116
 117                 assert(l->mode != BC_MODE_STDIN || l->buf == vm->buffer.v);
 118
 119                 // This is the meat. As long as we don't run into the NUL byte, and we
 120                 // have "depth", which means we haven't completely balanced brackets
 121                 // yet, we continue eating the string.
 122                 for (i = l->i; (c = l->buf[i]) && depth; ++i)
 123                 {
 124                         // Check for escaped brackets and set the depths as appropriate.
 125                         if (c == '\\')
 126                         {
 127                                 c = l->buf[++i];
 128                                 if (!c) break;
 129                         }
 130                         else
 131                         {
 132                                 depth += (c == '[');
 133                                 depth -= (c == ']');
 134                         }
 135
 136                         // We want to adjust the line in the lexer as necessary.
 137                         nls += (c == '\n');
 138
 139                         if (depth) bc_vec_push(&l->str, &c);
 140                 }
 141
 142                 if (BC_ERR(c == '\0' && depth))
 143                 {
 144                         if (!vm->eof && l->mode != BC_MODE_FILE)
 145                         {
 146                                 got_more = bc_lex_readLine(l);
 147                         }
 148
 149                         if (got_more)
 150                         {
 151                                 bc_vec_popAll(&l->str);
 152                         }
 153                 }
 154         }
 155         while (got_more && depth);
 156
 157         // Obviously, if we didn't balance, that's an error.
 158         if (BC_ERR(c == '\0' && depth))
 159         {
 160                 l->i = i;
 161                 bc_lex_err(l, BC_ERR_PARSE_STRING);
 162         }
 163
 164         bc_vec_pushByte(&l->str, '\0');
 165
 166         l->i = i;
 167         l->line += nls;
 168 }
 169
 170 /**
 171  * Lexes a dc token. This is the dc implementation of BcLexNext.
 172  * @param l  The lexer.
 173  */
 174 void
 175 dc_lex_token(BcLex* l)
 176 {
 177         char c = l->buf[l->i++], c2;
 178         size_t i;
 179
 180         BC_SIG_ASSERT_LOCKED;
 181
 182         // If the last token was a command that needs a register, we need to parse a
 183         // register, so do so.
 184         for (i = 0; i < dc_lex_regs_len; ++i)
 185         {
 186                 // If the token is a register token, take care of it and return.
 187                 if (l->last == dc_lex_regs[i])
 188                 {
 189                         dc_lex_register(l);
 190                         return;
 191                 }
 192         }
 193
 194         // These lines are for tokens that easily correspond to one character. We
 195         // just set the token.
 196         if (c >= '"' && c <= '~' &&
 197             (l->t = dc_lex_tokens[(c - '"')]) != BC_LEX_INVALID)
 198         {
 199                 return;
 200         }
 201
 202         // This is the workhorse of the lexer when more complicated things are
 203         // needed.
 204         switch (c)
 205         {
 206                 case '\0':
 207                 case '\n':
 208                 case '\t':
 209                 case '\v':
 210                 case '\f':
 211                 case '\r':
 212                 case ' ':
 213                 {
 214                         bc_lex_commonTokens(l, c);
 215                         break;
 216                 }
 217
 218                 // We don't have the ! command, so we always expect certain things
 219                 // after the exclamation point.
 220                 case '!':
 221                 {
 222                         c2 = l->buf[l->i];
 223
 224                         if (c2 == '=') l->t = BC_LEX_OP_REL_NE;
 225                         else if (c2 == '<') l->t = BC_LEX_OP_REL_LE;
 226                         else if (c2 == '>') l->t = BC_LEX_OP_REL_GE;
 227                         else bc_lex_invalidChar(l, c);
 228
 229                         l->i += 1;
 230
 231                         break;
 232                 }
 233
 234                 case '#':
 235                 {
 236                         bc_lex_lineComment(l);
 237                         break;
 238                 }
 239
 240                 case '.':
 241                 {
 242                         c2 = l->buf[l->i];
 243
 244                         // If the character after is a number, this dot is part of a number.
 245                         // Otherwise, it's the BSD dot (equivalent to last).
 246                         if (BC_NO_ERR(BC_LEX_NUM_CHAR(c2, true, false)))
 247                         {
 248                                 bc_lex_number(l, c);
 249                         }
 250                         else bc_lex_invalidChar(l, c);
 251
 252                         break;
 253                 }
 254
 255                 case '0':
 256                 case '1':
 257                 case '2':
 258                 case '3':
 259                 case '4':
 260                 case '5':
 261                 case '6':
 262                 case '7':
 263                 case '8':
 264                 case '9':
 265                 case 'A':
 266                 case 'B':
 267                 case 'C':
 268                 case 'D':
 269                 case 'E':
 270                 case 'F':
 271                 {
 272                         bc_lex_number(l, c);
 273                         break;
 274                 }
 275
 276                 case 'g':
 277                 {
 278                         c2 = l->buf[l->i];
 279
 280                         if (c2 == 'l') l->t = BC_LEX_KW_LINE_LENGTH;
 281                         else if (c2 == 'x') l->t = BC_LEX_EXTENDED_REGISTERS;
 282                         else if (c2 == 'z') l->t = BC_LEX_KW_LEADING_ZERO;
 283                         else bc_lex_invalidChar(l, c2);
 284
 285                         l->i += 1;
 286
 287                         break;
 288                 }
 289
 290                 case '[':
 291                 {
 292                         dc_lex_string(l);
 293                         break;
 294                 }
 295
 296                 default:
 297                 {
 298                         bc_lex_invalidChar(l, c);
 299                 }
 300         }
 301 }
 302 #endif // DC_ENABLED