src/bc_lex.c

   1 /*
   2  * *****************************************************************************
   3  *
   4  * SPDX-License-Identifier: BSD-2-Clause
   5  *
   6  * Copyright (c) 2018-2020 Gavin D. Howard and contributors.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions are met:
  10  *
  11  * * Redistributions of source code must retain the above copyright notice, this
  12  *   list of conditions and the following disclaimer.
  13  *
  14  * * Redistributions in binary form must reproduce the above copyright notice,
  15  *   this list of conditions and the following disclaimer in the documentation
  16  *   and/or other materials provided with the distribution.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28  * POSSIBILITY OF SUCH DAMAGE.
  29  *
  30  * *****************************************************************************
  31  *
  32  * The lexer for bc.
  33  *
  34  */
  35
  36 #if BC_ENABLED
  37
  38 #include <assert.h>
  39 #include <ctype.h>
  40 #include <string.h>
  41
  42 #include <bc.h>
  43 #include <vm.h>
  44
  45 static void bc_lex_identifier(BcLex *l) {
  46
  47         size_t i;
  48         const char *buf = l->buf + l->i - 1;
  49
  50         for (i = 0; i < bc_lex_kws_len; ++i) {
  51
  52                 const BcLexKeyword *kw = bc_lex_kws + i;
  53                 size_t n = BC_LEX_KW_LEN(kw);
  54
  55                 if (!strncmp(buf, kw->name, n) && !isalnum(buf[n]) && buf[n] != '_') {
  56
  57                         l->t = BC_LEX_KW_AUTO + (BcLexType) i;
  58
  59                         if (!BC_LEX_KW_POSIX(kw))
  60                                 bc_lex_verr(l, BC_ERR_POSIX_KW, kw->name);
  61
  62                         // We minus 1 because the index has already been incremented.
  63                         l->i += n - 1;
  64                         return;
  65                 }
  66         }
  67
  68         bc_lex_name(l);
  69
  70         if (BC_ERR(l->str.len - 1 > 1))
  71                 bc_lex_verr(l, BC_ERR_POSIX_NAME_LEN, l->str.v);
  72 }
  73
  74 static void bc_lex_string(BcLex *l) {
  75
  76         size_t len, nlines = 0, i = l->i;
  77         const char *buf = l->buf;
  78         char c;
  79
  80         l->t = BC_LEX_STR;
  81
  82         for (; (c = buf[i]) && c != '"'; ++i) nlines += c == '\n';
  83
  84         if (BC_ERR(c == '\0')) {
  85                 l->i = i;
  86                 bc_lex_err(l, BC_ERR_PARSE_STRING);
  87         }
  88
  89         len = i - l->i;
  90         bc_vec_string(&l->str, len, l->buf + l->i);
  91
  92         l->i = i + 1;
  93         l->line += nlines;
  94 }
  95
  96 static void bc_lex_assign(BcLex *l, BcLexType with, BcLexType without) {
  97         if (l->buf[l->i] == '=') {
  98                 l->i += 1;
  99                 l->t = with;
 100         }
 101         else l->t = without;
 102 }
 103
 104 void bc_lex_token(BcLex *l) {
 105
 106         char c = l->buf[l->i++], c2;
 107
 108         // This is the workhorse of the lexer.
 109         switch (c) {
 110
 111                 case '\0':
 112                 case '\n':
 113                 case '\t':
 114                 case '\v':
 115                 case '\f':
 116                 case '\r':
 117                 case ' ':
 118                 {
 119                         bc_lex_commonTokens(l, c);
 120                         break;
 121                 }
 122
 123                 case '!':
 124                 {
 125                         bc_lex_assign(l, BC_LEX_OP_REL_NE, BC_LEX_OP_BOOL_NOT);
 126
 127                         if (l->t == BC_LEX_OP_BOOL_NOT)
 128                                 bc_lex_verr(l, BC_ERR_POSIX_BOOL, "!");
 129
 130                         break;
 131                 }
 132
 133                 case '"':
 134                 {
 135                         bc_lex_string(l);
 136                         break;
 137                 }
 138
 139                 case '#':
 140                 {
 141                         bc_lex_err(l, BC_ERR_POSIX_COMMENT);
 142                         bc_lex_lineComment(l);
 143                         break;
 144                 }
 145
 146                 case '%':
 147                 {
 148                         bc_lex_assign(l, BC_LEX_OP_ASSIGN_MODULUS, BC_LEX_OP_MODULUS);
 149                         break;
 150                 }
 151
 152                 case '&':
 153                 {
 154                         c2 = l->buf[l->i];
 155                         if (BC_NO_ERR(c2 == '&')) {
 156
 157                                 bc_lex_verr(l, BC_ERR_POSIX_BOOL, "&&");
 158
 159                                 l->i += 1;
 160                                 l->t = BC_LEX_OP_BOOL_AND;
 161                         }
 162                         else bc_lex_invalidChar(l, c);
 163
 164                         break;
 165                 }
 166 #if BC_ENABLE_EXTRA_MATH
 167                 case '$':
 168                 {
 169                         l->t = BC_LEX_OP_TRUNC;
 170                         break;
 171                 }
 172
 173                 case '@':
 174                 {
 175                         bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLACES, BC_LEX_OP_PLACES);
 176                         break;
 177                 }
 178 #endif // BC_ENABLE_EXTRA_MATH
 179                 case '(':
 180                 case ')':
 181                 {
 182                         l->t = (BcLexType) (c - '(' + BC_LEX_LPAREN);
 183                         break;
 184                 }
 185
 186                 case '*':
 187                 {
 188                         bc_lex_assign(l, BC_LEX_OP_ASSIGN_MULTIPLY, BC_LEX_OP_MULTIPLY);
 189                         break;
 190                 }
 191
 192                 case '+':
 193                 {
 194                         c2 = l->buf[l->i];
 195                         if (c2 == '+') {
 196                                 l->i += 1;
 197                                 l->t = BC_LEX_OP_INC;
 198                         }
 199                         else bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLUS, BC_LEX_OP_PLUS);
 200                         break;
 201                 }
 202
 203                 case ',':
 204                 {
 205                         l->t = BC_LEX_COMMA;
 206                         break;
 207                 }
 208
 209                 case '-':
 210                 {
 211                         c2 = l->buf[l->i];
 212                         if (c2 == '-') {
 213                                 l->i += 1;
 214                                 l->t = BC_LEX_OP_DEC;
 215                         }
 216                         else bc_lex_assign(l, BC_LEX_OP_ASSIGN_MINUS, BC_LEX_OP_MINUS);
 217                         break;
 218                 }
 219
 220                 case '.':
 221                 {
 222                         c2 = l->buf[l->i];
 223                         if (BC_LEX_NUM_CHAR(c2, true, false)) bc_lex_number(l, c);
 224                         else {
 225                                 l->t = BC_LEX_KW_LAST;
 226                                 bc_lex_err(l, BC_ERR_POSIX_DOT);
 227                         }
 228                         break;
 229                 }
 230
 231                 case '/':
 232                 {
 233                         c2 = l->buf[l->i];
 234                         if (c2 =='*') bc_lex_comment(l);
 235                         else bc_lex_assign(l, BC_LEX_OP_ASSIGN_DIVIDE, BC_LEX_OP_DIVIDE);
 236                         break;
 237                 }
 238
 239                 case '0':
 240                 case '1':
 241                 case '2':
 242                 case '3':
 243                 case '4':
 244                 case '5':
 245                 case '6':
 246                 case '7':
 247                 case '8':
 248                 case '9':
 249                 case 'A':
 250                 case 'B':
 251                 case 'C':
 252                 case 'D':
 253                 case 'E':
 254                 case 'F':
 255                 // Apparently, GNU bc (and maybe others) allows any uppercase letter as
 256                 // a number. When single digits, they act like the ones above. When
 257                 // multi-digit, any letter above the input base is automatically set to
 258                 // the biggest allowable digit in the input base.
 259                 case 'G':
 260                 case 'H':
 261                 case 'I':
 262                 case 'J':
 263                 case 'K':
 264                 case 'L':
 265                 case 'M':
 266                 case 'N':
 267                 case 'O':
 268                 case 'P':
 269                 case 'Q':
 270                 case 'R':
 271                 case 'S':
 272                 case 'T':
 273                 case 'U':
 274                 case 'V':
 275                 case 'W':
 276                 case 'X':
 277                 case 'Y':
 278                 case 'Z':
 279                 {
 280                         bc_lex_number(l, c);
 281                         break;
 282                 }
 283
 284                 case ';':
 285                 {
 286                         l->t = BC_LEX_SCOLON;
 287                         break;
 288                 }
 289
 290                 case '<':
 291                 {
 292 #if BC_ENABLE_EXTRA_MATH
 293                         c2 = l->buf[l->i];
 294
 295                         if (c2 == '<') {
 296                                 l->i += 1;
 297                                 bc_lex_assign(l, BC_LEX_OP_ASSIGN_LSHIFT, BC_LEX_OP_LSHIFT);
 298                                 break;
 299                         }
 300 #endif // BC_ENABLE_EXTRA_MATH
 301                         bc_lex_assign(l, BC_LEX_OP_REL_LE, BC_LEX_OP_REL_LT);
 302                         break;
 303                 }
 304
 305                 case '=':
 306                 {
 307                         bc_lex_assign(l, BC_LEX_OP_REL_EQ, BC_LEX_OP_ASSIGN);
 308                         break;
 309                 }
 310
 311                 case '>':
 312                 {
 313 #if BC_ENABLE_EXTRA_MATH
 314                         c2 = l->buf[l->i];
 315
 316                         if (c2 == '>') {
 317                                 l->i += 1;
 318                                 bc_lex_assign(l, BC_LEX_OP_ASSIGN_RSHIFT, BC_LEX_OP_RSHIFT);
 319                                 break;
 320                         }
 321 #endif // BC_ENABLE_EXTRA_MATH
 322                         bc_lex_assign(l, BC_LEX_OP_REL_GE, BC_LEX_OP_REL_GT);
 323                         break;
 324                 }
 325
 326                 case '[':
 327                 case ']':
 328                 {
 329                         l->t = (BcLexType) (c - '[' + BC_LEX_LBRACKET);
 330                         break;
 331                 }
 332
 333                 case '\\':
 334                 {
 335                         if (BC_NO_ERR(l->buf[l->i] == '\n')) {
 336                                 l->i += 1;
 337                                 l->t = BC_LEX_WHITESPACE;
 338                         }
 339                         else bc_lex_invalidChar(l, c);
 340                         break;
 341                 }
 342
 343                 case '^':
 344                 {
 345                         bc_lex_assign(l, BC_LEX_OP_ASSIGN_POWER, BC_LEX_OP_POWER);
 346                         break;
 347                 }
 348
 349                 case 'a':
 350                 case 'b':
 351                 case 'c':
 352                 case 'd':
 353                 case 'e':
 354                 case 'f':
 355                 case 'g':
 356                 case 'h':
 357                 case 'i':
 358                 case 'j':
 359                 case 'k':
 360                 case 'l':
 361                 case 'm':
 362                 case 'n':
 363                 case 'o':
 364                 case 'p':
 365                 case 'q':
 366                 case 'r':
 367                 case 's':
 368                 case 't':
 369                 case 'u':
 370                 case 'v':
 371                 case 'w':
 372                 case 'x':
 373                 case 'y':
 374                 case 'z':
 375                 {
 376                         bc_lex_identifier(l);
 377                         break;
 378                 }
 379
 380                 case '{':
 381                 case '}':
 382                 {
 383                         l->t = (BcLexType) (c - '{' + BC_LEX_LBRACE);
 384                         break;
 385                 }
 386
 387                 case '|':
 388                 {
 389                         c2 = l->buf[l->i];
 390
 391                         if (BC_NO_ERR(c2 == '|')) {
 392
 393                                 bc_lex_verr(l, BC_ERR_POSIX_BOOL, "||");
 394
 395                                 l->i += 1;
 396                                 l->t = BC_LEX_OP_BOOL_OR;
 397                         }
 398                         else bc_lex_invalidChar(l, c);
 399
 400                         break;
 401                 }
 402
 403                 default:
 404                 {
 405                         bc_lex_invalidChar(l, c);
 406                 }
 407         }
 408 }
 409 #endif // BC_ENABLED