contrib/bc/src/bc/lex.c

   1 /*
   2  * *****************************************************************************
   3  *
   4  * SPDX-License-Identifier: BSD-2-Clause
   5  *
   6  * Copyright (c) 2018-2020 Gavin D. Howard and contributors.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions are met:
  10  *
  11  * * Redistributions of source code must retain the above copyright notice, this
  12  *   list of conditions and the following disclaimer.
  13  *
  14  * * Redistributions in binary form must reproduce the above copyright notice,
  15  *   this list of conditions and the following disclaimer in the documentation
  16  *   and/or other materials provided with the distribution.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28  * POSSIBILITY OF SUCH DAMAGE.
  29  *
  30  * *****************************************************************************
  31  *
  32  * The lexer for bc.
  33  *
  34  */
  35
  36 #if BC_ENABLED
  37
  38 #include <assert.h>
  39 #include <ctype.h>
  40 #include <string.h>
  41
  42 #include <lex.h>
  43 #include <bc.h>
  44 #include <vm.h>
  45
  46 static void bc_lex_identifier(BcLex *l) {
  47
  48         size_t i;
  49         const char *buf = l->buf + l->i - 1;
  50
  51         for (i = 0; i < bc_lex_kws_len; ++i) {
  52
  53                 const BcLexKeyword *kw = bc_lex_kws + i;
  54                 size_t n = BC_LEX_KW_LEN(kw);
  55
  56                 if (!strncmp(buf, kw->name, n) && !isalnum(buf[n]) && buf[n] != '_') {
  57
  58                         l->t = BC_LEX_KW_AUTO + (BcLexType) i;
  59
  60                         if (!BC_LEX_KW_POSIX(kw))
  61                                 bc_lex_verr(l, BC_ERROR_POSIX_KW, kw->name);
  62
  63                         // We minus 1 because the index has already been incremented.
  64                         l->i += n - 1;
  65                         return;
  66                 }
  67         }
  68
  69         bc_lex_name(l);
  70
  71         if (BC_ERR(l->str.len - 1 > 1))
  72                 bc_lex_verr(l, BC_ERROR_POSIX_NAME_LEN, l->str.v);
  73 }
  74
  75 static void bc_lex_string(BcLex *l) {
  76
  77         size_t len, nlines = 0, i = l->i;
  78         const char *buf = l->buf;
  79         char c;
  80
  81         l->t = BC_LEX_STR;
  82
  83         for (; (c = buf[i]) && c != '"'; ++i) nlines += c == '\n';
  84
  85         if (BC_ERR(c == '\0')) {
  86                 l->i = i;
  87                 bc_lex_err(l, BC_ERROR_PARSE_STRING);
  88         }
  89
  90         len = i - l->i;
  91         bc_vec_string(&l->str, len, l->buf + l->i);
  92
  93         l->i = i + 1;
  94         l->line += nlines;
  95 }
  96
  97 static void bc_lex_assign(BcLex *l, BcLexType with, BcLexType without) {
  98         if (l->buf[l->i] == '=') {
  99                 l->i += 1;
 100                 l->t = with;
 101         }
 102         else l->t = without;
 103 }
 104
 105 void bc_lex_token(BcLex *l) {
 106
 107         char c = l->buf[l->i++], c2;
 108
 109         // This is the workhorse of the lexer.
 110         switch (c) {
 111
 112                 case '\0':
 113                 case '\n':
 114                 case '\t':
 115                 case '\v':
 116                 case '\f':
 117                 case '\r':
 118                 case ' ':
 119                 {
 120                         bc_lex_commonTokens(l, c);
 121                         break;
 122                 }
 123
 124                 case '!':
 125                 {
 126                         bc_lex_assign(l, BC_LEX_OP_REL_NE, BC_LEX_OP_BOOL_NOT);
 127
 128                         if (l->t == BC_LEX_OP_BOOL_NOT)
 129                                 bc_lex_verr(l, BC_ERROR_POSIX_BOOL, "!");
 130
 131                         break;
 132                 }
 133
 134                 case '"':
 135                 {
 136                         bc_lex_string(l);
 137                         break;
 138                 }
 139
 140                 case '#':
 141                 {
 142                         bc_lex_err(l, BC_ERROR_POSIX_COMMENT);
 143                         bc_lex_lineComment(l);
 144                         break;
 145                 }
 146
 147                 case '%':
 148                 {
 149                         bc_lex_assign(l, BC_LEX_OP_ASSIGN_MODULUS, BC_LEX_OP_MODULUS);
 150                         break;
 151                 }
 152
 153                 case '&':
 154                 {
 155                         c2 = l->buf[l->i];
 156                         if (BC_NO_ERR(c2 == '&')) {
 157
 158                                 bc_lex_verr(l, BC_ERROR_POSIX_BOOL, "&&");
 159
 160                                 l->i += 1;
 161                                 l->t = BC_LEX_OP_BOOL_AND;
 162                         }
 163                         else bc_lex_invalidChar(l, c);
 164
 165                         break;
 166                 }
 167 #if BC_ENABLE_EXTRA_MATH
 168                 case '$':
 169                 {
 170                         l->t = BC_LEX_OP_TRUNC;
 171                         break;
 172                 }
 173
 174                 case '@':
 175                 {
 176                         bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLACES, BC_LEX_OP_PLACES);
 177                         break;
 178                 }
 179 #endif // BC_ENABLE_EXTRA_MATH
 180                 case '(':
 181                 case ')':
 182                 {
 183                         l->t = (BcLexType) (c - '(' + BC_LEX_LPAREN);
 184                         break;
 185                 }
 186
 187                 case '*':
 188                 {
 189                         bc_lex_assign(l, BC_LEX_OP_ASSIGN_MULTIPLY, BC_LEX_OP_MULTIPLY);
 190                         break;
 191                 }
 192
 193                 case '+':
 194                 {
 195                         c2 = l->buf[l->i];
 196                         if (c2 == '+') {
 197                                 l->i += 1;
 198                                 l->t = BC_LEX_OP_INC;
 199                         }
 200                         else bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLUS, BC_LEX_OP_PLUS);
 201                         break;
 202                 }
 203
 204                 case ',':
 205                 {
 206                         l->t = BC_LEX_COMMA;
 207                         break;
 208                 }
 209
 210                 case '-':
 211                 {
 212                         c2 = l->buf[l->i];
 213                         if (c2 == '-') {
 214                                 l->i += 1;
 215                                 l->t = BC_LEX_OP_DEC;
 216                         }
 217                         else bc_lex_assign(l, BC_LEX_OP_ASSIGN_MINUS, BC_LEX_OP_MINUS);
 218                         break;
 219                 }
 220
 221                 case '.':
 222                 {
 223                         c2 = l->buf[l->i];
 224                         if (BC_LEX_NUM_CHAR(c2, true, false)) bc_lex_number(l, c);
 225                         else {
 226                                 l->t = BC_LEX_KW_LAST;
 227                                 bc_lex_err(l, BC_ERROR_POSIX_DOT);
 228                         }
 229                         break;
 230                 }
 231
 232                 case '/':
 233                 {
 234                         c2 = l->buf[l->i];
 235                         if (c2 =='*') bc_lex_comment(l);
 236                         else bc_lex_assign(l, BC_LEX_OP_ASSIGN_DIVIDE, BC_LEX_OP_DIVIDE);
 237                         break;
 238                 }
 239
 240                 case '0':
 241                 case '1':
 242                 case '2':
 243                 case '3':
 244                 case '4':
 245                 case '5':
 246                 case '6':
 247                 case '7':
 248                 case '8':
 249                 case '9':
 250                 case 'A':
 251                 case 'B':
 252                 case 'C':
 253                 case 'D':
 254                 case 'E':
 255                 case 'F':
 256                 // Apparently, GNU bc (and maybe others) allows any uppercase letter as
 257                 // a number. When single digits, they act like the ones above. When
 258                 // multi-digit, any letter above the input base is automatically set to
 259                 // the biggest allowable digit in the input base.
 260                 case 'G':
 261                 case 'H':
 262                 case 'I':
 263                 case 'J':
 264                 case 'K':
 265                 case 'L':
 266                 case 'M':
 267                 case 'N':
 268                 case 'O':
 269                 case 'P':
 270                 case 'Q':
 271                 case 'R':
 272                 case 'S':
 273                 case 'T':
 274                 case 'U':
 275                 case 'V':
 276                 case 'W':
 277                 case 'X':
 278                 case 'Y':
 279                 case 'Z':
 280                 {
 281                         bc_lex_number(l, c);
 282                         break;
 283                 }
 284
 285                 case ';':
 286                 {
 287                         l->t = BC_LEX_SCOLON;
 288                         break;
 289                 }
 290
 291                 case '<':
 292                 {
 293 #if BC_ENABLE_EXTRA_MATH
 294                         c2 = l->buf[l->i];
 295
 296                         if (c2 == '<') {
 297                                 l->i += 1;
 298                                 bc_lex_assign(l, BC_LEX_OP_ASSIGN_LSHIFT, BC_LEX_OP_LSHIFT);
 299                                 break;
 300                         }
 301 #endif // BC_ENABLE_EXTRA_MATH
 302                         bc_lex_assign(l, BC_LEX_OP_REL_LE, BC_LEX_OP_REL_LT);
 303                         break;
 304                 }
 305
 306                 case '=':
 307                 {
 308                         bc_lex_assign(l, BC_LEX_OP_REL_EQ, BC_LEX_OP_ASSIGN);
 309                         break;
 310                 }
 311
 312                 case '>':
 313                 {
 314 #if BC_ENABLE_EXTRA_MATH
 315                         c2 = l->buf[l->i];
 316
 317                         if (c2 == '>') {
 318                                 l->i += 1;
 319                                 bc_lex_assign(l, BC_LEX_OP_ASSIGN_RSHIFT, BC_LEX_OP_RSHIFT);
 320                                 break;
 321                         }
 322 #endif // BC_ENABLE_EXTRA_MATH
 323                         bc_lex_assign(l, BC_LEX_OP_REL_GE, BC_LEX_OP_REL_GT);
 324                         break;
 325                 }
 326
 327                 case '[':
 328                 case ']':
 329                 {
 330                         l->t = (BcLexType) (c - '[' + BC_LEX_LBRACKET);
 331                         break;
 332                 }
 333
 334                 case '\\':
 335                 {
 336                         if (BC_NO_ERR(l->buf[l->i] == '\n')) {
 337                                 l->i += 1;
 338                                 l->t = BC_LEX_WHITESPACE;
 339                         }
 340                         else bc_lex_invalidChar(l, c);
 341                         break;
 342                 }
 343
 344                 case '^':
 345                 {
 346                         bc_lex_assign(l, BC_LEX_OP_ASSIGN_POWER, BC_LEX_OP_POWER);
 347                         break;
 348                 }
 349
 350                 case 'a':
 351                 case 'b':
 352                 case 'c':
 353                 case 'd':
 354                 case 'e':
 355                 case 'f':
 356                 case 'g':
 357                 case 'h':
 358                 case 'i':
 359                 case 'j':
 360                 case 'k':
 361                 case 'l':
 362                 case 'm':
 363                 case 'n':
 364                 case 'o':
 365                 case 'p':
 366                 case 'q':
 367                 case 'r':
 368                 case 's':
 369                 case 't':
 370                 case 'u':
 371                 case 'v':
 372                 case 'w':
 373                 case 'x':
 374                 case 'y':
 375                 case 'z':
 376                 {
 377                         bc_lex_identifier(l);
 378                         break;
 379                 }
 380
 381                 case '{':
 382                 case '}':
 383                 {
 384                         l->t = (BcLexType) (c - '{' + BC_LEX_LBRACE);
 385                         break;
 386                 }
 387
 388                 case '|':
 389                 {
 390                         c2 = l->buf[l->i];
 391
 392                         if (BC_NO_ERR(c2 == '|')) {
 393
 394                                 bc_lex_verr(l, BC_ERROR_POSIX_BOOL, "||");
 395
 396                                 l->i += 1;
 397                                 l->t = BC_LEX_OP_BOOL_OR;
 398                         }
 399                         else bc_lex_invalidChar(l, c);
 400
 401                         break;
 402                 }
 403
 404                 default:
 405                 {
 406                         bc_lex_invalidChar(l, c);
 407                 }
 408         }
 409 }
 410 #endif // BC_ENABLED