src/lex.c

   1 /*
   2  * *****************************************************************************
   3  *
   4  * SPDX-License-Identifier: BSD-2-Clause
   5  *
   6  * Copyright (c) 2018-2020 Gavin D. Howard and contributors.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions are met:
  10  *
  11  * * Redistributions of source code must retain the above copyright notice, this
  12  *   list of conditions and the following disclaimer.
  13  *
  14  * * Redistributions in binary form must reproduce the above copyright notice,
  15  *   this list of conditions and the following disclaimer in the documentation
  16  *   and/or other materials provided with the distribution.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28  * POSSIBILITY OF SUCH DAMAGE.
  29  *
  30  * *****************************************************************************
  31  *
  32  * Common code for the lexers.
  33  *
  34  */
  35
  36 #include <assert.h>
  37 #include <ctype.h>
  38 #include <stdbool.h>
  39 #include <string.h>
  40
  41 #include <lex.h>
  42 #include <vm.h>
  43 #include <bc.h>
  44
  45 void bc_lex_invalidChar(BcLex *l, char c) {
  46         l->t = BC_LEX_INVALID;
  47         bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
  48 }
  49
  50 void bc_lex_lineComment(BcLex *l) {
  51         l->t = BC_LEX_WHITESPACE;
  52         while (l->i < l->len && l->buf[l->i] != '\n') l->i += 1;
  53 }
  54
  55 void bc_lex_comment(BcLex *l) {
  56
  57         size_t i, nlines = 0;
  58         const char *buf = l->buf;
  59         bool end = false;
  60         char c;
  61
  62         l->i += 1;
  63         l->t = BC_LEX_WHITESPACE;
  64
  65         for (i = l->i; !end; i += !end) {
  66
  67                 for (; (c = buf[i]) && c != '*'; ++i) nlines += (c == '\n');
  68
  69                 if (BC_ERR(!c || buf[i + 1] == '\0')) {
  70                         l->i = i;
  71                         bc_lex_err(l, BC_ERR_PARSE_COMMENT);
  72                 }
  73
  74                 end = buf[i + 1] == '/';
  75         }
  76
  77         l->i = i + 2;
  78         l->line += nlines;
  79 }
  80
  81 void bc_lex_whitespace(BcLex *l) {
  82         char c;
  83         l->t = BC_LEX_WHITESPACE;
  84         for (c = l->buf[l->i]; c != '\n' && isspace(c); c = l->buf[++l->i]);
  85 }
  86
  87 void bc_lex_commonTokens(BcLex *l, char c) {
  88         if (!c) l->t = BC_LEX_EOF;
  89         else if (c == '\n') l->t = BC_LEX_NLINE;
  90         else bc_lex_whitespace(l);
  91 }
  92
  93 static size_t bc_lex_num(BcLex *l, char start, bool int_only) {
  94
  95         const char *buf = l->buf + l->i;
  96         size_t i;
  97         char c;
  98         bool last_pt, pt = (start == '.');
  99
 100         for (i = 0; (c = buf[i]) && (BC_LEX_NUM_CHAR(c, pt, int_only) ||
 101                                      (c == '\\' && buf[i + 1] == '\n')); ++i)
 102         {
 103                 if (c == '\\') {
 104
 105                         if (buf[i + 1] == '\n') {
 106
 107                                 i += 2;
 108
 109                                 // Make sure to eat whitespace at the beginning of the line.
 110                                 while(isspace(buf[i]) && buf[i] != '\n') i += 1;
 111
 112                                 c = buf[i];
 113
 114                                 if (!BC_LEX_NUM_CHAR(c, pt, int_only)) break;
 115                         }
 116                         else break;
 117                 }
 118
 119                 last_pt = (c == '.');
 120                 if (pt && last_pt) break;
 121                 pt = pt || last_pt;
 122
 123                 bc_vec_push(&l->str, &c);
 124         }
 125
 126         return i;
 127 }
 128
 129 void bc_lex_number(BcLex *l, char start) {
 130
 131         l->t = BC_LEX_NUMBER;
 132
 133         bc_vec_npop(&l->str, l->str.len);
 134         bc_vec_push(&l->str, &start);
 135
 136         l->i += bc_lex_num(l, start, false);
 137
 138 #if BC_ENABLE_EXTRA_MATH
 139         {
 140                 char c = l->buf[l->i];
 141
 142                 if (c == 'e') {
 143
 144 #if BC_ENABLED
 145                         if (BC_IS_POSIX) bc_lex_err(l, BC_ERR_POSIX_EXP_NUM);
 146 #endif // BC_ENABLED
 147
 148                         bc_vec_push(&l->str, &c);
 149                         l->i += 1;
 150                         c = l->buf[l->i];
 151
 152                         if (c == BC_LEX_NEG_CHAR) {
 153                                 bc_vec_push(&l->str, &c);
 154                                 l->i += 1;
 155                                 c = l->buf[l->i];
 156                         }
 157
 158                         if (BC_ERR(!BC_LEX_NUM_CHAR(c, false, true)))
 159                                 bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
 160
 161                         l->i += bc_lex_num(l, 0, true);
 162                 }
 163         }
 164 #endif // BC_ENABLE_EXTRA_MATH
 165
 166         bc_vec_pushByte(&l->str, '\0');
 167 }
 168
 169 void bc_lex_name(BcLex *l) {
 170
 171         size_t i = 0;
 172         const char *buf = l->buf + l->i - 1;
 173         char c = buf[i];
 174
 175         l->t = BC_LEX_NAME;
 176
 177         while ((c >= 'a' && c <= 'z') || isdigit(c) || c == '_') c = buf[++i];
 178
 179         bc_vec_string(&l->str, i, buf);
 180
 181         // Increment the index. We minus 1 because it has already been incremented.
 182         l->i += i - 1;
 183 }
 184
 185 void bc_lex_init(BcLex *l) {
 186         BC_SIG_ASSERT_LOCKED;
 187         assert(l != NULL);
 188         bc_vec_init(&l->str, sizeof(char), NULL);
 189 }
 190
 191 void bc_lex_free(BcLex *l) {
 192         BC_SIG_ASSERT_LOCKED;
 193         assert(l != NULL);
 194         bc_vec_free(&l->str);
 195 }
 196
 197 void bc_lex_file(BcLex *l, const char *file) {
 198         assert(l != NULL && file != NULL);
 199         l->line = 1;
 200         vm.file = file;
 201 }
 202
 203 void bc_lex_next(BcLex *l) {
 204
 205         assert(l != NULL);
 206
 207         l->last = l->t;
 208         l->line += (l->i != 0 && l->buf[l->i - 1] == '\n');
 209
 210         if (BC_ERR(l->last == BC_LEX_EOF)) bc_lex_err(l, BC_ERR_PARSE_EOF);
 211
 212         l->t = BC_LEX_EOF;
 213
 214         if (l->i == l->len) return;
 215
 216         // Loop until failure or we don't have whitespace. This
 217         // is so the parser doesn't get inundated with whitespace.
 218         do {
 219                 vm.next(l);
 220         } while (l->t == BC_LEX_WHITESPACE);
 221 }
 222
 223 void bc_lex_text(BcLex *l, const char *text) {
 224         assert(l != NULL && text != NULL);
 225         l->buf = text;
 226         l->i = 0;
 227         l->len = strlen(text);
 228         l->t = l->last = BC_LEX_INVALID;
 229         bc_lex_next(l);
 230 }