include/bc.h

   1 /*
   2  * *****************************************************************************
   3  *
   4  * SPDX-License-Identifier: BSD-2-Clause
   5  *
   6  * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions are met:
  10  *
  11  * * Redistributions of source code must retain the above copyright notice, this
  12  *   list of conditions and the following disclaimer.
  13  *
  14  * * Redistributions in binary form must reproduce the above copyright notice,
  15  *   this list of conditions and the following disclaimer in the documentation
  16  *   and/or other materials provided with the distribution.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28  * POSSIBILITY OF SUCH DAMAGE.
  29  *
  30  * *****************************************************************************
  31  *
  32  * Definitions for bc only.
  33  *
  34  */
  35
  36 #ifndef BC_BC_H
  37 #define BC_BC_H
  38
  39 #if BC_ENABLED
  40
  41 #include <limits.h>
  42 #include <stdbool.h>
  43
  44 #include <status.h>
  45 #include <lex.h>
  46 #include <parse.h>
  47
  48 /**
  49  * The main function for bc. It just sets variables and passes its arguments
  50  * through to @a bc_vm_boot().
  51  */
  52 void
  53 bc_main(int argc, char* argv[]);
  54
  55 // These are references to the help text, the library text, and the "filename"
  56 // for the library.
  57 extern const char bc_help[];
  58 extern const char bc_lib[];
  59 extern const char* bc_lib_name;
  60
  61 // These are references to the second math library and its "filename."
  62 #if BC_ENABLE_EXTRA_MATH
  63 extern const char bc_lib2[];
  64 extern const char* bc_lib2_name;
  65 #endif // BC_ENABLE_EXTRA_MATH
  66
  67 /**
  68  * A struct containing information about a bc keyword.
  69  */
  70 typedef struct BcLexKeyword
  71 {
  72         /// Holds the length of the keyword along with a bit that, if set, means the
  73         /// keyword is used in POSIX bc.
  74         uchar data;
  75
  76         /// The keyword text.
  77         const char name[14];
  78 } BcLexKeyword;
  79
  80 /// Shifts @a bit into the most significant bit of a char. Used for the POSIX
  81 /// bit in BcLexKeyword's data field and the associativity bit in BC_PARSE_OP.
  82 #define BC_LEX_CHAR_MSB(bit) ((bit) << (CHAR_BIT - 1))
  83
  84 /// Returns non-zero if the keyword is POSIX, zero otherwise.
  85 #define BC_LEX_KW_POSIX(kw) ((kw)->data & (BC_LEX_CHAR_MSB(1)))
  86
  87 /// Returns the length of the keyword.
  88 #define BC_LEX_KW_LEN(kw) ((size_t) ((kw)->data & ~(BC_LEX_CHAR_MSB(1))))
  89
  90 /// A macro to easily build a keyword entry. See bc_lex_kws in src/data.c.
  91 #define BC_LEX_KW_ENTRY(a, b, c)                                              \
  92         {                                                                         \
  93                 .data = ((b) & ~(BC_LEX_CHAR_MSB(1))) | BC_LEX_CHAR_MSB(c), .name = a \
  94         }
  95
  96 #if BC_ENABLE_EXTRA_MATH
  97
  98 /// A macro for the number of keywords bc has. This has to be updated if any are
  99 /// added. This is for the redefined_kws field of the BcVm struct.
 100 #define BC_LEX_NKWS (35)
 101
 102 #else // BC_ENABLE_EXTRA_MATH
 103
 104 /// A macro for the number of keywords bc has. This has to be updated if any are
 105 /// added. This is for the redefined_kws field of the BcVm struct.
 106 #define BC_LEX_NKWS (31)
 107
 108 #endif // BC_ENABLE_EXTRA_MATH
 109
 110 // The array of keywords and its length.
 111 extern const BcLexKeyword bc_lex_kws[];
 112 extern const size_t bc_lex_kws_len;
 113
 114 /**
 115  * The @a BcLexNext function for bc. (See include/lex.h for a definition of
 116  * @a BcLexNext.)
 117  * @param l  The lexer.
 118  */
 119 void
 120 bc_lex_token(BcLex* l);
 121
 122 // The following section is for flags needed when parsing bc code. These flags
 123 // are complicated, but necessary. Why you ask? Because bc's standard is awful.
 124 //
 125 // If you don't believe me, go read the bc Parsing section of the Development
 126 // manual (manuals/development.md). Then come back.
 127 //
 128 // In other words, these flags are the sign declaring, "Here be dragons."
 129
 130 /**
 131  * This returns a pointer to the set of flags at the top of the flag stack.
 132  * @a p is expected to be a BcParse pointer.
 133  * @param p  The parser.
 134  * @return   A pointer to the top flag set.
 135  */
 136 #define BC_PARSE_TOP_FLAG_PTR(p) ((uint16_t*) bc_vec_top(&(p)->flags))
 137
 138 /**
 139  * This returns the flag set at the top of the flag stack. @a p is expected to
 140  * be a BcParse pointer.
 141  * @param p  The parser.
 142  * @return   The top flag set.
 143  */
 144 #define BC_PARSE_TOP_FLAG(p) (*(BC_PARSE_TOP_FLAG_PTR(p)))
 145
 146 // After this point, all flag #defines are in sets of 2: one to define the flag,
 147 // and one to define a way to grab the flag from the flag set at the top of the
 148 // flag stack. All `p` arguments are pointers to a BcParse.
 149
 150 // This flag is set if the parser has seen a left brace.
 151 #define BC_PARSE_FLAG_BRACE (UINTMAX_C(1) << 0)
 152 #define BC_PARSE_BRACE(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_BRACE)
 153
 154 // This flag is set if the parser is parsing inside of the braces of a function
 155 // body.
 156 #define BC_PARSE_FLAG_FUNC_INNER (UINTMAX_C(1) << 1)
 157 #define BC_PARSE_FUNC_INNER(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_FUNC_INNER)
 158
 159 // This flag is set if the parser is parsing a function. It is different from
 160 // the one above because it is set if it is parsing a function body *or* header,
 161 // not just if it's parsing a function body.
 162 #define BC_PARSE_FLAG_FUNC (UINTMAX_C(1) << 2)
 163 #define BC_PARSE_FUNC(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_FUNC)
 164
 165 // This flag is set if the parser is expecting to parse a body, whether of a
 166 // function, an if statement, or a loop.
 167 #define BC_PARSE_FLAG_BODY (UINTMAX_C(1) << 3)
 168 #define BC_PARSE_BODY(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_BODY)
 169
 170 // This flag is set if bc is parsing a loop. This is important because the break
 171 // and continue keywords are only valid inside of a loop.
 172 #define BC_PARSE_FLAG_LOOP (UINTMAX_C(1) << 4)
 173 #define BC_PARSE_LOOP(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_LOOP)
 174
 175 // This flag is set if bc is parsing the body of a loop. It is different from
 176 // the one above the same way @a BC_PARSE_FLAG_FUNC_INNER is different from
 177 // @a BC_PARSE_FLAG_FUNC.
 178 #define BC_PARSE_FLAG_LOOP_INNER (UINTMAX_C(1) << 5)
 179 #define BC_PARSE_LOOP_INNER(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_LOOP_INNER)
 180
 181 // This flag is set if bc is parsing an if statement.
 182 #define BC_PARSE_FLAG_IF (UINTMAX_C(1) << 6)
 183 #define BC_PARSE_IF(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_IF)
 184
 185 // This flag is set if bc is parsing an else statement. This is important
 186 // because of "else if" constructions, among other things.
 187 #define BC_PARSE_FLAG_ELSE (UINTMAX_C(1) << 7)
 188 #define BC_PARSE_ELSE(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_ELSE)
 189
 190 // This flag is set if bc just finished parsing an if statement and its body.
 191 // It tells the parser that it can probably expect an else statement next. This
 192 // flag is, thus, one of the most subtle.
 193 #define BC_PARSE_FLAG_IF_END (UINTMAX_C(1) << 8)
 194 #define BC_PARSE_IF_END(p) (BC_PARSE_TOP_FLAG(p) & BC_PARSE_FLAG_IF_END)
 195
 196 /**
 197  * This returns true if bc is in a state where it should not execute any code:
 198  * the flag stack's length is not 1, or the top flag set is nonzero.
 199  * @param p  The parser.
 200  * @return   True if execution cannot proceed, false otherwise.
 201  */
 202 #define BC_PARSE_NO_EXEC(p) ((p)->flags.len != 1 || BC_PARSE_TOP_FLAG(p) != 0)
 203
 204 /**
 205  * This returns true if the token @a t is a statement delimiter, which is a
 206  * semicolon, a newline, or the end of file.
 207  * @param t  The token to check.
 208  * @return   True if t is a statement delimiter token; false otherwise.
 209  */
 210 #define BC_PARSE_DELIMITER(t) \
 211         ((t) == BC_LEX_SCOLON || (t) == BC_LEX_NLINE || (t) == BC_LEX_EOF)
 212
 213 /**
 214  * This is poorly named, but it basically returns whether or not the current
 215  * state is valid for the end of an else statement.
 216  * @param f  The flag set to be checked.
 217  * @return   True if the state is valid for the end of an else statement.
 218  */
 219 #define BC_PARSE_BLOCK_STMT(f) \
 220         ((f) & (BC_PARSE_FLAG_ELSE | BC_PARSE_FLAG_LOOP_INNER))
 221
 222 /**
 223  * This returns the value of the data for an operator with precedence @a p and
 224  * associativity @a l (true if left associative, false otherwise). This is used
 225  * to construct an array of operators, bc_parse_ops, in src/data.c.
 226  * @param p  The precedence.
 227  * @param l  True if the operator is left associative, false otherwise.
 228  * @return   The data for the operator.
 229  */
 230 #define BC_PARSE_OP(p, l) (((p) & ~(BC_LEX_CHAR_MSB(1))) | (BC_LEX_CHAR_MSB(l)))
 231
 232 /**
 233  * Returns the operator data for the lex token @a t.
 234  * @param t  The token to return operator data for.
 235  * @return   The operator data for @a t.
 236  */
 237 #define BC_PARSE_OP_DATA(t) bc_parse_ops[((t) -BC_LEX_OP_INC)]
 238
 239 /**
 240  * Returns non-zero if operator @a op is left associative, zero otherwise.
 241  * @param op  The operator to test for associativity.
 242  * @return    Non-zero if the operator is left associative, zero otherwise.
 243  */
 244 #define BC_PARSE_OP_LEFT(op) (BC_PARSE_OP_DATA(op) & BC_LEX_CHAR_MSB(1))
 245
 246 /**
 247  * Returns the precedence of operator @a op. Lower number means higher
 248  * precedence.
 249  * @param op  The operator to return the precedence of.
 250  * @return    The precedence of @a op.
 251  */
 252 #define BC_PARSE_OP_PREC(op) (BC_PARSE_OP_DATA(op) & ~(BC_LEX_CHAR_MSB(1)))
 253
 254 /**
 255  * A macro to easily define a series of bits for whether a lex token is an
 256  * expression token or not. It takes 8 expression bits, corresponding to the 8
 257  * bits in a uint8_t. You can see this in use for bc_parse_exprs in src/data.c.
 258  * @param e1  The first bit (bit 7, the most significant).
 259  * @param e2  The second bit (bit 6).
 260  * @param e3  The third bit (bit 5).
 261  * @param e4  The fourth bit (bit 4).
 262  * @param e5  The fifth bit (bit 3).
 263  * @param e6  The sixth bit (bit 2).
 264  * @param e7  The seventh bit (bit 1).
 265  * @param e8  The eighth bit (bit 0, the least significant).
 266  * @return    An expression entry for bc_parse_exprs[].
 267  */
 268 #define BC_PARSE_EXPR_ENTRY(e1, e2, e3, e4, e5, e6, e7, e8)               \
 269         ((UINTMAX_C(e1) << 7) | (UINTMAX_C(e2) << 6) | (UINTMAX_C(e3) << 5) | \
 270          (UINTMAX_C(e4) << 4) | (UINTMAX_C(e5) << 3) | (UINTMAX_C(e6) << 2) | \
 271          (UINTMAX_C(e7) << 1) | (UINTMAX_C(e8) << 0))
 272
 273 /**
 274  * Returns non-zero if token @a i is a token that belongs in an expression.
 275  * @param i  The token to test.
 276  * @return   Non-zero if i is an expression token, zero otherwise.
 277  */
 278 #define BC_PARSE_EXPR(i) \
 279         (bc_parse_exprs[(((i) & (uchar) ~(0x07)) >> 3)] & (1 << (7 - ((i) &0x07))))
 280
 281 /**
 282  * Returns the operator (by lex token) that is at the top of the operator
 283  * stack.
 284  * @param p  The parser.
 285  * @return   The operator that is at the top of the operator stack, as a lex
 286  *           token.
 287  */
 288 #define BC_PARSE_TOP_OP(p) (*((BcLexType*) bc_vec_top(&(p)->ops)))
 289
 290 /**
 291  * Returns true if bc has a "leaf" token. A "leaf" token is one that can stand
 292  * alone in an expression. For example, a number by itself can be an expression,
 293  * but a binary operator, while valid for an expression, cannot be alone in the
 294  * expression. It must have an expression to the left and right of itself. See
 295  * the documentation for @a bc_parse_expr_err() in src/bc_parse.c.
 296  * @param prev      The previous token as an instruction.
 297  * @param bin_last  True if the last operator was a binary operator, false
 298  *                  otherwise.
 299  * @param rparen    True if the last operator was a right paren.
 300  * @return          True if the last token was a leaf token, false otherwise.
 301  */
 302 #define BC_PARSE_LEAF(prev, bin_last, rparen) \
 303         (!(bin_last) && ((rparen) || bc_parse_inst_isLeaf(prev)))
 304
 305 /**
 306  * This returns true if the token @a t should be treated as though it's a
 307  * variable. This goes for actual variables, array elements, and globals.
 308  * @param t  The token to test.
 309  * @return   True if @a t should be treated as though it's a variable, false
 310  *           otherwise.
 311  */
 312 #if BC_ENABLE_EXTRA_MATH
 313 #define BC_PARSE_INST_VAR(t) \
 314         ((t) >= BC_INST_VAR && (t) <= BC_INST_SEED && (t) != BC_INST_ARRAY)
 315 #else // BC_ENABLE_EXTRA_MATH
 316 #define BC_PARSE_INST_VAR(t) \
 317         ((t) >= BC_INST_VAR && (t) <= BC_INST_SCALE && (t) != BC_INST_ARRAY)
 318 #endif // BC_ENABLE_EXTRA_MATH
 319
 320 /**
 321  * Returns true if the previous token @a p (in the form of a bytecode
 322  * instruction) is a prefix operator. The fact that it is for bytecode
 323  * instructions is what makes it different from @a BC_PARSE_OP_PREFIX below.
 324  * @param p  The previous token.
 325  * @return   True if @a p is a prefix operator.
 326  */
 327 #define BC_PARSE_PREV_PREFIX(p) ((p) >= BC_INST_NEG && (p) <= BC_INST_BOOL_NOT)
 328
 329 /**
 330  * Returns true if token @a t is a prefix operator.
 331  * @param t  The token to test.
 332  * @return   True if @a t is a prefix operator, false otherwise.
 333  */
 334 #define BC_PARSE_OP_PREFIX(t) ((t) == BC_LEX_OP_BOOL_NOT || (t) == BC_LEX_NEG)
 335
 336 /**
 337  * We can calculate the conversion between tokens and bytecode instructions by
 338  * subtracting the position of the first operator in the lex enum and adding the
 339  * position of the first in the instruction enum. Note: This only works for
 340  * binary operators.
 341  * @param t  The token to turn into an instruction.
 342  * @return   The token as an instruction.
 343  */
 344 #define BC_PARSE_TOKEN_INST(t) ((uchar) ((t) -BC_LEX_NEG + BC_INST_NEG))
 345
 346 /**
 347  * Returns true if the token is a bc keyword.
 348  * @param t  The token to check.
 349  * @return   True if @a t is a bc keyword, false otherwise.
 350  */
 351 #define BC_PARSE_IS_KEYWORD(t) ((t) >= BC_LEX_KW_AUTO && (t) <= BC_LEX_KW_ELSE)
 352
 353 /// A struct that holds data about what tokens should be expected next. There
 354 /// are a few instances of these, all named because they are used in specific
 355 /// cases. Basically, in certain situations, it's useful to use the same code,
 356 /// but have a list of valid tokens.
 357 ///
 358 /// Obviously, @a len is the number of tokens in the @a tokens array. If more
 359 /// than 4 are needed in the future, @a tokens will have to be changed.
 360 typedef struct BcParseNext
 361 {
 362         /// The number of tokens in the tokens array.
 363         uchar len;
 364
 365         /// The tokens that can be expected next.
 366         uchar tokens[4];
 367
 368 } BcParseNext;
 369
 370 /// A macro to construct an array literal of tokens from a parameter list.
 371 #define BC_PARSE_NEXT_TOKENS(...) .tokens = { __VA_ARGS__ }
 372
 373 /// A macro to generate a BcParseNext literal from BcParseNext data. See
 374 /// src/data.c for examples.
 375 #define BC_PARSE_NEXT(a, ...)                                 \
 376         {                                                         \
 377                 .len = (uchar) (a), BC_PARSE_NEXT_TOKENS(__VA_ARGS__) \
 378         }
 379
 380 /// A status returned by @a bc_parse_expr_err(). It can either return success or
 381 /// an error indicating an empty expression.
 382 typedef enum BcParseStatus
 383 {
 384         BC_PARSE_STATUS_SUCCESS,
 385         BC_PARSE_STATUS_EMPTY_EXPR,
 386
 387 } BcParseStatus;
 388
 389 /**
 390  * The @a BcParseExpr function for bc. (See include/parse.h for a definition of
 391  * @a BcParseExpr.)
 392  * @param p      The parser.
 393  * @param flags  Flags that define the requirements that the parsed code must
 394  *               meet or an error will result. See @a BcParseExpr for more info.
 395  */
 396 void
 397 bc_parse_expr(BcParse* p, uint8_t flags);
 398
 399 /**
 400  * The @a BcParseParse function for bc. (See include/parse.h for a definition of
 401  * @a BcParseParse.)
 402  * @param p  The parser.
 403  */
 404 void
 405 bc_parse_parse(BcParse* p);
 406
 407 /**
 408  * Ends a series of if statements. This is to ensure that full parses happen
 409  * when a file finishes or before defining a function. Without this, bc thinks
 410  * that it cannot parse any further. But if we reach the end of a file or a
 411  * function definition, we know we can add an empty else clause.
 412  * @param p  The parser.
 413  */
 414 void
 415 bc_parse_endif(BcParse* p);
 416
 417 /// References to the signal message and its length.
 418 extern const char bc_sig_msg[];
 419 extern const uchar bc_sig_msg_len;
 420
 421 /// A reference to an array of bits that are set if the corresponding lex token
 422 /// is valid in an expression.
 423 extern const uint8_t bc_parse_exprs[];
 424
 425 /// A reference to an array of bc operators.
 426 extern const uchar bc_parse_ops[];
 427
 428 // References to the various instances of BcParseNext's.
 429
 430 /// A reference to what tokens are valid as next tokens when parsing normal
 431 /// expressions. More accurately, these are the tokens that are valid for
 432 /// *ending* the expression.
 433 extern const BcParseNext bc_parse_next_expr;
 434
 435 /// A reference to what tokens are valid as next tokens when parsing function
 436 /// parameters (well, actually arguments).
 437 extern const BcParseNext bc_parse_next_arg;
 438
 439 /// A reference to what tokens are valid as next tokens when parsing a print
 440 /// statement.
 441 extern const BcParseNext bc_parse_next_print;
 442
 443 /// A reference to what tokens are valid as next tokens when parsing things like
 444 /// loop headers and builtin functions where the only thing expected is a right
 445 /// paren.
 446 ///
 447 /// The name is an artifact of history, and is related to @a BC_PARSE_REL (see
 448 /// include/parse.h). It refers to how POSIX only allows some operators as part
 449 /// of the conditional of for loops, while loops, and if statements.
 450 extern const BcParseNext bc_parse_next_rel;
 451
 452 /// A reference to what tokens are valid as next tokens when parsing an array
 453 /// element expression.
 454 extern const BcParseNext bc_parse_next_elem;
 455
 456 /// A reference to what tokens are valid as next tokens when parsing the first
 457 /// two parts of a for loop header.
 458 extern const BcParseNext bc_parse_next_for;
 459
 460 /// A reference to what tokens are valid as next tokens when parsing a read
 461 /// expression.
 462 extern const BcParseNext bc_parse_next_read;
 463
 464 /// A reference to what tokens are valid as next tokens when parsing a builtin
 465 /// function with multiple arguments.
 466 extern const BcParseNext bc_parse_next_builtin;
 467
 468 #else // BC_ENABLED
 469
 470 // If bc is not enabled, execution is always possible because dc has strict
 471 // rules that ensure execution can always proceed safely.
 472 #define BC_PARSE_NO_EXEC(p) (0)
 473
 474 #endif // BC_ENABLED
 475
 476 #endif // BC_BC_H