2 * Copyright (c) 2013 David Chisnall
5 * This software was developed by SRI International and the University of
6 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
7 * ("CTSRD"), as part of the DARPA CRASH research programme.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 #include "input_buffer.hh"
50 #ifndef MAP_PREFAULT_READ
51 #define MAP_PREFAULT_READ 0
57 input_buffer::skip_spaces()
59 if (cursor >= size) { return; }
60 if (cursor < 0) { return; }
61 char c = buffer[cursor];
62 while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f')
63 || (c == '\v') || (c == '\r'))
78 input_buffer::buffer_from_offset(int offset, int s)
86 return input_buffer();
88 if (s > (size-offset))
90 return input_buffer();
92 return input_buffer(&buffer[offset], s);
96 input_buffer::consume(const char *str)
98 int len = strlen(str);
99 if (len > size - cursor)
105 for (int i=0 ; i<len ; ++i)
107 if (str[i] != buffer[cursor + i])
119 input_buffer::consume_integer(unsigned long long &outInt)
121 // The first character must be a digit. Hex and octal strings
122 // are prefixed by 0 and 0x, respectively.
123 if (!isdigit((*this)[0]))
128 outInt = strtoull(&buffer[cursor], &end, 0);
129 if (end == &buffer[cursor])
133 cursor = end - buffer;
140 * Convenience typedef for the type that we use for all values.
142 typedef unsigned long long valty;
145 * Expression tree currently being parsed.
150 * Evaluate this node, taking into account operator precedence.
152 virtual valty operator()() = 0;
154 * Returns the precedence of this node. Lower values indicate higher
157 virtual int precedence() = 0;
158 virtual ~expression() {}
161 * Dumps this expression to `std::cerr`, appending a newline if `nl` is
164 void dump(bool nl=false)
168 std::cerr << "{nullptr}\n";
179 * Method that sublcasses override to implement the behaviour of `dump()`.
181 virtual void dump_impl() = 0;
186 * Expression wrapping a single integer. Leaf nodes in the expression tree.
188 class terminal_expr : public expression
191 * The value that this wraps.
195 * Evaluate. Trivially returns the value that this class wraps.
197 valty operator()() override
201 int precedence() override
209 terminal_expr(valty v) : val(v) {}
211 void dump_impl() override { std::cerr << val; }
216 * Parenthetical expression. Exists to make the contents opaque.
218 struct paren_expression : public expression
221 * The expression within the parentheses.
223 expression_ptr subexpr;
225 * Constructor. Takes the child expression as the only argument.
227 paren_expression(expression_ptr p) : subexpr(std::move(p)) {}
228 int precedence() override
233 * Evaluate - just forwards to the underlying expression.
235 valty operator()() override
240 void dump_impl() override
250 * Template class for unary operators. The `OpChar` template parameter is
251 * solely for debugging and makes it easy to print the expression. The `Op`
252 * template parameter is a function object that implements the operator that
253 * this class provides. Most of these are provided by the `<functional>`
256 template<char OpChar, class Op>
257 class unary_operator : public expression
260 * The subexpression for this unary operator.
262 expression_ptr subexpr;
263 valty operator()() override
266 return op((*subexpr)());
269 * All unary operators have the same precedence. They are all evaluated
270 * before binary expressions, but after parentheses.
272 int precedence() override
277 unary_operator(expression_ptr p) : subexpr(std::move(p)) {}
279 void dump_impl() override
288 * Abstract base class for binary operators. Allows the tree to be modified
289 * without knowing what the operations actually are.
291 struct binary_operator_base : public expression
294 * The left side of the expression.
298 * The right side of the expression.
302 * Insert a node somewhere down the path of left children, until it would
303 * be preempting something that should execute first.
305 void insert_left(binary_operator_base *new_left)
307 if (lhs->precedence() < new_left->precedence())
309 new_left->rhs = std::move(lhs);
314 static_cast<binary_operator_base*>(lhs.get())->insert_left(new_left);
320 * Template class for binary operators. The precedence and the operation are
321 * provided as template parameters.
323 template<int Precedence, class Op>
324 struct binary_operator : public binary_operator_base
326 valty operator()() override
329 return op((*lhs)(), (*rhs)());
331 int precedence() override
337 * Constructor. Takes the name of the operator as an argument, for
338 * debugging. Only stores it in debug mode.
340 binary_operator(const char *) {}
343 binary_operator(const char *o) : opName(o) {}
344 void dump_impl() override
354 * Ternary conditional operators (`cond ? true : false`) are a special case -
355 * there are no other ternary operators.
357 class ternary_conditional_operator : public expression
360 * The condition for the clause.
364 * The expression that this evaluates to if the condition is true.
368 * The expression that this evaluates to if the condition is false.
371 valty operator()() override
373 return (*cond)() ? (*lhs)() : (*rhs)();
375 int precedence() override
377 // The actual precedence of a ternary conditional operator is 15, but
378 // its associativity is the opposite way around to the other operators,
379 // so we fudge it slightly.
383 void dump_impl() override
393 ternary_conditional_operator(expression_ptr c,
396 cond(std::move(c)), lhs(std::move(l)), rhs(std::move(r)) {}
402 constexpr T operator()(const T &lhs, const T &rhs) const
410 constexpr T operator()(const T &lhs, const T &rhs) const
418 constexpr T operator()(const T &val) const
423 // TODO: Replace with std::bit_not once we can guarantee C++14 as a baseline.
427 constexpr T operator()(const T &val) const
433 } // anonymous namespace
436 expression_ptr input_buffer::parse_binary_expression(expression_ptr lhs)
439 binary_operator_base *expr = nullptr;
440 char op = ((*this)[0]);
446 expr = new binary_operator<6, std::plus<valty>>("+");
449 expr = new binary_operator<6, std::minus<valty>>("-");
452 expr = new binary_operator<5, std::modulus<valty>>("%");
455 expr = new binary_operator<5, std::multiplies<valty>>("*");
458 expr = new binary_operator<5, std::divides<valty>>("/");
465 parse_error("Invalid operator");
471 expr = new binary_operator<8, std::less<valty>>("<");
474 expr = new binary_operator<8, std::less_equal<valty>>("<=");
477 expr = new binary_operator<7, lshift<valty>>("<<");
486 parse_error("Invalid operator");
492 expr = new binary_operator<8, std::greater<valty>>(">");
495 expr = new binary_operator<8, std::greater_equal<valty>>(">=");
498 expr = new binary_operator<7, rshift<valty>>(">>");
504 if ((*this)[1] != '=')
506 parse_error("Invalid operator");
510 expr = new binary_operator<9, std::equal_to<valty>>("==");
513 if ((*this)[1] != '=')
515 parse_error("Invalid operator");
519 expr = new binary_operator<9, std::not_equal_to<valty>>("!=");
522 if ((*this)[1] == '&')
524 expr = new binary_operator<13, std::logical_and<valty>>("&&");
528 expr = new binary_operator<10, std::bit_and<valty>>("&");
532 if ((*this)[1] == '|')
534 expr = new binary_operator<12, std::logical_or<valty>>("||");
538 expr = new binary_operator<14, std::bit_or<valty>>("|");
544 expression_ptr true_case = parse_expression();
546 if (!true_case || !consume(':'))
548 parse_error("Expected : in ternary conditional operator");
551 expression_ptr false_case = parse_expression();
554 parse_error("Expected false condition for ternary operator");
557 return expression_ptr(new ternary_conditional_operator(std::move(lhs),
558 std::move(true_case), std::move(false_case)));
563 expression_ptr e(expr);
564 expression_ptr rhs(parse_expression());
569 expr->lhs = std::move(lhs);
570 if (rhs->precedence() < expr->precedence())
572 expr->rhs = std::move(rhs);
576 // If we're a normal left-to-right expression, then we need to insert
577 // this as the far-left child node of the rhs expression
578 binary_operator_base *rhs_op =
579 static_cast<binary_operator_base*>(rhs.get());
580 rhs_op->insert_left(expr);
587 expression_ptr input_buffer::parse_expression(bool stopAtParen)
590 unsigned long long leftVal;
595 if (!consume_integer(leftVal))
599 lhs.reset(new terminal_expr(leftVal));
604 expression_ptr &&subexpr = parse_expression();
609 lhs.reset(new paren_expression(std::move(subexpr)));
623 expression_ptr &&subexpr = parse_expression();
628 lhs.reset(new unary_operator<'+', unary_plus<valty>>(std::move(subexpr)));
634 expression_ptr &&subexpr = parse_expression();
639 lhs.reset(new unary_operator<'-', std::negate<valty>>(std::move(subexpr)));
645 expression_ptr &&subexpr = parse_expression();
650 lhs.reset(new unary_operator<'!', std::logical_not<valty>>(std::move(subexpr)));
656 expression_ptr &&subexpr = parse_expression();
661 lhs.reset(new unary_operator<'~', bit_not<valty>>(std::move(subexpr)));
669 return parse_binary_expression(std::move(lhs));
673 input_buffer::consume_integer_expression(unsigned long long &outInt)
679 expression_ptr e(parse_expression(true));
688 return consume_integer(outInt);
695 input_buffer::consume_hex_byte(uint8_t &outByte)
697 if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1]))
701 outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]);
707 input_buffer::next_token()
714 if ((*this)[0] == '/' && (*this)[1] == '*')
716 // eat the start of the comment
720 // Find the ending * of */
721 while ((**this != '\0') && (**this != '*'))
727 } while ((**this != '\0') && (**this != '/'));
732 if (((*this)[0] == '/' && (*this)[1] == '/'))
734 // eat the start of the comment
737 // Find the ending of the line
738 while (**this != '\n')
745 } while (start != cursor);
750 input_buffer::parse_error(const char *msg)
754 int line_end = cursor;
755 for (int i=cursor ; i>0 ; --i)
757 if (buffer[i] == '\n')
766 for (int i=cursor+1 ; i<size ; ++i)
768 if (buffer[i] == '\n')
774 fprintf(stderr, "Error on line %d: %s\n", line_count, msg);
775 fwrite(&buffer[line_start], line_end-line_start, 1, stderr);
777 for (int i=0 ; i<(cursor-line_start) ; ++i)
779 char c = (buffer[i+line_start] == '\t') ? '\t' : ' ';
789 fprintf(stderr, "Current cursor: %d\n", cursor);
790 fwrite(&buffer[cursor], size-cursor, 1, stderr);
794 mmap_input_buffer::mmap_input_buffer(int fd) : input_buffer(0, 0)
799 perror("Failed to stat file");
802 buffer = (const char*)mmap(0, size, PROT_READ, MAP_PRIVATE |
803 MAP_PREFAULT_READ, fd, 0);
804 if (buffer == MAP_FAILED)
806 perror("Failed to mmap file");
811 mmap_input_buffer::~mmap_input_buffer()
815 munmap((void*)buffer, size);
819 stream_input_buffer::stream_input_buffer() : input_buffer(0, 0)
822 while ((c = fgetc(stdin)) != EOF)