2 * SPDX-License-Identifier: BSD-2-Clause
4 * Copyright (c) 2013 David Chisnall
7 * This software was developed by SRI International and the University of
8 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
9 * ("CTSRD"), as part of the DARPA CRASH research programme.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 #include "input_buffer.hh"
53 #ifndef MAP_PREFAULT_READ
54 #define MAP_PREFAULT_READ 0
62 * Subclass of input_buffer that mmap()s a file and owns the resulting memory.
63 * When this object is destroyed, the memory is unmapped.
65 struct mmap_input_buffer : public dtc::input_buffer
68 const string &filename() const override
73 * Constructs a new buffer from the file passed in as a file
76 mmap_input_buffer(int fd, string &&filename);
78 * Unmaps the buffer, if one exists.
80 virtual ~mmap_input_buffer();
83 * Input buffer read from standard input. This is used for reading device tree
84 * blobs and source from standard input. It reads the entire input into
85 * malloc'd memory, so will be very slow for large inputs. DTS and DTB files
86 * are very rarely more than 10KB though, so this is probably not a problem.
88 struct stream_input_buffer : public dtc::input_buffer
90 const string &filename() const override
92 static string n = "<standard input>";
96 * The buffer that will store the data read from the standard input.
100 * Constructs a new buffer from the standard input.
102 stream_input_buffer();
105 mmap_input_buffer::mmap_input_buffer(int fd, string &&filename)
106 : input_buffer(0, 0), fn(filename)
111 perror("Failed to stat file");
114 buffer = (const char*)mmap(0, size, PROT_READ, MAP_PRIVATE |
115 MAP_PREFAULT_READ, fd, 0);
116 if (buffer == MAP_FAILED)
118 perror("Failed to mmap file");
123 mmap_input_buffer::~mmap_input_buffer()
127 munmap(const_cast<char*>(buffer), size);
131 stream_input_buffer::stream_input_buffer() : input_buffer(0, 0)
134 while ((c = fgetc(stdin)) != EOF)
142 } // Anonymous namespace
149 input_buffer::skip_to(char c)
151 while ((cursor < size) && (buffer[cursor] != c))
158 text_input_buffer::skip_to(char c)
160 while (!finished() && (*(*this) != c))
167 text_input_buffer::skip_spaces()
169 if (finished()) { return; }
171 bool last_nl = false;
172 while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f')
173 || (c == '\v') || (c == '\r'))
175 last_nl = ((c == '\n') || (c == '\r'));
186 // Skip C preprocessor leftovers
187 if ((c == '#') && ((cursor == 0) || last_nl))
192 if (consume("/include/"))
200 text_input_buffer::handle_include()
202 bool reallyInclude = true;
206 string name = parse_property_name();
207 if (defines.count(name) == 0)
209 reallyInclude = false;
216 parse_error("Expected quoted filename");
219 auto loc = location();
220 string file = parse_to('"');
226 string include_file = dir + '/' + file;
227 auto include_buffer = input_buffer::buffer_for_file(include_file, false);
228 if (include_buffer == 0)
230 for (auto i : include_paths)
232 include_file = i + '/' + file;
233 include_buffer = input_buffer::buffer_for_file(include_file, false);
234 if (include_buffer != 0)
243 fputs(include_file.c_str(), depfile);
247 loc.report_error("Unable to locate input file");
250 input_stack.push(std::move(include_buffer));
253 bool text_input_buffer::read_binary_file(const std::string &filename, byte_buffer &b)
255 bool try_include_paths = true;
257 if (filename[0] == '/')
259 include_file = filename;
260 // Don't try include paths if we're given an absolute path.
261 // Failing is better so that we don't accidentally do the wrong thing,
262 // but make it seem like everything is alright.
263 try_include_paths = false;
267 include_file = dir + '/' + filename;
269 auto include_buffer = input_buffer::buffer_for_file(include_file, false);
270 if (include_buffer == 0 && try_include_paths)
272 for (auto i : include_paths)
274 include_file = i + '/' + filename;
275 include_buffer = input_buffer::buffer_for_file(include_file, false);
276 if (include_buffer != 0)
289 fputs(include_file.c_str(), depfile);
291 b.insert(b.begin(), include_buffer->begin(), include_buffer->end());
296 input_buffer::buffer_from_offset(int offset, int s)
300 return input_buffer();
308 return input_buffer();
310 if (s > (size-offset))
312 return input_buffer();
314 return input_buffer(&buffer[offset], s);
318 input_buffer::consume(const char *str)
320 int len = strlen(str);
321 if (len > size - cursor)
327 for (int i=0 ; i<len ; ++i)
329 if (str[i] != (*this)[i])
341 input_buffer::consume_integer(unsigned long long &outInt)
343 // The first character must be a digit. Hex and octal strings
344 // are prefixed by 0 and 0x, respectively.
345 if (!isdigit((*this)[0]))
349 char *end= const_cast<char*>(&buffer[size]);
351 outInt = strtoull(&buffer[cursor], &end, 0);
352 if (end == &buffer[cursor] ||
353 (outInt == std::numeric_limits<unsigned long long>::max() &&
358 cursor = end - buffer;
365 * Convenience typedef for the type that we use for all values.
367 typedef unsigned long long valty;
370 * Expression tree currently being parsed.
374 typedef text_input_buffer::source_location source_location;
376 * The type that is returned when computing the result. The boolean value
377 * indicates whether this is a valid expression.
379 * FIXME: Once we can use C++17, this should be `std::optional`.
381 typedef std::pair<valty, bool> result;
383 * Evaluate this node, taking into account operator precedence.
385 virtual result operator()() = 0;
387 * Returns the precedence of this node. Lower values indicate higher
390 virtual int precedence() = 0;
392 * Constructs an expression, storing the location where it was created.
394 expression(source_location l) : loc(l) {}
395 virtual ~expression() {}
398 * Dumps this expression to `std::cerr`, appending a newline if `nl` is
401 void dump(bool nl=false)
406 std::cerr << "{nullptr}\n";
417 * Method that sublcasses override to implement the behaviour of `dump()`.
419 virtual void dump_impl() = 0;
426 * Expression wrapping a single integer. Leaf nodes in the expression tree.
428 class terminal_expr : public expression
431 * The value that this wraps.
435 * Evaluate. Trivially returns the value that this class wraps.
437 result operator()() override
441 int precedence() override
449 terminal_expr(source_location l, valty v) : expression(l), val(v) {}
451 void dump_impl() override { std::cerr << val; }
456 * Parenthetical expression. Exists to make the contents opaque.
458 struct paren_expression : public expression
461 * The expression within the parentheses.
463 expression_ptr subexpr;
465 * Constructor. Takes the child expression as the only argument.
467 paren_expression(source_location l, expression_ptr p) : expression(l),
468 subexpr(std::move(p)) {}
469 int precedence() override
474 * Evaluate - just forwards to the underlying expression.
476 result operator()() override
481 void dump_impl() override
491 * Template class for unary operators. The `OpChar` template parameter is
492 * solely for debugging and makes it easy to print the expression. The `Op`
493 * template parameter is a function object that implements the operator that
494 * this class provides. Most of these are provided by the `<functional>`
497 template<char OpChar, class Op>
498 class unary_operator : public expression
501 * The subexpression for this unary operator.
503 expression_ptr subexpr;
504 result operator()() override
507 result s = (*subexpr)();
512 return {op(s.first), true};
515 * All unary operators have the same precedence. They are all evaluated
516 * before binary expressions, but after parentheses.
518 int precedence() override
523 unary_operator(source_location l, expression_ptr p) :
524 expression(l), subexpr(std::move(p)) {}
526 void dump_impl() override
535 * Abstract base class for binary operators. Allows the tree to be modified
536 * without knowing what the operations actually are.
538 struct binary_operator_base : public expression
540 using expression::expression;
542 * The left side of the expression.
546 * The right side of the expression.
550 * Insert a node somewhere down the path of left children, until it would
551 * be preempting something that should execute first.
553 void insert_left(binary_operator_base *new_left)
555 if (lhs->precedence() < new_left->precedence())
557 new_left->rhs = std::move(lhs);
562 static_cast<binary_operator_base*>(lhs.get())->insert_left(new_left);
568 * Template class for binary operators. The precedence and the operation are
569 * provided as template parameters.
571 template<int Precedence, class Op>
572 struct binary_operator : public binary_operator_base
574 result operator()() override
579 if (!(l.second && r.second))
583 return {op(l.first, r.first), true};
585 int precedence() override
591 * Constructor. Takes the name of the operator as an argument, for
592 * debugging. Only stores it in debug mode.
594 binary_operator(source_location l, const char *) :
595 binary_operator_base(l) {}
598 binary_operator(source_location l, const char *o) :
599 binary_operator_base(l), opName(o) {}
600 void dump_impl() override
610 * Ternary conditional operators (`cond ? true : false`) are a special case -
611 * there are no other ternary operators.
613 class ternary_conditional_operator : public expression
616 * The condition for the clause.
620 * The expression that this evaluates to if the condition is true.
624 * The expression that this evaluates to if the condition is false.
627 result operator()() override
629 result c = (*cond)();
632 if (!(l.second && r.second && c.second))
636 return c.first ? l : r;
638 int precedence() override
640 // The actual precedence of a ternary conditional operator is 15, but
641 // its associativity is the opposite way around to the other operators,
642 // so we fudge it slightly.
646 void dump_impl() override
656 ternary_conditional_operator(source_location sl,
660 expression(sl), cond(std::move(c)), lhs(std::move(l)),
667 constexpr T operator()(const T &lhs, const T &rhs) const
675 constexpr T operator()(const T &lhs, const T &rhs) const
683 constexpr T operator()(const T &val) const
688 // TODO: Replace with std::bit_not once we can guarantee C++14 as a baseline.
692 constexpr T operator()(const T &val) const
699 struct divmod : public binary_operator<5, T>
701 using binary_operator<5, T>::binary_operator;
702 using typename binary_operator_base::result;
703 result operator()() override
705 result r = (*binary_operator_base::rhs)();
706 if (r.second && (r.first == 0))
708 expression::loc.report_error("Division by zero");
711 return binary_operator<5, T>::operator()();
715 } // anonymous namespace
718 expression_ptr text_input_buffer::parse_binary_expression(expression_ptr lhs)
721 binary_operator_base *expr = nullptr;
723 source_location l = location();
729 expr = new binary_operator<6, std::plus<valty>>(l, "+");
732 expr = new binary_operator<6, std::minus<valty>>(l, "-");
735 expr = new divmod<std::modulus<valty>>(l, "/");
738 expr = new binary_operator<5, std::multiplies<valty>>(l, "*");
741 expr = new divmod<std::divides<valty>>(l, "/");
747 parse_error("Invalid operator");
752 expr = new binary_operator<8, std::less<valty>>(l, "<");
756 expr = new binary_operator<8, std::less_equal<valty>>(l, "<=");
760 expr = new binary_operator<7, lshift<valty>>(l, "<<");
768 parse_error("Invalid operator");
773 expr = new binary_operator<8, std::greater<valty>>(l, ">");
777 expr = new binary_operator<8, std::greater_equal<valty>>(l, ">=");
781 expr = new binary_operator<7, rshift<valty>>(l, ">>");
789 parse_error("Invalid operator");
792 expr = new binary_operator<9, std::equal_to<valty>>(l, "==");
797 parse_error("Invalid operator");
801 expr = new binary_operator<9, std::not_equal_to<valty>>(l, "!=");
806 expr = new binary_operator<13, std::logical_and<valty>>(l, "&&");
810 expr = new binary_operator<10, std::bit_and<valty>>(l, "&");
816 expr = new binary_operator<12, std::logical_or<valty>>(l, "||");
820 expr = new binary_operator<14, std::bit_or<valty>>(l, "|");
826 expression_ptr true_case = parse_expression();
828 if (!true_case || !consume(':'))
830 parse_error("Expected : in ternary conditional operator");
833 expression_ptr false_case = parse_expression();
836 parse_error("Expected false condition for ternary operator");
839 return expression_ptr(new ternary_conditional_operator(l, std::move(lhs),
840 std::move(true_case), std::move(false_case)));
845 expression_ptr e(expr);
846 expression_ptr rhs(parse_expression());
851 expr->lhs = std::move(lhs);
852 if (rhs->precedence() < expr->precedence())
854 expr->rhs = std::move(rhs);
858 // If we're a normal left-to-right expression, then we need to insert
859 // this as the far-left child node of the rhs expression
860 binary_operator_base *rhs_op =
861 static_cast<binary_operator_base*>(rhs.get());
862 rhs_op->insert_left(expr);
869 expression_ptr text_input_buffer::parse_expression(bool stopAtParen)
872 unsigned long long leftVal;
874 source_location l = location();
878 if (!consume_integer(leftVal))
882 lhs.reset(new terminal_expr(l, leftVal));
887 expression_ptr &&subexpr = parse_expression();
892 lhs.reset(new paren_expression(l, std::move(subexpr)));
906 expression_ptr &&subexpr = parse_expression();
911 lhs.reset(new unary_operator<'+', unary_plus<valty>>(l, std::move(subexpr)));
917 expression_ptr &&subexpr = parse_expression();
922 lhs.reset(new unary_operator<'-', std::negate<valty>>(l, std::move(subexpr)));
928 expression_ptr &&subexpr = parse_expression();
933 lhs.reset(new unary_operator<'!', std::logical_not<valty>>(l, std::move(subexpr)));
939 expression_ptr &&subexpr = parse_expression();
944 lhs.reset(new unary_operator<'~', bit_not<valty>>(l, std::move(subexpr)));
952 return parse_binary_expression(std::move(lhs));
956 text_input_buffer::consume_integer_expression(unsigned long long &outInt)
962 expression_ptr e(parse_expression(true));
976 return consume_integer(outInt);
983 input_buffer::consume_hex_byte(uint8_t &outByte)
985 if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1]))
989 outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]);
995 text_input_buffer::next_token()
1006 // Parse /* comments
1007 if (*self == '/' && peek() == '*')
1009 // eat the start of the comment
1013 // Find the ending * of */
1014 while ((*self != '\0') && (*self != '*') && !finished())
1020 } while ((*self != '\0') && (*self != '/') && !finished());
1024 // Parse // comments
1025 if ((*self == '/' && peek() == '/'))
1027 // eat the start of the comment
1030 // Find the ending of the line
1031 while (*self != '\n' && !finished())
1038 } while (start != cursor);
1043 text_input_buffer::parse_error(const char *msg)
1045 if (input_stack.empty())
1047 fprintf(stderr, "Error: %s\n", msg);
1050 input_buffer &b = *input_stack.top();
1051 parse_error(msg, b, b.cursor);
1054 text_input_buffer::parse_error(const char *msg,
1061 if (loc < 0 || loc > b.size)
1065 for (int i=loc ; i>0 ; --i)
1067 if (b.buffer[i] == '\n')
1070 if (line_start == 0)
1076 for (int i=loc+1 ; i<b.size ; ++i)
1078 if (b.buffer[i] == '\n')
1084 fprintf(stderr, "Error at %s:%d:%d: %s\n", b.filename().c_str(), line_count, loc - line_start, msg);
1085 fwrite(&b.buffer[line_start], line_end-line_start, 1, stderr);
1087 for (int i=0 ; i<(loc-line_start) ; ++i)
1089 char c = (b.buffer[i+line_start] == '\t') ? '\t' : ' ';
1097 input_buffer::dump()
1099 fprintf(stderr, "Current cursor: %d\n", cursor);
1100 fwrite(&buffer[cursor], size-cursor, 1, stderr);
1108 * The source files are ASCII, so we provide a non-locale-aware version of
1109 * isalpha. This is a class so that it can be used with a template function
1110 * for parsing strings.
1114 static inline bool check(const char c)
1116 return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') &&
1121 * Check whether a character is in the set allowed for node names. This is a
1122 * class so that it can be used with a template function for parsing strings.
1124 struct is_node_name_character
1126 static inline bool check(const char c)
1132 case 'a'...'z': case 'A'...'Z': case '0'...'9':
1133 case ',': case '.': case '+': case '-':
1140 * Check whether a character is in the set allowed for property names. This is
1141 * a class so that it can be used with a template function for parsing strings.
1143 struct is_property_name_character
1145 static inline bool check(const char c)
1151 case 'a'...'z': case 'A'...'Z': case '0'...'9':
1152 case ',': case '.': case '+': case '-':
1160 string parse(text_input_buffer &s)
1162 std::vector<char> bytes;
1163 for (char c=*s ; T::check(c) ; c=*(++s))
1167 return string(bytes.begin(), bytes.end());
1173 text_input_buffer::parse_node_name()
1175 return parse<is_node_name_character>(*this);
1179 text_input_buffer::parse_property_name()
1181 return parse<is_property_name_character>(*this);
1185 text_input_buffer::parse_node_or_property_name(bool &is_property)
1189 return parse_property_name();
1191 std::vector<char> bytes;
1192 for (char c=*(*this) ; is_node_name_character::check(c) ; c=*(++(*this)))
1196 for (char c=*(*this) ; is_property_name_character::check(c) ; c=*(++(*this)))
1201 return string(bytes.begin(), bytes.end());
1205 input_buffer::parse_to(char stop)
1207 std::vector<char> bytes;
1208 for (char c=*(*this) ; c != stop ; c=*(++(*this)))
1212 return string(bytes.begin(), bytes.end());
1216 text_input_buffer::parse_to(char stop)
1218 std::vector<char> bytes;
1219 for (char c=*(*this) ; c != stop ; c=*(++(*this)))
1227 return string(bytes.begin(), bytes.end());
1231 text_input_buffer::peek()
1233 return (*input_stack.top())[1];
1236 std::unique_ptr<input_buffer>
1237 input_buffer::buffer_for_file(const string &path, bool warn)
1241 std::unique_ptr<input_buffer> b(new stream_input_buffer());
1244 int source = open(path.c_str(), O_RDONLY);
1249 fprintf(stderr, "Unable to open file '%s'. %s\n", path.c_str(), strerror(errno));
1254 if (fstat(source, &st) == 0 && S_ISDIR(st.st_mode))
1258 fprintf(stderr, "File %s is a directory\n", path.c_str());
1263 std::unique_ptr<input_buffer> b(new mmap_input_buffer(source, string(path)));