2 * Copyright (c) 2013 David Chisnall
5 * This software was developed by SRI International and the University of
6 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
7 * ("CTSRD"), as part of the DARPA CRASH research programme.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 #ifndef _INPUT_BUFFER_HH_
34 #define _INPUT_BUFFER_HH_
39 #include <unordered_set>
46 typedef std::unique_ptr<expression> expression_ptr;
50 * Class encapsulating the input file. Can be used as a const char*, but has
51 * range checking. Attempting to access anything out of range will return a 0
52 * byte. The input buffer can be cheaply copied, without copying the
53 * underlying memory, however it is the user's responsibility to ensure that
54 * such copies do not persist beyond the lifetime of the underlying memory.
56 * This also contains methods for reporting errors and for consuming the token
61 friend class text_input_buffer;
64 * The buffer. This class doesn't own the buffer, but the
65 * mmap_input_buffer subclass does.
69 * The size of the buffer.
74 * The current place in the buffer where we are reading. This class
75 * keeps a separate size, pointer, and cursor so that we can move
76 * forwards and backwards and still have checks that we haven't fallen
81 * Private constructor. This is used to create input buffers that
82 * refer to the same memory, but have different cursors.
84 input_buffer(const char* b, int s, int c) : buffer(b), size(s),
88 * Returns the file name associated with this buffer.
90 virtual const std::string &filename() const
95 static std::unique_ptr<input_buffer> buffer_for_file(const std::string &path,
98 * Skips all characters in the input until the specified character is
103 * Parses up to a specified character and returns the intervening
104 * characters as a string.
106 std::string parse_to(char);
108 * Return whether all input has been consumed.
110 bool finished() { return cursor >= size; } // True once the cursor is at or past the end of the buffer.
112 * Virtual destructor. Does nothing, but exists so that subclasses
113 * that own the memory can run cleanup code for deallocating it.
115 virtual ~input_buffer() {}; // Empty body; declared virtual for the benefit of subclasses.
117 * Constructs an empty buffer.
119 input_buffer() : buffer(0), size(0), cursor(0) {} // Null buffer pointer, zero size, cursor at offset 0.
121 * Constructs a new buffer with a specified memory region and size.
123 input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0){} // Stores the pointer and size as given (no copy is made); cursor starts at offset 0.
125 * Returns a new input buffer referring into this input, clamped to the
126 * specified size. If the requested buffer would fall outside the
127 * range of this one, then it returns an empty buffer.
129 * The returned buffer shares the same underlying storage as the
130 * original. This is intended to be used for splitting up the various
131 * sections of a device tree blob. Requesting a size of 0 will give a
132 * buffer that extends to the end of the available memory.
134 input_buffer buffer_from_offset(int offset, int s=0);
136 * Dereferencing operator, allows the buffer to be treated as a char*
137 * and dereferenced to give a character. This returns a null byte if
138 * the cursor is out of range.
140 inline char operator*()
142 if (cursor >= size) { return '\0'; }
143 if (cursor < 0) { return '\0'; }
144 return buffer[cursor];
147 * Array subscripting operator, returns a character at the specified
148 * index offset from the current cursor. The offset may be negative,
149 * to reread characters that have already been read. If the current
150 * cursor plus offset is outside of the range, this returns a nul
153 inline char operator[](int offset)
155 if (cursor + offset >= size) { return '\0'; }
156 if (cursor + offset < 0) { return '\0'; }
157 return buffer[cursor + offset];
160 * Increments the cursor, iterating forward in the buffer.
162 inline input_buffer &operator++()
168 * Consumes a character. Moves the cursor one character forward if the
169 * next character matches the argument, returning true. If the current
170 * character does not match the argument, returns false.
172 inline bool consume(char c)
182 * Consumes a string. If the (null-terminated) string passed as the
183 * argument appears in the input, advances the cursor to the end and
184 * returns true. Returns false if the string does not appear at the
185 * current point in the input.
187 bool consume(const char *str);
189 * Reads an integer in base 8, 10, or 16. Returns true and advances
190 * the cursor to the end of the integer if the cursor points to an
191 * integer, returns false and does not move the cursor otherwise.
193 * The parsed value is returned via the argument.
195 bool consume_integer(unsigned long long &outInt);
197 * Reads an arithmetic expression (containing any of the normal C
198 * operators), evaluates it, and returns the result.
200 bool consume_integer_expression(unsigned long long &outInt);
202 * Consumes two hex digits and returns the resulting byte via the first
203 * argument. If the next two characters are hex digits, returns true
204 * and advances the cursor. If not, then returns false and leaves the
207 bool consume_hex_byte(uint8_t &outByte);
209 * Template function that consumes a binary value in big-endian format
210 * from the input stream. Returns true and advances the cursor if
211 * there is a value of the correct size. This function assumes that
212 * all values must be natively aligned, and so advances the cursor to
213 * the correct alignment before reading.
216 bool consume_binary(T &out)
219 int type_size = sizeof(T);
220 if (cursor % type_size != 0)
222 align = type_size - (cursor % type_size);
224 if (size < cursor + align + type_size)
229 assert(cursor % type_size == 0);
231 for (int i=0 ; i<type_size ; ++i)
238 out |= (((T)buffer[cursor++]) & 0xff);
244 * Dumps the current cursor value and the unconsumed values in the
245 * input buffer to the standard error. This method is intended solely
252 * Explicit specialisation for reading a single byte.
255 inline bool input_buffer::consume_binary(uint8_t &out)
257 if (size < cursor + 1)
261 out = buffer[cursor++];
266 * An input buffer subclass used for parsing DTS files. This manages a stack
267 * of input buffers to handle /include/ operations.
269 class text_input_buffer
271 std::unordered_set<std::string> defines;
273 * The cursor is the index into the input stream where we are currently reading.
277 * The current stack of includes. The current input is always from the top
280 std::stack<std::shared_ptr<input_buffer>> input_stack;
284 const std::vector<std::string> include_paths;
286 * Reads forward past any spaces. The DTS format is not whitespace
287 * sensitive and so we want to scan past whitespace when reading it.
291 * Returns the character immediately after the current one.
293 * This method does not look between files.
297 * If a /include/ token is encountered, then look up the corresponding
298 * input file, push it onto the input stack, and continue.
300 void handle_include();
302 * The base directory for this file.
304 const std::string dir;
306 * The file where dependencies should be output.
311 * Construct a new text input buffer with the specified buffer as the start
312 * of parsing and the specified set of input paths for handling new
315 text_input_buffer(std::unique_ptr<input_buffer> &&b,
316 std::unordered_set<std::string> &&d,
317 std::vector<std::string> &&i,
318 const std::string directory,
320 : defines(d), include_paths(i), dir(directory), depfile(deps)
322 input_stack.push(std::move(b));
325 * Skips all characters in the input until the specified character is
330 * Parse an expression. If `stopAtParen` is set, then only parse a number
331 * or a parenthetical expression, otherwise assume that either is the
332 * left-hand side of a binary expression and try to parse the right-hand
335 expression_ptr parse_expression(bool stopAtParen=false);
337 * Parse a binary expression, having already parsed the left-hand side.
339 expression_ptr parse_binary_expression(expression_ptr lhs);
341 * Return whether all input has been consumed.
345 return input_stack.empty() ||
346 ((input_stack.size() == 1) && input_stack.top()->finished());
349 * Dereferencing operator. Returns the current character in the top input buffer.
351 inline char operator*()
353 if (input_stack.empty())
357 return *(*input_stack.top());
360 * Increments the cursor, iterating forward in the buffer.
362 inline text_input_buffer &operator++()
364 if (input_stack.empty())
369 auto &top = *input_stack.top();
378 * Consumes a character. Moves the cursor one character forward if the
379 * next character matches the argument, returning true. If the current
380 * character does not match the argument, returns false.
382 inline bool consume(char c)
392 * Consumes a string. If the (null-terminated) string passed as the
393 * argument appears in the input, advances the cursor to the end and
394 * returns true. Returns false if the string does not appear at the
395 * current point in the input.
397 * This method does not scan between files.
399 bool consume(const char *str)
401 if (input_stack.empty())
405 return input_stack.top()->consume(str);
408 * Reads an integer in base 8, 10, or 16. Returns true and advances
409 * the cursor to the end of the integer if the cursor points to an
410 * integer, returns false and does not move the cursor otherwise.
412 * The parsed value is returned via the argument.
414 * This method does not scan between files.
416 bool consume_integer(unsigned long long &outInt)
418 if (input_stack.empty())
422 return input_stack.top()->consume_integer(outInt);
425 * Reads an arithmetic expression (containing any of the normal C
426 * operators), evaluates it, and returns the result.
428 bool consume_integer_expression(unsigned long long &outInt);
430 * Consumes two hex digits and returns the resulting byte via the first
431 * argument. If the next two characters are hex digits, returns true
432 * and advances the cursor. If not, then returns false and leaves the
435 * This method does not scan between files.
437 bool consume_hex_byte(uint8_t &outByte)
439 if (input_stack.empty())
443 return input_stack.top()->consume_hex_byte(outByte);
446 * Returns the longest string in the input buffer starting at the
447 * current cursor and composed entirely of characters that are valid in
450 std::string parse_node_name();
452 * Returns the longest string in the input buffer starting at the
453 * current cursor and composed entirely of characters that are valid in
456 std::string parse_property_name();
458 * Parses either a node or a property name. If is_property is true on
459 * entry, then only property names are parsed. If it is false, then it
460 * will be set, on return, to indicate whether the parsed name is only
461 * valid as a property.
463 std::string parse_node_or_property_name(bool &is_property);
465 * Parses up to a specified character and returns the intervening
466 * characters as a string.
468 std::string parse_to(char);
470 * Advances the cursor to the start of the next token, skipping
471 * comments and whitespace. If the cursor already points to the start
472 * of a token, then this function does nothing.
474 text_input_buffer &next_token();
476 * Location in the source file. This should never be interpreted by
477 * anything other than error reporting functions of this class. It will
478 * eventually become something more complex than an `int`.
480 class source_location
482 friend class text_input_buffer;
484 * The text buffer object that included `b`.
486 text_input_buffer &buffer;
488 * The underlying buffer that contains this location.
490 std::shared_ptr<input_buffer> b;
492 * The offset within the current buffer of the source location.
495 source_location(text_input_buffer &buf)
497 b(buf.input_stack.empty() ? nullptr : buf.input_stack.top()),
498 cursor(b ? b->cursor : 0) {}
501 * Report an error at this location.
503 void report_error(const char *msg)
507 buffer.parse_error(msg, *b, cursor);
511 buffer.parse_error(msg);
516 * Returns the current source location.
518 source_location location()
523 * Prints a message indicating the location of a parse error.
525 void parse_error(const char *msg);
528 * Prints a message indicating the location of a parse error, given a
529 * specified location. This is used when input has already moved beyond
530 * the location that caused the failure.
532 void parse_error(const char *msg, input_buffer &b, int loc);
537 #endif // !_INPUT_BUFFER_HH_