1 //===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This class represents the Lexer for tablegen files.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
15 #define LLVM_LIB_TABLEGEN_TGLEXER_H
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSet.h"
20 #include "llvm/Support/DataTypes.h"
21 #include "llvm/Support/SMLoc.h"
37 // Tokens with no info.
39 l_square, r_square, // [ ]
40 l_brace, r_brace, // { }
41 l_paren, r_paren, // ( )
45 equal, question, // = ?
49 Bit, Bits, Class, Code, Dag, Def, Foreach, Defm, Field, In, Int, Let, List,
50 MultiClass, String, Defset,
53 XConcat, XADD, XAND, XOR, XSRA, XSRL, XSHL, XListConcat, XStrConcat, XCast,
54 XSubst, XForEach, XFoldl, XHead, XTail, XSize, XEmpty, XIf, XEq, XIsA, XDag,
55 XNe, XLe, XLt, XGe, XGt,
60 // Binary constant. Note that these are sized according to the number of
64 // String valued tokens.
65 Id, StrVal, VarName, CodeFragment,
67 // Preprocessing tokens for internal usage by the lexer.
68 // They are never returned as a result of Lex().
69 Ifdef, Else, Endif, Define
73 /// TGLexer - TableGen Lexer class.
80 // Information about the current token.
82 tgtok::TokKind CurCode;
83 std::string CurStrVal; // This is valid for ID, STRVAL, VARNAME, CODEFRAGMENT
84 int64_t CurIntVal; // This is valid for INTVAL.
86 /// CurBuffer - This is the current buffer index we're lexing from as managed
87 /// by the SourceMgr object.
91 typedef std::map<std::string, SMLoc> DependenciesMapTy;
93 /// Dependencies - This is the list of all included files.
94 DependenciesMapTy Dependencies;
97 TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
99 tgtok::TokKind Lex() {
100 return CurCode = LexToken(CurPtr == CurBuf.begin());
103 const DependenciesMapTy &getDependencies() const {
107 tgtok::TokKind getCode() const { return CurCode; }
109 const std::string &getCurStrVal() const {
110 assert((CurCode == tgtok::Id || CurCode == tgtok::StrVal ||
111 CurCode == tgtok::VarName || CurCode == tgtok::CodeFragment) &&
112 "This token doesn't have a string value");
115 int64_t getCurIntVal() const {
116 assert(CurCode == tgtok::IntVal && "This token isn't an integer");
119 std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
120 assert(CurCode == tgtok::BinaryIntVal &&
121 "This token isn't a binary integer");
122 return std::make_pair(CurIntVal, (CurPtr - TokStart)-2);
125 SMLoc getLoc() const;
128 /// LexToken - Read the next token and return its code.
129 tgtok::TokKind LexToken(bool FileOrLineStart = false);
131 tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
132 tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
135 int peekNextChar(int Index) const;
136 void SkipBCPLComment();
138 tgtok::TokKind LexIdentifier();
140 tgtok::TokKind LexString();
141 tgtok::TokKind LexVarName();
142 tgtok::TokKind LexNumber();
143 tgtok::TokKind LexBracket();
144 tgtok::TokKind LexExclaim();
146 // Process EOF encountered in LexToken().
147 // If EOF is met in an include file, then the method will update
148 // CurPtr, CurBuf and preprocessing include stack, and return true.
149 // If EOF is met in the top-level file, then the method will
150 // update and check the preprocessing include stack, and return false.
153 // *** Structures and methods for preprocessing support ***
155 // A set of macro names that are defined either via command line or
158 StringSet<> DefinedMacros;
160 // Each of #ifdef and #else directives has a descriptor associated
163 // An ordered list of preprocessing controls defined by #ifdef/#else
164 // directives that are in effect currently is called preprocessing
165 // control stack. It is represented as a vector of PreprocessorControlDesc's.
167 // The control stack is updated according to the following rules:
169 // For each #ifdef we add an element to the control stack.
170 // For each #else we replace the top element with a descriptor
171 // with an inverted IsDefined value.
172 // For each #endif we pop the top element from the control stack.
174 // When CurPtr reaches the current buffer's end, the control stack
175 // must be empty, i.e. #ifdef and the corresponding #endif
176 // must be located in the same file.
177 struct PreprocessorControlDesc {
178 // Either tgtok::Ifdef or tgtok::Else.
181 // True, if the condition for this directive is true, false - otherwise.
183 // #ifdef NAME : true, if NAME is defined, false - otherwise.
185 // #else : false, if NAME is defined, true - otherwise.
188 // Pointer into CurBuf to the beginning of the preprocessing directive
195 // We want to disallow code like this:
199 // include "file2.td"
205 // To do this, we clear the preprocessing control stack on entry
206 // to each of the included files. PrepIncludeStack is used to store
207 // preprocessing control stacks for the current file and all its
208 // parent files. The back() element is the preprocessing control
209 // stack for the current file.
210 std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>
213 // Validate that the current preprocessing control stack is empty,
214 // since we are about to exit a file, and pop the include stack.
216 // If IncludeStackMustBeEmpty is true, the include stack must be empty
217 // after the popping, otherwise, the include stack must not be empty
218 // after the popping. Basically, the include stack must be empty
219 // only if we exit the "top-level" file (i.e. finish lexing).
221 // The method returns false, if the current preprocessing control stack
222 // is not empty (e.g. there is an unterminated #ifdef/#else),
224 bool prepExitInclude(bool IncludeStackMustBeEmpty);
226 // Look ahead for a preprocessing directive starting from CurPtr. The caller
227 // must only call this method, if *(CurPtr - 1) is '#'. If the method matches
228 // a preprocessing directive word followed by a whitespace, then it returns
229 // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define.
231 // CurPtr is not adjusted by this method.
232 tgtok::TokKind prepIsDirective() const;
234 // Given a preprocessing token kind, adjusts CurPtr to the end
235 // of the preprocessing directive word. Returns true, unless
236 // an unsupported token kind is passed in.
238 // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
239 // to avoid adjusting CurPtr before we are sure that '#' is followed
240 // by a preprocessing directive. If it is not, then we fall back to
241 // tgtok::paste interpretation of '#'.
242 bool prepEatPreprocessorDirective(tgtok::TokKind Kind);
244 // The main "exit" point from the token parsing to preprocessor.
246 // The method is called for CurPtr, when prepIsDirective() returns
247 // true. The first parameter matches the result of prepIsDirective(),
248 // denoting the actual preprocessor directive to be processed.
250 // If the preprocessing directive disables the tokens processing, e.g.:
251 // #ifdef NAME // NAME is undefined
252 // then lexPreprocessor() enters the lines-skipping mode.
253 // In this mode, it does not parse any tokens, because the code under
254 // the #ifdef may not even be correct tablegen code. The preprocessor
255 // looks for lines containing other preprocessing directives, which
256 // may be prepended with whitespaces and C-style comments. If the line
257 // does not contain a preprocessing directive, it is skipped completely.
258 // Otherwise, the preprocessing directive is processed by recursively
259 // calling lexPreprocessor(). The processing of the encountered
260 // preprocessing directives includes updating preprocessing control stack
261 // and adding new macros into DefinedMacros set.
263 // The second parameter controls whether lexPreprocessor() is called from
264 // LexToken() (true) or recursively from lexPreprocessor() (false).
266 // If ReturnNextLiveToken is true, the method returns the next
267 // LEX token following the current directive or following the end
268 // of the disabled preprocessing region corresponding to this directive.
269 // If ReturnNextLiveToken is false, the method returns the first parameter,
270 // unless there were errors encountered in the disabled preprocessing
271 // region - in this case, it returns tgtok::Error.
272 tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
273 bool ReturnNextLiveToken = true);
275 // Worker method for lexPreprocessor() to skip lines after some
276 // preprocessing directive up to the buffer end or to the directive
277 // that re-enables token processing. The method returns true
278 // upon processing the next directive that re-enables tokens
279 // processing. False is returned if an error was encountered.
281 // Note that prepSkipRegion() calls lexPreprocessor() to process
282 // encountered preprocessing directives. In this case, the second
283 // parameter to lexPreprocessor() is set to false. Being passed
284 // false ReturnNextLiveToken, lexPreprocessor() must never call
285 // prepSkipRegion(). We assert this by passing ReturnNextLiveToken
286 // to prepSkipRegion() and checking that it is never set to false.
287 bool prepSkipRegion(bool MustNeverBeFalse);
289 // Lex name of the macro after either #ifdef or #define. We could have used
290 // LexIdentifier(), but it has special handling of "include" word, which
291 // could result in awkward diagnostic errors. Consider:
296 // LexIdentifier() will engage LexInclude(), which will complain about
297 // missing file with name "class". Instead, prepLexMacroName() will treat
298 // "include" as a normal macro name.
300 // On entry, CurPtr points to the end of a preprocessing directive word.
301 // The method allows for whitespaces between the preprocessing directive
302 // and the macro name. The allowed whitespaces are ' ' and '\t'.
304 // If the first non-whitespace symbol after the preprocessing directive
305 // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
306 // the method updates TokStart to the position of the first non-whitespace
307 // symbol, sets CurPtr to the position of the macro name's last symbol,
308 // and returns a string reference to the macro name. Otherwise,
309 // TokStart is set to the first non-whitespace symbol after the preprocessing
310 // directive, and the method returns an empty string reference.
312 // In all cases, TokStart may be used to point to the word following
313 // the preprocessing directive.
314 StringRef prepLexMacroName();
316 // Skip any whitespaces starting from CurPtr. The method is used
317 // only in the lines-skipping mode to find the first non-whitespace
318 // symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n'
319 // and '\r'. The method skips C-style comments as well, because
320 // it is used to find the beginning of the preprocessing directive.
321 // If we do not handle C-style comments the following code would
322 // result in incorrect detection of a preprocessing directive:
326 // As long as we skip C-style comments, the following code is correctly
327 // recognized as a preprocessing directive:
328 // /* first line comment
329 // second line comment */ #ifdef NAME
331 // The method returns true upon reaching the first non-whitespace symbol
332 // or EOF, CurPtr is set to point to this symbol. The method returns false,
333 // if an error occurred during skipping of a C-style comment.
334 bool prepSkipLineBegin();
336 // Skip any whitespaces or comments after a preprocessing directive.
337 // The method returns true upon reaching either end of the line
338 // or end of the file. If there is a multiline C-style comment
339 // after the preprocessing directive, the method skips
340 // the comment, so the final CurPtr may point to one of the next lines.
341 // The method returns false, if an error occurred during skipping
342 // C- or C++-style comment, or a non-whitespace symbol appears
343 // after the preprocessing directive.
345 // The method may be called both during lines-skipping and tokens
346 // processing. It actually verifies that only whitespaces and/or
347 // comments follow a preprocessing directive.
349 // After the execution of this method, CurPtr points either to new line
350 // symbol, buffer end or non-whitespace symbol following the preprocessing
352 bool prepSkipDirectiveEnd();
354 // Skip all symbols to the end of the line/file.
355 // The method adjusts CurPtr, so that it points to either new line
356 // symbol in the current line or the buffer end.
357 void prepSkipToLineEnd();
359 // Return true, if the current preprocessor control stack is such that
360 // we should allow lexer to process the next token, false - otherwise.
362 // In particular, the method returns true, if all the #ifdef/#else
363 // controls on the stack have their IsDefined member set to true.
364 bool prepIsProcessingEnabled();
366 // Report an error, if we reach EOF with non-empty preprocessing control
367 // stack. This means there is no matching #endif for the previous
369 void prepReportPreprocessorStackError();
372 } // end namespace llvm