contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.h

   1 //===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 ///
  10 /// \file
  11 /// \brief This file contains FormatTokenLexer, which tokenizes a source file
  12 /// into a token stream suitable for ClangFormat.
  13 ///
  14 //===----------------------------------------------------------------------===//
  15
  16 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
  17 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
  18
  19 #include "Encoding.h"
  20 #include "FormatToken.h"
  21 #include "clang/Basic/SourceLocation.h"
  22 #include "clang/Basic/SourceManager.h"
  23 #include "clang/Format/Format.h"
  24 #include "llvm/Support/Regex.h"
  25
  26 #include <stack>
  27
  28 namespace clang {
  29 namespace format {
  30
  31 enum LexerState {
  32   NORMAL,
  33   TEMPLATE_STRING,
  34   TOKEN_STASHED,
  35 };
  36
  37 class FormatTokenLexer {
  38 public:
  39   FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
  40                    const FormatStyle &Style, encoding::Encoding Encoding);
  41
  42   ArrayRef<FormatToken *> lex();
  43
  44   const AdditionalKeywords &getKeywords() { return Keywords; }
  45
  46 private:
  47   void tryMergePreviousTokens();
  48
  49   bool tryMergeLessLess();
  50
  51   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
  52
  53   // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
  54   bool precedesOperand(FormatToken *Tok);
  55
  56   bool canPrecedeRegexLiteral(FormatToken *Prev);
  57
  58   // Tries to parse a JavaScript Regex literal starting at the current token,
  59   // if that begins with a slash and is in a location where JavaScript allows
  60   // regex literals. Changes the current token to a regex literal and updates
  61   // its text if successful.
  62   void tryParseJSRegexLiteral();
  63
  64   // Handles JavaScript template strings.
  65   //
  66   // JavaScript template strings use backticks ('`') as delimiters, and allow
  67   // embedding expressions nested in ${expr-here}. Template strings can be
  68   // nested recursively, i.e. expressions can contain template strings in turn.
  69   //
  70   // The code below parses starting from a backtick, up to a closing backtick or
  71   // an opening ${. It also maintains a stack of lexing contexts to handle
  72   // nested template parts by balancing curly braces.
  73   void handleTemplateStrings();
  74
  75   bool tryMerge_TMacro();
  76
  77   bool tryMergeConflictMarkers();
  78
  79   FormatToken *getStashedToken();
  80
  81   FormatToken *getNextToken();
  82
  83   FormatToken *FormatTok;
  84   bool IsFirstToken;
  85   std::stack<LexerState> StateStack;
  86   unsigned Column;
  87   unsigned TrailingWhitespace;
  88   std::unique_ptr<Lexer> Lex;
  89   const SourceManager &SourceMgr;
  90   FileID ID;
  91   const FormatStyle &Style;
  92   IdentifierTable IdentTable;
  93   AdditionalKeywords Keywords;
  94   encoding::Encoding Encoding;
  95   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
  96   // Index (in 'Tokens') of the last token that starts a new line.
  97   unsigned FirstInLineIndex;
  98   SmallVector<FormatToken *, 16> Tokens;
  99   SmallVector<IdentifierInfo *, 8> ForEachMacros;
 100
 101   bool FormattingDisabled;
 102
 103   llvm::Regex MacroBlockBeginRegex;
 104   llvm::Regex MacroBlockEndRegex;
 105
 106   void readRawToken(FormatToken &Tok);
 107
 108   void resetLexer(unsigned Offset);
 109 };
 110
 111 } // namespace format
 112 } // namespace clang
 113
 114 #endif