contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.h

   1 //===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 ///
  10 /// \file
  11 /// \brief This file contains FormatTokenLexer, which tokenizes a source file
  12 /// into a token stream suitable for ClangFormat.
  13 ///
  14 //===----------------------------------------------------------------------===//
  15
  16 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
  17 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
  18
  19 #include "Encoding.h"
  20 #include "FormatToken.h"
  21 #include "clang/Basic/SourceLocation.h"
  22 #include "clang/Basic/SourceManager.h"
  23 #include "clang/Format/Format.h"
  24 #include "llvm/Support/Regex.h"
  25
  26 #include <stack>
  27
  28 namespace clang {
  29 namespace format {
  30
  31 enum LexerState {
  32   NORMAL,
  33   TEMPLATE_STRING,
  34   TOKEN_STASHED,
  35 };
  36
  37 class FormatTokenLexer {
  38 public:
  39   FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
  40                    const FormatStyle &Style, encoding::Encoding Encoding);
  41
  42   ArrayRef<FormatToken *> lex();
  43
  44   const AdditionalKeywords &getKeywords() { return Keywords; }
  45
  46 private:
  47   void tryMergePreviousTokens();
  48
  49   bool tryMergeLessLess();
  50   bool tryMergeNSStringLiteral();
  51
  52   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
  53
  54   // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
  55   bool precedesOperand(FormatToken *Tok);
  56
  57   bool canPrecedeRegexLiteral(FormatToken *Prev);
  58
  59   // Tries to parse a JavaScript Regex literal starting at the current token,
  60   // if that begins with a slash and is in a location where JavaScript allows
  61   // regex literals. Changes the current token to a regex literal and updates
  62   // its text if successful.
  63   void tryParseJSRegexLiteral();
  64
  65   // Handles JavaScript template strings.
  66   //
  67   // JavaScript template strings use backticks ('`') as delimiters, and allow
  68   // embedding expressions nested in ${expr-here}. Template strings can be
  69   // nested recursively, i.e. expressions can contain template strings in turn.
  70   //
  71   // The code below parses starting from a backtick, up to a closing backtick or
  72   // an opening ${. It also maintains a stack of lexing contexts to handle
  73   // nested template parts by balancing curly braces.
  74   void handleTemplateStrings();
  75
  76   void tryParsePythonComment();
  77
  78   bool tryMerge_TMacro();
  79
  80   bool tryMergeConflictMarkers();
  81
  82   FormatToken *getStashedToken();
  83
  84   FormatToken *getNextToken();
  85
  86   FormatToken *FormatTok;
  87   bool IsFirstToken;
  88   std::stack<LexerState> StateStack;
  89   unsigned Column;
  90   unsigned TrailingWhitespace;
  91   std::unique_ptr<Lexer> Lex;
  92   const SourceManager &SourceMgr;
  93   FileID ID;
  94   const FormatStyle &Style;
  95   IdentifierTable IdentTable;
  96   AdditionalKeywords Keywords;
  97   encoding::Encoding Encoding;
  98   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
  99   // Index (in 'Tokens') of the last token that starts a new line.
 100   unsigned FirstInLineIndex;
 101   SmallVector<FormatToken *, 16> Tokens;
 102   SmallVector<IdentifierInfo *, 8> ForEachMacros;
 103
 104   bool FormattingDisabled;
 105
 106   llvm::Regex MacroBlockBeginRegex;
 107   llvm::Regex MacroBlockEndRegex;
 108
 109   void readRawToken(FormatToken &Tok);
 110
 111   void resetLexer(unsigned Offset);
 112 };
 113
 114 } // namespace format
 115 } // namespace clang
 116
 117 #endif