contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.h

   1 //===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 ///
  10 /// \file
  11 /// This file contains FormatTokenLexer, which tokenizes a source file
  12 /// into a token stream suitable for ClangFormat.
  13 ///
  14 //===----------------------------------------------------------------------===//
  15
  16 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
  17 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
  18
  19 #include "Encoding.h"
  20 #include "FormatToken.h"
  21 #include "clang/Basic/SourceLocation.h"
  22 #include "clang/Basic/SourceManager.h"
  23 #include "clang/Format/Format.h"
  24 #include "llvm/Support/Regex.h"
  25 #include "llvm/ADT/MapVector.h"
  26
  27 #include <stack>
  28
  29 namespace clang {
  30 namespace format {
  31
  32 enum LexerState {
  33   NORMAL,
  34   TEMPLATE_STRING,
  35   TOKEN_STASHED,
  36 };
  37
  38 class FormatTokenLexer {
  39 public:
  40   FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
  41                    const FormatStyle &Style, encoding::Encoding Encoding);
  42
  43   ArrayRef<FormatToken *> lex();
  44
  45   const AdditionalKeywords &getKeywords() { return Keywords; }
  46
  47 private:
  48   void tryMergePreviousTokens();
  49
  50   bool tryMergeLessLess();
  51   bool tryMergeNSStringLiteral();
  52
  53   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
  54
  55   // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
  56   bool precedesOperand(FormatToken *Tok);
  57
  58   bool canPrecedeRegexLiteral(FormatToken *Prev);
  59
  60   // Tries to parse a JavaScript Regex literal starting at the current token,
  61   // if that begins with a slash and is in a location where JavaScript allows
  62   // regex literals. Changes the current token to a regex literal and updates
  63   // its text if successful.
  64   void tryParseJSRegexLiteral();
  65
  66   // Handles JavaScript template strings.
  67   //
  68   // JavaScript template strings use backticks ('`') as delimiters, and allow
  69   // embedding expressions nested in ${expr-here}. Template strings can be
  70   // nested recursively, i.e. expressions can contain template strings in turn.
  71   //
  72   // The code below parses starting from a backtick, up to a closing backtick or
  73   // an opening ${. It also maintains a stack of lexing contexts to handle
  74   // nested template parts by balancing curly braces.
  75   void handleTemplateStrings();
  76
  77   void tryParsePythonComment();
  78
  79   bool tryMerge_TMacro();
  80
  81   bool tryMergeConflictMarkers();
  82
  83   FormatToken *getStashedToken();
  84
  85   FormatToken *getNextToken();
  86
  87   FormatToken *FormatTok;
  88   bool IsFirstToken;
  89   std::stack<LexerState> StateStack;
  90   unsigned Column;
  91   unsigned TrailingWhitespace;
  92   std::unique_ptr<Lexer> Lex;
  93   const SourceManager &SourceMgr;
  94   FileID ID;
  95   const FormatStyle &Style;
  96   IdentifierTable IdentTable;
  97   AdditionalKeywords Keywords;
  98   encoding::Encoding Encoding;
  99   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
 100   // Index (in 'Tokens') of the last token that starts a new line.
 101   unsigned FirstInLineIndex;
 102   SmallVector<FormatToken *, 16> Tokens;
 103
 104   llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros;
 105
 106   bool FormattingDisabled;
 107
 108   llvm::Regex MacroBlockBeginRegex;
 109   llvm::Regex MacroBlockEndRegex;
 110
 111   void readRawToken(FormatToken &Tok);
 112
 113   void resetLexer(unsigned Offset);
 114 };
 115
 116 } // namespace format
 117 } // namespace clang
 118
 119 #endif