//===--- CommentLexer.h - Lexer for structured comments ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines lexer for structured comments and supporting token class. // //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_AST_COMMENT_LEXER_H #define LLVM_CLANG_AST_COMMENT_LEXER_H #include "clang/Basic/SourceManager.h" #include "clang/Basic/Diagnostic.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/raw_ostream.h" namespace clang { namespace comments { class Lexer; class TextTokenRetokenizer; struct CommandInfo; class CommandTraits; namespace tok { enum TokenKind { eof, newline, text, unknown_command, // Command that does not have an ID. backslash_command, // Command with an ID, that used backslash marker. at_command, // Command with an ID, that used 'at' marker. verbatim_block_begin, verbatim_block_line, verbatim_block_end, verbatim_line_name, verbatim_line_text, html_start_tag, // html_slash_greater, // /> html_end_tag // '. LS_HTMLEndTag }; /// Current lexing mode. LexerState State; /// If State is LS_VerbatimBlock, contains the name of verbatim end /// command, including command marker. SmallString<16> VerbatimBlockEndCommandName; /// Given a character reference name (e.g., "lt"), return the character that /// it stands for (e.g., "<"). StringRef resolveHTMLNamedCharacterReference(StringRef Name) const; /// Given a Unicode codepoint as base-10 integer, return the character. StringRef resolveHTMLDecimalCharacterReference(StringRef Name) const; /// Given a Unicode codepoint as base-16 integer, return the character. StringRef resolveHTMLHexCharacterReference(StringRef Name) const; void formTokenWithChars(Token &Result, const char *TokEnd, tok::TokenKind Kind) { const unsigned TokLen = TokEnd - BufferPtr; Result.setLocation(getSourceLocation(BufferPtr)); Result.setKind(Kind); Result.setLength(TokLen); #ifndef NDEBUG Result.TextPtr = ""; Result.IntVal = 7; #endif BufferPtr = TokEnd; } void formTextToken(Token &Result, const char *TokEnd) { StringRef Text(BufferPtr, TokEnd - BufferPtr); formTokenWithChars(Result, TokEnd, tok::text); Result.setText(Text); } SourceLocation getSourceLocation(const char *Loc) const { assert(Loc >= BufferStart && Loc <= BufferEnd && "Location out of range for this buffer!"); const unsigned CharNo = Loc - BufferStart; return FileLoc.getLocWithOffset(CharNo); } DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) { return Diags.Report(Loc, DiagID); } /// Eat string matching regexp \code \s*\* \endcode. void skipLineStartingDecorations(); /// Lex stuff inside comments. CommentEnd should be set correctly. void lexCommentText(Token &T); void setupAndLexVerbatimBlock(Token &T, const char *TextBegin, char Marker, const CommandInfo *Info); void lexVerbatimBlockFirstLine(Token &T); void lexVerbatimBlockBody(Token &T); void setupAndLexVerbatimLine(Token &T, const char *TextBegin, const CommandInfo *Info); void lexVerbatimLineText(Token &T); void lexHTMLCharacterReference(Token &T); void setupAndLexHTMLStartTag(Token &T); void lexHTMLStartTag(Token &T); void setupAndLexHTMLEndTag(Token &T); void lexHTMLEndTag(Token &T); public: Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags, const CommandTraits &Traits, SourceLocation FileLoc, const char *BufferStart, const char *BufferEnd); void lex(Token &T); StringRef getSpelling(const Token &Tok, const SourceManager &SourceMgr, bool *Invalid = NULL) const; }; } // end namespace comments } // end namespace clang #endif