1 //===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "clang/AST/CommentParser.h"
11 #include "clang/AST/CommentCommandTraits.h"
12 #include "clang/AST/CommentDiagnostic.h"
13 #include "clang/AST/CommentSema.h"
14 #include "clang/Basic/CharInfo.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "llvm/Support/ErrorHandling.h"
21 /// Re-lexes a sequence of tok::text tokens.
///
/// Text tokens taken from the Parser are buffered in Toks and their text is
/// read through the Pos cursor, so that words (lexWord) and delimited
/// sequences (lexDelimitedSeq) can be carved out of them. Tokens that were
/// buffered but not used are handed back with putBackLeftoverTokens.
22 class TextTokenRetokenizer {
// Provides the storage for the text of tokens formed by lexWord and
// lexDelimitedSeq.
23 llvm::BumpPtrAllocator &Allocator;
26 /// This flag is set when there are no more tokens we can fetch from lexer.
27 bool NoMoreInterestingTokens;
29 /// Token buffer: tokens we have processed and lookahead.
30 SmallVector<Token, 16> Toks;
32 /// A position in \c Toks.
// BufferStart and BufferEnd delimit the text of the current token, BufferPtr
// is the read cursor inside that text, and BufferStartLoc is the source
// location that corresponds to BufferStart.
35 const char *BufferStart;
36 const char *BufferEnd;
37 const char *BufferPtr;
38 SourceLocation BufferStartLoc;
41 /// Current position in Toks.
// True once the position has moved past the last buffered token.
45 return Pos.CurToken >= Toks.size();
48 /// Sets up the buffer pointers to point to current token.
51 const Token &Tok = Toks[Pos.CurToken];
// The cursor is rewound to the first character of the token text.
53 Pos.BufferStart = Tok.getText().begin();
54 Pos.BufferEnd = Tok.getText().end();
55 Pos.BufferPtr = Pos.BufferStart;
// Remembered so that getSourceLocation can compute locations by offset.
56 Pos.BufferStartLoc = Tok.getLocation();
/// Returns the source location of the character under the cursor: the start
/// location of the current token advanced by the distance of the cursor from
/// the start of the token text.
59 SourceLocation getSourceLocation() const {
60 const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
61 return Pos.BufferStartLoc.getLocWithOffset(CharNo);
// Reading a character requires the cursor to be inside the current token
// text; the character under the cursor is returned.
66 assert(Pos.BufferPtr != Pos.BufferEnd);
67 return *Pos.BufferPtr;
// Advancing has the same precondition as reading.
// NOTE(review): the cursor is presumably advanced between this assertion and
// the end-of-text check below -- confirm against the full definition.
72 assert(Pos.BufferPtr != Pos.BufferEnd);
// The text of the current token is exhausted.
74 if (Pos.BufferPtr == Pos.BufferEnd) {
// The buffered tokens are exhausted as well and no further token could be
// added.
76 if (isEnd() && !addToken())
85 /// Returns true on success, false if there are no interesting tokens to
// A previous call already ran into a token that is not text.
88 if (NoMoreInterestingTokens)
91 if (P.Tok.is(tok::newline)) {
92 // If we see a single newline token between text tokens, skip it.
// The newline token is kept in a local copy while the token after it is
// inspected.
93 Token Newline = P.Tok;
// The token after the newline is not text: stop fetching from now on.
95 if (P.Tok.isNot(tok::text)) {
97 NoMoreInterestingTokens = true;
// The current parser token is not text: stop fetching from now on.
101 if (P.Tok.isNot(tok::text)) {
102 NoMoreInterestingTokens = true;
// Buffer the text token.
106 Toks.push_back(P.Tok);
// The first buffered token is handled specially.
// NOTE(review): presumably this is where the buffer pointers get their
// initial values -- confirm.
108 if (Toks.size() == 1)
/// Whitespace handling at the cursor, driven by isWhitespace(peek()).
113 void consumeWhitespace() {
115 if (isWhitespace(peek()))
/// Initializes Result as a tok::text token with the given location, length
/// and text.
122 void formTokenWithChars(Token &Result,
124 const char *TokBegin,
127 Result.setLocation(Loc);
128 Result.setKind(tok::text);
129 Result.setLength(TokLength);
// Placeholder text pointer; the actual text is supplied through setText on
// the following line.
131 Result.TextPtr = "<UNSET>";
134 Result.setText(Text);
/// Binds the retokenizer to an allocator and a parser. The flag
/// NoMoreInterestingTokens starts out cleared.
138 TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
139 Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
144 /// Extract a word -- sequence of non-whitespace characters.
///
/// The collected characters are copied into memory obtained from Allocator
/// and Tok is formed from that copy.
145 bool lexWord(Token &Tok) {
// Position on entry.
149 Position SavedPos = Pos;
// Scratch buffer for the characters of the word; WordBegin and Loc record
// where the word starts in the token text and in the source.
152 SmallString<32> WordText;
153 const char *WordBegin = Pos.BufferPtr;
154 SourceLocation Loc = getSourceLocation();
156 const char C = peek();
// Only characters that are not whitespace become part of the word.
157 if (!isWhitespace(C)) {
158 WordText.push_back(C);
163 const unsigned Length = WordText.size();
// One extra byte is allocated and copied so that the terminating NUL provided
// by c_str() is included; the StringRef itself covers Length characters.
169 char *TextPtr = Allocator.Allocate<char>(Length + 1);
171 memcpy(TextPtr, WordText.c_str(), Length + 1);
172 StringRef Text = StringRef(TextPtr, Length);
174 formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
/// Extracts a sequence that begins with OpenDelim; the character that ends
/// the scan is checked against CloseDelim.
///
/// The collected characters are copied into memory obtained from Allocator
/// and Tok is formed from that copy.
178 bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
// Position on entry.
182 Position SavedPos = Pos;
// Scratch buffer for the characters of the sequence; WordBegin and Loc record
// where the sequence starts in the token text and in the source.
185 SmallString<32> WordText;
186 const char *WordBegin = Pos.BufferPtr;
187 SourceLocation Loc = getSourceLocation();
190 const char C = peek();
// The sequence has to start with the opening delimiter, which is kept as part
// of the text.
191 if (C == OpenDelim) {
192 WordText.push_back(C);
// Collect characters until an error is flagged or the input runs out.
198 while (!Error && !isEnd()) {
200 WordText.push_back(C);
// The last character seen is not the closing delimiter.
205 if (!Error && C != CloseDelim)
213 const unsigned Length = WordText.size();
// One extra byte is allocated and copied so that the terminating NUL provided
// by c_str() is included; the StringRef itself covers Length characters.
214 char *TextPtr = Allocator.Allocate<char>(Length + 1);
216 memcpy(TextPtr, WordText.c_str(), Length + 1);
217 StringRef Text = StringRef(TextPtr, Length);
// Unlike lexWord, the token length passed here is measured in the token text
// (cursor minus start of the sequence) rather than taken from WordText.
219 formTokenWithChars(Tok, Loc, WordBegin,
220 Pos.BufferPtr - WordBegin, Text);
224 /// Put back tokens that we didn't consume.
225 void putBackLeftoverTokens() {
229 bool HavePartialTok = false;
// The cursor is not at the start of the current token: form a token from the
// unread remainder of its text, located at the cursor.
231 if (Pos.BufferPtr != Pos.BufferStart) {
232 formTokenWithChars(PartialTok, getSourceLocation(),
233 Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
234 StringRef(Pos.BufferPtr,
235 Pos.BufferEnd - Pos.BufferPtr));
236 HavePartialTok = true;
// Hand the buffered tokens from the current position onwards back to the
// parser, then mark the whole buffer as consumed.
240 P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
241 Pos.CurToken = Toks.size();
// The partial token is put back after the whole tokens.
244 P.putBack(PartialTok);
/// Initializes the members L, S, Allocator, SourceMgr and Diags from the
/// constructor arguments of the same name.
248 Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
249 const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
250 const CommandTraits &Traits):
251 L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
/// Parses the arguments of a param command from the retokenized text.
///
/// A direction specification in square brackets is looked for first, then a
/// word that names the parameter. Each argument that is found is reported to
/// Sema for the command \p PC.
256 void Parser::parseParamCommandArgs(ParamCommandComment *PC,
257 TextTokenRetokenizer &Retokenizer) {
259 // Check if argument looks like direction specification: [dir]
260 // e.g., [in], [out], [in,out]
261 if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
262 S.actOnParamCommandDirectionArg(PC,
264 Arg.getEndLocation(),
// The parameter name is looked for whether or not a direction was found.
267 if (Retokenizer.lexWord(Arg))
268 S.actOnParamCommandParamNameArg(PC,
270 Arg.getEndLocation(),
/// Parses the argument of a tparam command from the retokenized text: a
/// single word, which is reported to Sema as the parameter name of \p TPC.
274 void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
275 TextTokenRetokenizer &Retokenizer) {
// Nothing is reported when no word can be extracted.
277 if (Retokenizer.lexWord(Arg))
278 S.actOnTParamCommandParamNameArg(TPC,
280 Arg.getEndLocation(),
/// Parses up to NumArgs word arguments of the block command \p BC from the
/// retokenized text and reports the ones that were found to Sema.
284 void Parser::parseBlockCommandArgs(BlockCommandComment *BC,
285 TextTokenRetokenizer &Retokenizer,
287 typedef BlockCommandComment::Argument Argument;
// Room for NumArgs arguments is taken from Allocator up front and
// default-constructed in place.
289 new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
290 unsigned ParsedArgs = 0;
// Stop when all expected arguments are parsed or no further word is available.
292 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
293 Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
294 Arg.getEndLocation()),
// Only the first ParsedArgs entries are handed over.
299 S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
/// Parses a block command that starts at the current token, which must be a
/// backslash or at command.
///
/// Depending on the command info, a param command, a tparam command or a
/// plain block command is started through Sema. Its arguments are then parsed
/// from retokenized text, and a paragraph (possibly empty) is attached when
/// the command is finished.
302 BlockCommandComment *Parser::parseBlockCommand() {
303 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
// Which of these pointers gets set depends on the kind of command.
305 ParamCommandComment *PC = 0;
306 TParamCommandComment *TPC = 0;
307 BlockCommandComment *BC = 0;
308 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
// Records whether the command was spelled with a backslash or an at sign.
309 CommandMarkerKind CommandMarker =
310 Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
311 if (Info->IsParamCommand) {
312 PC = S.actOnParamCommandStart(Tok.getLocation(),
313 Tok.getEndLocation(),
316 } else if (Info->IsTParamCommand) {
317 TPC = S.actOnTParamCommandStart(Tok.getLocation(),
318 Tok.getEndLocation(),
322 BC = S.actOnBlockCommandStart(Tok.getLocation(),
323 Tok.getEndLocation(),
329 if (isTokBlockCommand()) {
330 // Block command ahead. We can't nest block commands, so pretend that this
331 // command has an empty argument.
332 ParagraphComment *Paragraph = S.actOnParagraphComment(None);
// The empty paragraph is attached through the finish call that matches the
// kind of command.
334 S.actOnParamCommandFinish(PC, Paragraph);
337 S.actOnTParamCommandFinish(TPC, Paragraph);
340 S.actOnBlockCommandFinish(BC, Paragraph);
// Param and tparam commands always go through argument parsing; other block
// commands only when their info declares at least one argument.
345 if (PC || TPC || Info->NumArgs > 0) {
346 // In order to parse command arguments we need to retokenize a few
347 // following text tokens.
348 TextTokenRetokenizer Retokenizer(Allocator, *this);
351 parseParamCommandArgs(PC, Retokenizer);
353 parseTParamCommandArgs(TPC, Retokenizer);
355 parseBlockCommandArgs(BC, Retokenizer, Info->NumArgs);
// Give the tokens that the arguments did not use back to the parser.
357 Retokenizer.putBackLeftoverTokens();
360 // If there's a block command ahead, we will attach an empty paragraph to
362 bool EmptyParagraph = false;
363 if (isTokBlockCommand())
364 EmptyParagraph = true;
// When the current token is a newline, the decision is taken by a second
// isTokBlockCommand() check.
365 else if (Tok.is(tok::newline)) {
368 EmptyParagraph = isTokBlockCommand();
// The paragraph is either an empty one or the result of parsing the content
// that follows.
372 ParagraphComment *Paragraph;
374 Paragraph = S.actOnParagraphComment(None);
376 BlockContentComment *Block = parseParagraphOrBlockCommand();
377 // Since we have checked for a block command, we should have parsed a
379 Paragraph = cast<ParagraphComment>(Block);
// Attach the paragraph through the finish call that matches the kind of
// command.
383 S.actOnParamCommandFinish(PC, Paragraph);
386 S.actOnTParamCommandFinish(TPC, Paragraph);
389 S.actOnBlockCommandFinish(BC, Paragraph);
/// Parses an inline command at the current token, which must be a backslash
/// or at command, together with at most one word argument taken from
/// retokenized text.
394 InlineCommandComment *Parser::parseInlineCommand() {
395 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
// Copy of the command token; its locations and command ID are used below.
397 const Token CommandTok = Tok;
400 TextTokenRetokenizer Retokenizer(Allocator, *this);
// ArgTokValid tells whether a word argument could be extracted.
403 bool ArgTokValid = Retokenizer.lexWord(ArgTok);
405 InlineCommandComment *IC;
// Form with an argument: the argument locations are passed along.
407 IC = S.actOnInlineCommand(CommandTok.getLocation(),
408 CommandTok.getEndLocation(),
409 CommandTok.getCommandID(),
410 ArgTok.getLocation(),
411 ArgTok.getEndLocation(),
// Form without an argument.
414 IC = S.actOnInlineCommand(CommandTok.getLocation(),
415 CommandTok.getEndLocation(),
416 CommandTok.getCommandID());
// Give the tokens that were not used back to the parser.
419 Retokenizer.putBackLeftoverTokens();
/// Parses an HTML start tag that begins at the current token, which must be
/// an html_start_tag token.
///
/// Attributes are collected in Attrs while the tokens of the tag are examined
/// by kind. The tag is finished through Sema with a copy of the collected
/// attributes, and malformed input is reported through Diag.
424 HTMLStartTagComment *Parser::parseHTMLStartTag() {
425 assert(Tok.is(tok::html_start_tag));
426 HTMLStartTagComment *HST =
427 S.actOnHTMLStartTagStart(Tok.getLocation(),
428 Tok.getHTMLTagStartName());
431 SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
433 switch (Tok.getKind()) {
434 case tok::html_ident: {
// No equals sign after the identifier: record an attribute that consists of
// the name only.
437 if (Tok.isNot(tok::html_equals)) {
438 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
439 Ident.getHTMLIdent()));
// An equals sign that is not followed by a quoted string: warn, pointing at
// the equals sign, and still record the attribute by name only.
444 if (Tok.isNot(tok::html_quoted_string)) {
445 Diag(Tok.getLocation(),
446 diag::warn_doc_html_start_tag_expected_quoted_string)
447 << SourceRange(Equals.getLocation());
448 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
449 Ident.getHTMLIdent()));
// Loop over any run of equals signs and quoted strings that follows.
450 while (Tok.is(tok::html_equals) ||
451 Tok.is(tok::html_quoted_string))
// Complete attribute: name, location of the equals sign, source range of the
// value and the value itself.
455 Attrs.push_back(HTMLStartTagComment::Attribute(
457 Ident.getHTMLIdent(),
458 Equals.getLocation(),
459 SourceRange(Tok.getLocation(),
460 Tok.getEndLocation()),
461 Tok.getHTMLQuotedString()));
// The tag is closed by html_greater: finish it as not self-closing.
466 case tok::html_greater:
467 S.actOnHTMLStartTagFinish(HST,
468 S.copyArray(llvm::makeArrayRef(Attrs)),
470 /* IsSelfClosing = */ false);
// The tag is closed by html_slash_greater: finish it as self-closing.
474 case tok::html_slash_greater:
475 S.actOnHTMLStartTagFinish(HST,
476 S.copyArray(llvm::makeArrayRef(Attrs)),
478 /* IsSelfClosing = */ true);
// An equals sign or a quoted string in a place where an identifier or the end
// of the tag was expected: warn and loop over the run of such tokens.
482 case tok::html_equals:
483 case tok::html_quoted_string:
484 Diag(Tok.getLocation(),
485 diag::warn_doc_html_start_tag_expected_ident_or_greater);
486 while (Tok.is(tok::html_equals) ||
487 Tok.is(tok::html_quoted_string))
// After the run, check whether the current token can still belong to the tag
// (an identifier or one of the two closing tokens).
489 if (Tok.is(tok::html_ident) ||
490 Tok.is(tok::html_greater) ||
491 Tok.is(tok::html_slash_greater))
494 S.actOnHTMLStartTagFinish(HST,
495 S.copyArray(llvm::makeArrayRef(Attrs)),
497 /* IsSelfClosing = */ false);
501 // Not a token from an HTML start tag. Thus HTML tag prematurely ended.
502 S.actOnHTMLStartTagFinish(HST,
503 S.copyArray(llvm::makeArrayRef(Attrs)),
505 /* IsSelfClosing = */ false);
// StartLine and EndLine are presumed line numbers obtained from the source
// manager, each with its own validity flag.
506 bool StartLineInvalid;
507 const unsigned StartLine = SourceMgr.getPresumedLineNumber(
511 const unsigned EndLine = SourceMgr.getPresumedLineNumber(
// When either lookup is invalid or both line numbers are equal, one warning
// carries the source range of the tag. Otherwise the warning is emitted
// without a range and a separate note at the tag location carries it.
514 if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
515 Diag(Tok.getLocation(),
516 diag::warn_doc_html_start_tag_expected_ident_or_greater)
517 << HST->getSourceRange();
519 Diag(Tok.getLocation(),
520 diag::warn_doc_html_start_tag_expected_ident_or_greater);
521 Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
522 << HST->getSourceRange();
/// Parses an HTML end tag that begins at the current token, which must be an
/// html_end_tag token, and returns the comment node created by Sema.
529 HTMLEndTagComment *Parser::parseHTMLEndTag() {
530 assert(Tok.is(tok::html_end_tag));
// Copy of the end tag token; its location and tag name are used below.
531 Token TokEndTag = Tok;
// A following html_greater token supplies Loc.
534 if (Tok.is(tok::html_greater)) {
535 Loc = Tok.getLocation();
539 return S.actOnHTMLEndTag(TokEndTag.getLocation(),
541 TokEndTag.getHTMLTagEndName());
/// Parses either a block command or a paragraph.
///
/// Inline content is accumulated in Content while tokens are examined by
/// kind. A block command that is met before any content was collected is
/// parsed and returned directly; otherwise the accumulated content is turned
/// into a paragraph through Sema.
544 BlockContentComment *Parser::parseParagraphOrBlockCommand() {
545 SmallVector<InlineContentComment *, 8> Content;
548 switch (Tok.getKind()) {
549 case tok::verbatim_block_begin:
550 case tok::verbatim_line_name:
// Some content is expected to have been collected by the time one of these
// tokens is met.
552 assert(Content.size() != 0);
553 break; // Block content or EOF ahead, finish this paragraph.
// A command that the lexer could not identify becomes an unknown-command node.
555 case tok::unknown_command:
556 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
557 Tok.getEndLocation(),
558 Tok.getUnknownCommandName()));
562 case tok::backslash_command:
563 case tok::at_command: {
564 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
565 if (Info->IsBlockCommand) {
// A block command with no content collected yet is parsed on its own.
566 if (Content.size() == 0)
567 return parseBlockCommand();
568 break; // Block command ahead, finish this paragraph.
// A command that ends a verbatim block is diagnosed when it is met here; the
// first diagnostic argument tells whether it was spelled with an at sign.
570 if (Info->IsVerbatimBlockEndCommand) {
571 Diag(Tok.getLocation(),
572 diag::warn_verbatim_block_end_without_start)
573 << Tok.is(tok::at_command)
575 << SourceRange(Tok.getLocation(), Tok.getEndLocation());
579 if (Info->IsUnknownCommand) {
580 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
581 Tok.getEndLocation(),
// Anything that reaches this point is expected to be an inline command.
586 assert(Info->IsInlineCommand);
587 Content.push_back(parseInlineCommand());
593 if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
595 break; // Two newlines -- end of paragraph.
// Record the newline on the last piece of content, if there is any.
597 if (Content.size() > 0)
598 Content.back()->addTrailingNewline();
602 // Don't deal with HTML tag soup now.
603 case tok::html_start_tag:
604 Content.push_back(parseHTMLStartTag());
607 case tok::html_end_tag:
608 Content.push_back(parseHTMLEndTag());
// Text is added to the content as a text node.
612 Content.push_back(S.actOnText(Tok.getLocation(),
613 Tok.getEndLocation(),
// These token kinds are not expected at this level.
618 case tok::verbatim_block_line:
619 case tok::verbatim_block_end:
620 case tok::verbatim_line_text:
621 case tok::html_ident:
622 case tok::html_equals:
623 case tok::html_quoted_string:
624 case tok::html_greater:
625 case tok::html_slash_greater:
626 llvm_unreachable("should not see this token");
// The paragraph receives a copy of the collected content made by Sema.
631 return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content)));
/// Parses a verbatim block that begins at the current token, which must be a
/// verbatim_block_begin token.
///
/// The lines of the block are collected in Lines; the block is finished
/// through Sema with a copy of them, both when the closing command is found
/// and when it is missing.
634 VerbatimBlockComment *Parser::parseVerbatimBlock() {
635 assert(Tok.is(tok::verbatim_block_begin));
637 VerbatimBlockComment *VB =
638 S.actOnVerbatimBlockStart(Tok.getLocation(),
639 Tok.getVerbatimBlockID());
642 // Don't create an empty line if verbatim opening command is followed
644 if (Tok.is(tok::newline))
647 SmallVector<VerbatimBlockLineComment *, 8> Lines;
// Each iteration produces one line from a verbatim_block_line token or from a
// bare newline.
648 while (Tok.is(tok::verbatim_block_line) ||
649 Tok.is(tok::newline)) {
650 VerbatimBlockLineComment *Line;
651 if (Tok.is(tok::verbatim_block_line)) {
652 Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
653 Tok.getVerbatimBlockText());
655 if (Tok.is(tok::newline)) {
659 // Empty line, just a tok::newline.
660 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
663 Lines.push_back(Line);
// Properly terminated block: finish it at the location of the closing token.
666 if (Tok.is(tok::verbatim_block_end)) {
667 const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
668 S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
670 S.copyArray(llvm::makeArrayRef(Lines)));
673 // Unterminated \\verbatim block
// Finished with a default-constructed location and an empty string in place
// of the closing command.
674 S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
675 S.copyArray(llvm::makeArrayRef(Lines)));
/// Parses a verbatim line that begins at the current token, which must be a
/// verbatim_line_name token. The text of the line is taken from a following
/// verbatim_line_text token when there is one.
681 VerbatimLineComment *Parser::parseVerbatimLine() {
682 assert(Tok.is(tok::verbatim_line_name));
687 SourceLocation TextBegin;
689 // Next token might not be a tok::verbatim_line_text if verbatim line
690 // starting command comes just before a newline or comment end.
691 if (Tok.is(tok::verbatim_line_text)) {
692 TextBegin = Tok.getLocation();
693 Text = Tok.getVerbatimLineText();
// Without a text token, the text is considered to begin where the name token
// ends.
695 TextBegin = NameTok.getEndLocation();
699 VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
700 NameTok.getVerbatimLineID(),
/// Parses one piece of block content, dispatching on the kind of the current
/// token to the paragraph / block command parser, the verbatim block parser
/// or the verbatim line parser.
707 BlockContentComment *Parser::parseBlockContent() {
708 switch (Tok.getKind()) {
// Commands and HTML tags start a paragraph or a block command.
710 case tok::unknown_command:
711 case tok::backslash_command:
712 case tok::at_command:
713 case tok::html_start_tag:
714 case tok::html_end_tag:
715 return parseParagraphOrBlockCommand();
717 case tok::verbatim_block_begin:
718 return parseVerbatimBlock();
720 case tok::verbatim_line_name:
721 return parseVerbatimLine();
// These token kinds only occur inside constructs handled by the parsers
// above and are not expected at this level.
725 case tok::verbatim_block_line:
726 case tok::verbatim_block_end:
727 case tok::verbatim_line_text:
728 case tok::html_ident:
729 case tok::html_equals:
730 case tok::html_quoted_string:
731 case tok::html_greater:
732 case tok::html_slash_greater:
733 llvm_unreachable("should not see this token");
// Reached only for a token kind that none of the cases covers.
735 llvm_unreachable("bogus token kind");
/// Parses a whole comment: block content is parsed repeatedly until the eof
/// token is reached, and the collected blocks are handed to Sema to build the
/// full comment.
738 FullComment *Parser::parseFullComment() {
739 // Skip newlines at the beginning of the comment.
740 while (Tok.is(tok::newline))
743 SmallVector<BlockContentComment *, 8> Blocks;
744 while (Tok.isNot(tok::eof)) {
745 Blocks.push_back(parseBlockContent());
747 // Skip extra newlines after paragraph end.
748 while (Tok.is(tok::newline))
// The full comment receives a copy of the collected blocks made by Sema.
751 return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks)));
754 } // end namespace comments
755 } // end namespace clang