1 //===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "clang/AST/CommentParser.h"
11 #include "clang/AST/CommentCommandTraits.h"
12 #include "clang/AST/CommentDiagnostic.h"
13 #include "clang/AST/CommentSema.h"
14 #include "clang/Basic/CharInfo.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "llvm/Support/ErrorHandling.h"
/// Examines the characters of \p S one at a time with the single-character
/// isWhitespace() overload.
/// NOTE(review): the return statements are not visible in this excerpt;
/// presumably the result is true only if every character is whitespace --
/// confirm.
20 static inline bool isWhitespace(llvm::StringRef S) {
21 for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
// Triggered by the first character that is not whitespace.
22 if (!isWhitespace(*I))
30 /// Re-lexes a sequence of tok::text tokens.
///
/// Tokens taken from the parser are buffered in \c Toks and read one character
/// at a time through the pointers in \c Pos.  The text of each re-lexed token
/// is copied into memory obtained from \c Allocator.
31 class TextTokenRetokenizer {
/// Provides storage for the text of tokens formed by lexWord() and
/// lexDelimitedSeq().
32 llvm::BumpPtrAllocator &Allocator;
35 /// This flag is set when there are no more tokens we can fetch from lexer.
36 bool NoMoreInterestingTokens;
38 /// Token buffer: tokens we have processed and lookahead.
39 SmallVector<Token, 16> Toks;
41 /// A position in \c Toks.
/// BufferStart and BufferEnd delimit the text of the current token.
43 const char *BufferStart;
44 const char *BufferEnd;
/// Next character to read; equal to BufferEnd once the text is exhausted.
45 const char *BufferPtr;
/// Source location corresponding to BufferStart.
46 SourceLocation BufferStartLoc;
50 /// Current position in Toks.
// The end is reached once CurToken has moved past the last buffered token.
54 return Pos.CurToken >= Toks.size();
57 /// Sets up the buffer pointers to point to current token.
60 const Token &Tok = Toks[Pos.CurToken];
// Reading starts at the first character of the token's text.
62 Pos.BufferStart = Tok.getText().begin();
63 Pos.BufferEnd = Tok.getText().end();
64 Pos.BufferPtr = Pos.BufferStart;
65 Pos.BufferStartLoc = Tok.getLocation();
/// Returns the location of the current read position: the start location of
/// the current token advanced by the number of characters already read.
68 SourceLocation getSourceLocation() const {
69 const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
70 return Pos.BufferStartLoc.getLocWithOffset(CharNo);
// There must be an unread character in the current token.
75 assert(Pos.BufferPtr != Pos.BufferEnd);
76 return *Pos.BufferPtr;
81 assert(Pos.BufferPtr != Pos.BufferEnd);
// The text of the current token has been used up.
83 if (Pos.BufferPtr == Pos.BufferEnd) {
// No buffered token is left either, so try to fetch one with addToken().
85 if (isEnd() && !addToken())
94 /// Returns true on success, false if there are no interesting tokens to
97 if (NoMoreInterestingTokens)
100 if (P.Tok.is(tok::newline)) {
101 // If we see a single newline token between text tokens, skip it.
102 Token Newline = P.Tok;
// Any token other than text ends the run of interesting tokens.
104 if (P.Tok.isNot(tok::text)) {
106 NoMoreInterestingTokens = true;
110 if (P.Tok.isNot(tok::text)) {
111 NoMoreInterestingTokens = true;
// Buffer the parser's current text token.
115 Toks.push_back(P.Tok);
// This is the first token in the buffer.
117 if (Toks.size() == 1)
/// Handles whitespace at the current read position, as reported by peek().
122 void consumeWhitespace() {
124 if (isWhitespace(peek()))
/// Initializes \p Result as a tok::text token located at \p Loc with length
/// \p TokLength and text \p Text.
131 void formTokenWithChars(Token &Result,
133 const char *TokBegin,
136 Result.setLocation(Loc);
137 Result.setKind(tok::text);
138 Result.setLength(TokLength);
// Placeholder text pointer; the setText() call below supplies the real text.
140 Result.TextPtr = "<UNSET>";
143 Result.setText(Text);
/// Creates a retokenizer that allocates from \p Allocator and takes its tokens
/// from parser \p P.
147 TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
148 Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
153 /// Extract a word -- sequence of non-whitespace characters.
154 bool lexWord(Token &Tok) {
// Snapshot of the position on entry.
158 Position SavedPos = Pos;
// Characters of the word are collected here.
161 SmallString<32> WordText;
// Where the word starts, both in the token text and in the source.
162 const char *WordBegin = Pos.BufferPtr;
163 SourceLocation Loc = getSourceLocation();
165 const char C = peek();
// Non-whitespace characters are appended to the word.
166 if (!isWhitespace(C)) {
167 WordText.push_back(C);
172 const unsigned Length = WordText.size();
// Copy the word and its terminating NUL into allocator-owned memory; the
// StringRef covers the word without the NUL.
178 char *TextPtr = Allocator.Allocate<char>(Length + 1);
180 memcpy(TextPtr, WordText.c_str(), Length + 1);
181 StringRef Text = StringRef(TextPtr, Length);
183 formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
/// Extracts a delimited sequence into \p Tok.  The visible checks compare the
/// first character with \p OpenDelim and a later character with \p CloseDelim.
187 bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
// Snapshot of the position on entry.
191 Position SavedPos = Pos;
194 SmallString<32> WordText;
195 const char *WordBegin = Pos.BufferPtr;
196 SourceLocation Loc = getSourceLocation();
199 const char C = peek();
// The sequence has to begin with the opening delimiter.
200 if (C == OpenDelim) {
201 WordText.push_back(C);
// Keep collecting characters while no error occurred and input remains.
207 while (!Error && !isEnd()) {
209 WordText.push_back(C);
214 if (!Error && C != CloseDelim)
// Copy the collected text and its terminating NUL into allocator-owned memory.
222 const unsigned Length = WordText.size();
223 char *TextPtr = Allocator.Allocate<char>(Length + 1);
225 memcpy(TextPtr, WordText.c_str(), Length + 1);
226 StringRef Text = StringRef(TextPtr, Length);
// Unlike lexWord(), the token length passed here is the distance scanned in
// the buffer, not the length of the collected text.
228 formTokenWithChars(Tok, Loc, WordBegin,
229 Pos.BufferPtr - WordBegin, Text);
233 /// Put back tokens that we didn't consume.
234 void putBackLeftoverTokens() {
238 bool HavePartialTok = false;
// Part of the current token has been read: form a token from its unread tail.
240 if (Pos.BufferPtr != Pos.BufferStart) {
241 formTokenWithChars(PartialTok, getSourceLocation(),
242 Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
243 StringRef(Pos.BufferPtr,
244 Pos.BufferEnd - Pos.BufferPtr));
245 HavePartialTok = true;
// Give the buffered tokens from CurToken onwards back to the parser and mark
// the buffer as fully consumed.
249 P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
250 Pos.CurToken = Toks.size();
// Give the partial token back to the parser.
253 P.putBack(PartialTok);
/// Constructs a comment parser that keeps the lexer \p L, semantic actions
/// \p S, allocator \p Allocator, source manager \p SourceMgr and diagnostics
/// engine \p Diags it is given.
/// NOTE(review): the rest of the initializer list (including \p Traits) is not
/// visible in this excerpt.
257 Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
258 const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
259 const CommandTraits &Traits):
260 L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
/// Parses the arguments of the parameter command \p PC using \p Retokenizer:
/// first a bracketed direction specification, then a word naming the
/// parameter.  Each one is reported to Sema only if it could be lexed.
265 void Parser::parseParamCommandArgs(ParamCommandComment *PC,
266 TextTokenRetokenizer &Retokenizer) {
268 // Check if argument looks like direction specification: [dir]
269 // e.g., [in], [out], [in,out]
270 if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
271 S.actOnParamCommandDirectionArg(PC,
273 Arg.getEndLocation(),
// The next word is taken as the parameter name.
276 if (Retokenizer.lexWord(Arg))
277 S.actOnParamCommandParamNameArg(PC,
279 Arg.getEndLocation(),
/// Parses the argument of the template parameter command \p TPC: a single word
/// lexed from \p Retokenizer, reported to Sema as the parameter name if one
/// was found.
283 void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
284 TextTokenRetokenizer &Retokenizer) {
286 if (Retokenizer.lexWord(Arg))
287 S.actOnTParamCommandParamNameArg(TPC,
289 Arg.getEndLocation(),
/// Parses up to NumArgs word arguments for the block command \p BC from
/// \p Retokenizer and passes the arguments that were parsed to Sema.
293 void Parser::parseBlockCommandArgs(BlockCommandComment *BC,
294 TextTokenRetokenizer &Retokenizer,
296 typedef BlockCommandComment::Argument Argument;
// Storage for NumArgs arguments is taken from the parser's allocator.
298 new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
299 unsigned ParsedArgs = 0;
// Stop once NumArgs words were read or no further word can be lexed.
301 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
302 Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
303 Arg.getEndLocation()),
// Only the first ParsedArgs entries are handed over.
308 S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
/// Parses a block command: the command itself, its arguments and the paragraph
/// attached to it.  Sema is notified when the command starts and when it is
/// finished.
311 BlockCommandComment *Parser::parseBlockCommand() {
312 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
// One of these is filled in below, according to the kind of command.
314 ParamCommandComment *PC = nullptr;
315 TParamCommandComment *TPC = nullptr;
316 BlockCommandComment *BC = nullptr;
317 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
// Records whether the command was introduced with a backslash or with '@'.
318 CommandMarkerKind CommandMarker =
319 Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
320 if (Info->IsParamCommand) {
321 PC = S.actOnParamCommandStart(Tok.getLocation(),
322 Tok.getEndLocation(),
325 } else if (Info->IsTParamCommand) {
326 TPC = S.actOnTParamCommandStart(Tok.getLocation(),
327 Tok.getEndLocation(),
331 BC = S.actOnBlockCommandStart(Tok.getLocation(),
332 Tok.getEndLocation(),
338 if (isTokBlockCommand()) {
339 // Block command ahead. We can't nest block commands, so pretend that this
340 // command has an empty argument.
341 ParagraphComment *Paragraph = S.actOnParagraphComment(None);
343 S.actOnParamCommandFinish(PC, Paragraph);
346 S.actOnTParamCommandFinish(TPC, Paragraph);
349 S.actOnBlockCommandFinish(BC, Paragraph);
// Param and tparam commands, and commands declaring NumArgs > 0, take
// arguments.
354 if (PC || TPC || Info->NumArgs > 0) {
355 // In order to parse command arguments we need to retokenize a few
356 // following text tokens.
357 TextTokenRetokenizer Retokenizer(Allocator, *this);
360 parseParamCommandArgs(PC, Retokenizer);
362 parseTParamCommandArgs(TPC, Retokenizer);
364 parseBlockCommandArgs(BC, Retokenizer, Info->NumArgs);
// Tokens the retokenizer buffered but did not use go back to the parser.
366 Retokenizer.putBackLeftoverTokens();
369 // If there's a block command ahead, we will attach an empty paragraph to
371 bool EmptyParagraph = false;
372 if (isTokBlockCommand())
373 EmptyParagraph = true;
374 else if (Tok.is(tok::newline)) {
377 EmptyParagraph = isTokBlockCommand();
381 ParagraphComment *Paragraph;
383 Paragraph = S.actOnParagraphComment(None);
385 BlockContentComment *Block = parseParagraphOrBlockCommand();
386 // Since we have checked for a block command, we should have parsed a
388 Paragraph = cast<ParagraphComment>(Block);
// Attach the paragraph to the command that was started above.
392 S.actOnParamCommandFinish(PC, Paragraph);
395 S.actOnTParamCommandFinish(TPC, Paragraph);
398 S.actOnBlockCommandFinish(BC, Paragraph);
/// Parses an inline command.  A single word is re-lexed from the text that
/// follows as the command's argument; Sema is called either with or without
/// argument information.
403 InlineCommandComment *Parser::parseInlineCommand() {
404 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
// Keep a copy of the command token.
406 const Token CommandTok = Tok;
409 TextTokenRetokenizer Retokenizer(Allocator, *this);
// Records whether a word argument could be lexed.
412 bool ArgTokValid = Retokenizer.lexWord(ArgTok);
414 InlineCommandComment *IC;
// Form with an argument: the location range of ArgTok is passed along.
416 IC = S.actOnInlineCommand(CommandTok.getLocation(),
417 CommandTok.getEndLocation(),
418 CommandTok.getCommandID(),
419 ArgTok.getLocation(),
420 ArgTok.getEndLocation(),
// Form without an argument.
423 IC = S.actOnInlineCommand(CommandTok.getLocation(),
424 CommandTok.getEndLocation(),
425 CommandTok.getCommandID());
// Tokens the retokenizer buffered but did not use go back to the parser.
428 Retokenizer.putBackLeftoverTokens();
/// Parses an HTML start tag and its attributes.  Attributes are collected in
/// Attrs and handed to Sema when the tag is finished; unexpected tokens are
/// reported through Diag().
433 HTMLStartTagComment *Parser::parseHTMLStartTag() {
434 assert(Tok.is(tok::html_start_tag));
435 HTMLStartTagComment *HST =
436 S.actOnHTMLStartTagStart(Tok.getLocation(),
437 Tok.getHTMLTagStartName());
440 SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
442 switch (Tok.getKind()) {
443 case tok::html_ident: {
// No '=' follows the identifier: record an attribute that has a name only.
446 if (Tok.isNot(tok::html_equals)) {
447 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
448 Ident.getHTMLIdent()));
// No quoted string where one was expected: warn, record the attribute by name
// only, then loop over following '=' and quoted-string tokens.
453 if (Tok.isNot(tok::html_quoted_string)) {
454 Diag(Tok.getLocation(),
455 diag::warn_doc_html_start_tag_expected_quoted_string)
456 << SourceRange(Equals.getLocation());
457 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
458 Ident.getHTMLIdent()));
459 while (Tok.is(tok::html_equals) ||
460 Tok.is(tok::html_quoted_string))
// Attribute with a name, an '=' and a quoted value.
464 Attrs.push_back(HTMLStartTagComment::Attribute(
466 Ident.getHTMLIdent(),
467 Equals.getLocation(),
468 SourceRange(Tok.getLocation(),
469 Tok.getEndLocation()),
470 Tok.getHTMLQuotedString()));
// '>' finishes a tag that is not self-closing.
475 case tok::html_greater:
476 S.actOnHTMLStartTagFinish(HST,
477 S.copyArray(llvm::makeArrayRef(Attrs)),
479 /* IsSelfClosing = */ false);
// '/>' finishes a self-closing tag.
483 case tok::html_slash_greater:
484 S.actOnHTMLStartTagFinish(HST,
485 S.copyArray(llvm::makeArrayRef(Attrs)),
487 /* IsSelfClosing = */ true);
// '=' or a quoted string at this point is reported with a warning.
491 case tok::html_equals:
492 case tok::html_quoted_string:
493 Diag(Tok.getLocation(),
494 diag::warn_doc_html_start_tag_expected_ident_or_greater);
495 while (Tok.is(tok::html_equals) ||
496 Tok.is(tok::html_quoted_string))
// Checks for a token that can still belong to the start tag.
498 if (Tok.is(tok::html_ident) ||
499 Tok.is(tok::html_greater) ||
500 Tok.is(tok::html_slash_greater))
503 S.actOnHTMLStartTagFinish(HST,
504 S.copyArray(llvm::makeArrayRef(Attrs)),
506 /* IsSelfClosing = */ false);
510 // Not a token from an HTML start tag. Thus HTML tag prematurely ended.
511 S.actOnHTMLStartTagFinish(HST,
512 S.copyArray(llvm::makeArrayRef(Attrs)),
514 /* IsSelfClosing = */ false);
515 bool StartLineInvalid;
516 const unsigned StartLine = SourceMgr.getPresumedLineNumber(
520 const unsigned EndLine = SourceMgr.getPresumedLineNumber(
// If a line number is unavailable or both line numbers are equal, the tag's
// source range is attached to the warning itself.
523 if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
524 Diag(Tok.getLocation(),
525 diag::warn_doc_html_start_tag_expected_ident_or_greater)
526 << HST->getSourceRange();
// Warning without a range, followed by a note at the location of the tag.
528 Diag(Tok.getLocation(),
529 diag::warn_doc_html_start_tag_expected_ident_or_greater);
530 Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
531 << HST->getSourceRange();
/// Parses an HTML end tag and returns the node Sema creates for it from the
/// location and name of the end-tag token.
538 HTMLEndTagComment *Parser::parseHTMLEndTag() {
539 assert(Tok.is(tok::html_end_tag));
// Keep a copy of the end-tag token.
540 Token TokEndTag = Tok;
// When a '>' token is present, its location is recorded in Loc.
543 if (Tok.is(tok::html_greater)) {
544 Loc = Tok.getLocation();
548 return S.actOnHTMLEndTag(TokEndTag.getLocation(),
550 TokEndTag.getHTMLTagEndName());
/// Parses a paragraph made of inline content (text, inline and unknown
/// commands, HTML tags).  If a block command is met before any content has
/// been collected, that block command is parsed and returned instead.
553 BlockContentComment *Parser::parseParagraphOrBlockCommand() {
// Inline content collected for the paragraph so far.
554 SmallVector<InlineContentComment *, 8> Content;
557 switch (Tok.getKind()) {
558 case tok::verbatim_block_begin:
559 case tok::verbatim_line_name:
561 assert(Content.size() != 0);
562 break; // Block content or EOF ahead, finish this paragraph.
564 case tok::unknown_command:
565 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
566 Tok.getEndLocation(),
567 Tok.getUnknownCommandName()));
571 case tok::backslash_command:
572 case tok::at_command: {
573 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
574 if (Info->IsBlockCommand) {
// Nothing collected yet: the block command is parsed in place of a paragraph.
575 if (Content.size() == 0)
576 return parseBlockCommand();
577 break; // Block command ahead, finish this paragraph.
// A verbatim block end command seen here is reported with a warning.
579 if (Info->IsVerbatimBlockEndCommand) {
580 Diag(Tok.getLocation(),
581 diag::warn_verbatim_block_end_without_start)
582 << Tok.is(tok::at_command)
584 << SourceRange(Tok.getLocation(), Tok.getEndLocation());
588 if (Info->IsUnknownCommand) {
589 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
590 Tok.getEndLocation(),
// Remaining commands are expected to be inline commands.
595 assert(Info->IsInlineCommand);
596 Content.push_back(parseInlineCommand());
602 if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
604 break; // Two newlines -- end of paragraph.
606 // Also allow [tok::newline, tok::text, tok::newline] if the middle
607 // tok::text is just whitespace.
608 if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
609 Token WhitespaceTok = Tok;
611 if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
615 // We have [tok::newline, tok::text, non-newline]. Put back tok::text.
616 putBack(WhitespaceTok);
// Mark the most recently collected item as followed by a newline.
618 if (Content.size() > 0)
619 Content.back()->addTrailingNewline();
623 // Don't deal with HTML tag soup now.
624 case tok::html_start_tag:
625 Content.push_back(parseHTMLStartTag());
628 case tok::html_end_tag:
629 Content.push_back(parseHTMLEndTag());
633 Content.push_back(S.actOnText(Tok.getLocation(),
634 Tok.getEndLocation(),
// These token kinds are not expected at paragraph level.
639 case tok::verbatim_block_line:
640 case tok::verbatim_block_end:
641 case tok::verbatim_line_text:
642 case tok::html_ident:
643 case tok::html_equals:
644 case tok::html_quoted_string:
645 case tok::html_greater:
646 case tok::html_slash_greater:
647 llvm_unreachable("should not see this token");
// Build the paragraph from a copy of the collected content.
652 return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content)));
/// Parses a verbatim block: the opening command, the lines of the block and,
/// if present, the closing command.  The collected lines are passed to Sema
/// when the block is finished.
655 VerbatimBlockComment *Parser::parseVerbatimBlock() {
656 assert(Tok.is(tok::verbatim_block_begin));
658 VerbatimBlockComment *VB =
659 S.actOnVerbatimBlockStart(Tok.getLocation(),
660 Tok.getVerbatimBlockID());
663 // Don't create an empty line if verbatim opening command is followed
665 if (Tok.is(tok::newline))
668 SmallVector<VerbatimBlockLineComment *, 8> Lines;
// Collect lines while the current token is a block line or a newline.
669 while (Tok.is(tok::verbatim_block_line) ||
670 Tok.is(tok::newline)) {
671 VerbatimBlockLineComment *Line;
672 if (Tok.is(tok::verbatim_block_line)) {
673 Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
674 Tok.getVerbatimBlockText());
676 if (Tok.is(tok::newline)) {
680 // Empty line, just a tok::newline.
681 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
684 Lines.push_back(Line);
// The closing command is present: finish the block at its location.
687 if (Tok.is(tok::verbatim_block_end)) {
688 const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
689 S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
691 S.copyArray(llvm::makeArrayRef(Lines)));
694 // Unterminated \\verbatim block
// Finished with a default-constructed location and an empty string.
695 S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
696 S.copyArray(llvm::makeArrayRef(Lines)));
/// Parses a verbatim line: the command name token and, when present, the text
/// token that follows it.
702 VerbatimLineComment *Parser::parseVerbatimLine() {
703 assert(Tok.is(tok::verbatim_line_name));
708 SourceLocation TextBegin;
710 // Next token might not be a tok::verbatim_line_text if verbatim line
711 // starting command comes just before a newline or comment end.
712 if (Tok.is(tok::verbatim_line_text)) {
713 TextBegin = Tok.getLocation();
714 Text = Tok.getVerbatimLineText();
// Without a text token, the end of the command name serves as the text start.
716 TextBegin = NameTok.getEndLocation();
720 VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
721 NameTok.getVerbatimLineID(),
/// Selects the parser for the next piece of block content from the kind of
/// the current token.
728 BlockContentComment *Parser::parseBlockContent() {
729 switch (Tok.getKind()) {
// Commands and HTML tags begin a paragraph or a block command.
731 case tok::unknown_command:
732 case tok::backslash_command:
733 case tok::at_command:
734 case tok::html_start_tag:
735 case tok::html_end_tag:
736 return parseParagraphOrBlockCommand();
738 case tok::verbatim_block_begin:
739 return parseVerbatimBlock();
741 case tok::verbatim_line_name:
742 return parseVerbatimLine();
// These token kinds are not expected at the start of block content.
746 case tok::verbatim_block_line:
747 case tok::verbatim_block_end:
748 case tok::verbatim_line_text:
749 case tok::html_ident:
750 case tok::html_equals:
751 case tok::html_quoted_string:
752 case tok::html_greater:
753 case tok::html_slash_greater:
754 llvm_unreachable("should not see this token");
756 llvm_unreachable("bogus token kind");
/// Parses a whole comment: block content is parsed until tok::eof is reached,
/// and the collected blocks are passed to Sema to build the FullComment.
759 FullComment *Parser::parseFullComment() {
760 // Skip newlines at the beginning of the comment.
761 while (Tok.is(tok::newline))
764 SmallVector<BlockContentComment *, 8> Blocks;
765 while (Tok.isNot(tok::eof)) {
766 Blocks.push_back(parseBlockContent());
768 // Skip extra newlines after paragraph end.
769 while (Tok.is(tok::newline))
// The blocks are copied before being handed to Sema.
772 return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks)));
775 } // end namespace comments
776 } // end namespace clang