1 //===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "clang/AST/CommentParser.h"
10 #include "clang/AST/CommentCommandTraits.h"
11 #include "clang/AST/CommentDiagnostic.h"
12 #include "clang/AST/CommentSema.h"
13 #include "clang/Basic/CharInfo.h"
14 #include "clang/Basic/SourceManager.h"
15 #include "llvm/Support/ErrorHandling.h"
/// Walks the characters of \p S and tests each one with the single-character
/// isWhitespace overload.
///
/// NOTE(review): the statements that produce the result are not visible in
/// this excerpt; presumably the function returns true only when every
/// character is whitespace -- confirm against the full file.
19 static inline bool isWhitespace(llvm::StringRef S) {
20 for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
21 if (!isWhitespace(*I))
29 /// Re-lexes a sequence of tok::text tokens.
///
/// Text tokens are taken from the parser (P.Tok), buffered in Toks and read
/// one character at a time so that words and delimited sequences can be cut
/// out of them. Whatever is left unread is handed back to the parser by
/// putBackLeftoverTokens().
30 class TextTokenRetokenizer {
/// Provides storage for the text of tokens produced by lexWord() and
/// lexDelimitedSeq().
31 llvm::BumpPtrAllocator &Allocator;
34 /// This flag is set when there are no more tokens we can fetch from lexer.
35 bool NoMoreInterestingTokens;
37 /// Token buffer: tokens we have processed and lookahead.
38 SmallVector<Token, 16> Toks;
40 /// A position in \c Toks.
/// BufferStart and BufferEnd delimit the text of the current token,
/// BufferPtr is the read cursor inside that text, and BufferStartLoc is the
/// location reported by that token (see the assignments further down).
42 const char *BufferStart;
43 const char *BufferEnd;
44 const char *BufferPtr;
45 SourceLocation BufferStartLoc;
49 /// Current position in Toks.
// True once the current token index has moved past the last buffered token.
53 return Pos.CurToken >= Toks.size();
56 /// Sets up the buffer pointers to point to current token.
59 const Token &Tok = Toks[Pos.CurToken];
// The read cursor starts at the first character of the token's text.
61 Pos.BufferStart = Tok.getText().begin();
62 Pos.BufferEnd = Tok.getText().end();
63 Pos.BufferPtr = Pos.BufferStart;
64 Pos.BufferStartLoc = Tok.getLocation();
/// Returns the source location of the read cursor: the current token's
/// location shifted by the number of characters already read from it.
67 SourceLocation getSourceLocation() const {
68 const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
69 return Pos.BufferStartLoc.getLocWithOffset(CharNo);
// Reading the character under the cursor requires that the cursor is not at
// the end of the current token's text.
74 assert(Pos.BufferPtr != Pos.BufferEnd);
75 return *Pos.BufferPtr;
// Same precondition as above: there must be a character under the cursor.
80 assert(Pos.BufferPtr != Pos.BufferEnd);
// The cursor has reached the end of the current token's text. If the token
// buffer is exhausted as well, try to fetch one more token.
// NOTE(review): the statements that advance the cursor and the token index
// are not visible in this excerpt.
82 if (Pos.BufferPtr == Pos.BufferEnd) {
84 if (isEnd() && !addToken())
93 /// Returns true on success, false if there are no interesting tokens to
// Nothing more can be fetched once the flag has been set.
96 if (NoMoreInterestingTokens)
99 if (P.Tok.is(tok::newline)) {
100 // If we see a single newline token between text tokens, skip it.
101 Token Newline = P.Tok;
// A newline that is not followed by a text token ends the run of
// interesting tokens.
103 if (P.Tok.isNot(tok::text)) {
105 NoMoreInterestingTokens = true;
// Any other non-text token ends the run as well.
109 if (P.Tok.isNot(tok::text)) {
110 NoMoreInterestingTokens = true;
// Buffer the text token.
114 Toks.push_back(P.Tok);
// Special handling for the very first buffered token; the action taken is not
// visible in this excerpt.
116 if (Toks.size() == 1)
/// Skips whitespace characters under the read cursor.
/// NOTE(review): the surrounding loop is not visible in this excerpt.
121 void consumeWhitespace() {
123 if (isWhitespace(peek()))
/// Initializes \p Result as a tok::text token with the given location,
/// length and text.
130 void formTokenWithChars(Token &Result,
132 const char *TokBegin,
135 Result.setLocation(Loc);
136 Result.setKind(tok::text);
137 Result.setLength(TokLength);
// NOTE(review): presumably a sentinel that makes an unset text pointer easy
// to recognize before setText() runs -- confirm.
139 Result.TextPtr = "<UNSET>";
142 Result.setText(Text);
/// Creates a retokenizer that allocates from \p Allocator and reads tokens
/// from \p P. NoMoreInterestingTokens starts out false.
146 TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
147 Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
152 /// Extract a word -- sequence of non-whitespace characters.
153 bool lexWord(Token &Tok) {
// Snapshot of the position taken before scanning.
// NOTE(review): presumably restored when no word is found; the restoring
// code is not visible in this excerpt.
157 Position SavedPos = Pos;
160 SmallString<32> WordText;
161 const char *WordBegin = Pos.BufferPtr;
162 SourceLocation Loc = getSourceLocation();
// Non-whitespace characters are accumulated into WordText.
164 const char C = peek();
165 if (!isWhitespace(C)) {
166 WordText.push_back(C);
171 const unsigned Length = WordText.size();
// Copy the word together with its terminating NUL into allocator-provided
// storage; the StringRef covers the characters without the NUL.
177 char *TextPtr = Allocator.Allocate<char>(Length + 1);
179 memcpy(TextPtr, WordText.c_str(), Length + 1);
180 StringRef Text = StringRef(TextPtr, Length);
182 formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
/// Extracts a sequence that begins with \p OpenDelim into \p Tok.
/// NOTE(review): the failure paths and the handling of \p CloseDelim inside
/// the loop are not fully visible in this excerpt.
186 bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
// Snapshot of the position taken before scanning (presumably restored on
// failure -- confirm).
190 Position SavedPos = Pos;
193 SmallString<32> WordText;
194 const char *WordBegin = Pos.BufferPtr;
195 SourceLocation Loc = getSourceLocation();
// The opening delimiter is part of the extracted text.
198 const char C = peek();
199 if (C == OpenDelim) {
200 WordText.push_back(C);
// Keep accumulating characters until an error is flagged or the input ends.
206 while (!Error && !isEnd()) {
208 WordText.push_back(C);
// After the loop: the last character seen is not the closing delimiter.
213 if (!Error && C != CloseDelim)
// Copy the text together with its terminating NUL into allocator-provided
// storage, exactly as lexWord() does.
221 const unsigned Length = WordText.size();
222 char *TextPtr = Allocator.Allocate<char>(Length + 1);
224 memcpy(TextPtr, WordText.c_str(), Length + 1);
225 StringRef Text = StringRef(TextPtr, Length);
// NOTE(review): unlike lexWord(), the token length passed here is the pointer
// distance from WordBegin to the cursor rather than WordText.size() --
// confirm this is intended if a sequence can span more than one token.
227 formTokenWithChars(Tok, Loc, WordBegin,
228 Pos.BufferPtr - WordBegin, Text);
232 /// Put back tokens that we didn't consume.
233 void putBackLeftoverTokens() {
237 bool HavePartialTok = false;
// The current token has been read only in part: build a token out of the
// unread remainder of its text.
239 if (Pos.BufferPtr != Pos.BufferStart) {
240 formTokenWithChars(PartialTok, getSourceLocation(),
241 Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
242 StringRef(Pos.BufferPtr,
243 Pos.BufferEnd - Pos.BufferPtr));
244 HavePartialTok = true;
// Hand the tokens from the current index to the end of the buffer back to
// the parser and mark the buffer as fully consumed.
248 P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
249 Pos.CurToken = Toks.size();
// Hand back the remainder token as well (presumably only when
// HavePartialTok is set; the guard is not visible in this excerpt).
252 P.putBack(PartialTok);
/// Constructs a comment parser, storing the lexer, semantic actions object,
/// allocator, source manager and diagnostics engine it is given.
/// NOTE(review): the remainder of the initializer list (including how
/// \p Traits is stored) and the constructor body are not visible in this
/// excerpt.
256 Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
257 const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
258 const CommandTraits &Traits):
259 L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
/// Parses the arguments of a parameter command using \p Retokenizer: an
/// optional bracketed direction followed by a parameter name. Each argument
/// that is found is reported to Sema for \p PC.
264 void Parser::parseParamCommandArgs(ParamCommandComment *PC,
265 TextTokenRetokenizer &Retokenizer) {
267 // Check if argument looks like direction specification: [dir]
268 // e.g., [in], [out], [in,out]
269 if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
270 S.actOnParamCommandDirectionArg(PC,
272 Arg.getEndLocation(),
// The next word, if any, is the parameter name.
275 if (Retokenizer.lexWord(Arg))
276 S.actOnParamCommandParamNameArg(PC,
278 Arg.getEndLocation(),
/// Parses the argument of a template parameter command: the next word taken
/// from \p Retokenizer, if there is one, is reported to Sema as the parameter
/// name for \p TPC.
282 void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
283 TextTokenRetokenizer &Retokenizer) {
285 if (Retokenizer.lexWord(Arg))
286 S.actOnTParamCommandParamNameArg(TPC,
288 Arg.getEndLocation(),
/// Parses up to NumArgs word arguments for the block command \p BC and
/// passes the ones that were found to Sema.
292 void Parser::parseBlockCommandArgs(BlockCommandComment *BC,
293 TextTokenRetokenizer &Retokenizer,
295 typedef BlockCommandComment::Argument Argument;
// Storage for NumArgs arguments comes from the parser's allocator.
297 new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
298 unsigned ParsedArgs = 0;
// Stop at NumArgs arguments or as soon as no further word can be extracted.
300 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
301 Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
302 Arg.getEndLocation()),
// Only the arguments that were actually parsed are reported.
307 S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
/// Parses a block command whose command token is the current token. Creates
/// the matching comment node (param, tparam or generic block command),
/// parses its arguments when it has any, and attaches a paragraph to it.
/// NOTE(review): several branch guards and the return statements are not
/// visible in this excerpt.
310 BlockCommandComment *Parser::parseBlockCommand() {
311 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
// At most one of these three is created, depending on the command's kind.
313 ParamCommandComment *PC = nullptr;
314 TParamCommandComment *TPC = nullptr;
315 BlockCommandComment *BC = nullptr;
316 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
// Record whether the command was written with a backslash or an at sign.
317 CommandMarkerKind CommandMarker =
318 Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
319 if (Info->IsParamCommand) {
320 PC = S.actOnParamCommandStart(Tok.getLocation(),
321 Tok.getEndLocation(),
324 } else if (Info->IsTParamCommand) {
325 TPC = S.actOnTParamCommandStart(Tok.getLocation(),
326 Tok.getEndLocation(),
330 BC = S.actOnBlockCommandStart(Tok.getLocation(),
331 Tok.getEndLocation(),
337 if (isTokBlockCommand()) {
338 // Block command ahead. We can't nest block commands, so pretend that this
339 // command has an empty argument.
340 ParagraphComment *Paragraph = S.actOnParagraphComment(None);
342 S.actOnParamCommandFinish(PC, Paragraph);
345 S.actOnTParamCommandFinish(TPC, Paragraph);
348 S.actOnBlockCommandFinish(BC, Paragraph);
// Param and tparam commands always go through argument parsing; other block
// commands only when their command info declares arguments.
353 if (PC || TPC || Info->NumArgs > 0) {
354 // In order to parse command arguments we need to retokenize a few
355 // following text tokens.
356 TextTokenRetokenizer Retokenizer(Allocator, *this);
359 parseParamCommandArgs(PC, Retokenizer);
361 parseTParamCommandArgs(TPC, Retokenizer);
363 parseBlockCommandArgs(BC, Retokenizer, Info->NumArgs);
// Return the text that was not used for arguments to the token stream.
365 Retokenizer.putBackLeftoverTokens();
368 // If there's a block command ahead, we will attach an empty paragraph to
370 bool EmptyParagraph = false;
371 if (isTokBlockCommand())
372 EmptyParagraph = true;
373 else if (Tok.is(tok::newline)) {
// Re-check for a block command (presumably after the newline has been
// consumed in a line not shown here).
376 EmptyParagraph = isTokBlockCommand();
380 ParagraphComment *Paragraph;
382 Paragraph = S.actOnParagraphComment(None);
384 BlockContentComment *Block = parseParagraphOrBlockCommand();
385 // Since we have checked for a block command, we should have parsed a
387 Paragraph = cast<ParagraphComment>(Block);
// Finish whichever node was created at the top of the function.
391 S.actOnParamCommandFinish(PC, Paragraph);
394 S.actOnTParamCommandFinish(TPC, Paragraph);
397 S.actOnBlockCommandFinish(BC, Paragraph);
/// Parses an inline command whose command token is the current token,
/// together with an optional single-word argument. When no argument is
/// found, warn_doc_inline_contents_no_argument is reported.
/// NOTE(review): the guards selecting between the two actOnInlineCommand
/// calls are not visible in this excerpt.
402 InlineCommandComment *Parser::parseInlineCommand() {
403 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
// Keep a copy of the command token; it is used below for locations, the
// command ID and the diagnostic.
405 const Token CommandTok = Tok;
408 TextTokenRetokenizer Retokenizer(Allocator, *this);
// Try to take the next word as the command's argument.
411 bool ArgTokValid = Retokenizer.lexWord(ArgTok);
413 InlineCommandComment *IC;
// Form with an argument: the argument's location range is passed along.
415 IC = S.actOnInlineCommand(CommandTok.getLocation(),
416 CommandTok.getEndLocation(),
417 CommandTok.getCommandID(),
418 ArgTok.getLocation(),
419 ArgTok.getEndLocation(),
// Form without an argument.
422 IC = S.actOnInlineCommand(CommandTok.getLocation(),
423 CommandTok.getEndLocation(),
424 CommandTok.getCommandID());
// The warning is placed one position past the end of the command token and
// carries the command token's range.
426 Diag(CommandTok.getEndLocation().getLocWithOffset(1),
427 diag::warn_doc_inline_contents_no_argument)
428 << CommandTok.is(tok::at_command)
429 << Traits.getCommandInfo(CommandTok.getCommandID())->Name
430 << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation());
// Return the text that was not used for the argument to the token stream.
433 Retokenizer.putBackLeftoverTokens();
/// Parses an HTML start tag beginning at the current html_start_tag token.
/// Attributes are collected into Attrs and the tag is finished when '>',
/// '/>' or a token that cannot belong to a start tag is seen.
/// NOTE(review): the enclosing loop, the token-consuming statements and the
/// return statements are not visible in this excerpt.
438 HTMLStartTagComment *Parser::parseHTMLStartTag() {
439 assert(Tok.is(tok::html_start_tag));
440 HTMLStartTagComment *HST =
441 S.actOnHTMLStartTagStart(Tok.getLocation(),
442 Tok.getHTMLTagStartName());
445 SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
447 switch (Tok.getKind()) {
448 case tok::html_ident: {
// Identifier not followed by '=': record an attribute with a name only.
451 if (Tok.isNot(tok::html_equals)) {
452 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
453 Ident.getHTMLIdent()));
// '=' not followed by a quoted string: warn, keep the attribute name, and
// move over any stray '=' and quoted-string tokens.
458 if (Tok.isNot(tok::html_quoted_string)) {
459 Diag(Tok.getLocation(),
460 diag::warn_doc_html_start_tag_expected_quoted_string)
461 << SourceRange(Equals.getLocation());
462 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
463 Ident.getHTMLIdent()));
464 while (Tok.is(tok::html_equals) ||
465 Tok.is(tok::html_quoted_string))
// Complete attribute: name, '=' location, value range and value text.
469 Attrs.push_back(HTMLStartTagComment::Attribute(
471 Ident.getHTMLIdent(),
472 Equals.getLocation(),
473 SourceRange(Tok.getLocation(),
474 Tok.getEndLocation()),
475 Tok.getHTMLQuotedString()));
// '>' ends a tag that is not self-closing.
480 case tok::html_greater:
481 S.actOnHTMLStartTagFinish(HST,
482 S.copyArray(llvm::makeArrayRef(Attrs)),
484 /* IsSelfClosing = */ false);
// '/>' ends a self-closing tag.
488 case tok::html_slash_greater:
489 S.actOnHTMLStartTagFinish(HST,
490 S.copyArray(llvm::makeArrayRef(Attrs)),
492 /* IsSelfClosing = */ true);
// '=' or a quoted string where an identifier or the end of the tag was
// expected: warn and move over the run of such tokens.
496 case tok::html_equals:
497 case tok::html_quoted_string:
498 Diag(Tok.getLocation(),
499 diag::warn_doc_html_start_tag_expected_ident_or_greater);
500 while (Tok.is(tok::html_equals) ||
501 Tok.is(tok::html_quoted_string))
// A token that can continue or end the tag follows the skipped run (the
// action taken here is not visible in this excerpt).
503 if (Tok.is(tok::html_ident) ||
504 Tok.is(tok::html_greater) ||
505 Tok.is(tok::html_slash_greater))
508 S.actOnHTMLStartTagFinish(HST,
509 S.copyArray(llvm::makeArrayRef(Attrs)),
511 /* IsSelfClosing = */ false);
515 // Not a token from an HTML start tag. Thus HTML tag prematurely ended.
516 S.actOnHTMLStartTagFinish(HST,
517 S.copyArray(llvm::makeArrayRef(Attrs)),
519 /* IsSelfClosing = */ false);
// Choose the shape of the diagnostic from two presumed line numbers
// (their arguments are not visible here; presumably the tag start and the
// current token -- confirm).
520 bool StartLineInvalid;
521 const unsigned StartLine = SourceMgr.getPresumedLineNumber(
525 const unsigned EndLine = SourceMgr.getPresumedLineNumber(
// Same line, or either line unknown: a single warning that carries the tag's
// source range.
528 if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
529 Diag(Tok.getLocation(),
530 diag::warn_doc_html_start_tag_expected_ident_or_greater)
531 << HST->getSourceRange();
// Otherwise: the warning, plus a note at the tag's location with its range.
533 Diag(Tok.getLocation(),
534 diag::warn_doc_html_start_tag_expected_ident_or_greater);
535 Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
536 << HST->getSourceRange();
/// Parses an HTML end tag beginning at the current html_end_tag token and
/// returns the node created by Sema for it.
543 HTMLEndTagComment *Parser::parseHTMLEndTag() {
544 assert(Tok.is(tok::html_end_tag));
// Keep the end-tag token; its location and tag name are used below.
545 Token TokEndTag = Tok;
// If a '>' follows, remember its location.
548 if (Tok.is(tok::html_greater)) {
549 Loc = Tok.getLocation();
553 return S.actOnHTMLEndTag(TokEndTag.getLocation(),
555 TokEndTag.getHTMLTagEndName());
/// Parses either a paragraph of inline content or, when a block command is
/// the very first thing seen, that block command. Inline content is
/// accumulated in Content and turned into a paragraph at the end.
/// NOTE(review): the enclosing loop, several case labels and the
/// token-consuming statements are not visible in this excerpt.
558 BlockContentComment *Parser::parseParagraphOrBlockCommand() {
559 SmallVector<InlineContentComment *, 8> Content;
562 switch (Tok.getKind()) {
563 case tok::verbatim_block_begin:
564 case tok::verbatim_line_name:
566 break; // Block content or EOF ahead, finish this paragraph.
// Unknown commands are kept in the paragraph as inline content.
568 case tok::unknown_command:
569 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
570 Tok.getEndLocation(),
571 Tok.getUnknownCommandName()));
575 case tok::backslash_command:
576 case tok::at_command: {
577 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
578 if (Info->IsBlockCommand) {
// With no inline content collected so far, the block command is parsed and
// returned directly.
579 if (Content.size() == 0)
580 return parseBlockCommand();
581 break; // Block command ahead, finish this paragraph.
// A verbatim block end command seen outside a verbatim block is diagnosed.
583 if (Info->IsVerbatimBlockEndCommand) {
584 Diag(Tok.getLocation(),
585 diag::warn_verbatim_block_end_without_start)
586 << Tok.is(tok::at_command)
588 << SourceRange(Tok.getLocation(), Tok.getEndLocation());
592 if (Info->IsUnknownCommand) {
593 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
594 Tok.getEndLocation(),
// Whatever remains at this point must be an inline command.
599 assert(Info->IsInlineCommand);
600 Content.push_back(parseInlineCommand());
606 if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
608 break; // Two newlines -- end of paragraph.
610 // Also allow [tok::newline, tok::text, tok::newline] if the middle
611 // tok::text is just whitespace.
612 if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
// Keep the whitespace token so that it can be put back if this turns out
// not to be a paragraph break.
613 Token WhitespaceTok = Tok;
615 if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
619 // We have [tok::newline, tok::text, non-newline]. Put back tok::text.
620 putBack(WhitespaceTok);
// A single newline inside a paragraph is recorded on the last inline node.
622 if (Content.size() > 0)
623 Content.back()->addTrailingNewline();
627 // Don't deal with HTML tag soup now.
628 case tok::html_start_tag:
629 Content.push_back(parseHTMLStartTag());
632 case tok::html_end_tag:
633 Content.push_back(parseHTMLEndTag());
// Text becomes a text node in the paragraph (the case label for this branch
// is not visible in this excerpt).
637 Content.push_back(S.actOnText(Tok.getLocation(),
638 Tok.getEndLocation(),
// These token kinds are not expected to reach this function.
643 case tok::verbatim_block_line:
644 case tok::verbatim_block_end:
645 case tok::verbatim_line_text:
646 case tok::html_ident:
647 case tok::html_equals:
648 case tok::html_quoted_string:
649 case tok::html_greater:
650 case tok::html_slash_greater:
651 llvm_unreachable("should not see this token");
// Build the paragraph from a copy of the collected inline content.
656 return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content)));
/// Parses a verbatim block that starts at the current verbatim_block_begin
/// token: collects its lines and finishes the block either at a
/// verbatim_block_end token or, for an unterminated block, with an empty
/// location and name.
/// NOTE(review): the token-consuming statements and the return statement are
/// not visible in this excerpt.
659 VerbatimBlockComment *Parser::parseVerbatimBlock() {
660 assert(Tok.is(tok::verbatim_block_begin));
662 VerbatimBlockComment *VB =
663 S.actOnVerbatimBlockStart(Tok.getLocation(),
664 Tok.getVerbatimBlockID());
667 // Don't create an empty line if verbatim opening command is followed
669 if (Tok.is(tok::newline))
672 SmallVector<VerbatimBlockLineComment *, 8> Lines;
// Every verbatim line token or bare newline contributes one line node.
673 while (Tok.is(tok::verbatim_block_line) ||
674 Tok.is(tok::newline)) {
675 VerbatimBlockLineComment *Line;
676 if (Tok.is(tok::verbatim_block_line)) {
677 Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
678 Tok.getVerbatimBlockText());
// A newline directly after the line text (the action taken is not visible in
// this excerpt).
680 if (Tok.is(tok::newline)) {
684 // Empty line, just a tok::newline.
685 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
688 Lines.push_back(Line);
// Properly terminated block: finish it at the end command's location.
691 if (Tok.is(tok::verbatim_block_end)) {
692 const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
693 S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
695 S.copyArray(llvm::makeArrayRef(Lines)));
698 // Unterminated \\verbatim block
699 S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
700 S.copyArray(llvm::makeArrayRef(Lines)));
/// Parses a verbatim line command that starts at the current
/// verbatim_line_name token, with its text if a verbatim_line_text token
/// follows.
/// NOTE(review): the declarations of NameTok and Text and the return
/// statement are not visible in this excerpt.
706 VerbatimLineComment *Parser::parseVerbatimLine() {
707 assert(Tok.is(tok::verbatim_line_name));
712 SourceLocation TextBegin;
714 // Next token might not be a tok::verbatim_line_text if verbatim line
715 // starting command comes just before a newline or comment end.
716 if (Tok.is(tok::verbatim_line_text)) {
717 TextBegin = Tok.getLocation();
718 Text = Tok.getVerbatimLineText();
// No text token: the text position falls back to the end of the command name.
720 TextBegin = NameTok.getEndLocation();
724 VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
725 NameTok.getVerbatimLineID(),
/// Dispatches on the kind of the current token to the parser for the
/// matching piece of block content: paragraph or block command, verbatim
/// block, or verbatim line.
732 BlockContentComment *Parser::parseBlockContent() {
733 switch (Tok.getKind()) {
// Commands and HTML tags are handled by the paragraph/block command parser.
735 case tok::unknown_command:
736 case tok::backslash_command:
737 case tok::at_command:
738 case tok::html_start_tag:
739 case tok::html_end_tag:
740 return parseParagraphOrBlockCommand();
742 case tok::verbatim_block_begin:
743 return parseVerbatimBlock();
745 case tok::verbatim_line_name:
746 return parseVerbatimLine();
// These token kinds are not expected to reach this function.
750 case tok::verbatim_block_line:
751 case tok::verbatim_block_end:
752 case tok::verbatim_line_text:
753 case tok::html_ident:
754 case tok::html_equals:
755 case tok::html_quoted_string:
756 case tok::html_greater:
757 case tok::html_slash_greater:
758 llvm_unreachable("should not see this token");
// Reached only if the token kind matched none of the cases above.
760 llvm_unreachable("bogus token kind");
/// Parses a whole comment: block content is parsed repeatedly until the eof
/// token, and the resulting blocks are passed to Sema to build the
/// FullComment.
/// NOTE(review): the bodies of the newline-skipping loops are not visible in
/// this excerpt.
763 FullComment *Parser::parseFullComment() {
764 // Skip newlines at the beginning of the comment.
765 while (Tok.is(tok::newline))
768 SmallVector<BlockContentComment *, 8> Blocks;
769 while (Tok.isNot(tok::eof)) {
770 Blocks.push_back(parseBlockContent());
772 // Skip extra newlines after paragraph end.
773 while (Tok.is(tok::newline))
// Build the FullComment from a copy of the collected blocks.
776 return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks)));
779 } // end namespace comments
780 } // end namespace clang