1 //===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "clang/AST/CommentParser.h"
11 #include "clang/AST/CommentSema.h"
12 #include "clang/AST/CommentDiagnostic.h"
13 #include "clang/AST/CommentCommandTraits.h"
14 #include "clang/Basic/SourceManager.h"
15 #include "llvm/Support/ErrorHandling.h"
// NOTE(review): the embedded line numbers in this listing skip values, so some
// member declarations, method headers and braces of this class are not shown.
20 /// Re-lexes a sequence of tok::text tokens.
21 class TextTokenRetokenizer {
22 llvm::BumpPtrAllocator &Allocator;
25 /// This flag is set when there are no more tokens we can fetch from lexer.
26 bool NoMoreInterestingTokens;
28 /// Token buffer: tokens we have processed and lookahead.
29 SmallVector<Token, 16> Toks;
31 /// A position in \c Toks.
// Character-level cursor: BufferPtr moves within [BufferStart, BufferEnd), and
// BufferStartLoc is the source location that corresponds to BufferStart.
34 const char *BufferStart;
35 const char *BufferEnd;
36 const char *BufferPtr;
37 SourceLocation BufferStartLoc;
40 /// Current position in Toks.
// True once the token index has moved past the last buffered token.
44 return Pos.CurToken >= Toks.size();
47 /// Sets up the buffer pointers to point to current token.
50 const Token &Tok = Toks[Pos.CurToken];
52 Pos.BufferStart = Tok.getText().begin();
53 Pos.BufferEnd = Tok.getText().end();
54 Pos.BufferPtr = Pos.BufferStart;
55 Pos.BufferStartLoc = Tok.getLocation();
// Returns the location of the current character: the start location of the
// current token's text advanced by the offset of BufferPtr within that text.
58 SourceLocation getSourceLocation() const {
59 const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
60 return Pos.BufferStartLoc.getLocWithOffset(CharNo);
// Reads the character at the cursor; the current text must not be exhausted.
65 assert(Pos.BufferPtr != Pos.BufferEnd);
66 return *Pos.BufferPtr;
71 assert(Pos.BufferPtr != Pos.BufferEnd);
// When the current token's text is used up and no buffered token remains,
// addToken() is asked to fetch another one.
73 if (Pos.BufferPtr == Pos.BufferEnd) {
75 if (isEnd() && !addToken())
84 /// Returns true on success, false if there are no interesting tokens to
87 if (NoMoreInterestingTokens)
90 if (P.Tok.is(tok::newline)) {
91 // If we see a single newline token between text tokens, skip it.
92 Token Newline = P.Tok;
94 if (P.Tok.isNot(tok::text)) {
96 NoMoreInterestingTokens = true;
// Anything other than a text token ends the run of tokens we can re-lex.
100 if (P.Tok.isNot(tok::text)) {
101 NoMoreInterestingTokens = true;
105 Toks.push_back(P.Tok);
107 if (Toks.size() == 1)
// Whitespace here means space, newline, carriage return, tab, form feed or
// vertical tab.
112 static bool isWhitespace(char C) {
113 return C == ' ' || C == '\n' || C == '\r' ||
114 C == '\t' || C == '\f' || C == '\v';
// NOTE(review): presumably skips a run of whitespace; the loop and the consuming
// call are on lines not shown in this listing — confirm.
117 void consumeWhitespace() {
119 if (isWhitespace(peek()))
// Fills in Result as a tok::text token with the given location, length and
// text.
126 void formTokenWithChars(Token &Result,
128 const char *TokBegin,
131 Result.setLocation(Loc);
132 Result.setKind(tok::text);
133 Result.setLength(TokLength);
135 Result.TextPtr = "<UNSET>";
138 Result.setText(Text);
142 TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
143 Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
148 /// Extract a word -- sequence of non-whitespace characters.
149 bool lexWord(Token &Tok) {
// Snapshot of the cursor taken before lexing starts.
153 Position SavedPos = Pos;
156 SmallString<32> WordText;
157 const char *WordBegin = Pos.BufferPtr;
158 SourceLocation Loc = getSourceLocation();
160 const char C = peek();
161 if (!isWhitespace(C)) {
162 WordText.push_back(C);
167 const unsigned Length = WordText.size();
// Copy the word, including its terminating NUL, into storage obtained from
// Allocator; the StringRef covers only the Length characters of the word.
173 char *TextPtr = Allocator.Allocate<char>(Length + 1);
175 memcpy(TextPtr, WordText.c_str(), Length + 1);
176 StringRef Text = StringRef(TextPtr, Length);
178 formTokenWithChars(Tok, Loc, WordBegin,
179 Pos.BufferPtr - WordBegin, Text);
// Lexes a sequence that must begin with OpenDelim; the last character read is
// compared against CloseDelim after the scanning loop.
183 bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
// Snapshot of the cursor taken before lexing starts.
187 Position SavedPos = Pos;
190 SmallString<32> WordText;
191 const char *WordBegin = Pos.BufferPtr;
192 SourceLocation Loc = getSourceLocation();
195 const char C = peek();
196 if (C == OpenDelim) {
197 WordText.push_back(C);
203 while (!Error && !isEnd()) {
205 WordText.push_back(C);
210 if (!Error && C != CloseDelim)
218 const unsigned Length = WordText.size();
// Same copy scheme as in lexWord(): NUL-terminated storage from Allocator.
219 char *TextPtr = Allocator.Allocate<char>(Length + 1);
221 memcpy(TextPtr, WordText.c_str(), Length + 1);
222 StringRef Text = StringRef(TextPtr, Length);
224 formTokenWithChars(Tok, Loc, WordBegin,
225 Pos.BufferPtr - WordBegin, Text);
229 /// Put back tokens that we didn't consume.
230 void putBackLeftoverTokens() {
234 bool HavePartialTok = false;
// The cursor is inside a token: form a new text token from the part of its
// text that lies between the cursor and the end of that text.
236 if (Pos.BufferPtr != Pos.BufferStart) {
237 formTokenWithChars(PartialTok, getSourceLocation(),
238 Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
239 StringRef(Pos.BufferPtr,
240 Pos.BufferEnd - Pos.BufferPtr));
241 HavePartialTok = true;
// Hand the buffered tokens from the current index onward back to the parser
// and mark the buffer as fully consumed.
245 P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
246 Pos.CurToken = Toks.size();
249 P.putBack(PartialTok);
// Constructs the parser, storing the lexer, Sema object, allocator, source
// manager and diagnostics engine supplied by the caller.
// NOTE(review): the initializer list continues on lines not shown in this
// listing.
253 Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
254 const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
255 const CommandTraits &Traits):
256 L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
// Parses the arguments of a param command using Retokenizer: an optional
// bracketed direction followed by a word, each reported to Sema for PC.
261 void Parser::parseParamCommandArgs(ParamCommandComment *PC,
262 TextTokenRetokenizer &Retokenizer) {
264 // Check if argument looks like direction specification: [dir]
265 // e.g., [in], [out], [in,out]
266 if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
267 S.actOnParamCommandDirectionArg(PC,
269 Arg.getEndLocation(),
// The next word, if one can be lexed, is passed on as the parameter name.
272 if (Retokenizer.lexWord(Arg))
273 S.actOnParamCommandParamNameArg(PC,
275 Arg.getEndLocation(),
// Parses the argument of a tparam command: a single word lexed through
// Retokenizer, which is passed to Sema as the parameter name for TPC.
279 void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
280 TextTokenRetokenizer &Retokenizer) {
282 if (Retokenizer.lexWord(Arg))
283 S.actOnTParamCommandParamNameArg(TPC,
285 Arg.getEndLocation(),
// Parses up to NumArgs word arguments for the block command BC and hands the
// ones that were actually parsed to Sema.
289 void Parser::parseBlockCommandArgs(BlockCommandComment *BC,
290 TextTokenRetokenizer &Retokenizer,
292 typedef BlockCommandComment::Argument Argument;
// Storage for NumArgs arguments comes from Allocator and is constructed in
// place.
294 new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
295 unsigned ParsedArgs = 0;
// Stop at NumArgs arguments or as soon as no further word can be lexed.
297 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
298 Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
299 Arg.getEndLocation()),
// Only the first ParsedArgs entries are passed on.
304 S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
// Parses a block command; the current token must be tok::command. Commands
// flagged IsParamCommand, commands flagged IsTParamCommand, and all other
// block commands are each started and finished through their own Sema
// callbacks (PC, TPC and BC respectively).
// NOTE(review): the embedded line numbers in this listing skip values, so some
// statements and braces of this function are not shown here.
307 BlockCommandComment *Parser::parseBlockCommand() {
308 assert(Tok.is(tok::command));
310 ParamCommandComment *PC;
311 TParamCommandComment *TPC;
312 BlockCommandComment *BC;
313 bool IsParam = false;
314 bool IsTParam = false;
315 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
316 if (Info->IsParamCommand) {
318 PC = S.actOnParamCommandStart(Tok.getLocation(),
319 Tok.getEndLocation(),
// The three kinds of command are alternatives, so this test is chained with
// `else`: once the param branch has run, the branches below must not run too.
321 } else if (Info->IsTParamCommand) {
323 TPC = S.actOnTParamCommandStart(Tok.getLocation(),
324 Tok.getEndLocation(),
327 BC = S.actOnBlockCommandStart(Tok.getLocation(),
328 Tok.getEndLocation(),
333 if (Tok.is(tok::command) &&
334 Traits.getCommandInfo(Tok.getCommandID())->IsBlockCommand) {
335 // Block command ahead. We can't nest block commands, so pretend that this
336 // command has an empty argument.
337 ParagraphComment *Paragraph = S.actOnParagraphComment(
338 ArrayRef<InlineContentComment *>());
// Finish whichever kind of command was started, with the empty paragraph.
340 S.actOnParamCommandFinish(PC, Paragraph);
342 } else if (IsTParam) {
343 S.actOnTParamCommandFinish(TPC, Paragraph);
346 S.actOnBlockCommandFinish(BC, Paragraph);
351 if (IsParam || IsTParam || Info->NumArgs > 0) {
352 // In order to parse command arguments we need to retokenize a few
353 // following text tokens.
354 TextTokenRetokenizer Retokenizer(Allocator, *this);
357 parseParamCommandArgs(PC, Retokenizer);
359 parseTParamCommandArgs(TPC, Retokenizer);
361 parseBlockCommandArgs(BC, Retokenizer, Info->NumArgs);
// Return whatever the argument parsers did not consume to the token stream.
363 Retokenizer.putBackLeftoverTokens();
366 BlockContentComment *Block = parseParagraphOrBlockCommand();
367 // Since we have checked for a block command, we should have parsed a
369 ParagraphComment *Paragraph = cast<ParagraphComment>(Block);
// Finish whichever kind of command was started, with the parsed paragraph.
371 S.actOnParamCommandFinish(PC, Paragraph);
373 } else if (IsTParam) {
374 S.actOnTParamCommandFinish(TPC, Paragraph);
377 S.actOnBlockCommandFinish(BC, Paragraph);
// Parses an inline command; the current token must be tok::command. One word
// following the command is lexed as its argument when available.
382 InlineCommandComment *Parser::parseInlineCommand() {
383 assert(Tok.is(tok::command));
// Keep a copy of the command token; its locations and ID are used below.
385 const Token CommandTok = Tok;
388 TextTokenRetokenizer Retokenizer(Allocator, *this);
391 bool ArgTokValid = Retokenizer.lexWord(ArgTok);
393 InlineCommandComment *IC;
// Form with an argument: the argument's locations are passed along.
395 IC = S.actOnInlineCommand(CommandTok.getLocation(),
396 CommandTok.getEndLocation(),
397 CommandTok.getCommandID(),
398 ArgTok.getLocation(),
399 ArgTok.getEndLocation(),
// Form without an argument.
402 IC = S.actOnInlineCommand(CommandTok.getLocation(),
403 CommandTok.getEndLocation(),
404 CommandTok.getCommandID());
// Return whatever the retokenizer did not consume to the token stream.
407 Retokenizer.putBackLeftoverTokens();
// Parses an HTML start tag; the current token must be tok::html_start_tag.
// Attributes are collected in Attrs and a copy of them is passed to
// S.actOnHTMLStartTagFinish() in every path that finishes the tag.
// NOTE(review): the embedded line numbers in this listing skip values, so some
// statements and braces of this function are not shown here.
412 HTMLStartTagComment *Parser::parseHTMLStartTag() {
413 assert(Tok.is(tok::html_start_tag));
414 HTMLStartTagComment *HST =
415 S.actOnHTMLStartTagStart(Tok.getLocation(),
416 Tok.getHTMLTagStartName());
419 SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
421 switch (Tok.getKind()) {
422 case tok::html_ident: {
// No equals token after the identifier: record an attribute built from the
// identifier's location and name only.
425 if (Tok.isNot(tok::html_equals)) {
426 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
427 Ident.getHTMLIdent()));
// No quoted string where one is required: warn, pointing at the equals token,
// and record the attribute from the identifier alone.
432 if (Tok.isNot(tok::html_quoted_string)) {
433 Diag(Tok.getLocation(),
434 diag::warn_doc_html_start_tag_expected_quoted_string)
435 << SourceRange(Equals.getLocation());
436 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
437 Ident.getHTMLIdent()));
438 while (Tok.is(tok::html_equals) ||
439 Tok.is(tok::html_quoted_string))
// Complete attribute: includes the equals location and the quoted string with
// its source range.
443 Attrs.push_back(HTMLStartTagComment::Attribute(
445 Ident.getHTMLIdent(),
446 Equals.getLocation(),
447 SourceRange(Tok.getLocation(),
448 Tok.getEndLocation()),
449 Tok.getHTMLQuotedString()));
// tok::html_greater finishes the tag as not self-closing.
454 case tok::html_greater:
455 S.actOnHTMLStartTagFinish(HST,
456 S.copyArray(llvm::makeArrayRef(Attrs)),
458 /* IsSelfClosing = */ false);
// tok::html_slash_greater finishes the tag as self-closing.
462 case tok::html_slash_greater:
463 S.actOnHTMLStartTagFinish(HST,
464 S.copyArray(llvm::makeArrayRef(Attrs)),
466 /* IsSelfClosing = */ true);
// An equals token or quoted string where an identifier or tag end was
// expected: warn, then look at what follows the run of such tokens.
470 case tok::html_equals:
471 case tok::html_quoted_string:
472 Diag(Tok.getLocation(),
473 diag::warn_doc_html_start_tag_expected_ident_or_greater);
474 while (Tok.is(tok::html_equals) ||
475 Tok.is(tok::html_quoted_string))
477 if (Tok.is(tok::html_ident) ||
478 Tok.is(tok::html_greater) ||
479 Tok.is(tok::html_slash_greater))
482 S.actOnHTMLStartTagFinish(HST,
483 S.copyArray(llvm::makeArrayRef(Attrs)),
485 /* IsSelfClosing = */ false);
489 // Not a token from an HTML start tag. Thus HTML tag prematurely ended.
490 S.actOnHTMLStartTagFinish(HST,
491 S.copyArray(llvm::makeArrayRef(Attrs)),
493 /* IsSelfClosing = */ false);
494 bool StartLineInvalid;
495 const unsigned StartLine = SourceMgr.getPresumedLineNumber(
499 const unsigned EndLine = SourceMgr.getPresumedLineNumber(
// If a line lookup was invalid or StartLine equals EndLine, the warning carries
// the tag's source range itself; the other form below emits the warning and
// then a separate note at the tag's location.
502 if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
503 Diag(Tok.getLocation(),
504 diag::warn_doc_html_start_tag_expected_ident_or_greater)
505 << HST->getSourceRange();
507 Diag(Tok.getLocation(),
508 diag::warn_doc_html_start_tag_expected_ident_or_greater);
509 Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
510 << HST->getSourceRange();
// Parses an HTML end tag; the current token must be tok::html_end_tag. The
// result comes from S.actOnHTMLEndTag(), which receives the end tag token's
// location and tag name.
517 HTMLEndTagComment *Parser::parseHTMLEndTag() {
518 assert(Tok.is(tok::html_end_tag));
// Keep a copy of the end tag token; its location and name are used below.
519 Token TokEndTag = Tok;
// When a tok::html_greater follows, its location is recorded in Loc.
522 if (Tok.is(tok::html_greater)) {
523 Loc = Tok.getLocation();
527 return S.actOnHTMLEndTag(TokEndTag.getLocation(),
529 TokEndTag.getHTMLTagEndName());
// Parses either a paragraph of inline content or, when a block command is the
// first thing encountered, that block command. Inline pieces are collected in
// Content and turned into a paragraph via S.actOnParagraphComment().
// NOTE(review): the embedded line numbers in this listing skip values, so some
// case labels, statements and braces of this function are not shown here.
532 BlockContentComment *Parser::parseParagraphOrBlockCommand() {
533 SmallVector<InlineContentComment *, 8> Content;
536 switch (Tok.getKind()) {
537 case tok::verbatim_block_begin:
538 case tok::verbatim_line_name:
540 assert(Content.size() != 0);
541 break; // Block content or EOF ahead, finish this paragraph.
543 case tok::unknown_command:
544 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
545 Tok.getEndLocation(),
546 Tok.getUnknownCommandName()));
551 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
552 if (Info->IsBlockCommand) {
// With nothing collected yet, the block command itself is the result.
553 if (Content.size() == 0)
554 return parseBlockCommand();
555 break; // Block command ahead, finish this paragraph.
// A verbatim block end command seen here gets a warning covering the
// command token.
557 if (Info->IsVerbatimBlockEndCommand) {
558 Diag(Tok.getLocation(),
559 diag::warn_verbatim_block_end_without_start)
561 << SourceRange(Tok.getLocation(), Tok.getEndLocation());
565 if (Info->IsUnknownCommand) {
566 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
567 Tok.getEndLocation(),
// Any command reaching this point is required to be an inline command.
572 assert(Info->IsInlineCommand);
573 Content.push_back(parseInlineCommand());
579 if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
581 break; // Two newlines -- end of paragraph.
// Record the newline on the most recently collected inline item.
583 if (Content.size() > 0)
584 Content.back()->addTrailingNewline();
588 // Don't deal with HTML tag soup now.
589 case tok::html_start_tag:
590 Content.push_back(parseHTMLStartTag());
593 case tok::html_end_tag:
594 Content.push_back(parseHTMLEndTag());
598 Content.push_back(S.actOnText(Tok.getLocation(),
599 Tok.getEndLocation(),
// These token kinds are treated as impossible at this point.
604 case tok::verbatim_block_line:
605 case tok::verbatim_block_end:
606 case tok::verbatim_line_text:
607 case tok::html_ident:
608 case tok::html_equals:
609 case tok::html_quoted_string:
610 case tok::html_greater:
611 case tok::html_slash_greater:
612 llvm_unreachable("should not see this token");
617 return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content)));
// Parses a verbatim block; the current token must be tok::verbatim_block_begin.
// The block's lines are collected in Lines and a copy of them is passed to
// S.actOnVerbatimBlockFinish().
// NOTE(review): the embedded line numbers in this listing skip values, so some
// statements and braces of this function are not shown here.
620 VerbatimBlockComment *Parser::parseVerbatimBlock() {
621 assert(Tok.is(tok::verbatim_block_begin));
623 VerbatimBlockComment *VB =
624 S.actOnVerbatimBlockStart(Tok.getLocation(),
625 Tok.getVerbatimBlockID());
628 // Don't create an empty line if verbatim opening command is followed
630 if (Tok.is(tok::newline))
633 SmallVector<VerbatimBlockLineComment *, 8> Lines;
// Each iteration produces one line, from either a verbatim_block_line token
// or a bare newline.
634 while (Tok.is(tok::verbatim_block_line) ||
635 Tok.is(tok::newline)) {
636 VerbatimBlockLineComment *Line;
637 if (Tok.is(tok::verbatim_block_line)) {
638 Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
639 Tok.getVerbatimBlockText());
641 if (Tok.is(tok::newline)) {
645 // Empty line, just a tok::newline.
646 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
649 Lines.push_back(Line);
// Terminated block: finish it using the end token's location.
652 if (Tok.is(tok::verbatim_block_end)) {
653 const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
654 S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
656 S.copyArray(llvm::makeArrayRef(Lines)));
659 // Unterminated \\verbatim block
// Finished with a default-constructed location and an empty name.
660 S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
661 S.copyArray(llvm::makeArrayRef(Lines)));
// Parses a verbatim line; the current token must be tok::verbatim_line_name.
// The result is produced by S.actOnVerbatimLine() from the name token and the
// text that follows it, if any.
667 VerbatimLineComment *Parser::parseVerbatimLine() {
668 assert(Tok.is(tok::verbatim_line_name));
673 SourceLocation TextBegin;
675 // Next token might not be a tok::verbatim_line_text if verbatim line
676 // starting command comes just before a newline or comment end.
677 if (Tok.is(tok::verbatim_line_text)) {
678 TextBegin = Tok.getLocation();
679 Text = Tok.getVerbatimLineText();
// Otherwise the text is taken to begin where the name token ends.
681 TextBegin = NameTok.getEndLocation();
685 VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
686 NameTok.getVerbatimLineID(),
// Dispatches on the kind of the current token to the parser for the
// corresponding block-level construct.
// NOTE(review): the embedded line numbers in this listing skip values, so some
// case labels of this switch are not shown here.
693 BlockContentComment *Parser::parseBlockContent() {
694 switch (Tok.getKind()) {
// Tokens that begin a paragraph or a block command.
696 case tok::unknown_command:
698 case tok::html_start_tag:
699 case tok::html_end_tag:
700 return parseParagraphOrBlockCommand();
702 case tok::verbatim_block_begin:
703 return parseVerbatimBlock();
705 case tok::verbatim_line_name:
706 return parseVerbatimLine();
// These token kinds are treated as impossible at this point.
710 case tok::verbatim_block_line:
711 case tok::verbatim_block_end:
712 case tok::verbatim_line_text:
713 case tok::html_ident:
714 case tok::html_equals:
715 case tok::html_quoted_string:
716 case tok::html_greater:
717 case tok::html_slash_greater:
718 llvm_unreachable("should not see this token");
720 llvm_unreachable("bogus token kind");
// Parses a whole comment: block content is parsed repeatedly until tok::eof,
// and a copy of the collected blocks is passed to S.actOnFullComment(), whose
// result is returned.
723 FullComment *Parser::parseFullComment() {
724 // Skip newlines at the beginning of the comment.
725 while (Tok.is(tok::newline))
728 SmallVector<BlockContentComment *, 8> Blocks;
729 while (Tok.isNot(tok::eof)) {
730 Blocks.push_back(parseBlockContent());
732 // Skip extra newlines after paragraph end.
733 while (Tok.is(tok::newline))
736 return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks)));
739 } // end namespace comments
740 } // end namespace clang