1 //== HTMLRewrite.cpp - Translate source code into prettified HTML --*- C++ -*-//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the HTMLRewriter class, which is used to translate the
10 // text of a source file into prettified HTML.
12 //===----------------------------------------------------------------------===//
14 #include "clang/Rewrite/Core/HTMLRewrite.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Lex/Preprocessor.h"
17 #include "clang/Lex/TokenConcatenation.h"
18 #include "clang/Rewrite/Core/Rewriter.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/MemoryBuffer.h"
22 #include "llvm/Support/raw_ostream.h"
24 using namespace clang;
27 /// HighlightRange - Highlight a range in the source code with the specified
28 /// start/end tags. B/E must be in the same file. This ensures that
29 /// start/end tags are placed at the start/end of each line if the range is
/// multi-line.
///
/// Both locations are first mapped to their expansion locations and then
/// converted to offsets within the file. The tag insertion itself is
/// delegated to the offset-based HighlightRange overload, which receives the
/// file's edit buffer and its raw character data.
31 void html::HighlightRange(Rewriter &R, SourceLocation B, SourceLocation E,
32 const char *StartTag, const char *EndTag,
// NOTE(review): the parameter list continues past the trailing comma above;
// the remaining parameter is not visible here. The macro-highlighting caller
// in this file passes LLoc.isTokenRange() as a sixth argument.
34 SourceManager &SM = R.getSourceMgr();
// Work on expansion locations so that B and E can be resolved to one file.
35 B = SM.getExpansionLoc(B);
36 E = SM.getExpansionLoc(E);
37 FileID FID = SM.getFileID(B);
38 assert(SM.getFileID(E) == FID && "B/E not in the same file!");
// Translate both endpoints into offsets within that file.
40 unsigned BOffset = SM.getFileOffset(B);
41 unsigned EOffset = SM.getFileOffset(E);
43 // Include the whole end token in the range.
// NOTE(review): presumably this adjustment is guarded by a condition that is
// not visible here (e.g. only for token ranges) - confirm.
45 EOffset += Lexer::MeasureTokenLength(E, R.getSourceMgr(), R.getLangOpts());
// Raw character data of the file; the offsets computed above index into it.
48 const char *BufferStart = SM.getBufferData(FID, &Invalid).data();
// Hand over to the overload that works on decomposed offsets.
52 HighlightRange(R.getEditBuffer(FID), BOffset, EOffset,
53 BufferStart, StartTag, EndTag);
56 /// HighlightRange - This is the same as the above method, but takes
57 /// decomposed file locations.
///
/// RB is the edit buffer that receives the tags. B and E are offsets into
/// BufferStart; the scan below visits offsets B up to, but not including, E.
/// StartTag is inserted after offset B and EndTag before offset E. In
/// addition, whenever a newline is found inside the range, the tags are
/// closed at the end of that line and re-opened at the next non-whitespace
/// character, so no tag spans a line break.
58 void html::HighlightRange(RewriteBuffer &RB, unsigned B, unsigned E,
59 const char *BufferStart,
60 const char *StartTag, const char *EndTag) {
61 // Insert the tag at the absolute start/end of the range.
62 RB.InsertTextAfter(B, StartTag);
63 RB.InsertTextBefore(E, EndTag);
65 // Scan the range to see if there is a \r or \n. If so, and if the line is
66 // not blank, insert tags on that line as well.
// The start tag inserted at B above counts as the currently open tag.
67 bool HadOpenTag = true;
// Offset of the most recently seen non-whitespace character; a per-line end
// tag is placed immediately after it (see the LastNonWhiteSpace+1 below).
69 unsigned LastNonWhiteSpace = B;
// E is exclusive: the loop stops as soon as i reaches E.
70 for (unsigned i = B; i != E; ++i) {
71 switch (BufferStart[i]) {
74 // Okay, we found a newline in the range. If we have an open tag, we need
75 // to insert a close tag at the first non-whitespace before the newline.
77 RB.InsertTextBefore(LastNonWhiteSpace+1, EndTag);
79 // Instead of inserting an open tag immediately after the newline, we
80 // wait until we see a non-whitespace character. This prevents us from
81 // inserting tags around blank lines, and also allows the open tag to
82 // be put *after* whitespace on a non-blank line.
94 // If there is no tag open, do it now.
96 RB.InsertTextAfter(i, StartTag);
100 // Remember this character.
101 LastNonWhiteSpace = i;
/// EscapeText - Walk the buffer of file FID one character at a time and
/// record single-character replacements in that file's edit buffer so the
/// text can be embedded in HTML.
///
/// R supplies both the source manager (for the original buffer) and the edit
/// buffer that receives the replacements. EscapeSpaces and ReplaceTabs are
/// option flags; the lines that test them are not visible here.
///
/// NOTE(review): the ReplaceText calls near the end of the loop pass "<", ">"
/// and "&" as replacement text, and one StringRef below is given a length of
/// 6*NumSpaces over a much shorter literal. Presumably the HTML entity
/// spellings were decoded in this copy of the file - confirm against the
/// upstream source before relying on these strings.
107 void html::EscapeText(Rewriter &R, FileID FID,
108 bool EscapeSpaces, bool ReplaceTabs) {
110 const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID);
111 const char* C = Buf->getBufferStart();
112 const char* FileEnd = Buf->getBufferEnd();
114 assert (C <= FileEnd);
116 RewriteBuffer &RB = R.getEditBuffer(FID);
// FilePos is the offset of *C in the original buffer; every replacement
// below targets exactly one original character at that offset.
119 for (unsigned FilePos = 0; C != FileEnd ; ++C, ++FilePos) {
// Any character without special handling just advances the column counter.
121 default: ++ColNo; break;
129 RB.ReplaceText(FilePos, 1, " ");
133 RB.ReplaceText(FilePos, 1, "<hr>");
// Distance from the current column to the next multiple of 8.
140 unsigned NumSpaces = 8-(ColNo&7);
// The length argument assumes 6 characters per emitted space.
142 RB.ReplaceText(FilePos, 1,
143 StringRef(" "
144 " ", 6*NumSpaces));
146 RB.ReplaceText(FilePos, 1, StringRef(" ", NumSpaces));
151 RB.ReplaceText(FilePos, 1, "<");
156 RB.ReplaceText(FilePos, 1, ">");
161 RB.ReplaceText(FilePos, 1, "&");
/// EscapeText - String flavour of the escaping routine: iterate over the
/// characters of s, write the escaped form to a string stream and return the
/// result as a std::string.
///
/// EscapeSpaces controls what is written for a space character (see the test
/// below); ReplaceTabs is an option flag whose test is not visible here.
///
/// NOTE(review): the '<', '>' and '&' cases below write the same character
/// back. Presumably the HTML entity spellings were decoded in this copy of
/// the file - confirm against the upstream source.
168 std::string html::EscapeText(StringRef s, bool EscapeSpaces, bool ReplaceTabs) {
170 unsigned len = s.size();
172 llvm::raw_string_ostream os(Str);
174 for (unsigned i = 0 ; i < len; ++i) {
182 if (EscapeSpaces) os << " ";
// Both inner loops run four times; NOTE(review): presumably each emits one
// space (escaped or plain) per iteration for a tab - the loop bodies are not
// visible here. Their index i shadows the outer loop's i.
189 for (unsigned i = 0; i < 4; ++i)
192 for (unsigned i = 0; i < 4; ++i)
200 case '<': os << "<"; break;
201 case '>': os << ">"; break;
202 case '&': os << "&"; break;
/// AddLineNumber - Wrap one source line in an HTML table row.
///
/// RB is the edit buffer to modify, LineNo the number shown for the line, and
/// B/E the offsets of the start and end of the line's text. The row opens
/// with a cell holding the line number (element id "LN<LineNo>") followed by
/// the opening tag of the cell that holds the line's text.
209 static void AddLineNumber(RewriteBuffer &RB, unsigned LineNo,
210 unsigned B, unsigned E) {
211 SmallString<256> Str;
212 llvm::raw_svector_ostream OS(Str);
// Build the row prefix: <tr>, the number cell, and the opening "line" cell.
214 OS << "<tr class=\"codeline\" data-linenumber=\"" << LineNo << "\">"
215 << "<td class=\"num\" id=\"LN" << LineNo << "\">" << LineNo
216 << "</td><td class=\"line\">";
218 if (B == E) { // Handle empty lines.
// Empty line: only one insertion is made, at B.
220 RB.InsertTextBefore(B, OS.str());
// Non-empty line: prefix goes before the text, the closing tags before E.
222 RB.InsertTextBefore(B, OS.str());
223 RB.InsertTextBefore(E, "</td></tr>");
/// AddLineNumbers - Split the buffer of file FID into lines, wrap each line
/// in a numbered table row via AddLineNumber, and finally surround the whole
/// file with a <table class="code"> element.
227 void html::AddLineNumbers(Rewriter& R, FileID FID) {
229 const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID);
230 const char* FileBeg = Buf->getBufferStart();
231 const char* FileEnd = Buf->getBufferEnd();
232 const char* C = FileBeg;
233 RewriteBuffer &RB = R.getEditBuffer(FID);
235 assert (C <= FileEnd);
// FilePos mirrors C as an offset from the start of the buffer.
238 unsigned FilePos = 0;
// One iteration of the outer loop handles one line.
240 while (C != FileEnd) {
243 unsigned LineStartPos = FilePos;
// Default the end of the line to the end of the file; it is overwritten
// below when the scan assigns LineEndPos from FilePos.
244 unsigned LineEndPos = FileEnd - FileBeg;
246 assert (FilePos <= LineEndPos);
247 assert (C < FileEnd);
249 // Scan until the newline (or end-of-file).
251 while (C != FileEnd) {
// Record the current offset as the line end, then step past it.
256 LineEndPos = FilePos++;
263 AddLineNumber(RB, LineNo, LineStartPos, LineEndPos);
266 // Add one big table tag that surrounds all of the code.
268 llvm::raw_string_ostream os(s);
269 os << "<table class=\"code\" data-fileid=\"" << FID.getHashValue() << "\">\n";
// The opening tag goes at offset 0, the closing tag after the last byte.
270 RB.InsertTextBefore(0, os.str());
271 RB.InsertTextAfter(FileEnd - FileBeg, "</table>");
/// AddHeaderFooterInternalBuiltinCSS - Turn the rewritten file FID into an
/// HTML document: a prologue (doctype, escaped title and the built-in style
/// sheet listed below) is inserted before the start of the file, and
/// "</body></html>" is inserted after its end.
///
/// The style-sheet lines in the middle of this function are text that is
/// written to the output, not C++ code; any change to them changes the
/// generated HTML.
274 void html::AddHeaderFooterInternalBuiltinCSS(Rewriter &R, FileID FID,
277 const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID);
278 const char* FileStart = Buf->getBufferStart();
279 const char* FileEnd = Buf->getBufferEnd();
// StartLoc is the first character of the file; EndLoc is StartLoc advanced
// by the buffer length, i.e. the position just past the last character.
281 SourceLocation StartLoc = R.getSourceMgr().getLocForStartOfFile(FID);
282 SourceLocation EndLoc = StartLoc.getLocWithOffset(FileEnd-FileStart);
// Accumulate the prologue in os so it can be inserted in one piece.
285 llvm::raw_string_ostream os(s);
286 os << "<!doctype html>\n" // Use HTML 5 doctype
290 os << "<title>" << html::EscapeText(title) << "</title>\n";
293 <style type="text/css">
294 body { color:#000000; background-color:#ffffff }
295 body { font-family:Helvetica, sans-serif; font-size:10pt }
296 h1 { font-size:14pt }
297 .FileName { margin-top: 5px; margin-bottom: 5px; display: inline; }
298 .FileNav { margin-left: 5px; margin-right: 5px; display: inline; }
299 .FileNav a { text-decoration:none; font-size: larger; }
300 .divider { margin-top: 30px; margin-bottom: 30px; height: 15px; }
301 .divider { background-color: gray; }
302 .code { border-collapse:collapse; width:100%; }
303 .code { font-family: "Monospace", monospace; font-size:10pt }
304 .code { line-height: 1.2em }
305 .comment { color: green; font-style: oblique }
306 .keyword { color: blue }
307 .string_literal { color: red }
308 .directive { color: darkmagenta }
310 /* Macros and variables could have pop-up notes hidden by default.
311 - Macro pop-up: expansion of the macro
312 - Variable pop-up: value (table) of the variable */
313 .macro_popup, .variable_popup { display: none; }
315 /* Pop-up appears on mouse-hover event. */
316 .macro:hover .macro_popup, .variable:hover .variable_popup {
319 -webkit-border-radius:5px;
320 -webkit-box-shadow:1px 1px 7px #000;
322 box-shadow:1px 1px 7px #000;
330 border: 2px solid red;
331 background-color:#FFF0F0;
336 border: 2px solid blue;
337 background-color:#F0F0FF;
339 font-family: Helvetica, sans-serif;
343 /* Pop-up notes needs a relative position as a base where they pops up. */
345 background-color: PaleGoldenRod;
348 .macro { color: DarkMagenta; }
356 border: 1px solid #b0b0b0;
358 box-shadow: 1px 1px 7px black;
359 background-color: #c0c0c0;
363 .num { width:2.5em; padding-right:2ex; background-color:#eeeeee }
364 .num { text-align:right; font-size:8pt }
365 .num { color:#444444 }
366 .line { padding-left: 1ex; border-left: 3px solid #ccc }
367 .line { white-space: pre }
368 .msg { -webkit-box-shadow:1px 1px 7px #000 }
369 .msg { box-shadow:1px 1px 7px #000 }
370 .msg { -webkit-border-radius:5px }
371 .msg { border-radius:5px }
372 .msg { font-family:Helvetica, sans-serif; font-size:8pt }
374 .msg { padding:0.25em 1ex 0.25em 1ex }
375 .msg { margin-top:10px; margin-bottom:10px }
376 .msg { font-weight:bold }
377 .msg { max-width:60em; word-wrap: break-word; white-space: pre-wrap }
378 .msgT { padding:0x; spacing:0x }
379 .msgEvent { background-color:#fff8b4; color:#000000 }
380 .msgControl { background-color:#bbbbbb; color:#000000 }
381 .msgNote { background-color:#ddeeff; color:#000000 }
382 .mrange { background-color:#dfddf3 }
383 .mrange { border-bottom:1px solid #6F9DBE }
384 .PathIndex { font-weight: bold; padding:0px 5px; margin-right:5px; }
385 .PathIndex { -webkit-border-radius:8px }
386 .PathIndex { border-radius:8px }
387 .PathIndexEvent { background-color:#bfba87 }
388 .PathIndexControl { background-color:#8c8c8c }
389 .PathIndexPopUp { background-color: #879abc; }
390 .PathNav a { text-decoration:none; font-size: larger }
391 .CodeInsertionHint { font-weight: bold; background-color: #10dd10 }
392 .CodeRemovalHint { background-color:#de1010 }
393 .CodeRemovalHint { border-bottom:1px solid #6F9DBE }
394 .selected{ background-color:orange !important; }
400 border-collapse: collapse; border-spacing: 0px;
411 input.spoilerhider + label {
413 text-decoration: underline;
419 input.spoilerhider ~ .spoiler {
425 input.spoilerhider:checked + label + .spoiler{
434 R.InsertTextBefore(StartLoc, os.str()); // Prepend the accumulated prologue at the start of the file.
// Close the document after the last character of the file.
437 R.InsertTextAfter(EndLoc, "</body></html>\n");
440 /// SyntaxHighlight - Relex the specified FileID and annotate the HTML with
441 /// information about keywords, macro expansions etc. This uses the macro
442 /// table state from the end of the file, so it won't be perfectly accurate,
443 /// but it will be reasonably close.
///
/// The file is lexed in raw mode with comments retained. Keywords, comments,
/// string literals and preprocessor directives are each wrapped in a
/// <span class='...'> element through the offset-based HighlightRange.
444 void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
445 RewriteBuffer &RB = R.getEditBuffer(FID);
447 const SourceManager &SM = PP.getSourceManager();
448 const llvm::MemoryBuffer *FromFile = SM.getBuffer(FID);
449 Lexer L(FID, FromFile, SM, PP.getLangOpts());
// Token offsets computed below index into this character data.
450 const char *BufferStart = L.getBuffer().data();
452 // Inform the lexer that we want to retain comments as tokens, so we
453 // can highlight them.
454 L.SetCommentRetentionState(true);
456 // Lex all the tokens in raw mode, to avoid entering #includes or expanding
459 L.LexFromRawLexer(Tok);
461 while (Tok.isNot(tok::eof)) {
462 // Since we are lexing unexpanded tokens, all tokens are from the main
// TokOffs/TokLen describe the half-open range [TokOffs, TokOffs+TokLen).
464 unsigned TokOffs = SM.getFileOffset(Tok.getLocation());
465 unsigned TokLen = Tok.getLength();
466 switch (Tok.getKind()) {
468 case tok::identifier:
469 llvm_unreachable("tok::identifier in raw lexing mode!");
470 case tok::raw_identifier: {
471 // Fill in Result.IdentifierInfo and update the token kind,
472 // looking up the identifier in the identifier table.
473 PP.LookUpIdentifierInfo(Tok);
475 // If the lookup changed the kind to something other than a plain
// identifier (i.e. a keyword), highlight it as such.
476 if (Tok.isNot(tok::identifier))
477 HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
478 "<span class='keyword'>", "</span>");
482 HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
483 "<span class='comment'>", "</span>");
485 case tok::utf8_string_literal:
486 // Chop off the u part of u8 prefix
489 // FALL THROUGH to chop the 8
491 case tok::wide_string_literal:
492 case tok::utf16_string_literal:
493 case tok::utf32_string_literal:
494 // Chop off the L, u, U or 8 prefix
498 case tok::string_literal:
499 // FIXME: Exclude the optional ud-suffix from the highlighted range.
500 HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
501 "<span class='string_literal'>", "</span>");
504 // If this is a preprocessor directive, all tokens to end of line are too.
505 if (!Tok.isAtStartOfLine())
508 // Eat all of the tokens until we get to the next one at the start of
// TokEnd tracks the end offset of the last token consumed on this line.
510 unsigned TokEnd = TokOffs+TokLen;
511 L.LexFromRawLexer(Tok);
512 while (!Tok.isAtStartOfLine() && Tok.isNot(tok::eof)) {
513 TokEnd = SM.getFileOffset(Tok.getLocation())+Tok.getLength();
514 L.LexFromRawLexer(Tok);
517 // Find end of line. This is a hack.
518 HighlightRange(RB, TokOffs, TokEnd, BufferStart,
519 "<span class='directive'>", "</span>");
521 // Don't skip the next token.
// Advance to the next raw token for the following iteration.
526 L.LexFromRawLexer(Tok);
530 /// HighlightMacros - This uses the macro table state from the end of the
531 /// file, to re-expand macros and insert (into the HTML) information about the
532 /// macro expansions. This won't be perfectly accurate, but it will be
533 /// reasonably close.
///
/// The function works in two phases. First the file is raw-lexed into
/// TokenStream. Then that stream is fed back through the (const_cast)
/// preprocessor with diagnostics ignored and pragmas disabled, and every
/// macro expansion found is wrapped in a <span class='macro'> whose end tag
/// carries the escaped expansion text in a 'macro_popup' span. The previous
/// diagnostics engine and pragma setting are restored before returning.
534 void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) {
535 // Re-lex the raw token stream into a token buffer.
536 const SourceManager &SM = PP.getSourceManager();
537 std::vector<Token> TokenStream;
539 const llvm::MemoryBuffer *FromFile = SM.getBuffer(FID);
540 Lexer L(FID, FromFile, SM, PP.getLangOpts());
542 // Lex all the tokens in raw mode, to avoid entering #includes or expanding
546 L.LexFromRawLexer(Tok);
548 // If this is a # at the start of a line, discard it from the token stream.
549 // We don't want the re-preprocess step to see #defines, #includes or other
550 // preprocessor directives.
551 if (Tok.is(tok::hash) && Tok.isAtStartOfLine())
554 // If this is a ## token, change its kind to unknown so that repreprocessing
555 // it will not produce an error.
556 if (Tok.is(tok::hashhash))
557 Tok.setKind(tok::unknown);
559 // If this raw token is an identifier, the raw lexer won't have looked up
560 // the corresponding identifier info for it. Do this now so that it will be
561 // macro expanded when we re-preprocess it.
562 if (Tok.is(tok::raw_identifier))
563 PP.LookUpIdentifierInfo(Tok);
565 TokenStream.push_back(Tok);
// The eof token itself is kept in the stream; stop right after storing it.
567 if (Tok.is(tok::eof)) break;
570 // Temporarily change the diagnostics object so that we ignore any generated
571 // diagnostics from this pass.
572 DiagnosticsEngine TmpDiags(PP.getDiagnostics().getDiagnosticIDs(),
573 &PP.getDiagnostics().getDiagnosticOptions(),
574 new IgnoringDiagConsumer);
576 // FIXME: This is a huge hack; we reuse the input preprocessor because we want
577 // its state, but we aren't actually changing it (we hope). This should really
578 // construct a copy of the preprocessor.
579 Preprocessor &TmpPP = const_cast<Preprocessor&>(PP);
// Remember the current diagnostics engine; it is reinstated at the end.
580 DiagnosticsEngine *OldDiags = &TmpPP.getDiagnostics();
581 TmpPP.setDiagnostics(TmpDiags);
583 // Inform the preprocessor that we don't want comments.
584 TmpPP.SetCommentRetentionState(false, false);
586 // We don't want pragmas either. Although we filtered out #pragma, removing
587 // _Pragma and __pragma is much harder.
588 bool PragmasPreviouslyEnabled = TmpPP.getPragmasEnabled();
589 TmpPP.setPragmasEnabled(false);
591 // Enter the tokens we just lexed. This will cause them to be macro expanded
592 // but won't enter sub-files (because we removed #'s).
593 TmpPP.EnterTokenStream(TokenStream, false, /*IsReinject=*/false);
// Used below to decide whether two adjacent tokens need a separating space.
595 TokenConcatenation ConcatInfo(TmpPP);
597 // Lex all the tokens.
600 while (Tok.isNot(tok::eof)) {
601 // Ignore non-macro tokens.
602 if (!Tok.getLocation().isMacroID()) {
607 // Okay, we have the first token of a macro expansion: highlight the
608 // expansion by inserting a start tag before the macro expansion and
610 CharSourceRange LLoc = SM.getExpansionRange(Tok.getLocation());
612 // Ignore tokens whose instantiation location was not the main file.
613 if (SM.getFileID(LLoc.getBegin()) != FID) {
618 assert(SM.getFileID(LLoc.getEnd()) == FID &&
619 "Start and end of expansion must be in the same ultimate file!");
// Expansion accumulates the escaped spelling of every token produced by this
// macro expansion; LineLen counts characters since the last line break.
621 std::string Expansion = EscapeText(TmpPP.getSpelling(Tok));
622 unsigned LineLen = Expansion.size();
626 // Okay, eat this token, getting the next one.
629 // Skip all the rest of the tokens that are part of this macro
630 // instantiation. It would be really nice to pop up a window with all the
631 // spelling of the tokens or something.
632 while (!Tok.is(tok::eof) &&
633 SM.getExpansionLoc(Tok.getLocation()) == LLoc.getBegin()) {
634 // Insert a newline if the macro expansion is getting large.
// Subtract the size before appending and add the size after appending (see
// below): the net effect is that LineLen grows by exactly the number of
// characters appended in this iteration. The intermediate unsigned value may
// wrap around, which is well defined.
640 LineLen -= Expansion.size();
642 // If the tokens were already space separated, or if they must be to avoid
643 // them being implicitly pasted, add a space between them.
644 if (Tok.hasLeadingSpace() ||
645 ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok))
648 // Escape any special characters in the token text.
649 Expansion += EscapeText(TmpPP.getSpelling(Tok));
650 LineLen += Expansion.size();
652 PrevPrevTok = PrevTok;
657 // Insert the 'macro_popup' as the end tag, so that multi-line macros all
// The end tag holds the popup span followed by the </span> that closes the
// "<span class='macro'>" start tag passed to HighlightRange below.
659 Expansion = "<span class='macro_popup'>" + Expansion + "</span></span>";
661 HighlightRange(R, LLoc.getBegin(), LLoc.getEnd(), "<span class='macro'>",
662 Expansion.c_str(), LLoc.isTokenRange());
665 // Restore the preprocessor's old state.
666 TmpPP.setDiagnostics(*OldDiags);
667 TmpPP.setPragmasEnabled(PragmasPreviouslyEnabled);