1 //===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This tablegen backend emits an efficient function to translate HTML named
11 // character references to UTF-8 sequences.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/Support/ConvertUTF.h"
17 #include "llvm/TableGen/Error.h"
18 #include "llvm/TableGen/Record.h"
19 #include "llvm/TableGen/StringMatcher.h"
20 #include "llvm/TableGen/TableGenBackend.h"
25 /// \brief Convert a code point to the corresponding UTF-8 sequence represented
26 /// as a C string literal.
28 /// \returns true on success.
// Encodes CodePoint as UTF-8 and appends each resulting byte to CLiteral via
// write_hex().
// NOTE(review): some lines of this function (the failure branch of the
// conversion check and the closing braces) are not visible in this excerpt.
29 static bool translateCodePointToUTF8(unsigned CodePoint,
30 SmallVectorImpl<char> &CLiteral) {
// Scratch buffer whose size is UNI_MAX_UTF8_BYTES_PER_CODE_POINT.
31 char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
// Cursor handed to the converter; its distance from Translated is used as
// the encoded length below.
32 char *TranslatedPtr = Translated;
// A false result from the converter is treated as failure.
33 if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
// View over exactly the bytes between the buffer start and the cursor.
36 StringRef UTF8(Translated, TranslatedPtr - Translated);
// Stream constructed over CLiteral, the caller-supplied output buffer.
38 raw_svector_ostream OS(CLiteral);
40 for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
// The byte is converted to unsigned char before being written as hex
// (presumably so bytes >= 0x80 are not treated as negative values).
42 OS.write_hex(static_cast<unsigned char>(UTF8[i]));
// TableGen backend entry point: gathers every record derived from "NCR",
// pairs each record's Spelling with a "return <UTF-8 literal>" code string,
// and emits the function translateHTMLNamedCharacterReferenceToUTF8 built
// from those pairs with StringMatcher.
// NOTE(review): several lines of this function (the rest of the signature,
// the loop header tail, the declaration of Tag, and closing braces) are not
// visible in this excerpt.
50 void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
// All definitions that derive from the "NCR" class.
52 std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
// (spelling, code) pairs later handed to StringMatcher.
53 std::vector<StringMatcher::StringPair> NameToUTF8;
// Buffer that holds the code string for one record.
// NOTE(review): no reset of this buffer between iterations is visible here;
// confirm against the full file.
54 SmallString<32> CLiteral;
55 for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
// Read the "Spelling" and "CodePoint" fields of the current record.
58 std::string Spelling = Tag.getValueAsString("Spelling");
59 uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
// The code string starts with "return " and is followed by the encoded
// bytes appended by translateCodePointToUTF8.
61 CLiteral.append("return ");
62 if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
// On failure, report "invalid code point" at the record's first location.
63 SrcMgr.PrintMessage(Tag.getLoc().front(),
65 Twine("invalid code point"));
// Record the mapping from this spelling to its code string.
70 StringMatcher::StringPair Match(Spelling, CLiteral.str());
71 NameToUTF8.push_back(Match);
74 emitSourceFileHeader("HTML named character reference to UTF-8 "
// Open the generated function; its body is produced by StringMatcher below.
77 OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
78 " StringRef Name) {\n";
// Emit the matching code over the variable "Name" for all collected pairs.
79 StringMatcher("Name", NameToUTF8, OS).Emit();
// Trailing return of an empty StringRef in the generated function
// (presumably reached when no spelling matched).
80 OS << " return StringRef();\n"
84 } // end namespace clang