summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/tools/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/tools/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp')
-rw-r--r--contrib/llvm/tools/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp85
1 files changed, 85 insertions, 0 deletions
diff --git a/contrib/llvm/tools/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp b/contrib/llvm/tools/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
new file mode 100644
index 0000000..bfdb268
--- /dev/null
+++ b/contrib/llvm/tools/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
@@ -0,0 +1,85 @@
+//===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend emits an efficient function to translate HTML named
+// character references to UTF-8 sequences.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/StringMatcher.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <vector>
+
+using namespace llvm;
+
+/// \brief Append to \p CLiteral a C string literal that spells the UTF-8
+/// encoding of \p CodePoint.
+///
+/// The literal is a pair of double quotes enclosing one hexadecimal escape
+/// per encoded byte.  Nothing is appended when the conversion fails.
+///
+/// \returns true on success, false when ConvertCodePointToUTF8 reports
+/// failure for \p CodePoint.
+static bool translateCodePointToUTF8(unsigned CodePoint,
+                                     SmallVectorImpl<char> &CLiteral) {
+  char Buffer[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
+  char *BufferEnd = Buffer;
+  if (!ConvertCodePointToUTF8(CodePoint, BufferEnd))
+    return false;
+
+  // After a successful conversion, [Buffer, BufferEnd) is taken to be the
+  // encoded byte sequence.
+  raw_svector_ostream Out(CLiteral);
+  Out << "\"";
+  for (const char *Byte = Buffer; Byte != BufferEnd; ++Byte) {
+    Out << "\\x";
+    // Go through unsigned char so bytes >= 0x80 are not sign-extended.
+    Out.write_hex(static_cast<unsigned char>(*Byte));
+  }
+  Out << "\"";
+
+  return true;
+}
+
+namespace clang {
+/// \brief Emit the definition of translateHTMLNamedCharacterReferenceToUTF8.
+///
+/// For every record derived from "NCR", reads its "Spelling" and "CodePoint"
+/// fields and maps the spelling to a "return <literal>;" statement, where the
+/// literal is produced by translateCodePointToUTF8.  The mapping is written
+/// to \p OS as a StringMatcher over the parameter "Name"; the generated
+/// function ends by returning an empty StringRef.
+///
+/// Records whose code point does not fit in 'unsigned' or cannot be
+/// translated are reported with an "invalid code point" error and are left
+/// out of the generated matcher.
+///
+/// \param Records the record keeper queried for "NCR" definitions.
+/// \param OS the stream that receives the generated source.
+void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
+                                                  raw_ostream &OS) {
+  std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
+  std::vector<StringMatcher::StringPair> NameToUTF8;
+  SmallString<32> CLiteral;
+  for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
+       I != E; ++I) {
+    Record &Tag = **I;
+    std::string Spelling = Tag.getValueAsString("Spelling");
+    uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
+    // translateCodePointToUTF8 takes an 'unsigned'.  Narrow explicitly and
+    // compare, so that a value with bits set above the width of 'unsigned'
+    // is diagnosed instead of being silently truncated into some other code
+    // point.
+    unsigned NarrowCodePoint = static_cast<unsigned>(CodePoint);
+    CLiteral.clear();
+    CLiteral.append("return ");
+    if (NarrowCodePoint != CodePoint ||
+        !translateCodePointToUTF8(NarrowCodePoint, CLiteral)) {
+      SrcMgr.PrintMessage(Tag.getLoc().front(),
+                          SourceMgr::DK_Error,
+                          Twine("invalid code point"));
+      continue;
+    }
+    CLiteral.append(";");
+
+    StringMatcher::StringPair Match(Spelling, CLiteral.str());
+    NameToUTF8.push_back(Match);
+  }
+
+  emitSourceFileHeader("HTML named character reference to UTF-8 "
+                       "translation", OS);
+
+  OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
+        " StringRef Name) {\n";
+  StringMatcher("Name", NameToUTF8, OS).Emit();
+  // Fallback emitted after the matcher: an empty StringRef.
+  OS << " return StringRef();\n"
+     << "}\n\n";
+}
+
+} // end namespace clang
+
OpenPOWER on IntegriCloud