Home | History | Annotate | Download | only in TableGen
      1 //===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This tablegen backend emits an efficient function to translate HTML named
     11 // character references to UTF-8 sequences.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "llvm/ADT/SmallString.h"
     16 #include "llvm/Support/ConvertUTF.h"
     17 #include "llvm/TableGen/Error.h"
     18 #include "llvm/TableGen/Record.h"
     19 #include "llvm/TableGen/StringMatcher.h"
     20 #include "llvm/TableGen/TableGenBackend.h"
     21 #include <vector>
     22 
     23 using namespace llvm;
     24 
     25 /// \brief Convert a code point to the corresponding UTF-8 sequence represented
     26 /// as a C string literal.
     27 ///
     28 /// \returns true on success.
     29 static bool translateCodePointToUTF8(unsigned CodePoint,
     30                                      SmallVectorImpl<char> &CLiteral) {
     31   char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
     32   char *TranslatedPtr = Translated;
     33   if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
     34     return false;
     35 
     36   StringRef UTF8(Translated, TranslatedPtr - Translated);
     37 
     38   raw_svector_ostream OS(CLiteral);
     39   OS << "\"";
     40   for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
     41     OS << "\\x";
     42     OS.write_hex(static_cast<unsigned char>(UTF8[i]));
     43   }
     44   OS << "\"";
     45 
     46   return true;
     47 }
     48 
     49 namespace clang {
     50 void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
     51                                                   raw_ostream &OS) {
     52   std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
     53   std::vector<StringMatcher::StringPair> NameToUTF8;
     54   SmallString<32> CLiteral;
     55   for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
     56        I != E; ++I) {
     57     Record &Tag = **I;
     58     std::string Spelling = Tag.getValueAsString("Spelling");
     59     uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
     60     CLiteral.clear();
     61     CLiteral.append("return ");
     62     if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
     63       SrcMgr.PrintMessage(Tag.getLoc().front(),
     64                           SourceMgr::DK_Error,
     65                           Twine("invalid code point"));
     66       continue;
     67     }
     68     CLiteral.append(";");
     69 
     70     StringMatcher::StringPair Match(Spelling, CLiteral.str());
     71     NameToUTF8.push_back(Match);
     72   }
     73 
     74   emitSourceFileHeader("HTML named character reference to UTF-8 "
     75                        "translation", OS);
     76 
     77   OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
     78         "                                             StringRef Name) {\n";
     79   StringMatcher("Name", NameToUTF8, OS).Emit();
     80   OS << "  return StringRef();\n"
     81      << "}\n\n";
     82 }
     83 
     84 } // end namespace clang
     85 
     86