Home | History | Annotate | Download | only in yaml-bench
      1 //===- YAMLBench - Benchmark the YAMLParser implementation ----------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This program executes the YAMLParser on differently sized YAML texts and
     11 // outputs the run time.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 
     16 #include "llvm/ADT/SmallString.h"
     17 #include "llvm/Support/Casting.h"
     18 #include "llvm/Support/CommandLine.h"
     19 #include "llvm/Support/MemoryBuffer.h"
     20 #include "llvm/Support/SourceMgr.h"
     21 #include "llvm/Support/Timer.h"
     22 #include "llvm/Support/YAMLParser.h"
     23 #include "llvm/Support/raw_ostream.h"
     24 #include <system_error>
     25 
     26 using namespace llvm;
     27 
     28 static cl::opt<bool>
     29   DumpTokens( "tokens"
     30             , cl::desc("Print the tokenization of the file.")
     31             , cl::init(false)
     32             );
     33 
     34 static cl::opt<bool>
     35   DumpCanonical( "canonical"
     36                , cl::desc("Print the canonical YAML for this file.")
     37                , cl::init(false)
     38                );
     39 
     40 static cl::opt<std::string>
     41  Input(cl::Positional, cl::desc("<input>"));
     42 
     43 static cl::opt<bool>
     44   Verify( "verify"
     45         , cl::desc(
     46             "Run a quick verification useful for regression testing")
     47         , cl::init(false)
     48         );
     49 
     50 static cl::opt<unsigned>
     51   MemoryLimitMB("memory-limit", cl::desc(
     52                   "Do not use more megabytes of memory"),
     53                 cl::init(1000));
     54 
     55 struct indent {
     56   unsigned distance;
     57   indent(unsigned d) : distance(d) {}
     58 };
     59 
     60 static raw_ostream &operator <<(raw_ostream &os, const indent &in) {
     61   for (unsigned i = 0; i < in.distance; ++i)
     62     os << "  ";
     63   return os;
     64 }
     65 
     66 /// \brief Pretty print a tag by replacing tag:yaml.org,2002: with !!.
     67 static std::string prettyTag(yaml::Node *N) {
     68   std::string Tag = N->getVerbatimTag();
     69   if (StringRef(Tag).startswith("tag:yaml.org,2002:")) {
     70     std::string Ret = "!!";
     71     Ret += StringRef(Tag).substr(18);
     72     return std::move(Ret);
     73   }
     74   std::string Ret = "!<";
     75   Ret += Tag;
     76   Ret += ">";
     77   return Ret;
     78 }
     79 
     80 static void dumpNode( yaml::Node *n
     81                     , unsigned Indent = 0
     82                     , bool SuppressFirstIndent = false) {
     83   if (!n)
     84     return;
     85   if (!SuppressFirstIndent)
     86     outs() << indent(Indent);
     87   StringRef Anchor = n->getAnchor();
     88   if (!Anchor.empty())
     89     outs() << "&" << Anchor << " ";
     90   if (yaml::ScalarNode *sn = dyn_cast<yaml::ScalarNode>(n)) {
     91     SmallString<32> Storage;
     92     StringRef Val = sn->getValue(Storage);
     93     outs() << prettyTag(n) << " \"" << yaml::escape(Val) << "\"";
     94   } else if (yaml::SequenceNode *sn = dyn_cast<yaml::SequenceNode>(n)) {
     95     outs() << prettyTag(n) << " [\n";
     96     ++Indent;
     97     for (yaml::SequenceNode::iterator i = sn->begin(), e = sn->end();
     98                                       i != e; ++i) {
     99       dumpNode(i, Indent);
    100       outs() << ",\n";
    101     }
    102     --Indent;
    103     outs() << indent(Indent) << "]";
    104   } else if (yaml::MappingNode *mn = dyn_cast<yaml::MappingNode>(n)) {
    105     outs() << prettyTag(n) << " {\n";
    106     ++Indent;
    107     for (yaml::MappingNode::iterator i = mn->begin(), e = mn->end();
    108                                      i != e; ++i) {
    109       outs() << indent(Indent) << "? ";
    110       dumpNode(i->getKey(), Indent, true);
    111       outs() << "\n";
    112       outs() << indent(Indent) << ": ";
    113       dumpNode(i->getValue(), Indent, true);
    114       outs() << ",\n";
    115     }
    116     --Indent;
    117     outs() << indent(Indent) << "}";
    118   } else if (yaml::AliasNode *an = dyn_cast<yaml::AliasNode>(n)){
    119     outs() << "*" << an->getName();
    120   } else if (dyn_cast<yaml::NullNode>(n)) {
    121     outs() << prettyTag(n) << " null";
    122   }
    123 }
    124 
    125 static void dumpStream(yaml::Stream &stream) {
    126   for (yaml::document_iterator di = stream.begin(), de = stream.end(); di != de;
    127        ++di) {
    128     outs() << "%YAML 1.2\n"
    129            << "---\n";
    130     yaml::Node *n = di->getRoot();
    131     if (n)
    132       dumpNode(n);
    133     else
    134       break;
    135     outs() << "\n...\n";
    136   }
    137 }
    138 
    139 static void benchmark( llvm::TimerGroup &Group
    140                      , llvm::StringRef Name
    141                      , llvm::StringRef JSONText) {
    142   llvm::Timer BaseLine((Name + ": Loop").str(), Group);
    143   BaseLine.startTimer();
    144   char C = 0;
    145   for (llvm::StringRef::iterator I = JSONText.begin(),
    146                                  E = JSONText.end();
    147        I != E; ++I) { C += *I; }
    148   BaseLine.stopTimer();
    149   volatile char DontOptimizeOut = C; (void)DontOptimizeOut;
    150 
    151   llvm::Timer Tokenizing((Name + ": Tokenizing").str(), Group);
    152   Tokenizing.startTimer();
    153   {
    154     yaml::scanTokens(JSONText);
    155   }
    156   Tokenizing.stopTimer();
    157 
    158   llvm::Timer Parsing((Name + ": Parsing").str(), Group);
    159   Parsing.startTimer();
    160   {
    161     llvm::SourceMgr SM;
    162     llvm::yaml::Stream stream(JSONText, SM);
    163     stream.skip();
    164   }
    165   Parsing.stopTimer();
    166 }
    167 
    168 static std::string createJSONText(size_t MemoryMB, unsigned ValueSize) {
    169   std::string JSONText;
    170   llvm::raw_string_ostream Stream(JSONText);
    171   Stream << "[\n";
    172   size_t MemoryBytes = MemoryMB * 1024 * 1024;
    173   while (JSONText.size() < MemoryBytes) {
    174     Stream << " {\n"
    175            << "  \"key1\": \"" << std::string(ValueSize, '*') << "\",\n"
    176            << "  \"key2\": \"" << std::string(ValueSize, '*') << "\",\n"
    177            << "  \"key3\": \"" << std::string(ValueSize, '*') << "\"\n"
    178            << " }";
    179     Stream.flush();
    180     if (JSONText.size() < MemoryBytes) Stream << ",";
    181     Stream << "\n";
    182   }
    183   Stream << "]\n";
    184   Stream.flush();
    185   return JSONText;
    186 }
    187 
    188 int main(int argc, char **argv) {
    189   llvm::cl::ParseCommandLineOptions(argc, argv);
    190   if (Input.getNumOccurrences()) {
    191     ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
    192         MemoryBuffer::getFileOrSTDIN(Input);
    193     if (!BufOrErr)
    194       return 1;
    195     std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());
    196 
    197     llvm::SourceMgr sm;
    198     if (DumpTokens) {
    199       yaml::dumpTokens(Buf->getBuffer(), outs());
    200     }
    201 
    202     if (DumpCanonical) {
    203       yaml::Stream stream(Buf->getBuffer(), sm);
    204       dumpStream(stream);
    205     }
    206   }
    207 
    208   if (Verify) {
    209     llvm::TimerGroup Group("YAML parser benchmark");
    210     benchmark(Group, "Fast", createJSONText(10, 500));
    211   } else if (!DumpCanonical && !DumpTokens) {
    212     llvm::TimerGroup Group("YAML parser benchmark");
    213     benchmark(Group, "Small Values", createJSONText(MemoryLimitMB, 5));
    214     benchmark(Group, "Medium Values", createJSONText(MemoryLimitMB, 500));
    215     benchmark(Group, "Large Values", createJSONText(MemoryLimitMB, 50000));
    216   }
    217 
    218   return 0;
    219 }
    220