Home | History | Annotate | Download | only in Support
      1 //===--- YAMLParser.h - Simple YAML parser --------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 //  This is a YAML 1.2 parser.
     11 //
     12 //  See http://www.yaml.org/spec/1.2/spec.html for the full standard.
     13 //
     14 //  This currently does not implement the following:
     15 //    * Multi-line literal folding.
     16 //    * Tag resolution.
     17 //    * UTF-16.
     18 //    * BOMs anywhere other than the first Unicode scalar value in the file.
     19 //
     20 //  The most important class here is Stream. This represents a YAML stream with
     21 //  0, 1, or many documents.
     22 //
     23 //  SourceMgr sm;
     24 //  StringRef input = getInput();
     25 //  yaml::Stream stream(input, sm);
     26 //
     27 //  for (yaml::document_iterator di = stream.begin(), de = stream.end();
     28 //       di != de; ++di) {
     29 //    yaml::Node *n = di->getRoot();
     30 //    if (n) {
     31 //      // Do something with n...
     32 //    } else
     33 //      break;
     34 //  }
     35 //
     36 //===----------------------------------------------------------------------===//
     37 
     38 #ifndef LLVM_SUPPORT_YAMLPARSER_H
     39 #define LLVM_SUPPORT_YAMLPARSER_H
     40 
     41 #include "llvm/ADT/StringRef.h"
     42 #include "llvm/Support/Allocator.h"
     43 #include "llvm/Support/SMLoc.h"
     44 #include <map>
     45 #include <utility>
     46 
     47 namespace llvm {
     48 class MemoryBufferRef;
     49 class SourceMgr;
     50 class Twine;
     51 class raw_ostream;
     52 
     53 namespace yaml {
     54 
     55 class document_iterator;
     56 class Document;
     57 class Node;
     58 class Scanner;
     59 struct Token;
     60 
     61 /// \brief Dump all the tokens in this stream to OS.
     62 /// \returns true if there was an error, false otherwise.
     63 bool dumpTokens(StringRef Input, raw_ostream &);
     64 
     65 /// \brief Scans all tokens in input without outputting anything. This is used
     66 ///        for benchmarking the tokenizer.
     67 /// \returns true if there was an error, false otherwise.
     68 bool scanTokens(StringRef Input);
     69 
     70 /// \brief Escape \a Input for a double quoted scalar.
     71 std::string escape(StringRef Input);
     72 
     73 /// \brief This class represents a YAML stream potentially containing multiple
     74 ///        documents.
     75 class Stream {
     76 public:
     77   /// \brief This keeps a reference to the string referenced by \p Input.
     78   Stream(StringRef Input, SourceMgr &, bool ShowColors = true);
     79 
     80   Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true);
     81   ~Stream();
     82 
     83   document_iterator begin();
     84   document_iterator end();
     85   void skip();
     86   bool failed();
     87   bool validate() {
     88     skip();
     89     return !failed();
     90   }
     91 
     92   void printError(Node *N, const Twine &Msg);
     93 
     94 private:
     95   std::unique_ptr<Scanner> scanner;
     96   std::unique_ptr<Document> CurrentDoc;
     97 
     98   friend class Document;
     99 };
    100 
    101 /// \brief Abstract base class for all Nodes.
    102 class Node {
    103   virtual void anchor();
    104 
    105 public:
    106   enum NodeKind {
    107     NK_Null,
    108     NK_Scalar,
    109     NK_BlockScalar,
    110     NK_KeyValue,
    111     NK_Mapping,
    112     NK_Sequence,
    113     NK_Alias
    114   };
    115 
    116   Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor,
    117        StringRef Tag);
    118 
    119   /// \brief Get the value of the anchor attached to this node. If it does not
    120   ///        have one, getAnchor().size() will be 0.
    121   StringRef getAnchor() const { return Anchor; }
    122 
    123   /// \brief Get the tag as it was written in the document. This does not
    124   ///   perform tag resolution.
    125   StringRef getRawTag() const { return Tag; }
    126 
    127   /// \brief Get the verbatium tag for a given Node. This performs tag resoluton
    128   ///   and substitution.
    129   std::string getVerbatimTag() const;
    130 
    131   SMRange getSourceRange() const { return SourceRange; }
    132   void setSourceRange(SMRange SR) { SourceRange = SR; }
    133 
    134   // These functions forward to Document and Scanner.
    135   Token &peekNext();
    136   Token getNext();
    137   Node *parseBlockNode();
    138   BumpPtrAllocator &getAllocator();
    139   void setError(const Twine &Message, Token &Location) const;
    140   bool failed() const;
    141 
    142   virtual void skip() {}
    143 
    144   unsigned int getType() const { return TypeID; }
    145 
    146   void *operator new(size_t Size, BumpPtrAllocator &Alloc,
    147                      size_t Alignment = 16) LLVM_NOEXCEPT {
    148     return Alloc.Allocate(Size, Alignment);
    149   }
    150 
    151   void operator delete(void *Ptr, BumpPtrAllocator &Alloc,
    152                        size_t Size) LLVM_NOEXCEPT {
    153     Alloc.Deallocate(Ptr, Size);
    154   }
    155 
    156 protected:
    157   std::unique_ptr<Document> &Doc;
    158   SMRange SourceRange;
    159 
    160   void operator delete(void *) LLVM_NOEXCEPT = delete;
    161 
    162   ~Node() = default;
    163 
    164 private:
    165   unsigned int TypeID;
    166   StringRef Anchor;
    167   /// \brief The tag as typed in the document.
    168   StringRef Tag;
    169 };
    170 
    171 /// \brief A null value.
    172 ///
    173 /// Example:
    174 ///   !!null null
    175 class NullNode final : public Node {
    176   void anchor() override;
    177 
    178 public:
    179   NullNode(std::unique_ptr<Document> &D)
    180       : Node(NK_Null, D, StringRef(), StringRef()) {}
    181 
    182   static inline bool classof(const Node *N) { return N->getType() == NK_Null; }
    183 };
    184 
    185 /// \brief A scalar node is an opaque datum that can be presented as a
    186 ///        series of zero or more Unicode scalar values.
    187 ///
    188 /// Example:
    189 ///   Adena
    190 class ScalarNode final : public Node {
    191   void anchor() override;
    192 
    193 public:
    194   ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
    195              StringRef Val)
    196       : Node(NK_Scalar, D, Anchor, Tag), Value(Val) {
    197     SMLoc Start = SMLoc::getFromPointer(Val.begin());
    198     SMLoc End = SMLoc::getFromPointer(Val.end());
    199     SourceRange = SMRange(Start, End);
    200   }
    201 
    202   // Return Value without any escaping or folding or other fun YAML stuff. This
    203   // is the exact bytes that are contained in the file (after conversion to
    204   // utf8).
    205   StringRef getRawValue() const { return Value; }
    206 
    207   /// \brief Gets the value of this node as a StringRef.
    208   ///
    209   /// \param Storage is used to store the content of the returned StringRef iff
    210   ///        it requires any modification from how it appeared in the source.
    211   ///        This happens with escaped characters and multi-line literals.
    212   StringRef getValue(SmallVectorImpl<char> &Storage) const;
    213 
    214   static inline bool classof(const Node *N) {
    215     return N->getType() == NK_Scalar;
    216   }
    217 
    218 private:
    219   StringRef Value;
    220 
    221   StringRef unescapeDoubleQuoted(StringRef UnquotedValue,
    222                                  StringRef::size_type Start,
    223                                  SmallVectorImpl<char> &Storage) const;
    224 };
    225 
    226 /// \brief A block scalar node is an opaque datum that can be presented as a
    227 ///        series of zero or more Unicode scalar values.
    228 ///
    229 /// Example:
    230 ///   |
    231 ///     Hello
    232 ///     World
    233 class BlockScalarNode final : public Node {
    234   void anchor() override;
    235 
    236 public:
    237   BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
    238                   StringRef Value, StringRef RawVal)
    239       : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) {
    240     SMLoc Start = SMLoc::getFromPointer(RawVal.begin());
    241     SMLoc End = SMLoc::getFromPointer(RawVal.end());
    242     SourceRange = SMRange(Start, End);
    243   }
    244 
    245   /// \brief Gets the value of this node as a StringRef.
    246   StringRef getValue() const { return Value; }
    247 
    248   static inline bool classof(const Node *N) {
    249     return N->getType() == NK_BlockScalar;
    250   }
    251 
    252 private:
    253   StringRef Value;
    254 };
    255 
    256 /// \brief A key and value pair. While not technically a Node under the YAML
    257 ///        representation graph, it is easier to treat them this way.
    258 ///
    259 /// TODO: Consider making this not a child of Node.
    260 ///
    261 /// Example:
    262 ///   Section: .text
    263 class KeyValueNode final : public Node {
    264   void anchor() override;
    265 
    266 public:
    267   KeyValueNode(std::unique_ptr<Document> &D)
    268       : Node(NK_KeyValue, D, StringRef(), StringRef()), Key(nullptr),
    269         Value(nullptr) {}
    270 
    271   /// \brief Parse and return the key.
    272   ///
    273   /// This may be called multiple times.
    274   ///
    275   /// \returns The key, or nullptr if failed() == true.
    276   Node *getKey();
    277 
    278   /// \brief Parse and return the value.
    279   ///
    280   /// This may be called multiple times.
    281   ///
    282   /// \returns The value, or nullptr if failed() == true.
    283   Node *getValue();
    284 
    285   void skip() override {
    286     getKey()->skip();
    287     if (Node *Val = getValue())
    288       Val->skip();
    289   }
    290 
    291   static inline bool classof(const Node *N) {
    292     return N->getType() == NK_KeyValue;
    293   }
    294 
    295 private:
    296   Node *Key;
    297   Node *Value;
    298 };
    299 
    300 /// \brief This is an iterator abstraction over YAML collections shared by both
    301 ///        sequences and maps.
    302 ///
    303 /// BaseT must have a ValueT* member named CurrentEntry and a member function
    304 /// increment() which must set CurrentEntry to 0 to create an end iterator.
    305 template <class BaseT, class ValueT>
    306 class basic_collection_iterator
    307     : public std::iterator<std::input_iterator_tag, ValueT> {
    308 public:
    309   basic_collection_iterator() : Base(nullptr) {}
    310   basic_collection_iterator(BaseT *B) : Base(B) {}
    311 
    312   ValueT *operator->() const {
    313     assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    314     return Base->CurrentEntry;
    315   }
    316 
    317   ValueT &operator*() const {
    318     assert(Base && Base->CurrentEntry &&
    319            "Attempted to dereference end iterator!");
    320     return *Base->CurrentEntry;
    321   }
    322 
    323   operator ValueT *() const {
    324     assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    325     return Base->CurrentEntry;
    326   }
    327 
    328   /// Note on EqualityComparable:
    329   ///
    330   /// The iterator is not re-entrant,
    331   /// it is meant to be used for parsing YAML on-demand
    332   /// Once iteration started - it can point only to one entry at a time
    333   /// hence Base.CurrentEntry and Other.Base.CurrentEntry are equal
    334   /// iff Base and Other.Base are equal.
    335   bool operator==(const basic_collection_iterator &Other) const {
    336     if (Base && (Base == Other.Base)) {
    337       assert((Base->CurrentEntry == Other.Base->CurrentEntry)
    338              && "Equal Bases expected to point to equal Entries");
    339     }
    340 
    341     return Base == Other.Base;
    342   }
    343 
    344   bool operator!=(const basic_collection_iterator &Other) const {
    345     return !(Base == Other.Base);
    346   }
    347 
    348   basic_collection_iterator &operator++() {
    349     assert(Base && "Attempted to advance iterator past end!");
    350     Base->increment();
    351     // Create an end iterator.
    352     if (!Base->CurrentEntry)
    353       Base = nullptr;
    354     return *this;
    355   }
    356 
    357 private:
    358   BaseT *Base;
    359 };
    360 
    361 // The following two templates are used for both MappingNode and Sequence Node.
    362 template <class CollectionType>
    363 typename CollectionType::iterator begin(CollectionType &C) {
    364   assert(C.IsAtBeginning && "You may only iterate over a collection once!");
    365   C.IsAtBeginning = false;
    366   typename CollectionType::iterator ret(&C);
    367   ++ret;
    368   return ret;
    369 }
    370 
    371 template <class CollectionType> void skip(CollectionType &C) {
    372   // TODO: support skipping from the middle of a parsed collection ;/
    373   assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
    374   if (C.IsAtBeginning)
    375     for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e;
    376          ++i)
    377       i->skip();
    378 }
    379 
    380 /// \brief Represents a YAML map created from either a block map for a flow map.
    381 ///
    382 /// This parses the YAML stream as increment() is called.
    383 ///
    384 /// Example:
    385 ///   Name: _main
    386 ///   Scope: Global
    387 class MappingNode final : public Node {
    388   void anchor() override;
    389 
    390 public:
    391   enum MappingType {
    392     MT_Block,
    393     MT_Flow,
    394     MT_Inline ///< An inline mapping node is used for "[key: value]".
    395   };
    396 
    397   MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
    398               MappingType MT)
    399       : Node(NK_Mapping, D, Anchor, Tag), Type(MT), IsAtBeginning(true),
    400         IsAtEnd(false), CurrentEntry(nullptr) {}
    401 
    402   friend class basic_collection_iterator<MappingNode, KeyValueNode>;
    403   typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator;
    404   template <class T> friend typename T::iterator yaml::begin(T &);
    405   template <class T> friend void yaml::skip(T &);
    406 
    407   iterator begin() { return yaml::begin(*this); }
    408 
    409   iterator end() { return iterator(); }
    410 
    411   void skip() override { yaml::skip(*this); }
    412 
    413   static inline bool classof(const Node *N) {
    414     return N->getType() == NK_Mapping;
    415   }
    416 
    417 private:
    418   MappingType Type;
    419   bool IsAtBeginning;
    420   bool IsAtEnd;
    421   KeyValueNode *CurrentEntry;
    422 
    423   void increment();
    424 };
    425 
    426 /// \brief Represents a YAML sequence created from either a block sequence for a
    427 ///        flow sequence.
    428 ///
    429 /// This parses the YAML stream as increment() is called.
    430 ///
    431 /// Example:
    432 ///   - Hello
    433 ///   - World
    434 class SequenceNode final : public Node {
    435   void anchor() override;
    436 
    437 public:
    438   enum SequenceType {
    439     ST_Block,
    440     ST_Flow,
    441     // Use for:
    442     //
    443     // key:
    444     // - val1
    445     // - val2
    446     //
    447     // As a BlockMappingEntry and BlockEnd are not created in this case.
    448     ST_Indentless
    449   };
    450 
    451   SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
    452                SequenceType ST)
    453       : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST), IsAtBeginning(true),
    454         IsAtEnd(false),
    455         WasPreviousTokenFlowEntry(true), // Start with an imaginary ','.
    456         CurrentEntry(nullptr) {}
    457 
    458   friend class basic_collection_iterator<SequenceNode, Node>;
    459   typedef basic_collection_iterator<SequenceNode, Node> iterator;
    460   template <class T> friend typename T::iterator yaml::begin(T &);
    461   template <class T> friend void yaml::skip(T &);
    462 
    463   void increment();
    464 
    465   iterator begin() { return yaml::begin(*this); }
    466 
    467   iterator end() { return iterator(); }
    468 
    469   void skip() override { yaml::skip(*this); }
    470 
    471   static inline bool classof(const Node *N) {
    472     return N->getType() == NK_Sequence;
    473   }
    474 
    475 private:
    476   SequenceType SeqType;
    477   bool IsAtBeginning;
    478   bool IsAtEnd;
    479   bool WasPreviousTokenFlowEntry;
    480   Node *CurrentEntry;
    481 };
    482 
    483 /// \brief Represents an alias to a Node with an anchor.
    484 ///
    485 /// Example:
    486 ///   *AnchorName
    487 class AliasNode final : public Node {
    488   void anchor() override;
    489 
    490 public:
    491   AliasNode(std::unique_ptr<Document> &D, StringRef Val)
    492       : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {}
    493 
    494   StringRef getName() const { return Name; }
    495   Node *getTarget();
    496 
    497   static inline bool classof(const Node *N) { return N->getType() == NK_Alias; }
    498 
    499 private:
    500   StringRef Name;
    501 };
    502 
    503 /// \brief A YAML Stream is a sequence of Documents. A document contains a root
    504 ///        node.
    505 class Document {
    506 public:
    507   /// \brief Root for parsing a node. Returns a single node.
    508   Node *parseBlockNode();
    509 
    510   Document(Stream &ParentStream);
    511 
    512   /// \brief Finish parsing the current document and return true if there are
    513   ///        more. Return false otherwise.
    514   bool skip();
    515 
    516   /// \brief Parse and return the root level node.
    517   Node *getRoot() {
    518     if (Root)
    519       return Root;
    520     return Root = parseBlockNode();
    521   }
    522 
    523   const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; }
    524 
    525 private:
    526   friend class Node;
    527   friend class document_iterator;
    528 
    529   /// \brief Stream to read tokens from.
    530   Stream &stream;
    531 
    532   /// \brief Used to allocate nodes to. All are destroyed without calling their
    533   ///        destructor when the document is destroyed.
    534   BumpPtrAllocator NodeAllocator;
    535 
    536   /// \brief The root node. Used to support skipping a partially parsed
    537   ///        document.
    538   Node *Root;
    539 
    540   /// \brief Maps tag prefixes to their expansion.
    541   std::map<StringRef, StringRef> TagMap;
    542 
    543   Token &peekNext();
    544   Token getNext();
    545   void setError(const Twine &Message, Token &Location) const;
    546   bool failed() const;
    547 
    548   /// \brief Parse %BLAH directives and return true if any were encountered.
    549   bool parseDirectives();
    550 
    551   /// \brief Parse %YAML
    552   void parseYAMLDirective();
    553 
    554   /// \brief Parse %TAG
    555   void parseTAGDirective();
    556 
    557   /// \brief Consume the next token and error if it is not \a TK.
    558   bool expectToken(int TK);
    559 };
    560 
    561 /// \brief Iterator abstraction for Documents over a Stream.
    562 class document_iterator {
    563 public:
    564   document_iterator() : Doc(nullptr) {}
    565   document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {}
    566 
    567   bool operator==(const document_iterator &Other) {
    568     if (isAtEnd() || Other.isAtEnd())
    569       return isAtEnd() && Other.isAtEnd();
    570 
    571     return Doc == Other.Doc;
    572   }
    573   bool operator!=(const document_iterator &Other) { return !(*this == Other); }
    574 
    575   document_iterator operator++() {
    576     assert(Doc && "incrementing iterator past the end.");
    577     if (!(*Doc)->skip()) {
    578       Doc->reset(nullptr);
    579     } else {
    580       Stream &S = (*Doc)->stream;
    581       Doc->reset(new Document(S));
    582     }
    583     return *this;
    584   }
    585 
    586   Document &operator*() { return *Doc->get(); }
    587 
    588   std::unique_ptr<Document> &operator->() { return *Doc; }
    589 
    590 private:
    591   bool isAtEnd() const { return !Doc || !*Doc; }
    592 
    593   std::unique_ptr<Document> *Doc;
    594 };
    595 
    596 } // End namespace yaml.
    597 
    598 } // End namespace llvm.
    599 
    600 #endif
    601