Home | History | Annotate | Download | only in Support
      1 //===--- YAMLParser.h - Simple YAML parser --------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 //  This is a YAML 1.2 parser.
     11 //
     12 //  See http://www.yaml.org/spec/1.2/spec.html for the full standard.
     13 //
     14 //  This currently does not implement the following:
     15 //    * Multi-line literal folding.
     16 //    * Tag resolution.
     17 //    * UTF-16.
     18 //    * BOMs anywhere other than the first Unicode scalar value in the file.
     19 //
     20 //  The most important class here is Stream. This represents a YAML stream with
     21 //  0, 1, or many documents.
     22 //
     23 //  SourceMgr sm;
     24 //  StringRef input = getInput();
     25 //  yaml::Stream stream(input, sm);
     26 //
     27 //  for (yaml::document_iterator di = stream.begin(), de = stream.end();
     28 //       di != de; ++di) {
     29 //    yaml::Node *n = di->getRoot();
     30 //    if (n) {
     31 //      // Do something with n...
     32 //    } else
     33 //      break;
     34 //  }
     35 //
     36 //===----------------------------------------------------------------------===//
     37 
     38 #ifndef LLVM_SUPPORT_YAMLPARSER_H
     39 #define LLVM_SUPPORT_YAMLPARSER_H
     40 
     41 #include "llvm/ADT/StringRef.h"
     42 #include "llvm/Support/Allocator.h"
     43 #include "llvm/Support/SMLoc.h"
     44 #include <limits>
     45 #include <map>
     46 #include <utility>
     47 
     48 namespace llvm {
     49 class MemoryBufferRef;
     50 class SourceMgr;
     51 class Twine;
     52 class raw_ostream;
     53 
     54 namespace yaml {
     55 
     56 class document_iterator;
     57 class Document;
     58 class Node;
     59 class Scanner;
     60 struct Token;
     61 
     62 /// \brief Dump all the tokens in this stream to OS.
     63 /// \returns true if there was an error, false otherwise.
     64 bool dumpTokens(StringRef Input, raw_ostream &);
     65 
     66 /// \brief Scans all tokens in input without outputting anything. This is used
     67 ///        for benchmarking the tokenizer.
     68 /// \returns true if there was an error, false otherwise.
     69 bool scanTokens(StringRef Input);
     70 
     71 /// \brief Escape \a Input for a double quoted scalar.
     72 std::string escape(StringRef Input);
     73 
     74 /// \brief This class represents a YAML stream potentially containing multiple
     75 ///        documents.
     76 class Stream {
     77 public:
     78   /// \brief This keeps a reference to the string referenced by \p Input.
     79   Stream(StringRef Input, SourceMgr &, bool ShowColors = true);
     80 
     81   Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true);
     82   ~Stream();
     83 
     84   document_iterator begin();
     85   document_iterator end();
     86   void skip();
     87   bool failed();
     88   bool validate() {
     89     skip();
     90     return !failed();
     91   }
     92 
     93   void printError(Node *N, const Twine &Msg);
     94 
     95 private:
     96   std::unique_ptr<Scanner> scanner;
     97   std::unique_ptr<Document> CurrentDoc;
     98 
     99   friend class Document;
    100 };
    101 
    102 /// \brief Abstract base class for all Nodes.
    103 class Node {
    104   virtual void anchor();
    105 
    106 public:
    107   enum NodeKind {
    108     NK_Null,
    109     NK_Scalar,
    110     NK_BlockScalar,
    111     NK_KeyValue,
    112     NK_Mapping,
    113     NK_Sequence,
    114     NK_Alias
    115   };
    116 
    117   Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor,
    118        StringRef Tag);
    119 
    120   /// \brief Get the value of the anchor attached to this node. If it does not
    121   ///        have one, getAnchor().size() will be 0.
    122   StringRef getAnchor() const { return Anchor; }
    123 
    124   /// \brief Get the tag as it was written in the document. This does not
    125   ///   perform tag resolution.
    126   StringRef getRawTag() const { return Tag; }
    127 
    128   /// \brief Get the verbatium tag for a given Node. This performs tag resoluton
    129   ///   and substitution.
    130   std::string getVerbatimTag() const;
    131 
    132   SMRange getSourceRange() const { return SourceRange; }
    133   void setSourceRange(SMRange SR) { SourceRange = SR; }
    134 
    135   // These functions forward to Document and Scanner.
    136   Token &peekNext();
    137   Token getNext();
    138   Node *parseBlockNode();
    139   BumpPtrAllocator &getAllocator();
    140   void setError(const Twine &Message, Token &Location) const;
    141   bool failed() const;
    142 
    143   virtual void skip() {}
    144 
    145   unsigned int getType() const { return TypeID; }
    146 
    147   void *operator new(size_t Size, BumpPtrAllocator &Alloc,
    148                      size_t Alignment = 16) LLVM_NOEXCEPT {
    149     return Alloc.Allocate(Size, Alignment);
    150   }
    151 
    152   void operator delete(void *Ptr, BumpPtrAllocator &Alloc,
    153                        size_t Size) LLVM_NOEXCEPT {
    154     Alloc.Deallocate(Ptr, Size);
    155   }
    156 
    157 protected:
    158   std::unique_ptr<Document> &Doc;
    159   SMRange SourceRange;
    160 
    161   void operator delete(void *) LLVM_NOEXCEPT = delete;
    162 
    163   ~Node() = default;
    164 
    165 private:
    166   unsigned int TypeID;
    167   StringRef Anchor;
    168   /// \brief The tag as typed in the document.
    169   StringRef Tag;
    170 };
    171 
    172 /// \brief A null value.
    173 ///
    174 /// Example:
    175 ///   !!null null
    176 class NullNode final : public Node {
    177   void anchor() override;
    178 
    179 public:
    180   NullNode(std::unique_ptr<Document> &D)
    181       : Node(NK_Null, D, StringRef(), StringRef()) {}
    182 
    183   static inline bool classof(const Node *N) { return N->getType() == NK_Null; }
    184 };
    185 
    186 /// \brief A scalar node is an opaque datum that can be presented as a
    187 ///        series of zero or more Unicode scalar values.
    188 ///
    189 /// Example:
    190 ///   Adena
    191 class ScalarNode final : public Node {
    192   void anchor() override;
    193 
    194 public:
    195   ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
    196              StringRef Val)
    197       : Node(NK_Scalar, D, Anchor, Tag), Value(Val) {
    198     SMLoc Start = SMLoc::getFromPointer(Val.begin());
    199     SMLoc End = SMLoc::getFromPointer(Val.end());
    200     SourceRange = SMRange(Start, End);
    201   }
    202 
    203   // Return Value without any escaping or folding or other fun YAML stuff. This
    204   // is the exact bytes that are contained in the file (after conversion to
    205   // utf8).
    206   StringRef getRawValue() const { return Value; }
    207 
    208   /// \brief Gets the value of this node as a StringRef.
    209   ///
    210   /// \param Storage is used to store the content of the returned StringRef iff
    211   ///        it requires any modification from how it appeared in the source.
    212   ///        This happens with escaped characters and multi-line literals.
    213   StringRef getValue(SmallVectorImpl<char> &Storage) const;
    214 
    215   static inline bool classof(const Node *N) {
    216     return N->getType() == NK_Scalar;
    217   }
    218 
    219 private:
    220   StringRef Value;
    221 
    222   StringRef unescapeDoubleQuoted(StringRef UnquotedValue,
    223                                  StringRef::size_type Start,
    224                                  SmallVectorImpl<char> &Storage) const;
    225 };
    226 
    227 /// \brief A block scalar node is an opaque datum that can be presented as a
    228 ///        series of zero or more Unicode scalar values.
    229 ///
    230 /// Example:
    231 ///   |
    232 ///     Hello
    233 ///     World
    234 class BlockScalarNode final : public Node {
    235   void anchor() override;
    236 
    237 public:
    238   BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
    239                   StringRef Value, StringRef RawVal)
    240       : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) {
    241     SMLoc Start = SMLoc::getFromPointer(RawVal.begin());
    242     SMLoc End = SMLoc::getFromPointer(RawVal.end());
    243     SourceRange = SMRange(Start, End);
    244   }
    245 
    246   /// \brief Gets the value of this node as a StringRef.
    247   StringRef getValue() const { return Value; }
    248 
    249   static inline bool classof(const Node *N) {
    250     return N->getType() == NK_BlockScalar;
    251   }
    252 
    253 private:
    254   StringRef Value;
    255 };
    256 
    257 /// \brief A key and value pair. While not technically a Node under the YAML
    258 ///        representation graph, it is easier to treat them this way.
    259 ///
    260 /// TODO: Consider making this not a child of Node.
    261 ///
    262 /// Example:
    263 ///   Section: .text
    264 class KeyValueNode final : public Node {
    265   void anchor() override;
    266 
    267 public:
    268   KeyValueNode(std::unique_ptr<Document> &D)
    269       : Node(NK_KeyValue, D, StringRef(), StringRef()), Key(nullptr),
    270         Value(nullptr) {}
    271 
    272   /// \brief Parse and return the key.
    273   ///
    274   /// This may be called multiple times.
    275   ///
    276   /// \returns The key, or nullptr if failed() == true.
    277   Node *getKey();
    278 
    279   /// \brief Parse and return the value.
    280   ///
    281   /// This may be called multiple times.
    282   ///
    283   /// \returns The value, or nullptr if failed() == true.
    284   Node *getValue();
    285 
    286   void skip() override {
    287     getKey()->skip();
    288     if (Node *Val = getValue())
    289       Val->skip();
    290   }
    291 
    292   static inline bool classof(const Node *N) {
    293     return N->getType() == NK_KeyValue;
    294   }
    295 
    296 private:
    297   Node *Key;
    298   Node *Value;
    299 };
    300 
    301 /// \brief This is an iterator abstraction over YAML collections shared by both
    302 ///        sequences and maps.
    303 ///
    304 /// BaseT must have a ValueT* member named CurrentEntry and a member function
    305 /// increment() which must set CurrentEntry to 0 to create an end iterator.
    306 template <class BaseT, class ValueT>
    307 class basic_collection_iterator
    308     : public std::iterator<std::forward_iterator_tag, ValueT> {
    309 public:
    310   basic_collection_iterator() : Base(nullptr) {}
    311   basic_collection_iterator(BaseT *B) : Base(B) {}
    312 
    313   ValueT *operator->() const {
    314     assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    315     return Base->CurrentEntry;
    316   }
    317 
    318   ValueT &operator*() const {
    319     assert(Base && Base->CurrentEntry &&
    320            "Attempted to dereference end iterator!");
    321     return *Base->CurrentEntry;
    322   }
    323 
    324   operator ValueT *() const {
    325     assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    326     return Base->CurrentEntry;
    327   }
    328 
    329   bool operator!=(const basic_collection_iterator &Other) const {
    330     if (Base != Other.Base)
    331       return true;
    332     return (Base && Other.Base) &&
    333            Base->CurrentEntry != Other.Base->CurrentEntry;
    334   }
    335 
    336   basic_collection_iterator &operator++() {
    337     assert(Base && "Attempted to advance iterator past end!");
    338     Base->increment();
    339     // Create an end iterator.
    340     if (!Base->CurrentEntry)
    341       Base = nullptr;
    342     return *this;
    343   }
    344 
    345 private:
    346   BaseT *Base;
    347 };
    348 
    349 // The following two templates are used for both MappingNode and Sequence Node.
    350 template <class CollectionType>
    351 typename CollectionType::iterator begin(CollectionType &C) {
    352   assert(C.IsAtBeginning && "You may only iterate over a collection once!");
    353   C.IsAtBeginning = false;
    354   typename CollectionType::iterator ret(&C);
    355   ++ret;
    356   return ret;
    357 }
    358 
    359 template <class CollectionType> void skip(CollectionType &C) {
    360   // TODO: support skipping from the middle of a parsed collection ;/
    361   assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
    362   if (C.IsAtBeginning)
    363     for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e;
    364          ++i)
    365       i->skip();
    366 }
    367 
    368 /// \brief Represents a YAML map created from either a block map for a flow map.
    369 ///
    370 /// This parses the YAML stream as increment() is called.
    371 ///
    372 /// Example:
    373 ///   Name: _main
    374 ///   Scope: Global
    375 class MappingNode final : public Node {
    376   void anchor() override;
    377 
    378 public:
    379   enum MappingType {
    380     MT_Block,
    381     MT_Flow,
    382     MT_Inline ///< An inline mapping node is used for "[key: value]".
    383   };
    384 
    385   MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
    386               MappingType MT)
    387       : Node(NK_Mapping, D, Anchor, Tag), Type(MT), IsAtBeginning(true),
    388         IsAtEnd(false), CurrentEntry(nullptr) {}
    389 
    390   friend class basic_collection_iterator<MappingNode, KeyValueNode>;
    391   typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator;
    392   template <class T> friend typename T::iterator yaml::begin(T &);
    393   template <class T> friend void yaml::skip(T &);
    394 
    395   iterator begin() { return yaml::begin(*this); }
    396 
    397   iterator end() { return iterator(); }
    398 
    399   void skip() override { yaml::skip(*this); }
    400 
    401   static inline bool classof(const Node *N) {
    402     return N->getType() == NK_Mapping;
    403   }
    404 
    405 private:
    406   MappingType Type;
    407   bool IsAtBeginning;
    408   bool IsAtEnd;
    409   KeyValueNode *CurrentEntry;
    410 
    411   void increment();
    412 };
    413 
    414 /// \brief Represents a YAML sequence created from either a block sequence for a
    415 ///        flow sequence.
    416 ///
    417 /// This parses the YAML stream as increment() is called.
    418 ///
    419 /// Example:
    420 ///   - Hello
    421 ///   - World
    422 class SequenceNode final : public Node {
    423   void anchor() override;
    424 
    425 public:
    426   enum SequenceType {
    427     ST_Block,
    428     ST_Flow,
    429     // Use for:
    430     //
    431     // key:
    432     // - val1
    433     // - val2
    434     //
    435     // As a BlockMappingEntry and BlockEnd are not created in this case.
    436     ST_Indentless
    437   };
    438 
    439   SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
    440                SequenceType ST)
    441       : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST), IsAtBeginning(true),
    442         IsAtEnd(false),
    443         WasPreviousTokenFlowEntry(true), // Start with an imaginary ','.
    444         CurrentEntry(nullptr) {}
    445 
    446   friend class basic_collection_iterator<SequenceNode, Node>;
    447   typedef basic_collection_iterator<SequenceNode, Node> iterator;
    448   template <class T> friend typename T::iterator yaml::begin(T &);
    449   template <class T> friend void yaml::skip(T &);
    450 
    451   void increment();
    452 
    453   iterator begin() { return yaml::begin(*this); }
    454 
    455   iterator end() { return iterator(); }
    456 
    457   void skip() override { yaml::skip(*this); }
    458 
    459   static inline bool classof(const Node *N) {
    460     return N->getType() == NK_Sequence;
    461   }
    462 
    463 private:
    464   SequenceType SeqType;
    465   bool IsAtBeginning;
    466   bool IsAtEnd;
    467   bool WasPreviousTokenFlowEntry;
    468   Node *CurrentEntry;
    469 };
    470 
    471 /// \brief Represents an alias to a Node with an anchor.
    472 ///
    473 /// Example:
    474 ///   *AnchorName
    475 class AliasNode final : public Node {
    476   void anchor() override;
    477 
    478 public:
    479   AliasNode(std::unique_ptr<Document> &D, StringRef Val)
    480       : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {}
    481 
    482   StringRef getName() const { return Name; }
    483   Node *getTarget();
    484 
    485   static inline bool classof(const Node *N) { return N->getType() == NK_Alias; }
    486 
    487 private:
    488   StringRef Name;
    489 };
    490 
    491 /// \brief A YAML Stream is a sequence of Documents. A document contains a root
    492 ///        node.
    493 class Document {
    494 public:
    495   /// \brief Root for parsing a node. Returns a single node.
    496   Node *parseBlockNode();
    497 
    498   Document(Stream &ParentStream);
    499 
    500   /// \brief Finish parsing the current document and return true if there are
    501   ///        more. Return false otherwise.
    502   bool skip();
    503 
    504   /// \brief Parse and return the root level node.
    505   Node *getRoot() {
    506     if (Root)
    507       return Root;
    508     return Root = parseBlockNode();
    509   }
    510 
    511   const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; }
    512 
    513 private:
    514   friend class Node;
    515   friend class document_iterator;
    516 
    517   /// \brief Stream to read tokens from.
    518   Stream &stream;
    519 
    520   /// \brief Used to allocate nodes to. All are destroyed without calling their
    521   ///        destructor when the document is destroyed.
    522   BumpPtrAllocator NodeAllocator;
    523 
    524   /// \brief The root node. Used to support skipping a partially parsed
    525   ///        document.
    526   Node *Root;
    527 
    528   /// \brief Maps tag prefixes to their expansion.
    529   std::map<StringRef, StringRef> TagMap;
    530 
    531   Token &peekNext();
    532   Token getNext();
    533   void setError(const Twine &Message, Token &Location) const;
    534   bool failed() const;
    535 
    536   /// \brief Parse %BLAH directives and return true if any were encountered.
    537   bool parseDirectives();
    538 
    539   /// \brief Parse %YAML
    540   void parseYAMLDirective();
    541 
    542   /// \brief Parse %TAG
    543   void parseTAGDirective();
    544 
    545   /// \brief Consume the next token and error if it is not \a TK.
    546   bool expectToken(int TK);
    547 };
    548 
    549 /// \brief Iterator abstraction for Documents over a Stream.
    550 class document_iterator {
    551 public:
    552   document_iterator() : Doc(nullptr) {}
    553   document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {}
    554 
    555   bool operator==(const document_iterator &Other) {
    556     if (isAtEnd() || Other.isAtEnd())
    557       return isAtEnd() && Other.isAtEnd();
    558 
    559     return Doc == Other.Doc;
    560   }
    561   bool operator!=(const document_iterator &Other) { return !(*this == Other); }
    562 
    563   document_iterator operator++() {
    564     assert(Doc && "incrementing iterator past the end.");
    565     if (!(*Doc)->skip()) {
    566       Doc->reset(nullptr);
    567     } else {
    568       Stream &S = (*Doc)->stream;
    569       Doc->reset(new Document(S));
    570     }
    571     return *this;
    572   }
    573 
    574   Document &operator*() { return *Doc->get(); }
    575 
    576   std::unique_ptr<Document> &operator->() { return *Doc; }
    577 
    578 private:
    579   bool isAtEnd() const { return !Doc || !*Doc; }
    580 
    581   std::unique_ptr<Document> *Doc;
    582 };
    583 
    584 } // End namespace yaml.
    585 
    586 } // End namespace llvm.
    587 
    588 #endif
    589