Home | History | Annotate | Download | only in Support
      1 //===--- YAMLParser.h - Simple YAML parser --------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 //  This is a YAML 1.2 parser.
     11 //
     12 //  See http://www.yaml.org/spec/1.2/spec.html for the full standard.
     13 //
     14 //  This currently does not implement the following:
     15 //    * Multi-line literal folding.
     16 //    * Tag resolution.
     17 //    * UTF-16.
     18 //    * BOMs anywhere other than the first Unicode scalar value in the file.
     19 //
     20 //  The most important class here is Stream. This represents a YAML stream with
     21 //  0, 1, or many documents.
     22 //
     23 //  SourceMgr sm;
     24 //  StringRef input = getInput();
     25 //  yaml::Stream stream(input, sm);
     26 //
     27 //  for (yaml::document_iterator di = stream.begin(), de = stream.end();
     28 //       di != de; ++di) {
     29 //    yaml::Node *n = di->getRoot();
     30 //    if (n) {
     31 //      // Do something with n...
     32 //    } else
     33 //      break;
     34 //  }
     35 //
     36 //===----------------------------------------------------------------------===//
     37 
     38 #ifndef LLVM_SUPPORT_YAMLPARSER_H
     39 #define LLVM_SUPPORT_YAMLPARSER_H
     40 
     41 #include "llvm/ADT/OwningPtr.h"
     42 #include "llvm/ADT/SmallString.h"
     43 #include "llvm/ADT/StringRef.h"
     44 #include "llvm/Support/Allocator.h"
     45 #include "llvm/Support/SMLoc.h"
     46 #include <limits>
     47 #include <utility>
     48 
     49 namespace llvm {
     50 class MemoryBuffer;
     51 class SourceMgr;
     52 class raw_ostream;
     53 class Twine;
     54 
     55 namespace yaml {
     56 
     57 class document_iterator;
     58 class Document;
     59 class Node;
     60 class Scanner;
     61 struct Token;
     62 
     63 /// @brief Dump all the tokens in this stream to OS.
     64 /// @returns true if there was an error, false otherwise.
     65 bool dumpTokens(StringRef Input, raw_ostream &);
     66 
     67 /// @brief Scans all tokens in input without outputting anything. This is used
     68 ///        for benchmarking the tokenizer.
     69 /// @returns true if there was an error, false otherwise.
     70 bool scanTokens(StringRef Input);
     71 
     72 /// @brief Escape \a Input for a double quoted scalar.
     73 std::string escape(StringRef Input);
     74 
     75 /// @brief This class represents a YAML stream potentially containing multiple
     76 ///        documents.
     77 class Stream {
     78 public:
     79   /// @brief This keeps a reference to the string referenced by \p Input.
     80   Stream(StringRef Input, SourceMgr &);
     81 
     82   /// @brief This takes ownership of \p InputBuffer.
     83   Stream(MemoryBuffer *InputBuffer, SourceMgr &);
     84   ~Stream();
     85 
     86   document_iterator begin();
     87   document_iterator end();
     88   void skip();
     89   bool failed();
     90   bool validate() {
     91     skip();
     92     return !failed();
     93   }
     94 
     95   void printError(Node *N, const Twine &Msg);
     96 
     97 private:
     98   OwningPtr<Scanner> scanner;
     99   OwningPtr<Document> CurrentDoc;
    100 
    101   friend class Document;
    102 
    103   /// @brief Validate a %YAML x.x directive.
    104   void handleYAMLDirective(const Token &);
    105 };
    106 
    107 /// @brief Abstract base class for all Nodes.
    108 class Node {
    109 public:
    110   enum NodeKind {
    111     NK_Null,
    112     NK_Scalar,
    113     NK_KeyValue,
    114     NK_Mapping,
    115     NK_Sequence,
    116     NK_Alias
    117   };
    118 
    119   Node(unsigned int Type, OwningPtr<Document>&, StringRef Anchor);
    120 
    121   /// @brief Get the value of the anchor attached to this node. If it does not
    122   ///        have one, getAnchor().size() will be 0.
    123   StringRef getAnchor() const { return Anchor; }
    124 
    125   SMRange getSourceRange() const { return SourceRange; }
    126   void setSourceRange(SMRange SR) { SourceRange = SR; }
    127 
    128   // These functions forward to Document and Scanner.
    129   Token &peekNext();
    130   Token getNext();
    131   Node *parseBlockNode();
    132   BumpPtrAllocator &getAllocator();
    133   void setError(const Twine &Message, Token &Location) const;
    134   bool failed() const;
    135 
    136   virtual void skip() {}
    137 
    138   unsigned int getType() const { return TypeID; }
    139 
    140   void *operator new ( size_t Size
    141                      , BumpPtrAllocator &Alloc
    142                      , size_t Alignment = 16) throw() {
    143     return Alloc.Allocate(Size, Alignment);
    144   }
    145 
    146   void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t) throw() {
    147     Alloc.Deallocate(Ptr);
    148   }
    149 
    150 protected:
    151   OwningPtr<Document> &Doc;
    152   SMRange SourceRange;
    153 
    154   void operator delete(void *) throw() {}
    155 
    156   virtual ~Node() {}
    157 
    158 private:
    159   unsigned int TypeID;
    160   StringRef Anchor;
    161 };
    162 
    163 /// @brief A null value.
    164 ///
    165 /// Example:
    166 ///   !!null null
    167 class NullNode : public Node {
    168 public:
    169   NullNode(OwningPtr<Document> &D) : Node(NK_Null, D, StringRef()) {}
    170 
    171   static inline bool classof(const Node *N) {
    172     return N->getType() == NK_Null;
    173   }
    174 };
    175 
    176 /// @brief A scalar node is an opaque datum that can be presented as a
    177 ///        series of zero or more Unicode scalar values.
    178 ///
    179 /// Example:
    180 ///   Adena
    181 class ScalarNode : public Node {
    182 public:
    183   ScalarNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Val)
    184     : Node(NK_Scalar, D, Anchor)
    185     , Value(Val) {
    186     SMLoc Start = SMLoc::getFromPointer(Val.begin());
    187     SMLoc End = SMLoc::getFromPointer(Val.end());
    188     SourceRange = SMRange(Start, End);
    189   }
    190 
    191   // Return Value without any escaping or folding or other fun YAML stuff. This
    192   // is the exact bytes that are contained in the file (after conversion to
    193   // utf8).
    194   StringRef getRawValue() const { return Value; }
    195 
    196   /// @brief Gets the value of this node as a StringRef.
    197   ///
    198   /// @param Storage is used to store the content of the returned StringRef iff
    199   ///        it requires any modification from how it appeared in the source.
    200   ///        This happens with escaped characters and multi-line literals.
    201   StringRef getValue(SmallVectorImpl<char> &Storage) const;
    202 
    203   static inline bool classof(const Node *N) {
    204     return N->getType() == NK_Scalar;
    205   }
    206 
    207 private:
    208   StringRef Value;
    209 
    210   StringRef unescapeDoubleQuoted( StringRef UnquotedValue
    211                                 , StringRef::size_type Start
    212                                 , SmallVectorImpl<char> &Storage) const;
    213 };
    214 
    215 /// @brief A key and value pair. While not technically a Node under the YAML
    216 ///        representation graph, it is easier to treat them this way.
    217 ///
    218 /// TODO: Consider making this not a child of Node.
    219 ///
    220 /// Example:
    221 ///   Section: .text
    222 class KeyValueNode : public Node {
    223 public:
    224   KeyValueNode(OwningPtr<Document> &D)
    225     : Node(NK_KeyValue, D, StringRef())
    226     , Key(0)
    227     , Value(0)
    228   {}
    229 
    230   /// @brief Parse and return the key.
    231   ///
    232   /// This may be called multiple times.
    233   ///
    234   /// @returns The key, or nullptr if failed() == true.
    235   Node *getKey();
    236 
    237   /// @brief Parse and return the value.
    238   ///
    239   /// This may be called multiple times.
    240   ///
    241   /// @returns The value, or nullptr if failed() == true.
    242   Node *getValue();
    243 
    244   virtual void skip() LLVM_OVERRIDE {
    245     getKey()->skip();
    246     getValue()->skip();
    247   }
    248 
    249   static inline bool classof(const Node *N) {
    250     return N->getType() == NK_KeyValue;
    251   }
    252 
    253 private:
    254   Node *Key;
    255   Node *Value;
    256 };
    257 
    258 /// @brief This is an iterator abstraction over YAML collections shared by both
    259 ///        sequences and maps.
    260 ///
    261 /// BaseT must have a ValueT* member named CurrentEntry and a member function
    262 /// increment() which must set CurrentEntry to 0 to create an end iterator.
    263 template <class BaseT, class ValueT>
    264 class basic_collection_iterator
    265   : public std::iterator<std::forward_iterator_tag, ValueT> {
    266 public:
    267   basic_collection_iterator() : Base(0) {}
    268   basic_collection_iterator(BaseT *B) : Base(B) {}
    269 
    270   ValueT *operator ->() const {
    271     assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    272     return Base->CurrentEntry;
    273   }
    274 
    275   ValueT &operator *() const {
    276     assert(Base && Base->CurrentEntry &&
    277            "Attempted to dereference end iterator!");
    278     return *Base->CurrentEntry;
    279   }
    280 
    281   operator ValueT*() const {
    282     assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    283     return Base->CurrentEntry;
    284   }
    285 
    286   bool operator !=(const basic_collection_iterator &Other) const {
    287     if(Base != Other.Base)
    288       return true;
    289     return (Base && Other.Base) && Base->CurrentEntry
    290                                    != Other.Base->CurrentEntry;
    291   }
    292 
    293   basic_collection_iterator &operator++() {
    294     assert(Base && "Attempted to advance iterator past end!");
    295     Base->increment();
    296     // Create an end iterator.
    297     if (Base->CurrentEntry == 0)
    298       Base = 0;
    299     return *this;
    300   }
    301 
    302 private:
    303   BaseT *Base;
    304 };
    305 
    306 // The following two templates are used for both MappingNode and Sequence Node.
    307 template <class CollectionType>
    308 typename CollectionType::iterator begin(CollectionType &C) {
    309   assert(C.IsAtBeginning && "You may only iterate over a collection once!");
    310   C.IsAtBeginning = false;
    311   typename CollectionType::iterator ret(&C);
    312   ++ret;
    313   return ret;
    314 }
    315 
    316 template <class CollectionType>
    317 void skip(CollectionType &C) {
    318   // TODO: support skipping from the middle of a parsed collection ;/
    319   assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
    320   if (C.IsAtBeginning)
    321     for (typename CollectionType::iterator i = begin(C), e = C.end();
    322                                            i != e; ++i)
    323       i->skip();
    324 }
    325 
    326 /// @brief Represents a YAML map created from either a block map for a flow map.
    327 ///
    328 /// This parses the YAML stream as increment() is called.
    329 ///
    330 /// Example:
    331 ///   Name: _main
    332 ///   Scope: Global
    333 class MappingNode : public Node {
    334 public:
    335   enum MappingType {
    336     MT_Block,
    337     MT_Flow,
    338     MT_Inline ///< An inline mapping node is used for "[key: value]".
    339   };
    340 
    341   MappingNode(OwningPtr<Document> &D, StringRef Anchor, MappingType MT)
    342     : Node(NK_Mapping, D, Anchor)
    343     , Type(MT)
    344     , IsAtBeginning(true)
    345     , IsAtEnd(false)
    346     , CurrentEntry(0)
    347   {}
    348 
    349   friend class basic_collection_iterator<MappingNode, KeyValueNode>;
    350   typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator;
    351   template <class T> friend typename T::iterator yaml::begin(T &);
    352   template <class T> friend void yaml::skip(T &);
    353 
    354   iterator begin() {
    355     return yaml::begin(*this);
    356   }
    357 
    358   iterator end() { return iterator(); }
    359 
    360   virtual void skip() LLVM_OVERRIDE {
    361     yaml::skip(*this);
    362   }
    363 
    364   static inline bool classof(const Node *N) {
    365     return N->getType() == NK_Mapping;
    366   }
    367 
    368 private:
    369   MappingType Type;
    370   bool IsAtBeginning;
    371   bool IsAtEnd;
    372   KeyValueNode *CurrentEntry;
    373 
    374   void increment();
    375 };
    376 
    377 /// @brief Represents a YAML sequence created from either a block sequence for a
    378 ///        flow sequence.
    379 ///
    380 /// This parses the YAML stream as increment() is called.
    381 ///
    382 /// Example:
    383 ///   - Hello
    384 ///   - World
    385 class SequenceNode : public Node {
    386 public:
    387   enum SequenceType {
    388     ST_Block,
    389     ST_Flow,
    390     // Use for:
    391     //
    392     // key:
    393     // - val1
    394     // - val2
    395     //
    396     // As a BlockMappingEntry and BlockEnd are not created in this case.
    397     ST_Indentless
    398   };
    399 
    400   SequenceNode(OwningPtr<Document> &D, StringRef Anchor, SequenceType ST)
    401     : Node(NK_Sequence, D, Anchor)
    402     , SeqType(ST)
    403     , IsAtBeginning(true)
    404     , IsAtEnd(false)
    405     , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','.
    406     , CurrentEntry(0)
    407   {}
    408 
    409   friend class basic_collection_iterator<SequenceNode, Node>;
    410   typedef basic_collection_iterator<SequenceNode, Node> iterator;
    411   template <class T> friend typename T::iterator yaml::begin(T &);
    412   template <class T> friend void yaml::skip(T &);
    413 
    414   void increment();
    415 
    416   iterator begin() {
    417     return yaml::begin(*this);
    418   }
    419 
    420   iterator end() { return iterator(); }
    421 
    422   virtual void skip() LLVM_OVERRIDE {
    423     yaml::skip(*this);
    424   }
    425 
    426   static inline bool classof(const Node *N) {
    427     return N->getType() == NK_Sequence;
    428   }
    429 
    430 private:
    431   SequenceType SeqType;
    432   bool IsAtBeginning;
    433   bool IsAtEnd;
    434   bool WasPreviousTokenFlowEntry;
    435   Node *CurrentEntry;
    436 };
    437 
    438 /// @brief Represents an alias to a Node with an anchor.
    439 ///
    440 /// Example:
    441 ///   *AnchorName
    442 class AliasNode : public Node {
    443 public:
    444   AliasNode(OwningPtr<Document> &D, StringRef Val)
    445     : Node(NK_Alias, D, StringRef()), Name(Val) {}
    446 
    447   StringRef getName() const { return Name; }
    448   Node *getTarget();
    449 
    450   static inline bool classof(const Node *N) {
    451     return N->getType() == NK_Alias;
    452   }
    453 
    454 private:
    455   StringRef Name;
    456 };
    457 
    458 /// @brief A YAML Stream is a sequence of Documents. A document contains a root
    459 ///        node.
    460 class Document {
    461 public:
    462   /// @brief Root for parsing a node. Returns a single node.
    463   Node *parseBlockNode();
    464 
    465   Document(Stream &ParentStream);
    466 
    467   /// @brief Finish parsing the current document and return true if there are
    468   ///        more. Return false otherwise.
    469   bool skip();
    470 
    471   /// @brief Parse and return the root level node.
    472   Node *getRoot() {
    473     if (Root)
    474       return Root;
    475     return Root = parseBlockNode();
    476   }
    477 
    478 private:
    479   friend class Node;
    480   friend class document_iterator;
    481 
    482   /// @brief Stream to read tokens from.
    483   Stream &stream;
    484 
    485   /// @brief Used to allocate nodes to. All are destroyed without calling their
    486   ///        destructor when the document is destroyed.
    487   BumpPtrAllocator NodeAllocator;
    488 
    489   /// @brief The root node. Used to support skipping a partially parsed
    490   ///        document.
    491   Node *Root;
    492 
    493   Token &peekNext();
    494   Token getNext();
    495   void setError(const Twine &Message, Token &Location) const;
    496   bool failed() const;
    497 
    498   void handleTagDirective(const Token &Tag) {
    499     // TODO: Track tags.
    500   }
    501 
    502   /// @brief Parse %BLAH directives and return true if any were encountered.
    503   bool parseDirectives();
    504 
    505   /// @brief Consume the next token and error if it is not \a TK.
    506   bool expectToken(int TK);
    507 };
    508 
    509 /// @brief Iterator abstraction for Documents over a Stream.
    510 class document_iterator {
    511 public:
    512   document_iterator() : Doc(0) {}
    513   document_iterator(OwningPtr<Document> &D) : Doc(&D) {}
    514 
    515   bool operator ==(const document_iterator &Other) {
    516     if (isAtEnd() || Other.isAtEnd())
    517       return isAtEnd() && Other.isAtEnd();
    518 
    519     return Doc == Other.Doc;
    520   }
    521   bool operator !=(const document_iterator &Other) {
    522     return !(*this == Other);
    523   }
    524 
    525   document_iterator operator ++() {
    526     assert(Doc != 0 && "incrementing iterator past the end.");
    527     if (!(*Doc)->skip()) {
    528       Doc->reset(0);
    529     } else {
    530       Stream &S = (*Doc)->stream;
    531       Doc->reset(new Document(S));
    532     }
    533     return *this;
    534   }
    535 
    536   Document &operator *() {
    537     return *Doc->get();
    538   }
    539 
    540   OwningPtr<Document> &operator ->() {
    541     return *Doc;
    542   }
    543 
    544 private:
    545   bool isAtEnd() const {
    546     return !Doc || !*Doc;
    547   }
    548 
    549   OwningPtr<Document> *Doc;
    550 };
    551 
    552 }
    553 }
    554 
    555 #endif
    556