Home | History | Annotate | Download | only in Support
      1 //===--- YAMLParser.h - Simple YAML parser --------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 //  This is a YAML 1.2 parser.
     11 //
     12 //  See http://www.yaml.org/spec/1.2/spec.html for the full standard.
     13 //
     14 //  This currently does not implement the following:
     15 //    * Multi-line literal folding.
     16 //    * Tag resolution.
     17 //    * UTF-16.
     18 //    * BOMs anywhere other than the first Unicode scalar value in the file.
     19 //
     20 //  The most important class here is Stream. This represents a YAML stream with
     21 //  0, 1, or many documents.
     22 //
     23 //  SourceMgr sm;
     24 //  StringRef input = getInput();
     25 //  yaml::Stream stream(input, sm);
     26 //
     27 //  for (yaml::document_iterator di = stream.begin(), de = stream.end();
     28 //       di != de; ++di) {
     29 //    yaml::Node *n = di->getRoot();
     30 //    if (n) {
     31 //      // Do something with n...
     32 //    } else
     33 //      break;
     34 //  }
     35 //
     36 //===----------------------------------------------------------------------===//
     37 
     38 #ifndef LLVM_SUPPORT_YAML_PARSER_H
     39 #define LLVM_SUPPORT_YAML_PARSER_H
     40 
     41 #include "llvm/ADT/OwningPtr.h"
     42 #include "llvm/ADT/SmallString.h"
     43 #include "llvm/ADT/StringRef.h"
     44 #include "llvm/Support/Allocator.h"
     45 #include "llvm/Support/SMLoc.h"
     46 
     47 #include <limits>
     48 #include <utility>
     49 
     50 namespace llvm {
     51 class MemoryBuffer;
     52 class SourceMgr;
     53 class raw_ostream;
     54 class Twine;
     55 
     56 namespace yaml {
     57 
     58 class document_iterator;
     59 class Document;
     60 class Node;
     61 class Scanner;
     62 struct Token;
     63 
/// @brief Dump all the tokens scanned from \a Input to the given output
///        stream.
/// @returns true if there was an error, false otherwise.
     66 bool dumpTokens(StringRef Input, raw_ostream &);
     67 
     68 /// @brief Scans all tokens in input without outputting anything. This is used
     69 ///        for benchmarking the tokenizer.
     70 /// @returns true if there was an error, false otherwise.
     71 bool scanTokens(StringRef Input);
     72 
     73 /// @brief Escape \a Input for a double quoted scalar.
     74 std::string escape(StringRef Input);
     75 
     76 /// @brief This class represents a YAML stream potentially containing multiple
     77 ///        documents.
     78 class Stream {
     79 public:
     80   Stream(StringRef Input, SourceMgr &);
     81   ~Stream();
     82 
     83   document_iterator begin();
     84   document_iterator end();
     85   void skip();
     86   bool failed();
     87   bool validate() {
     88     skip();
     89     return !failed();
     90   }
     91 
     92   void printError(Node *N, const Twine &Msg);
     93 
     94 private:
     95   OwningPtr<Scanner> scanner;
     96   OwningPtr<Document> CurrentDoc;
     97 
     98   friend class Document;
     99 
    100   /// @brief Validate a %YAML x.x directive.
    101   void handleYAMLDirective(const Token &);
    102 };
    103 
    104 /// @brief Abstract base class for all Nodes.
    105 class Node {
    106 public:
    107   enum NodeKind {
    108     NK_Null,
    109     NK_Scalar,
    110     NK_KeyValue,
    111     NK_Mapping,
    112     NK_Sequence,
    113     NK_Alias
    114   };
    115 
    116   Node(unsigned int Type, OwningPtr<Document>&, StringRef Anchor);
    117 
    118   /// @brief Get the value of the anchor attached to this node. If it does not
    119   ///        have one, getAnchor().size() will be 0.
    120   StringRef getAnchor() const { return Anchor; }
    121 
    122   SMRange getSourceRange() const { return SourceRange; }
    123   void setSourceRange(SMRange SR) { SourceRange = SR; }
    124 
    125   // These functions forward to Document and Scanner.
    126   Token &peekNext();
    127   Token getNext();
    128   Node *parseBlockNode();
    129   BumpPtrAllocator &getAllocator();
    130   void setError(const Twine &Message, Token &Location) const;
    131   bool failed() const;
    132 
    133   virtual void skip() {};
    134 
    135   unsigned int getType() const { return TypeID; }
    136   static inline bool classof(const Node *) { return true; }
    137 
    138   void *operator new ( size_t Size
    139                      , BumpPtrAllocator &Alloc
    140                      , size_t Alignment = 16) throw() {
    141     return Alloc.Allocate(Size, Alignment);
    142   }
    143 
    144   void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t) throw() {
    145     Alloc.Deallocate(Ptr);
    146   }
    147 
    148 protected:
    149   OwningPtr<Document> &Doc;
    150   SMRange SourceRange;
    151 
    152   void operator delete(void *) throw() {}
    153 
    154   virtual ~Node() {}
    155 
    156 private:
    157   unsigned int TypeID;
    158   StringRef Anchor;
    159 };
    160 
    161 /// @brief A null value.
    162 ///
    163 /// Example:
    164 ///   !!null null
    165 class NullNode : public Node {
    166 public:
    167   NullNode(OwningPtr<Document> &D) : Node(NK_Null, D, StringRef()) {}
    168 
    169   static inline bool classof(const NullNode *) { return true; }
    170   static inline bool classof(const Node *N) {
    171     return N->getType() == NK_Null;
    172   }
    173 };
    174 
    175 /// @brief A scalar node is an opaque datum that can be presented as a
    176 ///        series of zero or more Unicode scalar values.
    177 ///
    178 /// Example:
    179 ///   Adena
    180 class ScalarNode : public Node {
    181 public:
    182   ScalarNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Val)
    183     : Node(NK_Scalar, D, Anchor)
    184     , Value(Val) {
    185     SMLoc Start = SMLoc::getFromPointer(Val.begin());
    186     SMLoc End = SMLoc::getFromPointer(Val.end() - 1);
    187     SourceRange = SMRange(Start, End);
    188   }
    189 
    190   // Return Value without any escaping or folding or other fun YAML stuff. This
    191   // is the exact bytes that are contained in the file (after conversion to
    192   // utf8).
    193   StringRef getRawValue() const { return Value; }
    194 
    195   /// @brief Gets the value of this node as a StringRef.
    196   ///
    197   /// @param Storage is used to store the content of the returned StringRef iff
    198   ///        it requires any modification from how it appeared in the source.
    199   ///        This happens with escaped characters and multi-line literals.
    200   StringRef getValue(SmallVectorImpl<char> &Storage) const;
    201 
    202   static inline bool classof(const ScalarNode *) { return true; }
    203   static inline bool classof(const Node *N) {
    204     return N->getType() == NK_Scalar;
    205   }
    206 
    207 private:
    208   StringRef Value;
    209 
    210   StringRef unescapeDoubleQuoted( StringRef UnquotedValue
    211                                 , StringRef::size_type Start
    212                                 , SmallVectorImpl<char> &Storage) const;
    213 };
    214 
    215 /// @brief A key and value pair. While not technically a Node under the YAML
    216 ///        representation graph, it is easier to treat them this way.
    217 ///
    218 /// TODO: Consider making this not a child of Node.
    219 ///
    220 /// Example:
    221 ///   Section: .text
    222 class KeyValueNode : public Node {
    223 public:
    224   KeyValueNode(OwningPtr<Document> &D)
    225     : Node(NK_KeyValue, D, StringRef())
    226     , Key(0)
    227     , Value(0)
    228   {}
    229 
    230   /// @brief Parse and return the key.
    231   ///
    232   /// This may be called multiple times.
    233   ///
    234   /// @returns The key, or nullptr if failed() == true.
    235   Node *getKey();
    236 
    237   /// @brief Parse and return the value.
    238   ///
    239   /// This may be called multiple times.
    240   ///
    241   /// @returns The value, or nullptr if failed() == true.
    242   Node *getValue();
    243 
    244   virtual void skip() {
    245     getKey()->skip();
    246     getValue()->skip();
    247   }
    248 
    249   static inline bool classof(const KeyValueNode *) { return true; }
    250   static inline bool classof(const Node *N) {
    251     return N->getType() == NK_KeyValue;
    252   }
    253 
    254 private:
    255   Node *Key;
    256   Node *Value;
    257 };
    258 
    259 /// @brief This is an iterator abstraction over YAML collections shared by both
    260 ///        sequences and maps.
    261 ///
    262 /// BaseT must have a ValueT* member named CurrentEntry and a member function
    263 /// increment() which must set CurrentEntry to 0 to create an end iterator.
    264 template <class BaseT, class ValueT>
    265 class basic_collection_iterator
    266   : public std::iterator<std::forward_iterator_tag, ValueT> {
    267 public:
    268   basic_collection_iterator() : Base(0) {}
    269   basic_collection_iterator(BaseT *B) : Base(B) {}
    270 
    271   ValueT *operator ->() const {
    272     assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    273     return Base->CurrentEntry;
    274   }
    275 
    276   ValueT &operator *() const {
    277     assert(Base && Base->CurrentEntry &&
    278            "Attempted to dereference end iterator!");
    279     return *Base->CurrentEntry;
    280   }
    281 
    282   operator ValueT*() const {
    283     assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    284     return Base->CurrentEntry;
    285   }
    286 
    287   bool operator !=(const basic_collection_iterator &Other) const {
    288     if(Base != Other.Base)
    289       return true;
    290     return (Base && Other.Base) && Base->CurrentEntry
    291                                    != Other.Base->CurrentEntry;
    292   }
    293 
    294   basic_collection_iterator &operator++() {
    295     assert(Base && "Attempted to advance iterator past end!");
    296     Base->increment();
    297     // Create an end iterator.
    298     if (Base->CurrentEntry == 0)
    299       Base = 0;
    300     return *this;
    301   }
    302 
    303 private:
    304   BaseT *Base;
    305 };
    306 
    307 // The following two templates are used for both MappingNode and Sequence Node.
    308 template <class CollectionType>
    309 typename CollectionType::iterator begin(CollectionType &C) {
    310   assert(C.IsAtBeginning && "You may only iterate over a collection once!");
    311   C.IsAtBeginning = false;
    312   typename CollectionType::iterator ret(&C);
    313   ++ret;
    314   return ret;
    315 }
    316 
    317 template <class CollectionType>
    318 void skip(CollectionType &C) {
    319   // TODO: support skipping from the middle of a parsed collection ;/
    320   assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
    321   if (C.IsAtBeginning)
    322     for (typename CollectionType::iterator i = begin(C), e = C.end();
    323                                            i != e; ++i)
    324       i->skip();
    325 }
    326 
    327 /// @brief Represents a YAML map created from either a block map for a flow map.
    328 ///
    329 /// This parses the YAML stream as increment() is called.
    330 ///
    331 /// Example:
    332 ///   Name: _main
    333 ///   Scope: Global
    334 class MappingNode : public Node {
    335 public:
    336   enum MappingType {
    337     MT_Block,
    338     MT_Flow,
    339     MT_Inline //< An inline mapping node is used for "[key: value]".
    340   };
    341 
    342   MappingNode(OwningPtr<Document> &D, StringRef Anchor, MappingType MT)
    343     : Node(NK_Mapping, D, Anchor)
    344     , Type(MT)
    345     , IsAtBeginning(true)
    346     , IsAtEnd(false)
    347     , CurrentEntry(0)
    348   {}
    349 
    350   friend class basic_collection_iterator<MappingNode, KeyValueNode>;
    351   typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator;
    352   template <class T> friend typename T::iterator yaml::begin(T &);
    353   template <class T> friend void yaml::skip(T &);
    354 
    355   iterator begin() {
    356     return yaml::begin(*this);
    357   }
    358 
    359   iterator end() { return iterator(); }
    360 
    361   virtual void skip() {
    362     yaml::skip(*this);
    363   }
    364 
    365   static inline bool classof(const MappingNode *) { return true; }
    366   static inline bool classof(const Node *N) {
    367     return N->getType() == NK_Mapping;
    368   }
    369 
    370 private:
    371   MappingType Type;
    372   bool IsAtBeginning;
    373   bool IsAtEnd;
    374   KeyValueNode *CurrentEntry;
    375 
    376   void increment();
    377 };
    378 
    379 /// @brief Represents a YAML sequence created from either a block sequence for a
    380 ///        flow sequence.
    381 ///
    382 /// This parses the YAML stream as increment() is called.
    383 ///
    384 /// Example:
    385 ///   - Hello
    386 ///   - World
    387 class SequenceNode : public Node {
    388 public:
    389   enum SequenceType {
    390     ST_Block,
    391     ST_Flow,
    392     // Use for:
    393     //
    394     // key:
    395     // - val1
    396     // - val2
    397     //
    398     // As a BlockMappingEntry and BlockEnd are not created in this case.
    399     ST_Indentless
    400   };
    401 
    402   SequenceNode(OwningPtr<Document> &D, StringRef Anchor, SequenceType ST)
    403     : Node(NK_Sequence, D, Anchor)
    404     , SeqType(ST)
    405     , IsAtBeginning(true)
    406     , IsAtEnd(false)
    407     , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','.
    408     , CurrentEntry(0)
    409   {}
    410 
    411   friend class basic_collection_iterator<SequenceNode, Node>;
    412   typedef basic_collection_iterator<SequenceNode, Node> iterator;
    413   template <class T> friend typename T::iterator yaml::begin(T &);
    414   template <class T> friend void yaml::skip(T &);
    415 
    416   void increment();
    417 
    418   iterator begin() {
    419     return yaml::begin(*this);
    420   }
    421 
    422   iterator end() { return iterator(); }
    423 
    424   virtual void skip() {
    425     yaml::skip(*this);
    426   }
    427 
    428   static inline bool classof(const SequenceNode *) { return true; }
    429   static inline bool classof(const Node *N) {
    430     return N->getType() == NK_Sequence;
    431   }
    432 
    433 private:
    434   SequenceType SeqType;
    435   bool IsAtBeginning;
    436   bool IsAtEnd;
    437   bool WasPreviousTokenFlowEntry;
    438   Node *CurrentEntry;
    439 };
    440 
    441 /// @brief Represents an alias to a Node with an anchor.
    442 ///
    443 /// Example:
    444 ///   *AnchorName
    445 class AliasNode : public Node {
    446 public:
    447   AliasNode(OwningPtr<Document> &D, StringRef Val)
    448     : Node(NK_Alias, D, StringRef()), Name(Val) {}
    449 
    450   StringRef getName() const { return Name; }
    451   Node *getTarget();
    452 
    453   static inline bool classof(const ScalarNode *) { return true; }
    454   static inline bool classof(const Node *N) {
    455     return N->getType() == NK_Alias;
    456   }
    457 
    458 private:
    459   StringRef Name;
    460 };
    461 
    462 /// @brief A YAML Stream is a sequence of Documents. A document contains a root
    463 ///        node.
    464 class Document {
    465 public:
    466   /// @brief Root for parsing a node. Returns a single node.
    467   Node *parseBlockNode();
    468 
    469   Document(Stream &ParentStream);
    470 
    471   /// @brief Finish parsing the current document and return true if there are
    472   ///        more. Return false otherwise.
    473   bool skip();
    474 
    475   /// @brief Parse and return the root level node.
    476   Node *getRoot() {
    477     if (Root)
    478       return Root;
    479     return Root = parseBlockNode();
    480   }
    481 
    482 private:
    483   friend class Node;
    484   friend class document_iterator;
    485 
    486   /// @brief Stream to read tokens from.
    487   Stream &stream;
    488 
    489   /// @brief Used to allocate nodes to. All are destroyed without calling their
    490   ///        destructor when the document is destroyed.
    491   BumpPtrAllocator NodeAllocator;
    492 
    493   /// @brief The root node. Used to support skipping a partially parsed
    494   ///        document.
    495   Node *Root;
    496 
    497   Token &peekNext();
    498   Token getNext();
    499   void setError(const Twine &Message, Token &Location) const;
    500   bool failed() const;
    501 
    502   void handleTagDirective(const Token &Tag) {
    503     // TODO: Track tags.
    504   }
    505 
    506   /// @brief Parse %BLAH directives and return true if any were encountered.
    507   bool parseDirectives();
    508 
    509   /// @brief Consume the next token and error if it is not \a TK.
    510   bool expectToken(int TK);
    511 };
    512 
    513 /// @brief Iterator abstraction for Documents over a Stream.
    514 class document_iterator {
    515 public:
    516   document_iterator() : Doc(NullDoc) {}
    517   document_iterator(OwningPtr<Document> &D) : Doc(D) {}
    518 
    519   bool operator ==(const document_iterator &Other) {
    520     return Doc == Other.Doc;
    521   }
    522   bool operator !=(const document_iterator &Other) {
    523     return !(*this == Other);
    524   }
    525 
    526   document_iterator operator ++() {
    527     if (!Doc->skip()) {
    528       Doc.reset(0);
    529     } else {
    530       Stream &S = Doc->stream;
    531       Doc.reset(new Document(S));
    532     }
    533     return *this;
    534   }
    535 
    536   Document &operator *() {
    537     return *Doc;
    538   }
    539 
    540   OwningPtr<Document> &operator ->() {
    541     return Doc;
    542   }
    543 
    544 private:
    545   static OwningPtr<Document> NullDoc;
    546   OwningPtr<Document> &Doc;
    547 };
    548 
    549 }
    550 }
    551 
    552 #endif
    553