1 //===--- YAMLParser.h - Simple YAML parser --------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This is a YAML 1.2 parser. 11 // 12 // See http://www.yaml.org/spec/1.2/spec.html for the full standard. 13 // 14 // This currently does not implement the following: 15 // * Multi-line literal folding. 16 // * Tag resolution. 17 // * UTF-16. 18 // * BOMs anywhere other than the first Unicode scalar value in the file. 19 // 20 // The most important class here is Stream. This represents a YAML stream with 21 // 0, 1, or many documents. 22 // 23 // SourceMgr sm; 24 // StringRef input = getInput(); 25 // yaml::Stream stream(input, sm); 26 // 27 // for (yaml::document_iterator di = stream.begin(), de = stream.end(); 28 // di != de; ++di) { 29 // yaml::Node *n = di->getRoot(); 30 // if (n) { 31 // // Do something with n... 32 // } else 33 // break; 34 // } 35 // 36 //===----------------------------------------------------------------------===// 37 38 #ifndef LLVM_SUPPORT_YAMLPARSER_H 39 #define LLVM_SUPPORT_YAMLPARSER_H 40 41 #include "llvm/ADT/StringRef.h" 42 #include "llvm/Support/Allocator.h" 43 #include "llvm/Support/SMLoc.h" 44 #include <map> 45 #include <utility> 46 47 namespace llvm { 48 class MemoryBufferRef; 49 class SourceMgr; 50 class Twine; 51 class raw_ostream; 52 53 namespace yaml { 54 55 class document_iterator; 56 class Document; 57 class Node; 58 class Scanner; 59 struct Token; 60 61 /// \brief Dump all the tokens in this stream to OS. 62 /// \returns true if there was an error, false otherwise. 63 bool dumpTokens(StringRef Input, raw_ostream &); 64 65 /// \brief Scans all tokens in input without outputting anything. This is used 66 /// for benchmarking the tokenizer. 67 /// \returns true if there was an error, false otherwise. 68 bool scanTokens(StringRef Input); 69 70 /// \brief Escape \a Input for a double quoted scalar. 71 std::string escape(StringRef Input); 72 73 /// \brief This class represents a YAML stream potentially containing multiple 74 /// documents. 75 class Stream { 76 public: 77 /// \brief This keeps a reference to the string referenced by \p Input. 78 Stream(StringRef Input, SourceMgr &, bool ShowColors = true); 79 80 Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true); 81 ~Stream(); 82 83 document_iterator begin(); 84 document_iterator end(); 85 void skip(); 86 bool failed(); 87 bool validate() { 88 skip(); 89 return !failed(); 90 } 91 92 void printError(Node *N, const Twine &Msg); 93 94 private: 95 std::unique_ptr<Scanner> scanner; 96 std::unique_ptr<Document> CurrentDoc; 97 98 friend class Document; 99 }; 100 101 /// \brief Abstract base class for all Nodes. 102 class Node { 103 virtual void anchor(); 104 105 public: 106 enum NodeKind { 107 NK_Null, 108 NK_Scalar, 109 NK_BlockScalar, 110 NK_KeyValue, 111 NK_Mapping, 112 NK_Sequence, 113 NK_Alias 114 }; 115 116 Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor, 117 StringRef Tag); 118 119 /// \brief Get the value of the anchor attached to this node. If it does not 120 /// have one, getAnchor().size() will be 0. 121 StringRef getAnchor() const { return Anchor; } 122 123 /// \brief Get the tag as it was written in the document. This does not 124 /// perform tag resolution. 125 StringRef getRawTag() const { return Tag; } 126 127 /// \brief Get the verbatium tag for a given Node. This performs tag resoluton 128 /// and substitution. 129 std::string getVerbatimTag() const; 130 131 SMRange getSourceRange() const { return SourceRange; } 132 void setSourceRange(SMRange SR) { SourceRange = SR; } 133 134 // These functions forward to Document and Scanner. 135 Token &peekNext(); 136 Token getNext(); 137 Node *parseBlockNode(); 138 BumpPtrAllocator &getAllocator(); 139 void setError(const Twine &Message, Token &Location) const; 140 bool failed() const; 141 142 virtual void skip() {} 143 144 unsigned int getType() const { return TypeID; } 145 146 void *operator new(size_t Size, BumpPtrAllocator &Alloc, 147 size_t Alignment = 16) LLVM_NOEXCEPT { 148 return Alloc.Allocate(Size, Alignment); 149 } 150 151 void operator delete(void *Ptr, BumpPtrAllocator &Alloc, 152 size_t Size) LLVM_NOEXCEPT { 153 Alloc.Deallocate(Ptr, Size); 154 } 155 156 protected: 157 std::unique_ptr<Document> &Doc; 158 SMRange SourceRange; 159 160 void operator delete(void *) LLVM_NOEXCEPT = delete; 161 162 ~Node() = default; 163 164 private: 165 unsigned int TypeID; 166 StringRef Anchor; 167 /// \brief The tag as typed in the document. 168 StringRef Tag; 169 }; 170 171 /// \brief A null value. 172 /// 173 /// Example: 174 /// !!null null 175 class NullNode final : public Node { 176 void anchor() override; 177 178 public: 179 NullNode(std::unique_ptr<Document> &D) 180 : Node(NK_Null, D, StringRef(), StringRef()) {} 181 182 static inline bool classof(const Node *N) { return N->getType() == NK_Null; } 183 }; 184 185 /// \brief A scalar node is an opaque datum that can be presented as a 186 /// series of zero or more Unicode scalar values. 187 /// 188 /// Example: 189 /// Adena 190 class ScalarNode final : public Node { 191 void anchor() override; 192 193 public: 194 ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 195 StringRef Val) 196 : Node(NK_Scalar, D, Anchor, Tag), Value(Val) { 197 SMLoc Start = SMLoc::getFromPointer(Val.begin()); 198 SMLoc End = SMLoc::getFromPointer(Val.end()); 199 SourceRange = SMRange(Start, End); 200 } 201 202 // Return Value without any escaping or folding or other fun YAML stuff. This 203 // is the exact bytes that are contained in the file (after conversion to 204 // utf8). 205 StringRef getRawValue() const { return Value; } 206 207 /// \brief Gets the value of this node as a StringRef. 208 /// 209 /// \param Storage is used to store the content of the returned StringRef iff 210 /// it requires any modification from how it appeared in the source. 211 /// This happens with escaped characters and multi-line literals. 212 StringRef getValue(SmallVectorImpl<char> &Storage) const; 213 214 static inline bool classof(const Node *N) { 215 return N->getType() == NK_Scalar; 216 } 217 218 private: 219 StringRef Value; 220 221 StringRef unescapeDoubleQuoted(StringRef UnquotedValue, 222 StringRef::size_type Start, 223 SmallVectorImpl<char> &Storage) const; 224 }; 225 226 /// \brief A block scalar node is an opaque datum that can be presented as a 227 /// series of zero or more Unicode scalar values. 228 /// 229 /// Example: 230 /// | 231 /// Hello 232 /// World 233 class BlockScalarNode final : public Node { 234 void anchor() override; 235 236 public: 237 BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 238 StringRef Value, StringRef RawVal) 239 : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) { 240 SMLoc Start = SMLoc::getFromPointer(RawVal.begin()); 241 SMLoc End = SMLoc::getFromPointer(RawVal.end()); 242 SourceRange = SMRange(Start, End); 243 } 244 245 /// \brief Gets the value of this node as a StringRef. 246 StringRef getValue() const { return Value; } 247 248 static inline bool classof(const Node *N) { 249 return N->getType() == NK_BlockScalar; 250 } 251 252 private: 253 StringRef Value; 254 }; 255 256 /// \brief A key and value pair. While not technically a Node under the YAML 257 /// representation graph, it is easier to treat them this way. 258 /// 259 /// TODO: Consider making this not a child of Node. 260 /// 261 /// Example: 262 /// Section: .text 263 class KeyValueNode final : public Node { 264 void anchor() override; 265 266 public: 267 KeyValueNode(std::unique_ptr<Document> &D) 268 : Node(NK_KeyValue, D, StringRef(), StringRef()), Key(nullptr), 269 Value(nullptr) {} 270 271 /// \brief Parse and return the key. 272 /// 273 /// This may be called multiple times. 274 /// 275 /// \returns The key, or nullptr if failed() == true. 276 Node *getKey(); 277 278 /// \brief Parse and return the value. 279 /// 280 /// This may be called multiple times. 281 /// 282 /// \returns The value, or nullptr if failed() == true. 283 Node *getValue(); 284 285 void skip() override { 286 getKey()->skip(); 287 if (Node *Val = getValue()) 288 Val->skip(); 289 } 290 291 static inline bool classof(const Node *N) { 292 return N->getType() == NK_KeyValue; 293 } 294 295 private: 296 Node *Key; 297 Node *Value; 298 }; 299 300 /// \brief This is an iterator abstraction over YAML collections shared by both 301 /// sequences and maps. 302 /// 303 /// BaseT must have a ValueT* member named CurrentEntry and a member function 304 /// increment() which must set CurrentEntry to 0 to create an end iterator. 305 template <class BaseT, class ValueT> 306 class basic_collection_iterator 307 : public std::iterator<std::input_iterator_tag, ValueT> { 308 public: 309 basic_collection_iterator() : Base(nullptr) {} 310 basic_collection_iterator(BaseT *B) : Base(B) {} 311 312 ValueT *operator->() const { 313 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 314 return Base->CurrentEntry; 315 } 316 317 ValueT &operator*() const { 318 assert(Base && Base->CurrentEntry && 319 "Attempted to dereference end iterator!"); 320 return *Base->CurrentEntry; 321 } 322 323 operator ValueT *() const { 324 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 325 return Base->CurrentEntry; 326 } 327 328 /// Note on EqualityComparable: 329 /// 330 /// The iterator is not re-entrant, 331 /// it is meant to be used for parsing YAML on-demand 332 /// Once iteration started - it can point only to one entry at a time 333 /// hence Base.CurrentEntry and Other.Base.CurrentEntry are equal 334 /// iff Base and Other.Base are equal. 335 bool operator==(const basic_collection_iterator &Other) const { 336 if (Base && (Base == Other.Base)) { 337 assert((Base->CurrentEntry == Other.Base->CurrentEntry) 338 && "Equal Bases expected to point to equal Entries"); 339 } 340 341 return Base == Other.Base; 342 } 343 344 bool operator!=(const basic_collection_iterator &Other) const { 345 return !(Base == Other.Base); 346 } 347 348 basic_collection_iterator &operator++() { 349 assert(Base && "Attempted to advance iterator past end!"); 350 Base->increment(); 351 // Create an end iterator. 352 if (!Base->CurrentEntry) 353 Base = nullptr; 354 return *this; 355 } 356 357 private: 358 BaseT *Base; 359 }; 360 361 // The following two templates are used for both MappingNode and Sequence Node. 362 template <class CollectionType> 363 typename CollectionType::iterator begin(CollectionType &C) { 364 assert(C.IsAtBeginning && "You may only iterate over a collection once!"); 365 C.IsAtBeginning = false; 366 typename CollectionType::iterator ret(&C); 367 ++ret; 368 return ret; 369 } 370 371 template <class CollectionType> void skip(CollectionType &C) { 372 // TODO: support skipping from the middle of a parsed collection ;/ 373 assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!"); 374 if (C.IsAtBeginning) 375 for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e; 376 ++i) 377 i->skip(); 378 } 379 380 /// \brief Represents a YAML map created from either a block map for a flow map. 381 /// 382 /// This parses the YAML stream as increment() is called. 383 /// 384 /// Example: 385 /// Name: _main 386 /// Scope: Global 387 class MappingNode final : public Node { 388 void anchor() override; 389 390 public: 391 enum MappingType { 392 MT_Block, 393 MT_Flow, 394 MT_Inline ///< An inline mapping node is used for "[key: value]". 395 }; 396 397 MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 398 MappingType MT) 399 : Node(NK_Mapping, D, Anchor, Tag), Type(MT), IsAtBeginning(true), 400 IsAtEnd(false), CurrentEntry(nullptr) {} 401 402 friend class basic_collection_iterator<MappingNode, KeyValueNode>; 403 typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator; 404 template <class T> friend typename T::iterator yaml::begin(T &); 405 template <class T> friend void yaml::skip(T &); 406 407 iterator begin() { return yaml::begin(*this); } 408 409 iterator end() { return iterator(); } 410 411 void skip() override { yaml::skip(*this); } 412 413 static inline bool classof(const Node *N) { 414 return N->getType() == NK_Mapping; 415 } 416 417 private: 418 MappingType Type; 419 bool IsAtBeginning; 420 bool IsAtEnd; 421 KeyValueNode *CurrentEntry; 422 423 void increment(); 424 }; 425 426 /// \brief Represents a YAML sequence created from either a block sequence for a 427 /// flow sequence. 428 /// 429 /// This parses the YAML stream as increment() is called. 430 /// 431 /// Example: 432 /// - Hello 433 /// - World 434 class SequenceNode final : public Node { 435 void anchor() override; 436 437 public: 438 enum SequenceType { 439 ST_Block, 440 ST_Flow, 441 // Use for: 442 // 443 // key: 444 // - val1 445 // - val2 446 // 447 // As a BlockMappingEntry and BlockEnd are not created in this case. 448 ST_Indentless 449 }; 450 451 SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag, 452 SequenceType ST) 453 : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST), IsAtBeginning(true), 454 IsAtEnd(false), 455 WasPreviousTokenFlowEntry(true), // Start with an imaginary ','. 456 CurrentEntry(nullptr) {} 457 458 friend class basic_collection_iterator<SequenceNode, Node>; 459 typedef basic_collection_iterator<SequenceNode, Node> iterator; 460 template <class T> friend typename T::iterator yaml::begin(T &); 461 template <class T> friend void yaml::skip(T &); 462 463 void increment(); 464 465 iterator begin() { return yaml::begin(*this); } 466 467 iterator end() { return iterator(); } 468 469 void skip() override { yaml::skip(*this); } 470 471 static inline bool classof(const Node *N) { 472 return N->getType() == NK_Sequence; 473 } 474 475 private: 476 SequenceType SeqType; 477 bool IsAtBeginning; 478 bool IsAtEnd; 479 bool WasPreviousTokenFlowEntry; 480 Node *CurrentEntry; 481 }; 482 483 /// \brief Represents an alias to a Node with an anchor. 484 /// 485 /// Example: 486 /// *AnchorName 487 class AliasNode final : public Node { 488 void anchor() override; 489 490 public: 491 AliasNode(std::unique_ptr<Document> &D, StringRef Val) 492 : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {} 493 494 StringRef getName() const { return Name; } 495 Node *getTarget(); 496 497 static inline bool classof(const Node *N) { return N->getType() == NK_Alias; } 498 499 private: 500 StringRef Name; 501 }; 502 503 /// \brief A YAML Stream is a sequence of Documents. A document contains a root 504 /// node. 505 class Document { 506 public: 507 /// \brief Root for parsing a node. Returns a single node. 508 Node *parseBlockNode(); 509 510 Document(Stream &ParentStream); 511 512 /// \brief Finish parsing the current document and return true if there are 513 /// more. Return false otherwise. 514 bool skip(); 515 516 /// \brief Parse and return the root level node. 517 Node *getRoot() { 518 if (Root) 519 return Root; 520 return Root = parseBlockNode(); 521 } 522 523 const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; } 524 525 private: 526 friend class Node; 527 friend class document_iterator; 528 529 /// \brief Stream to read tokens from. 530 Stream &stream; 531 532 /// \brief Used to allocate nodes to. All are destroyed without calling their 533 /// destructor when the document is destroyed. 534 BumpPtrAllocator NodeAllocator; 535 536 /// \brief The root node. Used to support skipping a partially parsed 537 /// document. 538 Node *Root; 539 540 /// \brief Maps tag prefixes to their expansion. 541 std::map<StringRef, StringRef> TagMap; 542 543 Token &peekNext(); 544 Token getNext(); 545 void setError(const Twine &Message, Token &Location) const; 546 bool failed() const; 547 548 /// \brief Parse %BLAH directives and return true if any were encountered. 549 bool parseDirectives(); 550 551 /// \brief Parse %YAML 552 void parseYAMLDirective(); 553 554 /// \brief Parse %TAG 555 void parseTAGDirective(); 556 557 /// \brief Consume the next token and error if it is not \a TK. 558 bool expectToken(int TK); 559 }; 560 561 /// \brief Iterator abstraction for Documents over a Stream. 562 class document_iterator { 563 public: 564 document_iterator() : Doc(nullptr) {} 565 document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {} 566 567 bool operator==(const document_iterator &Other) { 568 if (isAtEnd() || Other.isAtEnd()) 569 return isAtEnd() && Other.isAtEnd(); 570 571 return Doc == Other.Doc; 572 } 573 bool operator!=(const document_iterator &Other) { return !(*this == Other); } 574 575 document_iterator operator++() { 576 assert(Doc && "incrementing iterator past the end."); 577 if (!(*Doc)->skip()) { 578 Doc->reset(nullptr); 579 } else { 580 Stream &S = (*Doc)->stream; 581 Doc->reset(new Document(S)); 582 } 583 return *this; 584 } 585 586 Document &operator*() { return *Doc->get(); } 587 588 std::unique_ptr<Document> &operator->() { return *Doc; } 589 590 private: 591 bool isAtEnd() const { return !Doc || !*Doc; } 592 593 std::unique_ptr<Document> *Doc; 594 }; 595 596 } // End namespace yaml. 597 598 } // End namespace llvm. 599 600 #endif 601