1 //===--- YAMLParser.h - Simple YAML parser --------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This is a YAML 1.2 parser. 11 // 12 // See http://www.yaml.org/spec/1.2/spec.html for the full standard. 13 // 14 // This currently does not implement the following: 15 // * Multi-line literal folding. 16 // * Tag resolution. 17 // * UTF-16. 18 // * BOMs anywhere other than the first Unicode scalar value in the file. 19 // 20 // The most important class here is Stream. This represents a YAML stream with 21 // 0, 1, or many documents. 22 // 23 // SourceMgr sm; 24 // StringRef input = getInput(); 25 // yaml::Stream stream(input, sm); 26 // 27 // for (yaml::document_iterator di = stream.begin(), de = stream.end(); 28 // di != de; ++di) { 29 // yaml::Node *n = di->getRoot(); 30 // if (n) { 31 // // Do something with n... 32 // } else 33 // break; 34 // } 35 // 36 //===----------------------------------------------------------------------===// 37 38 #ifndef LLVM_SUPPORT_YAMLPARSER_H 39 #define LLVM_SUPPORT_YAMLPARSER_H 40 41 #include "llvm/ADT/OwningPtr.h" 42 #include "llvm/ADT/SmallString.h" 43 #include "llvm/ADT/StringRef.h" 44 #include "llvm/Support/Allocator.h" 45 #include "llvm/Support/SMLoc.h" 46 #include <limits> 47 #include <utility> 48 49 namespace llvm { 50 class MemoryBuffer; 51 class SourceMgr; 52 class raw_ostream; 53 class Twine; 54 55 namespace yaml { 56 57 class document_iterator; 58 class Document; 59 class Node; 60 class Scanner; 61 struct Token; 62 63 /// @brief Dump all the tokens in this stream to OS. 64 /// @returns true if there was an error, false otherwise. 65 bool dumpTokens(StringRef Input, raw_ostream &); 66 67 /// @brief Scans all tokens in input without outputting anything. This is used 68 /// for benchmarking the tokenizer. 69 /// @returns true if there was an error, false otherwise. 70 bool scanTokens(StringRef Input); 71 72 /// @brief Escape \a Input for a double quoted scalar. 73 std::string escape(StringRef Input); 74 75 /// @brief This class represents a YAML stream potentially containing multiple 76 /// documents. 77 class Stream { 78 public: 79 /// @brief This keeps a reference to the string referenced by \p Input. 80 Stream(StringRef Input, SourceMgr &); 81 82 /// @brief This takes ownership of \p InputBuffer. 83 Stream(MemoryBuffer *InputBuffer, SourceMgr &); 84 ~Stream(); 85 86 document_iterator begin(); 87 document_iterator end(); 88 void skip(); 89 bool failed(); 90 bool validate() { 91 skip(); 92 return !failed(); 93 } 94 95 void printError(Node *N, const Twine &Msg); 96 97 private: 98 OwningPtr<Scanner> scanner; 99 OwningPtr<Document> CurrentDoc; 100 101 friend class Document; 102 103 /// @brief Validate a %YAML x.x directive. 104 void handleYAMLDirective(const Token &); 105 }; 106 107 /// @brief Abstract base class for all Nodes. 108 class Node { 109 public: 110 enum NodeKind { 111 NK_Null, 112 NK_Scalar, 113 NK_KeyValue, 114 NK_Mapping, 115 NK_Sequence, 116 NK_Alias 117 }; 118 119 Node(unsigned int Type, OwningPtr<Document>&, StringRef Anchor); 120 121 /// @brief Get the value of the anchor attached to this node. If it does not 122 /// have one, getAnchor().size() will be 0. 123 StringRef getAnchor() const { return Anchor; } 124 125 SMRange getSourceRange() const { return SourceRange; } 126 void setSourceRange(SMRange SR) { SourceRange = SR; } 127 128 // These functions forward to Document and Scanner. 129 Token &peekNext(); 130 Token getNext(); 131 Node *parseBlockNode(); 132 BumpPtrAllocator &getAllocator(); 133 void setError(const Twine &Message, Token &Location) const; 134 bool failed() const; 135 136 virtual void skip() {} 137 138 unsigned int getType() const { return TypeID; } 139 140 void *operator new ( size_t Size 141 , BumpPtrAllocator &Alloc 142 , size_t Alignment = 16) throw() { 143 return Alloc.Allocate(Size, Alignment); 144 } 145 146 void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t) throw() { 147 Alloc.Deallocate(Ptr); 148 } 149 150 protected: 151 OwningPtr<Document> &Doc; 152 SMRange SourceRange; 153 154 void operator delete(void *) throw() {} 155 156 virtual ~Node() {} 157 158 private: 159 unsigned int TypeID; 160 StringRef Anchor; 161 }; 162 163 /// @brief A null value. 164 /// 165 /// Example: 166 /// !!null null 167 class NullNode : public Node { 168 public: 169 NullNode(OwningPtr<Document> &D) : Node(NK_Null, D, StringRef()) {} 170 171 static inline bool classof(const Node *N) { 172 return N->getType() == NK_Null; 173 } 174 }; 175 176 /// @brief A scalar node is an opaque datum that can be presented as a 177 /// series of zero or more Unicode scalar values. 178 /// 179 /// Example: 180 /// Adena 181 class ScalarNode : public Node { 182 public: 183 ScalarNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Val) 184 : Node(NK_Scalar, D, Anchor) 185 , Value(Val) { 186 SMLoc Start = SMLoc::getFromPointer(Val.begin()); 187 SMLoc End = SMLoc::getFromPointer(Val.end()); 188 SourceRange = SMRange(Start, End); 189 } 190 191 // Return Value without any escaping or folding or other fun YAML stuff. This 192 // is the exact bytes that are contained in the file (after conversion to 193 // utf8). 194 StringRef getRawValue() const { return Value; } 195 196 /// @brief Gets the value of this node as a StringRef. 197 /// 198 /// @param Storage is used to store the content of the returned StringRef iff 199 /// it requires any modification from how it appeared in the source. 200 /// This happens with escaped characters and multi-line literals. 201 StringRef getValue(SmallVectorImpl<char> &Storage) const; 202 203 static inline bool classof(const Node *N) { 204 return N->getType() == NK_Scalar; 205 } 206 207 private: 208 StringRef Value; 209 210 StringRef unescapeDoubleQuoted( StringRef UnquotedValue 211 , StringRef::size_type Start 212 , SmallVectorImpl<char> &Storage) const; 213 }; 214 215 /// @brief A key and value pair. While not technically a Node under the YAML 216 /// representation graph, it is easier to treat them this way. 217 /// 218 /// TODO: Consider making this not a child of Node. 219 /// 220 /// Example: 221 /// Section: .text 222 class KeyValueNode : public Node { 223 public: 224 KeyValueNode(OwningPtr<Document> &D) 225 : Node(NK_KeyValue, D, StringRef()) 226 , Key(0) 227 , Value(0) 228 {} 229 230 /// @brief Parse and return the key. 231 /// 232 /// This may be called multiple times. 233 /// 234 /// @returns The key, or nullptr if failed() == true. 235 Node *getKey(); 236 237 /// @brief Parse and return the value. 238 /// 239 /// This may be called multiple times. 240 /// 241 /// @returns The value, or nullptr if failed() == true. 242 Node *getValue(); 243 244 virtual void skip() LLVM_OVERRIDE { 245 getKey()->skip(); 246 getValue()->skip(); 247 } 248 249 static inline bool classof(const Node *N) { 250 return N->getType() == NK_KeyValue; 251 } 252 253 private: 254 Node *Key; 255 Node *Value; 256 }; 257 258 /// @brief This is an iterator abstraction over YAML collections shared by both 259 /// sequences and maps. 260 /// 261 /// BaseT must have a ValueT* member named CurrentEntry and a member function 262 /// increment() which must set CurrentEntry to 0 to create an end iterator. 263 template <class BaseT, class ValueT> 264 class basic_collection_iterator 265 : public std::iterator<std::forward_iterator_tag, ValueT> { 266 public: 267 basic_collection_iterator() : Base(0) {} 268 basic_collection_iterator(BaseT *B) : Base(B) {} 269 270 ValueT *operator ->() const { 271 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 272 return Base->CurrentEntry; 273 } 274 275 ValueT &operator *() const { 276 assert(Base && Base->CurrentEntry && 277 "Attempted to dereference end iterator!"); 278 return *Base->CurrentEntry; 279 } 280 281 operator ValueT*() const { 282 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 283 return Base->CurrentEntry; 284 } 285 286 bool operator !=(const basic_collection_iterator &Other) const { 287 if(Base != Other.Base) 288 return true; 289 return (Base && Other.Base) && Base->CurrentEntry 290 != Other.Base->CurrentEntry; 291 } 292 293 basic_collection_iterator &operator++() { 294 assert(Base && "Attempted to advance iterator past end!"); 295 Base->increment(); 296 // Create an end iterator. 297 if (Base->CurrentEntry == 0) 298 Base = 0; 299 return *this; 300 } 301 302 private: 303 BaseT *Base; 304 }; 305 306 // The following two templates are used for both MappingNode and Sequence Node. 307 template <class CollectionType> 308 typename CollectionType::iterator begin(CollectionType &C) { 309 assert(C.IsAtBeginning && "You may only iterate over a collection once!"); 310 C.IsAtBeginning = false; 311 typename CollectionType::iterator ret(&C); 312 ++ret; 313 return ret; 314 } 315 316 template <class CollectionType> 317 void skip(CollectionType &C) { 318 // TODO: support skipping from the middle of a parsed collection ;/ 319 assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!"); 320 if (C.IsAtBeginning) 321 for (typename CollectionType::iterator i = begin(C), e = C.end(); 322 i != e; ++i) 323 i->skip(); 324 } 325 326 /// @brief Represents a YAML map created from either a block map for a flow map. 327 /// 328 /// This parses the YAML stream as increment() is called. 329 /// 330 /// Example: 331 /// Name: _main 332 /// Scope: Global 333 class MappingNode : public Node { 334 public: 335 enum MappingType { 336 MT_Block, 337 MT_Flow, 338 MT_Inline ///< An inline mapping node is used for "[key: value]". 339 }; 340 341 MappingNode(OwningPtr<Document> &D, StringRef Anchor, MappingType MT) 342 : Node(NK_Mapping, D, Anchor) 343 , Type(MT) 344 , IsAtBeginning(true) 345 , IsAtEnd(false) 346 , CurrentEntry(0) 347 {} 348 349 friend class basic_collection_iterator<MappingNode, KeyValueNode>; 350 typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator; 351 template <class T> friend typename T::iterator yaml::begin(T &); 352 template <class T> friend void yaml::skip(T &); 353 354 iterator begin() { 355 return yaml::begin(*this); 356 } 357 358 iterator end() { return iterator(); } 359 360 virtual void skip() LLVM_OVERRIDE { 361 yaml::skip(*this); 362 } 363 364 static inline bool classof(const Node *N) { 365 return N->getType() == NK_Mapping; 366 } 367 368 private: 369 MappingType Type; 370 bool IsAtBeginning; 371 bool IsAtEnd; 372 KeyValueNode *CurrentEntry; 373 374 void increment(); 375 }; 376 377 /// @brief Represents a YAML sequence created from either a block sequence for a 378 /// flow sequence. 379 /// 380 /// This parses the YAML stream as increment() is called. 381 /// 382 /// Example: 383 /// - Hello 384 /// - World 385 class SequenceNode : public Node { 386 public: 387 enum SequenceType { 388 ST_Block, 389 ST_Flow, 390 // Use for: 391 // 392 // key: 393 // - val1 394 // - val2 395 // 396 // As a BlockMappingEntry and BlockEnd are not created in this case. 397 ST_Indentless 398 }; 399 400 SequenceNode(OwningPtr<Document> &D, StringRef Anchor, SequenceType ST) 401 : Node(NK_Sequence, D, Anchor) 402 , SeqType(ST) 403 , IsAtBeginning(true) 404 , IsAtEnd(false) 405 , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','. 406 , CurrentEntry(0) 407 {} 408 409 friend class basic_collection_iterator<SequenceNode, Node>; 410 typedef basic_collection_iterator<SequenceNode, Node> iterator; 411 template <class T> friend typename T::iterator yaml::begin(T &); 412 template <class T> friend void yaml::skip(T &); 413 414 void increment(); 415 416 iterator begin() { 417 return yaml::begin(*this); 418 } 419 420 iterator end() { return iterator(); } 421 422 virtual void skip() LLVM_OVERRIDE { 423 yaml::skip(*this); 424 } 425 426 static inline bool classof(const Node *N) { 427 return N->getType() == NK_Sequence; 428 } 429 430 private: 431 SequenceType SeqType; 432 bool IsAtBeginning; 433 bool IsAtEnd; 434 bool WasPreviousTokenFlowEntry; 435 Node *CurrentEntry; 436 }; 437 438 /// @brief Represents an alias to a Node with an anchor. 439 /// 440 /// Example: 441 /// *AnchorName 442 class AliasNode : public Node { 443 public: 444 AliasNode(OwningPtr<Document> &D, StringRef Val) 445 : Node(NK_Alias, D, StringRef()), Name(Val) {} 446 447 StringRef getName() const { return Name; } 448 Node *getTarget(); 449 450 static inline bool classof(const Node *N) { 451 return N->getType() == NK_Alias; 452 } 453 454 private: 455 StringRef Name; 456 }; 457 458 /// @brief A YAML Stream is a sequence of Documents. A document contains a root 459 /// node. 460 class Document { 461 public: 462 /// @brief Root for parsing a node. Returns a single node. 463 Node *parseBlockNode(); 464 465 Document(Stream &ParentStream); 466 467 /// @brief Finish parsing the current document and return true if there are 468 /// more. Return false otherwise. 469 bool skip(); 470 471 /// @brief Parse and return the root level node. 472 Node *getRoot() { 473 if (Root) 474 return Root; 475 return Root = parseBlockNode(); 476 } 477 478 private: 479 friend class Node; 480 friend class document_iterator; 481 482 /// @brief Stream to read tokens from. 483 Stream &stream; 484 485 /// @brief Used to allocate nodes to. All are destroyed without calling their 486 /// destructor when the document is destroyed. 487 BumpPtrAllocator NodeAllocator; 488 489 /// @brief The root node. Used to support skipping a partially parsed 490 /// document. 491 Node *Root; 492 493 Token &peekNext(); 494 Token getNext(); 495 void setError(const Twine &Message, Token &Location) const; 496 bool failed() const; 497 498 void handleTagDirective(const Token &Tag) { 499 // TODO: Track tags. 500 } 501 502 /// @brief Parse %BLAH directives and return true if any were encountered. 503 bool parseDirectives(); 504 505 /// @brief Consume the next token and error if it is not \a TK. 506 bool expectToken(int TK); 507 }; 508 509 /// @brief Iterator abstraction for Documents over a Stream. 510 class document_iterator { 511 public: 512 document_iterator() : Doc(0) {} 513 document_iterator(OwningPtr<Document> &D) : Doc(&D) {} 514 515 bool operator ==(const document_iterator &Other) { 516 if (isAtEnd() || Other.isAtEnd()) 517 return isAtEnd() && Other.isAtEnd(); 518 519 return Doc == Other.Doc; 520 } 521 bool operator !=(const document_iterator &Other) { 522 return !(*this == Other); 523 } 524 525 document_iterator operator ++() { 526 assert(Doc != 0 && "incrementing iterator past the end."); 527 if (!(*Doc)->skip()) { 528 Doc->reset(0); 529 } else { 530 Stream &S = (*Doc)->stream; 531 Doc->reset(new Document(S)); 532 } 533 return *this; 534 } 535 536 Document &operator *() { 537 return *Doc->get(); 538 } 539 540 OwningPtr<Document> &operator ->() { 541 return *Doc; 542 } 543 544 private: 545 bool isAtEnd() const { 546 return !Doc || !*Doc; 547 } 548 549 OwningPtr<Document> *Doc; 550 }; 551 552 } 553 } 554 555 #endif 556