1 //===--- YAMLParser.h - Simple YAML parser --------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This is a YAML 1.2 parser. 11 // 12 // See http://www.yaml.org/spec/1.2/spec.html for the full standard. 13 // 14 // This currently does not implement the following: 15 // * Multi-line literal folding. 16 // * Tag resolution. 17 // * UTF-16. 18 // * BOMs anywhere other than the first Unicode scalar value in the file. 19 // 20 // The most important class here is Stream. This represents a YAML stream with 21 // 0, 1, or many documents. 22 // 23 // SourceMgr sm; 24 // StringRef input = getInput(); 25 // yaml::Stream stream(input, sm); 26 // 27 // for (yaml::document_iterator di = stream.begin(), de = stream.end(); 28 // di != de; ++di) { 29 // yaml::Node *n = di->getRoot(); 30 // if (n) { 31 // // Do something with n... 32 // } else 33 // break; 34 // } 35 // 36 //===----------------------------------------------------------------------===// 37 38 #ifndef LLVM_SUPPORT_YAML_PARSER_H 39 #define LLVM_SUPPORT_YAML_PARSER_H 40 41 #include "llvm/ADT/OwningPtr.h" 42 #include "llvm/ADT/SmallString.h" 43 #include "llvm/ADT/StringRef.h" 44 #include "llvm/Support/Allocator.h" 45 #include "llvm/Support/SMLoc.h" 46 47 #include <limits> 48 #include <utility> 49 50 namespace llvm { 51 class MemoryBuffer; 52 class SourceMgr; 53 class raw_ostream; 54 class Twine; 55 56 namespace yaml { 57 58 class document_iterator; 59 class Document; 60 class Node; 61 class Scanner; 62 struct Token; 63 64 /// @brief Dump all the tokens in this stream to OS. 65 /// @returns true if there was an error, false otherwise. 66 bool dumpTokens(StringRef Input, raw_ostream &); 67 68 /// @brief Scans all tokens in input without outputting anything. This is used 69 /// for benchmarking the tokenizer. 70 /// @returns true if there was an error, false otherwise. 71 bool scanTokens(StringRef Input); 72 73 /// @brief Escape \a Input for a double quoted scalar. 74 std::string escape(StringRef Input); 75 76 /// @brief This class represents a YAML stream potentially containing multiple 77 /// documents. 78 class Stream { 79 public: 80 Stream(StringRef Input, SourceMgr &); 81 ~Stream(); 82 83 document_iterator begin(); 84 document_iterator end(); 85 void skip(); 86 bool failed(); 87 bool validate() { 88 skip(); 89 return !failed(); 90 } 91 92 void printError(Node *N, const Twine &Msg); 93 94 private: 95 OwningPtr<Scanner> scanner; 96 OwningPtr<Document> CurrentDoc; 97 98 friend class Document; 99 100 /// @brief Validate a %YAML x.x directive. 101 void handleYAMLDirective(const Token &); 102 }; 103 104 /// @brief Abstract base class for all Nodes. 105 class Node { 106 public: 107 enum NodeKind { 108 NK_Null, 109 NK_Scalar, 110 NK_KeyValue, 111 NK_Mapping, 112 NK_Sequence, 113 NK_Alias 114 }; 115 116 Node(unsigned int Type, OwningPtr<Document>&, StringRef Anchor); 117 118 /// @brief Get the value of the anchor attached to this node. If it does not 119 /// have one, getAnchor().size() will be 0. 120 StringRef getAnchor() const { return Anchor; } 121 122 SMRange getSourceRange() const { return SourceRange; } 123 void setSourceRange(SMRange SR) { SourceRange = SR; } 124 125 // These functions forward to Document and Scanner. 126 Token &peekNext(); 127 Token getNext(); 128 Node *parseBlockNode(); 129 BumpPtrAllocator &getAllocator(); 130 void setError(const Twine &Message, Token &Location) const; 131 bool failed() const; 132 133 virtual void skip() {}; 134 135 unsigned int getType() const { return TypeID; } 136 static inline bool classof(const Node *) { return true; } 137 138 void *operator new ( size_t Size 139 , BumpPtrAllocator &Alloc 140 , size_t Alignment = 16) throw() { 141 return Alloc.Allocate(Size, Alignment); 142 } 143 144 void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t) throw() { 145 Alloc.Deallocate(Ptr); 146 } 147 148 protected: 149 OwningPtr<Document> &Doc; 150 SMRange SourceRange; 151 152 void operator delete(void *) throw() {} 153 154 virtual ~Node() {} 155 156 private: 157 unsigned int TypeID; 158 StringRef Anchor; 159 }; 160 161 /// @brief A null value. 162 /// 163 /// Example: 164 /// !!null null 165 class NullNode : public Node { 166 public: 167 NullNode(OwningPtr<Document> &D) : Node(NK_Null, D, StringRef()) {} 168 169 static inline bool classof(const NullNode *) { return true; } 170 static inline bool classof(const Node *N) { 171 return N->getType() == NK_Null; 172 } 173 }; 174 175 /// @brief A scalar node is an opaque datum that can be presented as a 176 /// series of zero or more Unicode scalar values. 177 /// 178 /// Example: 179 /// Adena 180 class ScalarNode : public Node { 181 public: 182 ScalarNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Val) 183 : Node(NK_Scalar, D, Anchor) 184 , Value(Val) { 185 SMLoc Start = SMLoc::getFromPointer(Val.begin()); 186 SMLoc End = SMLoc::getFromPointer(Val.end() - 1); 187 SourceRange = SMRange(Start, End); 188 } 189 190 // Return Value without any escaping or folding or other fun YAML stuff. This 191 // is the exact bytes that are contained in the file (after conversion to 192 // utf8). 193 StringRef getRawValue() const { return Value; } 194 195 /// @brief Gets the value of this node as a StringRef. 196 /// 197 /// @param Storage is used to store the content of the returned StringRef iff 198 /// it requires any modification from how it appeared in the source. 199 /// This happens with escaped characters and multi-line literals. 200 StringRef getValue(SmallVectorImpl<char> &Storage) const; 201 202 static inline bool classof(const ScalarNode *) { return true; } 203 static inline bool classof(const Node *N) { 204 return N->getType() == NK_Scalar; 205 } 206 207 private: 208 StringRef Value; 209 210 StringRef unescapeDoubleQuoted( StringRef UnquotedValue 211 , StringRef::size_type Start 212 , SmallVectorImpl<char> &Storage) const; 213 }; 214 215 /// @brief A key and value pair. While not technically a Node under the YAML 216 /// representation graph, it is easier to treat them this way. 217 /// 218 /// TODO: Consider making this not a child of Node. 219 /// 220 /// Example: 221 /// Section: .text 222 class KeyValueNode : public Node { 223 public: 224 KeyValueNode(OwningPtr<Document> &D) 225 : Node(NK_KeyValue, D, StringRef()) 226 , Key(0) 227 , Value(0) 228 {} 229 230 /// @brief Parse and return the key. 231 /// 232 /// This may be called multiple times. 233 /// 234 /// @returns The key, or nullptr if failed() == true. 235 Node *getKey(); 236 237 /// @brief Parse and return the value. 238 /// 239 /// This may be called multiple times. 240 /// 241 /// @returns The value, or nullptr if failed() == true. 242 Node *getValue(); 243 244 virtual void skip() { 245 getKey()->skip(); 246 getValue()->skip(); 247 } 248 249 static inline bool classof(const KeyValueNode *) { return true; } 250 static inline bool classof(const Node *N) { 251 return N->getType() == NK_KeyValue; 252 } 253 254 private: 255 Node *Key; 256 Node *Value; 257 }; 258 259 /// @brief This is an iterator abstraction over YAML collections shared by both 260 /// sequences and maps. 261 /// 262 /// BaseT must have a ValueT* member named CurrentEntry and a member function 263 /// increment() which must set CurrentEntry to 0 to create an end iterator. 264 template <class BaseT, class ValueT> 265 class basic_collection_iterator 266 : public std::iterator<std::forward_iterator_tag, ValueT> { 267 public: 268 basic_collection_iterator() : Base(0) {} 269 basic_collection_iterator(BaseT *B) : Base(B) {} 270 271 ValueT *operator ->() const { 272 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 273 return Base->CurrentEntry; 274 } 275 276 ValueT &operator *() const { 277 assert(Base && Base->CurrentEntry && 278 "Attempted to dereference end iterator!"); 279 return *Base->CurrentEntry; 280 } 281 282 operator ValueT*() const { 283 assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); 284 return Base->CurrentEntry; 285 } 286 287 bool operator !=(const basic_collection_iterator &Other) const { 288 if(Base != Other.Base) 289 return true; 290 return (Base && Other.Base) && Base->CurrentEntry 291 != Other.Base->CurrentEntry; 292 } 293 294 basic_collection_iterator &operator++() { 295 assert(Base && "Attempted to advance iterator past end!"); 296 Base->increment(); 297 // Create an end iterator. 298 if (Base->CurrentEntry == 0) 299 Base = 0; 300 return *this; 301 } 302 303 private: 304 BaseT *Base; 305 }; 306 307 // The following two templates are used for both MappingNode and Sequence Node. 308 template <class CollectionType> 309 typename CollectionType::iterator begin(CollectionType &C) { 310 assert(C.IsAtBeginning && "You may only iterate over a collection once!"); 311 C.IsAtBeginning = false; 312 typename CollectionType::iterator ret(&C); 313 ++ret; 314 return ret; 315 } 316 317 template <class CollectionType> 318 void skip(CollectionType &C) { 319 // TODO: support skipping from the middle of a parsed collection ;/ 320 assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!"); 321 if (C.IsAtBeginning) 322 for (typename CollectionType::iterator i = begin(C), e = C.end(); 323 i != e; ++i) 324 i->skip(); 325 } 326 327 /// @brief Represents a YAML map created from either a block map for a flow map. 328 /// 329 /// This parses the YAML stream as increment() is called. 330 /// 331 /// Example: 332 /// Name: _main 333 /// Scope: Global 334 class MappingNode : public Node { 335 public: 336 enum MappingType { 337 MT_Block, 338 MT_Flow, 339 MT_Inline //< An inline mapping node is used for "[key: value]". 340 }; 341 342 MappingNode(OwningPtr<Document> &D, StringRef Anchor, MappingType MT) 343 : Node(NK_Mapping, D, Anchor) 344 , Type(MT) 345 , IsAtBeginning(true) 346 , IsAtEnd(false) 347 , CurrentEntry(0) 348 {} 349 350 friend class basic_collection_iterator<MappingNode, KeyValueNode>; 351 typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator; 352 template <class T> friend typename T::iterator yaml::begin(T &); 353 template <class T> friend void yaml::skip(T &); 354 355 iterator begin() { 356 return yaml::begin(*this); 357 } 358 359 iterator end() { return iterator(); } 360 361 virtual void skip() { 362 yaml::skip(*this); 363 } 364 365 static inline bool classof(const MappingNode *) { return true; } 366 static inline bool classof(const Node *N) { 367 return N->getType() == NK_Mapping; 368 } 369 370 private: 371 MappingType Type; 372 bool IsAtBeginning; 373 bool IsAtEnd; 374 KeyValueNode *CurrentEntry; 375 376 void increment(); 377 }; 378 379 /// @brief Represents a YAML sequence created from either a block sequence for a 380 /// flow sequence. 381 /// 382 /// This parses the YAML stream as increment() is called. 383 /// 384 /// Example: 385 /// - Hello 386 /// - World 387 class SequenceNode : public Node { 388 public: 389 enum SequenceType { 390 ST_Block, 391 ST_Flow, 392 // Use for: 393 // 394 // key: 395 // - val1 396 // - val2 397 // 398 // As a BlockMappingEntry and BlockEnd are not created in this case. 399 ST_Indentless 400 }; 401 402 SequenceNode(OwningPtr<Document> &D, StringRef Anchor, SequenceType ST) 403 : Node(NK_Sequence, D, Anchor) 404 , SeqType(ST) 405 , IsAtBeginning(true) 406 , IsAtEnd(false) 407 , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','. 408 , CurrentEntry(0) 409 {} 410 411 friend class basic_collection_iterator<SequenceNode, Node>; 412 typedef basic_collection_iterator<SequenceNode, Node> iterator; 413 template <class T> friend typename T::iterator yaml::begin(T &); 414 template <class T> friend void yaml::skip(T &); 415 416 void increment(); 417 418 iterator begin() { 419 return yaml::begin(*this); 420 } 421 422 iterator end() { return iterator(); } 423 424 virtual void skip() { 425 yaml::skip(*this); 426 } 427 428 static inline bool classof(const SequenceNode *) { return true; } 429 static inline bool classof(const Node *N) { 430 return N->getType() == NK_Sequence; 431 } 432 433 private: 434 SequenceType SeqType; 435 bool IsAtBeginning; 436 bool IsAtEnd; 437 bool WasPreviousTokenFlowEntry; 438 Node *CurrentEntry; 439 }; 440 441 /// @brief Represents an alias to a Node with an anchor. 442 /// 443 /// Example: 444 /// *AnchorName 445 class AliasNode : public Node { 446 public: 447 AliasNode(OwningPtr<Document> &D, StringRef Val) 448 : Node(NK_Alias, D, StringRef()), Name(Val) {} 449 450 StringRef getName() const { return Name; } 451 Node *getTarget(); 452 453 static inline bool classof(const ScalarNode *) { return true; } 454 static inline bool classof(const Node *N) { 455 return N->getType() == NK_Alias; 456 } 457 458 private: 459 StringRef Name; 460 }; 461 462 /// @brief A YAML Stream is a sequence of Documents. A document contains a root 463 /// node. 464 class Document { 465 public: 466 /// @brief Root for parsing a node. Returns a single node. 467 Node *parseBlockNode(); 468 469 Document(Stream &ParentStream); 470 471 /// @brief Finish parsing the current document and return true if there are 472 /// more. Return false otherwise. 473 bool skip(); 474 475 /// @brief Parse and return the root level node. 476 Node *getRoot() { 477 if (Root) 478 return Root; 479 return Root = parseBlockNode(); 480 } 481 482 private: 483 friend class Node; 484 friend class document_iterator; 485 486 /// @brief Stream to read tokens from. 487 Stream &stream; 488 489 /// @brief Used to allocate nodes to. All are destroyed without calling their 490 /// destructor when the document is destroyed. 491 BumpPtrAllocator NodeAllocator; 492 493 /// @brief The root node. Used to support skipping a partially parsed 494 /// document. 495 Node *Root; 496 497 Token &peekNext(); 498 Token getNext(); 499 void setError(const Twine &Message, Token &Location) const; 500 bool failed() const; 501 502 void handleTagDirective(const Token &Tag) { 503 // TODO: Track tags. 504 } 505 506 /// @brief Parse %BLAH directives and return true if any were encountered. 507 bool parseDirectives(); 508 509 /// @brief Consume the next token and error if it is not \a TK. 510 bool expectToken(int TK); 511 }; 512 513 /// @brief Iterator abstraction for Documents over a Stream. 514 class document_iterator { 515 public: 516 document_iterator() : Doc(NullDoc) {} 517 document_iterator(OwningPtr<Document> &D) : Doc(D) {} 518 519 bool operator ==(const document_iterator &Other) { 520 return Doc == Other.Doc; 521 } 522 bool operator !=(const document_iterator &Other) { 523 return !(*this == Other); 524 } 525 526 document_iterator operator ++() { 527 if (!Doc->skip()) { 528 Doc.reset(0); 529 } else { 530 Stream &S = Doc->stream; 531 Doc.reset(new Document(S)); 532 } 533 return *this; 534 } 535 536 Document &operator *() { 537 return *Doc; 538 } 539 540 OwningPtr<Document> &operator ->() { 541 return Doc; 542 } 543 544 private: 545 static OwningPtr<Document> NullDoc; 546 OwningPtr<Document> &Doc; 547 }; 548 549 } 550 } 551 552 #endif 553