1 #include "XMLHandler.h" 2 3 #include <algorithm> 4 #include <expat.h> 5 #include <stdio.h> 6 #include <string.h> 7 #include <fcntl.h> 8 #include <unistd.h> 9 #include <errno.h> 10 11 #define NS_SEPARATOR 1 12 #define MORE_INDENT " " 13 14 static string 15 xml_text_escape(const string& s) 16 { 17 string result; 18 const size_t N = s.length(); 19 for (size_t i=0; i<N; i++) { 20 char c = s[i]; 21 switch (c) { 22 case '<': 23 result += "<"; 24 break; 25 case '>': 26 result += ">"; 27 break; 28 case '&': 29 result += "&"; 30 break; 31 default: 32 result += c; 33 break; 34 } 35 } 36 return result; 37 } 38 39 static string 40 xml_attr_escape(const string& s) 41 { 42 string result; 43 const size_t N = s.length(); 44 for (size_t i=0; i<N; i++) { 45 char c = s[i]; 46 switch (c) { 47 case '\"': 48 result += """; 49 break; 50 default: 51 result += c; 52 break; 53 } 54 } 55 return result; 56 } 57 58 XMLNamespaceMap::XMLNamespaceMap() 59 { 60 } 61 62 XMLNamespaceMap::XMLNamespaceMap(char const*const* nspaces) 63 64 { 65 while (*nspaces) { 66 m_map[nspaces[1]] = nspaces[0]; 67 nspaces += 2; 68 } 69 } 70 71 string 72 XMLNamespaceMap::Get(const string& ns) const 73 { 74 if (ns == "xml") { 75 return ns; 76 } 77 map<string,string>::const_iterator it = m_map.find(ns); 78 if (it == m_map.end()) { 79 return ""; 80 } else { 81 return it->second; 82 } 83 } 84 85 string 86 XMLNamespaceMap::GetPrefix(const string& ns) const 87 { 88 if (ns == "") { 89 return ""; 90 } 91 map<string,string>::const_iterator it = m_map.find(ns); 92 if (it != m_map.end()) { 93 if (it->second == "") { 94 return ""; 95 } else { 96 return it->second + ":"; 97 } 98 } else { 99 return ":"; // invalid 100 } 101 } 102 103 void 104 XMLNamespaceMap::AddToAttributes(vector<XMLAttribute>* attrs) const 105 { 106 map<string,string>::const_iterator it; 107 for (it=m_map.begin(); it!=m_map.end(); it++) { 108 if (it->second == "xml") { 109 continue; 110 } 111 XMLAttribute attr; 112 if (it->second == "") { 113 attr.name = "xmlns"; 114 } else { 115 attr.name = "xmlns:"; 116 attr.name += it->second; 117 } 118 attr.value = it->first; 119 attrs->push_back(attr); 120 } 121 } 122 123 XMLAttribute::XMLAttribute() 124 { 125 } 126 127 XMLAttribute::XMLAttribute(const XMLAttribute& that) 128 :ns(that.ns), 129 name(that.name), 130 value(that.value) 131 { 132 } 133 134 XMLAttribute::XMLAttribute(string n, string na, string v) 135 :ns(n), 136 name(na), 137 value(v) 138 { 139 } 140 141 XMLAttribute::~XMLAttribute() 142 { 143 } 144 145 int 146 XMLAttribute::Compare(const XMLAttribute& that) const 147 { 148 if (ns != that.ns) { 149 return ns < that.ns ? -1 : 1; 150 } 151 if (name != that.name) { 152 return name < that.name ? -1 : 1; 153 } 154 return 0; 155 } 156 157 string 158 XMLAttribute::Find(const vector<XMLAttribute>& list, const string& ns, const string& name, 159 const string& def) 160 { 161 const size_t N = list.size(); 162 for (size_t i=0; i<N; i++) { 163 const XMLAttribute& attr = list[i]; 164 if (attr.ns == ns && attr.name == name) { 165 return attr.value; 166 } 167 } 168 return def; 169 } 170 171 struct xml_handler_data { 172 vector<XMLHandler*> stack; 173 XML_Parser parser; 174 vector<vector<XMLAttribute>*> attributes; 175 string filename; 176 }; 177 178 XMLNode::XMLNode() 179 { 180 } 181 182 XMLNode::~XMLNode() 183 { 184 // for_each(m_children.begin(), m_children.end(), delete_object<XMLNode>); 185 } 186 187 XMLNode* 188 XMLNode::Clone() const 189 { 190 switch (m_type) { 191 case ELEMENT: { 192 XMLNode* e = XMLNode::NewElement(m_pos, m_ns, m_name, m_attrs, m_pretty); 193 const size_t N = m_children.size(); 194 for (size_t i=0; i<N; i++) { 195 e->m_children.push_back(m_children[i]->Clone()); 196 } 197 return e; 198 } 199 case TEXT: { 200 return XMLNode::NewText(m_pos, m_text, m_pretty); 201 } 202 default: 203 return NULL; 204 } 205 } 206 207 XMLNode* 208 XMLNode::NewElement(const SourcePos& pos, const string& ns, const string& name, 209 const vector<XMLAttribute>& attrs, int pretty) 210 { 211 XMLNode* node = new XMLNode(); 212 node->m_type = ELEMENT; 213 node->m_pretty = pretty; 214 node->m_pos = pos; 215 node->m_ns = ns; 216 node->m_name = name; 217 node->m_attrs = attrs; 218 return node; 219 } 220 221 XMLNode* 222 XMLNode::NewText(const SourcePos& pos, const string& text, int pretty) 223 { 224 XMLNode* node = new XMLNode(); 225 node->m_type = TEXT; 226 node->m_pretty = pretty; 227 node->m_pos = pos; 228 node->m_text = text; 229 return node; 230 } 231 232 void 233 XMLNode::SetPrettyRecursive(int value) 234 { 235 m_pretty = value; 236 const size_t N = m_children.size(); 237 for (size_t i=0; i<N; i++) { 238 m_children[i]->SetPrettyRecursive(value); 239 } 240 } 241 242 string 243 XMLNode::ContentsToString(const XMLNamespaceMap& nspaces) const 244 { 245 return contents_to_string(nspaces, ""); 246 } 247 248 string 249 XMLNode::ToString(const XMLNamespaceMap& nspaces) const 250 { 251 return to_string(nspaces, ""); 252 } 253 254 string 255 XMLNode::OpenTagToString(const XMLNamespaceMap& nspaces, int pretty) const 256 { 257 return open_tag_to_string(nspaces, "", pretty); 258 } 259 260 string 261 XMLNode::contents_to_string(const XMLNamespaceMap& nspaces, const string& indent) const 262 { 263 string result; 264 const size_t N = m_children.size(); 265 for (size_t i=0; i<N; i++) { 266 const XMLNode* child = m_children[i]; 267 switch (child->Type()) { 268 case ELEMENT: 269 if (m_pretty == PRETTY) { 270 result += '\n'; 271 result += indent; 272 } 273 case TEXT: 274 result += child->to_string(nspaces, indent); 275 break; 276 } 277 } 278 return result; 279 } 280 281 string 282 trim_string(const string& str) 283 { 284 const char* p = str.c_str(); 285 while (*p && isspace(*p)) { 286 p++; 287 } 288 const char* q = str.c_str() + str.length() - 1; 289 while (q > p && isspace(*q)) { 290 q--; 291 } 292 q++; 293 return string(p, q-p); 294 } 295 296 string 297 XMLNode::open_tag_to_string(const XMLNamespaceMap& nspaces, const string& indent, int pretty) const 298 { 299 if (m_type != ELEMENT) { 300 return ""; 301 } 302 string result = "<"; 303 result += nspaces.GetPrefix(m_ns); 304 result += m_name; 305 306 vector<XMLAttribute> attrs = m_attrs; 307 308 sort(attrs.begin(), attrs.end()); 309 310 const size_t N = attrs.size(); 311 for (size_t i=0; i<N; i++) { 312 const XMLAttribute& attr = attrs[i]; 313 if (i == 0 || m_pretty == EXACT || pretty == EXACT) { 314 result += ' '; 315 } 316 else { 317 result += "\n"; 318 result += indent; 319 result += MORE_INDENT; 320 result += MORE_INDENT; 321 } 322 result += nspaces.GetPrefix(attr.ns); 323 result += attr.name; 324 result += "=\""; 325 result += xml_attr_escape(attr.value); 326 result += '\"'; 327 } 328 329 if (m_children.size() > 0) { 330 result += '>'; 331 } else { 332 result += " />"; 333 } 334 return result; 335 } 336 337 string 338 XMLNode::to_string(const XMLNamespaceMap& nspaces, const string& indent) const 339 { 340 switch (m_type) 341 { 342 case TEXT: { 343 if (m_pretty == EXACT) { 344 return xml_text_escape(m_text); 345 } else { 346 return xml_text_escape(trim_string(m_text)); 347 } 348 } 349 case ELEMENT: { 350 string result = open_tag_to_string(nspaces, indent, PRETTY); 351 352 if (m_children.size() > 0) { 353 result += contents_to_string(nspaces, indent + MORE_INDENT); 354 355 if (m_pretty == PRETTY && m_children.size() > 0) { 356 result += '\n'; 357 result += indent; 358 } 359 360 result += "</"; 361 result += nspaces.GetPrefix(m_ns); 362 result += m_name; 363 result += '>'; 364 } 365 return result; 366 } 367 default: 368 return ""; 369 } 370 } 371 372 string 373 XMLNode::CollapseTextContents() const 374 { 375 if (m_type == TEXT) { 376 return m_text; 377 } 378 else if (m_type == ELEMENT) { 379 string result; 380 381 const size_t N=m_children.size(); 382 for (size_t i=0; i<N; i++) { 383 result += m_children[i]->CollapseTextContents(); 384 } 385 386 return result; 387 } 388 else { 389 return ""; 390 } 391 } 392 393 vector<XMLNode*> 394 XMLNode::GetElementsByName(const string& ns, const string& name) const 395 { 396 vector<XMLNode*> result; 397 const size_t N=m_children.size(); 398 for (size_t i=0; i<N; i++) { 399 XMLNode* child = m_children[i]; 400 if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) { 401 result.push_back(child); 402 } 403 } 404 return result; 405 } 406 407 XMLNode* 408 XMLNode::GetElementByNameAt(const string& ns, const string& name, size_t index) const 409 { 410 vector<XMLNode*> result; 411 const size_t N=m_children.size(); 412 for (size_t i=0; i<N; i++) { 413 XMLNode* child = m_children[i]; 414 if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) { 415 if (index == 0) { 416 return child; 417 } else { 418 index--; 419 } 420 } 421 } 422 return NULL; 423 } 424 425 size_t 426 XMLNode::CountElementsByName(const string& ns, const string& name) const 427 { 428 size_t result = 0; 429 const size_t N=m_children.size(); 430 for (size_t i=0; i<N; i++) { 431 XMLNode* child = m_children[i]; 432 if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) { 433 result++; 434 } 435 } 436 return result; 437 } 438 439 string 440 XMLNode::GetAttribute(const string& ns, const string& name, const string& def) const 441 { 442 return XMLAttribute::Find(m_attrs, ns, name, def); 443 } 444 445 static void 446 parse_namespace(const char* data, string* ns, string* name) 447 { 448 const char* p = strchr(data, NS_SEPARATOR); 449 if (p != NULL) { 450 ns->assign(data, p-data); 451 name->assign(p+1); 452 } else { 453 ns->assign(""); 454 name->assign(data); 455 } 456 } 457 458 static void 459 convert_attrs(const char** in, vector<XMLAttribute>* out) 460 { 461 while (*in) { 462 XMLAttribute attr; 463 parse_namespace(in[0], &attr.ns, &attr.name); 464 attr.value = in[1]; 465 out->push_back(attr); 466 in += 2; 467 } 468 } 469 470 static bool 471 list_contains(const vector<XMLHandler*>& stack, XMLHandler* handler) 472 { 473 const size_t N = stack.size(); 474 for (size_t i=0; i<N; i++) { 475 if (stack[i] == handler) { 476 return true; 477 } 478 } 479 return false; 480 } 481 482 static void XMLCALL 483 start_element_handler(void *userData, const char *name, const char **attrs) 484 { 485 xml_handler_data* data = (xml_handler_data*)userData; 486 487 XMLHandler* handler = data->stack[data->stack.size()-1]; 488 489 SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser)); 490 string nsString; 491 string nameString; 492 XMLHandler* next = handler; 493 vector<XMLAttribute> attributes; 494 495 parse_namespace(name, &nsString, &nameString); 496 convert_attrs(attrs, &attributes); 497 498 handler->OnStartElement(pos, nsString, nameString, attributes, &next); 499 500 if (next == NULL) { 501 next = handler; 502 } 503 504 if (next != handler) { 505 next->elementPos = pos; 506 next->elementNamespace = nsString; 507 next->elementName = nameString; 508 next->elementAttributes = attributes; 509 } 510 511 data->stack.push_back(next); 512 } 513 514 static void XMLCALL 515 end_element_handler(void *userData, const char *name) 516 { 517 xml_handler_data* data = (xml_handler_data*)userData; 518 519 XMLHandler* handler = data->stack[data->stack.size()-1]; 520 data->stack.pop_back(); 521 522 SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser)); 523 524 if (!list_contains(data->stack, handler)) { 525 handler->OnDone(pos); 526 if (data->stack.size() > 1) { 527 // not top one 528 delete handler; 529 } 530 } 531 532 handler = data->stack[data->stack.size()-1]; 533 534 string nsString; 535 string nameString; 536 537 parse_namespace(name, &nsString, &nameString); 538 539 handler->OnEndElement(pos, nsString, nameString); 540 } 541 542 static void XMLCALL 543 text_handler(void *userData, const XML_Char *s, int len) 544 { 545 xml_handler_data* data = (xml_handler_data*)userData; 546 XMLHandler* handler = data->stack[data->stack.size()-1]; 547 SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser)); 548 handler->OnText(pos, string(s, len)); 549 } 550 551 static void XMLCALL 552 comment_handler(void *userData, const char *comment) 553 { 554 xml_handler_data* data = (xml_handler_data*)userData; 555 XMLHandler* handler = data->stack[data->stack.size()-1]; 556 SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser)); 557 handler->OnComment(pos, string(comment)); 558 } 559 560 bool 561 XMLHandler::ParseFile(const string& filename, XMLHandler* handler) 562 { 563 char buf[16384]; 564 int fd = open(filename.c_str(), O_RDONLY); 565 if (fd < 0) { 566 SourcePos(filename, -1).Error("Unable to open file for read: %s", strerror(errno)); 567 return false; 568 } 569 570 XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR); 571 xml_handler_data state; 572 state.stack.push_back(handler); 573 state.parser = parser; 574 state.filename = filename; 575 576 XML_SetUserData(parser, &state); 577 XML_SetElementHandler(parser, start_element_handler, end_element_handler); 578 XML_SetCharacterDataHandler(parser, text_handler); 579 XML_SetCommentHandler(parser, comment_handler); 580 581 ssize_t len; 582 bool done; 583 do { 584 len = read(fd, buf, sizeof(buf)); 585 done = len < (ssize_t)sizeof(buf); 586 if (len < 0) { 587 SourcePos(filename, -1).Error("Error reading file: %s\n", strerror(errno)); 588 close(fd); 589 return false; 590 } 591 if (XML_Parse(parser, buf, len, done) == XML_STATUS_ERROR) { 592 SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error( 593 "Error parsing XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser))); 594 close(fd); 595 return false; 596 } 597 } while (!done); 598 599 XML_ParserFree(parser); 600 601 close(fd); 602 603 return true; 604 } 605 606 bool 607 XMLHandler::ParseString(const string& filename, const string& text, XMLHandler* handler) 608 { 609 XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR); 610 xml_handler_data state; 611 state.stack.push_back(handler); 612 state.parser = parser; 613 state.filename = filename; 614 615 XML_SetUserData(parser, &state); 616 XML_SetElementHandler(parser, start_element_handler, end_element_handler); 617 XML_SetCharacterDataHandler(parser, text_handler); 618 XML_SetCommentHandler(parser, comment_handler); 619 620 if (XML_Parse(parser, text.c_str(), text.size(), true) == XML_STATUS_ERROR) { 621 SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error( 622 "Error parsing XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser))); 623 return false; 624 } 625 626 XML_ParserFree(parser); 627 628 return true; 629 } 630 631 XMLHandler::XMLHandler() 632 { 633 } 634 635 XMLHandler::~XMLHandler() 636 { 637 } 638 639 int 640 XMLHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name, 641 const vector<XMLAttribute>& attrs, XMLHandler** next) 642 { 643 return 0; 644 } 645 646 int 647 XMLHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name) 648 { 649 return 0; 650 } 651 652 int 653 XMLHandler::OnText(const SourcePos& pos, const string& text) 654 { 655 return 0; 656 } 657 658 int 659 XMLHandler::OnComment(const SourcePos& pos, const string& text) 660 { 661 return 0; 662 } 663 664 int 665 XMLHandler::OnDone(const SourcePos& pos) 666 { 667 return 0; 668 } 669 670 TopElementHandler::TopElementHandler(const string& ns, const string& name, XMLHandler* next) 671 :m_ns(ns), 672 m_name(name), 673 m_next(next) 674 { 675 } 676 677 int 678 TopElementHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name, 679 const vector<XMLAttribute>& attrs, XMLHandler** next) 680 { 681 *next = m_next; 682 return 0; 683 } 684 685 int 686 TopElementHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name) 687 { 688 return 0; 689 } 690 691 int 692 TopElementHandler::OnText(const SourcePos& pos, const string& text) 693 { 694 return 0; 695 } 696 697 int 698 TopElementHandler::OnDone(const SourcePos& pos) 699 { 700 return 0; 701 } 702 703 704 NodeHandler::NodeHandler(XMLNode* root, int pretty) 705 :m_root(root), 706 m_pretty(pretty) 707 { 708 if (root != NULL) { 709 m_nodes.push_back(root); 710 } 711 } 712 713 NodeHandler::~NodeHandler() 714 { 715 } 716 717 int 718 NodeHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name, 719 const vector<XMLAttribute>& attrs, XMLHandler** next) 720 { 721 int pretty; 722 if (XMLAttribute::Find(attrs, XMLNS_XMLNS, "space", "") == "preserve") { 723 pretty = XMLNode::EXACT; 724 } else { 725 if (m_root == NULL) { 726 pretty = m_pretty; 727 } else { 728 pretty = m_nodes[m_nodes.size()-1]->Pretty(); 729 } 730 } 731 XMLNode* n = XMLNode::NewElement(pos, ns, name, attrs, pretty); 732 if (m_root == NULL) { 733 m_root = n; 734 } else { 735 m_nodes[m_nodes.size()-1]->EditChildren().push_back(n); 736 } 737 m_nodes.push_back(n); 738 return 0; 739 } 740 741 int 742 NodeHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name) 743 { 744 m_nodes.pop_back(); 745 return 0; 746 } 747 748 int 749 NodeHandler::OnText(const SourcePos& pos, const string& text) 750 { 751 if (m_root == NULL) { 752 return 1; 753 } 754 XMLNode* n = XMLNode::NewText(pos, text, m_nodes[m_nodes.size()-1]->Pretty()); 755 m_nodes[m_nodes.size()-1]->EditChildren().push_back(n); 756 return 0; 757 } 758 759 int 760 NodeHandler::OnComment(const SourcePos& pos, const string& text) 761 { 762 return 0; 763 } 764 765 int 766 NodeHandler::OnDone(const SourcePos& pos) 767 { 768 return 0; 769 } 770 771 XMLNode* 772 NodeHandler::ParseFile(const string& filename, int pretty) 773 { 774 NodeHandler handler(NULL, pretty); 775 if (!XMLHandler::ParseFile(filename, &handler)) { 776 fprintf(stderr, "error parsing file: %s\n", filename.c_str()); 777 return NULL; 778 } 779 return handler.Root(); 780 } 781 782 XMLNode* 783 NodeHandler::ParseString(const string& filename, const string& text, int pretty) 784 { 785 NodeHandler handler(NULL, pretty); 786 if (!XMLHandler::ParseString(filename, text, &handler)) { 787 fprintf(stderr, "error parsing file: %s\n", filename.c_str()); 788 return NULL; 789 } 790 return handler.Root(); 791 } 792 793 794