1 /** 2 * This file is part of the DOM implementation for KDE. 3 * 4 * Copyright (C) 2000 Peter Kelly (pmk (at) post.com) 5 * Copyright (C) 2005, 2006 Apple Computer, Inc. 6 * Copyright (C) 2006 Alexey Proskuryakov (ap (at) webkit.org) 7 * Copyright (C) 2007 Samuel Weinig (sam (at) webkit.org) 8 * Copyright (C) 2007 The Android Open Source Project 9 * 10 * This library is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Library General Public 12 * License as published by the Free Software Foundation; either 13 * version 2 of the License, or (at your option) any later version. 14 * 15 * This library is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Library General Public License for more details. 19 * 20 * You should have received a copy of the GNU Library General Public License 21 * along with this library; see the file COPYING.LIB. If not, write to 22 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 23 * Boston, MA 02111-1307, USA. 24 */ 25 26 #include "config.h" 27 #include "XMLTokenizer.h" 28 29 #include "CDATASection.h" 30 #include "CachedScript.h" 31 #include "Comment.h" 32 #include "CString.h" 33 #include "DocLoader.h" 34 #include "Document.h" 35 #include "DocumentFragment.h" 36 #include "Frame.h" 37 #include "FrameLoader.h" 38 #include "FrameView.h" 39 #include "HTMLNames.h" 40 #include "HTMLScriptElement.h" 41 #include "HTMLTableSectionElement.h" 42 #include "HTMLTokenizer.h" 43 #include "ProcessingInstruction.h" 44 #include "EventNames.h" 45 46 // strndup is not available everywhere, so here is a portable version <reed> 47 static char* portable_strndup(const char src[], size_t len) 48 { 49 char* origDst = (char*)malloc(len + 1); 50 if (NULL == origDst) 51 return NULL; 52 53 char* dst = origDst; 54 while (len-- > 0) { 55 if ((*dst++ = *src++) == 0) 56 return origDst; 57 } 58 *dst = 0; 59 return origDst; 60 } 61 62 namespace WebCore { 63 64 using namespace EventNames; 65 using namespace HTMLNames; 66 67 const int maxErrors = 25; 68 69 class PendingCallbacks { 70 public: 71 PendingCallbacks() { 72 m_callbacks.setAutoDelete(true); 73 } 74 75 void appendStartElementNSCallback(const XML_Char* name, const XML_Char** atts) { 76 PendingStartElementNSCallback* callback = new PendingStartElementNSCallback; 77 78 callback->name = strdup(name); 79 callback->count = 0; 80 while (atts[callback->count]) 81 callback->count++; 82 callback->atts = (XML_Char**)malloc(sizeof(XML_Char*) * (callback->count+1)); 83 for (int i=0; i<callback->count; i++) 84 callback->atts[i] = strdup(atts[i]); 85 callback->atts[callback->count] = NULL; 86 87 m_callbacks.append(callback); 88 } 89 90 void appendEndElementNSCallback() { 91 PendingEndElementNSCallback* callback = new PendingEndElementNSCallback; 92 93 m_callbacks.append(callback); 94 } 95 96 void appendCharactersCallback(const XML_Char* s, int len) { 97 PendingCharactersCallback* callback = new PendingCharactersCallback; 98 99 callback->s = portable_strndup(s, len); 100 callback->len = len; 101 102 m_callbacks.append(callback); 103 } 104 105 void appendProcessingInstructionCallback(const XML_Char* target, const XML_Char* data) { 106 PendingProcessingInstructionCallback* callback = new PendingProcessingInstructionCallback; 107 108 callback->target = strdup(target); 109 callback->data = strdup(data); 110 111 m_callbacks.append(callback); 112 } 113 114 void appendStartCDATABlockCallback() { 115 PendingStartCDATABlockCallback* callback = new PendingStartCDATABlockCallback; 116 117 m_callbacks.append(callback); 118 } 119 120 void appendEndCDATABlockCallback() { 121 PendingEndCDATABlockCallback* callback = new PendingEndCDATABlockCallback; 122 123 m_callbacks.append(callback); 124 } 125 126 void appendCommentCallback(const XML_Char* s) { 127 PendingCommentCallback* callback = new PendingCommentCallback; 128 129 callback->s = strdup(s); 130 131 m_callbacks.append(callback); 132 } 133 134 void appendErrorCallback(XMLTokenizer::ErrorType type, const char* message, int lineNumber, int columnNumber) { 135 PendingErrorCallback* callback = new PendingErrorCallback; 136 137 callback->message = strdup(message); 138 callback->type = type; 139 callback->lineNumber = lineNumber; 140 callback->columnNumber = columnNumber; 141 142 m_callbacks.append(callback); 143 } 144 145 void callAndRemoveFirstCallback(XMLTokenizer* tokenizer) { 146 PendingCallback* cb = m_callbacks.getFirst(); 147 148 cb->call(tokenizer); 149 m_callbacks.removeFirst(); 150 } 151 152 bool isEmpty() const { return m_callbacks.isEmpty(); } 153 154 private: 155 struct PendingCallback { 156 157 virtual ~PendingCallback() { } 158 159 virtual void call(XMLTokenizer* tokenizer) = 0; 160 }; 161 162 struct PendingStartElementNSCallback : public PendingCallback { 163 virtual ~PendingStartElementNSCallback() { 164 free(name); 165 for (int i=0; i<count; i++) 166 free(atts[i]); 167 free(atts); 168 } 169 170 virtual void call(XMLTokenizer* tokenizer) { 171 tokenizer->startElementNs(name, (const XML_Char**)(atts)); 172 } 173 174 XML_Char* name; 175 int count; 176 XML_Char** atts; 177 }; 178 179 struct PendingEndElementNSCallback : public PendingCallback { 180 virtual void call(XMLTokenizer* tokenizer) { 181 tokenizer->endElementNs(); 182 } 183 }; 184 185 struct PendingCharactersCallback : public PendingCallback { 186 virtual ~PendingCharactersCallback() { 187 free(s); 188 } 189 190 virtual void call(XMLTokenizer* tokenizer) { 191 tokenizer->characters(s, len); 192 } 193 194 XML_Char* s; 195 int len; 196 }; 197 198 struct PendingProcessingInstructionCallback : public PendingCallback { 199 virtual ~PendingProcessingInstructionCallback() { 200 free(target); 201 free(data); 202 } 203 204 virtual void call(XMLTokenizer* tokenizer) { 205 tokenizer->processingInstruction(target, data); 206 } 207 208 XML_Char* target; 209 XML_Char* data; 210 }; 211 212 struct PendingStartCDATABlockCallback : public PendingCallback { 213 virtual void call(XMLTokenizer* tokenizer) { 214 tokenizer->startCdata(); 215 } 216 }; 217 218 struct PendingEndCDATABlockCallback : public PendingCallback { 219 virtual void call(XMLTokenizer* tokenizer) { 220 tokenizer->endCdata(); 221 } 222 }; 223 224 struct PendingCommentCallback : public PendingCallback { 225 virtual ~PendingCommentCallback() { 226 free(s); 227 } 228 229 virtual void call(XMLTokenizer* tokenizer) { 230 tokenizer->comment(s); 231 } 232 233 XML_Char* s; 234 }; 235 236 struct PendingErrorCallback: public PendingCallback { 237 virtual ~PendingErrorCallback() { 238 free (message); 239 } 240 241 virtual void call(XMLTokenizer* tokenizer) { 242 tokenizer->error(type, message, lineNumber, columnNumber); 243 } 244 245 XMLTokenizer::ErrorType type; 246 char* message; 247 int lineNumber; 248 int columnNumber; 249 }; 250 251 public: 252 DeprecatedPtrList<PendingCallback> m_callbacks; 253 }; 254 255 // -------------------------------- 256 257 XMLTokenizer::XMLTokenizer(Document *_doc, FrameView *_view) 258 : m_doc(_doc) 259 , m_view(_view) 260 , m_parser(0) 261 , m_currentNode(_doc) 262 , m_currentNodeIsReferenced(false) 263 , m_sawError(false) 264 , m_sawXSLTransform(false) 265 , m_sawFirstElement(false) 266 , m_parserPaused(false) 267 , m_requestingScript(false) 268 , m_finishCalled(false) 269 , m_errorCount(0) 270 , m_pendingScript(0) 271 , m_scriptStartLine(0) 272 , m_parsingFragment(false) 273 , m_pendingCallbacks(new PendingCallbacks) 274 { 275 } 276 277 XMLTokenizer::XMLTokenizer(DocumentFragment *fragment, Element *parentElement) 278 : m_doc(fragment->document()) 279 , m_view(0) 280 , m_parser(0) 281 , m_currentNode(fragment) 282 , m_currentNodeIsReferenced(fragment) 283 , m_sawError(false) 284 , m_sawXSLTransform(false) 285 , m_sawFirstElement(false) 286 , m_parserPaused(false) 287 , m_requestingScript(false) 288 , m_finishCalled(false) 289 , m_errorCount(0) 290 , m_pendingScript(0) 291 , m_scriptStartLine(0) 292 , m_parsingFragment(true) 293 , m_pendingCallbacks(new PendingCallbacks) 294 { 295 if (fragment) 296 fragment->ref(); 297 if (m_doc) 298 m_doc->ref(); 299 300 // Add namespaces based on the parent node 301 Vector<Element*> elemStack; 302 while (parentElement) { 303 elemStack.append(parentElement); 304 305 Node* n = parentElement->parentNode(); 306 if (!n || !n->isElementNode()) 307 break; 308 parentElement = static_cast<Element*>(n); 309 } 310 311 if (elemStack.isEmpty()) 312 return; 313 314 for (Element* element = elemStack.last(); !elemStack.isEmpty(); elemStack.removeLast()) { 315 if (NamedAttrMap* attrs = element->attributes()) { 316 for (unsigned i = 0; i < attrs->length(); i++) { 317 Attribute* attr = attrs->attributeItem(i); 318 if (attr->localName() == "xmlns") 319 m_defaultNamespaceURI = attr->value(); 320 else if (attr->prefix() == "xmlns") 321 m_prefixToNamespaceMap.set(attr->localName(), attr->value()); 322 } 323 } 324 } 325 } 326 327 XMLTokenizer::~XMLTokenizer() 328 { 329 setCurrentNode(0); 330 if (m_parsingFragment && m_doc) 331 m_doc->deref(); 332 if (m_pendingScript) 333 m_pendingScript->deref(this); 334 } 335 336 void XMLTokenizer::setCurrentNode(Node* n) 337 { 338 bool nodeNeedsReference = n && n != m_doc; 339 if (nodeNeedsReference) 340 n->ref(); 341 if (m_currentNodeIsReferenced) 342 m_currentNode->deref(); 343 m_currentNode = n; 344 m_currentNodeIsReferenced = nodeNeedsReference; 345 } 346 347 // use space instead of ':' as separator because ':' can be inside an uri 348 const XML_Char tripletSep=' '; 349 350 inline DeprecatedString toQString(const XML_Char* str, unsigned int len) 351 { 352 return DeprecatedString::fromUtf8(reinterpret_cast<const char *>(str), len); 353 } 354 355 inline DeprecatedString toQString(const XML_Char* str) 356 { 357 return DeprecatedString::fromUtf8(str ? reinterpret_cast<const char *>(str) : ""); 358 } 359 360 // triplet is formatted as URI + sep + local_name + sep + prefix. 361 static inline void splitTriplet(const XML_Char *name, String &uri, String &localname, String &prefix) 362 { 363 String string[3]; 364 int found = 0; 365 const char *start = reinterpret_cast<const char *>(name); 366 367 while(start && (found < 3)) { 368 char *next = strchr(start, tripletSep); 369 if (next) { 370 string[found++] = toQString(start, (next-start)); 371 start = next+1; 372 } else { 373 string[found++] = toQString(start); 374 break; 375 } 376 } 377 378 switch(found) { 379 case 1: 380 localname = string[0]; 381 break; 382 case 2: 383 uri = string[0]; 384 localname = string[1]; 385 break; 386 case 3: 387 uri = string[0]; 388 localname = string[1]; 389 prefix = string[2]; 390 break; 391 } 392 } 393 394 static inline void handleElementNamespaces(Element *newElement, const String &uri, const String &prefix, ExceptionCode &exceptioncode) 395 { 396 if (uri.isEmpty()) 397 return; 398 399 String namespaceQName("xmlns"); 400 if(!prefix.isEmpty()) 401 namespaceQName += String(":")+ prefix; 402 newElement->setAttributeNS(String("http://www.w3.org/2000/xmlns/"), namespaceQName, uri, exceptioncode); 403 } 404 405 static inline void handleElementAttributes(Element *newElement, const XML_Char **atts, ExceptionCode &exceptioncode) 406 { 407 for (int i = 0; atts[i]; i += 2) { 408 String attrURI, attrLocalName, attrPrefix; 409 splitTriplet(atts[i], attrURI, attrLocalName, attrPrefix); 410 String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + String(":") + attrLocalName; 411 String attrValue = toQString(atts[i+1]); 412 newElement->setAttributeNS(attrURI, attrQName, attrValue, exceptioncode); 413 if (exceptioncode) // exception while setting attributes 414 return; 415 } 416 } 417 418 void XMLTokenizer::startElementNs(const XML_Char *name, const XML_Char **atts) 419 { 420 if (m_parserStopped) 421 return; 422 423 if (m_parserPaused) { 424 m_pendingCallbacks->appendStartElementNSCallback(name, atts); 425 return; 426 } 427 428 m_sawFirstElement = true; 429 430 exitText(); 431 432 String uri, localName, prefix; 433 splitTriplet(name, uri, localName, prefix); 434 String qName = prefix.isEmpty() ? localName : prefix + ":" + localName; 435 436 if (m_parsingFragment && uri.isEmpty()) { 437 if (!prefix.isEmpty()) 438 uri = String(m_prefixToNamespaceMap.get(prefix.impl())); 439 else 440 uri = m_defaultNamespaceURI; 441 } 442 443 ExceptionCode ec = 0; 444 RefPtr<Element> newElement = m_doc->createElementNS(uri, qName, ec); 445 if (!newElement) { 446 stopParsing(); 447 return; 448 } 449 450 handleElementNamespaces(newElement.get(), uri, prefix, ec); 451 if (ec) { 452 stopParsing(); 453 return; 454 } 455 456 handleElementAttributes(newElement.get(), atts, ec); 457 if (ec) { 458 stopParsing(); 459 return; 460 } 461 462 if (newElement->hasTagName(scriptTag)) 463 static_cast<HTMLScriptElement*>(newElement.get())->setCreatedByParser(true); 464 465 if (newElement->hasTagName(HTMLNames::scriptTag)) 466 m_scriptStartLine = lineNumber(); 467 468 if (!m_currentNode->addChild(newElement.get())) { 469 stopParsing(); 470 return; 471 } 472 473 setCurrentNode(newElement.get()); 474 if (m_view && !newElement->attached()) 475 newElement->attach(); 476 } 477 478 void XMLTokenizer::endElementNs() 479 { 480 if (m_parserStopped) 481 return; 482 483 if (m_parserPaused) { 484 m_pendingCallbacks->appendEndElementNSCallback(); 485 return; 486 } 487 488 exitText(); 489 490 Node* n = m_currentNode; 491 RefPtr<Node> parent = n->parentNode(); 492 n->finishedParsing(); 493 494 // don't load external scripts for standalone documents (for now) 495 if (n->isElementNode() && m_view && static_cast<Element*>(n)->hasTagName(scriptTag)) { 496 ASSERT(!m_pendingScript); 497 498 m_requestingScript = true; 499 500 Element* scriptElement = static_cast<Element*>(n); 501 String scriptHref; 502 503 if (static_cast<Element*>(n)->hasTagName(scriptTag)) 504 scriptHref = scriptElement->getAttribute(srcAttr); 505 506 if (!scriptHref.isEmpty()) { 507 // we have a src attribute 508 const AtomicString& charset = scriptElement->getAttribute(charsetAttr); 509 if ((m_pendingScript = m_doc->docLoader()->requestScript(scriptHref, charset))) { 510 m_scriptElement = scriptElement; 511 m_pendingScript->ref(this); 512 513 // m_pendingScript will be 0 if script was already loaded and ref() executed it 514 if (m_pendingScript) 515 pauseParsing(); 516 } else 517 m_scriptElement = 0; 518 519 } else { 520 String scriptCode = ""; 521 for (Node* child = scriptElement->firstChild(); child; child = child->nextSibling()) { 522 if (child->isTextNode() || child->nodeType() == Node::CDATA_SECTION_NODE) 523 scriptCode += static_cast<CharacterData*>(child)->data(); 524 } 525 m_view->frame()->loader()->executeScript(m_doc->URL(), m_scriptStartLine - 1, scriptCode); 526 } 527 528 m_requestingScript = false; 529 } 530 531 setCurrentNode(parent.get()); 532 } 533 534 void XMLTokenizer::characters(const XML_Char *s, int len) 535 { 536 if (m_parserStopped) 537 return; 538 539 if (m_parserPaused) { 540 m_pendingCallbacks->appendCharactersCallback(s, len); 541 return; 542 } 543 544 if (m_currentNode->isTextNode() || enterText()) { 545 ExceptionCode ec = 0; 546 static_cast<Text*>(m_currentNode)->appendData(toQString(s, len), ec); 547 } 548 } 549 550 bool XMLTokenizer::enterText() 551 { 552 RefPtr<Node> newNode = new Text(m_doc, ""); 553 if (!m_currentNode->addChild(newNode.get())) 554 return false; 555 setCurrentNode(newNode.get()); 556 return true; 557 } 558 559 void XMLTokenizer::exitText() 560 { 561 if (m_parserStopped) 562 return; 563 564 if (!m_currentNode || !m_currentNode->isTextNode()) 565 return; 566 567 if (m_view && m_currentNode && !m_currentNode->attached()) 568 m_currentNode->attach(); 569 570 // FIXME: What's the right thing to do if the parent is really 0? 571 // Just leaving the current node set to the text node doesn't make much sense. 572 if (Node* par = m_currentNode->parentNode()) 573 setCurrentNode(par); 574 } 575 576 void XMLTokenizer::processingInstruction(const XML_Char *target, const XML_Char *data) 577 { 578 if (m_parserStopped) 579 return; 580 581 if (m_parserPaused) { 582 m_pendingCallbacks->appendProcessingInstructionCallback(target, data); 583 return; 584 } 585 586 exitText(); 587 588 // ### handle exceptions 589 int exception = 0; 590 RefPtr<ProcessingInstruction> pi = m_doc->createProcessingInstruction( 591 toQString(target), toQString(data), exception); 592 if (exception) 593 return; 594 595 if (!m_currentNode->addChild(pi.get())) 596 return; 597 if (m_view && !pi->attached()) 598 pi->attach(); 599 600 // don't load stylesheets for standalone documents 601 if (m_doc->frame()) { 602 m_sawXSLTransform = !m_sawFirstElement && !pi->checkStyleSheet(); 603 if (m_sawXSLTransform) 604 stopParsing(); 605 } 606 } 607 608 void XMLTokenizer::comment(const XML_Char *s) 609 { 610 if (m_parserStopped) 611 return; 612 613 if (m_parserPaused) { 614 m_pendingCallbacks->appendCommentCallback(s); 615 return; 616 } 617 618 exitText(); 619 620 RefPtr<Node> newNode = m_doc->createComment(toQString(s)); 621 m_currentNode->addChild(newNode.get()); 622 if (m_view && !newNode->attached()) 623 newNode->attach(); 624 } 625 626 void XMLTokenizer::startCdata() 627 { 628 if (m_parserStopped) 629 return; 630 631 if (m_parserPaused) { 632 m_pendingCallbacks->appendStartCDATABlockCallback(); 633 return; 634 } 635 636 exitText(); 637 638 RefPtr<Node> newNode = new CDATASection(m_doc, ""); 639 if (!m_currentNode->addChild(newNode.get())) 640 return; 641 if (m_view && !newNode->attached()) 642 newNode->attach(); 643 setCurrentNode(newNode.get()); 644 } 645 646 void XMLTokenizer::endCdata() 647 { 648 if (m_parserStopped) 649 return; 650 651 if (m_parserPaused) { 652 m_pendingCallbacks->appendEndCDATABlockCallback(); 653 return; 654 } 655 656 if (m_currentNode->parentNode() != 0) 657 setCurrentNode(m_currentNode->parentNode()); 658 } 659 660 static void XMLCALL startElementHandler(void *userdata, const XML_Char *name, const XML_Char **atts) 661 { 662 XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata); 663 tokenizer->startElementNs(name, atts); 664 } 665 666 static void XMLCALL endElementHandler(void *userdata, const XML_Char *name) 667 { 668 XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata); 669 tokenizer->endElementNs(); 670 } 671 672 static void charactersHandler(void *userdata, const XML_Char *s, int len) 673 { 674 XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata); 675 tokenizer->characters(s, len); 676 } 677 678 static void processingInstructionHandler(void *userdata, const XML_Char *target, const XML_Char *data) 679 { 680 XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata); 681 tokenizer->processingInstruction(target, data); 682 } 683 684 static void commentHandler(void *userdata, const XML_Char *comment) 685 { 686 XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata); 687 tokenizer->comment(comment); 688 } 689 690 static void startCdataHandler(void *userdata) 691 { 692 XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata); 693 tokenizer->startCdata(); 694 } 695 696 static void endCdataHandler(void *userdata) 697 { 698 XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata); 699 tokenizer->endCdata(); 700 } 701 702 static int unknownEncodingHandler(void *userdata, const XML_Char *name, XML_Encoding *info) 703 { 704 // Expat doesn't like latin1 so we have to build this map 705 // to do conversion correctly. 706 // FIXME: Create a wrapper for expat that looks like libxml. 707 if (strcasecmp(name, "latin1") == 0) 708 { 709 for (int i=0; i<256; i++) { 710 info->map[i] = i; 711 } 712 return XML_STATUS_OK; 713 } 714 return XML_STATUS_ERROR; 715 } 716 717 bool XMLTokenizer::write(const SegmentedString&s, bool /*appendData*/ ) 718 { 719 String parseString = s.toString(); 720 721 if (m_parserStopped || m_sawXSLTransform) 722 return false; 723 724 if (m_parserPaused) { 725 m_pendingSrc.append(s); 726 return false; 727 } 728 729 if (!m_parser) { 730 static const UChar BOM = 0xFEFF; 731 static const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM); 732 m_parser = XML_ParserCreateNS(BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE", tripletSep); 733 XML_SetUserData(m_parser, (void *)this); 734 XML_SetReturnNSTriplet(m_parser, true); 735 736 XML_SetStartElementHandler(m_parser, startElementHandler); 737 XML_SetEndElementHandler(m_parser, endElementHandler); 738 XML_SetCharacterDataHandler(m_parser, charactersHandler); 739 XML_SetProcessingInstructionHandler(m_parser, processingInstructionHandler); 740 XML_SetCommentHandler(m_parser, commentHandler); 741 XML_SetStartCdataSectionHandler(m_parser, startCdataHandler); 742 XML_SetEndCdataSectionHandler(m_parser, endCdataHandler); 743 XML_SetUnknownEncodingHandler(m_parser, unknownEncodingHandler, NULL); 744 } 745 746 enum XML_Status result = XML_Parse(m_parser, (const char*)parseString.characters(), sizeof(UChar) * parseString.length(), false); 747 if (result == XML_STATUS_ERROR) { 748 reportError(); 749 return false; 750 } 751 752 return true; 753 } 754 755 void XMLTokenizer::end() 756 { 757 if (m_parser) { 758 XML_Parse(m_parser, 0, 0, true); 759 XML_ParserFree(m_parser); 760 m_parser = 0; 761 } 762 763 if (m_sawError) 764 insertErrorMessageBlock(); 765 else { 766 exitText(); 767 m_doc->updateStyleSelector(); 768 } 769 770 setCurrentNode(0); 771 m_doc->finishedParsing(); 772 } 773 774 void XMLTokenizer::finish() 775 { 776 if (m_parserPaused) 777 m_finishCalled = true; 778 else 779 end(); 780 } 781 782 void XMLTokenizer::reportError() 783 { 784 ErrorType type = nonFatal; 785 enum XML_Error code = XML_GetErrorCode(m_parser); 786 switch (code) { 787 case XML_ERROR_NO_MEMORY: 788 type = fatal; 789 break; 790 case XML_ERROR_FINISHED: 791 type = warning; 792 break; 793 default: 794 type = nonFatal; 795 } 796 error(type, XML_ErrorString(code), lineNumber(), columnNumber()); 797 } 798 799 void XMLTokenizer::error(ErrorType type, const char* m, int lineNumber, int columnNumber) 800 { 801 if (type == fatal || m_errorCount < maxErrors) { 802 switch (type) { 803 case warning: 804 m_errorMessages += String::format("warning on line %d at column %d: %s", lineNumber, columnNumber, m); 805 break; 806 case fatal: 807 case nonFatal: 808 m_errorMessages += String::format("error on line %d at column %d: %s", lineNumber, columnNumber, m); 809 } 810 ++m_errorCount; 811 } 812 813 if (type != warning) 814 m_sawError = true; 815 816 if (type == fatal) 817 stopParsing(); 818 } 819 820 static inline RefPtr<Element> createXHTMLParserErrorHeader(Document* doc, const String& errorMessages) 821 { 822 ExceptionCode ec = 0; 823 RefPtr<Element> reportElement = doc->createElementNS(xhtmlNamespaceURI, "parsererror", ec); 824 reportElement->setAttribute(styleAttr, "display:block; pre; border: 2px solid #c77; padding: 0 1em 0 1em; margin: 1em; background-color: #fdd; color: black"); 825 826 RefPtr<Element> h3 = doc->createElementNS(xhtmlNamespaceURI, "h3", ec); 827 reportElement->appendChild(h3.get(), ec); 828 h3->appendChild(doc->createTextNode("This page contains the following errors:"), ec); 829 830 RefPtr<Element> fixed = doc->createElementNS(xhtmlNamespaceURI, "div", ec); 831 reportElement->appendChild(fixed.get(), ec); 832 fixed->setAttribute(styleAttr, "font-family:monospace;font-size:12px"); 833 fixed->appendChild(doc->createTextNode(errorMessages), ec); 834 835 h3 = doc->createElementNS(xhtmlNamespaceURI, "h3", ec); 836 reportElement->appendChild(h3.get(), ec); 837 h3->appendChild(doc->createTextNode("Below is a rendering of the page up to the first error."), ec); 838 839 return reportElement; 840 } 841 842 void XMLTokenizer::insertErrorMessageBlock() 843 { 844 // One or more errors occurred during parsing of the code. Display an error block to the user above 845 // the normal content (the DOM tree is created manually and includes line/col info regarding 846 // where the errors are located) 847 848 // Create elements for display 849 ExceptionCode ec = 0; 850 Document* doc = m_doc; 851 Node* documentElement = doc->documentElement(); 852 if (!documentElement) { 853 RefPtr<Node> rootElement = doc->createElementNS(xhtmlNamespaceURI, "html", ec); 854 doc->appendChild(rootElement, ec); 855 RefPtr<Node> body = doc->createElementNS(xhtmlNamespaceURI, "body", ec); 856 rootElement->appendChild(body, ec); 857 documentElement = body.get(); 858 } 859 860 RefPtr<Element> reportElement = createXHTMLParserErrorHeader(doc, m_errorMessages); 861 documentElement->insertBefore(reportElement, documentElement->firstChild(), ec); 862 doc->updateRendering(); 863 } 864 865 void XMLTokenizer::notifyFinished(CachedResource *finishedObj) 866 { 867 ASSERT(m_pendingScript == finishedObj); 868 869 String cachedScriptUrl = m_pendingScript->url(); 870 String scriptSource = m_pendingScript->script(); 871 bool errorOccurred = m_pendingScript->errorOccurred(); 872 m_pendingScript->deref(this); 873 m_pendingScript = 0; 874 875 RefPtr<Element> e = m_scriptElement; 876 m_scriptElement = 0; 877 878 if (errorOccurred) 879 EventTargetNodeCast(e.get())->dispatchHTMLEvent(errorEvent, true, false); 880 else { 881 m_view->frame()->loader()->executeScript(cachedScriptUrl, 0, scriptSource); 882 EventTargetNodeCast(e.get())->dispatchHTMLEvent(loadEvent, false, false); 883 } 884 885 m_scriptElement = 0; 886 887 if (!m_requestingScript) 888 resumeParsing(); 889 } 890 891 bool XMLTokenizer::isWaitingForScripts() const 892 { 893 return m_pendingScript != 0; 894 } 895 896 Tokenizer *newXMLTokenizer(Document *d, FrameView *v) 897 { 898 return new XMLTokenizer(d, v); 899 } 900 901 int XMLTokenizer::lineNumber() const 902 { 903 return XML_GetCurrentLineNumber(m_parser); 904 } 905 906 int XMLTokenizer::columnNumber() const 907 { 908 return XML_GetCurrentColumnNumber(m_parser); 909 } 910 911 void XMLTokenizer::stopParsing() 912 { 913 Tokenizer::stopParsing(); 914 if (m_parser) 915 XML_StopParser(m_parser, 0); 916 } 917 918 void XMLTokenizer::pauseParsing() 919 { 920 if (m_parsingFragment) 921 return; 922 923 m_parserPaused = true; 924 } 925 926 void XMLTokenizer::resumeParsing() 927 { 928 ASSERT(m_parserPaused); 929 930 m_parserPaused = false; 931 932 // First, execute any pending callbacks 933 while (!m_pendingCallbacks->isEmpty()) { 934 m_pendingCallbacks->callAndRemoveFirstCallback(this); 935 936 // A callback paused the parser 937 if (m_parserPaused) 938 return; 939 } 940 941 // Then, write any pending data 942 SegmentedString rest = m_pendingSrc; 943 m_pendingSrc.clear(); 944 write(rest, false); 945 946 // Finally, if finish() has been called and write() didn't result 947 // in any further callbacks being queued, call end() 948 if (m_finishCalled && m_pendingCallbacks->isEmpty()) 949 end(); 950 } 951 952 // -------------------------------- 953 954 bool parseXMLDocumentFragment(const String &string, DocumentFragment *fragment, Element *parent) 955 { 956 XMLTokenizer tokenizer(fragment, parent); 957 958 XML_Parser parser = XML_ParserCreateNS(NULL, tripletSep); 959 tokenizer.setXMLParser(parser); 960 961 XML_SetUserData(parser, (void *)&tokenizer); 962 XML_SetReturnNSTriplet(parser, true); 963 964 XML_SetStartElementHandler(parser, startElementHandler); 965 XML_SetEndElementHandler(parser, endElementHandler); 966 XML_SetCharacterDataHandler(parser, charactersHandler); 967 XML_SetProcessingInstructionHandler(parser, processingInstructionHandler); 968 XML_SetCommentHandler(parser, commentHandler); 969 XML_SetStartCdataSectionHandler(parser, startCdataHandler); 970 XML_SetEndCdataSectionHandler(parser, endCdataHandler); 971 972 CString cString = string.utf8(); 973 int result = XML_Parse(parser, cString.data(), cString.length(), true); 974 975 XML_ParserFree(parser); 976 tokenizer.setXMLParser(0); 977 978 return result != XML_STATUS_ERROR; 979 } 980 981 // -------------------------------- 982 983 struct AttributeParseState { 984 HashMap<String, String> attributes; 985 bool gotAttributes; 986 }; 987 988 static void attributesStartElementHandler(void *userData, const XML_Char *name, const XML_Char **atts) 989 { 990 if (strcmp(name, "attrs") != 0) 991 return; 992 993 if (atts[0] == 0 ) 994 return; 995 996 AttributeParseState *state = static_cast<AttributeParseState *>(userData); 997 state->gotAttributes = true; 998 999 for (int i = 0; atts[i]; i += 2) { 1000 DeprecatedString attrName = toQString(atts[i]); 1001 DeprecatedString attrValue = toQString(atts[i+1]); 1002 state->attributes.set(attrName, attrValue); 1003 } 1004 } 1005 1006 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK) 1007 { 1008 AttributeParseState state; 1009 state.gotAttributes = false; 1010 1011 XML_Parser parser = XML_ParserCreateNS(NULL, tripletSep); 1012 XML_SetUserData(parser, (void *)&state); 1013 XML_SetReturnNSTriplet(parser, true); 1014 1015 XML_SetStartElementHandler(parser, attributesStartElementHandler); 1016 String input = "<?xml version=\"1.0\"?><attrs " + string.deprecatedString() + " />"; 1017 CString cString = input.deprecatedString().utf8(); 1018 if ( XML_Parse(parser, cString.data(), cString.length(), true) != XML_STATUS_ERROR ) 1019 attrsOK = state.gotAttributes; 1020 XML_ParserFree(parser); 1021 1022 return state.attributes; 1023 } 1024 1025 } 1026