1 /* 2 Copyright (C) 1997 Martin Jones (mjones (at) kde.org) 3 (C) 1997 Torben Weis (weis (at) kde.org) 4 (C) 1999,2001 Lars Knoll (knoll (at) kde.org) 5 (C) 2000,2001 Dirk Mueller (mueller (at) kde.org) 6 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 7 Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) 8 9 This library is free software; you can redistribute it and/or 10 modify it under the terms of the GNU Library General Public 11 License as published by the Free Software Foundation; either 12 version 2 of the License, or (at your option) any later version. 13 14 This library is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 Library General Public License for more details. 18 19 You should have received a copy of the GNU Library General Public License 20 along with this library; see the file COPYING.LIB. If not, write to 21 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 22 Boston, MA 02110-1301, USA. 23 */ 24 25 #include "config.h" 26 #include "HTMLParser.h" 27 28 #include "CharacterNames.h" 29 #include "CSSPropertyNames.h" 30 #include "CSSValueKeywords.h" 31 #include "Chrome.h" 32 #include "ChromeClient.h" 33 #include "Comment.h" 34 #include "Console.h" 35 #include "DOMWindow.h" 36 #include "DocumentFragment.h" 37 #include "DocumentType.h" 38 #include "Frame.h" 39 #include "HTMLBodyElement.h" 40 #include "HTMLDocument.h" 41 #include "HTMLDivElement.h" 42 #include "HTMLDListElement.h" 43 #include "HTMLElementFactory.h" 44 #include "HTMLFormElement.h" 45 #include "HTMLHeadElement.h" 46 #include "HTMLHRElement.h" 47 #include "HTMLHtmlElement.h" 48 #include "HTMLIsIndexElement.h" 49 #include "HTMLMapElement.h" 50 #include "HTMLNames.h" 51 #include "HTMLParserQuirks.h" 52 #include "HTMLTableCellElement.h" 53 #include "HTMLTableRowElement.h" 54 #include "HTMLTableSectionElement.h" 55 #include "HTMLTokenizer.h" 56 #include "LocalizedStrings.h" 57 #include "Page.h" 58 #include "Settings.h" 59 #include "Text.h" 60 #include <wtf/StdLibExtras.h> 61 62 namespace WebCore { 63 64 using namespace HTMLNames; 65 66 static const unsigned cMaxRedundantTagDepth = 20; 67 static const unsigned cResidualStyleMaxDepth = 200; 68 static const unsigned cResidualStyleIterationLimit = 5; 69 70 71 static const int minBlockLevelTagPriority = 3; 72 73 // A cap on the number of tags with priority minBlockLevelTagPriority or higher 74 // allowed in m_blockStack. The cap is enforced by adding such new elements as 75 // siblings instead of children once it is reached. 76 static const size_t cMaxBlockDepth = 4096; 77 78 struct HTMLStackElem : Noncopyable { 79 HTMLStackElem(const AtomicString& t, int lvl, Node* n, bool r, HTMLStackElem* nx) 80 : tagName(t) 81 , level(lvl) 82 , strayTableContent(false) 83 , node(n) 84 , didRefNode(r) 85 , next(nx) 86 { 87 } 88 89 void derefNode() 90 { 91 if (didRefNode) 92 node->deref(); 93 } 94 95 AtomicString tagName; 96 int level; 97 bool strayTableContent; 98 Node* node; 99 bool didRefNode; 100 HTMLStackElem* next; 101 }; 102 103 /** 104 * The parser parses tokenized input into the document, building up the 105 * document tree. If the document is well-formed, parsing it is straightforward. 106 * 107 * Unfortunately, we have to handle many HTML documents that are not well-formed, 108 * so the parser has to be tolerant about errors. 109 * 110 * We have to take care of at least the following error conditions: 111 * 112 * 1. The element being added is explicitly forbidden inside some outer tag. 113 * In this case we should close all tags up to the one, which forbids 114 * the element, and add it afterwards. 115 * 116 * 2. We are not allowed to add the element directly. It could be that 117 * the person writing the document forgot some tag in between (or that the 118 * tag in between is optional). This could be the case with the following 119 * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?). 120 * 121 * 3. We want to add a block element inside to an inline element. Close all 122 * inline elements up to the next higher block element. 123 * 124 * 4. If this doesn't help, close elements until we are allowed to add the 125 * element or ignore the tag. 126 * 127 */ 128 129 HTMLParser::HTMLParser(HTMLDocument* doc, bool reportErrors) 130 : m_document(doc) 131 , m_current(doc) 132 , m_didRefCurrent(false) 133 , m_blockStack(0) 134 , m_blocksInStack(0) 135 , m_hasPElementInScope(NotInScope) 136 , m_inBody(false) 137 , m_haveContent(false) 138 , m_haveFrameSet(false) 139 , m_isParsingFragment(false) 140 , m_reportErrors(reportErrors) 141 , m_handlingResidualStyleAcrossBlocks(false) 142 , m_inStrayTableContent(0) 143 , m_scriptingPermission(FragmentScriptingAllowed) 144 , m_parserQuirks(m_document->page() ? m_document->page()->chrome()->client()->createHTMLParserQuirks() : 0) 145 { 146 } 147 148 HTMLParser::HTMLParser(DocumentFragment* frag, FragmentScriptingPermission scriptingPermission) 149 : m_document(frag->document()) 150 , m_current(frag) 151 , m_didRefCurrent(true) 152 , m_blockStack(0) 153 , m_blocksInStack(0) 154 , m_hasPElementInScope(NotInScope) 155 , m_inBody(true) 156 , m_haveContent(false) 157 , m_haveFrameSet(false) 158 , m_isParsingFragment(true) 159 , m_reportErrors(false) 160 , m_handlingResidualStyleAcrossBlocks(false) 161 , m_inStrayTableContent(0) 162 , m_scriptingPermission(scriptingPermission) 163 , m_parserQuirks(m_document->page() ? m_document->page()->chrome()->client()->createHTMLParserQuirks() : 0) 164 { 165 if (frag) 166 frag->ref(); 167 } 168 169 HTMLParser::~HTMLParser() 170 { 171 freeBlock(); 172 if (m_didRefCurrent) 173 m_current->deref(); 174 } 175 176 void HTMLParser::reset() 177 { 178 ASSERT(!m_isParsingFragment); 179 180 setCurrent(m_document); 181 182 freeBlock(); 183 184 m_inBody = false; 185 m_haveFrameSet = false; 186 m_haveContent = false; 187 m_inStrayTableContent = 0; 188 189 m_currentFormElement = 0; 190 m_currentMapElement = 0; 191 m_head = 0; 192 m_isindexElement = 0; 193 194 m_skipModeTag = nullAtom; 195 196 if (m_parserQuirks) 197 m_parserQuirks->reset(); 198 } 199 200 void HTMLParser::setCurrent(Node* newCurrent) 201 { 202 bool didRefNewCurrent = newCurrent && newCurrent != m_document; 203 if (didRefNewCurrent) 204 newCurrent->ref(); 205 if (m_didRefCurrent) 206 m_current->deref(); 207 m_current = newCurrent; 208 m_didRefCurrent = didRefNewCurrent; 209 } 210 211 inline static int tagPriorityOfNode(Node* n) 212 { 213 return n->isHTMLElement() ? static_cast<HTMLElement*>(n)->tagPriority() : 0; 214 } 215 216 inline void HTMLParser::limitBlockDepth(int tagPriority) 217 { 218 if (tagPriority >= minBlockLevelTagPriority) { 219 while (m_blocksInStack >= cMaxBlockDepth) 220 popBlock(m_blockStack->tagName); 221 } 222 } 223 224 inline bool HTMLParser::insertNodeAfterLimitBlockDepth(Node* n, bool flat) 225 { 226 limitBlockDepth(tagPriorityOfNode(n)); 227 return insertNode(n, flat); 228 } 229 230 PassRefPtr<Node> HTMLParser::parseToken(Token* t) 231 { 232 if (!m_skipModeTag.isNull()) { 233 if (!t->beginTag && t->tagName == m_skipModeTag) 234 // Found the end tag for the current skip mode, so we're done skipping. 235 m_skipModeTag = nullAtom; 236 else if (m_current->localName() == t->tagName) 237 // Do not skip </iframe>. 238 // FIXME: What does that comment mean? How can it be right to parse a token without clearing m_skipModeTag? 239 ; 240 else 241 return 0; 242 } 243 244 // Apparently some sites use </br> instead of <br>. Be compatible with IE and Firefox and treat this like <br>. 245 if (t->isCloseTag(brTag) && m_document->inCompatMode()) { 246 reportError(MalformedBRError); 247 t->beginTag = true; 248 } 249 250 if (!t->beginTag) { 251 processCloseTag(t); 252 return 0; 253 } 254 255 // Ignore spaces, if we're not inside a paragraph or other inline code. 256 // Do not alter the text if it is part of a scriptTag. 257 if (t->tagName == textAtom && t->text && m_current->localName() != scriptTag) { 258 if (m_inBody && !skipMode() && m_current->localName() != styleTag && 259 m_current->localName() != titleTag && !t->text->containsOnlyWhitespace()) 260 m_haveContent = true; 261 262 RefPtr<Node> n; 263 String text = t->text.get(); 264 unsigned charsLeft = text.length(); 265 while (charsLeft) { 266 // split large blocks of text to nodes of manageable size 267 n = Text::createWithLengthLimit(m_document, text, charsLeft); 268 if (!insertNodeAfterLimitBlockDepth(n.get(), t->selfClosingTag)) 269 return 0; 270 } 271 return n; 272 } 273 274 RefPtr<Node> n = getNode(t); 275 // just to be sure, and to catch currently unimplemented stuff 276 if (!n) 277 return 0; 278 279 // set attributes 280 if (n->isHTMLElement()) { 281 HTMLElement* e = static_cast<HTMLElement*>(n.get()); 282 if (m_scriptingPermission == FragmentScriptingAllowed || t->tagName != scriptTag) 283 e->setAttributeMap(t->attrs.get(), m_scriptingPermission); 284 285 // take care of optional close tags 286 if (e->endTagRequirement() == TagStatusOptional) 287 popBlock(t->tagName); 288 289 // If the node does not have a forbidden end tag requirement, and if the broken XML self-closing 290 // syntax was used, report an error. 291 if (t->brokenXMLStyle && e->endTagRequirement() != TagStatusForbidden) { 292 if (t->tagName == scriptTag) 293 reportError(IncorrectXMLCloseScriptWarning); 294 else 295 reportError(IncorrectXMLSelfCloseError, &t->tagName); 296 } 297 } 298 299 if (!insertNodeAfterLimitBlockDepth(n.get(), t->selfClosingTag)) { 300 // we couldn't insert the node 301 302 if (n->isElementNode()) { 303 Element* e = static_cast<Element*>(n.get()); 304 e->setAttributeMap(0); 305 } 306 307 if (m_currentMapElement == n) 308 m_currentMapElement = 0; 309 310 if (m_currentFormElement == n) 311 m_currentFormElement = 0; 312 313 if (m_head == n) 314 m_head = 0; 315 316 return 0; 317 } 318 return n; 319 } 320 321 void HTMLParser::parseDoctypeToken(DoctypeToken* t) 322 { 323 // Ignore any doctype after the first. Ignore doctypes in fragments. 324 if (m_document->doctype() || m_isParsingFragment || m_current != m_document) 325 return; 326 327 // Make a new doctype node and set it as our doctype. 328 m_document->addChild(DocumentType::create(m_document, String::adopt(t->m_name), String::adopt(t->m_publicID), String::adopt(t->m_systemID))); 329 } 330 331 static bool isTableSection(const Node* n) 332 { 333 return n->hasTagName(tbodyTag) || n->hasTagName(tfootTag) || n->hasTagName(theadTag); 334 } 335 336 static bool isTablePart(const Node* n) 337 { 338 return n->hasTagName(trTag) || n->hasTagName(tdTag) || n->hasTagName(thTag) || 339 isTableSection(n); 340 } 341 342 static bool isTableRelated(const Node* n) 343 { 344 return n->hasTagName(tableTag) || isTablePart(n); 345 } 346 347 static bool isScopingTag(const AtomicString& tagName) 348 { 349 return tagName == appletTag || tagName == captionTag || tagName == tdTag || tagName == thTag || tagName == buttonTag || tagName == marqueeTag || tagName == objectTag || tagName == tableTag || tagName == htmlTag; 350 } 351 352 bool HTMLParser::insertNode(Node* n, bool flat) 353 { 354 RefPtr<Node> protectNode(n); 355 356 const AtomicString& localName = n->localName(); 357 358 // <table> is never allowed inside stray table content. Always pop out of the stray table content 359 // and close up the first table, and then start the second table as a sibling. 360 if (m_inStrayTableContent && localName == tableTag) 361 popBlock(tableTag); 362 363 if (m_parserQuirks && !m_parserQuirks->shouldInsertNode(m_current, n)) 364 return false; 365 366 int tagPriority = tagPriorityOfNode(n); 367 368 // let's be stupid and just try to insert it. 369 // this should work if the document is well-formed 370 Node* newNode = m_current->addChild(n); 371 if (!newNode) 372 return handleError(n, flat, localName, tagPriority); // Try to handle the error. 373 374 // don't push elements without end tags (e.g., <img>) on the stack 375 bool parentAttached = m_current->attached(); 376 if (tagPriority > 0 && !flat) { 377 if (newNode == m_current) { 378 // This case should only be hit when a demoted <form> is placed inside a table. 379 ASSERT(localName == formTag); 380 reportError(FormInsideTablePartError, &m_current->localName()); 381 HTMLFormElement* form = static_cast<HTMLFormElement*>(n); 382 form->setDemoted(true); 383 } else { 384 // The pushBlock function transfers ownership of current to the block stack 385 // so we're guaranteed that m_didRefCurrent is false. The code below is an 386 // optimized version of setCurrent that takes advantage of that fact and also 387 // assumes that newNode is neither 0 nor a pointer to the document. 388 pushBlock(localName, tagPriority); 389 newNode->beginParsingChildren(); 390 ASSERT(!m_didRefCurrent); 391 newNode->ref(); 392 m_current = newNode; 393 m_didRefCurrent = true; 394 } 395 if (parentAttached && !n->attached() && !m_isParsingFragment) 396 n->attach(); 397 } else { 398 if (parentAttached && !n->attached() && !m_isParsingFragment) 399 n->attach(); 400 n->finishParsingChildren(); 401 } 402 403 if (localName == htmlTag && m_document->frame()) 404 m_document->frame()->loader()->dispatchDocumentElementAvailable(); 405 406 return true; 407 } 408 409 bool HTMLParser::handleError(Node* n, bool flat, const AtomicString& localName, int tagPriority) 410 { 411 // Error handling code. This is just ad hoc handling of specific parent/child combinations. 412 HTMLElement* e; 413 bool handled = false; 414 415 // 1. Check out the element's tag name to decide how to deal with errors. 416 if (n->isHTMLElement()) { 417 HTMLElement* h = static_cast<HTMLElement*>(n); 418 if (h->hasLocalName(trTag) || h->hasLocalName(thTag) || h->hasLocalName(tdTag)) { 419 if (m_inStrayTableContent && !isTableRelated(m_current)) { 420 reportError(MisplacedTablePartError, &localName, &m_current->localName()); 421 // pop out to the nearest enclosing table-related tag. 422 while (m_blockStack && !isTableRelated(m_current)) 423 popOneBlock(); 424 return insertNode(n); 425 } 426 } else if (h->hasLocalName(headTag)) { 427 if (!m_current->isDocumentNode() && !m_current->hasTagName(htmlTag)) { 428 reportError(MisplacedHeadError); 429 return false; 430 } 431 } else if (h->hasLocalName(metaTag) || h->hasLocalName(linkTag) || h->hasLocalName(baseTag)) { 432 bool createdHead = false; 433 if (!m_head) { 434 createHead(); 435 createdHead = true; 436 } 437 if (m_head) { 438 if (!createdHead) 439 reportError(MisplacedHeadContentError, &localName, &m_current->localName()); 440 if (m_head->addChild(n)) { 441 if (!n->attached() && !m_isParsingFragment) 442 n->attach(); 443 return true; 444 } else 445 return false; 446 } 447 } else if (h->hasLocalName(htmlTag)) { 448 if (!m_current->isDocumentNode() ) { 449 if (m_document->documentElement() && m_document->documentElement()->hasTagName(htmlTag)) { 450 reportError(RedundantHTMLBodyError, &localName); 451 // we have another <HTML> element.... apply attributes to existing one 452 // make sure we don't overwrite already existing attributes 453 NamedNodeMap* map = static_cast<Element*>(n)->attributes(true); 454 Element* existingHTML = static_cast<Element*>(m_document->documentElement()); 455 NamedNodeMap* bmap = existingHTML->attributes(false); 456 for (unsigned l = 0; map && l < map->length(); ++l) { 457 Attribute* it = map->attributeItem(l); 458 if (!bmap->getAttributeItem(it->name())) 459 existingHTML->setAttribute(it->name(), it->value()); 460 } 461 } 462 return false; 463 } 464 } else if (h->hasLocalName(titleTag) || h->hasLocalName(styleTag) || h->hasLocalName(scriptTag)) { 465 bool createdHead = false; 466 if (!m_head) { 467 createHead(); 468 createdHead = true; 469 } 470 if (m_head) { 471 Node* newNode = m_head->addChild(n); 472 if (!newNode) { 473 setSkipMode(h->tagQName()); 474 return false; 475 } 476 477 if (!createdHead) 478 reportError(MisplacedHeadContentError, &localName, &m_current->localName()); 479 480 pushBlock(localName, tagPriority); 481 newNode->beginParsingChildren(); 482 setCurrent(newNode); 483 if (!n->attached() && !m_isParsingFragment) 484 n->attach(); 485 return true; 486 } 487 if (m_inBody) { 488 setSkipMode(h->tagQName()); 489 return false; 490 } 491 } else if (h->hasLocalName(bodyTag)) { 492 if (m_inBody && m_document->body()) { 493 // we have another <BODY> element.... apply attributes to existing one 494 // make sure we don't overwrite already existing attributes 495 // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor> 496 reportError(RedundantHTMLBodyError, &localName); 497 NamedNodeMap* map = static_cast<Element*>(n)->attributes(true); 498 Element* existingBody = m_document->body(); 499 NamedNodeMap* bmap = existingBody->attributes(false); 500 for (unsigned l = 0; map && l < map->length(); ++l) { 501 Attribute* it = map->attributeItem(l); 502 if (!bmap->getAttributeItem(it->name())) 503 existingBody->setAttribute(it->name(), it->value()); 504 } 505 return false; 506 } 507 else if (!m_current->isDocumentNode()) 508 return false; 509 } else if (h->hasLocalName(areaTag)) { 510 if (m_currentMapElement) { 511 reportError(MisplacedAreaError, &m_current->localName()); 512 m_currentMapElement->addChild(n); 513 if (!n->attached() && !m_isParsingFragment) 514 n->attach(); 515 handled = true; 516 return true; 517 } 518 return false; 519 } else if (h->hasLocalName(colgroupTag) || h->hasLocalName(captionTag)) { 520 if (isTableRelated(m_current)) { 521 while (m_blockStack && isTablePart(m_current)) 522 popOneBlock(); 523 return insertNode(n); 524 } 525 } 526 } else if (n->isCommentNode() && !m_head) 527 return false; 528 529 // 2. Next we examine our currently active element to do some further error handling. 530 if (m_current->isHTMLElement()) { 531 HTMLElement* h = static_cast<HTMLElement*>(m_current); 532 const AtomicString& currentTagName = h->localName(); 533 if (h->hasLocalName(htmlTag)) { 534 HTMLElement* elt = n->isHTMLElement() ? static_cast<HTMLElement*>(n) : 0; 535 if (elt && (elt->hasLocalName(scriptTag) || elt->hasLocalName(styleTag) || 536 elt->hasLocalName(metaTag) || elt->hasLocalName(linkTag) || 537 elt->hasLocalName(objectTag) || elt->hasLocalName(embedTag) || 538 elt->hasLocalName(titleTag) || elt->hasLocalName(isindexTag) || 539 elt->hasLocalName(baseTag))) { 540 if (!m_head) { 541 m_head = new HTMLHeadElement(headTag, m_document); 542 insertNode(m_head.get()); 543 handled = true; 544 } 545 } else { 546 if (n->isTextNode()) { 547 Text* t = static_cast<Text*>(n); 548 if (t->containsOnlyWhitespace()) 549 return false; 550 } 551 if (!m_haveFrameSet) { 552 // Ensure that head exists. 553 // But not for older versions of Mail, where the implicit <head> isn't expected - <rdar://problem/6863795> 554 if (shouldCreateImplicitHead(m_document)) 555 createHead(); 556 557 popBlock(headTag); 558 e = new HTMLBodyElement(bodyTag, m_document); 559 startBody(); 560 insertNode(e); 561 handled = true; 562 } else 563 reportError(MisplacedFramesetContentError, &localName); 564 } 565 } else if (h->hasLocalName(headTag)) { 566 if (n->hasTagName(htmlTag)) 567 return false; 568 else { 569 // This means the body starts here... 570 if (!m_haveFrameSet) { 571 ASSERT(currentTagName == headTag); 572 popBlock(currentTagName); 573 e = new HTMLBodyElement(bodyTag, m_document); 574 startBody(); 575 insertNode(e); 576 handled = true; 577 } else 578 reportError(MisplacedFramesetContentError, &localName); 579 } 580 } else if (h->hasLocalName(addressTag) || h->hasLocalName(fontTag) 581 || h->hasLocalName(styleTag) || h->hasLocalName(titleTag)) { 582 reportError(MisplacedContentRetryError, &localName, ¤tTagName); 583 popBlock(currentTagName); 584 handled = true; 585 } else if (h->hasLocalName(captionTag)) { 586 // Illegal content in a caption. Close the caption and try again. 587 reportError(MisplacedCaptionContentError, &localName); 588 popBlock(currentTagName); 589 if (isTablePart(n)) 590 return insertNode(n, flat); 591 } else if (h->hasLocalName(tableTag) || h->hasLocalName(trTag) || isTableSection(h)) { 592 if (n->hasTagName(tableTag)) { 593 reportError(MisplacedTableError, ¤tTagName); 594 if (m_isParsingFragment && !h->hasLocalName(tableTag)) 595 // fragment may contain table parts without <table> ancestor, pop them one by one 596 popBlock(h->localName()); 597 popBlock(localName); // end the table 598 handled = true; // ...and start a new one 599 } else { 600 ExceptionCode ec = 0; 601 Node* node = m_current; 602 Node* parent = node->parentNode(); 603 // A script may have removed the current node's parent from the DOM 604 // http://bugs.webkit.org/show_bug.cgi?id=7137 605 // FIXME: we should do real recovery here and re-parent with the correct node. 606 if (!parent) 607 return false; 608 Node* grandparent = parent->parentNode(); 609 610 if (n->isTextNode() || 611 (h->hasLocalName(trTag) && 612 isTableSection(parent) && grandparent && grandparent->hasTagName(tableTag)) || 613 ((!n->hasTagName(tdTag) && !n->hasTagName(thTag) && 614 !n->hasTagName(formTag) && !n->hasTagName(scriptTag)) && isTableSection(node) && 615 parent->hasTagName(tableTag))) { 616 node = (node->hasTagName(tableTag)) ? node : 617 ((node->hasTagName(trTag)) ? grandparent : parent); 618 // This can happen with fragments 619 if (!node) 620 return false; 621 Node* parent = node->parentNode(); 622 if (!parent) 623 return false; 624 parent->insertBefore(n, node, ec); 625 if (!ec) { 626 reportError(StrayTableContentError, &localName, ¤tTagName); 627 if (n->isHTMLElement() && tagPriority > 0 && 628 !flat && static_cast<HTMLElement*>(n)->endTagRequirement() != TagStatusForbidden) 629 { 630 pushBlock(localName, tagPriority); 631 n->beginParsingChildren(); 632 setCurrent(n); 633 m_inStrayTableContent++; 634 m_blockStack->strayTableContent = true; 635 } 636 return true; 637 } 638 } 639 640 if (!ec) { 641 if (m_current->hasTagName(trTag)) { 642 reportError(TablePartRequiredError, &localName, &tdTag.localName()); 643 e = new HTMLTableCellElement(tdTag, m_document); 644 } else if (m_current->hasTagName(tableTag)) { 645 // Don't report an error in this case, since making a <tbody> happens all the time when you have <table><tr>, 646 // and it isn't really a parse error per se. 647 e = new HTMLTableSectionElement(tbodyTag, m_document); 648 } else { 649 reportError(TablePartRequiredError, &localName, &trTag.localName()); 650 e = new HTMLTableRowElement(trTag, m_document); 651 } 652 653 insertNode(e); 654 handled = true; 655 } 656 } 657 } else if (h->hasLocalName(objectTag)) { 658 reportError(MisplacedContentRetryError, &localName, ¤tTagName); 659 popBlock(objectTag); 660 handled = true; 661 } else if (h->hasLocalName(pTag) || isHeadingTag(currentTagName)) { 662 if (!isInline(n)) { 663 popBlock(currentTagName); 664 handled = true; 665 } 666 } else if (h->hasLocalName(optionTag) || h->hasLocalName(optgroupTag)) { 667 if (localName == optgroupTag) { 668 popBlock(currentTagName); 669 handled = true; 670 } else if (localName == selectTag) { 671 // IE treats a nested select as </select>. Let's do the same 672 popBlock(localName); 673 } 674 } else if (h->hasLocalName(selectTag)) { 675 if (localName == inputTag || localName == textareaTag) { 676 reportError(MisplacedContentRetryError, &localName, ¤tTagName); 677 popBlock(currentTagName); 678 handled = true; 679 } 680 } else if (h->hasLocalName(colgroupTag)) { 681 popBlock(currentTagName); 682 handled = true; 683 } else if (!h->hasLocalName(bodyTag)) { 684 if (isInline(m_current)) { 685 popInlineBlocks(); 686 handled = true; 687 } 688 } 689 } else if (m_current->isDocumentNode()) { 690 if (n->isTextNode()) { 691 Text* t = static_cast<Text*>(n); 692 if (t->containsOnlyWhitespace()) 693 return false; 694 } 695 696 if (!m_document->documentElement()) { 697 e = new HTMLHtmlElement(htmlTag, m_document); 698 insertNode(e); 699 handled = true; 700 } 701 } 702 703 // 3. If we couldn't handle the error, just return false and attempt to error-correct again. 704 if (!handled) { 705 reportError(IgnoredContentError, &localName, &m_current->localName()); 706 return false; 707 } 708 return insertNode(n); 709 } 710 711 typedef bool (HTMLParser::*CreateErrorCheckFunc)(Token* t, RefPtr<Node>&); 712 typedef HashMap<AtomicStringImpl*, CreateErrorCheckFunc> FunctionMap; 713 714 bool HTMLParser::textCreateErrorCheck(Token* t, RefPtr<Node>& result) 715 { 716 result = Text::create(m_document, t->text.get()); 717 return false; 718 } 719 720 bool HTMLParser::commentCreateErrorCheck(Token* t, RefPtr<Node>& result) 721 { 722 result = Comment::create(m_document, t->text.get()); 723 return false; 724 } 725 726 bool HTMLParser::headCreateErrorCheck(Token*, RefPtr<Node>& result) 727 { 728 if (!m_head || m_current->localName() == htmlTag) { 729 m_head = new HTMLHeadElement(headTag, m_document); 730 result = m_head; 731 } else 732 reportError(MisplacedHeadError); 733 return false; 734 } 735 736 bool HTMLParser::bodyCreateErrorCheck(Token*, RefPtr<Node>&) 737 { 738 // body no longer allowed if we have a frameset 739 if (m_haveFrameSet) 740 return false; 741 742 // Ensure that head exists (unless parsing a fragment). 743 // But not for older versions of Mail, where the implicit <head> isn't expected - <rdar://problem/6863795> 744 if (!m_isParsingFragment && shouldCreateImplicitHead(m_document)) 745 createHead(); 746 747 popBlock(headTag); 748 startBody(); 749 return true; 750 } 751 752 bool HTMLParser::framesetCreateErrorCheck(Token*, RefPtr<Node>&) 753 { 754 popBlock(headTag); 755 if (m_inBody && !m_haveFrameSet && !m_haveContent) { 756 popBlock(bodyTag); 757 // ### actually for IE document.body returns the now hidden "body" element 758 // we can't implement that behaviour now because it could cause too many 759 // regressions and the headaches are not worth the work as long as there is 760 // no site actually relying on that detail (Dirk) 761 if (m_document->body()) 762 m_document->body()->setAttribute(styleAttr, "display:none"); 763 m_inBody = false; 764 } 765 if ((m_haveContent || m_haveFrameSet) && m_current->localName() == htmlTag) 766 return false; 767 m_haveFrameSet = true; 768 startBody(); 769 return true; 770 } 771 772 bool HTMLParser::formCreateErrorCheck(Token* t, RefPtr<Node>& result) 773 { 774 // Only create a new form if we're not already inside one. 775 // This is consistent with other browsers' behavior. 776 if (!m_currentFormElement) { 777 m_currentFormElement = new HTMLFormElement(formTag, m_document); 778 result = m_currentFormElement; 779 pCloserCreateErrorCheck(t, result); 780 } 781 return false; 782 } 783 784 bool HTMLParser::isindexCreateErrorCheck(Token* t, RefPtr<Node>& result) 785 { 786 RefPtr<Node> n = handleIsindex(t); 787 if (!m_inBody) 788 m_isindexElement = n.release(); 789 else { 790 t->selfClosingTag = true; 791 result = n.release(); 792 } 793 return false; 794 } 795 796 bool HTMLParser::selectCreateErrorCheck(Token*, RefPtr<Node>&) 797 { 798 return true; 799 } 800 801 bool HTMLParser::ddCreateErrorCheck(Token* t, RefPtr<Node>& result) 802 { 803 pCloserCreateErrorCheck(t, result); 804 popBlock(dtTag); 805 popBlock(ddTag); 806 return true; 807 } 808 809 bool HTMLParser::dtCreateErrorCheck(Token* t, RefPtr<Node>& result) 810 { 811 pCloserCreateErrorCheck(t, result); 812 popBlock(ddTag); 813 popBlock(dtTag); 814 return true; 815 } 816 817 bool HTMLParser::rpCreateErrorCheck(Token*, RefPtr<Node>&) 818 { 819 popBlock(rpTag); 820 popBlock(rtTag); 821 return true; 822 } 823 824 bool HTMLParser::rtCreateErrorCheck(Token*, RefPtr<Node>&) 825 { 826 popBlock(rpTag); 827 popBlock(rtTag); 828 return true; 829 } 830 831 bool HTMLParser::nestedCreateErrorCheck(Token* t, RefPtr<Node>&) 832 { 833 popBlock(t->tagName); 834 return true; 835 } 836 837 bool HTMLParser::nestedPCloserCreateErrorCheck(Token* t, RefPtr<Node>& result) 838 { 839 pCloserCreateErrorCheck(t, result); 840 popBlock(t->tagName); 841 return true; 842 } 843 844 bool HTMLParser::nestedStyleCreateErrorCheck(Token* t, RefPtr<Node>&) 845 { 846 return allowNestedRedundantTag(t->tagName); 847 } 848 849 bool HTMLParser::tableCellCreateErrorCheck(Token*, RefPtr<Node>&) 850 { 851 popBlock(tdTag); 852 popBlock(thTag); 853 return true; 854 } 855 856 bool HTMLParser::tableSectionCreateErrorCheck(Token*, RefPtr<Node>&) 857 { 858 popBlock(theadTag); 859 popBlock(tbodyTag); 860 popBlock(tfootTag); 861 return true; 862 } 863 864 bool HTMLParser::noembedCreateErrorCheck(Token*, RefPtr<Node>&) 865 { 866 setSkipMode(noembedTag); 867 return true; 868 } 869 870 bool HTMLParser::noframesCreateErrorCheck(Token*, RefPtr<Node>&) 871 { 872 setSkipMode(noframesTag); 873 return true; 874 } 875 876 bool HTMLParser::noscriptCreateErrorCheck(Token*, RefPtr<Node>&) 877 { 878 if (!m_isParsingFragment) { 879 Settings* settings = m_document->settings(); 880 if (settings && settings->isJavaScriptEnabled()) 881 setSkipMode(noscriptTag); 882 } 883 return true; 884 } 885 886 bool HTMLParser::pCloserCreateErrorCheck(Token*, RefPtr<Node>&) 887 { 888 if (hasPElementInScope()) 889 popBlock(pTag); 890 return true; 891 } 892 893 bool HTMLParser::pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&) 894 { 895 if (m_document->inCompatMode()) 896 return true; 897 if (hasPElementInScope()) 898 popBlock(pTag); 899 return true; 900 } 901 902 bool HTMLParser::mapCreateErrorCheck(Token*, RefPtr<Node>& result) 903 { 904 m_currentMapElement = new HTMLMapElement(mapTag, m_document); 905 result = m_currentMapElement; 906 return false; 907 } 908 909 PassRefPtr<Node> HTMLParser::getNode(Token* t) 910 { 911 // Init our error handling table. 912 DEFINE_STATIC_LOCAL(FunctionMap, gFunctionMap, ()); 913 if (gFunctionMap.isEmpty()) { 914 gFunctionMap.set(aTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck); 915 gFunctionMap.set(addressTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 916 gFunctionMap.set(articleTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 917 gFunctionMap.set(asideTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 918 gFunctionMap.set(bTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); 919 gFunctionMap.set(bigTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); 920 gFunctionMap.set(blockquoteTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 921 gFunctionMap.set(bodyTag.localName().impl(), &HTMLParser::bodyCreateErrorCheck); 922 gFunctionMap.set(buttonTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck); 923 gFunctionMap.set(centerTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 924 gFunctionMap.set(commentAtom.impl(), &HTMLParser::commentCreateErrorCheck); 925 gFunctionMap.set(ddTag.localName().impl(), &HTMLParser::ddCreateErrorCheck); 926 gFunctionMap.set(dirTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 927 gFunctionMap.set(divTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 928 gFunctionMap.set(dlTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 929 gFunctionMap.set(dtTag.localName().impl(), &HTMLParser::dtCreateErrorCheck); 930 gFunctionMap.set(formTag.localName().impl(), &HTMLParser::formCreateErrorCheck); 931 gFunctionMap.set(fieldsetTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 932 gFunctionMap.set(footerTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 933 gFunctionMap.set(framesetTag.localName().impl(), &HTMLParser::framesetCreateErrorCheck); 934 gFunctionMap.set(h1Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 935 gFunctionMap.set(h2Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 936 gFunctionMap.set(h3Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 937 gFunctionMap.set(h4Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 938 gFunctionMap.set(h5Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 939 gFunctionMap.set(h6Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 940 gFunctionMap.set(headTag.localName().impl(), &HTMLParser::headCreateErrorCheck); 941 gFunctionMap.set(headerTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 942 gFunctionMap.set(hrTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 943 gFunctionMap.set(iTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); 944 gFunctionMap.set(isindexTag.localName().impl(), &HTMLParser::isindexCreateErrorCheck); 945 gFunctionMap.set(liTag.localName().impl(), &HTMLParser::nestedPCloserCreateErrorCheck); 946 gFunctionMap.set(listingTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 947 gFunctionMap.set(mapTag.localName().impl(), &HTMLParser::mapCreateErrorCheck); 948 gFunctionMap.set(menuTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 949 gFunctionMap.set(navTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 950 gFunctionMap.set(nobrTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck); 951 gFunctionMap.set(noembedTag.localName().impl(), &HTMLParser::noembedCreateErrorCheck); 952 gFunctionMap.set(noframesTag.localName().impl(), &HTMLParser::noframesCreateErrorCheck); 953 gFunctionMap.set(noscriptTag.localName().impl(), &HTMLParser::noscriptCreateErrorCheck); 954 gFunctionMap.set(olTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 955 gFunctionMap.set(pTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 956 gFunctionMap.set(plaintextTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 957 gFunctionMap.set(preTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 958 gFunctionMap.set(rpTag.localName().impl(), &HTMLParser::rpCreateErrorCheck); 959 gFunctionMap.set(rtTag.localName().impl(), &HTMLParser::rtCreateErrorCheck); 960 gFunctionMap.set(sTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); 961 gFunctionMap.set(sectionTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 962 gFunctionMap.set(selectTag.localName().impl(), &HTMLParser::selectCreateErrorCheck); 963 gFunctionMap.set(smallTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); 964 gFunctionMap.set(strikeTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); 965 gFunctionMap.set(tableTag.localName().impl(), &HTMLParser::pCloserStrictCreateErrorCheck); 966 gFunctionMap.set(tbodyTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck); 967 gFunctionMap.set(tdTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck); 968 gFunctionMap.set(textAtom.impl(), &HTMLParser::textCreateErrorCheck); 969 gFunctionMap.set(tfootTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck); 970 gFunctionMap.set(thTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck); 971 gFunctionMap.set(theadTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck); 972 gFunctionMap.set(trTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck); 973 gFunctionMap.set(ttTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); 974 gFunctionMap.set(uTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck); 975 gFunctionMap.set(ulTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck); 976 } 977 978 bool proceed = true; 979 RefPtr<Node> result; 980 if (CreateErrorCheckFunc errorCheckFunc = gFunctionMap.get(t->tagName.impl())) 981 proceed = (this->*errorCheckFunc)(t, result); 982 if (proceed) 983 result = HTMLElementFactory::createHTMLElement(QualifiedName(nullAtom, t->tagName, xhtmlNamespaceURI), m_document, m_currentFormElement.get()); 984 return result.release(); 985 } 986 987 bool HTMLParser::allowNestedRedundantTag(const AtomicString& tagName) 988 { 989 // www.liceo.edu.mx is an example of a site that achieves a level of nesting of 990 // about 1500 tags, all from a bunch of <b>s. We will only allow at most 20 991 // nested tags of the same type before just ignoring them all together. 992 unsigned i = 0; 993 for (HTMLStackElem* curr = m_blockStack; 994 i < cMaxRedundantTagDepth && curr && curr->tagName == tagName; 995 curr = curr->next, i++) { } 996 return i != cMaxRedundantTagDepth; 997 } 998 999 void HTMLParser::processCloseTag(Token* t) 1000 { 1001 // Support for really broken html. 1002 // we never close the body tag, since some stupid web pages close it before the actual end of the doc. 1003 // let's rely on the end() call to close things. 1004 if (t->tagName == htmlTag || t->tagName == bodyTag || t->tagName == commentAtom) 1005 return; 1006 1007 bool checkForCloseTagErrors = true; 1008 if (t->tagName == formTag && m_currentFormElement) { 1009 m_currentFormElement = 0; 1010 checkForCloseTagErrors = false; 1011 } else if (t->tagName == mapTag) 1012 m_currentMapElement = 0; 1013 else if (t->tagName == pTag) 1014 checkForCloseTagErrors = false; 1015 1016 HTMLStackElem* oldElem = m_blockStack; 1017 popBlock(t->tagName, checkForCloseTagErrors); 1018 if (oldElem == m_blockStack && t->tagName == pTag) { 1019 // We encountered a stray </p>. Amazingly Gecko, WinIE, and MacIE all treat 1020 // this as a valid break, i.e., <p></p>. So go ahead and make the empty 1021 // paragraph. 1022 t->beginTag = true; 1023 parseToken(t); 1024 popBlock(t->tagName); 1025 reportError(StrayParagraphCloseError); 1026 } 1027 } 1028 1029 bool HTMLParser::isHeadingTag(const AtomicString& tagName) 1030 { 1031 DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, headingTags, ()); 1032 if (headingTags.isEmpty()) { 1033 headingTags.add(h1Tag.localName().impl()); 1034 headingTags.add(h2Tag.localName().impl()); 1035 headingTags.add(h3Tag.localName().impl()); 1036 headingTags.add(h4Tag.localName().impl()); 1037 headingTags.add(h5Tag.localName().impl()); 1038 headingTags.add(h6Tag.localName().impl()); 1039 } 1040 1041 return headingTags.contains(tagName.impl()); 1042 } 1043 1044 bool HTMLParser::isInline(Node* node) const 1045 { 1046 if (node->isTextNode()) 1047 return true; 1048 1049 if (node->isHTMLElement()) { 1050 HTMLElement* e = static_cast<HTMLElement*>(node); 1051 if (e->hasLocalName(aTag) || e->hasLocalName(fontTag) || e->hasLocalName(ttTag) || 1052 e->hasLocalName(uTag) || e->hasLocalName(bTag) || e->hasLocalName(iTag) || 1053 e->hasLocalName(sTag) || e->hasLocalName(strikeTag) || e->hasLocalName(bigTag) || 1054 e->hasLocalName(smallTag) || e->hasLocalName(emTag) || e->hasLocalName(strongTag) || 1055 e->hasLocalName(dfnTag) || e->hasLocalName(codeTag) || e->hasLocalName(sampTag) || 1056 e->hasLocalName(kbdTag) || e->hasLocalName(varTag) || e->hasLocalName(citeTag) || 1057 e->hasLocalName(abbrTag) || e->hasLocalName(acronymTag) || e->hasLocalName(subTag) || 1058 e->hasLocalName(supTag) || e->hasLocalName(spanTag) || e->hasLocalName(nobrTag) || 1059 e->hasLocalName(noframesTag) || e->hasLocalName(nolayerTag) || 1060 e->hasLocalName(noembedTag)) 1061 return true; 1062 #if !ENABLE(XHTMLMP) 1063 if (e->hasLocalName(noscriptTag) && !m_isParsingFragment) { 1064 Settings* settings = m_document->settings(); 1065 if (settings && settings->isJavaScriptEnabled()) 1066 return true; 1067 } 1068 #endif 1069 } 1070 1071 return false; 1072 } 1073 1074 bool HTMLParser::isResidualStyleTag(const AtomicString& tagName) 1075 { 1076 DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, residualStyleTags, ()); 1077 if (residualStyleTags.isEmpty()) { 1078 residualStyleTags.add(aTag.localName().impl()); 1079 residualStyleTags.add(fontTag.localName().impl()); 1080 residualStyleTags.add(ttTag.localName().impl()); 1081 residualStyleTags.add(uTag.localName().impl()); 1082 residualStyleTags.add(bTag.localName().impl()); 1083 residualStyleTags.add(iTag.localName().impl()); 1084 residualStyleTags.add(sTag.localName().impl()); 1085 residualStyleTags.add(strikeTag.localName().impl()); 1086 residualStyleTags.add(bigTag.localName().impl()); 1087 residualStyleTags.add(smallTag.localName().impl()); 1088 residualStyleTags.add(emTag.localName().impl()); 1089 residualStyleTags.add(strongTag.localName().impl()); 1090 residualStyleTags.add(dfnTag.localName().impl()); 1091 residualStyleTags.add(codeTag.localName().impl()); 1092 residualStyleTags.add(sampTag.localName().impl()); 1093 residualStyleTags.add(kbdTag.localName().impl()); 1094 residualStyleTags.add(varTag.localName().impl()); 1095 residualStyleTags.add(nobrTag.localName().impl()); 1096 } 1097 1098 return residualStyleTags.contains(tagName.impl()); 1099 } 1100 1101 bool HTMLParser::isAffectedByResidualStyle(const AtomicString& tagName) 1102 { 1103 DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, unaffectedTags, ()); 1104 if (unaffectedTags.isEmpty()) { 1105 unaffectedTags.add(bodyTag.localName().impl()); 1106 unaffectedTags.add(tableTag.localName().impl()); 1107 unaffectedTags.add(theadTag.localName().impl()); 1108 unaffectedTags.add(tbodyTag.localName().impl()); 1109 unaffectedTags.add(tfootTag.localName().impl()); 1110 unaffectedTags.add(trTag.localName().impl()); 1111 unaffectedTags.add(thTag.localName().impl()); 1112 unaffectedTags.add(tdTag.localName().impl()); 1113 unaffectedTags.add(captionTag.localName().impl()); 1114 unaffectedTags.add(colgroupTag.localName().impl()); 1115 unaffectedTags.add(colTag.localName().impl()); 1116 unaffectedTags.add(optionTag.localName().impl()); 1117 unaffectedTags.add(optgroupTag.localName().impl()); 1118 unaffectedTags.add(selectTag.localName().impl()); 1119 unaffectedTags.add(objectTag.localName().impl()); 1120 unaffectedTags.add(datagridTag.localName().impl()); 1121 unaffectedTags.add(datalistTag.localName().impl()); 1122 } 1123 1124 return !unaffectedTags.contains(tagName.impl()); 1125 } 1126 1127 void HTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem) 1128 { 1129 HTMLStackElem* maxElem = 0; 1130 bool finished = false; 1131 bool strayTableContent = elem->strayTableContent; 1132 1133 unsigned iterationCount = 0; 1134 1135 m_handlingResidualStyleAcrossBlocks = true; 1136 while (!finished && (iterationCount++ < cResidualStyleIterationLimit)) { 1137 // Find the outermost element that crosses over to a higher level. If there exists another higher-level 1138 // element, we will do another pass, until we have corrected the innermost one. 1139 ExceptionCode ec = 0; 1140 HTMLStackElem* curr = m_blockStack; 1141 HTMLStackElem* prev = 0; 1142 HTMLStackElem* prevMaxElem = 0; 1143 maxElem = 0; 1144 finished = true; 1145 while (curr && curr != elem) { 1146 if (curr->level > elem->level) { 1147 if (!isAffectedByResidualStyle(curr->tagName)) 1148 return; 1149 if (maxElem) 1150 // We will need another pass. 1151 finished = false; 1152 maxElem = curr; 1153 prevMaxElem = prev; 1154 } 1155 1156 prev = curr; 1157 curr = curr->next; 1158 } 1159 1160 if (!curr || !maxElem) 1161 return; 1162 1163 Node* residualElem = prev->node; 1164 Node* blockElem = prevMaxElem ? prevMaxElem->node : m_current; 1165 Node* parentElem = elem->node; 1166 1167 // Check to see if the reparenting that is going to occur is allowed according to the DOM. 1168 // FIXME: We should either always allow it or perform an additional fixup instead of 1169 // just bailing here. 1170 // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now. 1171 if (!parentElem->childAllowed(blockElem)) 1172 return; 1173 1174 m_hasPElementInScope = Unknown; 1175 1176 if (maxElem->node->parentNode() != elem->node) { 1177 // Walk the stack and remove any elements that aren't residual style tags. These 1178 // are basically just being closed up. Example: 1179 // <font><span>Moo<p>Goo</font></p>. 1180 // In the above example, the <span> doesn't need to be reopened. It can just close. 1181 HTMLStackElem* currElem = maxElem->next; 1182 HTMLStackElem* prevElem = maxElem; 1183 while (currElem != elem) { 1184 HTMLStackElem* nextElem = currElem->next; 1185 if (!isResidualStyleTag(currElem->tagName)) { 1186 prevElem->next = nextElem; 1187 prevElem->derefNode(); 1188 prevElem->node = currElem->node; 1189 prevElem->didRefNode = currElem->didRefNode; 1190 delete currElem; 1191 } 1192 else 1193 prevElem = currElem; 1194 currElem = nextElem; 1195 } 1196 1197 // We have to reopen residual tags in between maxElem and elem. An example of this case is: 1198 // <font><i>Moo<p>Foo</font>. 1199 // In this case, we need to transform the part before the <p> into: 1200 // <font><i>Moo</i></font><i> 1201 // so that the <i> will remain open. This involves the modification of elements 1202 // in the block stack. 1203 // This will also affect how we ultimately reparent the block, since we want it to end up 1204 // under the reopened residual tags (e.g., the <i> in the above example.) 1205 RefPtr<Node> prevNode = 0; 1206 currElem = maxElem; 1207 while (currElem->node != residualElem) { 1208 if (isResidualStyleTag(currElem->node->localName())) { 1209 // Create a clone of this element. 1210 // We call releaseRef to get a raw pointer since we plan to hand over ownership to currElem. 1211 Node* currNode = currElem->node->cloneNode(false).releaseRef(); 1212 reportError(ResidualStyleError, &currNode->localName()); 1213 1214 // Change the stack element's node to point to the clone. 1215 // The stack element adopts the reference we obtained above by calling release(). 1216 currElem->derefNode(); 1217 currElem->node = currNode; 1218 currElem->didRefNode = true; 1219 1220 // Attach the previous node as a child of this new node. 1221 if (prevNode) 1222 currNode->appendChild(prevNode, ec); 1223 else // The new parent for the block element is going to be the innermost clone. 1224 parentElem = currNode; // FIXME: We shifted parentElem to be a residual inline. We never checked to see if blockElem could be legally placed inside the inline though. 1225 1226 prevNode = currNode; 1227 } 1228 1229 currElem = currElem->next; 1230 } 1231 1232 // Now append the chain of new residual style elements if one exists. 1233 if (prevNode) 1234 elem->node->appendChild(prevNode, ec); // FIXME: This append can result in weird stuff happening, like an inline chain being put into a table section. 1235 } 1236 1237 // Check if the block is still in the tree. If it isn't, then we don't 1238 // want to remove it from its parent (that would crash) or insert it into 1239 // a new parent later. See http://bugs.webkit.org/show_bug.cgi?id=6778 1240 bool isBlockStillInTree = blockElem->parentNode(); 1241 1242 // We need to make a clone of |residualElem| and place it just inside |blockElem|. 1243 // All content of |blockElem| is reparented to be under this clone. We then 1244 // reparent |blockElem| using real DOM calls so that attachment/detachment will 1245 // be performed to fix up the rendering tree. 1246 // So for this example: <b>...<p>Foo</b>Goo</p> 1247 // The end result will be: <b>...</b><p><b>Foo</b>Goo</p> 1248 // 1249 // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids. 1250 if (isBlockStillInTree) 1251 blockElem->parentNode()->removeChild(blockElem, ec); 1252 1253 Node* newNodePtr = 0; 1254 if (blockElem->firstChild()) { 1255 // Step 2: Clone |residualElem|. 1256 RefPtr<Node> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids. 1257 newNodePtr = newNode.get(); 1258 reportError(ResidualStyleError, &newNode->localName()); 1259 1260 // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem| 1261 // before we've put |newElem| into the document. That way we'll only do one attachment of all 1262 // the new content (instead of a bunch of individual attachments). 1263 Node* currNode = blockElem->firstChild(); 1264 while (currNode) { 1265 Node* nextNode = currNode->nextSibling(); 1266 newNode->appendChild(currNode, ec); 1267 currNode = nextNode; 1268 } 1269 1270 // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no 1271 // attachment can occur yet. 1272 blockElem->appendChild(newNode.release(), ec); 1273 } else 1274 finished = true; 1275 1276 // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place. 1277 if (isBlockStillInTree) 1278 parentElem->appendChild(blockElem, ec); 1279 1280 // Step 6: Pull |elem| out of the stack, since it is no longer enclosing us. Also update 1281 // the node associated with the previous stack element so that when it gets popped, 1282 // it doesn't make the residual element the next current node. 1283 HTMLStackElem* currElem = maxElem; 1284 HTMLStackElem* prevElem = 0; 1285 while (currElem != elem) { 1286 prevElem = currElem; 1287 currElem = currElem->next; 1288 } 1289 prevElem->next = elem->next; 1290 prevElem->derefNode(); 1291 prevElem->node = elem->node; 1292 prevElem->didRefNode = elem->didRefNode; 1293 if (!finished) { 1294 // Repurpose |elem| to represent |newNode| and insert it at the appropriate position 1295 // in the stack. We do not do this for the innermost block, because in that case the new 1296 // node is effectively no longer open. 1297 elem->next = maxElem; 1298 elem->node = prevMaxElem->node; 1299 elem->didRefNode = prevMaxElem->didRefNode; 1300 elem->strayTableContent = false; 1301 prevMaxElem->next = elem; 1302 ASSERT(newNodePtr); 1303 prevMaxElem->node = newNodePtr; 1304 newNodePtr->ref(); 1305 prevMaxElem->didRefNode = true; 1306 } else 1307 delete elem; 1308 } 1309 1310 // FIXME: If we ever make a case like this work: 1311 // <table><b><i><form></b></form></i></table> 1312 // Then this check will be too simplistic. Right now the <i><form> chain will end up inside the <tbody>, which is pretty crazy. 1313 if (strayTableContent) 1314 m_inStrayTableContent--; 1315 1316 // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>. 1317 // In the above example, Goo should stay italic. 1318 // We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth. 1319 1320 HTMLStackElem* curr = m_blockStack; 1321 HTMLStackElem* residualStyleStack = 0; 1322 unsigned stackDepth = 1; 1323 unsigned redundantStyleCount = 0; 1324 while (curr && curr != maxElem) { 1325 // We will actually schedule this tag for reopening 1326 // after we complete the close of this entire block. 1327 if (isResidualStyleTag(curr->tagName) && stackDepth++ < cResidualStyleMaxDepth) { 1328 // We've overloaded the use of stack elements and are just reusing the 1329 // struct with a slightly different meaning to the variables. Instead of chaining 1330 // from innermost to outermost, we build up a list of all the tags we need to reopen 1331 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing 1332 // to the outermost tag we need to reopen. 1333 // We also set curr->node to be the actual element that corresponds to the ID stored in 1334 // curr->id rather than the node that you should pop to when the element gets pulled off 1335 // the stack. 1336 if (residualStyleStack && curr->tagName == residualStyleStack->tagName && curr->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes())) 1337 redundantStyleCount++; 1338 else 1339 redundantStyleCount = 0; 1340 1341 if (redundantStyleCount < cMaxRedundantTagDepth) 1342 moveOneBlockToStack(residualStyleStack); 1343 else 1344 popOneBlock(); 1345 } else 1346 popOneBlock(); 1347 1348 curr = m_blockStack; 1349 } 1350 1351 reopenResidualStyleTags(residualStyleStack, 0); // Stray table content can't be an issue here, since some element above will always become the root of new stray table content. 1352 1353 m_handlingResidualStyleAcrossBlocks = false; 1354 } 1355 1356 void HTMLParser::reopenResidualStyleTags(HTMLStackElem* elem, Node* malformedTableParent) 1357 { 1358 // Loop for each tag that needs to be reopened. 1359 while (elem) { 1360 // Create a shallow clone of the DOM node for this element. 1361 RefPtr<Node> newNode = elem->node->cloneNode(false); 1362 reportError(ResidualStyleError, &newNode->localName()); 1363 1364 // Append the new node. In the malformed table case, we need to insert before the table, 1365 // which will be the last child. 1366 ExceptionCode ec = 0; 1367 if (malformedTableParent) 1368 malformedTableParent->insertBefore(newNode, malformedTableParent->lastChild(), ec); 1369 else 1370 m_current->appendChild(newNode, ec); 1371 // FIXME: Is it really OK to ignore the exceptions here? 1372 1373 // Now push a new stack element for this node we just created. 1374 pushBlock(elem->tagName, elem->level); 1375 newNode->beginParsingChildren(); 1376 1377 // Set our strayTableContent boolean if needed, so that the reopened tag also knows 1378 // that it is inside a malformed table. 1379 m_blockStack->strayTableContent = malformedTableParent != 0; 1380 if (m_blockStack->strayTableContent) 1381 m_inStrayTableContent++; 1382 1383 // Clear our malformed table parent variable. 1384 malformedTableParent = 0; 1385 1386 // Update |current| manually to point to the new node. 1387 setCurrent(newNode.get()); 1388 1389 // Advance to the next tag that needs to be reopened. 1390 HTMLStackElem* next = elem->next; 1391 elem->derefNode(); 1392 delete elem; 1393 elem = next; 1394 } 1395 } 1396 1397 void HTMLParser::pushBlock(const AtomicString& tagName, int level) 1398 { 1399 m_blockStack = new HTMLStackElem(tagName, level, m_current, m_didRefCurrent, m_blockStack); 1400 if (level >= minBlockLevelTagPriority) 1401 m_blocksInStack++; 1402 m_didRefCurrent = false; 1403 if (tagName == pTag) 1404 m_hasPElementInScope = InScope; 1405 else if (isScopingTag(tagName)) 1406 m_hasPElementInScope = NotInScope; 1407 } 1408 1409 void HTMLParser::popBlock(const AtomicString& tagName, bool reportErrors) 1410 { 1411 HTMLStackElem* elem = m_blockStack; 1412 1413 if (m_parserQuirks && elem && !m_parserQuirks->shouldPopBlock(elem->tagName, tagName)) 1414 return; 1415 1416 int maxLevel = 0; 1417 1418 while (elem && (elem->tagName != tagName)) { 1419 if (maxLevel < elem->level) 1420 maxLevel = elem->level; 1421 elem = elem->next; 1422 } 1423 1424 if (!elem) { 1425 if (reportErrors) 1426 reportError(StrayCloseTagError, &tagName, 0, true); 1427 return; 1428 } 1429 1430 if (maxLevel > elem->level) { 1431 // We didn't match because the tag is in a different scope, e.g., 1432 // <b><p>Foo</b>. Try to correct the problem. 1433 if (!isResidualStyleTag(tagName)) 1434 return; 1435 return handleResidualStyleCloseTagAcrossBlocks(elem); 1436 } 1437 1438 bool isAffectedByStyle = isAffectedByResidualStyle(elem->tagName); 1439 HTMLStackElem* residualStyleStack = 0; 1440 Node* malformedTableParent = 0; 1441 1442 elem = m_blockStack; 1443 unsigned stackDepth = 1; 1444 unsigned redundantStyleCount = 0; 1445 while (elem) { 1446 if (elem->tagName == tagName) { 1447 int strayTable = m_inStrayTableContent; 1448 popOneBlock(); 1449 elem = 0; 1450 1451 // This element was the root of some malformed content just inside an implicit or 1452 // explicit <tbody> or <tr>. 1453 // If we end up needing to reopen residual style tags, the root of the reopened chain 1454 // must also know that it is the root of malformed content inside a <tbody>/<tr>. 1455 if (strayTable && (m_inStrayTableContent < strayTable) && residualStyleStack) { 1456 Node* curr = m_current; 1457 while (curr && !curr->hasTagName(tableTag)) 1458 curr = curr->parentNode(); 1459 malformedTableParent = curr ? curr->parentNode() : 0; 1460 } 1461 } 1462 else { 1463 if (m_currentFormElement && elem->tagName == formTag) 1464 // A <form> is being closed prematurely (and this is 1465 // malformed HTML). Set an attribute on the form to clear out its 1466 // bottom margin. 1467 m_currentFormElement->setMalformed(true); 1468 1469 // Schedule this tag for reopening 1470 // after we complete the close of this entire block. 1471 if (isAffectedByStyle && isResidualStyleTag(elem->tagName) && stackDepth++ < cResidualStyleMaxDepth) { 1472 // We've overloaded the use of stack elements and are just reusing the 1473 // struct with a slightly different meaning to the variables. Instead of chaining 1474 // from innermost to outermost, we build up a list of all the tags we need to reopen 1475 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing 1476 // to the outermost tag we need to reopen. 1477 // We also set elem->node to be the actual element that corresponds to the ID stored in 1478 // elem->id rather than the node that you should pop to when the element gets pulled off 1479 // the stack. 1480 if (residualStyleStack && elem->tagName == residualStyleStack->tagName && elem->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes())) 1481 redundantStyleCount++; 1482 else 1483 redundantStyleCount = 0; 1484 1485 if (redundantStyleCount < cMaxRedundantTagDepth) 1486 moveOneBlockToStack(residualStyleStack); 1487 else 1488 popOneBlock(); 1489 } else 1490 popOneBlock(); 1491 elem = m_blockStack; 1492 } 1493 } 1494 1495 reopenResidualStyleTags(residualStyleStack, malformedTableParent); 1496 } 1497 1498 inline HTMLStackElem* HTMLParser::popOneBlockCommon() 1499 { 1500 HTMLStackElem* elem = m_blockStack; 1501 1502 // Form elements restore their state during the parsing process. 1503 // Also, a few elements (<applet>, <object>) need to know when all child elements (<param>s) are available. 1504 if (m_current && elem->node != m_current) 1505 m_current->finishParsingChildren(); 1506 1507 if (m_blockStack->level >= minBlockLevelTagPriority) { 1508 ASSERT(m_blocksInStack > 0); 1509 m_blocksInStack--; 1510 } 1511 m_blockStack = elem->next; 1512 m_current = elem->node; 1513 m_didRefCurrent = elem->didRefNode; 1514 1515 if (elem->strayTableContent) 1516 m_inStrayTableContent--; 1517 1518 if (elem->tagName == pTag) 1519 m_hasPElementInScope = NotInScope; 1520 else if (isScopingTag(elem->tagName)) 1521 m_hasPElementInScope = Unknown; 1522 1523 return elem; 1524 } 1525 1526 void HTMLParser::popOneBlock() 1527 { 1528 // Store the current node before popOneBlockCommon overwrites it. 1529 Node* lastCurrent = m_current; 1530 bool didRefLastCurrent = m_didRefCurrent; 1531 1532 delete popOneBlockCommon(); 1533 1534 if (didRefLastCurrent) 1535 lastCurrent->deref(); 1536 } 1537 1538 void HTMLParser::moveOneBlockToStack(HTMLStackElem*& head) 1539 { 1540 // We'll be using the stack element we're popping, but for the current node. 1541 // See the two callers for details. 1542 1543 // Store the current node before popOneBlockCommon overwrites it. 1544 Node* lastCurrent = m_current; 1545 bool didRefLastCurrent = m_didRefCurrent; 1546 1547 // Pop the block, but don't deref the current node as popOneBlock does because 1548 // we'll be using the pointer in the new stack element. 1549 HTMLStackElem* elem = popOneBlockCommon(); 1550 1551 // Transfer the current node into the stack element. 1552 // No need to deref the old elem->node because popOneBlockCommon transferred 1553 // it into the m_current/m_didRefCurrent fields. 1554 elem->node = lastCurrent; 1555 elem->didRefNode = didRefLastCurrent; 1556 elem->next = head; 1557 head = elem; 1558 } 1559 1560 void HTMLParser::checkIfHasPElementInScope() 1561 { 1562 m_hasPElementInScope = NotInScope; 1563 HTMLStackElem* elem = m_blockStack; 1564 while (elem) { 1565 const AtomicString& tagName = elem->tagName; 1566 if (tagName == pTag) { 1567 m_hasPElementInScope = InScope; 1568 return; 1569 } else if (isScopingTag(tagName)) 1570 return; 1571 elem = elem->next; 1572 } 1573 } 1574 1575 void HTMLParser::popInlineBlocks() 1576 { 1577 while (m_blockStack && isInline(m_current)) 1578 popOneBlock(); 1579 } 1580 1581 void HTMLParser::freeBlock() 1582 { 1583 while (m_blockStack) 1584 popOneBlock(); 1585 ASSERT(!m_blocksInStack); 1586 } 1587 1588 void HTMLParser::createHead() 1589 { 1590 if (m_head) 1591 return; 1592 1593 if (!m_document->documentElement()) { 1594 insertNode(new HTMLHtmlElement(htmlTag, m_document)); 1595 ASSERT(m_document->documentElement()); 1596 } 1597 1598 m_head = new HTMLHeadElement(headTag, m_document); 1599 HTMLElement* body = m_document->body(); 1600 ExceptionCode ec = 0; 1601 m_document->documentElement()->insertBefore(m_head.get(), body, ec); 1602 if (ec) 1603 m_head = 0; 1604 1605 // If the body does not exist yet, then the <head> should be pushed as the current block. 1606 if (m_head && !body) { 1607 pushBlock(m_head->localName(), m_head->tagPriority()); 1608 setCurrent(m_head.get()); 1609 } 1610 } 1611 1612 PassRefPtr<Node> HTMLParser::handleIsindex(Token* t) 1613 { 1614 RefPtr<Node> n = new HTMLDivElement(divTag, m_document); 1615 1616 NamedMappedAttrMap* attrs = t->attrs.get(); 1617 1618 RefPtr<HTMLIsIndexElement> isIndex = new HTMLIsIndexElement(isindexTag, m_document, m_currentFormElement.get()); 1619 isIndex->setAttributeMap(attrs); 1620 isIndex->setAttribute(typeAttr, "khtml_isindex"); 1621 1622 String text = searchableIndexIntroduction(); 1623 if (attrs) { 1624 if (Attribute* a = attrs->getAttributeItem(promptAttr)) 1625 text = a->value().string() + " "; 1626 t->attrs = 0; 1627 } 1628 1629 n->addChild(new HTMLHRElement(hrTag, m_document)); 1630 n->addChild(Text::create(m_document, text)); 1631 n->addChild(isIndex.release()); 1632 n->addChild(new HTMLHRElement(hrTag, m_document)); 1633 1634 return n.release(); 1635 } 1636 1637 void HTMLParser::startBody() 1638 { 1639 if (m_inBody) 1640 return; 1641 1642 m_inBody = true; 1643 1644 if (m_isindexElement) { 1645 insertNode(m_isindexElement.get(), true /* don't descend into this node */); 1646 m_isindexElement = 0; 1647 } 1648 } 1649 1650 void HTMLParser::finished() 1651 { 1652 // In the case of a completely empty document, here's the place to create the HTML element. 1653 if (m_current && m_current->isDocumentNode() && !m_document->documentElement()) 1654 insertNode(new HTMLHtmlElement(htmlTag, m_document)); 1655 1656 // This ensures that "current" is not left pointing to a node when the document is destroyed. 1657 freeBlock(); 1658 setCurrent(0); 1659 1660 // Warning, this may delete the tokenizer and parser, so don't try to do anything else after this. 1661 if (!m_isParsingFragment) 1662 m_document->finishedParsing(); 1663 } 1664 1665 void HTMLParser::reportErrorToConsole(HTMLParserErrorCode errorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags) 1666 { 1667 Frame* frame = m_document->frame(); 1668 if (!frame) 1669 return; 1670 1671 HTMLTokenizer* htmlTokenizer = static_cast<HTMLTokenizer*>(m_document->tokenizer()); 1672 int lineNumber = htmlTokenizer->lineNumber() + 1; 1673 1674 AtomicString tag1; 1675 AtomicString tag2; 1676 if (tagName1) { 1677 if (*tagName1 == "#text") 1678 tag1 = "Text"; 1679 else if (*tagName1 == "#comment") 1680 tag1 = "<!-- comment -->"; 1681 else 1682 tag1 = (closeTags ? "</" : "<") + *tagName1 + ">"; 1683 } 1684 if (tagName2) { 1685 if (*tagName2 == "#text") 1686 tag2 = "Text"; 1687 else if (*tagName2 == "#comment") 1688 tag2 = "<!-- comment -->"; 1689 else 1690 tag2 = (closeTags ? "</" : "<") + *tagName2 + ">"; 1691 } 1692 1693 const char* errorMsg = htmlParserErrorMessageTemplate(errorCode); 1694 if (!errorMsg) 1695 return; 1696 1697 String message; 1698 if (htmlTokenizer->processingContentWrittenByScript()) 1699 message += htmlParserDocumentWriteMessage(); 1700 message += errorMsg; 1701 message.replace("%tag1", tag1); 1702 message.replace("%tag2", tag2); 1703 1704 frame->domWindow()->console()->addMessage(HTMLMessageSource, LogMessageType, 1705 isWarning(errorCode) ? WarningMessageLevel : ErrorMessageLevel, 1706 message, lineNumber, m_document->url().string()); 1707 } 1708 1709 #ifdef BUILDING_ON_LEOPARD 1710 bool shouldCreateImplicitHead(Document* document) 1711 { 1712 ASSERT(document); 1713 1714 Settings* settings = document->page() ? document->page()->settings() : 0; 1715 return settings ? !settings->needsLeopardMailQuirks() : true; 1716 } 1717 #elif defined(BUILDING_ON_TIGER) 1718 bool shouldCreateImplicitHead(Document* document) 1719 { 1720 ASSERT(document); 1721 1722 Settings* settings = document->page() ? document->page()->settings() : 0; 1723 return settings ? !settings->needsTigerMailQuirks() : true; 1724 } 1725 #endif 1726 1727 } 1728