/*
 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved.
 * Copyright (C) 2009, 2010 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */ 26 27 #include "config.h" 28 #include "core/editing/MarkupAccumulator.h" 29 30 #include "HTMLNames.h" 31 #include "XLinkNames.h" 32 #include "XMLNSNames.h" 33 #include "XMLNames.h" 34 #include "core/dom/CDATASection.h" 35 #include "core/dom/Comment.h" 36 #include "core/dom/DocumentFragment.h" 37 #include "core/dom/DocumentType.h" 38 #include "core/dom/ProcessingInstruction.h" 39 #include "core/editing/Editor.h" 40 #include "core/html/HTMLElement.h" 41 #include "core/html/HTMLTemplateElement.h" 42 #include "weborigin/KURL.h" 43 #include "wtf/unicode/CharacterNames.h" 44 45 namespace WebCore { 46 47 using namespace HTMLNames; 48 49 void MarkupAccumulator::appendCharactersReplacingEntities(StringBuilder& result, const String& source, unsigned offset, unsigned length, EntityMask entityMask) 50 { 51 DEFINE_STATIC_LOCAL(const String, ampReference, ("&")); 52 DEFINE_STATIC_LOCAL(const String, ltReference, ("<")); 53 DEFINE_STATIC_LOCAL(const String, gtReference, (">")); 54 DEFINE_STATIC_LOCAL(const String, quotReference, (""")); 55 DEFINE_STATIC_LOCAL(const String, nbspReference, (" ")); 56 57 static const EntityDescription entityMaps[] = { 58 { '&', ampReference, EntityAmp }, 59 { '<', ltReference, EntityLt }, 60 { '>', gtReference, EntityGt }, 61 { '"', quotReference, EntityQuot }, 62 { noBreakSpace, nbspReference, EntityNbsp }, 63 }; 64 65 if (!(offset + length)) 66 return; 67 68 ASSERT(offset + length <= source.length()); 69 70 if (source.is8Bit()) { 71 const LChar* text = source.characters8() + offset; 72 73 size_t positionAfterLastEntity = 0; 74 for (size_t i = 0; i < length; ++i) { 75 for (size_t entityIndex = 0; entityIndex < WTF_ARRAY_LENGTH(entityMaps); ++entityIndex) { 76 if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIndex].mask & entityMask) { 77 result.append(text + positionAfterLastEntity, i - positionAfterLastEntity); 78 result.append(entityMaps[entityIndex].reference); 79 positionAfterLastEntity = i + 1; 80 break; 81 } 82 } 83 } 
84 result.append(text + positionAfterLastEntity, length - positionAfterLastEntity); 85 } else { 86 const UChar* text = source.characters16() + offset; 87 88 size_t positionAfterLastEntity = 0; 89 for (size_t i = 0; i < length; ++i) { 90 for (size_t entityIndex = 0; entityIndex < WTF_ARRAY_LENGTH(entityMaps); ++entityIndex) { 91 if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIndex].mask & entityMask) { 92 result.append(text + positionAfterLastEntity, i - positionAfterLastEntity); 93 result.append(entityMaps[entityIndex].reference); 94 positionAfterLastEntity = i + 1; 95 break; 96 } 97 } 98 } 99 result.append(text + positionAfterLastEntity, length - positionAfterLastEntity); 100 } 101 } 102 103 MarkupAccumulator::MarkupAccumulator(Vector<Node*>* nodes, EAbsoluteURLs resolveUrlsMethod, const Range* range) 104 : m_nodes(nodes) 105 , m_range(range) 106 , m_resolveURLsMethod(resolveUrlsMethod) 107 { 108 } 109 110 MarkupAccumulator::~MarkupAccumulator() 111 { 112 } 113 114 String MarkupAccumulator::serializeNodes(Node* targetNode, EChildrenOnly childrenOnly) 115 { 116 return serializeNodes(targetNode, childrenOnly, 0); 117 } 118 119 String MarkupAccumulator::serializeNodes(Node* targetNode, EChildrenOnly childrenOnly, Vector<QualifiedName>* tagNamesToSkip) 120 { 121 serializeNodesWithNamespaces(targetNode, childrenOnly, 0, tagNamesToSkip); 122 return m_markup.toString(); 123 } 124 125 void MarkupAccumulator::serializeNodesWithNamespaces(Node* targetNode, EChildrenOnly childrenOnly, const Namespaces* namespaces, Vector<QualifiedName>* tagNamesToSkip) 126 { 127 if (tagNamesToSkip) { 128 for (size_t i = 0; i < tagNamesToSkip->size(); ++i) { 129 if (targetNode->hasTagName(tagNamesToSkip->at(i))) 130 return; 131 } 132 } 133 134 Namespaces namespaceHash; 135 if (namespaces) 136 namespaceHash = *namespaces; 137 138 if (!childrenOnly) 139 appendStartTag(targetNode, &namespaceHash); 140 141 if (!(targetNode->document()->isHTMLDocument() && 
elementCannotHaveEndTag(targetNode))) { 142 Node* current = targetNode->hasTagName(templateTag) ? toHTMLTemplateElement(targetNode)->content()->firstChild() : targetNode->firstChild(); 143 for ( ; current; current = current->nextSibling()) 144 serializeNodesWithNamespaces(current, IncludeNode, &namespaceHash, tagNamesToSkip); 145 } 146 147 if (!childrenOnly) 148 appendEndTag(targetNode); 149 } 150 151 String MarkupAccumulator::resolveURLIfNeeded(const Element* element, const String& urlString) const 152 { 153 switch (m_resolveURLsMethod) { 154 case ResolveAllURLs: 155 return element->document()->completeURL(urlString).string(); 156 157 case ResolveNonLocalURLs: 158 if (!element->document()->url().isLocalFile()) 159 return element->document()->completeURL(urlString).string(); 160 break; 161 162 case DoNotResolveURLs: 163 break; 164 } 165 return urlString; 166 } 167 168 void MarkupAccumulator::appendString(const String& string) 169 { 170 m_markup.append(string); 171 } 172 173 void MarkupAccumulator::appendStartTag(Node* node, Namespaces* namespaces) 174 { 175 appendStartMarkup(m_markup, node, namespaces); 176 if (m_nodes) 177 m_nodes->append(node); 178 } 179 180 void MarkupAccumulator::appendEndTag(Node* node) 181 { 182 appendEndMarkup(m_markup, node); 183 } 184 185 size_t MarkupAccumulator::totalLength(const Vector<String>& strings) 186 { 187 size_t length = 0; 188 for (size_t i = 0; i < strings.size(); ++i) 189 length += strings[i].length(); 190 return length; 191 } 192 193 void MarkupAccumulator::concatenateMarkup(StringBuilder& result) 194 { 195 result.append(m_markup); 196 } 197 198 void MarkupAccumulator::appendAttributeValue(StringBuilder& result, const String& attribute, bool documentIsHTML) 199 { 200 appendCharactersReplacingEntities(result, attribute, 0, attribute.length(), 201 documentIsHTML ? 
EntityMaskInHTMLAttributeValue : EntityMaskInAttributeValue); 202 } 203 204 void MarkupAccumulator::appendCustomAttributes(StringBuilder&, Element*, Namespaces*) 205 { 206 } 207 208 void MarkupAccumulator::appendQuotedURLAttributeValue(StringBuilder& result, const Element* element, const Attribute& attribute) 209 { 210 ASSERT(element->isURLAttribute(attribute)); 211 const String resolvedURLString = resolveURLIfNeeded(element, attribute.value()); 212 UChar quoteChar = '"'; 213 String strippedURLString = resolvedURLString.stripWhiteSpace(); 214 if (protocolIsJavaScript(strippedURLString)) { 215 // minimal escaping for javascript urls 216 if (strippedURLString.contains('"')) { 217 if (strippedURLString.contains('\'')) 218 strippedURLString.replaceWithLiteral('"', """); 219 else 220 quoteChar = '\''; 221 } 222 result.append(quoteChar); 223 result.append(strippedURLString); 224 result.append(quoteChar); 225 return; 226 } 227 228 // FIXME: This does not fully match other browsers. Firefox percent-escapes non-ASCII characters for innerHTML. 229 result.append(quoteChar); 230 appendAttributeValue(result, resolvedURLString, false); 231 result.append(quoteChar); 232 } 233 234 void MarkupAccumulator::appendNodeValue(StringBuilder& result, const Node* node, const Range* range, EntityMask entityMask) 235 { 236 const String str = node->nodeValue(); 237 unsigned length = str.length(); 238 unsigned start = 0; 239 240 if (range) { 241 if (node == range->endContainer()) 242 length = range->endOffset(); 243 if (node == range->startContainer()) { 244 start = range->startOffset(); 245 length -= start; 246 } 247 } 248 249 appendCharactersReplacingEntities(result, str, start, length, entityMask); 250 } 251 252 bool MarkupAccumulator::shouldAddNamespaceElement(const Element* element) 253 { 254 // Don't add namespace attribute if it is already defined for this elem. 
255 const AtomicString& prefix = element->prefix(); 256 if (prefix.isEmpty()) 257 return !element->hasAttribute(xmlnsAtom); 258 259 DEFINE_STATIC_LOCAL(String, xmlnsWithColon, ("xmlns:")); 260 return !element->hasAttribute(xmlnsWithColon + prefix); 261 } 262 263 bool MarkupAccumulator::shouldAddNamespaceAttribute(const Attribute& attribute, Namespaces& namespaces) 264 { 265 // Don't add namespace attributes twice 266 if (attribute.name() == XMLNSNames::xmlnsAttr) { 267 namespaces.set(emptyAtom.impl(), attribute.value().impl()); 268 return false; 269 } 270 271 QualifiedName xmlnsPrefixAttr(xmlnsAtom, attribute.localName(), XMLNSNames::xmlnsNamespaceURI); 272 if (attribute.name() == xmlnsPrefixAttr) { 273 namespaces.set(attribute.localName().impl(), attribute.value().impl()); 274 return false; 275 } 276 277 return true; 278 } 279 280 void MarkupAccumulator::appendNamespace(StringBuilder& result, const AtomicString& prefix, const AtomicString& namespaceURI, Namespaces& namespaces) 281 { 282 if (namespaceURI.isEmpty()) 283 return; 284 285 // Use emptyAtoms's impl() for both null and empty strings since the HashMap can't handle 0 as a key 286 StringImpl* pre = prefix.isEmpty() ? 
emptyAtom.impl() : prefix.impl(); 287 StringImpl* foundNS = namespaces.get(pre); 288 if (foundNS != namespaceURI.impl()) { 289 namespaces.set(pre, namespaceURI.impl()); 290 result.append(' '); 291 result.append(xmlnsAtom.string()); 292 if (!prefix.isEmpty()) { 293 result.append(':'); 294 result.append(prefix); 295 } 296 297 result.append('='); 298 result.append('"'); 299 appendAttributeValue(result, namespaceURI, false); 300 result.append('"'); 301 } 302 } 303 304 EntityMask MarkupAccumulator::entityMaskForText(Text* text) const 305 { 306 const QualifiedName* parentName = 0; 307 if (text->parentElement()) 308 parentName = &(text->parentElement())->tagQName(); 309 310 if (parentName && (*parentName == scriptTag || *parentName == styleTag || *parentName == xmpTag)) 311 return EntityMaskInCDATA; 312 313 return text->document()->isHTMLDocument() ? EntityMaskInHTMLPCDATA : EntityMaskInPCDATA; 314 } 315 316 void MarkupAccumulator::appendText(StringBuilder& result, Text* text) 317 { 318 appendNodeValue(result, text, m_range, entityMaskForText(text)); 319 } 320 321 void MarkupAccumulator::appendComment(StringBuilder& result, const String& comment) 322 { 323 // FIXME: Comment content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "-->". 
324 result.appendLiteral("<!--"); 325 result.append(comment); 326 result.appendLiteral("-->"); 327 } 328 329 void MarkupAccumulator::appendXMLDeclaration(StringBuilder& result, const Document* document) 330 { 331 if (!document->hasXMLDeclaration()) 332 return; 333 334 result.appendLiteral("<?xml version=\""); 335 result.append(document->xmlVersion()); 336 const String& encoding = document->xmlEncoding(); 337 if (!encoding.isEmpty()) { 338 result.appendLiteral("\" encoding=\""); 339 result.append(encoding); 340 } 341 if (document->xmlStandaloneStatus() != Document::StandaloneUnspecified) { 342 result.appendLiteral("\" standalone=\""); 343 if (document->xmlStandalone()) 344 result.appendLiteral("yes"); 345 else 346 result.appendLiteral("no"); 347 } 348 349 result.appendLiteral("\"?>"); 350 } 351 352 void MarkupAccumulator::appendDocumentType(StringBuilder& result, const DocumentType* n) 353 { 354 if (n->name().isEmpty()) 355 return; 356 357 result.appendLiteral("<!DOCTYPE "); 358 result.append(n->name()); 359 if (!n->publicId().isEmpty()) { 360 result.appendLiteral(" PUBLIC \""); 361 result.append(n->publicId()); 362 result.append('"'); 363 if (!n->systemId().isEmpty()) { 364 result.append(' '); 365 result.append('"'); 366 result.append(n->systemId()); 367 result.append('"'); 368 } 369 } else if (!n->systemId().isEmpty()) { 370 result.appendLiteral(" SYSTEM \""); 371 result.append(n->systemId()); 372 result.append('"'); 373 } 374 if (!n->internalSubset().isEmpty()) { 375 result.append(' '); 376 result.append('['); 377 result.append(n->internalSubset()); 378 result.append(']'); 379 } 380 result.append('>'); 381 } 382 383 void MarkupAccumulator::appendProcessingInstruction(StringBuilder& result, const String& target, const String& data) 384 { 385 // FIXME: PI data is not escaped, but XMLSerializer (and possibly other callers) this should raise an exception if it includes "?>". 
386 result.append('<'); 387 result.append('?'); 388 result.append(target); 389 result.append(' '); 390 result.append(data); 391 result.append('?'); 392 result.append('>'); 393 } 394 395 void MarkupAccumulator::appendElement(StringBuilder& result, Element* element, Namespaces* namespaces) 396 { 397 appendOpenTag(result, element, namespaces); 398 399 if (element->hasAttributes()) { 400 unsigned length = element->attributeCount(); 401 for (unsigned int i = 0; i < length; i++) 402 appendAttribute(result, element, *element->attributeItem(i), namespaces); 403 } 404 405 // Give an opportunity to subclasses to add their own attributes. 406 appendCustomAttributes(result, element, namespaces); 407 408 appendCloseTag(result, element); 409 } 410 411 void MarkupAccumulator::appendOpenTag(StringBuilder& result, Element* element, Namespaces* namespaces) 412 { 413 result.append('<'); 414 result.append(element->nodeNamePreservingCase()); 415 if (!element->document()->isHTMLDocument() && namespaces && shouldAddNamespaceElement(element)) 416 appendNamespace(result, element->prefix(), element->namespaceURI(), *namespaces); 417 } 418 419 void MarkupAccumulator::appendCloseTag(StringBuilder& result, Element* element) 420 { 421 if (shouldSelfClose(element)) { 422 if (element->isHTMLElement()) 423 result.append(' '); // XHTML 1.0 <-> HTML compatibility. 
424 result.append('/'); 425 } 426 result.append('>'); 427 } 428 429 static inline bool attributeIsInSerializedNamespace(const Attribute& attribute) 430 { 431 return attribute.namespaceURI() == XMLNames::xmlNamespaceURI 432 || attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI 433 || attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI; 434 } 435 436 void MarkupAccumulator::appendAttribute(StringBuilder& result, Element* element, const Attribute& attribute, Namespaces* namespaces) 437 { 438 bool documentIsHTML = element->document()->isHTMLDocument(); 439 440 result.append(' '); 441 442 if (documentIsHTML && !attributeIsInSerializedNamespace(attribute)) 443 result.append(attribute.name().localName()); 444 else { 445 QualifiedName prefixedName = attribute.name(); 446 if (attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI) { 447 if (!attribute.prefix()) 448 prefixedName.setPrefix(xlinkAtom); 449 } else if (attribute.namespaceURI() == XMLNames::xmlNamespaceURI) { 450 if (!attribute.prefix()) 451 prefixedName.setPrefix(xmlAtom); 452 } else if (attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI) { 453 if (attribute.name() != XMLNSNames::xmlnsAttr && !attribute.prefix()) 454 prefixedName.setPrefix(xmlnsAtom); 455 } 456 result.append(prefixedName.toString()); 457 } 458 459 result.append('='); 460 461 if (element->isURLAttribute(attribute)) 462 appendQuotedURLAttributeValue(result, element, attribute); 463 else { 464 result.append('"'); 465 appendAttributeValue(result, attribute.value(), documentIsHTML); 466 result.append('"'); 467 } 468 469 if (!documentIsHTML && namespaces && shouldAddNamespaceAttribute(attribute, *namespaces)) 470 appendNamespace(result, attribute.prefix(), attribute.namespaceURI(), *namespaces); 471 } 472 473 void MarkupAccumulator::appendCDATASection(StringBuilder& result, const String& section) 474 { 475 // FIXME: CDATA content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it 
includes "]]>". 476 result.appendLiteral("<![CDATA["); 477 result.append(section); 478 result.appendLiteral("]]>"); 479 } 480 481 void MarkupAccumulator::appendStartMarkup(StringBuilder& result, const Node* node, Namespaces* namespaces) 482 { 483 switch (node->nodeType()) { 484 case Node::TEXT_NODE: 485 appendText(result, toText(const_cast<Node*>(node))); 486 break; 487 case Node::COMMENT_NODE: 488 appendComment(result, static_cast<const Comment*>(node)->data()); 489 break; 490 case Node::DOCUMENT_NODE: 491 appendXMLDeclaration(result, toDocument(node)); 492 break; 493 case Node::DOCUMENT_FRAGMENT_NODE: 494 break; 495 case Node::DOCUMENT_TYPE_NODE: 496 appendDocumentType(result, static_cast<const DocumentType*>(node)); 497 break; 498 case Node::PROCESSING_INSTRUCTION_NODE: 499 appendProcessingInstruction(result, static_cast<const ProcessingInstruction*>(node)->target(), static_cast<const ProcessingInstruction*>(node)->data()); 500 break; 501 case Node::ELEMENT_NODE: 502 appendElement(result, toElement(const_cast<Node*>(node)), namespaces); 503 break; 504 case Node::CDATA_SECTION_NODE: 505 appendCDATASection(result, static_cast<const CDATASection*>(node)->data()); 506 break; 507 case Node::ATTRIBUTE_NODE: 508 case Node::ENTITY_NODE: 509 case Node::NOTATION_NODE: 510 case Node::XPATH_NAMESPACE_NODE: 511 ASSERT_NOT_REACHED(); 512 break; 513 } 514 } 515 516 // Rules of self-closure 517 // 1. No elements in HTML documents use the self-closing syntax. 518 // 2. Elements w/ children never self-close because they use a separate end tag. 519 // 3. HTML elements which do not have a "forbidden" end tag will close with a separate end tag. 520 // 4. Other elements self-close. 
521 bool MarkupAccumulator::shouldSelfClose(const Node* node) 522 { 523 if (node->document()->isHTMLDocument()) 524 return false; 525 if (node->hasChildNodes()) 526 return false; 527 if (node->isHTMLElement() && !elementCannotHaveEndTag(node)) 528 return false; 529 return true; 530 } 531 532 bool MarkupAccumulator::elementCannotHaveEndTag(const Node* node) 533 { 534 if (!node->isHTMLElement()) 535 return false; 536 537 // FIXME: ieForbidsInsertHTML may not be the right function to call here 538 // ieForbidsInsertHTML is used to disallow setting innerHTML/outerHTML 539 // or createContextualFragment. It does not necessarily align with 540 // which elements should be serialized w/o end tags. 541 return static_cast<const HTMLElement*>(node)->ieForbidsInsertHTML(); 542 } 543 544 void MarkupAccumulator::appendEndMarkup(StringBuilder& result, const Node* node) 545 { 546 if (!node->isElementNode() || shouldSelfClose(node) || (!node->hasChildNodes() && elementCannotHaveEndTag(node))) 547 return; 548 549 result.append('<'); 550 result.append('/'); 551 result.append(toElement(node)->nodeNamePreservingCase()); 552 result.append('>'); 553 } 554 555 } 556