1 /* 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. 3 * Copyright (C) 2009, 2010 Google Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 18 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 20 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "config.h" 28 #include "core/editing/MarkupAccumulator.h" 29 30 #include "core/HTMLNames.h" 31 #include "core/XLinkNames.h" 32 #include "core/XMLNSNames.h" 33 #include "core/XMLNames.h" 34 #include "core/dom/CDATASection.h" 35 #include "core/dom/Comment.h" 36 #include "core/dom/Document.h" 37 #include "core/dom/DocumentFragment.h" 38 #include "core/dom/DocumentType.h" 39 #include "core/dom/ProcessingInstruction.h" 40 #include "core/editing/Editor.h" 41 #include "core/html/HTMLElement.h" 42 #include "core/html/HTMLTemplateElement.h" 43 #include "platform/weborigin/KURL.h" 44 #include "wtf/unicode/CharacterNames.h" 45 46 namespace WebCore { 47 48 using namespace HTMLNames; 49 50 struct EntityDescription { 51 UChar entity; 52 const CString& reference; 53 EntityMask mask; 54 }; 55 56 template <typename CharType> 57 static inline void appendCharactersReplacingEntitiesInternal(StringBuilder& result, CharType* text, unsigned length, const EntityDescription entityMaps[], unsigned entityMapsCount, EntityMask entityMask) 58 { 59 unsigned positionAfterLastEntity = 0; 60 for (unsigned i = 0; i < length; ++i) { 61 for (unsigned entityIndex = 0; entityIndex < entityMapsCount; ++entityIndex) { 62 if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIndex].mask & entityMask) { 63 result.append(text + positionAfterLastEntity, i - positionAfterLastEntity); 64 const CString& replacement = entityMaps[entityIndex].reference; 65 result.append(replacement.data(), replacement.length()); 66 positionAfterLastEntity = i + 1; 67 break; 68 } 69 } 70 } 71 result.append(text + positionAfterLastEntity, length - positionAfterLastEntity); 72 } 73 74 void MarkupAccumulator::appendCharactersReplacingEntities(StringBuilder& result, const String& source, unsigned offset, unsigned length, EntityMask entityMask) 75 { 76 DEFINE_STATIC_LOCAL(const CString, ampReference, ("&")); 77 DEFINE_STATIC_LOCAL(const CString, ltReference, ("<")); 78 DEFINE_STATIC_LOCAL(const CString, gtReference, (">")); 79 DEFINE_STATIC_LOCAL(const CString, quotReference, (""")); 80 DEFINE_STATIC_LOCAL(const CString, nbspReference, (" ")); 81 82 static const EntityDescription entityMaps[] = { 83 { '&', ampReference, EntityAmp }, 84 { '<', ltReference, EntityLt }, 85 { '>', gtReference, EntityGt }, 86 { '"', quotReference, EntityQuot }, 87 { noBreakSpace, nbspReference, EntityNbsp }, 88 }; 89 90 if (!(offset + length)) 91 return; 92 93 ASSERT(offset + length <= source.length()); 94 if (source.is8Bit()) 95 appendCharactersReplacingEntitiesInternal(result, source.characters8() + offset, length, entityMaps, WTF_ARRAY_LENGTH(entityMaps), entityMask); 96 else 97 appendCharactersReplacingEntitiesInternal(result, source.characters16() + offset, length, entityMaps, WTF_ARRAY_LENGTH(entityMaps), entityMask); 98 } 99 100 MarkupAccumulator::MarkupAccumulator(WillBeHeapVector<RawPtrWillBeMember<Node> >* nodes, EAbsoluteURLs resolveUrlsMethod, const Range* range, SerializationType serializationType) 101 : m_nodes(nodes) 102 , m_range(range) 103 , m_resolveURLsMethod(resolveUrlsMethod) 104 , m_serializationType(serializationType) 105 { 106 } 107 108 MarkupAccumulator::~MarkupAccumulator() 109 { 110 } 111 112 String MarkupAccumulator::serializeNodes(Node& targetNode, EChildrenOnly childrenOnly, Vector<QualifiedName>* tagNamesToSkip) 113 { 114 Namespaces* namespaces = 0; 115 Namespaces namespaceHash; 116 if (!serializeAsHTMLDocument(targetNode)) { 117 // Add pre-bound namespaces for XML fragments. 118 namespaceHash.set(xmlAtom, XMLNames::xmlNamespaceURI); 119 namespaces = &namespaceHash; 120 } 121 122 serializeNodesWithNamespaces(targetNode, childrenOnly, namespaces, tagNamesToSkip); 123 return m_markup.toString(); 124 } 125 126 void MarkupAccumulator::serializeNodesWithNamespaces(Node& targetNode, EChildrenOnly childrenOnly, const Namespaces* namespaces, Vector<QualifiedName>* tagNamesToSkip) 127 { 128 if (tagNamesToSkip) { 129 for (size_t i = 0; i < tagNamesToSkip->size(); ++i) { 130 if (targetNode.hasTagName(tagNamesToSkip->at(i))) 131 return; 132 } 133 } 134 135 Namespaces namespaceHash; 136 if (namespaces) 137 namespaceHash = *namespaces; 138 139 if (!childrenOnly) 140 appendStartTag(targetNode, &namespaceHash); 141 142 if (!(serializeAsHTMLDocument(targetNode) && elementCannotHaveEndTag(targetNode))) { 143 Node* current = isHTMLTemplateElement(targetNode) ? toHTMLTemplateElement(targetNode).content()->firstChild() : targetNode.firstChild(); 144 for ( ; current; current = current->nextSibling()) 145 serializeNodesWithNamespaces(*current, IncludeNode, &namespaceHash, tagNamesToSkip); 146 } 147 148 if (!childrenOnly) 149 appendEndTag(targetNode); 150 } 151 152 String MarkupAccumulator::resolveURLIfNeeded(const Element& element, const String& urlString) const 153 { 154 switch (m_resolveURLsMethod) { 155 case ResolveAllURLs: 156 return element.document().completeURL(urlString).string(); 157 158 case ResolveNonLocalURLs: 159 if (!element.document().url().isLocalFile()) 160 return element.document().completeURL(urlString).string(); 161 break; 162 163 case DoNotResolveURLs: 164 break; 165 } 166 return urlString; 167 } 168 169 void MarkupAccumulator::appendString(const String& string) 170 { 171 m_markup.append(string); 172 } 173 174 void MarkupAccumulator::appendStartTag(Node& node, Namespaces* namespaces) 175 { 176 appendStartMarkup(m_markup, node, namespaces); 177 if (m_nodes) 178 m_nodes->append(&node); 179 } 180 181 void MarkupAccumulator::appendEndTag(const Node& node) 182 { 183 appendEndMarkup(m_markup, node); 184 } 185 186 size_t MarkupAccumulator::totalLength(const Vector<String>& strings) 187 { 188 size_t length = 0; 189 for (size_t i = 0; i < strings.size(); ++i) 190 length += strings[i].length(); 191 return length; 192 } 193 194 void MarkupAccumulator::concatenateMarkup(StringBuilder& result) 195 { 196 result.append(m_markup); 197 } 198 199 void MarkupAccumulator::appendAttributeValue(StringBuilder& result, const String& attribute, bool documentIsHTML) 200 { 201 appendCharactersReplacingEntities(result, attribute, 0, attribute.length(), 202 documentIsHTML ? EntityMaskInHTMLAttributeValue : EntityMaskInAttributeValue); 203 } 204 205 void MarkupAccumulator::appendCustomAttributes(StringBuilder&, const Element&, Namespaces*) 206 { 207 } 208 209 void MarkupAccumulator::appendQuotedURLAttributeValue(StringBuilder& result, const Element& element, const Attribute& attribute) 210 { 211 ASSERT(element.isURLAttribute(attribute)); 212 const String resolvedURLString = resolveURLIfNeeded(element, attribute.value()); 213 UChar quoteChar = '"'; 214 String strippedURLString = resolvedURLString.stripWhiteSpace(); 215 if (protocolIsJavaScript(strippedURLString)) { 216 // minimal escaping for javascript urls 217 if (strippedURLString.contains('"')) { 218 if (strippedURLString.contains('\'')) 219 strippedURLString.replaceWithLiteral('"', """); 220 else 221 quoteChar = '\''; 222 } 223 result.append(quoteChar); 224 result.append(strippedURLString); 225 result.append(quoteChar); 226 return; 227 } 228 229 // FIXME: This does not fully match other browsers. Firefox percent-escapes non-ASCII characters for innerHTML. 230 result.append(quoteChar); 231 appendAttributeValue(result, resolvedURLString, false); 232 result.append(quoteChar); 233 } 234 235 bool MarkupAccumulator::shouldAddNamespaceElement(const Element& element, Namespaces& namespaces) 236 { 237 // Don't add namespace attribute if it is already defined for this elem. 238 const AtomicString& prefix = element.prefix(); 239 if (prefix.isEmpty()) { 240 if (element.hasAttribute(xmlnsAtom)) { 241 namespaces.set(emptyAtom, element.namespaceURI()); 242 return false; 243 } 244 return true; 245 } 246 247 return !element.hasAttribute(WTF::xmlnsWithColon + prefix); 248 } 249 250 bool MarkupAccumulator::shouldAddNamespaceAttribute(const Attribute& attribute, const Element& element) 251 { 252 // xmlns and xmlns:prefix attributes should be handled by another branch in appendAttribute. 253 ASSERT(attribute.namespaceURI() != XMLNSNames::xmlnsNamespaceURI); 254 255 // Attributes are in the null namespace by default. 256 if (!attribute.namespaceURI()) 257 return false; 258 259 // Attributes without a prefix will need one generated for them, and an xmlns attribute for that prefix. 260 if (!attribute.prefix()) 261 return true; 262 263 return !element.hasAttribute(WTF::xmlnsWithColon + attribute.prefix()); 264 } 265 266 void MarkupAccumulator::appendNamespace(StringBuilder& result, const AtomicString& prefix, const AtomicString& namespaceURI, Namespaces& namespaces) 267 { 268 if (namespaceURI.isEmpty()) 269 return; 270 271 const AtomicString& lookupKey = (!prefix) ? emptyAtom : prefix; 272 AtomicString foundURI = namespaces.get(lookupKey); 273 if (foundURI != namespaceURI) { 274 namespaces.set(lookupKey, namespaceURI); 275 result.append(' '); 276 result.append(xmlnsAtom.string()); 277 if (!prefix.isEmpty()) { 278 result.append(':'); 279 result.append(prefix); 280 } 281 282 result.appendLiteral("=\""); 283 appendAttributeValue(result, namespaceURI, false); 284 result.append('"'); 285 } 286 } 287 288 EntityMask MarkupAccumulator::entityMaskForText(const Text& text) const 289 { 290 if (!serializeAsHTMLDocument(text)) 291 return EntityMaskInPCDATA; 292 293 const QualifiedName* parentName = 0; 294 if (text.parentElement()) 295 parentName = &(text.parentElement())->tagQName(); 296 297 if (parentName && (*parentName == scriptTag || *parentName == styleTag || *parentName == xmpTag)) 298 return EntityMaskInCDATA; 299 return EntityMaskInHTMLPCDATA; 300 } 301 302 void MarkupAccumulator::appendText(StringBuilder& result, Text& text) 303 { 304 const String& str = text.data(); 305 unsigned length = str.length(); 306 unsigned start = 0; 307 308 if (m_range) { 309 if (text == m_range->endContainer()) 310 length = m_range->endOffset(); 311 if (text == m_range->startContainer()) { 312 start = m_range->startOffset(); 313 length -= start; 314 } 315 } 316 appendCharactersReplacingEntities(result, str, start, length, entityMaskForText(text)); 317 } 318 319 void MarkupAccumulator::appendComment(StringBuilder& result, const String& comment) 320 { 321 // FIXME: Comment content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "-->". 322 result.appendLiteral("<!--"); 323 result.append(comment); 324 result.appendLiteral("-->"); 325 } 326 327 void MarkupAccumulator::appendXMLDeclaration(StringBuilder& result, const Document& document) 328 { 329 if (!document.hasXMLDeclaration()) 330 return; 331 332 result.appendLiteral("<?xml version=\""); 333 result.append(document.xmlVersion()); 334 const String& encoding = document.xmlEncoding(); 335 if (!encoding.isEmpty()) { 336 result.appendLiteral("\" encoding=\""); 337 result.append(encoding); 338 } 339 if (document.xmlStandaloneStatus() != Document::StandaloneUnspecified) { 340 result.appendLiteral("\" standalone=\""); 341 if (document.xmlStandalone()) 342 result.appendLiteral("yes"); 343 else 344 result.appendLiteral("no"); 345 } 346 347 result.appendLiteral("\"?>"); 348 } 349 350 void MarkupAccumulator::appendDocumentType(StringBuilder& result, const DocumentType& n) 351 { 352 if (n.name().isEmpty()) 353 return; 354 355 result.appendLiteral("<!DOCTYPE "); 356 result.append(n.name()); 357 if (!n.publicId().isEmpty()) { 358 result.appendLiteral(" PUBLIC \""); 359 result.append(n.publicId()); 360 result.append('"'); 361 if (!n.systemId().isEmpty()) { 362 result.appendLiteral(" \""); 363 result.append(n.systemId()); 364 result.append('"'); 365 } 366 } else if (!n.systemId().isEmpty()) { 367 result.appendLiteral(" SYSTEM \""); 368 result.append(n.systemId()); 369 result.append('"'); 370 } 371 result.append('>'); 372 } 373 374 void MarkupAccumulator::appendProcessingInstruction(StringBuilder& result, const String& target, const String& data) 375 { 376 // FIXME: PI data is not escaped, but XMLSerializer (and possibly other callers) this should raise an exception if it includes "?>". 377 result.appendLiteral("<?"); 378 result.append(target); 379 result.append(' '); 380 result.append(data); 381 result.appendLiteral("?>"); 382 } 383 384 void MarkupAccumulator::appendElement(StringBuilder& result, Element& element, Namespaces* namespaces) 385 { 386 appendOpenTag(result, element, namespaces); 387 388 if (element.hasAttributes()) { 389 AttributeCollection attributes = element.attributes(); 390 AttributeCollection::const_iterator end = attributes.end(); 391 for (AttributeCollection::const_iterator it = attributes.begin(); it != end; ++it) 392 appendAttribute(result, element, *it, namespaces); 393 } 394 395 // Give an opportunity to subclasses to add their own attributes. 396 appendCustomAttributes(result, element, namespaces); 397 398 appendCloseTag(result, element); 399 } 400 401 static String nodeNamePreservingCase(const Element& element) 402 { 403 return element.tagQName().toString(); 404 } 405 406 void MarkupAccumulator::appendOpenTag(StringBuilder& result, const Element& element, Namespaces* namespaces) 407 { 408 result.append('<'); 409 result.append(nodeNamePreservingCase(element)); 410 if (!serializeAsHTMLDocument(element) && namespaces && shouldAddNamespaceElement(element, *namespaces)) 411 appendNamespace(result, element.prefix(), element.namespaceURI(), *namespaces); 412 } 413 414 void MarkupAccumulator::appendCloseTag(StringBuilder& result, const Element& element) 415 { 416 if (shouldSelfClose(element)) { 417 if (element.isHTMLElement()) 418 result.append(' '); // XHTML 1.0 <-> HTML compatibility. 419 result.append('/'); 420 } 421 result.append('>'); 422 } 423 424 static inline bool attributeIsInSerializedNamespace(const Attribute& attribute) 425 { 426 return attribute.namespaceURI() == XMLNames::xmlNamespaceURI 427 || attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI 428 || attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI; 429 } 430 431 void MarkupAccumulator::appendAttribute(StringBuilder& result, const Element& element, const Attribute& attribute, Namespaces* namespaces) 432 { 433 bool documentIsHTML = serializeAsHTMLDocument(element); 434 435 QualifiedName prefixedName = attribute.name(); 436 if (documentIsHTML && !attributeIsInSerializedNamespace(attribute)) { 437 result.append(' '); 438 result.append(attribute.name().localName()); 439 } else { 440 if (attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI) { 441 if (!attribute.prefix()) 442 prefixedName.setPrefix(xlinkAtom); 443 } else if (attribute.namespaceURI() == XMLNames::xmlNamespaceURI) { 444 if (!attribute.prefix()) 445 prefixedName.setPrefix(xmlAtom); 446 } else if (attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI) { 447 if (!attribute.prefix() && attribute.localName() != xmlnsAtom) 448 prefixedName.setPrefix(xmlnsAtom); 449 if (namespaces) { // Account for the namespace attribute we're about to append. 450 const AtomicString& lookupKey = (!attribute.prefix()) ? emptyAtom : attribute.localName(); 451 namespaces->set(lookupKey, attribute.value()); 452 } 453 } else if (namespaces && shouldAddNamespaceAttribute(attribute, element)) { 454 if (!attribute.prefix()) { 455 // This behavior is in process of being standardized. See crbug.com/248044 and https://www.w3.org/Bugs/Public/show_bug.cgi?id=24208 456 String prefixPrefix("ns", 2); 457 for (unsigned i = attribute.namespaceURI().impl()->existingHash(); ; ++i) { 458 AtomicString newPrefix(String(prefixPrefix + String::number(i))); 459 AtomicString foundURI = namespaces->get(newPrefix); 460 if (foundURI == attribute.namespaceURI() || foundURI == nullAtom) { 461 // We already generated a prefix for this namespace. 462 prefixedName.setPrefix(newPrefix); 463 break; 464 } 465 } 466 } 467 ASSERT(prefixedName.prefix()); 468 appendNamespace(result, prefixedName.prefix(), attribute.namespaceURI(), *namespaces); 469 } 470 result.append(' '); 471 result.append(prefixedName.toString()); 472 } 473 474 result.append('='); 475 476 if (element.isURLAttribute(attribute)) { 477 appendQuotedURLAttributeValue(result, element, attribute); 478 } else { 479 result.append('"'); 480 appendAttributeValue(result, attribute.value(), documentIsHTML); 481 result.append('"'); 482 } 483 } 484 485 void MarkupAccumulator::appendCDATASection(StringBuilder& result, const String& section) 486 { 487 // FIXME: CDATA content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "]]>". 488 result.appendLiteral("<![CDATA["); 489 result.append(section); 490 result.appendLiteral("]]>"); 491 } 492 493 void MarkupAccumulator::appendStartMarkup(StringBuilder& result, Node& node, Namespaces* namespaces) 494 { 495 switch (node.nodeType()) { 496 case Node::TEXT_NODE: 497 appendText(result, toText(node)); 498 break; 499 case Node::COMMENT_NODE: 500 appendComment(result, toComment(node).data()); 501 break; 502 case Node::DOCUMENT_NODE: 503 appendXMLDeclaration(result, toDocument(node)); 504 break; 505 case Node::DOCUMENT_FRAGMENT_NODE: 506 break; 507 case Node::DOCUMENT_TYPE_NODE: 508 appendDocumentType(result, toDocumentType(node)); 509 break; 510 case Node::PROCESSING_INSTRUCTION_NODE: 511 appendProcessingInstruction(result, toProcessingInstruction(node).target(), toProcessingInstruction(node).data()); 512 break; 513 case Node::ELEMENT_NODE: 514 appendElement(result, toElement(node), namespaces); 515 break; 516 case Node::CDATA_SECTION_NODE: 517 appendCDATASection(result, toCDATASection(node).data()); 518 break; 519 case Node::ATTRIBUTE_NODE: 520 ASSERT_NOT_REACHED(); 521 break; 522 } 523 } 524 525 // Rules of self-closure 526 // 1. No elements in HTML documents use the self-closing syntax. 527 // 2. Elements w/ children never self-close because they use a separate end tag. 528 // 3. HTML elements which do not have a "forbidden" end tag will close with a separate end tag. 529 // 4. Other elements self-close. 530 bool MarkupAccumulator::shouldSelfClose(const Node& node) 531 { 532 if (serializeAsHTMLDocument(node)) 533 return false; 534 if (node.hasChildren()) 535 return false; 536 if (node.isHTMLElement() && !elementCannotHaveEndTag(node)) 537 return false; 538 return true; 539 } 540 541 bool MarkupAccumulator::elementCannotHaveEndTag(const Node& node) 542 { 543 if (!node.isHTMLElement()) 544 return false; 545 546 // FIXME: ieForbidsInsertHTML may not be the right function to call here 547 // ieForbidsInsertHTML is used to disallow setting innerHTML/outerHTML 548 // or createContextualFragment. It does not necessarily align with 549 // which elements should be serialized w/o end tags. 550 return toHTMLElement(node).ieForbidsInsertHTML(); 551 } 552 553 void MarkupAccumulator::appendEndMarkup(StringBuilder& result, const Node& node) 554 { 555 if (!node.isElementNode() || shouldSelfClose(node) || (!node.hasChildren() && elementCannotHaveEndTag(node))) 556 return; 557 558 result.appendLiteral("</"); 559 result.append(nodeNamePreservingCase(toElement(node))); 560 result.append('>'); 561 } 562 563 bool MarkupAccumulator::serializeAsHTMLDocument(const Node& node) const 564 { 565 if (m_serializationType == ForcedXML) 566 return false; 567 return node.document().isHTMLDocument(); 568 } 569 570 } 571