1 /* 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. 3 * Copyright (C) 2009, 2010 Google Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 18 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 20 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "config.h" 28 #include "core/editing/MarkupAccumulator.h" 29 30 #include "HTMLNames.h" 31 #include "XLinkNames.h" 32 #include "XMLNSNames.h" 33 #include "XMLNames.h" 34 #include "core/dom/CDATASection.h" 35 #include "core/dom/Comment.h" 36 #include "core/dom/DocumentFragment.h" 37 #include "core/dom/DocumentType.h" 38 #include "core/dom/ProcessingInstruction.h" 39 #include "core/editing/Editor.h" 40 #include "core/html/HTMLElement.h" 41 #include "core/html/HTMLTemplateElement.h" 42 #include "platform/weborigin/KURL.h" 43 #include "wtf/unicode/CharacterNames.h" 44 45 namespace WebCore { 46 47 using namespace HTMLNames; 48 49 void MarkupAccumulator::appendCharactersReplacingEntities(StringBuilder& result, const String& source, unsigned offset, unsigned length, EntityMask entityMask) 50 { 51 DEFINE_STATIC_LOCAL(const String, ampReference, ("&")); 52 DEFINE_STATIC_LOCAL(const String, ltReference, ("<")); 53 DEFINE_STATIC_LOCAL(const String, gtReference, (">")); 54 DEFINE_STATIC_LOCAL(const String, quotReference, (""")); 55 DEFINE_STATIC_LOCAL(const String, nbspReference, (" ")); 56 57 static const EntityDescription entityMaps[] = { 58 { '&', ampReference, EntityAmp }, 59 { '<', ltReference, EntityLt }, 60 { '>', gtReference, EntityGt }, 61 { '"', quotReference, EntityQuot }, 62 { noBreakSpace, nbspReference, EntityNbsp }, 63 }; 64 65 if (!(offset + length)) 66 return; 67 68 ASSERT(offset + length <= source.length()); 69 70 if (source.is8Bit()) { 71 const LChar* text = source.characters8() + offset; 72 73 size_t positionAfterLastEntity = 0; 74 for (size_t i = 0; i < length; ++i) { 75 for (size_t entityIndex = 0; entityIndex < WTF_ARRAY_LENGTH(entityMaps); ++entityIndex) { 76 if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIndex].mask & entityMask) { 77 result.append(text + positionAfterLastEntity, i - positionAfterLastEntity); 78 result.append(entityMaps[entityIndex].reference); 79 positionAfterLastEntity = i + 1; 80 break; 81 } 82 } 83 } 84 result.append(text + positionAfterLastEntity, length - positionAfterLastEntity); 85 } else { 86 const UChar* text = source.characters16() + offset; 87 88 size_t positionAfterLastEntity = 0; 89 for (size_t i = 0; i < length; ++i) { 90 for (size_t entityIndex = 0; entityIndex < WTF_ARRAY_LENGTH(entityMaps); ++entityIndex) { 91 if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIndex].mask & entityMask) { 92 result.append(text + positionAfterLastEntity, i - positionAfterLastEntity); 93 result.append(entityMaps[entityIndex].reference); 94 positionAfterLastEntity = i + 1; 95 break; 96 } 97 } 98 } 99 result.append(text + positionAfterLastEntity, length - positionAfterLastEntity); 100 } 101 } 102 103 MarkupAccumulator::MarkupAccumulator(Vector<Node*>* nodes, EAbsoluteURLs resolveUrlsMethod, const Range* range) 104 : m_nodes(nodes) 105 , m_range(range) 106 , m_resolveURLsMethod(resolveUrlsMethod) 107 { 108 } 109 110 MarkupAccumulator::~MarkupAccumulator() 111 { 112 } 113 114 String MarkupAccumulator::serializeNodes(Node* targetNode, EChildrenOnly childrenOnly) 115 { 116 return serializeNodes(targetNode, childrenOnly, 0); 117 } 118 119 String MarkupAccumulator::serializeNodes(Node* targetNode, EChildrenOnly childrenOnly, Vector<QualifiedName>* tagNamesToSkip) 120 { 121 serializeNodesWithNamespaces(targetNode, childrenOnly, 0, tagNamesToSkip); 122 return m_markup.toString(); 123 } 124 125 void MarkupAccumulator::serializeNodesWithNamespaces(Node* targetNode, EChildrenOnly childrenOnly, const Namespaces* namespaces, Vector<QualifiedName>* tagNamesToSkip) 126 { 127 if (tagNamesToSkip) { 128 for (size_t i = 0; i < tagNamesToSkip->size(); ++i) { 129 if (targetNode->hasTagName(tagNamesToSkip->at(i))) 130 return; 131 } 132 } 133 134 Namespaces namespaceHash; 135 if (namespaces) 136 namespaceHash = *namespaces; 137 138 if (!childrenOnly) 139 appendStartTag(targetNode, &namespaceHash); 140 141 if (!(targetNode->document().isHTMLDocument() && elementCannotHaveEndTag(targetNode))) { 142 Node* current = targetNode->hasTagName(templateTag) ? toHTMLTemplateElement(targetNode)->content()->firstChild() : targetNode->firstChild(); 143 for ( ; current; current = current->nextSibling()) 144 serializeNodesWithNamespaces(current, IncludeNode, &namespaceHash, tagNamesToSkip); 145 } 146 147 if (!childrenOnly) 148 appendEndTag(targetNode); 149 } 150 151 String MarkupAccumulator::resolveURLIfNeeded(const Element* element, const String& urlString) const 152 { 153 switch (m_resolveURLsMethod) { 154 case ResolveAllURLs: 155 return element->document().completeURL(urlString).string(); 156 157 case ResolveNonLocalURLs: 158 if (!element->document().url().isLocalFile()) 159 return element->document().completeURL(urlString).string(); 160 break; 161 162 case DoNotResolveURLs: 163 break; 164 } 165 return urlString; 166 } 167 168 void MarkupAccumulator::appendString(const String& string) 169 { 170 m_markup.append(string); 171 } 172 173 void MarkupAccumulator::appendStartTag(Node* node, Namespaces* namespaces) 174 { 175 appendStartMarkup(m_markup, node, namespaces); 176 if (m_nodes) 177 m_nodes->append(node); 178 } 179 180 void MarkupAccumulator::appendEndTag(Node* node) 181 { 182 appendEndMarkup(m_markup, node); 183 } 184 185 size_t MarkupAccumulator::totalLength(const Vector<String>& strings) 186 { 187 size_t length = 0; 188 for (size_t i = 0; i < strings.size(); ++i) 189 length += strings[i].length(); 190 return length; 191 } 192 193 void MarkupAccumulator::concatenateMarkup(StringBuilder& result) 194 { 195 result.append(m_markup); 196 } 197 198 void MarkupAccumulator::appendAttributeValue(StringBuilder& result, const String& attribute, bool documentIsHTML) 199 { 200 appendCharactersReplacingEntities(result, attribute, 0, attribute.length(), 201 documentIsHTML ? EntityMaskInHTMLAttributeValue : EntityMaskInAttributeValue); 202 } 203 204 void MarkupAccumulator::appendCustomAttributes(StringBuilder&, Element*, Namespaces*) 205 { 206 } 207 208 void MarkupAccumulator::appendQuotedURLAttributeValue(StringBuilder& result, const Element* element, const Attribute& attribute) 209 { 210 ASSERT(element->isURLAttribute(attribute)); 211 const String resolvedURLString = resolveURLIfNeeded(element, attribute.value()); 212 UChar quoteChar = '"'; 213 String strippedURLString = resolvedURLString.stripWhiteSpace(); 214 if (protocolIsJavaScript(strippedURLString)) { 215 // minimal escaping for javascript urls 216 if (strippedURLString.contains('"')) { 217 if (strippedURLString.contains('\'')) 218 strippedURLString.replaceWithLiteral('"', """); 219 else 220 quoteChar = '\''; 221 } 222 result.append(quoteChar); 223 result.append(strippedURLString); 224 result.append(quoteChar); 225 return; 226 } 227 228 // FIXME: This does not fully match other browsers. Firefox percent-escapes non-ASCII characters for innerHTML. 229 result.append(quoteChar); 230 appendAttributeValue(result, resolvedURLString, false); 231 result.append(quoteChar); 232 } 233 234 void MarkupAccumulator::appendNodeValue(StringBuilder& result, const Node* node, const Range* range, EntityMask entityMask) 235 { 236 const String str = node->nodeValue(); 237 unsigned length = str.length(); 238 unsigned start = 0; 239 240 if (range) { 241 if (node == range->endContainer()) 242 length = range->endOffset(); 243 if (node == range->startContainer()) { 244 start = range->startOffset(); 245 length -= start; 246 } 247 } 248 249 appendCharactersReplacingEntities(result, str, start, length, entityMask); 250 } 251 252 bool MarkupAccumulator::shouldAddNamespaceElement(const Element* element) 253 { 254 // Don't add namespace attribute if it is already defined for this elem. 255 const AtomicString& prefix = element->prefix(); 256 if (prefix.isEmpty()) 257 return !element->hasAttribute(xmlnsAtom); 258 259 DEFINE_STATIC_LOCAL(String, xmlnsWithColon, ("xmlns:")); 260 return !element->hasAttribute(xmlnsWithColon + prefix); 261 } 262 263 bool MarkupAccumulator::shouldAddNamespaceAttribute(const Attribute& attribute, Namespaces& namespaces) 264 { 265 // Don't add namespace attributes twice 266 if (attribute.name() == XMLNSNames::xmlnsAttr) { 267 namespaces.set(emptyAtom.impl(), attribute.value().impl()); 268 return false; 269 } 270 271 QualifiedName xmlnsPrefixAttr(xmlnsAtom, attribute.localName(), XMLNSNames::xmlnsNamespaceURI); 272 if (attribute.name() == xmlnsPrefixAttr) { 273 namespaces.set(attribute.localName().impl(), attribute.value().impl()); 274 return false; 275 } 276 277 return true; 278 } 279 280 void MarkupAccumulator::appendNamespace(StringBuilder& result, const AtomicString& prefix, const AtomicString& namespaceURI, Namespaces& namespaces) 281 { 282 if (namespaceURI.isEmpty()) 283 return; 284 285 // Use emptyAtoms's impl() for both null and empty strings since the HashMap can't handle 0 as a key 286 StringImpl* pre = prefix.isEmpty() ? emptyAtom.impl() : prefix.impl(); 287 StringImpl* foundNS = namespaces.get(pre); 288 if (foundNS != namespaceURI.impl()) { 289 namespaces.set(pre, namespaceURI.impl()); 290 result.append(' '); 291 result.append(xmlnsAtom.string()); 292 if (!prefix.isEmpty()) { 293 result.append(':'); 294 result.append(prefix); 295 } 296 297 result.append('='); 298 result.append('"'); 299 appendAttributeValue(result, namespaceURI, false); 300 result.append('"'); 301 } 302 } 303 304 EntityMask MarkupAccumulator::entityMaskForText(Text* text) const 305 { 306 if (!text->document().isHTMLDocument()) 307 return EntityMaskInPCDATA; 308 309 const QualifiedName* parentName = 0; 310 if (text->parentElement()) 311 parentName = &(text->parentElement())->tagQName(); 312 313 if (parentName && (*parentName == scriptTag || *parentName == styleTag || *parentName == xmpTag)) 314 return EntityMaskInCDATA; 315 return EntityMaskInHTMLPCDATA; 316 } 317 318 void MarkupAccumulator::appendText(StringBuilder& result, Text* text) 319 { 320 appendNodeValue(result, text, m_range, entityMaskForText(text)); 321 } 322 323 void MarkupAccumulator::appendComment(StringBuilder& result, const String& comment) 324 { 325 // FIXME: Comment content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "-->". 326 result.appendLiteral("<!--"); 327 result.append(comment); 328 result.appendLiteral("-->"); 329 } 330 331 void MarkupAccumulator::appendXMLDeclaration(StringBuilder& result, const Document* document) 332 { 333 if (!document->hasXMLDeclaration()) 334 return; 335 336 result.appendLiteral("<?xml version=\""); 337 result.append(document->xmlVersion()); 338 const String& encoding = document->xmlEncoding(); 339 if (!encoding.isEmpty()) { 340 result.appendLiteral("\" encoding=\""); 341 result.append(encoding); 342 } 343 if (document->xmlStandaloneStatus() != Document::StandaloneUnspecified) { 344 result.appendLiteral("\" standalone=\""); 345 if (document->xmlStandalone()) 346 result.appendLiteral("yes"); 347 else 348 result.appendLiteral("no"); 349 } 350 351 result.appendLiteral("\"?>"); 352 } 353 354 void MarkupAccumulator::appendDocumentType(StringBuilder& result, const DocumentType* n) 355 { 356 if (n->name().isEmpty()) 357 return; 358 359 result.appendLiteral("<!DOCTYPE "); 360 result.append(n->name()); 361 if (!n->publicId().isEmpty()) { 362 result.appendLiteral(" PUBLIC \""); 363 result.append(n->publicId()); 364 result.append('"'); 365 if (!n->systemId().isEmpty()) { 366 result.append(' '); 367 result.append('"'); 368 result.append(n->systemId()); 369 result.append('"'); 370 } 371 } else if (!n->systemId().isEmpty()) { 372 result.appendLiteral(" SYSTEM \""); 373 result.append(n->systemId()); 374 result.append('"'); 375 } 376 if (!n->internalSubset().isEmpty()) { 377 result.append(' '); 378 result.append('['); 379 result.append(n->internalSubset()); 380 result.append(']'); 381 } 382 result.append('>'); 383 } 384 385 void MarkupAccumulator::appendProcessingInstruction(StringBuilder& result, const String& target, const String& data) 386 { 387 // FIXME: PI data is not escaped, but XMLSerializer (and possibly other callers) this should raise an exception if it includes "?>". 388 result.append('<'); 389 result.append('?'); 390 result.append(target); 391 result.append(' '); 392 result.append(data); 393 result.append('?'); 394 result.append('>'); 395 } 396 397 void MarkupAccumulator::appendElement(StringBuilder& result, Element* element, Namespaces* namespaces) 398 { 399 appendOpenTag(result, element, namespaces); 400 401 if (element->hasAttributes()) { 402 unsigned length = element->attributeCount(); 403 for (unsigned int i = 0; i < length; i++) 404 appendAttribute(result, element, *element->attributeItem(i), namespaces); 405 } 406 407 // Give an opportunity to subclasses to add their own attributes. 408 appendCustomAttributes(result, element, namespaces); 409 410 appendCloseTag(result, element); 411 } 412 413 void MarkupAccumulator::appendOpenTag(StringBuilder& result, Element* element, Namespaces* namespaces) 414 { 415 result.append('<'); 416 result.append(element->nodeNamePreservingCase()); 417 if (!element->document().isHTMLDocument() && namespaces && shouldAddNamespaceElement(element)) 418 appendNamespace(result, element->prefix(), element->namespaceURI(), *namespaces); 419 } 420 421 void MarkupAccumulator::appendCloseTag(StringBuilder& result, Element* element) 422 { 423 if (shouldSelfClose(element)) { 424 if (element->isHTMLElement()) 425 result.append(' '); // XHTML 1.0 <-> HTML compatibility. 426 result.append('/'); 427 } 428 result.append('>'); 429 } 430 431 static inline bool attributeIsInSerializedNamespace(const Attribute& attribute) 432 { 433 return attribute.namespaceURI() == XMLNames::xmlNamespaceURI 434 || attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI 435 || attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI; 436 } 437 438 void MarkupAccumulator::appendAttribute(StringBuilder& result, Element* element, const Attribute& attribute, Namespaces* namespaces) 439 { 440 bool documentIsHTML = element->document().isHTMLDocument(); 441 442 result.append(' '); 443 444 if (documentIsHTML && !attributeIsInSerializedNamespace(attribute)) 445 result.append(attribute.name().localName()); 446 else { 447 QualifiedName prefixedName = attribute.name(); 448 if (attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI) { 449 if (!attribute.prefix()) 450 prefixedName.setPrefix(xlinkAtom); 451 } else if (attribute.namespaceURI() == XMLNames::xmlNamespaceURI) { 452 if (!attribute.prefix()) 453 prefixedName.setPrefix(xmlAtom); 454 } else if (attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI) { 455 if (attribute.name() != XMLNSNames::xmlnsAttr && !attribute.prefix()) 456 prefixedName.setPrefix(xmlnsAtom); 457 } 458 result.append(prefixedName.toString()); 459 } 460 461 result.append('='); 462 463 if (element->isURLAttribute(attribute)) 464 appendQuotedURLAttributeValue(result, element, attribute); 465 else { 466 result.append('"'); 467 appendAttributeValue(result, attribute.value(), documentIsHTML); 468 result.append('"'); 469 } 470 471 if (!documentIsHTML && namespaces && shouldAddNamespaceAttribute(attribute, *namespaces)) 472 appendNamespace(result, attribute.prefix(), attribute.namespaceURI(), *namespaces); 473 } 474 475 void MarkupAccumulator::appendCDATASection(StringBuilder& result, const String& section) 476 { 477 // FIXME: CDATA content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "]]>". 478 result.appendLiteral("<![CDATA["); 479 result.append(section); 480 result.appendLiteral("]]>"); 481 } 482 483 void MarkupAccumulator::appendStartMarkup(StringBuilder& result, const Node* node, Namespaces* namespaces) 484 { 485 switch (node->nodeType()) { 486 case Node::TEXT_NODE: 487 appendText(result, toText(const_cast<Node*>(node))); 488 break; 489 case Node::COMMENT_NODE: 490 appendComment(result, toComment(node)->data()); 491 break; 492 case Node::DOCUMENT_NODE: 493 appendXMLDeclaration(result, toDocument(node)); 494 break; 495 case Node::DOCUMENT_FRAGMENT_NODE: 496 break; 497 case Node::DOCUMENT_TYPE_NODE: 498 appendDocumentType(result, toDocumentType(node)); 499 break; 500 case Node::PROCESSING_INSTRUCTION_NODE: 501 appendProcessingInstruction(result, toProcessingInstruction(node)->target(), toProcessingInstruction(node)->data()); 502 break; 503 case Node::ELEMENT_NODE: 504 appendElement(result, toElement(const_cast<Node*>(node)), namespaces); 505 break; 506 case Node::CDATA_SECTION_NODE: 507 appendCDATASection(result, toCDATASection(node)->data()); 508 break; 509 case Node::ATTRIBUTE_NODE: 510 case Node::ENTITY_NODE: 511 case Node::NOTATION_NODE: 512 case Node::XPATH_NAMESPACE_NODE: 513 ASSERT_NOT_REACHED(); 514 break; 515 } 516 } 517 518 // Rules of self-closure 519 // 1. No elements in HTML documents use the self-closing syntax. 520 // 2. Elements w/ children never self-close because they use a separate end tag. 521 // 3. HTML elements which do not have a "forbidden" end tag will close with a separate end tag. 522 // 4. Other elements self-close. 523 bool MarkupAccumulator::shouldSelfClose(const Node* node) 524 { 525 if (node->document().isHTMLDocument()) 526 return false; 527 if (node->hasChildNodes()) 528 return false; 529 if (node->isHTMLElement() && !elementCannotHaveEndTag(node)) 530 return false; 531 return true; 532 } 533 534 bool MarkupAccumulator::elementCannotHaveEndTag(const Node* node) 535 { 536 if (!node->isHTMLElement()) 537 return false; 538 539 // FIXME: ieForbidsInsertHTML may not be the right function to call here 540 // ieForbidsInsertHTML is used to disallow setting innerHTML/outerHTML 541 // or createContextualFragment. It does not necessarily align with 542 // which elements should be serialized w/o end tags. 543 return toHTMLElement(node)->ieForbidsInsertHTML(); 544 } 545 546 void MarkupAccumulator::appendEndMarkup(StringBuilder& result, const Node* node) 547 { 548 if (!node->isElementNode() || shouldSelfClose(node) || (!node->hasChildNodes() && elementCannotHaveEndTag(node))) 549 return; 550 551 result.append('<'); 552 result.append('/'); 553 result.append(toElement(node)->nodeNamePreservingCase()); 554 result.append('>'); 555 } 556 557 } 558