Home | History | Annotate | Download | only in editing
      1 /*
      2  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved.
      3  * Copyright (C) 2009, 2010 Google Inc. All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     15  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     16  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     17  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     18  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     19  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     20  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include "config.h"
     28 #include "core/editing/MarkupAccumulator.h"
     29 
     30 #include "HTMLNames.h"
     31 #include "XLinkNames.h"
     32 #include "XMLNSNames.h"
     33 #include "XMLNames.h"
     34 #include "core/dom/CDATASection.h"
     35 #include "core/dom/Comment.h"
     36 #include "core/dom/DocumentFragment.h"
     37 #include "core/dom/DocumentType.h"
     38 #include "core/dom/ProcessingInstruction.h"
     39 #include "core/editing/Editor.h"
     40 #include "core/html/HTMLElement.h"
     41 #include "core/html/HTMLTemplateElement.h"
     42 #include "weborigin/KURL.h"
     43 #include "wtf/unicode/CharacterNames.h"
     44 
     45 namespace WebCore {
     46 
     47 using namespace HTMLNames;
     48 
     49 void MarkupAccumulator::appendCharactersReplacingEntities(StringBuilder& result, const String& source, unsigned offset, unsigned length, EntityMask entityMask)
     50 {
     51     DEFINE_STATIC_LOCAL(const String, ampReference, ("&"));
     52     DEFINE_STATIC_LOCAL(const String, ltReference, ("<"));
     53     DEFINE_STATIC_LOCAL(const String, gtReference, (">"));
     54     DEFINE_STATIC_LOCAL(const String, quotReference, ("""));
     55     DEFINE_STATIC_LOCAL(const String, nbspReference, (" "));
     56 
     57     static const EntityDescription entityMaps[] = {
     58         { '&', ampReference, EntityAmp },
     59         { '<', ltReference, EntityLt },
     60         { '>', gtReference, EntityGt },
     61         { '"', quotReference, EntityQuot },
     62         { noBreakSpace, nbspReference, EntityNbsp },
     63     };
     64 
     65     if (!(offset + length))
     66         return;
     67 
     68     ASSERT(offset + length <= source.length());
     69 
     70     if (source.is8Bit()) {
     71         const LChar* text = source.characters8() + offset;
     72 
     73         size_t positionAfterLastEntity = 0;
     74         for (size_t i = 0; i < length; ++i) {
     75             for (size_t entityIndex = 0; entityIndex < WTF_ARRAY_LENGTH(entityMaps); ++entityIndex) {
     76                 if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIndex].mask & entityMask) {
     77                     result.append(text + positionAfterLastEntity, i - positionAfterLastEntity);
     78                     result.append(entityMaps[entityIndex].reference);
     79                     positionAfterLastEntity = i + 1;
     80                     break;
     81                 }
     82             }
     83         }
     84         result.append(text + positionAfterLastEntity, length - positionAfterLastEntity);
     85     } else {
     86         const UChar* text = source.characters16() + offset;
     87 
     88         size_t positionAfterLastEntity = 0;
     89         for (size_t i = 0; i < length; ++i) {
     90             for (size_t entityIndex = 0; entityIndex < WTF_ARRAY_LENGTH(entityMaps); ++entityIndex) {
     91                 if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIndex].mask & entityMask) {
     92                     result.append(text + positionAfterLastEntity, i - positionAfterLastEntity);
     93                     result.append(entityMaps[entityIndex].reference);
     94                     positionAfterLastEntity = i + 1;
     95                     break;
     96                 }
     97             }
     98         }
     99         result.append(text + positionAfterLastEntity, length - positionAfterLastEntity);
    100     }
    101 }
    102 
    103 MarkupAccumulator::MarkupAccumulator(Vector<Node*>* nodes, EAbsoluteURLs resolveUrlsMethod, const Range* range)
    104     : m_nodes(nodes)
    105     , m_range(range)
    106     , m_resolveURLsMethod(resolveUrlsMethod)
    107 {
    108 }
    109 
    110 MarkupAccumulator::~MarkupAccumulator()
    111 {
    112 }
    113 
    114 String MarkupAccumulator::serializeNodes(Node* targetNode, EChildrenOnly childrenOnly)
    115 {
    116     return serializeNodes(targetNode, childrenOnly, 0);
    117 }
    118 
    119 String MarkupAccumulator::serializeNodes(Node* targetNode, EChildrenOnly childrenOnly, Vector<QualifiedName>* tagNamesToSkip)
    120 {
    121     serializeNodesWithNamespaces(targetNode, childrenOnly, 0, tagNamesToSkip);
    122     return m_markup.toString();
    123 }
    124 
    125 void MarkupAccumulator::serializeNodesWithNamespaces(Node* targetNode, EChildrenOnly childrenOnly, const Namespaces* namespaces, Vector<QualifiedName>* tagNamesToSkip)
    126 {
    127     if (tagNamesToSkip) {
    128         for (size_t i = 0; i < tagNamesToSkip->size(); ++i) {
    129             if (targetNode->hasTagName(tagNamesToSkip->at(i)))
    130                 return;
    131         }
    132     }
    133 
    134     Namespaces namespaceHash;
    135     if (namespaces)
    136         namespaceHash = *namespaces;
    137 
    138     if (!childrenOnly)
    139         appendStartTag(targetNode, &namespaceHash);
    140 
    141     if (!(targetNode->document()->isHTMLDocument() && elementCannotHaveEndTag(targetNode))) {
    142         Node* current = targetNode->hasTagName(templateTag) ? toHTMLTemplateElement(targetNode)->content()->firstChild() : targetNode->firstChild();
    143         for ( ; current; current = current->nextSibling())
    144             serializeNodesWithNamespaces(current, IncludeNode, &namespaceHash, tagNamesToSkip);
    145     }
    146 
    147     if (!childrenOnly)
    148         appendEndTag(targetNode);
    149 }
    150 
    151 String MarkupAccumulator::resolveURLIfNeeded(const Element* element, const String& urlString) const
    152 {
    153     switch (m_resolveURLsMethod) {
    154     case ResolveAllURLs:
    155         return element->document()->completeURL(urlString).string();
    156 
    157     case ResolveNonLocalURLs:
    158         if (!element->document()->url().isLocalFile())
    159             return element->document()->completeURL(urlString).string();
    160         break;
    161 
    162     case DoNotResolveURLs:
    163         break;
    164     }
    165     return urlString;
    166 }
    167 
    168 void MarkupAccumulator::appendString(const String& string)
    169 {
    170     m_markup.append(string);
    171 }
    172 
    173 void MarkupAccumulator::appendStartTag(Node* node, Namespaces* namespaces)
    174 {
    175     appendStartMarkup(m_markup, node, namespaces);
    176     if (m_nodes)
    177         m_nodes->append(node);
    178 }
    179 
    180 void MarkupAccumulator::appendEndTag(Node* node)
    181 {
    182     appendEndMarkup(m_markup, node);
    183 }
    184 
    185 size_t MarkupAccumulator::totalLength(const Vector<String>& strings)
    186 {
    187     size_t length = 0;
    188     for (size_t i = 0; i < strings.size(); ++i)
    189         length += strings[i].length();
    190     return length;
    191 }
    192 
    193 void MarkupAccumulator::concatenateMarkup(StringBuilder& result)
    194 {
    195     result.append(m_markup);
    196 }
    197 
    198 void MarkupAccumulator::appendAttributeValue(StringBuilder& result, const String& attribute, bool documentIsHTML)
    199 {
    200     appendCharactersReplacingEntities(result, attribute, 0, attribute.length(),
    201         documentIsHTML ? EntityMaskInHTMLAttributeValue : EntityMaskInAttributeValue);
    202 }
    203 
    204 void MarkupAccumulator::appendCustomAttributes(StringBuilder&, Element*, Namespaces*)
    205 {
    206 }
    207 
    208 void MarkupAccumulator::appendQuotedURLAttributeValue(StringBuilder& result, const Element* element, const Attribute& attribute)
    209 {
    210     ASSERT(element->isURLAttribute(attribute));
    211     const String resolvedURLString = resolveURLIfNeeded(element, attribute.value());
    212     UChar quoteChar = '"';
    213     String strippedURLString = resolvedURLString.stripWhiteSpace();
    214     if (protocolIsJavaScript(strippedURLString)) {
    215         // minimal escaping for javascript urls
    216         if (strippedURLString.contains('"')) {
    217             if (strippedURLString.contains('\''))
    218                 strippedURLString.replaceWithLiteral('"', "&quot;");
    219             else
    220                 quoteChar = '\'';
    221         }
    222         result.append(quoteChar);
    223         result.append(strippedURLString);
    224         result.append(quoteChar);
    225         return;
    226     }
    227 
    228     // FIXME: This does not fully match other browsers. Firefox percent-escapes non-ASCII characters for innerHTML.
    229     result.append(quoteChar);
    230     appendAttributeValue(result, resolvedURLString, false);
    231     result.append(quoteChar);
    232 }
    233 
    234 void MarkupAccumulator::appendNodeValue(StringBuilder& result, const Node* node, const Range* range, EntityMask entityMask)
    235 {
    236     const String str = node->nodeValue();
    237     unsigned length = str.length();
    238     unsigned start = 0;
    239 
    240     if (range) {
    241         if (node == range->endContainer())
    242             length = range->endOffset();
    243         if (node == range->startContainer()) {
    244             start = range->startOffset();
    245             length -= start;
    246         }
    247     }
    248 
    249     appendCharactersReplacingEntities(result, str, start, length, entityMask);
    250 }
    251 
    252 bool MarkupAccumulator::shouldAddNamespaceElement(const Element* element)
    253 {
    254     // Don't add namespace attribute if it is already defined for this elem.
    255     const AtomicString& prefix = element->prefix();
    256     if (prefix.isEmpty())
    257         return !element->hasAttribute(xmlnsAtom);
    258 
    259     DEFINE_STATIC_LOCAL(String, xmlnsWithColon, ("xmlns:"));
    260     return !element->hasAttribute(xmlnsWithColon + prefix);
    261 }
    262 
    263 bool MarkupAccumulator::shouldAddNamespaceAttribute(const Attribute& attribute, Namespaces& namespaces)
    264 {
    265     // Don't add namespace attributes twice
    266     if (attribute.name() == XMLNSNames::xmlnsAttr) {
    267         namespaces.set(emptyAtom.impl(), attribute.value().impl());
    268         return false;
    269     }
    270 
    271     QualifiedName xmlnsPrefixAttr(xmlnsAtom, attribute.localName(), XMLNSNames::xmlnsNamespaceURI);
    272     if (attribute.name() == xmlnsPrefixAttr) {
    273         namespaces.set(attribute.localName().impl(), attribute.value().impl());
    274         return false;
    275     }
    276 
    277     return true;
    278 }
    279 
    280 void MarkupAccumulator::appendNamespace(StringBuilder& result, const AtomicString& prefix, const AtomicString& namespaceURI, Namespaces& namespaces)
    281 {
    282     if (namespaceURI.isEmpty())
    283         return;
    284 
    285     // Use emptyAtoms's impl() for both null and empty strings since the HashMap can't handle 0 as a key
    286     StringImpl* pre = prefix.isEmpty() ? emptyAtom.impl() : prefix.impl();
    287     StringImpl* foundNS = namespaces.get(pre);
    288     if (foundNS != namespaceURI.impl()) {
    289         namespaces.set(pre, namespaceURI.impl());
    290         result.append(' ');
    291         result.append(xmlnsAtom.string());
    292         if (!prefix.isEmpty()) {
    293             result.append(':');
    294             result.append(prefix);
    295         }
    296 
    297         result.append('=');
    298         result.append('"');
    299         appendAttributeValue(result, namespaceURI, false);
    300         result.append('"');
    301     }
    302 }
    303 
    304 EntityMask MarkupAccumulator::entityMaskForText(Text* text) const
    305 {
    306     const QualifiedName* parentName = 0;
    307     if (text->parentElement())
    308         parentName = &(text->parentElement())->tagQName();
    309 
    310     if (parentName && (*parentName == scriptTag || *parentName == styleTag || *parentName == xmpTag))
    311         return EntityMaskInCDATA;
    312 
    313     return text->document()->isHTMLDocument() ? EntityMaskInHTMLPCDATA : EntityMaskInPCDATA;
    314 }
    315 
    316 void MarkupAccumulator::appendText(StringBuilder& result, Text* text)
    317 {
    318     appendNodeValue(result, text, m_range, entityMaskForText(text));
    319 }
    320 
    321 void MarkupAccumulator::appendComment(StringBuilder& result, const String& comment)
    322 {
    323     // FIXME: Comment content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "-->".
    324     result.appendLiteral("<!--");
    325     result.append(comment);
    326     result.appendLiteral("-->");
    327 }
    328 
    329 void MarkupAccumulator::appendXMLDeclaration(StringBuilder& result, const Document* document)
    330 {
    331     if (!document->hasXMLDeclaration())
    332         return;
    333 
    334     result.appendLiteral("<?xml version=\"");
    335     result.append(document->xmlVersion());
    336     const String& encoding = document->xmlEncoding();
    337     if (!encoding.isEmpty()) {
    338         result.appendLiteral("\" encoding=\"");
    339         result.append(encoding);
    340     }
    341     if (document->xmlStandaloneStatus() != Document::StandaloneUnspecified) {
    342         result.appendLiteral("\" standalone=\"");
    343         if (document->xmlStandalone())
    344             result.appendLiteral("yes");
    345         else
    346             result.appendLiteral("no");
    347     }
    348 
    349     result.appendLiteral("\"?>");
    350 }
    351 
    352 void MarkupAccumulator::appendDocumentType(StringBuilder& result, const DocumentType* n)
    353 {
    354     if (n->name().isEmpty())
    355         return;
    356 
    357     result.appendLiteral("<!DOCTYPE ");
    358     result.append(n->name());
    359     if (!n->publicId().isEmpty()) {
    360         result.appendLiteral(" PUBLIC \"");
    361         result.append(n->publicId());
    362         result.append('"');
    363         if (!n->systemId().isEmpty()) {
    364             result.append(' ');
    365             result.append('"');
    366             result.append(n->systemId());
    367             result.append('"');
    368         }
    369     } else if (!n->systemId().isEmpty()) {
    370         result.appendLiteral(" SYSTEM \"");
    371         result.append(n->systemId());
    372         result.append('"');
    373     }
    374     if (!n->internalSubset().isEmpty()) {
    375         result.append(' ');
    376         result.append('[');
    377         result.append(n->internalSubset());
    378         result.append(']');
    379     }
    380     result.append('>');
    381 }
    382 
    383 void MarkupAccumulator::appendProcessingInstruction(StringBuilder& result, const String& target, const String& data)
    384 {
    385     // FIXME: PI data is not escaped, but XMLSerializer (and possibly other callers) this should raise an exception if it includes "?>".
    386     result.append('<');
    387     result.append('?');
    388     result.append(target);
    389     result.append(' ');
    390     result.append(data);
    391     result.append('?');
    392     result.append('>');
    393 }
    394 
    395 void MarkupAccumulator::appendElement(StringBuilder& result, Element* element, Namespaces* namespaces)
    396 {
    397     appendOpenTag(result, element, namespaces);
    398 
    399     if (element->hasAttributes()) {
    400         unsigned length = element->attributeCount();
    401         for (unsigned int i = 0; i < length; i++)
    402             appendAttribute(result, element, *element->attributeItem(i), namespaces);
    403     }
    404 
    405     // Give an opportunity to subclasses to add their own attributes.
    406     appendCustomAttributes(result, element, namespaces);
    407 
    408     appendCloseTag(result, element);
    409 }
    410 
    411 void MarkupAccumulator::appendOpenTag(StringBuilder& result, Element* element, Namespaces* namespaces)
    412 {
    413     result.append('<');
    414     result.append(element->nodeNamePreservingCase());
    415     if (!element->document()->isHTMLDocument() && namespaces && shouldAddNamespaceElement(element))
    416         appendNamespace(result, element->prefix(), element->namespaceURI(), *namespaces);
    417 }
    418 
    419 void MarkupAccumulator::appendCloseTag(StringBuilder& result, Element* element)
    420 {
    421     if (shouldSelfClose(element)) {
    422         if (element->isHTMLElement())
    423             result.append(' '); // XHTML 1.0 <-> HTML compatibility.
    424         result.append('/');
    425     }
    426     result.append('>');
    427 }
    428 
    429 static inline bool attributeIsInSerializedNamespace(const Attribute& attribute)
    430 {
    431     return attribute.namespaceURI() == XMLNames::xmlNamespaceURI
    432         || attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI
    433         || attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI;
    434 }
    435 
    436 void MarkupAccumulator::appendAttribute(StringBuilder& result, Element* element, const Attribute& attribute, Namespaces* namespaces)
    437 {
    438     bool documentIsHTML = element->document()->isHTMLDocument();
    439 
    440     result.append(' ');
    441 
    442     if (documentIsHTML && !attributeIsInSerializedNamespace(attribute))
    443         result.append(attribute.name().localName());
    444     else {
    445         QualifiedName prefixedName = attribute.name();
    446         if (attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI) {
    447             if (!attribute.prefix())
    448                 prefixedName.setPrefix(xlinkAtom);
    449         } else if (attribute.namespaceURI() == XMLNames::xmlNamespaceURI) {
    450             if (!attribute.prefix())
    451                 prefixedName.setPrefix(xmlAtom);
    452         } else if (attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI) {
    453             if (attribute.name() != XMLNSNames::xmlnsAttr && !attribute.prefix())
    454                 prefixedName.setPrefix(xmlnsAtom);
    455         }
    456         result.append(prefixedName.toString());
    457     }
    458 
    459     result.append('=');
    460 
    461     if (element->isURLAttribute(attribute))
    462         appendQuotedURLAttributeValue(result, element, attribute);
    463     else {
    464         result.append('"');
    465         appendAttributeValue(result, attribute.value(), documentIsHTML);
    466         result.append('"');
    467     }
    468 
    469     if (!documentIsHTML && namespaces && shouldAddNamespaceAttribute(attribute, *namespaces))
    470         appendNamespace(result, attribute.prefix(), attribute.namespaceURI(), *namespaces);
    471 }
    472 
    473 void MarkupAccumulator::appendCDATASection(StringBuilder& result, const String& section)
    474 {
    475     // FIXME: CDATA content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "]]>".
    476     result.appendLiteral("<![CDATA[");
    477     result.append(section);
    478     result.appendLiteral("]]>");
    479 }
    480 
    481 void MarkupAccumulator::appendStartMarkup(StringBuilder& result, const Node* node, Namespaces* namespaces)
    482 {
    483     switch (node->nodeType()) {
    484     case Node::TEXT_NODE:
    485         appendText(result, toText(const_cast<Node*>(node)));
    486         break;
    487     case Node::COMMENT_NODE:
    488         appendComment(result, static_cast<const Comment*>(node)->data());
    489         break;
    490     case Node::DOCUMENT_NODE:
    491         appendXMLDeclaration(result, toDocument(node));
    492         break;
    493     case Node::DOCUMENT_FRAGMENT_NODE:
    494         break;
    495     case Node::DOCUMENT_TYPE_NODE:
    496         appendDocumentType(result, static_cast<const DocumentType*>(node));
    497         break;
    498     case Node::PROCESSING_INSTRUCTION_NODE:
    499         appendProcessingInstruction(result, static_cast<const ProcessingInstruction*>(node)->target(), static_cast<const ProcessingInstruction*>(node)->data());
    500         break;
    501     case Node::ELEMENT_NODE:
    502         appendElement(result, toElement(const_cast<Node*>(node)), namespaces);
    503         break;
    504     case Node::CDATA_SECTION_NODE:
    505         appendCDATASection(result, static_cast<const CDATASection*>(node)->data());
    506         break;
    507     case Node::ATTRIBUTE_NODE:
    508     case Node::ENTITY_NODE:
    509     case Node::NOTATION_NODE:
    510     case Node::XPATH_NAMESPACE_NODE:
    511         ASSERT_NOT_REACHED();
    512         break;
    513     }
    514 }
    515 
    516 // Rules of self-closure
    517 // 1. No elements in HTML documents use the self-closing syntax.
    518 // 2. Elements w/ children never self-close because they use a separate end tag.
    519 // 3. HTML elements which do not have a "forbidden" end tag will close with a separate end tag.
    520 // 4. Other elements self-close.
    521 bool MarkupAccumulator::shouldSelfClose(const Node* node)
    522 {
    523     if (node->document()->isHTMLDocument())
    524         return false;
    525     if (node->hasChildNodes())
    526         return false;
    527     if (node->isHTMLElement() && !elementCannotHaveEndTag(node))
    528         return false;
    529     return true;
    530 }
    531 
    532 bool MarkupAccumulator::elementCannotHaveEndTag(const Node* node)
    533 {
    534     if (!node->isHTMLElement())
    535         return false;
    536 
    537     // FIXME: ieForbidsInsertHTML may not be the right function to call here
    538     // ieForbidsInsertHTML is used to disallow setting innerHTML/outerHTML
    539     // or createContextualFragment.  It does not necessarily align with
    540     // which elements should be serialized w/o end tags.
    541     return static_cast<const HTMLElement*>(node)->ieForbidsInsertHTML();
    542 }
    543 
    544 void MarkupAccumulator::appendEndMarkup(StringBuilder& result, const Node* node)
    545 {
    546     if (!node->isElementNode() || shouldSelfClose(node) || (!node->hasChildNodes() && elementCannotHaveEndTag(node)))
    547         return;
    548 
    549     result.append('<');
    550     result.append('/');
    551     result.append(toElement(node)->nodeNamePreservingCase());
    552     result.append('>');
    553 }
    554 
    555 }
    556