Home | History | Annotate | Download | only in editing
      1 /*
      2  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
      3  * Copyright (C) 2009, 2010 Google Inc. All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     15  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     16  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     17  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     18  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     19  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     20  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include "config.h"
     28 #include "MarkupAccumulator.h"
     29 
     30 #include "CDATASection.h"
     31 #include "Comment.h"
     32 #include "DocumentFragment.h"
     33 #include "DocumentType.h"
     34 #include "Editor.h"
     35 #include "HTMLElement.h"
     36 #include "HTMLNames.h"
     37 #include "KURL.h"
     38 #include "ProcessingInstruction.h"
     39 #include "XMLNSNames.h"
     40 #include <wtf/unicode/CharacterNames.h>
     41 
     42 namespace WebCore {
     43 
     44 using namespace HTMLNames;
     45 
     46 void appendCharactersReplacingEntities(Vector<UChar>& out, const UChar* content, size_t length, EntityMask entityMask)
     47 {
     48     DEFINE_STATIC_LOCAL(const String, ampReference, ("&amp;"));
     49     DEFINE_STATIC_LOCAL(const String, ltReference, ("&lt;"));
     50     DEFINE_STATIC_LOCAL(const String, gtReference, ("&gt;"));
     51     DEFINE_STATIC_LOCAL(const String, quotReference, ("&quot;"));
     52     DEFINE_STATIC_LOCAL(const String, nbspReference, ("&nbsp;"));
     53 
     54     static const EntityDescription entityMaps[] = {
     55         { '&', ampReference, EntityAmp },
     56         { '<', ltReference, EntityLt },
     57         { '>', gtReference, EntityGt },
     58         { '"', quotReference, EntityQuot },
     59         { noBreakSpace, nbspReference, EntityNbsp },
     60     };
     61 
     62     size_t positionAfterLastEntity = 0;
     63     for (size_t i = 0; i < length; ++i) {
     64         for (size_t m = 0; m < WTF_ARRAY_LENGTH(entityMaps); ++m) {
     65             if (content[i] == entityMaps[m].entity && entityMaps[m].mask & entityMask) {
     66                 out.append(content + positionAfterLastEntity, i - positionAfterLastEntity);
     67                 append(out, entityMaps[m].reference);
     68                 positionAfterLastEntity = i + 1;
     69                 break;
     70             }
     71         }
     72     }
     73     out.append(content + positionAfterLastEntity, length - positionAfterLastEntity);
     74 }
     75 
     76 MarkupAccumulator::MarkupAccumulator(Vector<Node*>* nodes, EAbsoluteURLs shouldResolveURLs, const Range* range)
     77     : m_nodes(nodes)
     78     , m_range(range)
     79     , m_shouldResolveURLs(shouldResolveURLs)
     80 {
     81 }
     82 
     83 MarkupAccumulator::~MarkupAccumulator()
     84 {
     85 }
     86 
     87 String MarkupAccumulator::serializeNodes(Node* node, Node* nodeToSkip, EChildrenOnly childrenOnly)
     88 {
     89     Vector<UChar> out;
     90     serializeNodesWithNamespaces(node, nodeToSkip, childrenOnly, 0);
     91     out.reserveInitialCapacity(length());
     92     concatenateMarkup(out);
     93     return String::adopt(out);
     94 }
     95 
     96 void MarkupAccumulator::serializeNodesWithNamespaces(Node* node, Node* nodeToSkip, EChildrenOnly childrenOnly, const Namespaces* namespaces)
     97 {
     98     if (node == nodeToSkip)
     99         return;
    100 
    101     Namespaces namespaceHash;
    102     if (namespaces)
    103         namespaceHash = *namespaces;
    104 
    105     if (!childrenOnly)
    106         appendStartTag(node, &namespaceHash);
    107 
    108     if (!(node->document()->isHTMLDocument() && elementCannotHaveEndTag(node))) {
    109         for (Node* current = node->firstChild(); current; current = current->nextSibling())
    110             serializeNodesWithNamespaces(current, nodeToSkip, IncludeNode, &namespaceHash);
    111     }
    112 
    113     if (!childrenOnly)
    114         appendEndTag(node);
    115 }
    116 
    117 void MarkupAccumulator::appendString(const String& string)
    118 {
    119     m_succeedingMarkup.append(string);
    120 }
    121 
    122 void MarkupAccumulator::appendStartTag(Node* node, Namespaces* namespaces)
    123 {
    124     Vector<UChar> markup;
    125     appendStartMarkup(markup, node, namespaces);
    126     appendString(String::adopt(markup));
    127     if (m_nodes)
    128         m_nodes->append(node);
    129 }
    130 
    131 void MarkupAccumulator::appendEndTag(Node* node)
    132 {
    133     Vector<UChar> markup;
    134     appendEndMarkup(markup, node);
    135     appendString(String::adopt(markup));
    136 }
    137 
    138 size_t MarkupAccumulator::totalLength(const Vector<String>& strings)
    139 {
    140     size_t length = 0;
    141     for (size_t i = 0; i < strings.size(); ++i)
    142         length += strings[i].length();
    143     return length;
    144 }
    145 
    146 // FIXME: This is a very inefficient way of accumulating the markup.
    147 // We're converting results of appendStartMarkup and appendEndMarkup from Vector<UChar> to String
    148 // and then back to Vector<UChar> and again to String here.
    149 void MarkupAccumulator::concatenateMarkup(Vector<UChar>& out)
    150 {
    151     for (size_t i = 0; i < m_succeedingMarkup.size(); ++i)
    152         append(out, m_succeedingMarkup[i]);
    153 }
    154 
    155 void MarkupAccumulator::appendAttributeValue(Vector<UChar>& result, const String& attribute, bool documentIsHTML)
    156 {
    157     appendCharactersReplacingEntities(result, attribute.characters(), attribute.length(),
    158         documentIsHTML ? EntityMaskInHTMLAttributeValue : EntityMaskInAttributeValue);
    159 }
    160 
    161 void MarkupAccumulator::appendQuotedURLAttributeValue(Vector<UChar>& result, const String& urlString)
    162 {
    163     UChar quoteChar = '\"';
    164     String strippedURLString = urlString.stripWhiteSpace();
    165     if (protocolIsJavaScript(strippedURLString)) {
    166         // minimal escaping for javascript urls
    167         if (strippedURLString.contains('"')) {
    168             if (strippedURLString.contains('\''))
    169                 strippedURLString.replace('\"', "&quot;");
    170             else
    171                 quoteChar = '\'';
    172         }
    173         result.append(quoteChar);
    174         append(result, strippedURLString);
    175         result.append(quoteChar);
    176         return;
    177     }
    178 
    179     // FIXME: This does not fully match other browsers. Firefox percent-escapes non-ASCII characters for innerHTML.
    180     result.append(quoteChar);
    181     appendAttributeValue(result, urlString, false);
    182     result.append(quoteChar);
    183 }
    184 
    185 void MarkupAccumulator::appendNodeValue(Vector<UChar>& out, const Node* node, const Range* range, EntityMask entityMask)
    186 {
    187     String str = node->nodeValue();
    188     const UChar* characters = str.characters();
    189     size_t length = str.length();
    190 
    191     if (range) {
    192         ExceptionCode ec;
    193         if (node == range->endContainer(ec))
    194             length = range->endOffset(ec);
    195         if (node == range->startContainer(ec)) {
    196             size_t start = range->startOffset(ec);
    197             characters += start;
    198             length -= start;
    199         }
    200     }
    201 
    202     appendCharactersReplacingEntities(out, characters, length, entityMask);
    203 }
    204 
    205 bool MarkupAccumulator::shouldAddNamespaceElement(const Element* element)
    206 {
    207     // Don't add namespace attribute if it is already defined for this elem.
    208     const AtomicString& prefix = element->prefix();
    209     AtomicString attr = !prefix.isEmpty() ? "xmlns:" + prefix : "xmlns";
    210     return !element->hasAttribute(attr);
    211 }
    212 
    213 bool MarkupAccumulator::shouldAddNamespaceAttribute(const Attribute& attribute, Namespaces& namespaces)
    214 {
    215     namespaces.checkConsistency();
    216 
    217     // Don't add namespace attributes twice
    218     if (attribute.name() == XMLNSNames::xmlnsAttr) {
    219         namespaces.set(emptyAtom.impl(), attribute.value().impl());
    220         return false;
    221     }
    222 
    223     QualifiedName xmlnsPrefixAttr(xmlnsAtom, attribute.localName(), XMLNSNames::xmlnsNamespaceURI);
    224     if (attribute.name() == xmlnsPrefixAttr) {
    225         namespaces.set(attribute.localName().impl(), attribute.value().impl());
    226         return false;
    227     }
    228 
    229     return true;
    230 }
    231 
    232 void MarkupAccumulator::appendNamespace(Vector<UChar>& result, const AtomicString& prefix, const AtomicString& namespaceURI, Namespaces& namespaces)
    233 {
    234     namespaces.checkConsistency();
    235     if (namespaceURI.isEmpty())
    236         return;
    237 
    238     // Use emptyAtoms's impl() for both null and empty strings since the HashMap can't handle 0 as a key
    239     AtomicStringImpl* pre = prefix.isEmpty() ? emptyAtom.impl() : prefix.impl();
    240     AtomicStringImpl* foundNS = namespaces.get(pre);
    241     if (foundNS != namespaceURI.impl()) {
    242         namespaces.set(pre, namespaceURI.impl());
    243         result.append(' ');
    244         append(result, xmlnsAtom.string());
    245         if (!prefix.isEmpty()) {
    246             result.append(':');
    247             append(result, prefix);
    248         }
    249 
    250         result.append('=');
    251         result.append('"');
    252         appendAttributeValue(result, namespaceURI, false);
    253         result.append('"');
    254     }
    255 }
    256 
    257 EntityMask MarkupAccumulator::entityMaskForText(Text* text) const
    258 {
    259     const QualifiedName* parentName = 0;
    260     if (text->parentElement())
    261         parentName = &static_cast<Element*>(text->parentElement())->tagQName();
    262 
    263     if (parentName && (*parentName == scriptTag || *parentName == styleTag || *parentName == xmpTag))
    264         return EntityMaskInCDATA;
    265 
    266     return text->document()->isHTMLDocument() ? EntityMaskInHTMLPCDATA : EntityMaskInPCDATA;
    267 }
    268 
    269 void MarkupAccumulator::appendText(Vector<UChar>& out, Text* text)
    270 {
    271     appendNodeValue(out, text, m_range, entityMaskForText(text));
    272 }
    273 
    274 void MarkupAccumulator::appendComment(Vector<UChar>& out, const String& comment)
    275 {
    276     // FIXME: Comment content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "-->".
    277     append(out, "<!--");
    278     append(out, comment);
    279     append(out, "-->");
    280 }
    281 
    282 void MarkupAccumulator::appendDocumentType(Vector<UChar>& result, const DocumentType* n)
    283 {
    284     if (n->name().isEmpty())
    285         return;
    286 
    287     append(result, "<!DOCTYPE ");
    288     append(result, n->name());
    289     if (!n->publicId().isEmpty()) {
    290         append(result, " PUBLIC \"");
    291         append(result, n->publicId());
    292         append(result, "\"");
    293         if (!n->systemId().isEmpty()) {
    294             append(result, " \"");
    295             append(result, n->systemId());
    296             append(result, "\"");
    297         }
    298     } else if (!n->systemId().isEmpty()) {
    299         append(result, " SYSTEM \"");
    300         append(result, n->systemId());
    301         append(result, "\"");
    302     }
    303     if (!n->internalSubset().isEmpty()) {
    304         append(result, " [");
    305         append(result, n->internalSubset());
    306         append(result, "]");
    307     }
    308     append(result, ">");
    309 }
    310 
    311 void MarkupAccumulator::appendProcessingInstruction(Vector<UChar>& out, const String& target, const String& data)
    312 {
    313     // FIXME: PI data is not escaped, but XMLSerializer (and possibly other callers) this should raise an exception if it includes "?>".
    314     append(out, "<?");
    315     append(out, target);
    316     append(out, " ");
    317     append(out, data);
    318     append(out, "?>");
    319 }
    320 
    321 void MarkupAccumulator::appendElement(Vector<UChar>& out, Element* element, Namespaces* namespaces)
    322 {
    323     appendOpenTag(out, element, namespaces);
    324 
    325     NamedNodeMap* attributes = element->attributes();
    326     unsigned length = attributes->length();
    327     for (unsigned int i = 0; i < length; i++)
    328         appendAttribute(out, element, *attributes->attributeItem(i), namespaces);
    329 
    330     appendCloseTag(out, element);
    331 }
    332 
    333 void MarkupAccumulator::appendOpenTag(Vector<UChar>& out, Element* element, Namespaces* namespaces)
    334 {
    335     out.append('<');
    336     append(out, element->nodeNamePreservingCase());
    337     if (!element->document()->isHTMLDocument() && namespaces && shouldAddNamespaceElement(element))
    338         appendNamespace(out, element->prefix(), element->namespaceURI(), *namespaces);
    339 }
    340 
    341 void MarkupAccumulator::appendCloseTag(Vector<UChar>& out, Element* element)
    342 {
    343     if (shouldSelfClose(element)) {
    344         if (element->isHTMLElement())
    345             out.append(' '); // XHTML 1.0 <-> HTML compatibility.
    346         out.append('/');
    347     }
    348     out.append('>');
    349 }
    350 
    351 void MarkupAccumulator::appendAttribute(Vector<UChar>& out, Element* element, const Attribute& attribute, Namespaces* namespaces)
    352 {
    353     bool documentIsHTML = element->document()->isHTMLDocument();
    354 
    355     out.append(' ');
    356 
    357     if (documentIsHTML)
    358         append(out, attribute.name().localName());
    359     else
    360         append(out, attribute.name().toString());
    361 
    362     out.append('=');
    363 
    364     if (element->isURLAttribute(const_cast<Attribute*>(&attribute))) {
    365         // We don't want to complete file:/// URLs because it may contain sensitive information
    366         // about the user's system.
    367         if (shouldResolveURLs() && !element->document()->url().isLocalFile())
    368             appendQuotedURLAttributeValue(out, element->document()->completeURL(attribute.value()).string());
    369         else
    370             appendQuotedURLAttributeValue(out, attribute.value());
    371     } else {
    372         out.append('\"');
    373         appendAttributeValue(out, attribute.value(), documentIsHTML);
    374         out.append('\"');
    375     }
    376 
    377     if (!documentIsHTML && namespaces && shouldAddNamespaceAttribute(attribute, *namespaces))
    378         appendNamespace(out, attribute.prefix(), attribute.namespaceURI(), *namespaces);
    379 }
    380 
    381 void MarkupAccumulator::appendCDATASection(Vector<UChar>& out, const String& section)
    382 {
    383     // FIXME: CDATA content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "]]>".
    384     append(out, "<![CDATA[");
    385     append(out, section);
    386     append(out, "]]>");
    387 }
    388 
    389 void MarkupAccumulator::appendStartMarkup(Vector<UChar>& result, const Node* node, Namespaces* namespaces)
    390 {
    391     if (namespaces)
    392         namespaces->checkConsistency();
    393 
    394     switch (node->nodeType()) {
    395     case Node::TEXT_NODE:
    396         appendText(result, static_cast<Text*>(const_cast<Node*>(node)));
    397         break;
    398     case Node::COMMENT_NODE:
    399         appendComment(result, static_cast<const Comment*>(node)->data());
    400         break;
    401     case Node::DOCUMENT_NODE:
    402     case Node::DOCUMENT_FRAGMENT_NODE:
    403         break;
    404     case Node::DOCUMENT_TYPE_NODE:
    405         appendDocumentType(result, static_cast<const DocumentType*>(node));
    406         break;
    407     case Node::PROCESSING_INSTRUCTION_NODE:
    408         appendProcessingInstruction(result, static_cast<const ProcessingInstruction*>(node)->target(), static_cast<const ProcessingInstruction*>(node)->data());
    409         break;
    410     case Node::ELEMENT_NODE:
    411         appendElement(result, static_cast<Element*>(const_cast<Node*>(node)), namespaces);
    412         break;
    413     case Node::CDATA_SECTION_NODE:
    414         appendCDATASection(result, static_cast<const CDATASection*>(node)->data());
    415         break;
    416     case Node::ATTRIBUTE_NODE:
    417     case Node::ENTITY_NODE:
    418     case Node::ENTITY_REFERENCE_NODE:
    419     case Node::NOTATION_NODE:
    420     case Node::XPATH_NAMESPACE_NODE:
    421         ASSERT_NOT_REACHED();
    422         break;
    423     }
    424 }
    425 
    426 // Rules of self-closure
    427 // 1. No elements in HTML documents use the self-closing syntax.
    428 // 2. Elements w/ children never self-close because they use a separate end tag.
    429 // 3. HTML elements which do not have a "forbidden" end tag will close with a separate end tag.
    430 // 4. Other elements self-close.
    431 bool MarkupAccumulator::shouldSelfClose(const Node* node)
    432 {
    433     if (node->document()->isHTMLDocument())
    434         return false;
    435     if (node->hasChildNodes())
    436         return false;
    437     if (node->isHTMLElement() && !elementCannotHaveEndTag(node))
    438         return false;
    439     return true;
    440 }
    441 
    442 bool MarkupAccumulator::elementCannotHaveEndTag(const Node* node)
    443 {
    444     if (!node->isHTMLElement())
    445         return false;
    446 
    447     // FIXME: ieForbidsInsertHTML may not be the right function to call here
    448     // ieForbidsInsertHTML is used to disallow setting innerHTML/outerHTML
    449     // or createContextualFragment.  It does not necessarily align with
    450     // which elements should be serialized w/o end tags.
    451     return static_cast<const HTMLElement*>(node)->ieForbidsInsertHTML();
    452 }
    453 
    454 void MarkupAccumulator::appendEndMarkup(Vector<UChar>& result, const Node* node)
    455 {
    456     if (!node->isElementNode() || shouldSelfClose(node) || (!node->hasChildNodes() && elementCannotHaveEndTag(node)))
    457         return;
    458 
    459     result.append('<');
    460     result.append('/');
    461     append(result, static_cast<const Element*>(node)->nodeNamePreservingCase());
    462     result.append('>');
    463 }
    464 
    465 }
    466