Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2011 Adam Barth. All Rights Reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #include "config.h"
     27 #include "XSSFilter.h"
     28 
     29 #include "DOMWindow.h"
     30 #include "Document.h"
     31 #include "DocumentLoader.h"
     32 #include "Frame.h"
     33 #include "HTMLDocumentParser.h"
     34 #include "HTMLNames.h"
     35 #include "HTMLParamElement.h"
     36 #include "HTMLParserIdioms.h"
     37 #include "Settings.h"
     38 #include "TextEncoding.h"
     39 #include "TextResourceDecoder.h"
     40 #include <wtf/text/CString.h>
     41 
     42 namespace WebCore {
     43 
     44 using namespace HTMLNames;
     45 
     46 static bool isNonCanonicalCharacter(UChar c)
     47 {
     48     // We remove all non-ASCII characters, including non-printable ASCII characters.
     49     //
     50     // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character.
     51     // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the
     52     // adverse effect that we remove any legitimate zeros from a string.
     53     //
     54     // For instance: new String("http://localhost:8000") => new String("http://localhost:8").
     55     return (c == '\\' || c == '0' || c == '\0' || c >= 127);
     56 }
     57 
     58 static String canonicalize(const String& string)
     59 {
     60     return string.removeCharacters(&isNonCanonicalCharacter);
     61 }
     62 
     63 static bool isRequiredForInjection(UChar c)
     64 {
     65     return (c == '\'' || c == '"' || c == '<' || c == '>');
     66 }
     67 
     68 static bool hasName(const HTMLToken& token, const QualifiedName& name)
     69 {
     70     return equalIgnoringNullity(token.name(), static_cast<const String&>(name.localName()));
     71 }
     72 
     73 static bool findAttributeWithName(const HTMLToken& token, const QualifiedName& name, size_t& indexOfMatchingAttribute)
     74 {
     75     for (size_t i = 0; i < token.attributes().size(); ++i) {
     76         if (equalIgnoringNullity(token.attributes().at(i).m_name, name.localName())) {
     77             indexOfMatchingAttribute = i;
     78             return true;
     79         }
     80     }
     81     return false;
     82 }
     83 
     84 static bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name)
     85 {
     86     const size_t lengthOfShortestInlineEventHandlerName = 5; // To wit: oncut.
     87     if (name.size() < lengthOfShortestInlineEventHandlerName)
     88         return false;
     89     return name[0] == 'o' && name[1] == 'n';
     90 }
     91 
     92 static bool isDangerousHTTPEquiv(const String& value)
     93 {
     94     String equiv = value.stripWhiteSpace();
     95     return equalIgnoringCase(equiv, "refresh") || equalIgnoringCase(equiv, "set-cookie");
     96 }
     97 
     98 static bool containsJavaScriptURL(const Vector<UChar, 32>& value)
     99 {
    100     static const char javaScriptScheme[] = "javascript:";
    101     static const size_t lengthOfJavaScriptScheme = sizeof(javaScriptScheme) - 1;
    102 
    103     size_t i;
    104     for (i = 0; i < value.size(); ++i) {
    105         if (!isHTMLSpace(value[i]))
    106             break;
    107     }
    108 
    109     if (value.size() - i < lengthOfJavaScriptScheme)
    110         return false;
    111 
    112     return equalIgnoringCase(value.data() + i, javaScriptScheme, lengthOfJavaScriptScheme);
    113 }
    114 
    115 static String decodeURL(const String& string, const TextEncoding& encoding)
    116 {
    117     String workingString = string;
    118     workingString.replace('+', ' ');
    119     workingString = decodeURLEscapeSequences(workingString);
    120     CString workingStringUTF8 = workingString.utf8();
    121     String decodedString = encoding.decode(workingStringUTF8.data(), workingStringUTF8.length());
    122     // FIXME: Is this check necessary?
    123     if (decodedString.isEmpty())
    124         return canonicalize(workingString);
    125     return canonicalize(decodedString);
    126 }
    127 
    128 XSSFilter::XSSFilter(HTMLDocumentParser* parser)
    129     : m_parser(parser)
    130     , m_isEnabled(false)
    131     , m_xssProtection(XSSProtectionEnabled)
    132     , m_state(Uninitialized)
    133 {
    134     ASSERT(m_parser);
    135     if (Frame* frame = parser->document()->frame()) {
    136         if (Settings* settings = frame->settings())
    137             m_isEnabled = settings->xssAuditorEnabled();
    138     }
    139     // Although tempting to call init() at this point, the various objects
    140     // we want to reference might not all have been constructed yet.
    141 }
    142 
    143 void XSSFilter::init()
    144 {
    145     const size_t miniumLengthForSuffixTree = 512; // FIXME: Tune this parameter.
    146     const int suffixTreeDepth = 5;
    147 
    148     ASSERT(m_state == Uninitialized);
    149     m_state = Initial;
    150 
    151     if (!m_isEnabled)
    152         return;
    153 
    154     // In theory, the Document could have detached from the Frame after the
    155     // XSSFilter was constructed.
    156     if (!m_parser->document()->frame()) {
    157         m_isEnabled = false;
    158         return;
    159     }
    160 
    161     const KURL& url = m_parser->document()->url();
    162 
    163     if (url.protocolIsData()) {
    164         m_isEnabled = false;
    165         return;
    166     }
    167 
    168     TextResourceDecoder* decoder = m_parser->document()->decoder();
    169     m_decodedURL = decoder ? decodeURL(url.string(), decoder->encoding()) : url.string();
    170     if (m_decodedURL.find(isRequiredForInjection, 0) == notFound)
    171         m_decodedURL = String();
    172 
    173     if (DocumentLoader* documentLoader = m_parser->document()->frame()->loader()->documentLoader()) {
    174         DEFINE_STATIC_LOCAL(String, XSSProtectionHeader, ("X-XSS-Protection"));
    175         m_xssProtection = parseXSSProtectionHeader(documentLoader->response().httpHeaderField(XSSProtectionHeader));
    176 
    177         FormData* httpBody = documentLoader->originalRequest().httpBody();
    178         if (httpBody && !httpBody->isEmpty()) {
    179             String httpBodyAsString = httpBody->flattenToString();
    180             m_decodedHTTPBody = decoder ? decodeURL(httpBodyAsString, decoder->encoding()) : httpBodyAsString;
    181             if (m_decodedHTTPBody.find(isRequiredForInjection, 0) == notFound)
    182                 m_decodedHTTPBody = String();
    183             if (m_decodedHTTPBody.length() >= miniumLengthForSuffixTree)
    184                 m_decodedHTTPBodySuffixTree = adoptPtr(new SuffixTree<ASCIICodebook>(m_decodedHTTPBody, suffixTreeDepth));
    185         }
    186     }
    187 
    188     if (m_decodedURL.isEmpty() && m_decodedHTTPBody.isEmpty())
    189         m_isEnabled = false;
    190 }
    191 
    192 void XSSFilter::filterToken(HTMLToken& token)
    193 {
    194     if (m_state == Uninitialized) {
    195         init();
    196         ASSERT(m_state == Initial);
    197     }
    198 
    199     if (!m_isEnabled || m_xssProtection == XSSProtectionDisabled)
    200         return;
    201 
    202     bool didBlockScript = false;
    203 
    204     switch (m_state) {
    205     case Uninitialized:
    206         ASSERT_NOT_REACHED();
    207         break;
    208     case Initial:
    209         didBlockScript = filterTokenInitial(token);
    210         break;
    211     case AfterScriptStartTag:
    212         didBlockScript = filterTokenAfterScriptStartTag(token);
    213         ASSERT(m_state == Initial);
    214         m_cachedSnippet = String();
    215         break;
    216     }
    217 
    218     if (didBlockScript) {
    219         // FIXME: Consider using a more helpful console message.
    220         DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to execute a JavaScript script. Source code of script found within request.\n"));
    221         // FIXME: We should add the real line number to the console.
    222         m_parser->document()->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String());
    223 
    224         if (m_xssProtection == XSSProtectionBlockEnabled) {
    225             m_parser->document()->frame()->loader()->stopAllLoaders();
    226             m_parser->document()->frame()->navigationScheduler()->scheduleLocationChange(m_parser->document()->securityOrigin(), blankURL(), String());
    227         }
    228     }
    229 }
    230 
    231 bool XSSFilter::filterTokenInitial(HTMLToken& token)
    232 {
    233     ASSERT(m_state == Initial);
    234 
    235     if (token.type() != HTMLToken::StartTag)
    236         return false;
    237 
    238     bool didBlockScript = eraseDangerousAttributesIfInjected(token);
    239 
    240     if (hasName(token, scriptTag))
    241         didBlockScript |= filterScriptToken(token);
    242     else if (hasName(token, objectTag))
    243         didBlockScript |= filterObjectToken(token);
    244     else if (hasName(token, paramTag))
    245         didBlockScript |= filterParamToken(token);
    246     else if (hasName(token, embedTag))
    247         didBlockScript |= filterEmbedToken(token);
    248     else if (hasName(token, appletTag))
    249         didBlockScript |= filterAppletToken(token);
    250     else if (hasName(token, iframeTag))
    251         didBlockScript |= filterIframeToken(token);
    252     else if (hasName(token, metaTag))
    253         didBlockScript |= filterMetaToken(token);
    254     else if (hasName(token, baseTag))
    255         didBlockScript |= filterBaseToken(token);
    256     else if (hasName(token, formTag))
    257         didBlockScript |= filterFormToken(token);
    258 
    259     return didBlockScript;
    260 }
    261 
    262 bool XSSFilter::filterTokenAfterScriptStartTag(HTMLToken& token)
    263 {
    264     ASSERT(m_state == AfterScriptStartTag);
    265     m_state = Initial;
    266 
    267     if (token.type() != HTMLToken::Character) {
    268         ASSERT(token.type() == HTMLToken::EndTag || token.type() == HTMLToken::EndOfFile);
    269         return false;
    270     }
    271 
    272     int start = 0;
    273     // FIXME: We probably want to grab only the first few characters of the
    274     //        contents of the script element.
    275     int end = token.endIndex() - token.startIndex();
    276     if (isContainedInRequest(m_cachedSnippet + snippetForRange(token, start, end))) {
    277         token.eraseCharacters();
    278         token.appendToCharacter(' '); // Technically, character tokens can't be empty.
    279         return true;
    280     }
    281     return false;
    282 }
    283 
    284 bool XSSFilter::filterScriptToken(HTMLToken& token)
    285 {
    286     ASSERT(m_state == Initial);
    287     ASSERT(token.type() == HTMLToken::StartTag);
    288     ASSERT(hasName(token, scriptTag));
    289 
    290     if (eraseAttributeIfInjected(token, srcAttr, blankURL().string()))
    291         return true;
    292 
    293     m_state = AfterScriptStartTag;
    294     m_cachedSnippet = m_parser->sourceForToken(token);
    295     return false;
    296 }
    297 
    298 bool XSSFilter::filterObjectToken(HTMLToken& token)
    299 {
    300     ASSERT(m_state == Initial);
    301     ASSERT(token.type() == HTMLToken::StartTag);
    302     ASSERT(hasName(token, objectTag));
    303 
    304     bool didBlockScript = false;
    305 
    306     didBlockScript |= eraseAttributeIfInjected(token, dataAttr, blankURL().string());
    307     didBlockScript |= eraseAttributeIfInjected(token, typeAttr);
    308     didBlockScript |= eraseAttributeIfInjected(token, classidAttr);
    309 
    310     return didBlockScript;
    311 }
    312 
    313 bool XSSFilter::filterParamToken(HTMLToken& token)
    314 {
    315     ASSERT(m_state == Initial);
    316     ASSERT(token.type() == HTMLToken::StartTag);
    317     ASSERT(hasName(token, paramTag));
    318 
    319     size_t indexOfNameAttribute;
    320     if (!findAttributeWithName(token, nameAttr, indexOfNameAttribute))
    321         return false;
    322 
    323     const HTMLToken::Attribute& nameAttribute = token.attributes().at(indexOfNameAttribute);
    324     String name = String(nameAttribute.m_value.data(), nameAttribute.m_value.size());
    325 
    326     if (!HTMLParamElement::isURLParameter(name))
    327         return false;
    328 
    329     return eraseAttributeIfInjected(token, valueAttr, blankURL().string());
    330 }
    331 
    332 bool XSSFilter::filterEmbedToken(HTMLToken& token)
    333 {
    334     ASSERT(m_state == Initial);
    335     ASSERT(token.type() == HTMLToken::StartTag);
    336     ASSERT(hasName(token, embedTag));
    337 
    338     bool didBlockScript = false;
    339 
    340     didBlockScript |= eraseAttributeIfInjected(token, srcAttr, blankURL().string());
    341     didBlockScript |= eraseAttributeIfInjected(token, typeAttr);
    342 
    343     return didBlockScript;
    344 }
    345 
    346 bool XSSFilter::filterAppletToken(HTMLToken& token)
    347 {
    348     ASSERT(m_state == Initial);
    349     ASSERT(token.type() == HTMLToken::StartTag);
    350     ASSERT(hasName(token, appletTag));
    351 
    352     bool didBlockScript = false;
    353 
    354     didBlockScript |= eraseAttributeIfInjected(token, codeAttr);
    355     didBlockScript |= eraseAttributeIfInjected(token, objectAttr);
    356 
    357     return didBlockScript;
    358 }
    359 
    360 bool XSSFilter::filterIframeToken(HTMLToken& token)
    361 {
    362     ASSERT(m_state == Initial);
    363     ASSERT(token.type() == HTMLToken::StartTag);
    364     ASSERT(hasName(token, iframeTag));
    365 
    366     return eraseAttributeIfInjected(token, srcAttr);
    367 }
    368 
    369 bool XSSFilter::filterMetaToken(HTMLToken& token)
    370 {
    371     ASSERT(m_state == Initial);
    372     ASSERT(token.type() == HTMLToken::StartTag);
    373     ASSERT(hasName(token, metaTag));
    374 
    375     return eraseAttributeIfInjected(token, http_equivAttr);
    376 }
    377 
    378 bool XSSFilter::filterBaseToken(HTMLToken& token)
    379 {
    380     ASSERT(m_state == Initial);
    381     ASSERT(token.type() == HTMLToken::StartTag);
    382     ASSERT(hasName(token, baseTag));
    383 
    384     return eraseAttributeIfInjected(token, hrefAttr);
    385 }
    386 
    387 bool XSSFilter::filterFormToken(HTMLToken& token)
    388 {
    389     ASSERT(m_state == Initial);
    390     ASSERT(token.type() == HTMLToken::StartTag);
    391     ASSERT(hasName(token, formTag));
    392 
    393     return eraseAttributeIfInjected(token, actionAttr);
    394 }
    395 
    396 bool XSSFilter::eraseDangerousAttributesIfInjected(HTMLToken& token)
    397 {
    398     DEFINE_STATIC_LOCAL(String, safeJavaScriptURL, ("javascript:void(0)"));
    399 
    400     bool didBlockScript = false;
    401     for (size_t i = 0; i < token.attributes().size(); ++i) {
    402         const HTMLToken::Attribute& attribute = token.attributes().at(i);
    403         bool isInlineEventHandler = isNameOfInlineEventHandler(attribute.m_name);
    404         bool valueContainsJavaScriptURL = isInlineEventHandler ? false : containsJavaScriptURL(attribute.m_value);
    405         if (!isInlineEventHandler && !valueContainsJavaScriptURL)
    406             continue;
    407         if (!isContainedInRequest(snippetForAttribute(token, attribute)))
    408             continue;
    409         token.eraseValueOfAttribute(i);
    410         if (valueContainsJavaScriptURL)
    411             token.appendToAttributeValue(i, safeJavaScriptURL);
    412         didBlockScript = true;
    413     }
    414     return didBlockScript;
    415 }
    416 
    417 bool XSSFilter::eraseAttributeIfInjected(HTMLToken& token, const QualifiedName& attributeName, const String& replacementValue)
    418 {
    419     size_t indexOfAttribute;
    420     if (findAttributeWithName(token, attributeName, indexOfAttribute)) {
    421         const HTMLToken::Attribute& attribute = token.attributes().at(indexOfAttribute);
    422         if (isContainedInRequest(snippetForAttribute(token, attribute))) {
    423             if (attributeName == srcAttr && isSameOriginResource(String(attribute.m_value.data(), attribute.m_value.size())))
    424                 return false;
    425             if (attributeName == http_equivAttr && !isDangerousHTTPEquiv(String(attribute.m_value.data(), attribute.m_value.size())))
    426                 return false;
    427             token.eraseValueOfAttribute(indexOfAttribute);
    428             if (!replacementValue.isEmpty())
    429                 token.appendToAttributeValue(indexOfAttribute, replacementValue);
    430             return true;
    431         }
    432     }
    433     return false;
    434 }
    435 
    436 String XSSFilter::snippetForRange(const HTMLToken& token, int start, int end)
    437 {
    438     // FIXME: There's an extra allocation here that we could save by
    439     //        passing the range to the parser.
    440     return m_parser->sourceForToken(token).substring(start, end - start);
    441 }
    442 
    443 String XSSFilter::snippetForAttribute(const HTMLToken& token, const HTMLToken::Attribute& attribute)
    444 {
    445     // FIXME: We should grab one character before the name also.
    446     int start = attribute.m_nameRange.m_start - token.startIndex();
    447     // FIXME: We probably want to grab only the first few characters of the attribute value.
    448     int end = attribute.m_valueRange.m_end - token.startIndex();
    449     return snippetForRange(token, start, end);
    450 }
    451 
    452 bool XSSFilter::isContainedInRequest(const String& snippet)
    453 {
    454     ASSERT(!snippet.isEmpty());
    455     String canonicalizedSnippet = canonicalize(snippet);
    456     ASSERT(!canonicalizedSnippet.isEmpty());
    457     if (m_decodedURL.find(canonicalizedSnippet, 0, false) != notFound)
    458         return true;
    459     if (m_decodedHTTPBodySuffixTree && !m_decodedHTTPBodySuffixTree->mightContain(canonicalizedSnippet))
    460         return false;
    461     return m_decodedHTTPBody.find(canonicalizedSnippet, 0, false) != notFound;
    462 }
    463 
    464 bool XSSFilter::isSameOriginResource(const String& url)
    465 {
    466     // If the resource is loaded from the same URL as the enclosing page, it's
    467     // probably not an XSS attack, so we reduce false positives by allowing the
    468     // request. If the resource has a query string, we're more suspicious,
    469     // however, because that's pretty rare and the attacker might be able to
    470     // trick a server-side script into doing something dangerous with the query
    471     // string.
    472     KURL resourceURL(m_parser->document()->url(), url);
    473     return (m_parser->document()->url().host() == resourceURL.host() && resourceURL.query().isEmpty());
    474 }
    475 
    476 }
    477