Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2011 Adam Barth. All Rights Reserved.
      3  * Copyright (C) 2011 Daniel Bates (dbates (at) intudata.com).
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include "config.h"
     28 #include "core/html/parser/XSSAuditor.h"
     29 
     30 #include "HTMLNames.h"
     31 #include "SVGNames.h"
     32 #include "XLinkNames.h"
     33 #include "core/dom/Document.h"
     34 #include "core/fetch/TextResourceDecoder.h"
     35 #include "core/frame/ContentSecurityPolicy.h"
     36 #include "core/frame/Frame.h"
     37 #include "core/html/HTMLParamElement.h"
     38 #include "core/html/parser/HTMLDocumentParser.h"
     39 #include "core/html/parser/HTMLParserIdioms.h"
     40 #include "core/html/parser/XSSAuditorDelegate.h"
     41 #include "core/loader/DocumentLoader.h"
     42 #include "core/frame/Settings.h"
     43 #include "platform/JSONValues.h"
     44 #include "platform/network/FormData.h"
     45 #include "platform/text/DecodeEscapeSequences.h"
     46 #include "wtf/MainThread.h"
     47 
namespace {

// SecurityOrigin::urlWithUniqueSecurityOrigin() can't be used cross-thread, or we'd use it instead.
// This literal is passed as the replacement value when an injected form
// "action" or "formaction" attribute is erased (see filterFormToken(),
// filterInputToken() and filterButtonToken()).
const char kURLWithUniqueOrigin[] = "data:,";

} // namespace
     54 
     55 namespace WebCore {
     56 
     57 using namespace HTMLNames;
     58 
     59 static bool isNonCanonicalCharacter(UChar c)
     60 {
     61     // We remove all non-ASCII characters, including non-printable ASCII characters.
     62     //
     63     // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character.
     64     // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the
     65     // adverse effect that we remove any legitimate zeros from a string.
     66     //
     67     // For instance: new String("http://localhost:8000") => new String("http://localhost:8").
     68     return (c == '\\' || c == '0' || c == '\0' || c >= 127);
     69 }
     70 
     71 static String canonicalize(const String& string)
     72 {
     73     return string.removeCharacters(&isNonCanonicalCharacter);
     74 }
     75 
     76 static bool isRequiredForInjection(UChar c)
     77 {
     78     return (c == '\'' || c == '"' || c == '<' || c == '>');
     79 }
     80 
     81 static bool isTerminatingCharacter(UChar c)
     82 {
     83     return (c == '&' || c == '/' || c == '"' || c == '\'' || c == '<' || c == '>' || c == ',');
     84 }
     85 
     86 static bool isHTMLQuote(UChar c)
     87 {
     88     return (c == '"' || c == '\'');
     89 }
     90 
     91 static bool isJSNewline(UChar c)
     92 {
     93     // Per ecma-262 section 7.3 Line Terminators.
     94     return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029);
     95 }
     96 
     97 static bool startsHTMLCommentAt(const String& string, size_t start)
     98 {
     99     return (start + 3 < string.length() && string[start] == '<' && string[start+1] == '!' && string[start+2] == '-' && string[start+3] == '-');
    100 }
    101 
    102 static bool startsSingleLineCommentAt(const String& string, size_t start)
    103 {
    104     return (start + 1 < string.length() && string[start] == '/' && string[start+1] == '/');
    105 }
    106 
    107 static bool startsMultiLineCommentAt(const String& string, size_t start)
    108 {
    109     return (start + 1 < string.length() && string[start] == '/' && string[start+1] == '*');
    110 }
    111 
    112 // If other files need this, we should move this to core/html/parser/HTMLParserIdioms.h
    113 template<size_t inlineCapacity>
    114 bool threadSafeMatch(const Vector<UChar, inlineCapacity>& vector, const QualifiedName& qname)
    115 {
    116     return equalIgnoringNullity(vector, qname.localName().impl());
    117 }
    118 
    119 static bool hasName(const HTMLToken& token, const QualifiedName& name)
    120 {
    121     return threadSafeMatch(token.name(), name);
    122 }
    123 
    124 static bool findAttributeWithName(const HTMLToken& token, const QualifiedName& name, size_t& indexOfMatchingAttribute)
    125 {
    126     // Notice that we're careful not to ref the StringImpl here because we might be on a background thread.
    127     const String& attrName = name.namespaceURI() == XLinkNames::xlinkNamespaceURI ? "xlink:" + name.localName().string() : name.localName().string();
    128 
    129     for (size_t i = 0; i < token.attributes().size(); ++i) {
    130         if (equalIgnoringNullity(token.attributes().at(i).name, attrName)) {
    131             indexOfMatchingAttribute = i;
    132             return true;
    133         }
    134     }
    135     return false;
    136 }
    137 
    138 static bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name)
    139 {
    140     const size_t lengthOfShortestInlineEventHandlerName = 5; // To wit: oncut.
    141     if (name.size() < lengthOfShortestInlineEventHandlerName)
    142         return false;
    143     return name[0] == 'o' && name[1] == 'n';
    144 }
    145 
    146 static bool isDangerousHTTPEquiv(const String& value)
    147 {
    148     String equiv = value.stripWhiteSpace();
    149     return equalIgnoringCase(equiv, "refresh") || equalIgnoringCase(equiv, "set-cookie");
    150 }
    151 
    152 static inline String decode16BitUnicodeEscapeSequences(const String& string)
    153 {
    154     // Note, the encoding is ignored since each %u-escape sequence represents a UTF-16 code unit.
    155     return decodeEscapeSequences<Unicode16BitEscapeSequence>(string, UTF8Encoding());
    156 }
    157 
    158 static inline String decodeStandardURLEscapeSequences(const String& string, const WTF::TextEncoding& encoding)
    159 {
    160     // We use decodeEscapeSequences() instead of decodeURLEscapeSequences() (declared in weborigin/KURL.h) to
    161     // avoid platform-specific URL decoding differences (e.g. KURLGoogle).
    162     return decodeEscapeSequences<URLEscapeSequence>(string, encoding);
    163 }
    164 
    165 static String fullyDecodeString(const String& string, const WTF::TextEncoding& encoding)
    166 {
    167     size_t oldWorkingStringLength;
    168     String workingString = string;
    169     do {
    170         oldWorkingStringLength = workingString.length();
    171         workingString = decode16BitUnicodeEscapeSequences(decodeStandardURLEscapeSequences(workingString, encoding));
    172     } while (workingString.length() < oldWorkingStringLength);
    173     workingString.replace('+', ' ');
    174     workingString = canonicalize(workingString);
    175     return workingString;
    176 }
    177 
    178 static ReflectedXSSDisposition combineXSSProtectionHeaderAndCSP(ReflectedXSSDisposition xssProtection, ReflectedXSSDisposition reflectedXSS)
    179 {
    180     ReflectedXSSDisposition result = std::max(xssProtection, reflectedXSS);
    181 
    182     if (result == ReflectedXSSInvalid || result == FilterReflectedXSS || result == ReflectedXSSUnset)
    183         return FilterReflectedXSS;
    184 
    185     return result;
    186 }
    187 
    188 static bool isSemicolonSeparatedAttribute(const HTMLToken::Attribute& attribute)
    189 {
    190     return threadSafeMatch(attribute.name, SVGNames::valuesAttr);
    191 }
    192 
    193 static bool semicolonSeparatedValueContainsJavaScriptURL(const String& value)
    194 {
    195     Vector<String> valueList;
    196     value.split(';', valueList);
    197     for (size_t i = 0; i < valueList.size(); ++i) {
    198         if (protocolIsJavaScript(valueList[i]))
    199             return true;
    200     }
    201     return false;
    202 }
    203 
    204 XSSAuditor::XSSAuditor()
    205     : m_isEnabled(false)
    206     , m_xssProtection(FilterReflectedXSS)
    207     , m_didSendValidCSPHeader(false)
    208     , m_didSendValidXSSProtectionHeader(false)
    209     , m_state(Uninitialized)
    210     , m_scriptTagFoundInRequest(false)
    211     , m_scriptTagNestingLevel(0)
    212     , m_encoding(UTF8Encoding())
    213 {
    214     // Although tempting to call init() at this point, the various objects
    215     // we want to reference might not all have been constructed yet.
    216 }
    217 
    218 void XSSAuditor::initForFragment()
    219 {
    220     ASSERT(isMainThread());
    221     ASSERT(m_state == Uninitialized);
    222     m_state = FilteringTokens;
    223     // When parsing a fragment, we don't enable the XSS auditor because it's
    224     // too much overhead.
    225     ASSERT(!m_isEnabled);
    226 }
    227 
    228 void XSSAuditor::init(Document* document, XSSAuditorDelegate* auditorDelegate)
    229 {
    230     const size_t miniumLengthForSuffixTree = 512; // FIXME: Tune this parameter.
    231     const int suffixTreeDepth = 5;
    232 
    233     ASSERT(isMainThread());
    234     if (m_state != Uninitialized)
    235         return;
    236     m_state = FilteringTokens;
    237 
    238     if (Settings* settings = document->settings())
    239         m_isEnabled = settings->xssAuditorEnabled();
    240 
    241     if (!m_isEnabled)
    242         return;
    243 
    244     m_documentURL = document->url().copy();
    245 
    246     // In theory, the Document could have detached from the Frame after the
    247     // XSSAuditor was constructed.
    248     if (!document->frame()) {
    249         m_isEnabled = false;
    250         return;
    251     }
    252 
    253     if (m_documentURL.isEmpty()) {
    254         // The URL can be empty when opening a new browser window or calling window.open("").
    255         m_isEnabled = false;
    256         return;
    257     }
    258 
    259     if (m_documentURL.protocolIsData()) {
    260         m_isEnabled = false;
    261         return;
    262     }
    263 
    264     if (document->encoding().isValid())
    265         m_encoding = document->encoding();
    266 
    267     m_decodedURL = fullyDecodeString(m_documentURL.string(), m_encoding);
    268     if (m_decodedURL.find(isRequiredForInjection) == kNotFound)
    269         m_decodedURL = String();
    270 
    271     String httpBodyAsString;
    272     if (DocumentLoader* documentLoader = document->frame()->loader().documentLoader()) {
    273         DEFINE_STATIC_LOCAL(const AtomicString, XSSProtectionHeader, ("X-XSS-Protection", AtomicString::ConstructFromLiteral));
    274         const AtomicString& headerValue = documentLoader->response().httpHeaderField(XSSProtectionHeader);
    275         String errorDetails;
    276         unsigned errorPosition = 0;
    277         String reportURL;
    278         KURL xssProtectionReportURL;
    279 
    280         // Process the X-XSS-Protection header, then mix in the CSP header's value.
    281         ReflectedXSSDisposition xssProtectionHeader = parseXSSProtectionHeader(headerValue, errorDetails, errorPosition, reportURL);
    282         m_didSendValidXSSProtectionHeader = xssProtectionHeader != ReflectedXSSUnset && xssProtectionHeader != ReflectedXSSInvalid;
    283         if ((xssProtectionHeader == FilterReflectedXSS || xssProtectionHeader == BlockReflectedXSS) && !reportURL.isEmpty()) {
    284             xssProtectionReportURL = document->completeURL(reportURL);
    285             if (MixedContentChecker::isMixedContent(document->securityOrigin(), xssProtectionReportURL)) {
    286                 errorDetails = "insecure reporting URL for secure page";
    287                 xssProtectionHeader = ReflectedXSSInvalid;
    288                 xssProtectionReportURL = KURL();
    289             }
    290         }
    291         if (xssProtectionHeader == ReflectedXSSInvalid)
    292             document->addConsoleMessage(SecurityMessageSource, ErrorMessageLevel, "Error parsing header X-XSS-Protection: " + headerValue + ": "  + errorDetails + " at character position " + String::format("%u", errorPosition) + ". The default protections will be applied.");
    293 
    294         ReflectedXSSDisposition cspHeader = document->contentSecurityPolicy()->reflectedXSSDisposition();
    295         m_didSendValidCSPHeader = cspHeader != ReflectedXSSUnset && cspHeader != ReflectedXSSInvalid;
    296 
    297         m_xssProtection = combineXSSProtectionHeaderAndCSP(xssProtectionHeader, cspHeader);
    298         // FIXME: Combine the two report URLs in some reasonable way.
    299         if (auditorDelegate)
    300             auditorDelegate->setReportURL(xssProtectionReportURL.copy());
    301         FormData* httpBody = documentLoader->request().httpBody();
    302         if (httpBody && !httpBody->isEmpty()) {
    303             httpBodyAsString = httpBody->flattenToString();
    304             if (!httpBodyAsString.isEmpty()) {
    305                 m_decodedHTTPBody = fullyDecodeString(httpBodyAsString, m_encoding);
    306                 if (m_decodedHTTPBody.find(isRequiredForInjection) == kNotFound)
    307                     m_decodedHTTPBody = String();
    308                 if (m_decodedHTTPBody.length() >= miniumLengthForSuffixTree)
    309                     m_decodedHTTPBodySuffixTree = adoptPtr(new SuffixTree<ASCIICodebook>(m_decodedHTTPBody, suffixTreeDepth));
    310             }
    311         }
    312     }
    313 
    314     if (m_decodedURL.isEmpty() && m_decodedHTTPBody.isEmpty()) {
    315         m_isEnabled = false;
    316         return;
    317     }
    318 }
    319 
    320 PassOwnPtr<XSSInfo> XSSAuditor::filterToken(const FilterTokenRequest& request)
    321 {
    322     ASSERT(m_state != Uninitialized);
    323     if (!m_isEnabled || m_xssProtection == AllowReflectedXSS)
    324         return nullptr;
    325 
    326     bool didBlockScript = false;
    327     if (request.token.type() == HTMLToken::StartTag)
    328         didBlockScript = filterStartToken(request);
    329     else if (m_scriptTagNestingLevel) {
    330         if (request.token.type() == HTMLToken::Character)
    331             didBlockScript = filterCharacterToken(request);
    332         else if (request.token.type() == HTMLToken::EndTag)
    333             filterEndToken(request);
    334     }
    335 
    336     if (didBlockScript) {
    337         bool didBlockEntirePage = (m_xssProtection == BlockReflectedXSS);
    338         OwnPtr<XSSInfo> xssInfo = XSSInfo::create(m_documentURL, didBlockEntirePage, m_didSendValidXSSProtectionHeader, m_didSendValidCSPHeader);
    339         return xssInfo.release();
    340     }
    341     return nullptr;
    342 }
    343 
    344 bool XSSAuditor::filterStartToken(const FilterTokenRequest& request)
    345 {
    346     m_state = FilteringTokens;
    347     bool didBlockScript = eraseDangerousAttributesIfInjected(request);
    348 
    349     if (hasName(request.token, scriptTag)) {
    350         didBlockScript |= filterScriptToken(request);
    351         ASSERT(request.shouldAllowCDATA || !m_scriptTagNestingLevel);
    352         m_scriptTagNestingLevel++;
    353     } else if (hasName(request.token, objectTag))
    354         didBlockScript |= filterObjectToken(request);
    355     else if (hasName(request.token, paramTag))
    356         didBlockScript |= filterParamToken(request);
    357     else if (hasName(request.token, embedTag))
    358         didBlockScript |= filterEmbedToken(request);
    359     else if (hasName(request.token, appletTag))
    360         didBlockScript |= filterAppletToken(request);
    361     else if (hasName(request.token, iframeTag) || hasName(request.token, frameTag))
    362         didBlockScript |= filterFrameToken(request);
    363     else if (hasName(request.token, metaTag))
    364         didBlockScript |= filterMetaToken(request);
    365     else if (hasName(request.token, baseTag))
    366         didBlockScript |= filterBaseToken(request);
    367     else if (hasName(request.token, formTag))
    368         didBlockScript |= filterFormToken(request);
    369     else if (hasName(request.token, inputTag))
    370         didBlockScript |= filterInputToken(request);
    371     else if (hasName(request.token, buttonTag))
    372         didBlockScript |= filterButtonToken(request);
    373 
    374     return didBlockScript;
    375 }
    376 
    377 void XSSAuditor::filterEndToken(const FilterTokenRequest& request)
    378 {
    379     ASSERT(m_scriptTagNestingLevel);
    380     m_state = FilteringTokens;
    381     if (hasName(request.token, scriptTag)) {
    382         m_scriptTagNestingLevel--;
    383         ASSERT(request.shouldAllowCDATA || !m_scriptTagNestingLevel);
    384     }
    385 }
    386 
    387 bool XSSAuditor::filterCharacterToken(const FilterTokenRequest& request)
    388 {
    389     ASSERT(m_scriptTagNestingLevel);
    390     ASSERT(m_state != Uninitialized);
    391     if (m_state == PermittingAdjacentCharacterTokens)
    392         return false;
    393 
    394     if ((m_state == SuppressingAdjacentCharacterTokens)
    395         || (m_scriptTagFoundInRequest && isContainedInRequest(decodedSnippetForJavaScript(request)))) {
    396         request.token.eraseCharacters();
    397         request.token.appendToCharacter(' '); // Technically, character tokens can't be empty.
    398         m_state = SuppressingAdjacentCharacterTokens;
    399         return true;
    400     }
    401 
    402     m_state = PermittingAdjacentCharacterTokens;
    403     return false;
    404 }
    405 
    406 bool XSSAuditor::filterScriptToken(const FilterTokenRequest& request)
    407 {
    408     ASSERT(request.token.type() == HTMLToken::StartTag);
    409     ASSERT(hasName(request.token, scriptTag));
    410 
    411     bool didBlockScript = false;
    412     m_scriptTagFoundInRequest = isContainedInRequest(decodedSnippetForName(request));
    413     if (m_scriptTagFoundInRequest) {
    414         didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttribute);
    415         didBlockScript |= eraseAttributeIfInjected(request, XLinkNames::hrefAttr, blankURL().string(), SrcLikeAttribute);
    416     }
    417     return didBlockScript;
    418 }
    419 
    420 bool XSSAuditor::filterObjectToken(const FilterTokenRequest& request)
    421 {
    422     ASSERT(request.token.type() == HTMLToken::StartTag);
    423     ASSERT(hasName(request.token, objectTag));
    424 
    425     bool didBlockScript = false;
    426     if (isContainedInRequest(decodedSnippetForName(request))) {
    427         didBlockScript |= eraseAttributeIfInjected(request, dataAttr, blankURL().string(), SrcLikeAttribute);
    428         didBlockScript |= eraseAttributeIfInjected(request, typeAttr);
    429         didBlockScript |= eraseAttributeIfInjected(request, classidAttr);
    430     }
    431     return didBlockScript;
    432 }
    433 
    434 bool XSSAuditor::filterParamToken(const FilterTokenRequest& request)
    435 {
    436     ASSERT(request.token.type() == HTMLToken::StartTag);
    437     ASSERT(hasName(request.token, paramTag));
    438 
    439     size_t indexOfNameAttribute;
    440     if (!findAttributeWithName(request.token, nameAttr, indexOfNameAttribute))
    441         return false;
    442 
    443     const HTMLToken::Attribute& nameAttribute = request.token.attributes().at(indexOfNameAttribute);
    444     if (!HTMLParamElement::isURLParameter(String(nameAttribute.value)))
    445         return false;
    446 
    447     return eraseAttributeIfInjected(request, valueAttr, blankURL().string(), SrcLikeAttribute);
    448 }
    449 
    450 bool XSSAuditor::filterEmbedToken(const FilterTokenRequest& request)
    451 {
    452     ASSERT(request.token.type() == HTMLToken::StartTag);
    453     ASSERT(hasName(request.token, embedTag));
    454 
    455     bool didBlockScript = false;
    456     if (isContainedInRequest(decodedSnippetForName(request))) {
    457         didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttribute);
    458         didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttribute);
    459         didBlockScript |= eraseAttributeIfInjected(request, typeAttr);
    460     }
    461     return didBlockScript;
    462 }
    463 
    464 bool XSSAuditor::filterAppletToken(const FilterTokenRequest& request)
    465 {
    466     ASSERT(request.token.type() == HTMLToken::StartTag);
    467     ASSERT(hasName(request.token, appletTag));
    468 
    469     bool didBlockScript = false;
    470     if (isContainedInRequest(decodedSnippetForName(request))) {
    471         didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttribute);
    472         didBlockScript |= eraseAttributeIfInjected(request, objectAttr);
    473     }
    474     return didBlockScript;
    475 }
    476 
    477 bool XSSAuditor::filterFrameToken(const FilterTokenRequest& request)
    478 {
    479     ASSERT(request.token.type() == HTMLToken::StartTag);
    480     ASSERT(hasName(request.token, iframeTag) || hasName(request.token, frameTag));
    481 
    482     bool didBlockScript = eraseAttributeIfInjected(request, srcdocAttr, String(), ScriptLikeAttribute);
    483     if (isContainedInRequest(decodedSnippetForName(request)))
    484         didBlockScript |= eraseAttributeIfInjected(request, srcAttr, String(), SrcLikeAttribute);
    485 
    486     return didBlockScript;
    487 }
    488 
    489 bool XSSAuditor::filterMetaToken(const FilterTokenRequest& request)
    490 {
    491     ASSERT(request.token.type() == HTMLToken::StartTag);
    492     ASSERT(hasName(request.token, metaTag));
    493 
    494     return eraseAttributeIfInjected(request, http_equivAttr);
    495 }
    496 
    497 bool XSSAuditor::filterBaseToken(const FilterTokenRequest& request)
    498 {
    499     ASSERT(request.token.type() == HTMLToken::StartTag);
    500     ASSERT(hasName(request.token, baseTag));
    501 
    502     return eraseAttributeIfInjected(request, hrefAttr);
    503 }
    504 
    505 bool XSSAuditor::filterFormToken(const FilterTokenRequest& request)
    506 {
    507     ASSERT(request.token.type() == HTMLToken::StartTag);
    508     ASSERT(hasName(request.token, formTag));
    509 
    510     return eraseAttributeIfInjected(request, actionAttr, kURLWithUniqueOrigin);
    511 }
    512 
    513 bool XSSAuditor::filterInputToken(const FilterTokenRequest& request)
    514 {
    515     ASSERT(request.token.type() == HTMLToken::StartTag);
    516     ASSERT(hasName(request.token, inputTag));
    517 
    518     return eraseAttributeIfInjected(request, formactionAttr, kURLWithUniqueOrigin, SrcLikeAttribute);
    519 }
    520 
    521 bool XSSAuditor::filterButtonToken(const FilterTokenRequest& request)
    522 {
    523     ASSERT(request.token.type() == HTMLToken::StartTag);
    524     ASSERT(hasName(request.token, buttonTag));
    525 
    526     return eraseAttributeIfInjected(request, formactionAttr, kURLWithUniqueOrigin, SrcLikeAttribute);
    527 }
    528 
    529 bool XSSAuditor::eraseDangerousAttributesIfInjected(const FilterTokenRequest& request)
    530 {
    531     DEFINE_STATIC_LOCAL(String, safeJavaScriptURL, ("javascript:void(0)"));
    532 
    533     bool didBlockScript = false;
    534     for (size_t i = 0; i < request.token.attributes().size(); ++i) {
    535         const HTMLToken::Attribute& attribute = request.token.attributes().at(i);
    536         bool isInlineEventHandler = isNameOfInlineEventHandler(attribute.name);
    537         // FIXME: It would be better if we didn't create a new String for every attribute in the document.
    538         String strippedValue = stripLeadingAndTrailingHTMLSpaces(String(attribute.value));
    539         bool valueContainsJavaScriptURL = (!isInlineEventHandler && protocolIsJavaScript(strippedValue)) || (isSemicolonSeparatedAttribute(attribute) && semicolonSeparatedValueContainsJavaScriptURL(strippedValue));
    540         if (!isInlineEventHandler && !valueContainsJavaScriptURL)
    541             continue;
    542         if (!isContainedInRequest(decodedSnippetForAttribute(request, attribute, ScriptLikeAttribute)))
    543             continue;
    544         request.token.eraseValueOfAttribute(i);
    545         if (valueContainsJavaScriptURL)
    546             request.token.appendToAttributeValue(i, safeJavaScriptURL);
    547         didBlockScript = true;
    548     }
    549     return didBlockScript;
    550 }
    551 
    552 bool XSSAuditor::eraseAttributeIfInjected(const FilterTokenRequest& request, const QualifiedName& attributeName, const String& replacementValue, AttributeKind treatment)
    553 {
    554     size_t indexOfAttribute = 0;
    555     if (findAttributeWithName(request.token, attributeName, indexOfAttribute)) {
    556         const HTMLToken::Attribute& attribute = request.token.attributes().at(indexOfAttribute);
    557         if (isContainedInRequest(decodedSnippetForAttribute(request, attribute, treatment))) {
    558             if (threadSafeMatch(attributeName, srcAttr) && isLikelySafeResource(String(attribute.value)))
    559                 return false;
    560             if (threadSafeMatch(attributeName, http_equivAttr) && !isDangerousHTTPEquiv(String(attribute.value)))
    561                 return false;
    562             request.token.eraseValueOfAttribute(indexOfAttribute);
    563             if (!replacementValue.isEmpty())
    564                 request.token.appendToAttributeValue(indexOfAttribute, replacementValue);
    565             return true;
    566         }
    567     }
    568     return false;
    569 }
    570 
    571 String XSSAuditor::decodedSnippetForName(const FilterTokenRequest& request)
    572 {
    573     // Grab a fixed number of characters equal to the length of the token's name plus one (to account for the "<").
    574     return fullyDecodeString(request.sourceTracker.sourceForToken(request.token), m_encoding).substring(0, request.token.name().size() + 1);
    575 }
    576 
    577 String XSSAuditor::decodedSnippetForAttribute(const FilterTokenRequest& request, const HTMLToken::Attribute& attribute, AttributeKind treatment)
    578 {
    579     // The range doesn't inlcude the character which terminates the value. So,
    580     // for an input of |name="value"|, the snippet is |name="value|. For an
    581     // unquoted input of |name=value |, the snippet is |name=value|.
    582     // FIXME: We should grab one character before the name also.
    583     int start = attribute.nameRange.start - request.token.startIndex();
    584     int end = attribute.valueRange.end - request.token.startIndex();
    585     String decodedSnippet = fullyDecodeString(request.sourceTracker.sourceForToken(request.token).substring(start, end - start), m_encoding);
    586     decodedSnippet.truncate(kMaximumFragmentLengthTarget);
    587     if (treatment == SrcLikeAttribute) {
    588         int slashCount = 0;
    589         bool commaSeen = false;
    590         // In HTTP URLs, characters following the first ?, #, or third slash may come from
    591         // the page itself and can be merely ignored by an attacker's server when a remote
    592         // script or script-like resource is requested. In DATA URLS, the payload starts at
    593         // the first comma, and the the first /*, //, or <!-- may introduce a comment. Characters
    594         // following this may come from the page itself and may be ignored when the script is
    595         // executed. For simplicity, we don't differentiate based on URL scheme, and stop at
    596         // the first # or ?, the third slash, or the first slash or < once a comma is seen.
    597         for (size_t currentLength = 0; currentLength < decodedSnippet.length(); ++currentLength) {
    598             UChar currentChar = decodedSnippet[currentLength];
    599             if (currentChar == '?'
    600                 || currentChar == '#'
    601                 || ((currentChar == '/' || currentChar == '\\') && (commaSeen || ++slashCount > 2))
    602                 || (currentChar == '<' && commaSeen)) {
    603                 decodedSnippet.truncate(currentLength);
    604                 break;
    605             }
    606             if (currentChar == ',')
    607                 commaSeen = true;
    608         }
    609     } else if (treatment == ScriptLikeAttribute) {
    610         // Beware of trailing characters which came from the page itself, not the
    611         // injected vector. Excluding the terminating character covers common cases
    612         // where the page immediately ends the attribute, but doesn't cover more
    613         // complex cases where there is other page data following the injection.
    614         // Generally, these won't parse as javascript, so the injected vector
    615         // typically excludes them from consideration via a single-line comment or
    616         // by enclosing them in a string literal terminated later by the page's own
    617         // closing punctuation. Since the snippet has not been parsed, the vector
    618         // may also try to introduce these via entities. As a result, we'd like to
    619         // stop before the first "//", the first <!--, the first entity, or the first
    620         // quote not immediately following the first equals sign (taking whitespace
    621         // into consideration). To keep things simpler, we don't try to distinguish
    622         // between entity-introducing amperands vs. other uses, nor do we bother to
    623         // check for a second slash for a comment, nor do we bother to check for
    624         // !-- following a less-than sign. We stop instead on any ampersand
    625         // slash, or less-than sign.
    626         size_t position = 0;
    627         if ((position = decodedSnippet.find("=")) != kNotFound
    628             && (position = decodedSnippet.find(isNotHTMLSpace<UChar>, position + 1)) != kNotFound
    629             && (position = decodedSnippet.find(isTerminatingCharacter, isHTMLQuote(decodedSnippet[position]) ? position + 1 : position)) != kNotFound) {
    630             decodedSnippet.truncate(position);
    631         }
    632     }
    633     return decodedSnippet;
    634 }
    635 
    636 String XSSAuditor::decodedSnippetForJavaScript(const FilterTokenRequest& request)
    637 {
    638     String string = request.sourceTracker.sourceForToken(request.token);
    639     size_t startPosition = 0;
    640     size_t endPosition = string.length();
    641     size_t foundPosition = kNotFound;
    642 
    643     // Skip over initial comments to find start of code.
    644     while (startPosition < endPosition) {
    645         while (startPosition < endPosition && isHTMLSpace<UChar>(string[startPosition]))
    646             startPosition++;
    647 
    648         // Under SVG/XML rules, only HTML comment syntax matters and the parser returns
    649         // these as a separate comment tokens. Having consumed whitespace, we need not look
    650         // further for these.
    651         if (request.shouldAllowCDATA)
    652             break;
    653 
    654         // Under HTML rules, both the HTML and JS comment synatx matters, and the HTML
    655         // comment ends at the end of the line, not with -->.
    656         if (startsHTMLCommentAt(string, startPosition) || startsSingleLineCommentAt(string, startPosition)) {
    657             while (startPosition < endPosition && !isJSNewline(string[startPosition]))
    658                 startPosition++;
    659         } else if (startsMultiLineCommentAt(string, startPosition)) {
    660             if (startPosition + 2 < endPosition && (foundPosition = string.find("*/", startPosition + 2)) != kNotFound)
    661                 startPosition = foundPosition + 2;
    662             else
    663                 startPosition = endPosition;
    664         } else
    665             break;
    666     }
    667 
    668     String result;
    669     while (startPosition < endPosition && !result.length()) {
    670         // Stop at next comment (using the same rules as above for SVG/XML vs HTML), when we
    671         // encounter a comma, or when we  exceed the maximum length target. The comma rule
    672         // covers a common parameter concatenation case performed by some webservers.
    673         // After hitting the length target, we can only stop at a point where we know we are
    674         // not in the middle of a %-escape sequence. For the sake of simplicity, approximate
    675         // not stopping inside a (possibly multiply encoded) %-esacpe sequence by breaking on
    676         // whitespace only. We should have enough text in these cases to avoid false positives.
    677         for (foundPosition = startPosition; foundPosition < endPosition; foundPosition++) {
    678             if (!request.shouldAllowCDATA) {
    679                 if (startsSingleLineCommentAt(string, foundPosition) || startsMultiLineCommentAt(string, foundPosition)) {
    680                     foundPosition += 2;
    681                     break;
    682                 }
    683                 if (startsHTMLCommentAt(string, foundPosition)) {
    684                     foundPosition += 4;
    685                     break;
    686                 }
    687             }
    688             if (string[foundPosition] == ',' || (foundPosition > startPosition + kMaximumFragmentLengthTarget && isHTMLSpace<UChar>(string[foundPosition]))) {
    689                 break;
    690             }
    691         }
    692 
    693         result = fullyDecodeString(string.substring(startPosition, foundPosition - startPosition), m_encoding);
    694         startPosition = foundPosition + 1;
    695     }
    696     return result;
    697 }
    698 
    699 bool XSSAuditor::isContainedInRequest(const String& decodedSnippet)
    700 {
    701     if (decodedSnippet.isEmpty())
    702         return false;
    703     if (m_decodedURL.find(decodedSnippet, 0, false) != kNotFound)
    704         return true;
    705     if (m_decodedHTTPBodySuffixTree && !m_decodedHTTPBodySuffixTree->mightContain(decodedSnippet))
    706         return false;
    707     return m_decodedHTTPBody.find(decodedSnippet, 0, false) != kNotFound;
    708 }
    709 
    710 bool XSSAuditor::isLikelySafeResource(const String& url)
    711 {
    712     // Give empty URLs and about:blank a pass. Making a resourceURL from an
    713     // empty string below will likely later fail the "no query args test" as
    714     // it inherits the document's query args.
    715     if (url.isEmpty() || url == blankURL().string())
    716         return true;
    717 
    718     // If the resource is loaded from the same host as the enclosing page, it's
    719     // probably not an XSS attack, so we reduce false positives by allowing the
    720     // request, ignoring scheme and port considerations. If the resource has a
    721     // query string, we're more suspicious, however, because that's pretty rare
    722     // and the attacker might be able to trick a server-side script into doing
    723     // something dangerous with the query string.
    724     if (m_documentURL.host().isEmpty())
    725         return false;
    726 
    727     KURL resourceURL(m_documentURL, url);
    728     return (m_documentURL.host() == resourceURL.host() && resourceURL.query().isEmpty());
    729 }
    730 
    731 bool XSSAuditor::isSafeToSendToAnotherThread() const
    732 {
    733     return m_documentURL.isSafeToSendToAnotherThread()
    734         && m_decodedURL.isSafeToSendToAnotherThread()
    735         && m_decodedHTTPBody.isSafeToSendToAnotherThread();
    736 }
    737 
    738 } // namespace WebCore
    739