Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2011 Adam Barth. All Rights Reserved.
      3  * Copyright (C) 2011 Daniel Bates (dbates (at) intudata.com).
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include "config.h"
     28 #include "core/html/parser/XSSAuditor.h"
     29 
     30 #include "core/HTMLNames.h"
     31 #include "core/SVGNames.h"
     32 #include "core/XLinkNames.h"
     33 #include "core/dom/Document.h"
     34 #include "core/frame/LocalFrame.h"
     35 #include "core/frame/Settings.h"
     36 #include "core/frame/csp/ContentSecurityPolicy.h"
     37 #include "core/html/HTMLParamElement.h"
     38 #include "core/html/parser/HTMLDocumentParser.h"
     39 #include "core/html/parser/HTMLParserIdioms.h"
     40 #include "core/html/parser/TextResourceDecoder.h"
     41 #include "core/html/parser/XSSAuditorDelegate.h"
     42 #include "core/inspector/ConsoleMessage.h"
     43 #include "core/loader/DocumentLoader.h"
     44 #include "platform/JSONValues.h"
     45 #include "platform/network/FormData.h"
     46 #include "platform/text/DecodeEscapeSequences.h"
     47 #include "wtf/ASCIICType.h"
     48 #include "wtf/MainThread.h"
     49 
     50 namespace {
     51 
     52 // Replacement value written into injected form "action"/"formaction" attributes (see filterFormToken, filterInputToken and filterButtonToken). SecurityOrigin::urlWithUniqueSecurityOrigin() can't be used cross-thread, or we'd use it instead.
     53 const char kURLWithUniqueOrigin[] = "data:,";
     54 
     55 } // namespace
     56 
     57 namespace blink {
     58 
     59 using namespace HTMLNames;
     60 
     61 static bool isNonCanonicalCharacter(UChar c)
     62 {
     63     // We remove all non-ASCII characters, including non-printable ASCII characters.
     64     //
     65     // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character.
     66     // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the
     67     // adverse effect that we remove any legitimate zeros from a string.
     68     //
     69     // We also remove forward-slash, because it is common for some servers to collapse successive path components, eg,
     70     // a//b becomes a/b.
     71     //
     72     // We also remove the questionmark character, since some severs replace invalid high-bytes with a questionmark. We
     73     // are already stripping the high-bytes so we also strip the questionmark to match.
     74     //
     75     // For instance: new String("http://localhost:8000?x") => new String("http:localhost:8x").
     76     return (c == '\\' || c == '0' || c == '\0' || c == '/' || c == '?' || c >= 127);
     77 }
     78 
     79 static bool isRequiredForInjection(UChar c)
     80 {
     81     return (c == '\'' || c == '"' || c == '<' || c == '>');
     82 }
     83 
     84 static bool isTerminatingCharacter(UChar c)
     85 {
     86     return (c == '&' || c == '/' || c == '"' || c == '\'' || c == '<' || c == '>' || c == ',');
     87 }
     88 
     89 static bool isHTMLQuote(UChar c)
     90 {
     91     return (c == '"' || c == '\'');
     92 }
     93 
     94 static bool isJSNewline(UChar c)
     95 {
     96     // Per ecma-262 section 7.3 Line Terminators.
     97     return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029);
     98 }
     99 
    100 static bool startsHTMLCommentAt(const String& string, size_t start)
    101 {
    102     return (start + 3 < string.length() && string[start] == '<' && string[start + 1] == '!' && string[start + 2] == '-' && string[start + 3] == '-');
    103 }
    104 
    105 static bool startsSingleLineCommentAt(const String& string, size_t start)
    106 {
    107     return (start + 1 < string.length() && string[start] == '/' && string[start + 1] == '/');
    108 }
    109 
    110 static bool startsMultiLineCommentAt(const String& string, size_t start)
    111 {
    112     return (start + 1 < string.length() && string[start] == '/' && string[start + 1] == '*');
    113 }
    114 
    115 static bool startsOpeningScriptTagAt(const String& string, size_t start)
    116 {
    117     return start + 6 < string.length() && string[start] == '<'
    118         && WTF::toASCIILowerUnchecked(string[start + 1]) == 's'
    119         && WTF::toASCIILowerUnchecked(string[start + 2]) == 'c'
    120         && WTF::toASCIILowerUnchecked(string[start + 3]) == 'r'
    121         && WTF::toASCIILowerUnchecked(string[start + 4]) == 'i'
    122         && WTF::toASCIILowerUnchecked(string[start + 5]) == 'p'
    123         && WTF::toASCIILowerUnchecked(string[start + 6]) == 't';
    124 }
    125 
    126 // If other files need this, we should move this to core/html/parser/HTMLParserIdioms.h
    127 template<size_t inlineCapacity>
    128 bool threadSafeMatch(const Vector<UChar, inlineCapacity>& vector, const QualifiedName& qname)
    129 {
    130     return equalIgnoringNullity(vector, qname.localName().impl());
    131 }
    132 
    133 static bool hasName(const HTMLToken& token, const QualifiedName& name)
    134 {
    135     return threadSafeMatch(token.name(), name);
    136 }
    137 
    138 static bool findAttributeWithName(const HTMLToken& token, const QualifiedName& name, size_t& indexOfMatchingAttribute)
    139 {
    140     // Notice that we're careful not to ref the StringImpl here because we might be on a background thread.
    141     const String& attrName = name.namespaceURI() == XLinkNames::xlinkNamespaceURI ? "xlink:" + name.localName().string() : name.localName().string();
    142 
    143     for (size_t i = 0; i < token.attributes().size(); ++i) {
    144         if (equalIgnoringNullity(token.attributes().at(i).name, attrName)) {
    145             indexOfMatchingAttribute = i;
    146             return true;
    147         }
    148     }
    149     return false;
    150 }
    151 
    152 static bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name)
    153 {
    154     const size_t lengthOfShortestInlineEventHandlerName = 5; // To wit: oncut.
    155     if (name.size() < lengthOfShortestInlineEventHandlerName)
    156         return false;
    157     return name[0] == 'o' && name[1] == 'n';
    158 }
    159 
    160 static bool isDangerousHTTPEquiv(const String& value)
    161 {
    162     String equiv = value.stripWhiteSpace();
    163     return equalIgnoringCase(equiv, "refresh") || equalIgnoringCase(equiv, "set-cookie");
    164 }
    165 
    166 static inline String decode16BitUnicodeEscapeSequences(const String& string)
    167 {
    168     // Note, the encoding is ignored since each %u-escape sequence represents a UTF-16 code unit.
    169     return decodeEscapeSequences<Unicode16BitEscapeSequence>(string, UTF8Encoding());
    170 }
    171 
    172 static inline String decodeStandardURLEscapeSequences(const String& string, const WTF::TextEncoding& encoding)
    173 {
    174     // We use decodeEscapeSequences() instead of decodeURLEscapeSequences() (declared in weborigin/KURL.h) to
    175     // avoid platform-specific URL decoding differences (e.g. KURLGoogle).
    176     return decodeEscapeSequences<URLEscapeSequence>(string, encoding);
    177 }
    178 
    179 static String fullyDecodeString(const String& string, const WTF::TextEncoding& encoding)
    180 {
    181     size_t oldWorkingStringLength;
    182     String workingString = string;
    183     do {
    184         oldWorkingStringLength = workingString.length();
    185         workingString = decode16BitUnicodeEscapeSequences(decodeStandardURLEscapeSequences(workingString, encoding));
    186     } while (workingString.length() < oldWorkingStringLength);
    187     workingString.replace('+', ' ');
    188     return workingString;
    189 }
    190 
    191 static void truncateForSrcLikeAttribute(String& decodedSnippet)
    192 {
    193     // In HTTP URLs, characters following the first ?, #, or third slash may come from
    194     // the page itself and can be merely ignored by an attacker's server when a remote
    195     // script or script-like resource is requested. In DATA URLS, the payload starts at
    196     // the first comma, and the the first /*, //, or <!-- may introduce a comment. Characters
    197     // following this may come from the page itself and may be ignored when the script is
    198     // executed. For simplicity, we don't differentiate based on URL scheme, and stop at
    199     // the first # or ?, the third slash, or the first slash or < once a comma is seen.
    200     int slashCount = 0;
    201     bool commaSeen = false;
    202     for (size_t currentLength = 0; currentLength < decodedSnippet.length(); ++currentLength) {
    203         UChar currentChar = decodedSnippet[currentLength];
    204         if (currentChar == '?'
    205             || currentChar == '#'
    206             || ((currentChar == '/' || currentChar == '\\') && (commaSeen || ++slashCount > 2))
    207             || (currentChar == '<' && commaSeen)) {
    208             decodedSnippet.truncate(currentLength);
    209             return;
    210         }
    211         if (currentChar == ',')
    212             commaSeen = true;
    213     }
    214 }
    215 
    216 static void truncateForScriptLikeAttribute(String& decodedSnippet)
    217 {
    218     // Beware of trailing characters which came from the page itself, not the
    219     // injected vector. Excluding the terminating character covers common cases
    220     // where the page immediately ends the attribute, but doesn't cover more
    221     // complex cases where there is other page data following the injection.
    222     // Generally, these won't parse as javascript, so the injected vector
    223     // typically excludes them from consideration via a single-line comment or
    224     // by enclosing them in a string literal terminated later by the page's own
    225     // closing punctuation. Since the snippet has not been parsed, the vector
    226     // may also try to introduce these via entities. As a result, we'd like to
    227     // stop before the first "//", the first <!--, the first entity, or the first
    228     // quote not immediately following the first equals sign (taking whitespace
    229     // into consideration). To keep things simpler, we don't try to distinguish
    230     // between entity-introducing amperands vs. other uses, nor do we bother to
    231     // check for a second slash for a comment, nor do we bother to check for
    232     // !-- following a less-than sign. We stop instead on any ampersand
    233     // slash, or less-than sign.
    234     size_t position = 0;
    235     if ((position = decodedSnippet.find("=")) != kNotFound
    236         && (position = decodedSnippet.find(isNotHTMLSpace<UChar>, position + 1)) != kNotFound
    237         && (position = decodedSnippet.find(isTerminatingCharacter, isHTMLQuote(decodedSnippet[position]) ? position + 1 : position)) != kNotFound) {
    238         decodedSnippet.truncate(position);
    239     }
    240 }
    241 
    242 static ReflectedXSSDisposition combineXSSProtectionHeaderAndCSP(ReflectedXSSDisposition xssProtection, ReflectedXSSDisposition reflectedXSS)
    243 {
    244     ReflectedXSSDisposition result = std::max(xssProtection, reflectedXSS);
    245 
    246     if (result == ReflectedXSSInvalid || result == FilterReflectedXSS || result == ReflectedXSSUnset)
    247         return FilterReflectedXSS;
    248 
    249     return result;
    250 }
    251 
    252 static bool isSemicolonSeparatedAttribute(const HTMLToken::Attribute& attribute)
    253 {
    254     return threadSafeMatch(attribute.name, SVGNames::valuesAttr);
    255 }
    256 
    257 static String semicolonSeparatedValueContainingJavaScriptURL(const String& value)
    258 {
    259     Vector<String> valueList;
    260     value.split(';', valueList);
    261     for (size_t i = 0; i < valueList.size(); ++i) {
    262         String stripped = stripLeadingAndTrailingHTMLSpaces(valueList[i]);
    263         if (protocolIsJavaScript(stripped))
    264             return stripped;
    265     }
    266     return emptyString();
    267 }
    268 
    269 XSSAuditor::XSSAuditor()
    270     : m_isEnabled(false)
    271     , m_xssProtection(FilterReflectedXSS)
    272     , m_didSendValidCSPHeader(false)
    273     , m_didSendValidXSSProtectionHeader(false)
    274     , m_state(Uninitialized)
    275     , m_scriptTagFoundInRequest(false)
    276     , m_scriptTagNestingLevel(0)
    277     , m_encoding(UTF8Encoding())
    278 {
    279     // The auditor starts out disabled and Uninitialized; init() or initForFragment() moves it to FilteringTokens.
    280     // Although tempting to call init() at this point, the various objects we want to reference might not all have been constructed yet.
    281 }
    282 
    283 void XSSAuditor::initForFragment()
    284 {
    285     ASSERT(isMainThread());
    286     ASSERT(m_state == Uninitialized);
    287     m_state = FilteringTokens;
    288     // When parsing a fragment, we don't enable the XSS auditor because it's
    289     // too much overhead.
    290     ASSERT(!m_isEnabled);
    291 }
    292 
    293 void XSSAuditor::init(Document* document, XSSAuditorDelegate* auditorDelegate)
    294 {
    295     ASSERT(isMainThread());
    296     if (m_state != Uninitialized)
    297         return;
    298     m_state = FilteringTokens;
    299 
    300     if (Settings* settings = document->settings())
    301         m_isEnabled = settings->xssAuditorEnabled();
    302 
    303     if (!m_isEnabled)
    304         return;
    305 
    306     m_documentURL = document->url().copy();
    307 
    308     // In theory, the Document could have detached from the LocalFrame after the
    309     // XSSAuditor was constructed.
    310     if (!document->frame()) {
    311         m_isEnabled = false;
    312         return;
    313     }
    314 
    315     if (m_documentURL.isEmpty()) {
    316         // The URL can be empty when opening a new browser window or calling window.open("").
    317         m_isEnabled = false;
    318         return;
    319     }
    320 
    321     if (m_documentURL.protocolIsData()) {
    322         m_isEnabled = false;
    323         return;
    324     }
    325 
    326     if (document->encoding().isValid())
    327         m_encoding = document->encoding();
    328 
    329     if (DocumentLoader* documentLoader = document->frame()->loader().documentLoader()) {
    330         DEFINE_STATIC_LOCAL(const AtomicString, XSSProtectionHeader, ("X-XSS-Protection", AtomicString::ConstructFromLiteral));
    331         const AtomicString& headerValue = documentLoader->response().httpHeaderField(XSSProtectionHeader);
    332         String errorDetails;
    333         unsigned errorPosition = 0;
    334         String reportURL;
    335         KURL xssProtectionReportURL;
    336 
    337         // Process the X-XSS-Protection header, then mix in the CSP header's value.
    338         ReflectedXSSDisposition xssProtectionHeader = parseXSSProtectionHeader(headerValue, errorDetails, errorPosition, reportURL);
    339         m_didSendValidXSSProtectionHeader = xssProtectionHeader != ReflectedXSSUnset && xssProtectionHeader != ReflectedXSSInvalid;
    340         if ((xssProtectionHeader == FilterReflectedXSS || xssProtectionHeader == BlockReflectedXSS) && !reportURL.isEmpty()) {
    341             xssProtectionReportURL = document->completeURL(reportURL);
    342             if (MixedContentChecker::isMixedContent(document->securityOrigin(), xssProtectionReportURL)) {
    343                 errorDetails = "insecure reporting URL for secure page";
    344                 xssProtectionHeader = ReflectedXSSInvalid;
    345                 xssProtectionReportURL = KURL();
    346             }
    347         }
    348         if (xssProtectionHeader == ReflectedXSSInvalid)
    349             document->addConsoleMessage(ConsoleMessage::create(SecurityMessageSource, ErrorMessageLevel, "Error parsing header X-XSS-Protection: " + headerValue + ": "  + errorDetails + " at character position " + String::format("%u", errorPosition) + ". The default protections will be applied."));
    350 
    351         ReflectedXSSDisposition cspHeader = document->contentSecurityPolicy()->reflectedXSSDisposition();
    352         m_didSendValidCSPHeader = cspHeader != ReflectedXSSUnset && cspHeader != ReflectedXSSInvalid;
    353 
    354         m_xssProtection = combineXSSProtectionHeaderAndCSP(xssProtectionHeader, cspHeader);
    355         // FIXME: Combine the two report URLs in some reasonable way.
    356         if (auditorDelegate)
    357             auditorDelegate->setReportURL(xssProtectionReportURL.copy());
    358 
    359         FormData* httpBody = documentLoader->request().httpBody();
    360         if (httpBody && !httpBody->isEmpty())
    361             m_httpBodyAsString = httpBody->flattenToString();
    362     }
    363 
    364     setEncoding(m_encoding);
    365 }
    366 
    367 void XSSAuditor::setEncoding(const WTF::TextEncoding& encoding)
    368 {
    369     const size_t miniumLengthForSuffixTree = 512; // FIXME: Tune this parameter.
    370     const int suffixTreeDepth = 5;
    371 
    372     if (!encoding.isValid())
    373         return;
    374 
    375     m_encoding = encoding;
    376 
    377     m_decodedURL = canonicalize(m_documentURL.string(), NoTruncation);
    378     if (m_decodedURL.find(isRequiredForInjection) == kNotFound)
    379         m_decodedURL = String();
    380 
    381     if (!m_httpBodyAsString.isEmpty()) {
    382         m_decodedHTTPBody = canonicalize(m_httpBodyAsString, NoTruncation);
    383         m_httpBodyAsString = String();
    384         if (m_decodedHTTPBody.find(isRequiredForInjection) == kNotFound)
    385             m_decodedHTTPBody = String();
    386             if (m_decodedHTTPBody.length() >= miniumLengthForSuffixTree)
    387                 m_decodedHTTPBodySuffixTree = adoptPtr(new SuffixTree<ASCIICodebook>(m_decodedHTTPBody, suffixTreeDepth));
    388     }
    389 
    390     if (m_decodedURL.isEmpty() && m_decodedHTTPBody.isEmpty())
    391         m_isEnabled = false;
    392 }
    393 
    394 PassOwnPtr<XSSInfo> XSSAuditor::filterToken(const FilterTokenRequest& request)
    395 {
    396     ASSERT(m_state != Uninitialized);
    397     if (!m_isEnabled || m_xssProtection == AllowReflectedXSS)
    398         return nullptr;
    399 
    400     bool didBlockScript = false;
    401     if (request.token.type() == HTMLToken::StartTag)
    402         didBlockScript = filterStartToken(request);
    403     else if (m_scriptTagNestingLevel) {
    404         if (request.token.type() == HTMLToken::Character)
    405             didBlockScript = filterCharacterToken(request);
    406         else if (request.token.type() == HTMLToken::EndTag)
    407             filterEndToken(request);
    408     }
    409 
    410     if (didBlockScript) {
    411         bool didBlockEntirePage = (m_xssProtection == BlockReflectedXSS);
    412         OwnPtr<XSSInfo> xssInfo = XSSInfo::create(m_documentURL, didBlockEntirePage, m_didSendValidXSSProtectionHeader, m_didSendValidCSPHeader);
    413         return xssInfo.release();
    414     }
    415     return nullptr;
    416 }
    417 
    418 bool XSSAuditor::filterStartToken(const FilterTokenRequest& request)
    419 {
    420     m_state = FilteringTokens;
    421     bool didBlockScript = eraseDangerousAttributesIfInjected(request);
    422 
    423     if (hasName(request.token, scriptTag)) {
    424         didBlockScript |= filterScriptToken(request);
    425         ASSERT(request.shouldAllowCDATA || !m_scriptTagNestingLevel);
    426         m_scriptTagNestingLevel++;
    427     } else if (hasName(request.token, objectTag))
    428         didBlockScript |= filterObjectToken(request);
    429     else if (hasName(request.token, paramTag))
    430         didBlockScript |= filterParamToken(request);
    431     else if (hasName(request.token, embedTag))
    432         didBlockScript |= filterEmbedToken(request);
    433     else if (hasName(request.token, appletTag))
    434         didBlockScript |= filterAppletToken(request);
    435     else if (hasName(request.token, iframeTag) || hasName(request.token, frameTag))
    436         didBlockScript |= filterFrameToken(request);
    437     else if (hasName(request.token, metaTag))
    438         didBlockScript |= filterMetaToken(request);
    439     else if (hasName(request.token, baseTag))
    440         didBlockScript |= filterBaseToken(request);
    441     else if (hasName(request.token, formTag))
    442         didBlockScript |= filterFormToken(request);
    443     else if (hasName(request.token, inputTag))
    444         didBlockScript |= filterInputToken(request);
    445     else if (hasName(request.token, buttonTag))
    446         didBlockScript |= filterButtonToken(request);
    447 
    448     return didBlockScript;
    449 }
    450 
    451 void XSSAuditor::filterEndToken(const FilterTokenRequest& request)
    452 {
    453     ASSERT(m_scriptTagNestingLevel);
    454     m_state = FilteringTokens;
    455     if (hasName(request.token, scriptTag)) {
    456         m_scriptTagNestingLevel--;
    457         ASSERT(request.shouldAllowCDATA || !m_scriptTagNestingLevel);
    458     }
    459 }
    460 
    461 bool XSSAuditor::filterCharacterToken(const FilterTokenRequest& request)
    462 {
    463     ASSERT(m_scriptTagNestingLevel);
    464     ASSERT(m_state != Uninitialized);
    465     if (m_state == PermittingAdjacentCharacterTokens)
    466         return false;
    467 
    468     if ((m_state == SuppressingAdjacentCharacterTokens)
    469         || (m_scriptTagFoundInRequest && isContainedInRequest(canonicalizedSnippetForJavaScript(request)))) {
    470         request.token.eraseCharacters();
    471         request.token.appendToCharacter(' '); // Technically, character tokens can't be empty.
    472         m_state = SuppressingAdjacentCharacterTokens;
    473         return true;
    474     }
    475 
    476     m_state = PermittingAdjacentCharacterTokens;
    477     return false;
    478 }
    479 
    480 bool XSSAuditor::filterScriptToken(const FilterTokenRequest& request)
    481 {
    482     ASSERT(request.token.type() == HTMLToken::StartTag);
    483     ASSERT(hasName(request.token, scriptTag));
    484 
    485     bool didBlockScript = false;
    486     m_scriptTagFoundInRequest = isContainedInRequest(canonicalizedSnippetForTagName(request));
    487     if (m_scriptTagFoundInRequest) {
    488         didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttributeTruncation);
    489         didBlockScript |= eraseAttributeIfInjected(request, XLinkNames::hrefAttr, blankURL().string(), SrcLikeAttributeTruncation);
    490     }
    491     return didBlockScript;
    492 }
    493 
    494 bool XSSAuditor::filterObjectToken(const FilterTokenRequest& request)
    495 {
    496     ASSERT(request.token.type() == HTMLToken::StartTag);
    497     ASSERT(hasName(request.token, objectTag));
    498 
    499     bool didBlockScript = false;
    500     if (isContainedInRequest(canonicalizedSnippetForTagName(request))) {
    501         didBlockScript |= eraseAttributeIfInjected(request, dataAttr, blankURL().string(), SrcLikeAttributeTruncation);
    502         didBlockScript |= eraseAttributeIfInjected(request, typeAttr);
    503         didBlockScript |= eraseAttributeIfInjected(request, classidAttr);
    504     }
    505     return didBlockScript;
    506 }
    507 
    508 bool XSSAuditor::filterParamToken(const FilterTokenRequest& request)
    509 {
    510     ASSERT(request.token.type() == HTMLToken::StartTag);
    511     ASSERT(hasName(request.token, paramTag));
    512 
    513     size_t indexOfNameAttribute;
    514     if (!findAttributeWithName(request.token, nameAttr, indexOfNameAttribute))
    515         return false;
    516 
    517     const HTMLToken::Attribute& nameAttribute = request.token.attributes().at(indexOfNameAttribute);
    518     if (!HTMLParamElement::isURLParameter(String(nameAttribute.value)))
    519         return false;
    520 
    521     return eraseAttributeIfInjected(request, valueAttr, blankURL().string(), SrcLikeAttributeTruncation);
    522 }
    523 
    524 bool XSSAuditor::filterEmbedToken(const FilterTokenRequest& request)
    525 {
    526     ASSERT(request.token.type() == HTMLToken::StartTag);
    527     ASSERT(hasName(request.token, embedTag));
    528 
    529     bool didBlockScript = false;
    530     if (isContainedInRequest(canonicalizedSnippetForTagName(request))) {
    531         didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttributeTruncation);
    532         didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttributeTruncation);
    533         didBlockScript |= eraseAttributeIfInjected(request, typeAttr);
    534     }
    535     return didBlockScript;
    536 }
    537 
    538 bool XSSAuditor::filterAppletToken(const FilterTokenRequest& request)
    539 {
    540     ASSERT(request.token.type() == HTMLToken::StartTag);
    541     ASSERT(hasName(request.token, appletTag));
    542 
    543     bool didBlockScript = false;
    544     if (isContainedInRequest(canonicalizedSnippetForTagName(request))) {
    545         didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttributeTruncation);
    546         didBlockScript |= eraseAttributeIfInjected(request, objectAttr);
    547     }
    548     return didBlockScript;
    549 }
    550 
    551 bool XSSAuditor::filterFrameToken(const FilterTokenRequest& request)
    552 {
    553     ASSERT(request.token.type() == HTMLToken::StartTag);
    554     ASSERT(hasName(request.token, iframeTag) || hasName(request.token, frameTag));
    555 
    556     bool didBlockScript = eraseAttributeIfInjected(request, srcdocAttr, String(), ScriptLikeAttributeTruncation);
    557     if (isContainedInRequest(canonicalizedSnippetForTagName(request)))
    558         didBlockScript |= eraseAttributeIfInjected(request, srcAttr, String(), SrcLikeAttributeTruncation);
    559 
    560     return didBlockScript;
    561 }
    562 
    563 bool XSSAuditor::filterMetaToken(const FilterTokenRequest& request)
    564 {
    565     ASSERT(request.token.type() == HTMLToken::StartTag);
    566     ASSERT(hasName(request.token, metaTag));
    567 
    568     return eraseAttributeIfInjected(request, http_equivAttr);
    569 }
    570 
    571 bool XSSAuditor::filterBaseToken(const FilterTokenRequest& request)
    572 {
    573     ASSERT(request.token.type() == HTMLToken::StartTag);
    574     ASSERT(hasName(request.token, baseTag));
    575 
    576     return eraseAttributeIfInjected(request, hrefAttr);
    577 }
    578 
    579 bool XSSAuditor::filterFormToken(const FilterTokenRequest& request)
    580 {
    581     ASSERT(request.token.type() == HTMLToken::StartTag);
    582     ASSERT(hasName(request.token, formTag));
    583 
    584     return eraseAttributeIfInjected(request, actionAttr, kURLWithUniqueOrigin);
    585 }
    586 
    587 bool XSSAuditor::filterInputToken(const FilterTokenRequest& request)
    588 {
    589     ASSERT(request.token.type() == HTMLToken::StartTag);
    590     ASSERT(hasName(request.token, inputTag));
    591 
    592     return eraseAttributeIfInjected(request, formactionAttr, kURLWithUniqueOrigin, SrcLikeAttributeTruncation);
    593 }
    594 
    595 bool XSSAuditor::filterButtonToken(const FilterTokenRequest& request)
    596 {
    597     ASSERT(request.token.type() == HTMLToken::StartTag);
    598     ASSERT(hasName(request.token, buttonTag));
    599 
    600     return eraseAttributeIfInjected(request, formactionAttr, kURLWithUniqueOrigin, SrcLikeAttributeTruncation);
    601 }
    602 
    603 bool XSSAuditor::eraseDangerousAttributesIfInjected(const FilterTokenRequest& request)
    604 {
    605     DEFINE_STATIC_LOCAL(String, safeJavaScriptURL, ("javascript:void(0)"));
    606 
    607     bool didBlockScript = false;
    608     for (size_t i = 0; i < request.token.attributes().size(); ++i) {
    609         bool eraseAttribute = false;
    610         bool valueContainsJavaScriptURL = false;
    611         const HTMLToken::Attribute& attribute = request.token.attributes().at(i);
    612         // FIXME: Don't create a new String for every attribute.value in the document.
    613         if (isNameOfInlineEventHandler(attribute.name)) {
    614             eraseAttribute = isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), ScriptLikeAttributeTruncation));
    615         } else if (isSemicolonSeparatedAttribute(attribute)) {
    616             String subValue = semicolonSeparatedValueContainingJavaScriptURL(String(attribute.value));
    617             if (!subValue.isEmpty()) {
    618                 valueContainsJavaScriptURL = true;
    619                 eraseAttribute = isContainedInRequest(canonicalize(nameFromAttribute(request, attribute), NoTruncation))
    620                     && isContainedInRequest(canonicalize(subValue, ScriptLikeAttributeTruncation));
    621             }
    622         } else if (protocolIsJavaScript(stripLeadingAndTrailingHTMLSpaces(String(attribute.value)))) {
    623             valueContainsJavaScriptURL = true;
    624             eraseAttribute = isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), ScriptLikeAttributeTruncation));
    625         }
    626         if (!eraseAttribute)
    627             continue;
    628         request.token.eraseValueOfAttribute(i);
    629         if (valueContainsJavaScriptURL)
    630             request.token.appendToAttributeValue(i, safeJavaScriptURL);
    631         didBlockScript = true;
    632     }
    633     return didBlockScript;
    634 }
    635 
    636 bool XSSAuditor::eraseAttributeIfInjected(const FilterTokenRequest& request, const QualifiedName& attributeName, const String& replacementValue, TruncationKind treatment)
    637 {
    638     size_t indexOfAttribute = 0;
    639     if (!findAttributeWithName(request.token, attributeName, indexOfAttribute))
    640         return false;
    641 
    642     const HTMLToken::Attribute& attribute = request.token.attributes().at(indexOfAttribute);
    643     if (!isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), treatment)))
    644         return false;
    645 
    646     if (threadSafeMatch(attributeName, srcAttr)) {
    647         if (isLikelySafeResource(String(attribute.value)))
    648             return false;
    649     } else if (threadSafeMatch(attributeName, http_equivAttr)) {
    650         if (!isDangerousHTTPEquiv(String(attribute.value)))
    651             return false;
    652     }
    653 
    654     request.token.eraseValueOfAttribute(indexOfAttribute);
    655     if (!replacementValue.isEmpty())
    656         request.token.appendToAttributeValue(indexOfAttribute, replacementValue);
    657 
    658     return true;
    659 }
    660 
    661 String XSSAuditor::canonicalizedSnippetForTagName(const FilterTokenRequest& request)
    662 {
    663     // Grab a fixed number of characters equal to the length of the token's name plus one (to account for the "<").
    664     return canonicalize(request.sourceTracker.sourceForToken(request.token).substring(0, request.token.name().size() + 1), NoTruncation);
    665 }
    666 
    667 String XSSAuditor::nameFromAttribute(const FilterTokenRequest& request, const HTMLToken::Attribute& attribute)
    668 {
    669     // The range inlcudes the character which terminates the name. So,
    670     // for an input of |name="value"|, the snippet is |name=|.
    671     int start = attribute.nameRange.start - request.token.startIndex();
    672     int end = attribute.valueRange.start - request.token.startIndex();
    673     return request.sourceTracker.sourceForToken(request.token).substring(start, end - start);
    674 }
    675 
    676 String XSSAuditor::snippetFromAttribute(const FilterTokenRequest& request, const HTMLToken::Attribute& attribute)
    677 {
    678     // The range doesn't include the character which terminates the value. So,
    679     // for an input of |name="value"|, the snippet is |name="value|. For an
    680     // unquoted input of |name=value |, the snippet is |name=value|.
    681     // FIXME: We should grab one character before the name also.
    682     int start = attribute.nameRange.start - request.token.startIndex();
    683     int end = attribute.valueRange.end - request.token.startIndex();
    684     return request.sourceTracker.sourceForToken(request.token).substring(start, end - start);
    685 }
    686 
    687 String XSSAuditor::canonicalize(String snippet, TruncationKind treatment)
    688 {
    689     String decodedSnippet = fullyDecodeString(snippet, m_encoding);
    690 
    691     if (treatment != NoTruncation) {
    692         decodedSnippet.truncate(kMaximumFragmentLengthTarget);
    693         if (treatment == SrcLikeAttributeTruncation)
    694             truncateForSrcLikeAttribute(decodedSnippet);
    695         else if (treatment == ScriptLikeAttributeTruncation)
    696             truncateForScriptLikeAttribute(decodedSnippet);
    697     }
    698 
    699     return decodedSnippet.removeCharacters(&isNonCanonicalCharacter);
    700 }
    701 
    702 String XSSAuditor::canonicalizedSnippetForJavaScript(const FilterTokenRequest& request)
    703 {
    704     String string = request.sourceTracker.sourceForToken(request.token);
    705     size_t startPosition = 0;
    706     size_t endPosition = string.length();
    707     size_t foundPosition = kNotFound;
    708     size_t lastNonSpacePosition = kNotFound;
    709 
    710     // Skip over initial comments to find start of code.
    711     while (startPosition < endPosition) {
    712         while (startPosition < endPosition && isHTMLSpace<UChar>(string[startPosition]))
    713             startPosition++;
    714 
    715         // Under SVG/XML rules, only HTML comment syntax matters and the parser returns
    716         // these as a separate comment tokens. Having consumed whitespace, we need not look
    717         // further for these.
    718         if (request.shouldAllowCDATA)
    719             break;
    720 
    721         // Under HTML rules, both the HTML and JS comment synatx matters, and the HTML
    722         // comment ends at the end of the line, not with -->.
    723         if (startsHTMLCommentAt(string, startPosition) || startsSingleLineCommentAt(string, startPosition)) {
    724             while (startPosition < endPosition && !isJSNewline(string[startPosition]))
    725                 startPosition++;
    726         } else if (startsMultiLineCommentAt(string, startPosition)) {
    727             if (startPosition + 2 < endPosition && (foundPosition = string.find("*/", startPosition + 2)) != kNotFound)
    728                 startPosition = foundPosition + 2;
    729             else
    730                 startPosition = endPosition;
    731         } else
    732             break;
    733     }
    734 
    735     String result;
    736     while (startPosition < endPosition && !result.length()) {
    737         // Stop at next comment (using the same rules as above for SVG/XML vs HTML), when we encounter a comma,
    738         // when we hit an opening <script> tag, or when we exceed the maximum length target. The comma rule
    739         // covers a common parameter concatenation case performed by some web servers.
    740         lastNonSpacePosition = kNotFound;
    741         for (foundPosition = startPosition; foundPosition < endPosition; foundPosition++) {
    742             if (!request.shouldAllowCDATA) {
    743                 if (startsSingleLineCommentAt(string, foundPosition)
    744                     || startsMultiLineCommentAt(string, foundPosition)
    745                     || startsHTMLCommentAt(string, foundPosition)) {
    746                     break;
    747                 }
    748             }
    749             if (string[foundPosition] == ',')
    750                 break;
    751 
    752             if (lastNonSpacePosition != kNotFound && startsOpeningScriptTagAt(string, foundPosition)) {
    753                 foundPosition = lastNonSpacePosition;
    754                 break;
    755             }
    756             if (foundPosition > startPosition + kMaximumFragmentLengthTarget) {
    757                 // After hitting the length target, we can only stop at a point where we know we are
    758                 // not in the middle of a %-escape sequence. For the sake of simplicity, approximate
    759                 // not stopping inside a (possibly multiply encoded) %-escape sequence by breaking on
    760                 // whitespace only. We should have enough text in these cases to avoid false positives.
    761                 if (isHTMLSpace<UChar>(string[foundPosition]))
    762                     break;
    763             }
    764             if (!isHTMLSpace<UChar>(string[foundPosition]))
    765                 lastNonSpacePosition = foundPosition;
    766         }
    767         result = canonicalize(string.substring(startPosition, foundPosition - startPosition), NoTruncation);
    768         startPosition = foundPosition + 1;
    769     }
    770 
    771     return result;
    772 }
    773 
    774 bool XSSAuditor::isContainedInRequest(const String& decodedSnippet)
    775 {
    776     if (decodedSnippet.isEmpty())
    777         return false;
    778     if (m_decodedURL.find(decodedSnippet, 0, false) != kNotFound)
    779         return true;
    780     if (m_decodedHTTPBodySuffixTree && !m_decodedHTTPBodySuffixTree->mightContain(decodedSnippet))
    781         return false;
    782     return m_decodedHTTPBody.find(decodedSnippet, 0, false) != kNotFound;
    783 }
    784 
    785 bool XSSAuditor::isLikelySafeResource(const String& url)
    786 {
    787     // Give empty URLs and about:blank a pass. Making a resourceURL from an
    788     // empty string below will likely later fail the "no query args test" as
    789     // it inherits the document's query args.
    790     if (url.isEmpty() || url == blankURL().string())
    791         return true;
    792 
    793     // If the resource is loaded from the same host as the enclosing page, it's
    794     // probably not an XSS attack, so we reduce false positives by allowing the
    795     // request, ignoring scheme and port considerations. If the resource has a
    796     // query string, we're more suspicious, however, because that's pretty rare
    797     // and the attacker might be able to trick a server-side script into doing
    798     // something dangerous with the query string.
    799     if (m_documentURL.host().isEmpty())
    800         return false;
    801 
    802     KURL resourceURL(m_documentURL, url);
    803     return (m_documentURL.host() == resourceURL.host() && resourceURL.query().isEmpty());
    804 }
    805 
    806 bool XSSAuditor::isSafeToSendToAnotherThread() const
    807 {
    808     return m_documentURL.isSafeToSendToAnotherThread()
    809         && m_decodedURL.isSafeToSendToAnotherThread()
    810         && m_decodedHTTPBody.isSafeToSendToAnotherThread()
    811         && m_httpBodyAsString.isSafeToSendToAnotherThread();
    812 }
    813 
    814 } // namespace blink
    815