Home | History | Annotate | Download | only in page
      1 /*
      2  * Copyright (C) 2008, 2009 Daniel Bates (dbates (at) intudata.com)
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include "config.h"
     28 #include "XSSAuditor.h"
     29 
     30 #include <wtf/StdLibExtras.h>
     31 #include <wtf/Vector.h>
     32 
     33 #include "Console.h"
     34 #include "CString.h"
     35 #include "DocumentLoader.h"
     36 #include "DOMWindow.h"
     37 #include "Frame.h"
     38 #include "KURL.h"
     39 #include "PreloadScanner.h"
     40 #include "ResourceResponseBase.h"
     41 #include "ScriptSourceCode.h"
     42 #include "Settings.h"
     43 #include "TextResourceDecoder.h"
     44 
     45 using namespace WTF;
     46 
     47 namespace WebCore {
     48 
     49 static bool isNonCanonicalCharacter(UChar c)
     50 {
     51     // We remove all non-ASCII characters, including non-printable ASCII characters.
     52     //
     53     // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character.
     54     // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the
     55     // adverse effect that we remove any legitimate zeros from a string.
     56     //
     57     // For instance: new String("http://localhost:8000") => new String("http://localhost:8").
     58     return (c == '\\' || c == '0' || c < ' ' || c >= 127);
     59 }
     60 
     61 static bool isIllegalURICharacter(UChar c)
     62 {
     63     // The characters described in section 2.4.3 of RFC 2396 <http://www.faqs.org/rfcs/rfc2396.html> in addition to the
     64     // single quote character "'" are considered illegal URI characters. That is, the following characters cannot appear
     65     // in a valid URI: ', ", <, >
     66     //
     67     // If the request does not contain these characters then we can assume that no inline scripts have been injected
     68     // into the response page, because it is impossible to write an inline script of the form <script>...</script>
     69     // without "<", ">".
     70     return (c == '\'' || c == '"' || c == '<' || c == '>');
     71 }
     72 
     73 String XSSAuditor::CachingURLCanonicalizer::canonicalizeURL(const String& url, const TextEncoding& encoding, bool decodeEntities,
     74                                                             bool decodeURLEscapeSequencesTwice)
     75 {
     76     if (decodeEntities == m_decodeEntities && decodeURLEscapeSequencesTwice == m_decodeURLEscapeSequencesTwice
     77         && encoding == m_encoding && url == m_inputURL)
     78         return m_cachedCanonicalizedURL;
     79 
     80     m_cachedCanonicalizedURL = canonicalize(decodeURL(url, encoding, decodeEntities, decodeURLEscapeSequencesTwice));
     81     m_inputURL = url;
     82     m_encoding = encoding;
     83     m_decodeEntities = decodeEntities;
     84     m_decodeURLEscapeSequencesTwice = decodeURLEscapeSequencesTwice;
     85     return m_cachedCanonicalizedURL;
     86 }
     87 
     88 XSSAuditor::XSSAuditor(Frame* frame)
     89     : m_frame(frame)
     90 {
     91 }
     92 
     93 XSSAuditor::~XSSAuditor()
     94 {
     95 }
     96 
     97 bool XSSAuditor::isEnabled() const
     98 {
     99     Settings* settings = m_frame->settings();
    100     return (settings && settings->xssAuditorEnabled());
    101 }
    102 
    103 bool XSSAuditor::canEvaluate(const String& code) const
    104 {
    105     if (!isEnabled())
    106         return true;
    107 
    108     FindTask task;
    109     task.string = code;
    110     task.decodeEntities = false;
    111     task.allowRequestIfNoIllegalURICharacters = true;
    112 
    113     if (findInRequest(task)) {
    114         DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to execute a JavaScript script. Source code of script found within request.\n"));
    115         m_frame->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String());
    116         return false;
    117     }
    118     return true;
    119 }
    120 
    121 bool XSSAuditor::canEvaluateJavaScriptURL(const String& code) const
    122 {
    123     if (!isEnabled())
    124         return true;
    125 
    126     FindTask task;
    127     task.string = code;
    128     task.decodeURLEscapeSequencesTwice = true;
    129 
    130     if (findInRequest(task)) {
    131         DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to execute a JavaScript script. Source code of script found within request.\n"));
    132         m_frame->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String());
    133         return false;
    134     }
    135     return true;
    136 }
    137 
    138 bool XSSAuditor::canCreateInlineEventListener(const String&, const String& code) const
    139 {
    140     if (!isEnabled())
    141         return true;
    142 
    143     FindTask task;
    144     task.string = code;
    145     task.allowRequestIfNoIllegalURICharacters = true;
    146 
    147     if (findInRequest(task)) {
    148         DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to execute a JavaScript script. Source code of script found within request.\n"));
    149         m_frame->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String());
    150         return false;
    151     }
    152     return true;
    153 }
    154 
    155 bool XSSAuditor::canLoadExternalScriptFromSrc(const String& context, const String& url) const
    156 {
    157     if (!isEnabled())
    158         return true;
    159 
    160     if (isSameOriginResource(url))
    161         return true;
    162 
    163     FindTask task;
    164     task.context = context;
    165     task.string = url;
    166 
    167     if (findInRequest(task)) {
    168         DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to execute a JavaScript script. Source code of script found within request.\n"));
    169         m_frame->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String());
    170         return false;
    171     }
    172     return true;
    173 }
    174 
    175 bool XSSAuditor::canLoadObject(const String& url) const
    176 {
    177     if (!isEnabled())
    178         return true;
    179 
    180     if (isSameOriginResource(url))
    181         return true;
    182 
    183     FindTask task;
    184     task.string = url;
    185     task.allowRequestIfNoIllegalURICharacters = true;
    186 
    187     if (findInRequest(task)) {
    188         String consoleMessage = String::format("Refused to load an object. URL found within request: \"%s\".\n", url.utf8().data());
    189         m_frame->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String());
    190         return false;
    191     }
    192     return true;
    193 }
    194 
    195 bool XSSAuditor::canSetBaseElementURL(const String& url) const
    196 {
    197     if (!isEnabled())
    198         return true;
    199 
    200     if (isSameOriginResource(url))
    201         return true;
    202 
    203     FindTask task;
    204     task.string = url;
    205     task.allowRequestIfNoIllegalURICharacters = true;
    206 
    207     if (findInRequest(task)) {
    208         DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to load from document base URL. URL found within request.\n"));
    209         m_frame->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String());
    210         return false;
    211     }
    212     return true;
    213 }
    214 
    215 String XSSAuditor::canonicalize(const String& string)
    216 {
    217     String result = decodeHTMLEntities(string);
    218     return result.removeCharacters(&isNonCanonicalCharacter);
    219 }
    220 
    221 String XSSAuditor::decodeURL(const String& string, const TextEncoding& encoding, bool decodeEntities, bool decodeURLEscapeSequencesTwice)
    222 {
    223     String result;
    224     String url = string;
    225 
    226     url.replace('+', ' ');
    227     result = decodeURLEscapeSequences(url);
    228     CString utf8Url = result.utf8();
    229     String decodedResult = encoding.decode(utf8Url.data(), utf8Url.length());
    230     if (!decodedResult.isEmpty())
    231         result = decodedResult;
    232     if (decodeURLEscapeSequencesTwice) {
    233         result = decodeURLEscapeSequences(result);
    234         utf8Url = result.utf8();
    235         decodedResult = encoding.decode(utf8Url.data(), utf8Url.length());
    236         if (!decodedResult.isEmpty())
    237             result = decodedResult;
    238     }
    239     if (decodeEntities)
    240         result = decodeHTMLEntities(result);
    241     return result;
    242 }
    243 
    244 String XSSAuditor::decodeHTMLEntities(const String& string, bool leaveUndecodableEntitiesUntouched)
    245 {
    246     SegmentedString source(string);
    247     SegmentedString sourceShadow;
    248     Vector<UChar> result;
    249 
    250     while (!source.isEmpty()) {
    251         UChar cc = *source;
    252         source.advance();
    253 
    254         if (cc != '&') {
    255             result.append(cc);
    256             continue;
    257         }
    258 
    259         if (leaveUndecodableEntitiesUntouched)
    260             sourceShadow = source;
    261         bool notEnoughCharacters = false;
    262         unsigned entity = PreloadScanner::consumeEntity(source, notEnoughCharacters);
    263         // We ignore notEnoughCharacters because we might as well use this loop
    264         // to copy the remaining characters into |result|.
    265 
    266         if (entity > 0xFFFF) {
    267             result.append(U16_LEAD(entity));
    268             result.append(U16_TRAIL(entity));
    269         } else if (entity && (!leaveUndecodableEntitiesUntouched || entity != 0xFFFD)){
    270             result.append(entity);
    271         } else {
    272             result.append('&');
    273             if (leaveUndecodableEntitiesUntouched)
    274                 source = sourceShadow;
    275         }
    276     }
    277 
    278     return String::adopt(result);
    279 }
    280 
    281 bool XSSAuditor::isSameOriginResource(const String& url) const
    282 {
    283     // If the resource is loaded from the same URL as the enclosing page, it's
    284     // probably not an XSS attack, so we reduce false positives by allowing the
    285     // request. If the resource has a query string, we're more suspicious,
    286     // however, because that's pretty rare and the attacker might be able to
    287     // trick a server-side script into doing something dangerous with the query
    288     // string.
    289     KURL resourceURL(m_frame->document()->url(), url);
    290     return (m_frame->document()->url().host() == resourceURL.host() && resourceURL.query().isEmpty());
    291 }
    292 
    293 bool XSSAuditor::shouldFullPageBlockForXSSProtectionHeader() const
    294 {
    295     // If we detect an XSS attack and find the HTTP header "X-XSS-Protection: 12" then
    296     // we will stop loading the page as opposed to ignoring the script. The value "12"
    297     // came from a personal communication, see <https://bugs.webkit.org/show_bug.cgi?id=27312>
    298     // for more details.
    299     DEFINE_STATIC_LOCAL(String, XSSProtectionHeader, ("X-XSS-Protection"));
    300 
    301     Frame* frame = m_frame;
    302     if (frame->document()->url() == blankURL())
    303         frame = m_frame->tree()->parent();
    304 
    305     // We strip any whitespace characters to conform to the behavior in Internet Explorer.
    306     String xssProtectionValue = frame->loader()->documentLoader()->response().httpHeaderField(XSSProtectionHeader).stripWhiteSpace();
    307     return (xssProtectionValue.length() >= 2 && xssProtectionValue[0] == '1' && xssProtectionValue[1] == '2');
    308 }
    309 
    310 bool XSSAuditor::findInRequest(const FindTask& task) const
    311 {
    312     bool result = false;
    313     Frame* parentFrame = m_frame->tree()->parent();
    314     Frame* blockFrame = parentFrame;
    315     if (parentFrame && m_frame->document()->url() == blankURL())
    316         result = findInRequest(parentFrame, task);
    317     if (!result) {
    318         result = findInRequest(m_frame, task);
    319         blockFrame = m_frame;
    320     }
    321     if (result && blockFrame && shouldFullPageBlockForXSSProtectionHeader()) {
    322         blockFrame->loader()->stopAllLoaders();
    323         blockFrame->redirectScheduler()->scheduleLocationChange(blankURL(), String());
    324     }
    325     return result;
    326 }
    327 
    328 bool XSSAuditor::findInRequest(Frame* frame, const FindTask& task) const
    329 {
    330     ASSERT(frame->document());
    331 
    332     if (!frame->document()->decoder()) {
    333         // Note, JavaScript URLs do not have a charset.
    334         return false;
    335     }
    336 
    337     if (task.string.isEmpty())
    338         return false;
    339 
    340     FormData* formDataObj = frame->loader()->documentLoader()->originalRequest().httpBody();
    341     const bool hasFormData = formDataObj && !formDataObj->isEmpty();
    342     String pageURL = frame->document()->url().string();
    343 
    344     String canonicalizedString;
    345     if (!hasFormData && task.string.length() > 2 * pageURL.length()) {
    346         // Q: Why do we bother to do this check at all?
    347         // A: Canonicalizing large inline scripts can be expensive.  We want to
    348         //    reduce the size of the string before we call canonicalize below,
    349         //    since it could result in an unneeded allocation and memcpy.
    350         //
    351         // Q: Why do we multiply by two here?
    352         // A: We attempt to detect reflected XSS even when the server
    353         //    transforms the attacker's input with addSlashes.  The best the
    354         //    attacker can do get the server to inflate his/her input by a
    355         //    factor of two by sending " characters, which the server
    356         //    transforms to \".
    357         canonicalizedString = task.string.substring(0, 2 * pageURL.length());
    358     } else
    359         canonicalizedString = task.string;
    360 
    361     if (frame->document()->url().protocolIs("data"))
    362         return false;
    363 
    364     canonicalizedString = canonicalize(canonicalizedString);
    365     if (canonicalizedString.isEmpty())
    366         return false;
    367 
    368     if (!task.context.isEmpty())
    369         canonicalizedString = task.context + canonicalizedString;
    370 
    371     String decodedPageURL = m_cache.canonicalizeURL(pageURL, frame->document()->decoder()->encoding(), task.decodeEntities, task.decodeURLEscapeSequencesTwice);
    372 
    373     if (task.allowRequestIfNoIllegalURICharacters && !hasFormData && decodedPageURL.find(&isIllegalURICharacter, 0) == -1)
    374         return false; // Injection is impossible because the request does not contain any illegal URI characters.
    375 
    376     if (decodedPageURL.find(canonicalizedString, 0, false) != -1)
    377         return true; // We've found the string in the GET data.
    378 
    379     if (hasFormData) {
    380         String decodedFormData = m_cache.canonicalizeURL(formDataObj->flattenToString(), frame->document()->decoder()->encoding(), task.decodeEntities, task.decodeURLEscapeSequencesTwice);
    381         if (decodedFormData.find(canonicalizedString, 0, false) != -1)
    382             return true; // We found the string in the POST data.
    383     }
    384 
    385     return false;
    386 }
    387 
    388 } // namespace WebCore
    389 
    390