1 /* 2 * Copyright (C) 2008, 2009 Daniel Bates (dbates (at) intudata.com) 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "config.h" 28 #include "XSSAuditor.h" 29 30 #include <wtf/StdLibExtras.h> 31 #include <wtf/Vector.h> 32 33 #include "Console.h" 34 #include "CString.h" 35 #include "DocumentLoader.h" 36 #include "DOMWindow.h" 37 #include "Frame.h" 38 #include "KURL.h" 39 #include "PreloadScanner.h" 40 #include "ResourceResponseBase.h" 41 #include "ScriptSourceCode.h" 42 #include "Settings.h" 43 #include "TextResourceDecoder.h" 44 45 using namespace WTF; 46 47 namespace WebCore { 48 49 static bool isNonCanonicalCharacter(UChar c) 50 { 51 // We remove all non-ASCII characters, including non-printable ASCII characters. 52 // 53 // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character. 54 // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the 55 // adverse effect that we remove any legitimate zeros from a string. 56 // 57 // For instance: new String("http://localhost:8000") => new String("http://localhost:8"). 58 return (c == '\\' || c == '0' || c < ' ' || c >= 127); 59 } 60 61 static bool isIllegalURICharacter(UChar c) 62 { 63 // The characters described in section 2.4.3 of RFC 2396 <http://www.faqs.org/rfcs/rfc2396.html> in addition to the 64 // single quote character "'" are considered illegal URI characters. That is, the following characters cannot appear 65 // in a valid URI: ', ", <, > 66 // 67 // If the request does not contain these characters then we can assume that no inline scripts have been injected 68 // into the response page, because it is impossible to write an inline script of the form <script>...</script> 69 // without "<", ">". 70 return (c == '\'' || c == '"' || c == '<' || c == '>'); 71 } 72 73 String XSSAuditor::CachingURLCanonicalizer::canonicalizeURL(const String& url, const TextEncoding& encoding, bool decodeEntities, 74 bool decodeURLEscapeSequencesTwice) 75 { 76 if (decodeEntities == m_decodeEntities && decodeURLEscapeSequencesTwice == m_decodeURLEscapeSequencesTwice 77 && encoding == m_encoding && url == m_inputURL) 78 return m_cachedCanonicalizedURL; 79 80 m_cachedCanonicalizedURL = canonicalize(decodeURL(url, encoding, decodeEntities, decodeURLEscapeSequencesTwice)); 81 m_inputURL = url; 82 m_encoding = encoding; 83 m_decodeEntities = decodeEntities; 84 m_decodeURLEscapeSequencesTwice = decodeURLEscapeSequencesTwice; 85 return m_cachedCanonicalizedURL; 86 } 87 88 XSSAuditor::XSSAuditor(Frame* frame) 89 : m_frame(frame) 90 { 91 } 92 93 XSSAuditor::~XSSAuditor() 94 { 95 } 96 97 bool XSSAuditor::isEnabled() const 98 { 99 Settings* settings = m_frame->settings(); 100 return (settings && settings->xssAuditorEnabled()); 101 } 102 103 bool XSSAuditor::canEvaluate(const String& code) const 104 { 105 if (!isEnabled()) 106 return true; 107 108 FindTask task; 109 task.string = code; 110 task.decodeEntities = false; 111 task.allowRequestIfNoIllegalURICharacters = true; 112 113 if (findInRequest(task)) { 114 DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to execute a JavaScript script. Source code of script found within request.\n")); 115 m_frame->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String()); 116 return false; 117 } 118 return true; 119 } 120 121 bool XSSAuditor::canEvaluateJavaScriptURL(const String& code) const 122 { 123 if (!isEnabled()) 124 return true; 125 126 FindTask task; 127 task.string = code; 128 task.decodeURLEscapeSequencesTwice = true; 129 130 if (findInRequest(task)) { 131 DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to execute a JavaScript script. Source code of script found within request.\n")); 132 m_frame->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String()); 133 return false; 134 } 135 return true; 136 } 137 138 bool XSSAuditor::canCreateInlineEventListener(const String&, const String& code) const 139 { 140 if (!isEnabled()) 141 return true; 142 143 FindTask task; 144 task.string = code; 145 task.allowRequestIfNoIllegalURICharacters = true; 146 147 if (findInRequest(task)) { 148 DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to execute a JavaScript script. Source code of script found within request.\n")); 149 m_frame->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String()); 150 return false; 151 } 152 return true; 153 } 154 155 bool XSSAuditor::canLoadExternalScriptFromSrc(const String& context, const String& url) const 156 { 157 if (!isEnabled()) 158 return true; 159 160 if (isSameOriginResource(url)) 161 return true; 162 163 FindTask task; 164 task.context = context; 165 task.string = url; 166 167 if (findInRequest(task)) { 168 DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to execute a JavaScript script. Source code of script found within request.\n")); 169 m_frame->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String()); 170 return false; 171 } 172 return true; 173 } 174 175 bool XSSAuditor::canLoadObject(const String& url) const 176 { 177 if (!isEnabled()) 178 return true; 179 180 if (isSameOriginResource(url)) 181 return true; 182 183 FindTask task; 184 task.string = url; 185 task.allowRequestIfNoIllegalURICharacters = true; 186 187 if (findInRequest(task)) { 188 String consoleMessage = String::format("Refused to load an object. URL found within request: \"%s\".\n", url.utf8().data()); 189 m_frame->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String()); 190 return false; 191 } 192 return true; 193 } 194 195 bool XSSAuditor::canSetBaseElementURL(const String& url) const 196 { 197 if (!isEnabled()) 198 return true; 199 200 if (isSameOriginResource(url)) 201 return true; 202 203 FindTask task; 204 task.string = url; 205 task.allowRequestIfNoIllegalURICharacters = true; 206 207 if (findInRequest(task)) { 208 DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to load from document base URL. URL found within request.\n")); 209 m_frame->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String()); 210 return false; 211 } 212 return true; 213 } 214 215 String XSSAuditor::canonicalize(const String& string) 216 { 217 String result = decodeHTMLEntities(string); 218 return result.removeCharacters(&isNonCanonicalCharacter); 219 } 220 221 String XSSAuditor::decodeURL(const String& string, const TextEncoding& encoding, bool decodeEntities, bool decodeURLEscapeSequencesTwice) 222 { 223 String result; 224 String url = string; 225 226 url.replace('+', ' '); 227 result = decodeURLEscapeSequences(url); 228 CString utf8Url = result.utf8(); 229 String decodedResult = encoding.decode(utf8Url.data(), utf8Url.length()); 230 if (!decodedResult.isEmpty()) 231 result = decodedResult; 232 if (decodeURLEscapeSequencesTwice) { 233 result = decodeURLEscapeSequences(result); 234 utf8Url = result.utf8(); 235 decodedResult = encoding.decode(utf8Url.data(), utf8Url.length()); 236 if (!decodedResult.isEmpty()) 237 result = decodedResult; 238 } 239 if (decodeEntities) 240 result = decodeHTMLEntities(result); 241 return result; 242 } 243 244 String XSSAuditor::decodeHTMLEntities(const String& string, bool leaveUndecodableEntitiesUntouched) 245 { 246 SegmentedString source(string); 247 SegmentedString sourceShadow; 248 Vector<UChar> result; 249 250 while (!source.isEmpty()) { 251 UChar cc = *source; 252 source.advance(); 253 254 if (cc != '&') { 255 result.append(cc); 256 continue; 257 } 258 259 if (leaveUndecodableEntitiesUntouched) 260 sourceShadow = source; 261 bool notEnoughCharacters = false; 262 unsigned entity = PreloadScanner::consumeEntity(source, notEnoughCharacters); 263 // We ignore notEnoughCharacters because we might as well use this loop 264 // to copy the remaining characters into |result|. 265 266 if (entity > 0xFFFF) { 267 result.append(U16_LEAD(entity)); 268 result.append(U16_TRAIL(entity)); 269 } else if (entity && (!leaveUndecodableEntitiesUntouched || entity != 0xFFFD)){ 270 result.append(entity); 271 } else { 272 result.append('&'); 273 if (leaveUndecodableEntitiesUntouched) 274 source = sourceShadow; 275 } 276 } 277 278 return String::adopt(result); 279 } 280 281 bool XSSAuditor::isSameOriginResource(const String& url) const 282 { 283 // If the resource is loaded from the same URL as the enclosing page, it's 284 // probably not an XSS attack, so we reduce false positives by allowing the 285 // request. If the resource has a query string, we're more suspicious, 286 // however, because that's pretty rare and the attacker might be able to 287 // trick a server-side script into doing something dangerous with the query 288 // string. 289 KURL resourceURL(m_frame->document()->url(), url); 290 return (m_frame->document()->url().host() == resourceURL.host() && resourceURL.query().isEmpty()); 291 } 292 293 bool XSSAuditor::shouldFullPageBlockForXSSProtectionHeader() const 294 { 295 // If we detect an XSS attack and find the HTTP header "X-XSS-Protection: 12" then 296 // we will stop loading the page as opposed to ignoring the script. The value "12" 297 // came from a personal communication, see <https://bugs.webkit.org/show_bug.cgi?id=27312> 298 // for more details. 299 DEFINE_STATIC_LOCAL(String, XSSProtectionHeader, ("X-XSS-Protection")); 300 301 Frame* frame = m_frame; 302 if (frame->document()->url() == blankURL()) 303 frame = m_frame->tree()->parent(); 304 305 // We strip any whitespace characters to conform to the behavior in Internet Explorer. 306 String xssProtectionValue = frame->loader()->documentLoader()->response().httpHeaderField(XSSProtectionHeader).stripWhiteSpace(); 307 return (xssProtectionValue.length() >= 2 && xssProtectionValue[0] == '1' && xssProtectionValue[1] == '2'); 308 } 309 310 bool XSSAuditor::findInRequest(const FindTask& task) const 311 { 312 bool result = false; 313 Frame* parentFrame = m_frame->tree()->parent(); 314 Frame* blockFrame = parentFrame; 315 if (parentFrame && m_frame->document()->url() == blankURL()) 316 result = findInRequest(parentFrame, task); 317 if (!result) { 318 result = findInRequest(m_frame, task); 319 blockFrame = m_frame; 320 } 321 if (result && blockFrame && shouldFullPageBlockForXSSProtectionHeader()) { 322 blockFrame->loader()->stopAllLoaders(); 323 blockFrame->redirectScheduler()->scheduleLocationChange(blankURL(), String()); 324 } 325 return result; 326 } 327 328 bool XSSAuditor::findInRequest(Frame* frame, const FindTask& task) const 329 { 330 ASSERT(frame->document()); 331 332 if (!frame->document()->decoder()) { 333 // Note, JavaScript URLs do not have a charset. 334 return false; 335 } 336 337 if (task.string.isEmpty()) 338 return false; 339 340 FormData* formDataObj = frame->loader()->documentLoader()->originalRequest().httpBody(); 341 const bool hasFormData = formDataObj && !formDataObj->isEmpty(); 342 String pageURL = frame->document()->url().string(); 343 344 String canonicalizedString; 345 if (!hasFormData && task.string.length() > 2 * pageURL.length()) { 346 // Q: Why do we bother to do this check at all? 347 // A: Canonicalizing large inline scripts can be expensive. We want to 348 // reduce the size of the string before we call canonicalize below, 349 // since it could result in an unneeded allocation and memcpy. 350 // 351 // Q: Why do we multiply by two here? 352 // A: We attempt to detect reflected XSS even when the server 353 // transforms the attacker's input with addSlashes. The best the 354 // attacker can do get the server to inflate his/her input by a 355 // factor of two by sending " characters, which the server 356 // transforms to \". 357 canonicalizedString = task.string.substring(0, 2 * pageURL.length()); 358 } else 359 canonicalizedString = task.string; 360 361 if (frame->document()->url().protocolIs("data")) 362 return false; 363 364 canonicalizedString = canonicalize(canonicalizedString); 365 if (canonicalizedString.isEmpty()) 366 return false; 367 368 if (!task.context.isEmpty()) 369 canonicalizedString = task.context + canonicalizedString; 370 371 String decodedPageURL = m_cache.canonicalizeURL(pageURL, frame->document()->decoder()->encoding(), task.decodeEntities, task.decodeURLEscapeSequencesTwice); 372 373 if (task.allowRequestIfNoIllegalURICharacters && !hasFormData && decodedPageURL.find(&isIllegalURICharacter, 0) == -1) 374 return false; // Injection is impossible because the request does not contain any illegal URI characters. 375 376 if (decodedPageURL.find(canonicalizedString, 0, false) != -1) 377 return true; // We've found the string in the GET data. 378 379 if (hasFormData) { 380 String decodedFormData = m_cache.canonicalizeURL(formDataObj->flattenToString(), frame->document()->decoder()->encoding(), task.decodeEntities, task.decodeURLEscapeSequencesTwice); 381 if (decodedFormData.find(canonicalizedString, 0, false) != -1) 382 return true; // We found the string in the POST data. 383 } 384 385 return false; 386 } 387 388 } // namespace WebCore 389 390