1 /* 2 * Copyright (C) 2011 Adam Barth. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "config.h" 27 #include "XSSFilter.h" 28 29 #include "DOMWindow.h" 30 #include "Document.h" 31 #include "DocumentLoader.h" 32 #include "Frame.h" 33 #include "HTMLDocumentParser.h" 34 #include "HTMLNames.h" 35 #include "HTMLParamElement.h" 36 #include "HTMLParserIdioms.h" 37 #include "Settings.h" 38 #include "TextEncoding.h" 39 #include "TextResourceDecoder.h" 40 #include <wtf/text/CString.h> 41 42 namespace WebCore { 43 44 using namespace HTMLNames; 45 46 static bool isNonCanonicalCharacter(UChar c) 47 { 48 // We remove all non-ASCII characters, including non-printable ASCII characters. 49 // 50 // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character. 51 // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the 52 // adverse effect that we remove any legitimate zeros from a string. 53 // 54 // For instance: new String("http://localhost:8000") => new String("http://localhost:8"). 55 return (c == '\\' || c == '0' || c == '\0' || c >= 127); 56 } 57 58 static String canonicalize(const String& string) 59 { 60 return string.removeCharacters(&isNonCanonicalCharacter); 61 } 62 63 static bool isRequiredForInjection(UChar c) 64 { 65 return (c == '\'' || c == '"' || c == '<' || c == '>'); 66 } 67 68 static bool hasName(const HTMLToken& token, const QualifiedName& name) 69 { 70 return equalIgnoringNullity(token.name(), static_cast<const String&>(name.localName())); 71 } 72 73 static bool findAttributeWithName(const HTMLToken& token, const QualifiedName& name, size_t& indexOfMatchingAttribute) 74 { 75 for (size_t i = 0; i < token.attributes().size(); ++i) { 76 if (equalIgnoringNullity(token.attributes().at(i).m_name, name.localName())) { 77 indexOfMatchingAttribute = i; 78 return true; 79 } 80 } 81 return false; 82 } 83 84 static bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name) 85 { 86 const size_t lengthOfShortestInlineEventHandlerName = 5; // To wit: oncut. 87 if (name.size() < lengthOfShortestInlineEventHandlerName) 88 return false; 89 return name[0] == 'o' && name[1] == 'n'; 90 } 91 92 static bool isDangerousHTTPEquiv(const String& value) 93 { 94 String equiv = value.stripWhiteSpace(); 95 return equalIgnoringCase(equiv, "refresh") || equalIgnoringCase(equiv, "set-cookie"); 96 } 97 98 static bool containsJavaScriptURL(const Vector<UChar, 32>& value) 99 { 100 static const char javaScriptScheme[] = "javascript:"; 101 static const size_t lengthOfJavaScriptScheme = sizeof(javaScriptScheme) - 1; 102 103 size_t i; 104 for (i = 0; i < value.size(); ++i) { 105 if (!isHTMLSpace(value[i])) 106 break; 107 } 108 109 if (value.size() - i < lengthOfJavaScriptScheme) 110 return false; 111 112 return equalIgnoringCase(value.data() + i, javaScriptScheme, lengthOfJavaScriptScheme); 113 } 114 115 static String decodeURL(const String& string, const TextEncoding& encoding) 116 { 117 String workingString = string; 118 workingString.replace('+', ' '); 119 workingString = decodeURLEscapeSequences(workingString); 120 CString workingStringUTF8 = workingString.utf8(); 121 String decodedString = encoding.decode(workingStringUTF8.data(), workingStringUTF8.length()); 122 // FIXME: Is this check necessary? 123 if (decodedString.isEmpty()) 124 return canonicalize(workingString); 125 return canonicalize(decodedString); 126 } 127 128 XSSFilter::XSSFilter(HTMLDocumentParser* parser) 129 : m_parser(parser) 130 , m_isEnabled(false) 131 , m_xssProtection(XSSProtectionEnabled) 132 , m_state(Uninitialized) 133 { 134 ASSERT(m_parser); 135 if (Frame* frame = parser->document()->frame()) { 136 if (Settings* settings = frame->settings()) 137 m_isEnabled = settings->xssAuditorEnabled(); 138 } 139 // Although tempting to call init() at this point, the various objects 140 // we want to reference might not all have been constructed yet. 141 } 142 143 void XSSFilter::init() 144 { 145 const size_t miniumLengthForSuffixTree = 512; // FIXME: Tune this parameter. 146 const int suffixTreeDepth = 5; 147 148 ASSERT(m_state == Uninitialized); 149 m_state = Initial; 150 151 if (!m_isEnabled) 152 return; 153 154 // In theory, the Document could have detached from the Frame after the 155 // XSSFilter was constructed. 156 if (!m_parser->document()->frame()) { 157 m_isEnabled = false; 158 return; 159 } 160 161 const KURL& url = m_parser->document()->url(); 162 163 if (url.protocolIsData()) { 164 m_isEnabled = false; 165 return; 166 } 167 168 TextResourceDecoder* decoder = m_parser->document()->decoder(); 169 m_decodedURL = decoder ? decodeURL(url.string(), decoder->encoding()) : url.string(); 170 if (m_decodedURL.find(isRequiredForInjection, 0) == notFound) 171 m_decodedURL = String(); 172 173 if (DocumentLoader* documentLoader = m_parser->document()->frame()->loader()->documentLoader()) { 174 DEFINE_STATIC_LOCAL(String, XSSProtectionHeader, ("X-XSS-Protection")); 175 m_xssProtection = parseXSSProtectionHeader(documentLoader->response().httpHeaderField(XSSProtectionHeader)); 176 177 FormData* httpBody = documentLoader->originalRequest().httpBody(); 178 if (httpBody && !httpBody->isEmpty()) { 179 String httpBodyAsString = httpBody->flattenToString(); 180 m_decodedHTTPBody = decoder ? decodeURL(httpBodyAsString, decoder->encoding()) : httpBodyAsString; 181 if (m_decodedHTTPBody.find(isRequiredForInjection, 0) == notFound) 182 m_decodedHTTPBody = String(); 183 if (m_decodedHTTPBody.length() >= miniumLengthForSuffixTree) 184 m_decodedHTTPBodySuffixTree = adoptPtr(new SuffixTree<ASCIICodebook>(m_decodedHTTPBody, suffixTreeDepth)); 185 } 186 } 187 188 if (m_decodedURL.isEmpty() && m_decodedHTTPBody.isEmpty()) 189 m_isEnabled = false; 190 } 191 192 void XSSFilter::filterToken(HTMLToken& token) 193 { 194 if (m_state == Uninitialized) { 195 init(); 196 ASSERT(m_state == Initial); 197 } 198 199 if (!m_isEnabled || m_xssProtection == XSSProtectionDisabled) 200 return; 201 202 bool didBlockScript = false; 203 204 switch (m_state) { 205 case Uninitialized: 206 ASSERT_NOT_REACHED(); 207 break; 208 case Initial: 209 didBlockScript = filterTokenInitial(token); 210 break; 211 case AfterScriptStartTag: 212 didBlockScript = filterTokenAfterScriptStartTag(token); 213 ASSERT(m_state == Initial); 214 m_cachedSnippet = String(); 215 break; 216 } 217 218 if (didBlockScript) { 219 // FIXME: Consider using a more helpful console message. 220 DEFINE_STATIC_LOCAL(String, consoleMessage, ("Refused to execute a JavaScript script. Source code of script found within request.\n")); 221 // FIXME: We should add the real line number to the console. 222 m_parser->document()->domWindow()->console()->addMessage(JSMessageSource, LogMessageType, ErrorMessageLevel, consoleMessage, 1, String()); 223 224 if (m_xssProtection == XSSProtectionBlockEnabled) { 225 m_parser->document()->frame()->loader()->stopAllLoaders(); 226 m_parser->document()->frame()->navigationScheduler()->scheduleLocationChange(m_parser->document()->securityOrigin(), blankURL(), String()); 227 } 228 } 229 } 230 231 bool XSSFilter::filterTokenInitial(HTMLToken& token) 232 { 233 ASSERT(m_state == Initial); 234 235 if (token.type() != HTMLToken::StartTag) 236 return false; 237 238 bool didBlockScript = eraseDangerousAttributesIfInjected(token); 239 240 if (hasName(token, scriptTag)) 241 didBlockScript |= filterScriptToken(token); 242 else if (hasName(token, objectTag)) 243 didBlockScript |= filterObjectToken(token); 244 else if (hasName(token, paramTag)) 245 didBlockScript |= filterParamToken(token); 246 else if (hasName(token, embedTag)) 247 didBlockScript |= filterEmbedToken(token); 248 else if (hasName(token, appletTag)) 249 didBlockScript |= filterAppletToken(token); 250 else if (hasName(token, iframeTag)) 251 didBlockScript |= filterIframeToken(token); 252 else if (hasName(token, metaTag)) 253 didBlockScript |= filterMetaToken(token); 254 else if (hasName(token, baseTag)) 255 didBlockScript |= filterBaseToken(token); 256 else if (hasName(token, formTag)) 257 didBlockScript |= filterFormToken(token); 258 259 return didBlockScript; 260 } 261 262 bool XSSFilter::filterTokenAfterScriptStartTag(HTMLToken& token) 263 { 264 ASSERT(m_state == AfterScriptStartTag); 265 m_state = Initial; 266 267 if (token.type() != HTMLToken::Character) { 268 ASSERT(token.type() == HTMLToken::EndTag || token.type() == HTMLToken::EndOfFile); 269 return false; 270 } 271 272 int start = 0; 273 // FIXME: We probably want to grab only the first few characters of the 274 // contents of the script element. 275 int end = token.endIndex() - token.startIndex(); 276 if (isContainedInRequest(m_cachedSnippet + snippetForRange(token, start, end))) { 277 token.eraseCharacters(); 278 token.appendToCharacter(' '); // Technically, character tokens can't be empty. 279 return true; 280 } 281 return false; 282 } 283 284 bool XSSFilter::filterScriptToken(HTMLToken& token) 285 { 286 ASSERT(m_state == Initial); 287 ASSERT(token.type() == HTMLToken::StartTag); 288 ASSERT(hasName(token, scriptTag)); 289 290 if (eraseAttributeIfInjected(token, srcAttr, blankURL().string())) 291 return true; 292 293 m_state = AfterScriptStartTag; 294 m_cachedSnippet = m_parser->sourceForToken(token); 295 return false; 296 } 297 298 bool XSSFilter::filterObjectToken(HTMLToken& token) 299 { 300 ASSERT(m_state == Initial); 301 ASSERT(token.type() == HTMLToken::StartTag); 302 ASSERT(hasName(token, objectTag)); 303 304 bool didBlockScript = false; 305 306 didBlockScript |= eraseAttributeIfInjected(token, dataAttr, blankURL().string()); 307 didBlockScript |= eraseAttributeIfInjected(token, typeAttr); 308 didBlockScript |= eraseAttributeIfInjected(token, classidAttr); 309 310 return didBlockScript; 311 } 312 313 bool XSSFilter::filterParamToken(HTMLToken& token) 314 { 315 ASSERT(m_state == Initial); 316 ASSERT(token.type() == HTMLToken::StartTag); 317 ASSERT(hasName(token, paramTag)); 318 319 size_t indexOfNameAttribute; 320 if (!findAttributeWithName(token, nameAttr, indexOfNameAttribute)) 321 return false; 322 323 const HTMLToken::Attribute& nameAttribute = token.attributes().at(indexOfNameAttribute); 324 String name = String(nameAttribute.m_value.data(), nameAttribute.m_value.size()); 325 326 if (!HTMLParamElement::isURLParameter(name)) 327 return false; 328 329 return eraseAttributeIfInjected(token, valueAttr, blankURL().string()); 330 } 331 332 bool XSSFilter::filterEmbedToken(HTMLToken& token) 333 { 334 ASSERT(m_state == Initial); 335 ASSERT(token.type() == HTMLToken::StartTag); 336 ASSERT(hasName(token, embedTag)); 337 338 bool didBlockScript = false; 339 340 didBlockScript |= eraseAttributeIfInjected(token, srcAttr, blankURL().string()); 341 didBlockScript |= eraseAttributeIfInjected(token, typeAttr); 342 343 return didBlockScript; 344 } 345 346 bool XSSFilter::filterAppletToken(HTMLToken& token) 347 { 348 ASSERT(m_state == Initial); 349 ASSERT(token.type() == HTMLToken::StartTag); 350 ASSERT(hasName(token, appletTag)); 351 352 bool didBlockScript = false; 353 354 didBlockScript |= eraseAttributeIfInjected(token, codeAttr); 355 didBlockScript |= eraseAttributeIfInjected(token, objectAttr); 356 357 return didBlockScript; 358 } 359 360 bool XSSFilter::filterIframeToken(HTMLToken& token) 361 { 362 ASSERT(m_state == Initial); 363 ASSERT(token.type() == HTMLToken::StartTag); 364 ASSERT(hasName(token, iframeTag)); 365 366 return eraseAttributeIfInjected(token, srcAttr); 367 } 368 369 bool XSSFilter::filterMetaToken(HTMLToken& token) 370 { 371 ASSERT(m_state == Initial); 372 ASSERT(token.type() == HTMLToken::StartTag); 373 ASSERT(hasName(token, metaTag)); 374 375 return eraseAttributeIfInjected(token, http_equivAttr); 376 } 377 378 bool XSSFilter::filterBaseToken(HTMLToken& token) 379 { 380 ASSERT(m_state == Initial); 381 ASSERT(token.type() == HTMLToken::StartTag); 382 ASSERT(hasName(token, baseTag)); 383 384 return eraseAttributeIfInjected(token, hrefAttr); 385 } 386 387 bool XSSFilter::filterFormToken(HTMLToken& token) 388 { 389 ASSERT(m_state == Initial); 390 ASSERT(token.type() == HTMLToken::StartTag); 391 ASSERT(hasName(token, formTag)); 392 393 return eraseAttributeIfInjected(token, actionAttr); 394 } 395 396 bool XSSFilter::eraseDangerousAttributesIfInjected(HTMLToken& token) 397 { 398 DEFINE_STATIC_LOCAL(String, safeJavaScriptURL, ("javascript:void(0)")); 399 400 bool didBlockScript = false; 401 for (size_t i = 0; i < token.attributes().size(); ++i) { 402 const HTMLToken::Attribute& attribute = token.attributes().at(i); 403 bool isInlineEventHandler = isNameOfInlineEventHandler(attribute.m_name); 404 bool valueContainsJavaScriptURL = isInlineEventHandler ? false : containsJavaScriptURL(attribute.m_value); 405 if (!isInlineEventHandler && !valueContainsJavaScriptURL) 406 continue; 407 if (!isContainedInRequest(snippetForAttribute(token, attribute))) 408 continue; 409 token.eraseValueOfAttribute(i); 410 if (valueContainsJavaScriptURL) 411 token.appendToAttributeValue(i, safeJavaScriptURL); 412 didBlockScript = true; 413 } 414 return didBlockScript; 415 } 416 417 bool XSSFilter::eraseAttributeIfInjected(HTMLToken& token, const QualifiedName& attributeName, const String& replacementValue) 418 { 419 size_t indexOfAttribute; 420 if (findAttributeWithName(token, attributeName, indexOfAttribute)) { 421 const HTMLToken::Attribute& attribute = token.attributes().at(indexOfAttribute); 422 if (isContainedInRequest(snippetForAttribute(token, attribute))) { 423 if (attributeName == srcAttr && isSameOriginResource(String(attribute.m_value.data(), attribute.m_value.size()))) 424 return false; 425 if (attributeName == http_equivAttr && !isDangerousHTTPEquiv(String(attribute.m_value.data(), attribute.m_value.size()))) 426 return false; 427 token.eraseValueOfAttribute(indexOfAttribute); 428 if (!replacementValue.isEmpty()) 429 token.appendToAttributeValue(indexOfAttribute, replacementValue); 430 return true; 431 } 432 } 433 return false; 434 } 435 436 String XSSFilter::snippetForRange(const HTMLToken& token, int start, int end) 437 { 438 // FIXME: There's an extra allocation here that we could save by 439 // passing the range to the parser. 440 return m_parser->sourceForToken(token).substring(start, end - start); 441 } 442 443 String XSSFilter::snippetForAttribute(const HTMLToken& token, const HTMLToken::Attribute& attribute) 444 { 445 // FIXME: We should grab one character before the name also. 446 int start = attribute.m_nameRange.m_start - token.startIndex(); 447 // FIXME: We probably want to grab only the first few characters of the attribute value. 448 int end = attribute.m_valueRange.m_end - token.startIndex(); 449 return snippetForRange(token, start, end); 450 } 451 452 bool XSSFilter::isContainedInRequest(const String& snippet) 453 { 454 ASSERT(!snippet.isEmpty()); 455 String canonicalizedSnippet = canonicalize(snippet); 456 ASSERT(!canonicalizedSnippet.isEmpty()); 457 if (m_decodedURL.find(canonicalizedSnippet, 0, false) != notFound) 458 return true; 459 if (m_decodedHTTPBodySuffixTree && !m_decodedHTTPBodySuffixTree->mightContain(canonicalizedSnippet)) 460 return false; 461 return m_decodedHTTPBody.find(canonicalizedSnippet, 0, false) != notFound; 462 } 463 464 bool XSSFilter::isSameOriginResource(const String& url) 465 { 466 // If the resource is loaded from the same URL as the enclosing page, it's 467 // probably not an XSS attack, so we reduce false positives by allowing the 468 // request. If the resource has a query string, we're more suspicious, 469 // however, because that's pretty rare and the attacker might be able to 470 // trick a server-side script into doing something dangerous with the query 471 // string. 472 KURL resourceURL(m_parser->document()->url(), url); 473 return (m_parser->document()->url().host() == resourceURL.host() && resourceURL.query().isEmpty()); 474 } 475 476 } 477