1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "content/child/site_isolation_policy.h" 6 7 #include "base/basictypes.h" 8 #include "base/command_line.h" 9 #include "base/lazy_instance.h" 10 #include "base/logging.h" 11 #include "base/metrics/histogram.h" 12 #include "base/strings/string_util.h" 13 #include "content/public/common/content_switches.h" 14 #include "content/public/common/resource_response_info.h" 15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 16 #include "net/http/http_response_headers.h" 17 18 using base::StringPiece; 19 20 namespace content { 21 22 namespace { 23 24 // The cross-site document blocking/UMA data collection is deactivated by 25 // default, and only activated in renderer processes. 26 static bool g_policy_enabled = false; 27 28 // MIME types 29 const char kTextHtml[] = "text/html"; 30 const char kTextXml[] = "text/xml"; 31 const char xAppRssXml[] = "application/rss+xml"; 32 const char kAppXml[] = "application/xml"; 33 const char kAppJson[] = "application/json"; 34 const char kTextJson[] = "text/json"; 35 const char kTextXjson[] = "text/x-json"; 36 const char kTextPlain[] = "text/plain"; 37 38 // TODO(dsjang): this is only needed for collecting UMA stat. Will be deleted 39 // when this class is used for actual blocking. 40 bool IsRenderableStatusCode(int status_code) { 41 // Chrome only uses the content of a response with one of these status codes 42 // for CSS/JavaScript. For images, Chrome just ignores status code. 43 const int renderable_status_code[] = {200, 201, 202, 203, 206, 300, 44 301, 302, 303, 305, 306, 307}; 45 for (size_t i = 0; i < arraysize(renderable_status_code); ++i) { 46 if (renderable_status_code[i] == status_code) 47 return true; 48 } 49 return false; 50 } 51 52 bool MatchesSignature(StringPiece data, 53 const StringPiece signatures[], 54 size_t arr_size) { 55 56 size_t offset = data.find_first_not_of(" \t\r\n"); 57 // There is no not-whitespace character in this document. 58 if (offset == base::StringPiece::npos) 59 return false; 60 61 data.remove_prefix(offset); 62 size_t length = data.length(); 63 64 for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) { 65 const StringPiece& signature = signatures[sig_index]; 66 size_t signature_length = signature.length(); 67 if (length < signature_length) 68 continue; 69 70 if (LowerCaseEqualsASCII( 71 data.begin(), data.begin() + signature_length, signature.data())) 72 return true; 73 } 74 return false; 75 } 76 77 void IncrementHistogramCount(const std::string& name) { 78 // The default value of min, max, bucket_count are copied from histogram.h. 79 base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet( 80 name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag); 81 histogram_pointer->Add(1); 82 } 83 84 void IncrementHistogramEnum(const std::string& name, 85 uint32 sample, 86 uint32 boundary_value) { 87 // The default value of min, max, bucket_count are copied from histogram.h. 88 base::HistogramBase* histogram_pointer = base::LinearHistogram::FactoryGet( 89 name, 90 1, 91 boundary_value, 92 boundary_value + 1, 93 base::HistogramBase::kUmaTargetedHistogramFlag); 94 histogram_pointer->Add(sample); 95 } 96 97 void HistogramCountBlockedResponse( 98 const std::string& bucket_prefix, 99 linked_ptr<SiteIsolationResponseMetaData>& resp_data, 100 bool nosniff_block) { 101 std::string block_label(nosniff_block ? ".NoSniffBlocked" : ".Blocked"); 102 IncrementHistogramCount(bucket_prefix + block_label); 103 104 // The content is blocked if it is sniffed as HTML/JSON/XML. When 105 // the blocked response is with an error status code, it is not 106 // disruptive for the following reasons : 1) the blocked content is 107 // not a binary object (such as an image) since it is sniffed as 108 // text; 2) then, this blocking only breaks the renderer behavior 109 // only if it is either JavaScript or CSS. However, the renderer 110 // doesn't use the contents of JS/CSS with unaffected status code 111 // (e.g, 404). 3) the renderer is expected not to use the cross-site 112 // document content for purposes other than JS/CSS (e.g, XHR). 113 bool renderable_status_code = 114 IsRenderableStatusCode(resp_data->http_status_code); 115 116 if (renderable_status_code) { 117 IncrementHistogramEnum( 118 bucket_prefix + block_label + ".RenderableStatusCode", 119 resp_data->resource_type, 120 RESOURCE_TYPE_LAST_TYPE); 121 } else { 122 IncrementHistogramCount(bucket_prefix + block_label + 123 ".NonRenderableStatusCode"); 124 } 125 } 126 127 void HistogramCountNotBlockedResponse(const std::string& bucket_prefix, 128 bool sniffed_as_js) { 129 IncrementHistogramCount(bucket_prefix + ".NotBlocked"); 130 if (sniffed_as_js) 131 IncrementHistogramCount(bucket_prefix + ".NotBlocked.MaybeJS"); 132 } 133 134 } // namespace 135 136 SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() {} 137 138 void SiteIsolationPolicy::SetPolicyEnabled(bool enabled) { 139 g_policy_enabled = enabled; 140 } 141 142 linked_ptr<SiteIsolationResponseMetaData> 143 SiteIsolationPolicy::OnReceivedResponse(const GURL& frame_origin, 144 const GURL& response_url, 145 ResourceType resource_type, 146 int origin_pid, 147 const ResourceResponseInfo& info) { 148 if (!g_policy_enabled) 149 return linked_ptr<SiteIsolationResponseMetaData>(); 150 151 // if |origin_pid| is non-zero, it means that this response is for a plugin 152 // spawned from this renderer process. We exclude responses for plugins for 153 // now, but eventually, we're going to make plugin processes directly talk to 154 // the browser process so that we don't apply cross-site document blocking to 155 // them. 156 if (origin_pid) 157 return linked_ptr<SiteIsolationResponseMetaData>(); 158 159 UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1); 160 161 // See if this is for navigation. If it is, don't block it, under the 162 // assumption that we will put it in an appropriate process. 163 if (IsResourceTypeFrame(resource_type)) 164 return linked_ptr<SiteIsolationResponseMetaData>(); 165 166 if (!IsBlockableScheme(response_url)) 167 return linked_ptr<SiteIsolationResponseMetaData>(); 168 169 if (IsSameSite(frame_origin, response_url)) 170 return linked_ptr<SiteIsolationResponseMetaData>(); 171 172 SiteIsolationResponseMetaData::CanonicalMimeType canonical_mime_type = 173 GetCanonicalMimeType(info.mime_type); 174 175 if (canonical_mime_type == SiteIsolationResponseMetaData::Others) 176 return linked_ptr<SiteIsolationResponseMetaData>(); 177 178 // Every CORS request should have the Access-Control-Allow-Origin header even 179 // if it is preceded by a pre-flight request. Therefore, if this is a CORS 180 // request, it has this header. response.httpHeaderField() internally uses 181 // case-insensitive matching for the header name. 182 std::string access_control_origin; 183 184 // We can use a case-insensitive header name for EnumerateHeader(). 185 info.headers->EnumerateHeader( 186 NULL, "access-control-allow-origin", &access_control_origin); 187 if (IsValidCorsHeaderSet(frame_origin, response_url, access_control_origin)) 188 return linked_ptr<SiteIsolationResponseMetaData>(); 189 190 // Real XSD data collection starts from here. 191 std::string no_sniff; 192 info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff); 193 194 linked_ptr<SiteIsolationResponseMetaData> resp_data( 195 new SiteIsolationResponseMetaData); 196 resp_data->frame_origin = frame_origin.spec(); 197 resp_data->response_url = response_url; 198 resp_data->resource_type = resource_type; 199 resp_data->canonical_mime_type = canonical_mime_type; 200 resp_data->http_status_code = info.headers->response_code(); 201 resp_data->no_sniff = LowerCaseEqualsASCII(no_sniff, "nosniff"); 202 203 return resp_data; 204 } 205 206 bool SiteIsolationPolicy::ShouldBlockResponse( 207 linked_ptr<SiteIsolationResponseMetaData>& resp_data, 208 const char* raw_data, 209 int raw_length, 210 std::string* alternative_data) { 211 if (!g_policy_enabled) 212 return false; 213 214 DCHECK(resp_data.get()); 215 216 StringPiece data(raw_data, raw_length); 217 218 // Record the length of the first received network packet to see if it's 219 // enough for sniffing. 220 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length); 221 222 // Record the number of cross-site document responses with a specific mime 223 // type (text/html, text/xml, etc). 224 UMA_HISTOGRAM_ENUMERATION( 225 "SiteIsolation.XSD.MimeType", 226 resp_data->canonical_mime_type, 227 SiteIsolationResponseMetaData::MaxCanonicalMimeType); 228 229 // Store the result of cross-site document blocking analysis. 230 bool is_blocked = false; 231 bool sniffed_as_js = SniffForJS(data); 232 233 // Record the number of responses whose content is sniffed for what its mime 234 // type claims it to be. For example, we apply a HTML sniffer for a document 235 // tagged with text/html here. Whenever this check becomes true, we'll block 236 // the response. 237 if (resp_data->canonical_mime_type != 238 SiteIsolationResponseMetaData::Plain) { 239 std::string bucket_prefix; 240 bool sniffed_as_target_document = false; 241 if (resp_data->canonical_mime_type == 242 SiteIsolationResponseMetaData::HTML) { 243 bucket_prefix = "SiteIsolation.XSD.HTML"; 244 sniffed_as_target_document = SniffForHTML(data); 245 } else if (resp_data->canonical_mime_type == 246 SiteIsolationResponseMetaData::XML) { 247 bucket_prefix = "SiteIsolation.XSD.XML"; 248 sniffed_as_target_document = SniffForXML(data); 249 } else if (resp_data->canonical_mime_type == 250 SiteIsolationResponseMetaData::JSON) { 251 bucket_prefix = "SiteIsolation.XSD.JSON"; 252 sniffed_as_target_document = SniffForJSON(data); 253 } else { 254 NOTREACHED() << "Not a blockable mime type: " 255 << resp_data->canonical_mime_type; 256 } 257 258 if (sniffed_as_target_document) { 259 is_blocked = true; 260 HistogramCountBlockedResponse(bucket_prefix, resp_data, false); 261 } else { 262 if (resp_data->no_sniff) { 263 is_blocked = true; 264 HistogramCountBlockedResponse(bucket_prefix, resp_data, true); 265 } else { 266 HistogramCountNotBlockedResponse(bucket_prefix, sniffed_as_js); 267 } 268 } 269 } else { 270 // This block is for plain text documents. We apply our HTML, XML, 271 // and JSON sniffer to a text document in the order, and block it 272 // if any of them succeeds in sniffing. 273 std::string bucket_prefix; 274 if (SniffForHTML(data)) 275 bucket_prefix = "SiteIsolation.XSD.Plain.HTML"; 276 else if (SniffForXML(data)) 277 bucket_prefix = "SiteIsolation.XSD.Plain.XML"; 278 else if (SniffForJSON(data)) 279 bucket_prefix = "SiteIsolation.XSD.Plain.JSON"; 280 281 if (bucket_prefix.size() > 0) { 282 is_blocked = true; 283 HistogramCountBlockedResponse(bucket_prefix, resp_data, false); 284 } else if (resp_data->no_sniff) { 285 is_blocked = true; 286 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true); 287 } else { 288 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain", 289 sniffed_as_js); 290 } 291 } 292 293 if (!CommandLine::ForCurrentProcess()->HasSwitch( 294 switches::kBlockCrossSiteDocuments)) 295 is_blocked = false; 296 297 if (is_blocked) { 298 alternative_data->erase(); 299 alternative_data->insert(0, " "); 300 LOG(ERROR) << resp_data->response_url 301 << " is blocked as an illegal cross-site document from " 302 << resp_data->frame_origin; 303 } 304 return is_blocked; 305 } 306 307 SiteIsolationResponseMetaData::CanonicalMimeType 308 SiteIsolationPolicy::GetCanonicalMimeType(const std::string& mime_type) { 309 if (LowerCaseEqualsASCII(mime_type, kTextHtml)) { 310 return SiteIsolationResponseMetaData::HTML; 311 } 312 313 if (LowerCaseEqualsASCII(mime_type, kTextPlain)) { 314 return SiteIsolationResponseMetaData::Plain; 315 } 316 317 if (LowerCaseEqualsASCII(mime_type, kAppJson) || 318 LowerCaseEqualsASCII(mime_type, kTextJson) || 319 LowerCaseEqualsASCII(mime_type, kTextXjson)) { 320 return SiteIsolationResponseMetaData::JSON; 321 } 322 323 if (LowerCaseEqualsASCII(mime_type, kTextXml) || 324 LowerCaseEqualsASCII(mime_type, xAppRssXml) || 325 LowerCaseEqualsASCII(mime_type, kAppXml)) { 326 return SiteIsolationResponseMetaData::XML; 327 } 328 329 return SiteIsolationResponseMetaData::Others; 330 } 331 332 bool SiteIsolationPolicy::IsBlockableScheme(const GURL& url) { 333 // We exclude ftp:// from here. FTP doesn't provide a Content-Type 334 // header which our policy depends on, so we cannot protect any 335 // document from FTP servers. 336 return url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme); 337 } 338 339 bool SiteIsolationPolicy::IsSameSite(const GURL& frame_origin, 340 const GURL& response_url) { 341 342 if (!frame_origin.is_valid() || !response_url.is_valid()) 343 return false; 344 345 if (frame_origin.scheme() != response_url.scheme()) 346 return false; 347 348 // SameDomainOrHost() extracts the effective domains (public suffix plus one) 349 // from the two URLs and compare them. 350 return net::registry_controlled_domains::SameDomainOrHost( 351 frame_origin, 352 response_url, 353 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); 354 } 355 356 // We don't use Webkit's existing CORS policy implementation since 357 // their policy works in terms of origins, not sites. For example, 358 // when frame is sub.a.com and it is not allowed to access a document 359 // with sub1.a.com. But under Site Isolation, it's allowed. 360 bool SiteIsolationPolicy::IsValidCorsHeaderSet( 361 const GURL& frame_origin, 362 const GURL& website_origin, 363 const std::string& access_control_origin) { 364 // Many websites are sending back "\"*\"" instead of "*". This is 365 // non-standard practice, and not supported by Chrome. Refer to 366 // CrossOriginAccessControl::passesAccessControlCheck(). 367 368 // TODO(dsjang): * is not allowed for the response from a request 369 // with cookies. This allows for more than what the renderer will 370 // eventually be able to receive, so we won't see illegal cross-site 371 // documents allowed by this. We have to find a way to see if this 372 // response is from a cookie-tagged request or not in the future. 373 if (access_control_origin == "*") 374 return true; 375 376 // TODO(dsjang): The CORS spec only treats a fully specified URL, except for 377 // "*", but many websites are using just a domain for access_control_origin, 378 // and this is blocked by Webkit's CORS logic here : 379 // CrossOriginAccessControl::passesAccessControlCheck(). GURL is set 380 // is_valid() to false when it is created from a URL containing * in the 381 // domain part. 382 383 GURL cors_origin(access_control_origin); 384 return IsSameSite(frame_origin, cors_origin); 385 } 386 387 // This function is a slight modification of |net::SniffForHTML|. 388 bool SiteIsolationPolicy::SniffForHTML(StringPiece data) { 389 // The content sniffer used by Chrome and Firefox are using "<!--" 390 // as one of the HTML signatures, but it also appears in valid 391 // JavaScript, considered as well-formed JS by the browser. Since 392 // we do not want to block any JS, we exclude it from our HTML 393 // signatures. This can weaken our document block policy, but we can 394 // break less websites. 395 // TODO(dsjang): parameterize |net::SniffForHTML| with an option 396 // that decides whether to include <!-- or not, so that we can 397 // remove this function. 398 // TODO(dsjang): Once SiteIsolationPolicy is moved into the browser 399 // process, we should do single-thread checking here for the static 400 // initializer. 401 static const StringPiece kHtmlSignatures[] = { 402 StringPiece("<!DOCTYPE html"), // HTML5 spec 403 StringPiece("<script"), // HTML5 spec, Mozilla 404 StringPiece("<html"), // HTML5 spec, Mozilla 405 StringPiece("<head"), // HTML5 spec, Mozilla 406 StringPiece("<iframe"), // Mozilla 407 StringPiece("<h1"), // Mozilla 408 StringPiece("<div"), // Mozilla 409 StringPiece("<font"), // Mozilla 410 StringPiece("<table"), // Mozilla 411 StringPiece("<a"), // Mozilla 412 StringPiece("<style"), // Mozilla 413 StringPiece("<title"), // Mozilla 414 StringPiece("<b"), // Mozilla 415 StringPiece("<body"), // Mozilla 416 StringPiece("<br"), // Mozilla 417 StringPiece("<p"), // Mozilla 418 StringPiece("<?xml") // Mozilla 419 }; 420 421 while (data.length() > 0) { 422 if (MatchesSignature( 423 data, kHtmlSignatures, arraysize(kHtmlSignatures))) 424 return true; 425 426 // If we cannot find "<!--", we fail sniffing this as HTML. 427 static const StringPiece kCommentBegins[] = { StringPiece("<!--") }; 428 if (!MatchesSignature(data, kCommentBegins, arraysize(kCommentBegins))) 429 break; 430 431 // Search for --> and do SniffForHTML after that. If we can find the 432 // comment's end, we start HTML sniffing from there again. 433 static const char kEndComment[] = "-->"; 434 size_t offset = data.find(kEndComment); 435 if (offset == base::StringPiece::npos) 436 break; 437 438 // Proceed to the index next to the ending comment (-->). 439 data.remove_prefix(offset + strlen(kEndComment)); 440 } 441 442 return false; 443 } 444 445 bool SiteIsolationPolicy::SniffForXML(base::StringPiece data) { 446 // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for 447 // this signature. However, XML is case-sensitive. Don't we have to 448 // be more lenient only to block documents starting with the exact 449 // string <?xml rather than <?XML ? 450 // TODO(dsjang): Once SiteIsolationPolicy is moved into the browser 451 // process, we should do single-thread checking here for the static 452 // initializer. 453 static const StringPiece kXmlSignatures[] = { StringPiece("<?xml") }; 454 return MatchesSignature(data, kXmlSignatures, arraysize(kXmlSignatures)); 455 } 456 457 bool SiteIsolationPolicy::SniffForJSON(base::StringPiece data) { 458 // TODO(dsjang): We have to come up with a better way to sniff 459 // JSON. However, even RE cannot help us that much due to the fact 460 // that we don't do full parsing. This DFA starts with state 0, and 461 // finds {, "/' and : in that order. We're avoiding adding a 462 // dependency on a regular expression library. 463 enum { 464 kStartState, 465 kLeftBraceState, 466 kLeftQuoteState, 467 kColonState, 468 kTerminalState, 469 } state = kStartState; 470 471 size_t length = data.length(); 472 for (size_t i = 0; i < length && state < kColonState; ++i) { 473 const char c = data[i]; 474 if (c == ' ' || c == '\t' || c == '\r' || c == '\n') 475 continue; 476 477 switch (state) { 478 case kStartState: 479 if (c == '{') 480 state = kLeftBraceState; 481 else 482 state = kTerminalState; 483 break; 484 case kLeftBraceState: 485 if (c == '\"' || c == '\'') 486 state = kLeftQuoteState; 487 else 488 state = kTerminalState; 489 break; 490 case kLeftQuoteState: 491 if (c == ':') 492 state = kColonState; 493 break; 494 case kColonState: 495 case kTerminalState: 496 NOTREACHED(); 497 break; 498 } 499 } 500 return state == kColonState; 501 } 502 503 bool SiteIsolationPolicy::SniffForJS(StringPiece data) { 504 // TODO(dsjang): This is a real hack. The only purpose of this function is to 505 // try to see if there's any possibility that this data can be JavaScript 506 // (superset of JS). This function will be removed once UMA stats are 507 // gathered. 508 509 // Search for "var " for JS detection. 510 return data.find("var ") != base::StringPiece::npos; 511 } 512 513 } // namespace content 514