Home | History | Annotate | Download | only in child
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "content/child/site_isolation_policy.h"
      6 
      7 #include "base/basictypes.h"
      8 #include "base/command_line.h"
      9 #include "base/lazy_instance.h"
     10 #include "base/logging.h"
     11 #include "base/metrics/histogram.h"
     12 #include "base/strings/string_util.h"
     13 #include "content/public/common/content_switches.h"
     14 #include "content/public/common/resource_response_info.h"
     15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
     16 #include "net/http/http_response_headers.h"
     17 
     18 using base::StringPiece;
     19 
     20 namespace content {
     21 
     22 namespace {
     23 
     24 // The cross-site document blocking/UMA data collection is deactivated by
     25 // default, and only activated in renderer processes.
     26 static bool g_policy_enabled = false;
     27 
     28 // MIME types
     29 const char kTextHtml[] = "text/html";
     30 const char kTextXml[] = "text/xml";
     31 const char xAppRssXml[] = "application/rss+xml";
     32 const char kAppXml[] = "application/xml";
     33 const char kAppJson[] = "application/json";
     34 const char kTextJson[] = "text/json";
     35 const char kTextXjson[] = "text/x-json";
     36 const char kTextPlain[] = "text/plain";
     37 
     38 // TODO(dsjang): this is only needed for collecting UMA stat. Will be deleted
     39 // when this class is used for actual blocking.
     40 bool IsRenderableStatusCode(int status_code) {
     41   // Chrome only uses the content of a response with one of these status codes
     42   // for CSS/JavaScript. For images, Chrome just ignores status code.
     43   const int renderable_status_code[] = {200, 201, 202, 203, 206, 300,
     44                                         301, 302, 303, 305, 306, 307};
     45   for (size_t i = 0; i < arraysize(renderable_status_code); ++i) {
     46     if (renderable_status_code[i] == status_code)
     47       return true;
     48   }
     49   return false;
     50 }
     51 
     52 bool MatchesSignature(StringPiece data,
     53                       const StringPiece signatures[],
     54                       size_t arr_size) {
     55 
     56   size_t offset = data.find_first_not_of(" \t\r\n");
     57   // There is no not-whitespace character in this document.
     58   if (offset == base::StringPiece::npos)
     59     return false;
     60 
     61   data.remove_prefix(offset);
     62   size_t length = data.length();
     63 
     64   for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) {
     65     const StringPiece& signature = signatures[sig_index];
     66     size_t signature_length = signature.length();
     67     if (length < signature_length)
     68       continue;
     69 
     70     if (LowerCaseEqualsASCII(
     71             data.begin(), data.begin() + signature_length, signature.data()))
     72       return true;
     73   }
     74   return false;
     75 }
     76 
     77 void IncrementHistogramCount(const std::string& name) {
     78   // The default value of min, max, bucket_count are copied from histogram.h.
     79   base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet(
     80       name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag);
     81   histogram_pointer->Add(1);
     82 }
     83 
     84 void IncrementHistogramEnum(const std::string& name,
     85                           uint32 sample,
     86                           uint32 boundary_value) {
     87   // The default value of min, max, bucket_count are copied from histogram.h.
     88   base::HistogramBase* histogram_pointer = base::LinearHistogram::FactoryGet(
     89       name,
     90       1,
     91       boundary_value,
     92       boundary_value + 1,
     93       base::HistogramBase::kUmaTargetedHistogramFlag);
     94   histogram_pointer->Add(sample);
     95 }
     96 
     97 void HistogramCountBlockedResponse(
     98     const std::string& bucket_prefix,
     99     linked_ptr<SiteIsolationResponseMetaData>& resp_data,
    100     bool nosniff_block) {
    101   std::string block_label(nosniff_block ? ".NoSniffBlocked" : ".Blocked");
    102   IncrementHistogramCount(bucket_prefix + block_label);
    103 
    104   // The content is blocked if it is sniffed as HTML/JSON/XML. When
    105   // the blocked response is with an error status code, it is not
    106   // disruptive for the following reasons : 1) the blocked content is
    107   // not a binary object (such as an image) since it is sniffed as
    108   // text; 2) then, this blocking only breaks the renderer behavior
    109   // only if it is either JavaScript or CSS. However, the renderer
    110   // doesn't use the contents of JS/CSS with unaffected status code
    111   // (e.g, 404). 3) the renderer is expected not to use the cross-site
    112   // document content for purposes other than JS/CSS (e.g, XHR).
    113   bool renderable_status_code =
    114       IsRenderableStatusCode(resp_data->http_status_code);
    115 
    116   if (renderable_status_code) {
    117     IncrementHistogramEnum(
    118         bucket_prefix + block_label + ".RenderableStatusCode",
    119         resp_data->resource_type,
    120         RESOURCE_TYPE_LAST_TYPE);
    121   } else {
    122     IncrementHistogramCount(bucket_prefix + block_label +
    123                             ".NonRenderableStatusCode");
    124   }
    125 }
    126 
    127 void HistogramCountNotBlockedResponse(const std::string& bucket_prefix,
    128                                       bool sniffed_as_js) {
    129   IncrementHistogramCount(bucket_prefix + ".NotBlocked");
    130   if (sniffed_as_js)
    131     IncrementHistogramCount(bucket_prefix + ".NotBlocked.MaybeJS");
    132 }
    133 
    134 }  // namespace
    135 
    136 SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() {}
    137 
    138 void SiteIsolationPolicy::SetPolicyEnabled(bool enabled) {
    139   g_policy_enabled = enabled;
    140 }
    141 
    142 linked_ptr<SiteIsolationResponseMetaData>
    143 SiteIsolationPolicy::OnReceivedResponse(const GURL& frame_origin,
    144                                         const GURL& response_url,
    145                                         ResourceType resource_type,
    146                                         int origin_pid,
    147                                         const ResourceResponseInfo& info) {
    148   if (!g_policy_enabled)
    149     return linked_ptr<SiteIsolationResponseMetaData>();
    150 
    151   // if |origin_pid| is non-zero, it means that this response is for a plugin
    152   // spawned from this renderer process. We exclude responses for plugins for
    153   // now, but eventually, we're going to make plugin processes directly talk to
    154   // the browser process so that we don't apply cross-site document blocking to
    155   // them.
    156   if (origin_pid)
    157     return linked_ptr<SiteIsolationResponseMetaData>();
    158 
    159   UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1);
    160 
    161   // See if this is for navigation. If it is, don't block it, under the
    162   // assumption that we will put it in an appropriate process.
    163   if (IsResourceTypeFrame(resource_type))
    164     return linked_ptr<SiteIsolationResponseMetaData>();
    165 
    166   if (!IsBlockableScheme(response_url))
    167     return linked_ptr<SiteIsolationResponseMetaData>();
    168 
    169   if (IsSameSite(frame_origin, response_url))
    170     return linked_ptr<SiteIsolationResponseMetaData>();
    171 
    172   SiteIsolationResponseMetaData::CanonicalMimeType canonical_mime_type =
    173       GetCanonicalMimeType(info.mime_type);
    174 
    175   if (canonical_mime_type == SiteIsolationResponseMetaData::Others)
    176     return linked_ptr<SiteIsolationResponseMetaData>();
    177 
    178   // Every CORS request should have the Access-Control-Allow-Origin header even
    179   // if it is preceded by a pre-flight request. Therefore, if this is a CORS
    180   // request, it has this header.  response.httpHeaderField() internally uses
    181   // case-insensitive matching for the header name.
    182   std::string access_control_origin;
    183 
    184   // We can use a case-insensitive header name for EnumerateHeader().
    185   info.headers->EnumerateHeader(
    186       NULL, "access-control-allow-origin", &access_control_origin);
    187   if (IsValidCorsHeaderSet(frame_origin, response_url, access_control_origin))
    188     return linked_ptr<SiteIsolationResponseMetaData>();
    189 
    190   // Real XSD data collection starts from here.
    191   std::string no_sniff;
    192   info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff);
    193 
    194   linked_ptr<SiteIsolationResponseMetaData> resp_data(
    195       new SiteIsolationResponseMetaData);
    196   resp_data->frame_origin = frame_origin.spec();
    197   resp_data->response_url = response_url;
    198   resp_data->resource_type = resource_type;
    199   resp_data->canonical_mime_type = canonical_mime_type;
    200   resp_data->http_status_code = info.headers->response_code();
    201   resp_data->no_sniff = LowerCaseEqualsASCII(no_sniff, "nosniff");
    202 
    203   return resp_data;
    204 }
    205 
    206 bool SiteIsolationPolicy::ShouldBlockResponse(
    207     linked_ptr<SiteIsolationResponseMetaData>& resp_data,
    208     const char* raw_data,
    209     int raw_length,
    210     std::string* alternative_data) {
    211   if (!g_policy_enabled)
    212     return false;
    213 
    214   DCHECK(resp_data.get());
    215 
    216   StringPiece data(raw_data, raw_length);
    217 
    218   // Record the length of the first received network packet to see if it's
    219   // enough for sniffing.
    220   UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length);
    221 
    222   // Record the number of cross-site document responses with a specific mime
    223   // type (text/html, text/xml, etc).
    224   UMA_HISTOGRAM_ENUMERATION(
    225       "SiteIsolation.XSD.MimeType",
    226       resp_data->canonical_mime_type,
    227       SiteIsolationResponseMetaData::MaxCanonicalMimeType);
    228 
    229   // Store the result of cross-site document blocking analysis.
    230   bool is_blocked = false;
    231   bool sniffed_as_js = SniffForJS(data);
    232 
    233   // Record the number of responses whose content is sniffed for what its mime
    234   // type claims it to be. For example, we apply a HTML sniffer for a document
    235   // tagged with text/html here. Whenever this check becomes true, we'll block
    236   // the response.
    237   if (resp_data->canonical_mime_type !=
    238           SiteIsolationResponseMetaData::Plain) {
    239     std::string bucket_prefix;
    240     bool sniffed_as_target_document = false;
    241     if (resp_data->canonical_mime_type ==
    242             SiteIsolationResponseMetaData::HTML) {
    243       bucket_prefix = "SiteIsolation.XSD.HTML";
    244       sniffed_as_target_document = SniffForHTML(data);
    245     } else if (resp_data->canonical_mime_type ==
    246                    SiteIsolationResponseMetaData::XML) {
    247       bucket_prefix = "SiteIsolation.XSD.XML";
    248       sniffed_as_target_document = SniffForXML(data);
    249     } else if (resp_data->canonical_mime_type ==
    250                    SiteIsolationResponseMetaData::JSON) {
    251       bucket_prefix = "SiteIsolation.XSD.JSON";
    252       sniffed_as_target_document = SniffForJSON(data);
    253     } else {
    254       NOTREACHED() << "Not a blockable mime type: "
    255                    << resp_data->canonical_mime_type;
    256     }
    257 
    258     if (sniffed_as_target_document) {
    259       is_blocked = true;
    260       HistogramCountBlockedResponse(bucket_prefix, resp_data, false);
    261     } else {
    262       if (resp_data->no_sniff) {
    263         is_blocked = true;
    264         HistogramCountBlockedResponse(bucket_prefix, resp_data, true);
    265       } else {
    266         HistogramCountNotBlockedResponse(bucket_prefix, sniffed_as_js);
    267       }
    268     }
    269   } else {
    270     // This block is for plain text documents. We apply our HTML, XML,
    271     // and JSON sniffer to a text document in the order, and block it
    272     // if any of them succeeds in sniffing.
    273     std::string bucket_prefix;
    274     if (SniffForHTML(data))
    275       bucket_prefix = "SiteIsolation.XSD.Plain.HTML";
    276     else if (SniffForXML(data))
    277       bucket_prefix = "SiteIsolation.XSD.Plain.XML";
    278     else if (SniffForJSON(data))
    279       bucket_prefix = "SiteIsolation.XSD.Plain.JSON";
    280 
    281     if (bucket_prefix.size() > 0) {
    282       is_blocked = true;
    283       HistogramCountBlockedResponse(bucket_prefix, resp_data, false);
    284     } else if (resp_data->no_sniff) {
    285       is_blocked = true;
    286       HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true);
    287     } else {
    288       HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain",
    289                                        sniffed_as_js);
    290     }
    291   }
    292 
    293   if (!CommandLine::ForCurrentProcess()->HasSwitch(
    294            switches::kBlockCrossSiteDocuments))
    295     is_blocked = false;
    296 
    297   if (is_blocked) {
    298     alternative_data->erase();
    299     alternative_data->insert(0, " ");
    300     LOG(ERROR) << resp_data->response_url
    301                << " is blocked as an illegal cross-site document from "
    302                << resp_data->frame_origin;
    303   }
    304   return is_blocked;
    305 }
    306 
    307 SiteIsolationResponseMetaData::CanonicalMimeType
    308 SiteIsolationPolicy::GetCanonicalMimeType(const std::string& mime_type) {
    309   if (LowerCaseEqualsASCII(mime_type, kTextHtml)) {
    310     return SiteIsolationResponseMetaData::HTML;
    311   }
    312 
    313   if (LowerCaseEqualsASCII(mime_type, kTextPlain)) {
    314     return SiteIsolationResponseMetaData::Plain;
    315   }
    316 
    317   if (LowerCaseEqualsASCII(mime_type, kAppJson) ||
    318       LowerCaseEqualsASCII(mime_type, kTextJson) ||
    319       LowerCaseEqualsASCII(mime_type, kTextXjson)) {
    320     return SiteIsolationResponseMetaData::JSON;
    321   }
    322 
    323   if (LowerCaseEqualsASCII(mime_type, kTextXml) ||
    324       LowerCaseEqualsASCII(mime_type, xAppRssXml) ||
    325       LowerCaseEqualsASCII(mime_type, kAppXml)) {
    326     return SiteIsolationResponseMetaData::XML;
    327   }
    328 
    329  return SiteIsolationResponseMetaData::Others;
    330 }
    331 
    332 bool SiteIsolationPolicy::IsBlockableScheme(const GURL& url) {
    333   // We exclude ftp:// from here. FTP doesn't provide a Content-Type
    334   // header which our policy depends on, so we cannot protect any
    335   // document from FTP servers.
    336   return url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme);
    337 }
    338 
    339 bool SiteIsolationPolicy::IsSameSite(const GURL& frame_origin,
    340                                      const GURL& response_url) {
    341 
    342   if (!frame_origin.is_valid() || !response_url.is_valid())
    343     return false;
    344 
    345   if (frame_origin.scheme() != response_url.scheme())
    346     return false;
    347 
    348   // SameDomainOrHost() extracts the effective domains (public suffix plus one)
    349   // from the two URLs and compare them.
    350   return net::registry_controlled_domains::SameDomainOrHost(
    351       frame_origin,
    352       response_url,
    353       net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
    354 }
    355 
    356 // We don't use Webkit's existing CORS policy implementation since
    357 // their policy works in terms of origins, not sites. For example,
    358 // when frame is sub.a.com and it is not allowed to access a document
    359 // with sub1.a.com. But under Site Isolation, it's allowed.
    360 bool SiteIsolationPolicy::IsValidCorsHeaderSet(
    361     const GURL& frame_origin,
    362     const GURL& website_origin,
    363     const std::string& access_control_origin) {
    364   // Many websites are sending back "\"*\"" instead of "*". This is
    365   // non-standard practice, and not supported by Chrome. Refer to
    366   // CrossOriginAccessControl::passesAccessControlCheck().
    367 
    368   // TODO(dsjang): * is not allowed for the response from a request
    369   // with cookies. This allows for more than what the renderer will
    370   // eventually be able to receive, so we won't see illegal cross-site
    371   // documents allowed by this. We have to find a way to see if this
    372   // response is from a cookie-tagged request or not in the future.
    373   if (access_control_origin == "*")
    374     return true;
    375 
    376   // TODO(dsjang): The CORS spec only treats a fully specified URL, except for
    377   // "*", but many websites are using just a domain for access_control_origin,
    378   // and this is blocked by Webkit's CORS logic here :
    379   // CrossOriginAccessControl::passesAccessControlCheck(). GURL is set
    380   // is_valid() to false when it is created from a URL containing * in the
    381   // domain part.
    382 
    383   GURL cors_origin(access_control_origin);
    384   return IsSameSite(frame_origin, cors_origin);
    385 }
    386 
    387 // This function is a slight modification of |net::SniffForHTML|.
    388 bool SiteIsolationPolicy::SniffForHTML(StringPiece data) {
    389   // The content sniffer used by Chrome and Firefox are using "<!--"
    390   // as one of the HTML signatures, but it also appears in valid
    391   // JavaScript, considered as well-formed JS by the browser.  Since
    392   // we do not want to block any JS, we exclude it from our HTML
    393   // signatures. This can weaken our document block policy, but we can
    394   // break less websites.
    395   // TODO(dsjang): parameterize |net::SniffForHTML| with an option
    396   // that decides whether to include <!-- or not, so that we can
    397   // remove this function.
    398   // TODO(dsjang): Once SiteIsolationPolicy is moved into the browser
    399   // process, we should do single-thread checking here for the static
    400   // initializer.
    401   static const StringPiece kHtmlSignatures[] = {
    402     StringPiece("<!DOCTYPE html"),  // HTML5 spec
    403     StringPiece("<script"),  // HTML5 spec, Mozilla
    404     StringPiece("<html"),    // HTML5 spec, Mozilla
    405     StringPiece("<head"),    // HTML5 spec, Mozilla
    406     StringPiece("<iframe"),  // Mozilla
    407     StringPiece("<h1"),      // Mozilla
    408     StringPiece("<div"),     // Mozilla
    409     StringPiece("<font"),    // Mozilla
    410     StringPiece("<table"),   // Mozilla
    411     StringPiece("<a"),       // Mozilla
    412     StringPiece("<style"),   // Mozilla
    413     StringPiece("<title"),   // Mozilla
    414     StringPiece("<b"),       // Mozilla
    415     StringPiece("<body"),    // Mozilla
    416     StringPiece("<br"),      // Mozilla
    417     StringPiece("<p"),       // Mozilla
    418     StringPiece("<?xml")     // Mozilla
    419   };
    420 
    421   while (data.length() > 0) {
    422     if (MatchesSignature(
    423           data, kHtmlSignatures, arraysize(kHtmlSignatures)))
    424       return true;
    425 
    426     // If we cannot find "<!--", we fail sniffing this as HTML.
    427     static const StringPiece kCommentBegins[] = { StringPiece("<!--") };
    428     if (!MatchesSignature(data, kCommentBegins, arraysize(kCommentBegins)))
    429       break;
    430 
    431     // Search for --> and do SniffForHTML after that. If we can find the
    432     // comment's end, we start HTML sniffing from there again.
    433     static const char kEndComment[] = "-->";
    434     size_t offset = data.find(kEndComment);
    435     if (offset == base::StringPiece::npos)
    436       break;
    437 
    438     // Proceed to the index next to the ending comment (-->).
    439     data.remove_prefix(offset + strlen(kEndComment));
    440   }
    441 
    442   return false;
    443 }
    444 
    445 bool SiteIsolationPolicy::SniffForXML(base::StringPiece data) {
    446   // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for
    447   // this signature. However, XML is case-sensitive. Don't we have to
    448   // be more lenient only to block documents starting with the exact
    449   // string <?xml rather than <?XML ?
    450   // TODO(dsjang): Once SiteIsolationPolicy is moved into the browser
    451   // process, we should do single-thread checking here for the static
    452   // initializer.
    453   static const StringPiece kXmlSignatures[] = { StringPiece("<?xml") };
    454   return MatchesSignature(data, kXmlSignatures, arraysize(kXmlSignatures));
    455 }
    456 
    457 bool SiteIsolationPolicy::SniffForJSON(base::StringPiece data) {
    458   // TODO(dsjang): We have to come up with a better way to sniff
    459   // JSON. However, even RE cannot help us that much due to the fact
    460   // that we don't do full parsing.  This DFA starts with state 0, and
    461   // finds {, "/' and : in that order. We're avoiding adding a
    462   // dependency on a regular expression library.
    463   enum {
    464     kStartState,
    465     kLeftBraceState,
    466     kLeftQuoteState,
    467     kColonState,
    468     kTerminalState,
    469   } state = kStartState;
    470 
    471   size_t length = data.length();
    472   for (size_t i = 0; i < length && state < kColonState; ++i) {
    473     const char c = data[i];
    474     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
    475       continue;
    476 
    477     switch (state) {
    478       case kStartState:
    479         if (c == '{')
    480           state = kLeftBraceState;
    481         else
    482           state = kTerminalState;
    483         break;
    484       case kLeftBraceState:
    485         if (c == '\"' || c == '\'')
    486           state = kLeftQuoteState;
    487         else
    488           state = kTerminalState;
    489         break;
    490       case kLeftQuoteState:
    491         if (c == ':')
    492           state = kColonState;
    493         break;
    494       case kColonState:
    495       case kTerminalState:
    496         NOTREACHED();
    497         break;
    498     }
    499   }
    500   return state == kColonState;
    501 }
    502 
    503 bool SiteIsolationPolicy::SniffForJS(StringPiece data) {
    504   // TODO(dsjang): This is a real hack. The only purpose of this function is to
    505   // try to see if there's any possibility that this data can be JavaScript
    506   // (superset of JS). This function will be removed once UMA stats are
    507   // gathered.
    508 
    509   // Search for "var " for JS detection.
    510   return data.find("var ") != base::StringPiece::npos;
    511 }
    512 
    513 }  // namespace content
    514