Home | History | Annotate | Download | only in prerender
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/prerender/prerender_util.h"
      6 
      7 #include "base/logging.h"
      8 #include "base/metrics/histogram.h"
      9 #include "base/metrics/sparse_histogram.h"
     10 #include "base/strings/string_util.h"
     11 #include "content/public/browser/resource_request_info.h"
     12 #include "net/http/http_response_headers.h"
     13 #include "net/url_request/url_request.h"
     14 #include "url/url_canon.h"
     15 #include "url/url_parse.h"
     16 #include "url/url_util.h"
     17 #include "webkit/common/resource_type.h"
     18 
     19 namespace prerender {
     20 
     21 namespace {
     22 
     23 const char kModPagespeedHeader[] = "X-Mod-Pagespeed";
     24 const char kPageSpeedHeader[] = "X-Page-Speed";
     25 const char kPagespeedServerHistogram[] =
     26     "Prerender.PagespeedHeader.ServerCounts";
     27 const char kPagespeedVersionHistogram[] =
     28     "Prerender.PagespeedHeader.VersionCounts";
     29 
     30 enum PagespeedHeaderServerType {
     31   PAGESPEED_TOTAL_RESPONSES = 0,
     32   PAGESPEED_MOD_PAGESPEED_SERVER = 1,
     33   PAGESPEED_NGX_PAGESPEED_SERVER = 2,
     34   PAGESPEED_PAGESPEED_SERVICE_SERVER = 3,
     35   PAGESPEED_UNKNOWN_SERVER = 4,
     36   PAGESPEED_SERVER_MAXIMUM = 5
     37 };
     38 
     39 // Private function to parse the PageSpeed version number and encode it in
     40 // buckets 2 through 99: if it is in the format a.b.c.d-e the bucket will be
     41 // 2 + 2 * (max(c, 10) - 10) + (d > 1 ? 1 : 0); if it is not in this format
     42 // we return zero.
     43 int GetXModPagespeedBucketFromVersion(const std::string& version) {
     44   int a, b, c, d, e;
     45   int num_parsed = sscanf(version.c_str(), "%d.%d.%d.%d-%d",
     46                           &a, &b, &c, &d, &e);
     47   int output = 0;
     48   if (num_parsed == 5) {
     49     output = 2;
     50     if (c > 10)
     51       output += 2 * (c - 10);
     52     if (d > 1)
     53       output++;
     54     if (output < 2 || output > 99)
     55       output = 0;
     56   }
     57   return output;
     58 }
     59 
     60 // Private function to parse the X-Page-Speed header value and determine
     61 // whether it is in the PageSpeed Service format, namely m_n_dc were m_n is
     62 // a version number and dc is an encoded 2-character value.
     63 bool IsPageSpeedServiceVersionNumber(const std::string& version) {
     64   int a, b;
     65   char c, d, e;  // e is to detect EOL as we check that it /isn't/ converted.
     66   int num_parsed = sscanf(version.c_str(), "%d_%d_%c%c%c", &a, &b, &c, &d, &e);
     67   return (num_parsed == 4);
     68 }
     69 
     70 enum PrerenderSchemeCancelReason {
     71   PRERENDER_SCHEME_CANCEL_REASON_EXTERNAL_PROTOCOL,
     72   PRERENDER_SCHEME_CANCEL_REASON_DATA,
     73   PRERENDER_SCHEME_CANCEL_REASON_BLOB,
     74   PRERENDER_SCHEME_CANCEL_REASON_FILE,
     75   PRERENDER_SCHEME_CANCEL_REASON_FILESYSTEM,
     76   PRERENDER_SCHEME_CANCEL_REASON_WEBSOCKET,
     77   PRERENDER_SCHEME_CANCEL_REASON_FTP,
     78   PRERENDER_SCHEME_CANCEL_REASON_CHROME,
     79   PRERENDER_SCHEME_CANCEL_REASON_CHROME_EXTENSION,
     80   PRERENDER_SCHEME_CANCEL_REASON_ABOUT,
     81   PRERENDER_SCHEME_CANCEL_REASON_UNKNOWN,
     82   PRERENDER_SCHEME_CANCEL_REASON_MAX,
     83 };
     84 
     85 void ReportPrerenderSchemeCancelReason(PrerenderSchemeCancelReason reason) {
     86   UMA_HISTOGRAM_ENUMERATION(
     87       "Prerender.SchemeCancelReason", reason,
     88       PRERENDER_SCHEME_CANCEL_REASON_MAX);
     89 }
     90 
     91 }  // namespace
     92 
     93 const char kChromeNavigateExtraDataKey[] = "chrome_navigate";
     94 
     95 bool MaybeGetQueryStringBasedAliasURL(
     96     const GURL& url, GURL* alias_url) {
     97   DCHECK(alias_url);
     98   url_parse::Parsed parsed;
     99   url_parse::ParseStandardURL(url.spec().c_str(), url.spec().length(),
    100                               &parsed);
    101   url_parse::Component query = parsed.query;
    102   url_parse::Component key, value;
    103   while (url_parse::ExtractQueryKeyValue(url.spec().c_str(), &query, &key,
    104                                          &value)) {
    105     if (key.len != 3 || strncmp(url.spec().c_str() + key.begin, "url", key.len))
    106       continue;
    107     // We found a url= query string component.
    108     if (value.len < 1)
    109       continue;
    110     url_canon::RawCanonOutputW<1024> decoded_url;
    111     url_util::DecodeURLEscapeSequences(url.spec().c_str() + value.begin,
    112                                        value.len, &decoded_url);
    113     GURL new_url(base::string16(decoded_url.data(), decoded_url.length()));
    114     if (!new_url.is_empty() && new_url.is_valid()) {
    115       *alias_url = new_url;
    116       return true;
    117     }
    118     return false;
    119   }
    120   return false;
    121 }
    122 
    123 uint8 GetQueryStringBasedExperiment(const GURL& url) {
    124   url_parse::Parsed parsed;
    125   url_parse::ParseStandardURL(url.spec().c_str(), url.spec().length(),
    126                               &parsed);
    127   url_parse::Component query = parsed.query;
    128   url_parse::Component key, value;
    129   while (url_parse::ExtractQueryKeyValue(url.spec().c_str(), &query, &key,
    130                                          &value)) {
    131     if (key.len != 3 || strncmp(url.spec().c_str() + key.begin, "lpe", key.len))
    132       continue;
    133 
    134     // We found a lpe= query string component.
    135     if (value.len != 1)
    136       continue;
    137     uint8 exp = *(url.spec().c_str() + value.begin) - '0';
    138     if (exp < 1 || exp > 9)
    139       continue;
    140     return exp;
    141   }
    142   return kNoExperiment;
    143 }
    144 
    145 bool IsGoogleDomain(const GURL& url) {
    146   return StartsWithASCII(url.host(), std::string("www.google."), true);
    147 }
    148 
    149 bool IsGoogleSearchResultURL(const GURL& url) {
    150   if (!IsGoogleDomain(url))
    151     return false;
    152   return (url.path().empty() ||
    153           StartsWithASCII(url.path(), std::string("/search"), true) ||
    154           (url.path() == "/") ||
    155           StartsWithASCII(url.path(), std::string("/webhp"), true));
    156 }
    157 
    158 bool IsNoSwapInExperiment(uint8 experiment_id) {
    159   // Currently, experiments 5 and 6 fall in this category.
    160   return experiment_id == 5 || experiment_id == 6;
    161 }
    162 
    163 bool IsControlGroupExperiment(uint8 experiment_id) {
    164   // Currently, experiments 7 and 8 fall in this category.
    165   return experiment_id == 7 || experiment_id == 8;
    166 }
    167 
    168 void GatherPagespeedData(const ResourceType::Type resource_type,
    169                          const GURL& request_url,
    170                          const net::HttpResponseHeaders* response_headers) {
    171   if (resource_type != ResourceType::MAIN_FRAME ||
    172       !request_url.SchemeIsHTTPOrHTTPS())
    173     return;
    174 
    175   // bucket 0 counts every response seen.
    176   UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
    177                             PAGESPEED_TOTAL_RESPONSES,
    178                             PAGESPEED_SERVER_MAXIMUM);
    179   if (!response_headers)
    180     return;
    181 
    182   void* iter = NULL;
    183   std::string name;
    184   std::string value;
    185   while (response_headers->EnumerateHeaderLines(&iter, &name, &value)) {
    186     if (name == kModPagespeedHeader) {
    187       // Bucket 1 counts occurences of the X-Mod-Pagespeed header.
    188       UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
    189                                 PAGESPEED_MOD_PAGESPEED_SERVER,
    190                                 PAGESPEED_SERVER_MAXIMUM);
    191       if (!value.empty()) {
    192         // If the header value is in the X-Mod-Pagespeed version number format
    193         // then increment the appropriate bucket, otherwise increment bucket 1,
    194         // which is the catch-all "unknown version number" bucket.
    195         int bucket = GetXModPagespeedBucketFromVersion(value);
    196         if (bucket > 0) {
    197           UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, bucket);
    198         } else {
    199           UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, 1);
    200         }
    201       }
    202       break;
    203     } else if (name == kPageSpeedHeader) {
    204       // X-Page-Speed header versions are either in the X-Mod-Pagespeed format,
    205       // indicating an nginx installation, or they're in the PageSpeed Service
    206       // format, indicating a PSS installation, or in some other format,
    207       // indicating an unknown installation [possibly IISpeed].
    208       if (!value.empty()) {
    209         int bucket = GetXModPagespeedBucketFromVersion(value);
    210         if (bucket > 0) {
    211           // Bucket 2 counts occurences of the X-Page-Speed header with a
    212           // value in the X-Mod-Pagespeed version number format. We also
    213           // count these responses in the version histogram.
    214           UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
    215                                     PAGESPEED_NGX_PAGESPEED_SERVER,
    216                                     PAGESPEED_SERVER_MAXIMUM);
    217           UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, bucket);
    218         } else if (IsPageSpeedServiceVersionNumber(value)) {
    219           // Bucket 3 counts occurences of the X-Page-Speed header with a
    220           // value in the PageSpeed Service version number format.
    221           UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
    222                                     PAGESPEED_PAGESPEED_SERVICE_SERVER,
    223                                     PAGESPEED_SERVER_MAXIMUM);
    224         } else {
    225           // Bucket 4 counts occurences of all other values.
    226           UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
    227                                     PAGESPEED_UNKNOWN_SERVER,
    228                                     PAGESPEED_SERVER_MAXIMUM);
    229         }
    230       }
    231       break;
    232     }
    233   }
    234 }
    235 
    236 void URLRequestResponseStarted(net::URLRequest* request) {
    237   const content::ResourceRequestInfo* info =
    238       content::ResourceRequestInfo::ForRequest(request);
    239   GatherPagespeedData(info->GetResourceType(),
    240                       request->url(),
    241                       request->response_headers());
    242 }
    243 
    244 void ReportPrerenderExternalURL() {
    245   ReportPrerenderSchemeCancelReason(
    246       PRERENDER_SCHEME_CANCEL_REASON_EXTERNAL_PROTOCOL);
    247 }
    248 
    249 void ReportUnsupportedPrerenderScheme(const GURL& url) {
    250   if (url.SchemeIs("data")) {
    251     ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_DATA);
    252   } else if (url.SchemeIs("blob")) {
    253     ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_BLOB);
    254   } else if (url.SchemeIsFile()) {
    255     ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_FILE);
    256   } else if (url.SchemeIsFileSystem()) {
    257     ReportPrerenderSchemeCancelReason(
    258         PRERENDER_SCHEME_CANCEL_REASON_FILESYSTEM);
    259   } else if (url.SchemeIs("ws") || url.SchemeIs("wss")) {
    260     ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_WEBSOCKET);
    261   } else if (url.SchemeIs("ftp")) {
    262     ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_FTP);
    263   } else if (url.SchemeIs("chrome")) {
    264     ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_CHROME);
    265   } else if (url.SchemeIs("chrome-extension")) {
    266     ReportPrerenderSchemeCancelReason(
    267         PRERENDER_SCHEME_CANCEL_REASON_CHROME_EXTENSION);
    268   } else if (url.SchemeIs("about")) {
    269     ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_ABOUT);
    270   } else {
    271     ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_UNKNOWN);
    272   }
    273 }
    274 
    275 }  // namespace prerender
    276