Home | History | Annotate | Download | only in prerender
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/prerender/prerender_util.h"
      6 
      7 #include "base/logging.h"
      8 #include "base/metrics/histogram.h"
      9 #include "base/metrics/sparse_histogram.h"
     10 #include "base/strings/string_util.h"
     11 #include "content/public/browser/resource_request_info.h"
     12 #include "net/http/http_response_headers.h"
     13 #include "net/url_request/url_request.h"
     14 #include "url/url_canon.h"
     15 #include "url/url_parse.h"
     16 #include "url/url_util.h"
     17 #include "webkit/common/resource_type.h"
     18 
     19 namespace prerender {
     20 
     21 namespace {
     22 
// Response header names that GatherPagespeedData() looks for.
const char kModPagespeedHeader[] = "X-Mod-Pagespeed";
const char kPageSpeedHeader[] = "X-Page-Speed";
// Enumeration histogram counting responses per detected PageSpeed server
// type (see PagespeedHeaderServerType).
const char kPagespeedServerHistogram[] =
    "Prerender.PagespeedHeader.ServerCounts";
// Sparse histogram counting responses per encoded PageSpeed version bucket.
const char kPagespeedVersionHistogram[] =
    "Prerender.PagespeedHeader.VersionCounts";
     29 
// Samples recorded in the kPagespeedServerHistogram enumeration histogram by
// GatherPagespeedData(). The numeric values are what gets logged, which is
// why they are spelled out explicitly.
enum PagespeedHeaderServerType {
  // Every main-frame HTTP(S) response that was inspected.
  PAGESPEED_TOTAL_RESPONSES = 0,
  // The response carried an X-Mod-Pagespeed header.
  PAGESPEED_MOD_PAGESPEED_SERVER = 1,
  // X-Page-Speed header whose value is in the X-Mod-Pagespeed version format.
  PAGESPEED_NGX_PAGESPEED_SERVER = 2,
  // X-Page-Speed header whose value is in the PageSpeed Service format.
  PAGESPEED_PAGESPEED_SERVICE_SERVER = 3,
  // X-Page-Speed header with any other non-empty value.
  PAGESPEED_UNKNOWN_SERVER = 4,
  // Boundary value handed to UMA_HISTOGRAM_ENUMERATION; never recorded itself.
  PAGESPEED_SERVER_MAXIMUM = 5
};
     38 
     39 // Private function to parse the PageSpeed version number and encode it in
     40 // buckets 2 through 99: if it is in the format a.b.c.d-e the bucket will be
     41 // 2 + 2 * (max(c, 10) - 10) + (d > 1 ? 1 : 0); if it is not in this format
     42 // we return zero.
     43 int GetXModPagespeedBucketFromVersion(const std::string& version) {
     44   int a, b, c, d, e;
     45   int num_parsed = sscanf(version.c_str(), "%d.%d.%d.%d-%d",
     46                           &a, &b, &c, &d, &e);
     47   int output = 0;
     48   if (num_parsed == 5) {
     49     output = 2;
     50     if (c > 10)
     51       output += 2 * (c - 10);
     52     if (d > 1)
     53       output++;
     54     if (output < 2 || output > 99)
     55       output = 0;
     56   }
     57   return output;
     58 }
     59 
     60 // Private function to parse the X-Page-Speed header value and determine
     61 // whether it is in the PageSpeed Service format, namely m_n_dc were m_n is
     62 // a version number and dc is an encoded 2-character value.
     63 bool IsPageSpeedServiceVersionNumber(const std::string& version) {
     64   int a, b;
     65   char c, d, e;  // e is to detect EOL as we check that it /isn't/ converted.
     66   int num_parsed = sscanf(version.c_str(), "%d_%d_%c%c%c", &a, &b, &c, &d, &e);
     67   return (num_parsed == 4);
     68 }
     69 
// Samples recorded in the "Prerender.SchemeCancelReason" histogram by
// ReportPrerenderSchemeCancelReason(). Values are assigned implicitly from 0
// in declaration order, so inserting or reordering entries changes the
// recorded sample values.
enum PrerenderSchemeCancelReason {
  // Reported by ReportPrerenderExternalURL().
  PRERENDER_SCHEME_CANCEL_REASON_EXTERNAL_PROTOCOL,
  // The remaining reasons are chosen by ReportUnsupportedPrerenderScheme()
  // according to the URL scheme.
  PRERENDER_SCHEME_CANCEL_REASON_DATA,
  PRERENDER_SCHEME_CANCEL_REASON_BLOB,
  PRERENDER_SCHEME_CANCEL_REASON_FILE,
  PRERENDER_SCHEME_CANCEL_REASON_FILESYSTEM,
  // Covers both "ws" and "wss".
  PRERENDER_SCHEME_CANCEL_REASON_WEBSOCKET,
  PRERENDER_SCHEME_CANCEL_REASON_FTP,
  PRERENDER_SCHEME_CANCEL_REASON_CHROME,
  PRERENDER_SCHEME_CANCEL_REASON_CHROME_EXTENSION,
  PRERENDER_SCHEME_CANCEL_REASON_ABOUT,
  // Any scheme not matched above.
  PRERENDER_SCHEME_CANCEL_REASON_UNKNOWN,
  // Boundary value handed to UMA_HISTOGRAM_ENUMERATION; never recorded itself.
  PRERENDER_SCHEME_CANCEL_REASON_MAX,
};
     84 
     85 void ReportPrerenderSchemeCancelReason(PrerenderSchemeCancelReason reason) {
     86   UMA_HISTOGRAM_ENUMERATION(
     87       "Prerender.SchemeCancelReason", reason,
     88       PRERENDER_SCHEME_CANCEL_REASON_MAX);
     89 }
     90 
     91 }  // namespace
     92 
// Key string "chrome_navigate". NOTE(review): nothing in this file reads it;
// presumably it is declared in prerender_util.h for use elsewhere -- confirm.
const char kChromeNavigateExtraDataKey[] = "chrome_navigate";
     94 
     95 bool MaybeGetQueryStringBasedAliasURL(
     96     const GURL& url, GURL* alias_url) {
     97   DCHECK(alias_url);
     98   url::Parsed parsed;
     99   url::ParseStandardURL(url.spec().c_str(), url.spec().length(), &parsed);
    100   url::Component query = parsed.query;
    101   url::Component key, value;
    102   while (url::ExtractQueryKeyValue(url.spec().c_str(), &query, &key, &value)) {
    103     if (key.len != 3 || strncmp(url.spec().c_str() + key.begin, "url", key.len))
    104       continue;
    105     // We found a url= query string component.
    106     if (value.len < 1)
    107       continue;
    108     url::RawCanonOutputW<1024> decoded_url;
    109     url::DecodeURLEscapeSequences(url.spec().c_str() + value.begin, value.len,
    110                                   &decoded_url);
    111     GURL new_url(base::string16(decoded_url.data(), decoded_url.length()));
    112     if (!new_url.is_empty() && new_url.is_valid()) {
    113       *alias_url = new_url;
    114       return true;
    115     }
    116     return false;
    117   }
    118   return false;
    119 }
    120 
    121 uint8 GetQueryStringBasedExperiment(const GURL& url) {
    122   url::Parsed parsed;
    123   url::ParseStandardURL(url.spec().c_str(), url.spec().length(), &parsed);
    124   url::Component query = parsed.query;
    125   url::Component key, value;
    126   while (url::ExtractQueryKeyValue(url.spec().c_str(), &query, &key, &value)) {
    127     if (key.len != 3 || strncmp(url.spec().c_str() + key.begin, "lpe", key.len))
    128       continue;
    129 
    130     // We found a lpe= query string component.
    131     if (value.len != 1)
    132       continue;
    133     uint8 exp = *(url.spec().c_str() + value.begin) - '0';
    134     if (exp < 1 || exp > 9)
    135       continue;
    136     return exp;
    137   }
    138   return kNoExperiment;
    139 }
    140 
    141 bool IsGoogleDomain(const GURL& url) {
    142   return StartsWithASCII(url.host(), std::string("www.google."), true);
    143 }
    144 
    145 bool IsGoogleSearchResultURL(const GURL& url) {
    146   if (!IsGoogleDomain(url))
    147     return false;
    148   return (url.path().empty() ||
    149           StartsWithASCII(url.path(), std::string("/search"), true) ||
    150           (url.path() == "/") ||
    151           StartsWithASCII(url.path(), std::string("/webhp"), true));
    152 }
    153 
    154 bool IsNoSwapInExperiment(uint8 experiment_id) {
    155   // Currently, experiments 5 and 6 fall in this category.
    156   return experiment_id == 5 || experiment_id == 6;
    157 }
    158 
    159 bool IsControlGroupExperiment(uint8 experiment_id) {
    160   // Currently, experiments 7 and 8 fall in this category.
    161   return experiment_id == 7 || experiment_id == 8;
    162 }
    163 
    164 void GatherPagespeedData(const ResourceType::Type resource_type,
    165                          const GURL& request_url,
    166                          const net::HttpResponseHeaders* response_headers) {
    167   if (resource_type != ResourceType::MAIN_FRAME ||
    168       !request_url.SchemeIsHTTPOrHTTPS())
    169     return;
    170 
    171   // bucket 0 counts every response seen.
    172   UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
    173                             PAGESPEED_TOTAL_RESPONSES,
    174                             PAGESPEED_SERVER_MAXIMUM);
    175   if (!response_headers)
    176     return;
    177 
    178   void* iter = NULL;
    179   std::string name;
    180   std::string value;
    181   while (response_headers->EnumerateHeaderLines(&iter, &name, &value)) {
    182     if (name == kModPagespeedHeader) {
    183       // Bucket 1 counts occurences of the X-Mod-Pagespeed header.
    184       UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
    185                                 PAGESPEED_MOD_PAGESPEED_SERVER,
    186                                 PAGESPEED_SERVER_MAXIMUM);
    187       if (!value.empty()) {
    188         // If the header value is in the X-Mod-Pagespeed version number format
    189         // then increment the appropriate bucket, otherwise increment bucket 1,
    190         // which is the catch-all "unknown version number" bucket.
    191         int bucket = GetXModPagespeedBucketFromVersion(value);
    192         if (bucket > 0) {
    193           UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, bucket);
    194         } else {
    195           UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, 1);
    196         }
    197       }
    198       break;
    199     } else if (name == kPageSpeedHeader) {
    200       // X-Page-Speed header versions are either in the X-Mod-Pagespeed format,
    201       // indicating an nginx installation, or they're in the PageSpeed Service
    202       // format, indicating a PSS installation, or in some other format,
    203       // indicating an unknown installation [possibly IISpeed].
    204       if (!value.empty()) {
    205         int bucket = GetXModPagespeedBucketFromVersion(value);
    206         if (bucket > 0) {
    207           // Bucket 2 counts occurences of the X-Page-Speed header with a
    208           // value in the X-Mod-Pagespeed version number format. We also
    209           // count these responses in the version histogram.
    210           UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
    211                                     PAGESPEED_NGX_PAGESPEED_SERVER,
    212                                     PAGESPEED_SERVER_MAXIMUM);
    213           UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, bucket);
    214         } else if (IsPageSpeedServiceVersionNumber(value)) {
    215           // Bucket 3 counts occurences of the X-Page-Speed header with a
    216           // value in the PageSpeed Service version number format.
    217           UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
    218                                     PAGESPEED_PAGESPEED_SERVICE_SERVER,
    219                                     PAGESPEED_SERVER_MAXIMUM);
    220         } else {
    221           // Bucket 4 counts occurences of all other values.
    222           UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
    223                                     PAGESPEED_UNKNOWN_SERVER,
    224                                     PAGESPEED_SERVER_MAXIMUM);
    225         }
    226       }
    227       break;
    228     }
    229   }
    230 }
    231 
    232 void URLRequestResponseStarted(net::URLRequest* request) {
    233   const content::ResourceRequestInfo* info =
    234       content::ResourceRequestInfo::ForRequest(request);
    235   GatherPagespeedData(info->GetResourceType(),
    236                       request->url(),
    237                       request->response_headers());
    238 }
    239 
    240 void ReportPrerenderExternalURL() {
    241   ReportPrerenderSchemeCancelReason(
    242       PRERENDER_SCHEME_CANCEL_REASON_EXTERNAL_PROTOCOL);
    243 }
    244 
    245 void ReportUnsupportedPrerenderScheme(const GURL& url) {
    246   if (url.SchemeIs("data")) {
    247     ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_DATA);
    248   } else if (url.SchemeIs("blob")) {
    249     ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_BLOB);
    250   } else if (url.SchemeIsFile()) {
    251     ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_FILE);
    252   } else if (url.SchemeIsFileSystem()) {
    253     ReportPrerenderSchemeCancelReason(
    254         PRERENDER_SCHEME_CANCEL_REASON_FILESYSTEM);
    255   } else if (url.SchemeIs("ws") || url.SchemeIs("wss")) {
    256     ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_WEBSOCKET);
    257   } else if (url.SchemeIs("ftp")) {
    258     ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_FTP);
    259   } else if (url.SchemeIs("chrome")) {
    260     ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_CHROME);
    261   } else if (url.SchemeIs("chrome-extension")) {
    262     ReportPrerenderSchemeCancelReason(
    263         PRERENDER_SCHEME_CANCEL_REASON_CHROME_EXTENSION);
    264   } else if (url.SchemeIs("about")) {
    265     ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_ABOUT);
    266   } else {
    267     ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_UNKNOWN);
    268   }
    269 }
    270 
    271 }  // namespace prerender
    272