Home | History | Annotate | Download | only in net
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/net/referrer.h"
      6 
      7 #include <limits.h>
      8 
      9 #include "base/compiler_specific.h"
     10 #include "base/logging.h"
     11 #include "base/message_loop.h"
     12 #include "base/values.h"
     13 #include "chrome/browser/net/predictor.h"
     14 
     15 namespace chrome_browser_net {
     16 
     17 //------------------------------------------------------------------------------
     18 // Smoothing parameter for updating subresource_use_rate_.
     19 
     20 // We always combine our old expected value, weighted by some factor W (we use
     21 // kWeightingForOldConnectsExpectedValue), with the new expected value Enew.
     22 // The new "expected value" is the number of actual connections made due to the
     23 // current navigations.
     24 // That means that IF we end up needing to connect, we should apply the formula:
     25 // Eupdated = Eold * W  +  Enew * (1 - W)
     26 // If we visit the containing url, but don't end up needing a connection, then
     27 // Enew == 0, so we use the formula:
     28 // Eupdated = Eold * W
     29 // To achieve the above updating algorithm, we end up doing the multiplication
     30 // by W every time we contemplate doing a preconnection (i.e., when we navigate
     31 // to the containing URL, and consider doing a preconnection), and then IFF we
     32 // learn that we really needed a connection to the subresource, we complete the
     33 // above algorithm by adding the (1 - W) for each connection we make.
     34 
     35 // We weight the new expected value by a factor which is in the range of 0.0 to
     36 // 1.0.
     37 static const double kWeightingForOldConnectsExpectedValue = 0.66;
     38 
     39 // To estimate the expected value of the number of connections that we'll need
     40 // when a referrer is navigated to, we start with the following low initial
     41 // value.
     42 // Each time we do indeed (again) need the subresource, this value will get
     43 // increased.
     44 // Each time we navigate to the refererrer but never end up needing this
     45 // subresource, the value will decrease.
     46 // Very conservative is 0.0, which will mean that we have to wait for a while
     47 // before doing much speculative acvtivity.  We do persist results, so we'll
     48 // save the asymptotic (correct?) learned answer in the long run.
     49 // Some browsers blindly make 2 connections all the time, so we'll use that as
     50 // a starting point.
     51 static const double kInitialConnectsExpectedValue = 2.0;
     52 
     53 Referrer::Referrer() : use_count_(1) {}
     54 
     55 void Referrer::SuggestHost(const GURL& url) {
     56   // Limit how large our list can get, in case we make mistakes about what
     57   // hostnames are in sub-resources (example: Some advertisments have a link to
     58   // the ad agency, and then provide a "surprising" redirect to the advertised
     59   // entity, which then (mistakenly) appears to be a subresource on the page
     60   // hosting the ad).
     61   // TODO(jar): Do experiments to optimize the max count of suggestions.
     62   static const size_t kMaxSuggestions = 10;
     63 
     64   if (!url.has_host())  // TODO(jar): Is this really needed????
     65     return;
     66   DCHECK(url == url.GetWithEmptyPath());
     67   SubresourceMap::iterator it = find(url);
     68   if (it != end()) {
     69     it->second.SubresourceIsNeeded();
     70     return;
     71   }
     72 
     73   if (kMaxSuggestions <= size()) {
     74     DeleteLeastUseful();
     75     DCHECK(kMaxSuggestions > size());
     76   }
     77   (*this)[url].SubresourceIsNeeded();
     78 }
     79 
     80 void Referrer::DeleteLeastUseful() {
     81   // Find the item with the lowest value.  Most important is preconnection_rate,
     82   // and least is lifetime (age).
     83   GURL least_useful_url;
     84   double lowest_rate_seen = 0.0;
     85   // We use longs for durations because we will use multiplication on them.
     86   int64 least_useful_lifetime = 0;  // Duration in milliseconds.
     87 
     88   const base::Time kNow(base::Time::Now());  // Avoid multiple calls.
     89   for (SubresourceMap::iterator it = begin(); it != end(); ++it) {
     90     int64 lifetime = (kNow - it->second.birth_time()).InMilliseconds();
     91     double rate = it->second.subresource_use_rate();
     92     if (least_useful_url.has_host()) {
     93       if (rate > lowest_rate_seen)
     94         continue;
     95       if (lifetime <= least_useful_lifetime)
     96         continue;
     97     }
     98     least_useful_url = it->first;
     99     lowest_rate_seen = rate;
    100     least_useful_lifetime = lifetime;
    101   }
    102   if (least_useful_url.has_host())
    103     erase(least_useful_url);
    104 }
    105 
    106 bool Referrer::Trim(double reduce_rate, double threshold) {
    107   std::vector<GURL> discarded_urls;
    108   for (SubresourceMap::iterator it = begin(); it != end(); ++it) {
    109     if (!it->second.Trim(reduce_rate, threshold))
    110       discarded_urls.push_back(it->first);
    111   }
    112   for (size_t i = 0; i < discarded_urls.size(); ++i)
    113     erase(discarded_urls[i]);
    114   return size() > 0;
    115 }
    116 
    117 bool ReferrerValue::Trim(double reduce_rate, double threshold) {
    118   subresource_use_rate_ *= reduce_rate;
    119   return subresource_use_rate_ > threshold;
    120 }
    121 
    122 
    123 void Referrer::Deserialize(const Value& value) {
    124   if (value.GetType() != Value::TYPE_LIST)
    125     return;
    126   const ListValue* subresource_list(static_cast<const ListValue*>(&value));
    127   size_t index = 0;  // Bounds checking is done by subresource_list->Get*().
    128   while (true) {
    129     std::string url_spec;
    130     if (!subresource_list->GetString(index++, &url_spec))
    131       return;
    132     double rate;
    133     if (!subresource_list->GetDouble(index++, &rate))
    134       return;
    135 
    136     GURL url(url_spec);
    137     // TODO(jar): We could be more direct, and change birth date or similar to
    138     // show that this is a resurrected value we're adding in.  I'm not yet sure
    139     // of how best to optimize the learning and pruning (Trim) algorithm at this
    140     // level, so for now, we just suggest subresources, which leaves them all
    141     // with the same birth date (typically start of process).
    142     SuggestHost(url);
    143     (*this)[url].SetSubresourceUseRate(rate);
    144   }
    145 }
    146 
    147 Value* Referrer::Serialize() const {
    148   ListValue* subresource_list(new ListValue);
    149   for (const_iterator it = begin(); it != end(); ++it) {
    150     StringValue* url_spec(new StringValue(it->first.spec()));
    151     FundamentalValue* rate(new FundamentalValue(
    152         it->second.subresource_use_rate()));
    153 
    154     subresource_list->Append(url_spec);
    155     subresource_list->Append(rate);
    156   }
    157   return subresource_list;
    158 }
    159 
    160 //------------------------------------------------------------------------------
    161 
    162 ReferrerValue::ReferrerValue()
    163     : birth_time_(base::Time::Now()),
    164       navigation_count_(0),
    165       preconnection_count_(0),
    166       preresolution_count_(0),
    167       subresource_use_rate_(kInitialConnectsExpectedValue) {
    168 }
    169 
    170 void ReferrerValue::SubresourceIsNeeded() {
    171   DCHECK_GE(kWeightingForOldConnectsExpectedValue, 0);
    172   DCHECK_LE(kWeightingForOldConnectsExpectedValue, 1.0);
    173   ++navigation_count_;
    174   subresource_use_rate_ += 1 - kWeightingForOldConnectsExpectedValue;
    175 }
    176 
    177 void ReferrerValue::ReferrerWasObserved() {
    178   subresource_use_rate_ *= kWeightingForOldConnectsExpectedValue;
    179   // Note: the use rate is temporarilly possibly incorect, as we need to find
    180   // out if we really end up connecting.  This will happen in a few hundred
    181   // milliseconds (when content arrives, etc.).
    182   // Value of subresource_use_rate_ should be sampled before this call.
    183 }
    184 
    185 }  // namespace chrome_browser_net
    186