1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/net/referrer.h" 6 7 #include <limits.h> 8 9 #include "base/compiler_specific.h" 10 #include "base/logging.h" 11 #include "base/message_loop.h" 12 #include "base/values.h" 13 #include "chrome/browser/net/predictor.h" 14 15 namespace chrome_browser_net { 16 17 //------------------------------------------------------------------------------ 18 // Smoothing parameter for updating subresource_use_rate_. 19 20 // We always combine our old expected value, weighted by some factor W (we use 21 // kWeightingForOldConnectsExpectedValue), with the new expected value Enew. 22 // The new "expected value" is the number of actual connections made due to the 23 // current navigations. 24 // That means that IF we end up needing to connect, we should apply the formula: 25 // Eupdated = Eold * W + Enew * (1 - W) 26 // If we visit the containing url, but don't end up needing a connection, then 27 // Enew == 0, so we use the formula: 28 // Eupdated = Eold * W 29 // To achieve the above updating algorithm, we end up doing the multiplication 30 // by W every time we contemplate doing a preconnection (i.e., when we navigate 31 // to the containing URL, and consider doing a preconnection), and then IFF we 32 // learn that we really needed a connection to the subresource, we complete the 33 // above algorithm by adding the (1 - W) for each connection we make. 34 35 // We weight the new expected value by a factor which is in the range of 0.0 to 36 // 1.0. 37 static const double kWeightingForOldConnectsExpectedValue = 0.66; 38 39 // To estimate the expected value of the number of connections that we'll need 40 // when a referrer is navigated to, we start with the following low initial 41 // value. 42 // Each time we do indeed (again) need the subresource, this value will get 43 // increased. 44 // Each time we navigate to the refererrer but never end up needing this 45 // subresource, the value will decrease. 46 // Very conservative is 0.0, which will mean that we have to wait for a while 47 // before doing much speculative acvtivity. We do persist results, so we'll 48 // save the asymptotic (correct?) learned answer in the long run. 49 // Some browsers blindly make 2 connections all the time, so we'll use that as 50 // a starting point. 51 static const double kInitialConnectsExpectedValue = 2.0; 52 53 Referrer::Referrer() : use_count_(1) {} 54 55 void Referrer::SuggestHost(const GURL& url) { 56 // Limit how large our list can get, in case we make mistakes about what 57 // hostnames are in sub-resources (example: Some advertisments have a link to 58 // the ad agency, and then provide a "surprising" redirect to the advertised 59 // entity, which then (mistakenly) appears to be a subresource on the page 60 // hosting the ad). 61 // TODO(jar): Do experiments to optimize the max count of suggestions. 62 static const size_t kMaxSuggestions = 10; 63 64 if (!url.has_host()) // TODO(jar): Is this really needed???? 65 return; 66 DCHECK(url == url.GetWithEmptyPath()); 67 SubresourceMap::iterator it = find(url); 68 if (it != end()) { 69 it->second.SubresourceIsNeeded(); 70 return; 71 } 72 73 if (kMaxSuggestions <= size()) { 74 DeleteLeastUseful(); 75 DCHECK(kMaxSuggestions > size()); 76 } 77 (*this)[url].SubresourceIsNeeded(); 78 } 79 80 void Referrer::DeleteLeastUseful() { 81 // Find the item with the lowest value. Most important is preconnection_rate, 82 // and least is lifetime (age). 83 GURL least_useful_url; 84 double lowest_rate_seen = 0.0; 85 // We use longs for durations because we will use multiplication on them. 86 int64 least_useful_lifetime = 0; // Duration in milliseconds. 87 88 const base::Time kNow(base::Time::Now()); // Avoid multiple calls. 89 for (SubresourceMap::iterator it = begin(); it != end(); ++it) { 90 int64 lifetime = (kNow - it->second.birth_time()).InMilliseconds(); 91 double rate = it->second.subresource_use_rate(); 92 if (least_useful_url.has_host()) { 93 if (rate > lowest_rate_seen) 94 continue; 95 if (lifetime <= least_useful_lifetime) 96 continue; 97 } 98 least_useful_url = it->first; 99 lowest_rate_seen = rate; 100 least_useful_lifetime = lifetime; 101 } 102 if (least_useful_url.has_host()) 103 erase(least_useful_url); 104 } 105 106 bool Referrer::Trim(double reduce_rate, double threshold) { 107 std::vector<GURL> discarded_urls; 108 for (SubresourceMap::iterator it = begin(); it != end(); ++it) { 109 if (!it->second.Trim(reduce_rate, threshold)) 110 discarded_urls.push_back(it->first); 111 } 112 for (size_t i = 0; i < discarded_urls.size(); ++i) 113 erase(discarded_urls[i]); 114 return size() > 0; 115 } 116 117 bool ReferrerValue::Trim(double reduce_rate, double threshold) { 118 subresource_use_rate_ *= reduce_rate; 119 return subresource_use_rate_ > threshold; 120 } 121 122 123 void Referrer::Deserialize(const Value& value) { 124 if (value.GetType() != Value::TYPE_LIST) 125 return; 126 const ListValue* subresource_list(static_cast<const ListValue*>(&value)); 127 size_t index = 0; // Bounds checking is done by subresource_list->Get*(). 128 while (true) { 129 std::string url_spec; 130 if (!subresource_list->GetString(index++, &url_spec)) 131 return; 132 double rate; 133 if (!subresource_list->GetDouble(index++, &rate)) 134 return; 135 136 GURL url(url_spec); 137 // TODO(jar): We could be more direct, and change birth date or similar to 138 // show that this is a resurrected value we're adding in. I'm not yet sure 139 // of how best to optimize the learning and pruning (Trim) algorithm at this 140 // level, so for now, we just suggest subresources, which leaves them all 141 // with the same birth date (typically start of process). 142 SuggestHost(url); 143 (*this)[url].SetSubresourceUseRate(rate); 144 } 145 } 146 147 Value* Referrer::Serialize() const { 148 ListValue* subresource_list(new ListValue); 149 for (const_iterator it = begin(); it != end(); ++it) { 150 StringValue* url_spec(new StringValue(it->first.spec())); 151 FundamentalValue* rate(new FundamentalValue( 152 it->second.subresource_use_rate())); 153 154 subresource_list->Append(url_spec); 155 subresource_list->Append(rate); 156 } 157 return subresource_list; 158 } 159 160 //------------------------------------------------------------------------------ 161 162 ReferrerValue::ReferrerValue() 163 : birth_time_(base::Time::Now()), 164 navigation_count_(0), 165 preconnection_count_(0), 166 preresolution_count_(0), 167 subresource_use_rate_(kInitialConnectsExpectedValue) { 168 } 169 170 void ReferrerValue::SubresourceIsNeeded() { 171 DCHECK_GE(kWeightingForOldConnectsExpectedValue, 0); 172 DCHECK_LE(kWeightingForOldConnectsExpectedValue, 1.0); 173 ++navigation_count_; 174 subresource_use_rate_ += 1 - kWeightingForOldConnectsExpectedValue; 175 } 176 177 void ReferrerValue::ReferrerWasObserved() { 178 subresource_use_rate_ *= kWeightingForOldConnectsExpectedValue; 179 // Note: the use rate is temporarilly possibly incorect, as we need to find 180 // out if we really end up connecting. This will happen in a few hundred 181 // milliseconds (when content arrives, etc.). 182 // Value of subresource_use_rate_ should be sampled before this call. 183 } 184 185 } // namespace chrome_browser_net 186