1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // A Predictor object is instantiated once in the browser process, and manages 6 // both preresolution of hostnames, as well as TCP/IP preconnection to expected 7 // subresources. 8 // Most hostname lists are provided by the renderer processes, and include URLs 9 // that *might* be used in the near future by the browsing user. One goal of 10 // this class is to cause the underlying DNS structure to lookup a hostname 11 // before it is really needed, and hence reduce latency in the standard lookup 12 // paths. 13 // Subresource relationships are usually acquired from the referrer field in a 14 // navigation. A subresource URL may be associated with a referrer URL. Later 15 // navigations may, if the likelihood of needing the subresource is high enough, 16 // cause this module to speculatively create a TCP/IP connection. If there is 17 // only a low likelihood, then a DNS pre-resolution operation may be performed. 18 19 #ifndef CHROME_BROWSER_NET_PREDICTOR_H_ 20 #define CHROME_BROWSER_NET_PREDICTOR_H_ 21 #pragma once 22 23 #include <map> 24 #include <queue> 25 #include <set> 26 #include <string> 27 #include <vector> 28 29 #include "base/gtest_prod_util.h" 30 #include "base/memory/ref_counted.h" 31 #include "chrome/browser/net/url_info.h" 32 #include "chrome/browser/net/referrer.h" 33 #include "chrome/common/net/predictor_common.h" 34 #include "net/base/host_port_pair.h" 35 36 class ListValue; 37 38 namespace net { 39 class HostResolver; 40 } // namespace net 41 42 namespace chrome_browser_net { 43 44 typedef chrome_common_net::UrlList UrlList; 45 typedef chrome_common_net::NameList NameList; 46 typedef std::map<GURL, UrlInfo> Results; 47 48 // Note that Predictor is not thread safe, and must only be called from 49 // the IO thread. Failure to do so will result in a DCHECK at runtime. 50 class Predictor : public base::RefCountedThreadSafe<Predictor> { 51 public: 52 // A version number for prefs that are saved. This should be incremented when 53 // we change the format so that we discard old data. 54 enum { PREDICTOR_REFERRER_VERSION = 2 }; 55 56 // |max_concurrent| specifies how many concurrent (parallel) prefetches will 57 // be performed. Host lookups will be issued through |host_resolver|. 58 Predictor(net::HostResolver* host_resolver, 59 base::TimeDelta max_queue_delay_ms, size_t max_concurrent, 60 bool preconnect_enabled); 61 62 // Cancel pending requests and prevent new ones from being made. 63 void Shutdown(); 64 65 // In some circumstances, for privacy reasons, all results should be 66 // discarded. This method gracefully handles that activity. 67 // Destroy all our internal state, which shows what names we've looked up, and 68 // how long each has taken, etc. etc. We also destroy records of suggesses 69 // (cache hits etc.). 70 void DiscardAllResults(); 71 72 // Add hostname(s) to the queue for processing. 73 void ResolveList(const UrlList& urls, 74 UrlInfo::ResolutionMotivation motivation); 75 void Resolve(const GURL& url, 76 UrlInfo::ResolutionMotivation motivation); 77 78 // Instigate pre-connection to any URLs, or pre-resolution of related host, 79 // that we predict will be needed after this navigation (typically 80 // more-embedded resources on a page). This method will actually post a task 81 // to do the actual work, so as not to jump ahead of the frame navigation that 82 // instigated this activity. 83 void PredictFrameSubresources(const GURL& url); 84 85 // The Omnibox has proposed a given url to the user, and if it is a search 86 // URL, then it also indicates that this is preconnectable (i.e., we could 87 // preconnect to the search server). 88 void AnticipateOmniboxUrl(const GURL& url, bool preconnectable); 89 90 // Preconnect a URL and all of its subresource domains. 91 void PreconnectUrlAndSubresources(const GURL& url); 92 93 // Record details of a navigation so that we can preresolve the host name 94 // ahead of time the next time the users navigates to the indicated host. 95 // Should only be called when urls are distinct, and they should already be 96 // canonicalized to not have a path. 97 void LearnFromNavigation(const GURL& referring_url, const GURL& target_url); 98 99 // Dump HTML table containing list of referrers for about:dns. 100 void GetHtmlReferrerLists(std::string* output); 101 102 // Dump the list of currently known referrer domains and related prefetchable 103 // domains. 104 void GetHtmlInfo(std::string* output); 105 106 // Discards any referrer for which all the suggested host names are currently 107 // annotated with negligible expected-use. Scales down (diminishes) the 108 // expected-use of those that remain, so that their use will go down by a 109 // factor each time we trim (moving the referrer closer to being discarded in 110 // a future call). 111 // The task is performed synchronously and completes before returing. 112 void TrimReferrersNow(); 113 114 // Construct a ListValue object that contains all the data in the referrers_ 115 // so that it can be persisted in a pref. 116 void SerializeReferrers(ListValue* referral_list); 117 118 // Process a ListValue that contains all the data from a previous reference 119 // list, as constructed by SerializeReferrers(), and add all the identified 120 // values into the current referrer list. 121 void DeserializeReferrers(const ListValue& referral_list); 122 123 void DeserializeReferrersThenDelete(ListValue* referral_list); 124 125 // For unit test code only. 126 size_t max_concurrent_dns_lookups() const { 127 return max_concurrent_dns_lookups_; 128 } 129 130 // Flag setting to use preconnection instead of just DNS pre-fetching. 131 bool preconnect_enabled() const { return preconnect_enabled_; } 132 133 // Put URL in canonical form, including a scheme, host, and port. 134 // Returns GURL::EmptyGURL() if the scheme is not http/https or if the url 135 // cannot be otherwise canonicalized. 136 static GURL CanonicalizeUrl(const GURL& url); 137 138 private: 139 friend class base::RefCountedThreadSafe<Predictor>; 140 FRIEND_TEST_ALL_PREFIXES(PredictorTest, BenefitLookupTest); 141 FRIEND_TEST_ALL_PREFIXES(PredictorTest, ShutdownWhenResolutionIsPendingTest); 142 FRIEND_TEST_ALL_PREFIXES(PredictorTest, SingleLookupTest); 143 FRIEND_TEST_ALL_PREFIXES(PredictorTest, ConcurrentLookupTest); 144 FRIEND_TEST_ALL_PREFIXES(PredictorTest, MassiveConcurrentLookupTest); 145 FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueuePushPopTest); 146 FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueueReorderTest); 147 FRIEND_TEST_ALL_PREFIXES(PredictorTest, ReferrerSerializationTrimTest); 148 friend class WaitForResolutionHelper; // For testing. 149 150 class LookupRequest; 151 152 // A simple priority queue for handling host names. 153 // Some names that are queued up have |motivation| that requires very rapid 154 // handling. For example, a sub-resource name lookup MUST be done before the 155 // actual sub-resource is fetched. In contrast, a name that was speculatively 156 // noted in a page has to be resolved before the user "gets around to" 157 // clicking on a link. By tagging (with a motivation) each push we make into 158 // this FIFO queue, the queue can re-order the more important names to service 159 // them sooner (relative to some low priority background resolutions). 160 class HostNameQueue { 161 public: 162 HostNameQueue(); 163 ~HostNameQueue(); 164 void Push(const GURL& url, 165 UrlInfo::ResolutionMotivation motivation); 166 bool IsEmpty() const; 167 GURL Pop(); 168 169 private: 170 // The names in the queue that should be serviced (popped) ASAP. 171 std::queue<GURL> rush_queue_; 172 // The names in the queue that should only be serviced when rush_queue is 173 // empty. 174 std::queue<GURL> background_queue_; 175 176 DISALLOW_COPY_AND_ASSIGN(HostNameQueue); 177 }; 178 179 // A map that is keyed with the host/port that we've learned were the cause 180 // of loading additional URLs. The list of additional targets is held 181 // in a Referrer instance, which is a value in this map. 182 typedef std::map<GURL, Referrer> Referrers; 183 184 // Depending on the expected_subresource_use_, we may either make a TCP/IP 185 // preconnection, or merely pre-resolve the hostname via DNS (or even do 186 // nothing). The following are the threasholds for taking those actions. 187 static const double kPreconnectWorthyExpectedValue; 188 static const double kDNSPreresolutionWorthyExpectedValue; 189 // Referred hosts with a subresource_use_rate_ that are less than the 190 // following threshold will be discarded when we Trim() the list. 191 static const double kDiscardableExpectedValue; 192 // During trimming operation to discard hosts for which we don't have likely 193 // subresources, we multiply the expected_subresource_use_ value by the 194 // following ratio until that value is less than kDiscardableExpectedValue. 195 // This number should always be less than 1, an more than 0. 196 static const double kReferrerTrimRatio; 197 198 // Interval between periodic trimming of our whole referrer list. 199 // We only do a major trimming about once an hour, and then only when the user 200 // is actively browsing. 201 static const base::TimeDelta kDurationBetweenTrimmings; 202 // Interval between incremental trimmings (to avoid inducing Jank). 203 static const base::TimeDelta kDurationBetweenTrimmingIncrements; 204 // Number of referring URLs processed in an incremental trimming. 205 static const size_t kUrlsTrimmedPerIncrement; 206 207 ~Predictor(); 208 209 // Perform actual resolution or preconnection to subresources now. This is 210 // an internal worker method that is reached via a post task from 211 // PredictFrameSubresources(). 212 void PrepareFrameSubresources(const GURL& url); 213 214 // Only for testing. Returns true if hostname has been successfully resolved 215 // (name found). 216 bool WasFound(const GURL& url) const { 217 Results::const_iterator it(results_.find(url)); 218 return (it != results_.end()) && 219 it->second.was_found(); 220 } 221 222 // Only for testing. Return how long was the resolution 223 // or UrlInfo::kNullDuration if it hasn't been resolved yet. 224 base::TimeDelta GetResolutionDuration(const GURL& url) { 225 if (results_.find(url) == results_.end()) 226 return UrlInfo::kNullDuration; 227 return results_[url].resolve_duration(); 228 } 229 230 // Only for testing; 231 size_t peak_pending_lookups() const { return peak_pending_lookups_; } 232 233 // Access method for use by async lookup request to pass resolution result. 234 void OnLookupFinished(LookupRequest* request, const GURL& url, bool found); 235 236 // Underlying method for both async and synchronous lookup to update state. 237 void LookupFinished(LookupRequest* request, 238 const GURL& url, bool found); 239 240 // Queue hostname for resolution. If queueing was done, return the pointer 241 // to the queued instance, otherwise return NULL. 242 UrlInfo* AppendToResolutionQueue(const GURL& url, 243 UrlInfo::ResolutionMotivation motivation); 244 245 // Check to see if too much queuing delay has been noted for the given info, 246 // which indicates that there is "congestion" or growing delay in handling the 247 // resolution of names. Rather than letting this congestion potentially grow 248 // without bounds, we abandon our queued efforts at pre-resolutions in such a 249 // case. 250 // To do this, we will recycle |info|, as well as all queued items, back to 251 // the state they had before they were queued up. We can't do anything about 252 // the resolutions we've already sent off for processing on another thread, so 253 // we just let them complete. On a slow system, subject to congestion, this 254 // will greatly reduce the number of resolutions done, but it will assure that 255 // any resolutions that are done, are in a timely and hence potentially 256 // helpful manner. 257 bool CongestionControlPerformed(UrlInfo* info); 258 259 // Take lookup requests from work_queue_ and tell HostResolver to look them up 260 // asynchronously, provided we don't exceed concurrent resolution limit. 261 void StartSomeQueuedResolutions(); 262 263 // Performs trimming similar to TrimReferrersNow(), except it does it as a 264 // series of short tasks by posting continuations again an again until done. 265 void TrimReferrers(); 266 267 // Loads urls_being_trimmed_ from keys of current referrers_. 268 void LoadUrlsForTrimming(); 269 270 // Posts a task to do additional incremental trimming of referrers_. 271 void PostIncrementalTrimTask(); 272 273 // Calls Trim() on some or all of urls_being_trimmed_. 274 // If it does not process all the URLs in that vector, it posts a task to 275 // continue with them shortly (i.e., it yeilds and continues). 276 void IncrementalTrimReferrers(bool trim_all_now); 277 278 // work_queue_ holds a list of names we need to look up. 279 HostNameQueue work_queue_; 280 281 // results_ contains information for existing/prior prefetches. 282 Results results_; 283 284 std::set<LookupRequest*> pending_lookups_; 285 286 // For testing, to verify that we don't exceed the limit. 287 size_t peak_pending_lookups_; 288 289 // When true, we don't make new lookup requests. 290 bool shutdown_; 291 292 // The number of concurrent speculative lookups currently allowed to be sent 293 // to the resolver. Any additional lookups will be queued to avoid exceeding 294 // this value. The queue is a priority queue that will accelerate 295 // sub-resource speculation, and retard resolutions suggested by page scans. 296 const size_t max_concurrent_dns_lookups_; 297 298 // The maximum queueing delay that is acceptable before we enter congestion 299 // reduction mode, and discard all queued (but not yet assigned) resolutions. 300 const base::TimeDelta max_dns_queue_delay_; 301 302 // The host resolver we warm DNS entries for. 303 net::HostResolver* const host_resolver_; 304 305 // Are we currently using preconnection, rather than just DNS resolution, for 306 // subresources and omni-box search URLs. 307 bool preconnect_enabled_; 308 309 // Most recent suggestion from Omnibox provided via AnticipateOmniboxUrl(). 310 std::string last_omnibox_host_; 311 312 // The time when the last preresolve was done for last_omnibox_host_. 313 base::TimeTicks last_omnibox_preresolve_; 314 315 // The number of consecutive requests to AnticipateOmniboxUrl() that suggested 316 // preconnecting (because it was to a search service). 317 int consecutive_omnibox_preconnect_count_; 318 319 // The time when the last preconnection was requested to a search service. 320 base::TimeTicks last_omnibox_preconnect_; 321 322 // For each URL that we might navigate to (that we've "learned about") 323 // we have a Referrer list. Each Referrer list has all hostnames we might 324 // need to pre-resolve or pre-connect to when there is a navigation to the 325 // orginial hostname. 326 Referrers referrers_; 327 328 // List of URLs in referrers_ currently being trimmed (scaled down to 329 // eventually be aged out of use). 330 std::vector<GURL> urls_being_trimmed_; 331 332 // A time after which we need to do more trimming of referrers. 333 base::TimeTicks next_trim_time_; 334 335 ScopedRunnableMethodFactory<Predictor> trim_task_factory_; 336 337 DISALLOW_COPY_AND_ASSIGN(Predictor); 338 }; 339 340 } // namespace chrome_browser_net 341 342 #endif // CHROME_BROWSER_NET_PREDICTOR_H_ 343