1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // A Predictor object is instantiated once in the browser process, and manages 6 // both preresolution of hostnames, as well as TCP/IP preconnection to expected 7 // subresources. 8 // Most hostname lists are provided by the renderer processes, and include URLs 9 // that *might* be used in the near future by the browsing user. One goal of 10 // this class is to cause the underlying DNS structure to lookup a hostname 11 // before it is really needed, and hence reduce latency in the standard lookup 12 // paths. 13 // Subresource relationships are usually acquired from the referrer field in a 14 // navigation. A subresource URL may be associated with a referrer URL. Later 15 // navigations may, if the likelihood of needing the subresource is high enough, 16 // cause this module to speculatively create a TCP/IP connection. If there is 17 // only a low likelihood, then a DNS pre-resolution operation may be performed. 18 19 #ifndef CHROME_BROWSER_NET_PREDICTOR_H_ 20 #define CHROME_BROWSER_NET_PREDICTOR_H_ 21 22 #include <map> 23 #include <queue> 24 #include <set> 25 #include <string> 26 #include <vector> 27 28 #include "base/gtest_prod_util.h" 29 #include "base/memory/scoped_ptr.h" 30 #include "base/memory/weak_ptr.h" 31 #include "chrome/browser/net/referrer.h" 32 #include "chrome/browser/net/spdyproxy/proxy_advisor.h" 33 #include "chrome/browser/net/timed_cache.h" 34 #include "chrome/browser/net/url_info.h" 35 #include "chrome/common/net/predictor_common.h" 36 #include "net/base/host_port_pair.h" 37 38 class IOThread; 39 class PrefService; 40 class Profile; 41 42 namespace base { 43 class ListValue; 44 class WaitableEvent; 45 } 46 47 namespace net { 48 class HostResolver; 49 class URLRequestContextGetter; 50 } 51 52 namespace user_prefs { 53 class PrefRegistrySyncable; 54 } 55 56 namespace chrome_browser_net { 57 58 typedef chrome_common_net::UrlList UrlList; 59 typedef chrome_common_net::NameList NameList; 60 typedef std::map<GURL, UrlInfo> Results; 61 62 // Predictor is constructed during Profile construction (on the UI thread), 63 // but it is destroyed on the IO thread when ProfileIOData goes away. All of 64 // its core state and functionality happens on the IO thread. The only UI 65 // methods are initialization / shutdown related (including preconnect 66 // initialization), or convenience methods that internally forward calls to 67 // the IO thread. 68 class Predictor { 69 public: 70 // A version number for prefs that are saved. This should be incremented when 71 // we change the format so that we discard old data. 72 static const int kPredictorReferrerVersion; 73 74 // Given that the underlying Chromium resolver defaults to a total maximum of 75 // 8 paralell resolutions, we will avoid any chance of starving navigational 76 // resolutions by limiting the number of paralell speculative resolutions. 77 // This is used in the field trials and testing. 78 // TODO(jar): Move this limitation into the resolver. 79 static const size_t kMaxSpeculativeParallelResolves; 80 81 // To control the congestion avoidance system, we need an estimate of how 82 // many speculative requests may arrive at once. Since we currently only 83 // keep 8 subresource names for each frame, we'll use that as our basis. 84 // Note that when scanning search results lists, we might actually get 10 at 85 // a time, and wikipedia can often supply (during a page scan) upwards of 50. 86 // In those odd cases, we may discard some of the later speculative requests 87 // mistakenly assuming that the resolutions took too long. 88 static const int kTypicalSpeculativeGroupSize; 89 90 // The next constant specifies an amount of queueing delay that is 91 // "too large," and indicative of problems with resolutions (perhaps due to 92 // an overloaded router, or such). When we exceed this delay, congestion 93 // avoidance will kick in and all speculations in the queue will be discarded. 94 static const int kMaxSpeculativeResolveQueueDelayMs; 95 96 // We don't bother learning to preconnect via a GET if the original URL 97 // navigation was so long ago, that a preconnection would have been dropped 98 // anyway. We believe most servers will drop the connection in 10 seconds, so 99 // we currently estimate this time-till-drop at 10 seconds. 100 // TODO(jar): We should do a persistent field trial to validate/optimize this. 101 static const int kMaxUnusedSocketLifetimeSecondsWithoutAGet; 102 103 // |max_concurrent| specifies how many concurrent (parallel) prefetches will 104 // be performed. Host lookups will be issued through |host_resolver|. 105 explicit Predictor(bool preconnect_enabled); 106 107 virtual ~Predictor(); 108 109 // This function is used to create a predictor. For testing, we can create 110 // a version which does a simpler shutdown. 111 static Predictor* CreatePredictor(bool preconnect_enabled, 112 bool simple_shutdown); 113 114 static void RegisterProfilePrefs(user_prefs::PrefRegistrySyncable* registry); 115 116 // ------------- Start UI thread methods. 117 118 virtual void InitNetworkPredictor(PrefService* user_prefs, 119 PrefService* local_state, 120 IOThread* io_thread, 121 net::URLRequestContextGetter* getter); 122 123 // The Omnibox has proposed a given url to the user, and if it is a search 124 // URL, then it also indicates that this is preconnectable (i.e., we could 125 // preconnect to the search server). 126 void AnticipateOmniboxUrl(const GURL& url, bool preconnectable); 127 128 // Preconnect a URL and all of its subresource domains. 129 void PreconnectUrlAndSubresources(const GURL& url, 130 const GURL& first_party_for_cookies); 131 132 static UrlList GetPredictedUrlListAtStartup(PrefService* user_prefs, 133 PrefService* local_state); 134 135 static void set_max_queueing_delay(int max_queueing_delay_ms); 136 137 static void set_max_parallel_resolves(size_t max_parallel_resolves); 138 139 virtual void ShutdownOnUIThread(); 140 141 // ------------- End UI thread methods. 142 143 // ------------- Start IO thread methods. 144 145 // Cancel pending requests and prevent new ones from being made. 146 void Shutdown(); 147 148 // In some circumstances, for privacy reasons, all results should be 149 // discarded. This method gracefully handles that activity. 150 // Destroy all our internal state, which shows what names we've looked up, and 151 // how long each has taken, etc. etc. We also destroy records of suggesses 152 // (cache hits etc.). 153 void DiscardAllResults(); 154 155 // Add hostname(s) to the queue for processing. 156 void ResolveList(const UrlList& urls, 157 UrlInfo::ResolutionMotivation motivation); 158 159 void Resolve(const GURL& url, UrlInfo::ResolutionMotivation motivation); 160 161 // Record details of a navigation so that we can preresolve the host name 162 // ahead of time the next time the users navigates to the indicated host. 163 // Should only be called when urls are distinct, and they should already be 164 // canonicalized to not have a path. 165 void LearnFromNavigation(const GURL& referring_url, const GURL& target_url); 166 167 // When displaying info in about:dns, the following API is called. 168 static void PredictorGetHtmlInfo(Predictor* predictor, std::string* output); 169 170 // Dump HTML table containing list of referrers for about:dns. 171 void GetHtmlReferrerLists(std::string* output); 172 173 // Dump the list of currently known referrer domains and related prefetchable 174 // domains for about:dns. 175 void GetHtmlInfo(std::string* output); 176 177 // Discards any referrer for which all the suggested host names are currently 178 // annotated with negligible expected-use. Scales down (diminishes) the 179 // expected-use of those that remain, so that their use will go down by a 180 // factor each time we trim (moving the referrer closer to being discarded in 181 // a future call). 182 // The task is performed synchronously and completes before returing. 183 void TrimReferrersNow(); 184 185 // Construct a ListValue object that contains all the data in the referrers_ 186 // so that it can be persisted in a pref. 187 void SerializeReferrers(base::ListValue* referral_list); 188 189 // Process a ListValue that contains all the data from a previous reference 190 // list, as constructed by SerializeReferrers(), and add all the identified 191 // values into the current referrer list. 192 void DeserializeReferrers(const base::ListValue& referral_list); 193 194 void DeserializeReferrersThenDelete(base::ListValue* referral_list); 195 196 void DiscardInitialNavigationHistory(); 197 198 void FinalizeInitializationOnIOThread( 199 const std::vector<GURL>& urls_to_prefetch, 200 base::ListValue* referral_list, 201 IOThread* io_thread, 202 bool predictor_enabled); 203 204 // During startup, we learn what the first N urls visited are, and then 205 // resolve the associated hosts ASAP during our next startup. 206 void LearnAboutInitialNavigation(const GURL& url); 207 208 // Renderer bundles up list and sends to this browser API via IPC. 209 // TODO(jar): Use UrlList instead to include port and scheme. 210 void DnsPrefetchList(const NameList& hostnames); 211 212 // May be called from either the IO or UI thread and will PostTask 213 // to the IO thread if necessary. 214 void DnsPrefetchMotivatedList(const UrlList& urls, 215 UrlInfo::ResolutionMotivation motivation); 216 217 // May be called from either the IO or UI thread and will PostTask 218 // to the IO thread if necessary. 219 void SaveStateForNextStartupAndTrim(PrefService* prefs); 220 221 void SaveDnsPrefetchStateForNextStartupAndTrim( 222 base::ListValue* startup_list, 223 base::ListValue* referral_list, 224 base::WaitableEvent* completion); 225 226 // May be called from either the IO or UI thread and will PostTask 227 // to the IO thread if necessary. 228 void EnablePredictor(bool enable); 229 230 void EnablePredictorOnIOThread(bool enable); 231 232 // May be called from either the IO or UI thread and will PostTask 233 // to the IO thread if necessary. 234 void PreconnectUrl(const GURL& url, const GURL& first_party_for_cookies, 235 UrlInfo::ResolutionMotivation motivation, int count); 236 237 void PreconnectUrlOnIOThread(const GURL& url, 238 const GURL& first_party_for_cookies, 239 UrlInfo::ResolutionMotivation motivation, 240 int count); 241 242 void RecordPreconnectTrigger(const GURL& url); 243 244 void RecordPreconnectNavigationStat(const std::vector<GURL>& url_chain, 245 bool is_subresource); 246 247 void RecordLinkNavigation(const GURL& url); 248 249 // ------------- End IO thread methods. 250 251 // The following methods may be called on either the IO or UI threads. 252 253 // Instigate pre-connection to any URLs, or pre-resolution of related host, 254 // that we predict will be needed after this navigation (typically 255 // more-embedded resources on a page). This method will actually post a task 256 // to do the actual work, so as not to jump ahead of the frame navigation that 257 // instigated this activity. 258 void PredictFrameSubresources(const GURL& url, 259 const GURL& first_party_for_cookies); 260 261 // Put URL in canonical form, including a scheme, host, and port. 262 // Returns GURL::EmptyGURL() if the scheme is not http/https or if the url 263 // cannot be otherwise canonicalized. 264 static GURL CanonicalizeUrl(const GURL& url); 265 266 // Used for testing. 267 void SetHostResolver(net::HostResolver* host_resolver) { 268 host_resolver_ = host_resolver; 269 } 270 // Used for testing. 271 void SetProxyAdvisor(ProxyAdvisor* proxy_advisor) { 272 proxy_advisor_.reset(proxy_advisor); 273 } 274 // Used for testing. 275 size_t max_concurrent_dns_lookups() const { 276 return max_concurrent_dns_lookups_; 277 } 278 // Used for testing. 279 void SetShutdown(bool shutdown) { 280 shutdown_ = shutdown; 281 } 282 283 // Flag setting to use preconnection instead of just DNS pre-fetching. 284 bool preconnect_enabled() const { 285 return preconnect_enabled_; 286 } 287 288 // Flag setting for whether we are prefetching dns lookups. 289 bool predictor_enabled() const { 290 return predictor_enabled_; 291 } 292 293 294 private: 295 FRIEND_TEST_ALL_PREFIXES(PredictorTest, BenefitLookupTest); 296 FRIEND_TEST_ALL_PREFIXES(PredictorTest, ShutdownWhenResolutionIsPendingTest); 297 FRIEND_TEST_ALL_PREFIXES(PredictorTest, SingleLookupTest); 298 FRIEND_TEST_ALL_PREFIXES(PredictorTest, ConcurrentLookupTest); 299 FRIEND_TEST_ALL_PREFIXES(PredictorTest, MassiveConcurrentLookupTest); 300 FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueuePushPopTest); 301 FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueueReorderTest); 302 FRIEND_TEST_ALL_PREFIXES(PredictorTest, ReferrerSerializationTrimTest); 303 FRIEND_TEST_ALL_PREFIXES(PredictorTest, SingleLookupTestWithDisabledAdvisor); 304 FRIEND_TEST_ALL_PREFIXES(PredictorTest, SingleLookupTestWithEnabledAdvisor); 305 FRIEND_TEST_ALL_PREFIXES(PredictorTest, TestSimplePreconnectAdvisor); 306 friend class WaitForResolutionHelper; // For testing. 307 308 class LookupRequest; 309 310 // A simple priority queue for handling host names. 311 // Some names that are queued up have |motivation| that requires very rapid 312 // handling. For example, a sub-resource name lookup MUST be done before the 313 // actual sub-resource is fetched. In contrast, a name that was speculatively 314 // noted in a page has to be resolved before the user "gets around to" 315 // clicking on a link. By tagging (with a motivation) each push we make into 316 // this FIFO queue, the queue can re-order the more important names to service 317 // them sooner (relative to some low priority background resolutions). 318 class HostNameQueue { 319 public: 320 HostNameQueue(); 321 ~HostNameQueue(); 322 void Push(const GURL& url, 323 UrlInfo::ResolutionMotivation motivation); 324 bool IsEmpty() const; 325 GURL Pop(); 326 327 private: 328 // The names in the queue that should be serviced (popped) ASAP. 329 std::queue<GURL> rush_queue_; 330 // The names in the queue that should only be serviced when rush_queue is 331 // empty. 332 std::queue<GURL> background_queue_; 333 334 DISALLOW_COPY_AND_ASSIGN(HostNameQueue); 335 }; 336 337 // The InitialObserver monitors navigations made by the network stack. This 338 // is only used to identify startup time resolutions (for re-resolution 339 // during our next process startup). 340 // TODO(jar): Consider preconnecting at startup, which may be faster than 341 // waiting for render process to start and request a connection. 342 class InitialObserver { 343 public: 344 InitialObserver(); 345 ~InitialObserver(); 346 // Recording of when we observed each navigation. 347 typedef std::map<GURL, base::TimeTicks> FirstNavigations; 348 349 // Potentially add a new URL to our startup list. 350 void Append(const GURL& url, Predictor* predictor); 351 352 // Get an HTML version of our current planned first_navigations_. 353 void GetFirstResolutionsHtml(std::string* output); 354 355 // Persist the current first_navigations_ for storage in a list. 356 void GetInitialDnsResolutionList(base::ListValue* startup_list); 357 358 // Discards all initial loading history. 359 void DiscardInitialNavigationHistory() { first_navigations_.clear(); } 360 361 private: 362 // List of the first N URL resolutions observed in this run. 363 FirstNavigations first_navigations_; 364 365 // The number of URLs we'll save for pre-resolving at next startup. 366 static const size_t kStartupResolutionCount = 10; 367 }; 368 369 // A map that is keyed with the host/port that we've learned were the cause 370 // of loading additional URLs. The list of additional targets is held 371 // in a Referrer instance, which is a value in this map. 372 typedef std::map<GURL, Referrer> Referrers; 373 374 // Depending on the expected_subresource_use_, we may either make a TCP/IP 375 // preconnection, or merely pre-resolve the hostname via DNS (or even do 376 // nothing). The following are the threasholds for taking those actions. 377 static const double kPreconnectWorthyExpectedValue; 378 static const double kDNSPreresolutionWorthyExpectedValue; 379 // Referred hosts with a subresource_use_rate_ that are less than the 380 // following threshold will be discarded when we Trim() the list. 381 static const double kDiscardableExpectedValue; 382 // During trimming operation to discard hosts for which we don't have likely 383 // subresources, we multiply the expected_subresource_use_ value by the 384 // following ratio until that value is less than kDiscardableExpectedValue. 385 // This number should always be less than 1, an more than 0. 386 static const double kReferrerTrimRatio; 387 388 // Interval between periodic trimming of our whole referrer list. 389 // We only do a major trimming about once an hour, and then only when the user 390 // is actively browsing. 391 static const int64 kDurationBetweenTrimmingsHours; 392 // Interval between incremental trimmings (to avoid inducing Jank). 393 static const int64 kDurationBetweenTrimmingIncrementsSeconds; 394 // Number of referring URLs processed in an incremental trimming. 395 static const size_t kUrlsTrimmedPerIncrement; 396 397 // Only for testing. Returns true if hostname has been successfully resolved 398 // (name found). 399 bool WasFound(const GURL& url) const { 400 Results::const_iterator it(results_.find(url)); 401 return (it != results_.end()) && 402 it->second.was_found(); 403 } 404 405 // Only for testing. Return how long was the resolution 406 // or UrlInfo::NullDuration() if it hasn't been resolved yet. 407 base::TimeDelta GetResolutionDuration(const GURL& url) { 408 if (results_.find(url) == results_.end()) 409 return UrlInfo::NullDuration(); 410 return results_[url].resolve_duration(); 411 } 412 413 // Only for testing; 414 size_t peak_pending_lookups() const { return peak_pending_lookups_; } 415 416 // If a proxy advisor is defined, let it know that |url| will be prefetched or 417 // preconnected to. Can be called on either UI or IO threads and will post to 418 // the IO thread if necessary, invoking AdviseProxyOnIOThread(). 419 void AdviseProxy(const GURL& url, 420 UrlInfo::ResolutionMotivation motivation, 421 bool is_preconnect); 422 423 // ------------- Start IO thread methods. 424 425 // Perform actual resolution or preconnection to subresources now. This is 426 // an internal worker method that is reached via a post task from 427 // PredictFrameSubresources(). 428 void PrepareFrameSubresources(const GURL& url, 429 const GURL& first_party_for_cookies); 430 431 // Access method for use by async lookup request to pass resolution result. 432 void OnLookupFinished(LookupRequest* request, const GURL& url, bool found); 433 434 // Underlying method for both async and synchronous lookup to update state. 435 void LookupFinished(LookupRequest* request, 436 const GURL& url, bool found); 437 438 // Queue hostname for resolution. If queueing was done, return the pointer 439 // to the queued instance, otherwise return NULL. If the proxy advisor is 440 // enabled, and |url| is likely to be proxied, the hostname will not be 441 // queued as the browser is not expected to fetch it directly. 442 UrlInfo* AppendToResolutionQueue(const GURL& url, 443 UrlInfo::ResolutionMotivation motivation); 444 445 // Check to see if too much queuing delay has been noted for the given info, 446 // which indicates that there is "congestion" or growing delay in handling the 447 // resolution of names. Rather than letting this congestion potentially grow 448 // without bounds, we abandon our queued efforts at pre-resolutions in such a 449 // case. 450 // To do this, we will recycle |info|, as well as all queued items, back to 451 // the state they had before they were queued up. We can't do anything about 452 // the resolutions we've already sent off for processing on another thread, so 453 // we just let them complete. On a slow system, subject to congestion, this 454 // will greatly reduce the number of resolutions done, but it will assure that 455 // any resolutions that are done, are in a timely and hence potentially 456 // helpful manner. 457 bool CongestionControlPerformed(UrlInfo* info); 458 459 // Take lookup requests from work_queue_ and tell HostResolver to look them up 460 // asynchronously, provided we don't exceed concurrent resolution limit. 461 void StartSomeQueuedResolutions(); 462 463 // Performs trimming similar to TrimReferrersNow(), except it does it as a 464 // series of short tasks by posting continuations again an again until done. 465 void TrimReferrers(); 466 467 // Loads urls_being_trimmed_ from keys of current referrers_. 468 void LoadUrlsForTrimming(); 469 470 // Posts a task to do additional incremental trimming of referrers_. 471 void PostIncrementalTrimTask(); 472 473 // Calls Trim() on some or all of urls_being_trimmed_. 474 // If it does not process all the URLs in that vector, it posts a task to 475 // continue with them shortly (i.e., it yeilds and continues). 476 void IncrementalTrimReferrers(bool trim_all_now); 477 478 // If a proxy advisor is defined, let it know that |url| will be prefetched or 479 // preconnected to. 480 void AdviseProxyOnIOThread(const GURL& url, 481 UrlInfo::ResolutionMotivation motivation, 482 bool is_preconnect); 483 484 // ------------- End IO thread methods. 485 486 scoped_ptr<InitialObserver> initial_observer_; 487 488 // Reference to URLRequestContextGetter from the Profile which owns the 489 // predictor. Used by Preconnect. 490 scoped_refptr<net::URLRequestContextGetter> url_request_context_getter_; 491 492 // Status of speculative DNS resolution and speculative TCP/IP connection 493 // feature. 494 bool predictor_enabled_; 495 496 // work_queue_ holds a list of names we need to look up. 497 HostNameQueue work_queue_; 498 499 // results_ contains information for existing/prior prefetches. 500 Results results_; 501 502 std::set<LookupRequest*> pending_lookups_; 503 504 // For testing, to verify that we don't exceed the limit. 505 size_t peak_pending_lookups_; 506 507 // When true, we don't make new lookup requests. 508 bool shutdown_; 509 510 // The number of concurrent speculative lookups currently allowed to be sent 511 // to the resolver. Any additional lookups will be queued to avoid exceeding 512 // this value. The queue is a priority queue that will accelerate 513 // sub-resource speculation, and retard resolutions suggested by page scans. 514 const size_t max_concurrent_dns_lookups_; 515 516 // The maximum queueing delay that is acceptable before we enter congestion 517 // reduction mode, and discard all queued (but not yet assigned) resolutions. 518 const base::TimeDelta max_dns_queue_delay_; 519 520 // The host resolver we warm DNS entries for. 521 net::HostResolver* host_resolver_; 522 523 // Are we currently using preconnection, rather than just DNS resolution, for 524 // subresources and omni-box search URLs. 525 bool preconnect_enabled_; 526 527 // Most recent suggestion from Omnibox provided via AnticipateOmniboxUrl(). 528 std::string last_omnibox_host_; 529 530 // The time when the last preresolve was done for last_omnibox_host_. 531 base::TimeTicks last_omnibox_preresolve_; 532 533 // The number of consecutive requests to AnticipateOmniboxUrl() that suggested 534 // preconnecting (because it was to a search service). 535 int consecutive_omnibox_preconnect_count_; 536 537 // The time when the last preconnection was requested to a search service. 538 base::TimeTicks last_omnibox_preconnect_; 539 540 class PreconnectUsage; 541 scoped_ptr<PreconnectUsage> preconnect_usage_; 542 543 // For each URL that we might navigate to (that we've "learned about") 544 // we have a Referrer list. Each Referrer list has all hostnames we might 545 // need to pre-resolve or pre-connect to when there is a navigation to the 546 // orginial hostname. 547 Referrers referrers_; 548 549 // List of URLs in referrers_ currently being trimmed (scaled down to 550 // eventually be aged out of use). 551 std::vector<GURL> urls_being_trimmed_; 552 553 // A time after which we need to do more trimming of referrers. 554 base::TimeTicks next_trim_time_; 555 556 scoped_ptr<base::WeakPtrFactory<Predictor> > weak_factory_; 557 558 scoped_ptr<ProxyAdvisor> proxy_advisor_; 559 560 DISALLOW_COPY_AND_ASSIGN(Predictor); 561 }; 562 563 // This version of the predictor is used for testing. 564 class SimplePredictor : public Predictor { 565 public: 566 explicit SimplePredictor(bool preconnect_enabled) 567 : Predictor(preconnect_enabled) {} 568 virtual ~SimplePredictor() {} 569 virtual void InitNetworkPredictor( 570 PrefService* user_prefs, 571 PrefService* local_state, 572 IOThread* io_thread, 573 net::URLRequestContextGetter* getter) OVERRIDE; 574 virtual void ShutdownOnUIThread() OVERRIDE; 575 }; 576 577 } // namespace chrome_browser_net 578 579 #endif // CHROME_BROWSER_NET_PREDICTOR_H_ 580