Home | History | Annotate | Download | only in net
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // A Predictor object is instantiated once in the browser process, and manages
      6 // both preresolution of hostnames, as well as TCP/IP preconnection to expected
      7 // subresources.
      8 // Most hostname lists are provided by the renderer processes, and include URLs
      9 // that *might* be used in the near future by the browsing user.  One goal of
     10 // this class is to cause the underlying DNS structure to lookup a hostname
     11 // before it is really needed, and hence reduce latency in the standard lookup
     12 // paths.
     13 // Subresource relationships are usually acquired from the referrer field in a
     14 // navigation.  A subresource URL may be associated with a referrer URL.  Later
     15 // navigations may, if the likelihood of needing the subresource is high enough,
     16 // cause this module to speculatively create a TCP/IP connection. If there is
     17 // only a low likelihood, then a DNS pre-resolution operation may be performed.
     18 
     19 #ifndef CHROME_BROWSER_NET_PREDICTOR_H_
     20 #define CHROME_BROWSER_NET_PREDICTOR_H_
     21 
     22 #include <map>
     23 #include <queue>
     24 #include <set>
     25 #include <string>
     26 #include <vector>
     27 
     28 #include "base/gtest_prod_util.h"
     29 #include "base/memory/scoped_ptr.h"
     30 #include "base/memory/weak_ptr.h"
     31 #include "chrome/browser/net/referrer.h"
     32 #include "chrome/browser/net/timed_cache.h"
     33 #include "chrome/browser/net/url_info.h"
     34 #include "chrome/common/net/predictor_common.h"
     35 #include "net/base/host_port_pair.h"
     36 
     37 class IOThread;
     38 class PrefService;
     39 class Profile;
     40 
     41 namespace base {
     42 class ListValue;
     43 class WaitableEvent;
     44 }
     45 
     46 namespace net {
     47 class HostResolver;
     48 class URLRequestContextGetter;
     49 }
     50 
     51 namespace user_prefs {
     52 class PrefRegistrySyncable;
     53 }
     54 
     55 namespace chrome_browser_net {
     56 
     57 typedef chrome_common_net::UrlList UrlList;
     58 typedef chrome_common_net::NameList NameList;
     59 typedef std::map<GURL, UrlInfo> Results;
     60 
     61 // Predictor is constructed during Profile construction (on the UI thread),
     62 // but it is destroyed on the IO thread when ProfileIOData goes away. All of
     63 // its core state and functionality happens on the IO thread. The only UI
     64 // methods are initialization / shutdown related (including preconnect
     65 // initialization), or convenience methods that internally forward calls to
     66 // the IO thread.
     67 class Predictor {
     68  public:
     69   // A version number for prefs that are saved. This should be incremented when
     70   // we change the format so that we discard old data.
     71   static const int kPredictorReferrerVersion;
     72 
     73   // Given that the underlying Chromium resolver defaults to a total maximum of
     74   // 8 paralell resolutions, we will avoid any chance of starving navigational
     75   // resolutions by limiting the number of paralell speculative resolutions.
     76   // This is used in the field trials and testing.
     77   // TODO(jar): Move this limitation into the resolver.
     78   static const size_t kMaxSpeculativeParallelResolves;
     79 
     80   // To control the congestion avoidance system, we need an estimate of how
     81   // many speculative requests may arrive at once.  Since we currently only
     82   // keep 8 subresource names for each frame, we'll use that as our basis.
     83   // Note that when scanning search results lists, we might actually get 10 at
     84   // a time, and wikipedia can often supply (during a page scan) upwards of 50.
     85   // In those odd cases, we may discard some of the later speculative requests
     86   // mistakenly assuming that the resolutions took too long.
     87   static const int kTypicalSpeculativeGroupSize;
     88 
     89   // The next constant specifies an amount of queueing delay that is
     90   // "too large," and indicative of problems with resolutions (perhaps due to
     91   // an overloaded router, or such).  When we exceed this delay, congestion
     92   // avoidance will kick in and all speculations in the queue will be discarded.
     93   static const int kMaxSpeculativeResolveQueueDelayMs;
     94 
     95   // We don't bother learning to preconnect via a GET if the original URL
     96   // navigation was so long ago, that a preconnection would have been dropped
     97   // anyway.  We believe most servers will drop the connection in 10 seconds, so
     98   // we currently estimate this time-till-drop at 10 seconds.
     99   // TODO(jar): We should do a persistent field trial to validate/optimize this.
    100   static const int kMaxUnusedSocketLifetimeSecondsWithoutAGet;
    101 
    102   // |max_concurrent| specifies how many concurrent (parallel) prefetches will
    103   // be performed. Host lookups will be issued through |host_resolver|.
    104   explicit Predictor(bool preconnect_enabled);
    105 
    106   virtual ~Predictor();
    107 
    108   // This function is used to create a predictor. For testing, we can create
    109   // a version which does a simpler shutdown.
    110   static Predictor* CreatePredictor(bool preconnect_enabled,
    111                                     bool simple_shutdown);
    112 
    113   static void RegisterProfilePrefs(user_prefs::PrefRegistrySyncable* registry);
    114 
    115   // ------------- Start UI thread methods.
    116 
    117   virtual void InitNetworkPredictor(PrefService* user_prefs,
    118                                     PrefService* local_state,
    119                                     IOThread* io_thread,
    120                                     net::URLRequestContextGetter* getter);
    121 
    122   // The Omnibox has proposed a given url to the user, and if it is a search
    123   // URL, then it also indicates that this is preconnectable (i.e., we could
    124   // preconnect to the search server).
    125   void AnticipateOmniboxUrl(const GURL& url, bool preconnectable);
    126 
    127   // Preconnect a URL and all of its subresource domains.
    128   void PreconnectUrlAndSubresources(const GURL& url,
    129                                     const GURL& first_party_for_cookies);
    130 
    131   static UrlList GetPredictedUrlListAtStartup(PrefService* user_prefs,
    132                                               PrefService* local_state);
    133 
    134   static void set_max_queueing_delay(int max_queueing_delay_ms);
    135 
    136   static void set_max_parallel_resolves(size_t max_parallel_resolves);
    137 
    138   virtual void ShutdownOnUIThread(PrefService* user_prefs);
    139 
    140   // ------------- End UI thread methods.
    141 
    142   // ------------- Start IO thread methods.
    143 
    144   // Cancel pending requests and prevent new ones from being made.
    145   void Shutdown();
    146 
    147   // In some circumstances, for privacy reasons, all results should be
    148   // discarded.  This method gracefully handles that activity.
    149   // Destroy all our internal state, which shows what names we've looked up, and
    150   // how long each has taken, etc. etc.  We also destroy records of suggesses
    151   // (cache hits etc.).
    152   void DiscardAllResults();
    153 
    154   // Add hostname(s) to the queue for processing.
    155   void ResolveList(const UrlList& urls,
    156                    UrlInfo::ResolutionMotivation motivation);
    157 
    158   void Resolve(const GURL& url, UrlInfo::ResolutionMotivation motivation);
    159 
    160   // Record details of a navigation so that we can preresolve the host name
    161   // ahead of time the next time the users navigates to the indicated host.
    162   // Should only be called when urls are distinct, and they should already be
    163   // canonicalized to not have a path.
    164   void LearnFromNavigation(const GURL& referring_url, const GURL& target_url);
    165 
    166   // When displaying info in about:dns, the following API is called.
    167   static void PredictorGetHtmlInfo(Predictor* predictor, std::string* output);
    168 
    169   // Dump HTML table containing list of referrers for about:dns.
    170   void GetHtmlReferrerLists(std::string* output);
    171 
    172   // Dump the list of currently known referrer domains and related prefetchable
    173   // domains for about:dns.
    174   void GetHtmlInfo(std::string* output);
    175 
    176   // Discards any referrer for which all the suggested host names are currently
    177   // annotated with negligible expected-use.  Scales down (diminishes) the
    178   // expected-use of those that remain, so that their use will go down by a
    179   // factor each time we trim (moving the referrer closer to being discarded in
    180   // a future call).
    181   // The task is performed synchronously and completes before returing.
    182   void TrimReferrersNow();
    183 
    184   // Construct a ListValue object that contains all the data in the referrers_
    185   // so that it can be persisted in a pref.
    186   void SerializeReferrers(base::ListValue* referral_list);
    187 
    188   // Process a ListValue that contains all the data from a previous reference
    189   // list, as constructed by SerializeReferrers(), and add all the identified
    190   // values into the current referrer list.
    191   void DeserializeReferrers(const base::ListValue& referral_list);
    192 
    193   void DeserializeReferrersThenDelete(base::ListValue* referral_list);
    194 
    195   void DiscardInitialNavigationHistory();
    196 
    197   void FinalizeInitializationOnIOThread(
    198       const std::vector<GURL>& urls_to_prefetch,
    199       base::ListValue* referral_list,
    200       IOThread* io_thread,
    201       bool predictor_enabled);
    202 
    203   // During startup, we learn what the first N urls visited are, and then
    204   // resolve the associated hosts ASAP during our next startup.
    205   void LearnAboutInitialNavigation(const GURL& url);
    206 
    207   // Renderer bundles up list and sends to this browser API via IPC.
    208   // TODO(jar): Use UrlList instead to include port and scheme.
    209   void DnsPrefetchList(const NameList& hostnames);
    210 
    211   // May be called from either the IO or UI thread and will PostTask
    212   // to the IO thread if necessary.
    213   void DnsPrefetchMotivatedList(const UrlList& urls,
    214                                 UrlInfo::ResolutionMotivation motivation);
    215 
    216   // May be called from either the IO or UI thread and will PostTask
    217   // to the IO thread if necessary.
    218   void SaveStateForNextStartupAndTrim(PrefService* prefs);
    219 
    220   void SaveDnsPrefetchStateForNextStartupAndTrim(
    221       base::ListValue* startup_list,
    222       base::ListValue* referral_list,
    223       base::WaitableEvent* completion);
    224 
    225   // May be called from either the IO or UI thread and will PostTask
    226   // to the IO thread if necessary.
    227   void EnablePredictor(bool enable);
    228 
    229   void EnablePredictorOnIOThread(bool enable);
    230 
    231   // May be called from either the IO or UI thread and will PostTask
    232   // to the IO thread if necessary.
    233   void PreconnectUrl(const GURL& url, const GURL& first_party_for_cookies,
    234                      UrlInfo::ResolutionMotivation motivation, int count);
    235 
    236   void PreconnectUrlOnIOThread(const GURL& url,
    237                                const GURL& first_party_for_cookies,
    238                                UrlInfo::ResolutionMotivation motivation,
    239                                int count);
    240 
    241   void RecordPreconnectTrigger(const GURL& url);
    242 
    243   void RecordPreconnectNavigationStat(const std::vector<GURL>& url_chain,
    244                                       bool is_subresource);
    245 
    246   void RecordLinkNavigation(const GURL& url);
    247 
    248   // ------------- End IO thread methods.
    249 
    250   // The following methods may be called on either the IO or UI threads.
    251 
    252   // Instigate pre-connection to any URLs, or pre-resolution of related host,
    253   // that we predict will be needed after this navigation (typically
    254   // more-embedded resources on a page).  This method will actually post a task
    255   // to do the actual work, so as not to jump ahead of the frame navigation that
    256   // instigated this activity.
    257   void PredictFrameSubresources(const GURL& url,
    258                                 const GURL& first_party_for_cookies);
    259 
    260   // Put URL in canonical form, including a scheme, host, and port.
    261   // Returns GURL::EmptyGURL() if the scheme is not http/https or if the url
    262   // cannot be otherwise canonicalized.
    263   static GURL CanonicalizeUrl(const GURL& url);
    264 
    265   // Used for testing.
    266   void SetHostResolver(net::HostResolver* host_resolver) {
    267     host_resolver_ = host_resolver;
    268   }
    269   // Used for testing.
    270   size_t max_concurrent_dns_lookups() const {
    271     return max_concurrent_dns_lookups_;
    272   }
    273   // Used for testing.
    274   void SetShutdown(bool shutdown) {
    275     shutdown_ = shutdown;
    276   }
    277 
    278   // Flag setting to use preconnection instead of just DNS pre-fetching.
    279   bool preconnect_enabled() const {
    280     return preconnect_enabled_;
    281   }
    282 
    283   // Flag setting for whether we are prefetching dns lookups.
    284   bool predictor_enabled() const {
    285     return predictor_enabled_;
    286   }
    287 
    288 
    289  private:
    290   FRIEND_TEST_ALL_PREFIXES(PredictorTest, BenefitLookupTest);
    291   FRIEND_TEST_ALL_PREFIXES(PredictorTest, ShutdownWhenResolutionIsPendingTest);
    292   FRIEND_TEST_ALL_PREFIXES(PredictorTest, SingleLookupTest);
    293   FRIEND_TEST_ALL_PREFIXES(PredictorTest, ConcurrentLookupTest);
    294   FRIEND_TEST_ALL_PREFIXES(PredictorTest, MassiveConcurrentLookupTest);
    295   FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueuePushPopTest);
    296   FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueueReorderTest);
    297   FRIEND_TEST_ALL_PREFIXES(PredictorTest, ReferrerSerializationTrimTest);
    298   friend class WaitForResolutionHelper;  // For testing.
    299 
    300   class LookupRequest;
    301 
    302   // A simple priority queue for handling host names.
    303   // Some names that are queued up have |motivation| that requires very rapid
    304   // handling.  For example, a sub-resource name lookup MUST be done before the
    305   // actual sub-resource is fetched.  In contrast, a name that was speculatively
    306   // noted in a page has to be resolved before the user "gets around to"
    307   // clicking on a link.  By tagging (with a motivation) each push we make into
    308   // this FIFO queue, the queue can re-order the more important names to service
    309   // them sooner (relative to some low priority background resolutions).
    310   class HostNameQueue {
    311    public:
    312     HostNameQueue();
    313     ~HostNameQueue();
    314     void Push(const GURL& url,
    315               UrlInfo::ResolutionMotivation motivation);
    316     bool IsEmpty() const;
    317     GURL Pop();
    318 
    319    private:
    320     // The names in the queue that should be serviced (popped) ASAP.
    321     std::queue<GURL> rush_queue_;
    322     // The names in the queue that should only be serviced when rush_queue is
    323     // empty.
    324     std::queue<GURL> background_queue_;
    325 
    326   DISALLOW_COPY_AND_ASSIGN(HostNameQueue);
    327   };
    328 
    329   // The InitialObserver monitors navigations made by the network stack. This
    330   // is only used to identify startup time resolutions (for re-resolution
    331   // during our next process startup).
    332   // TODO(jar): Consider preconnecting at startup, which may be faster than
    333   // waiting for render process to start and request a connection.
    334   class InitialObserver {
    335    public:
    336     InitialObserver();
    337     ~InitialObserver();
    338     // Recording of when we observed each navigation.
    339     typedef std::map<GURL, base::TimeTicks> FirstNavigations;
    340 
    341     // Potentially add a new URL to our startup list.
    342     void Append(const GURL& url, Predictor* predictor);
    343 
    344     // Get an HTML version of our current planned first_navigations_.
    345     void GetFirstResolutionsHtml(std::string* output);
    346 
    347     // Persist the current first_navigations_ for storage in a list.
    348     void GetInitialDnsResolutionList(base::ListValue* startup_list);
    349 
    350     // Discards all initial loading history.
    351     void DiscardInitialNavigationHistory() { first_navigations_.clear(); }
    352 
    353    private:
    354     // List of the first N URL resolutions observed in this run.
    355     FirstNavigations first_navigations_;
    356 
    357     // The number of URLs we'll save for pre-resolving at next startup.
    358     static const size_t kStartupResolutionCount = 10;
    359   };
    360 
    361   // A map that is keyed with the host/port that we've learned were the cause
    362   // of loading additional URLs.  The list of additional targets is held
    363   // in a Referrer instance, which is a value in this map.
    364   typedef std::map<GURL, Referrer> Referrers;
    365 
    366   // Depending on the expected_subresource_use_, we may either make a TCP/IP
    367   // preconnection, or merely pre-resolve the hostname via DNS (or even do
    368   // nothing).  The following are the threasholds for taking those actions.
    369   static const double kPreconnectWorthyExpectedValue;
    370   static const double kDNSPreresolutionWorthyExpectedValue;
    371   // Referred hosts with a subresource_use_rate_ that are less than the
    372   // following threshold will be discarded when we Trim() the list.
    373   static const double kDiscardableExpectedValue;
    374   // During trimming operation to discard hosts for which we don't have likely
    375   // subresources, we multiply the expected_subresource_use_ value by the
    376   // following ratio until that value is less than kDiscardableExpectedValue.
    377   // This number should always be less than 1, an more than 0.
    378   static const double kReferrerTrimRatio;
    379 
    380   // Interval between periodic trimming of our whole referrer list.
    381   // We only do a major trimming about once an hour, and then only when the user
    382   // is actively browsing.
    383   static const int64 kDurationBetweenTrimmingsHours;
    384   // Interval between incremental trimmings (to avoid inducing Jank).
    385   static const int64 kDurationBetweenTrimmingIncrementsSeconds;
    386   // Number of referring URLs processed in an incremental trimming.
    387   static const size_t kUrlsTrimmedPerIncrement;
    388 
    389   // Only for testing. Returns true if hostname has been successfully resolved
    390   // (name found).
    391   bool WasFound(const GURL& url) const {
    392     Results::const_iterator it(results_.find(url));
    393     return (it != results_.end()) &&
    394             it->second.was_found();
    395   }
    396 
    397   // Only for testing. Return how long was the resolution
    398   // or UrlInfo::NullDuration() if it hasn't been resolved yet.
    399   base::TimeDelta GetResolutionDuration(const GURL& url) {
    400     if (results_.find(url) == results_.end())
    401       return UrlInfo::NullDuration();
    402     return results_[url].resolve_duration();
    403   }
    404 
    405   // Only for testing;
    406   size_t peak_pending_lookups() const { return peak_pending_lookups_; }
    407 
    408   // ------------- Start IO thread methods.
    409 
    410   // Perform actual resolution or preconnection to subresources now.  This is
    411   // an internal worker method that is reached via a post task from
    412   // PredictFrameSubresources().
    413   void PrepareFrameSubresources(const GURL& url,
    414                                 const GURL& first_party_for_cookies);
    415 
    416   // Access method for use by async lookup request to pass resolution result.
    417   void OnLookupFinished(LookupRequest* request, const GURL& url, bool found);
    418 
    419   // Underlying method for both async and synchronous lookup to update state.
    420   void LookupFinished(LookupRequest* request,
    421                       const GURL& url, bool found);
    422 
    423   // Queue hostname for resolution.  If queueing was done, return the pointer
    424   // to the queued instance, otherwise return NULL.
    425   UrlInfo* AppendToResolutionQueue(const GURL& url,
    426       UrlInfo::ResolutionMotivation motivation);
    427 
    428   // Check to see if too much queuing delay has been noted for the given info,
    429   // which indicates that there is "congestion" or growing delay in handling the
    430   // resolution of names.  Rather than letting this congestion potentially grow
    431   // without bounds, we abandon our queued efforts at pre-resolutions in such a
    432   // case.
    433   // To do this, we will recycle |info|, as well as all queued items, back to
    434   // the state they had before they were queued up.  We can't do anything about
    435   // the resolutions we've already sent off for processing on another thread, so
    436   // we just let them complete.  On a slow system, subject to congestion, this
    437   // will greatly reduce the number of resolutions done, but it will assure that
    438   // any resolutions that are done, are in a timely and hence potentially
    439   // helpful manner.
    440   bool CongestionControlPerformed(UrlInfo* info);
    441 
    442   // Take lookup requests from work_queue_ and tell HostResolver to look them up
    443   // asynchronously, provided we don't exceed concurrent resolution limit.
    444   void StartSomeQueuedResolutions();
    445 
    446   // Performs trimming similar to TrimReferrersNow(), except it does it as a
    447   // series of short tasks by posting continuations again an again until done.
    448   void TrimReferrers();
    449 
    450   // Loads urls_being_trimmed_ from keys of current referrers_.
    451   void LoadUrlsForTrimming();
    452 
    453   // Posts a task to do additional incremental trimming of referrers_.
    454   void PostIncrementalTrimTask();
    455 
    456   // Calls Trim() on some or all of urls_being_trimmed_.
    457   // If it does not process all the URLs in that vector, it posts a task to
    458   // continue with them shortly (i.e., it yeilds and continues).
    459   void IncrementalTrimReferrers(bool trim_all_now);
    460 
    461   // ------------- End IO thread methods.
    462 
    463   scoped_ptr<InitialObserver> initial_observer_;
    464 
    465   // Reference to URLRequestContextGetter from the Profile which owns the
    466   // predictor. Used by Preconnect.
    467   scoped_refptr<net::URLRequestContextGetter> url_request_context_getter_;
    468 
    469   // Status of speculative DNS resolution and speculative TCP/IP connection
    470   // feature.
    471   bool predictor_enabled_;
    472 
    473   // work_queue_ holds a list of names we need to look up.
    474   HostNameQueue work_queue_;
    475 
    476   // results_ contains information for existing/prior prefetches.
    477   Results results_;
    478 
    479   std::set<LookupRequest*> pending_lookups_;
    480 
    481   // For testing, to verify that we don't exceed the limit.
    482   size_t peak_pending_lookups_;
    483 
    484   // When true, we don't make new lookup requests.
    485   bool shutdown_;
    486 
    487   // The number of concurrent speculative lookups currently allowed to be sent
    488   // to the resolver.  Any additional lookups will be queued to avoid exceeding
    489   // this value.  The queue is a priority queue that will accelerate
    490   // sub-resource speculation, and retard resolutions suggested by page scans.
    491   const size_t max_concurrent_dns_lookups_;
    492 
    493   // The maximum queueing delay that is acceptable before we enter congestion
    494   // reduction mode, and discard all queued (but not yet assigned) resolutions.
    495   const base::TimeDelta max_dns_queue_delay_;
    496 
    497   // The host resolver we warm DNS entries for.
    498   net::HostResolver* host_resolver_;
    499 
    500   // Are we currently using preconnection, rather than just DNS resolution, for
    501   // subresources and omni-box search URLs.
    502   bool preconnect_enabled_;
    503 
    504   // Most recent suggestion from Omnibox provided via AnticipateOmniboxUrl().
    505   std::string last_omnibox_host_;
    506 
    507   // The time when the last preresolve was done for last_omnibox_host_.
    508   base::TimeTicks last_omnibox_preresolve_;
    509 
    510   // The number of consecutive requests to AnticipateOmniboxUrl() that suggested
    511   // preconnecting (because it was to a search service).
    512   int consecutive_omnibox_preconnect_count_;
    513 
    514   // The time when the last preconnection was requested to a search service.
    515   base::TimeTicks last_omnibox_preconnect_;
    516 
    517   class PreconnectUsage;
    518   scoped_ptr<PreconnectUsage> preconnect_usage_;
    519 
    520   // For each URL that we might navigate to (that we've "learned about")
    521   // we have a Referrer list. Each Referrer list has all hostnames we might
    522   // need to pre-resolve or pre-connect to when there is a navigation to the
    523   // orginial hostname.
    524   Referrers referrers_;
    525 
    526   // List of URLs in referrers_ currently being trimmed (scaled down to
    527   // eventually be aged out of use).
    528   std::vector<GURL> urls_being_trimmed_;
    529 
    530   // A time after which we need to do more trimming of referrers.
    531   base::TimeTicks next_trim_time_;
    532 
    533   scoped_ptr<base::WeakPtrFactory<Predictor> > weak_factory_;
    534 
    535   DISALLOW_COPY_AND_ASSIGN(Predictor);
    536 };
    537 
    538 // This version of the predictor is used for testing.
    539 class SimplePredictor : public Predictor {
    540  public:
    541   explicit SimplePredictor(bool preconnect_enabled)
    542       : Predictor(preconnect_enabled) {}
    543   virtual ~SimplePredictor() {}
    544   virtual void InitNetworkPredictor(
    545       PrefService* user_prefs,
    546       PrefService* local_state,
    547       IOThread* io_thread,
    548       net::URLRequestContextGetter* getter) OVERRIDE;
    549   virtual void ShutdownOnUIThread(PrefService* user_prefs) OVERRIDE;
    550 };
    551 
    552 }  // namespace chrome_browser_net
    553 
    554 #endif  // CHROME_BROWSER_NET_PREDICTOR_H_
    555