Home | History | Annotate | Download | only in net
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // A Predictor object is instantiated once in the browser process, and manages
      6 // both preresolution of hostnames, as well as TCP/IP preconnection to expected
      7 // subresources.
      8 // Most hostname lists are provided by the renderer processes, and include URLs
      9 // that *might* be used in the near future by the browsing user.  One goal of
     10 // this class is to cause the underlying DNS structure to lookup a hostname
     11 // before it is really needed, and hence reduce latency in the standard lookup
     12 // paths.
     13 // Subresource relationships are usually acquired from the referrer field in a
     14 // navigation.  A subresource URL may be associated with a referrer URL.  Later
     15 // navigations may, if the likelihood of needing the subresource is high enough,
     16 // cause this module to speculatively create a TCP/IP connection. If there is
     17 // only a low likelihood, then a DNS pre-resolution operation may be performed.
     18 
     19 #ifndef CHROME_BROWSER_NET_PREDICTOR_H_
     20 #define CHROME_BROWSER_NET_PREDICTOR_H_
     21 #pragma once
     22 
     23 #include <map>
     24 #include <queue>
     25 #include <set>
     26 #include <string>
     27 #include <vector>
     28 
     29 #include "base/gtest_prod_util.h"
     30 #include "base/memory/ref_counted.h"
     31 #include "chrome/browser/net/url_info.h"
     32 #include "chrome/browser/net/referrer.h"
     33 #include "chrome/common/net/predictor_common.h"
     34 #include "net/base/host_port_pair.h"
     35 
     36 class ListValue;
     37 
     38 namespace net {
     39 class HostResolver;
     40 }  // namespace net
     41 
     42 namespace chrome_browser_net {
     43 
     44 typedef chrome_common_net::UrlList UrlList;
     45 typedef chrome_common_net::NameList NameList;
     46 typedef std::map<GURL, UrlInfo> Results;
     47 
     48 // Note that Predictor is not thread safe, and must only be called from
     49 // the IO thread. Failure to do so will result in a DCHECK at runtime.
     50 class Predictor : public base::RefCountedThreadSafe<Predictor> {
     51  public:
     52   // A version number for prefs that are saved. This should be incremented when
     53   // we change the format so that we discard old data.
     54   enum { PREDICTOR_REFERRER_VERSION = 2 };
     55 
     56   // |max_concurrent| specifies how many concurrent (parallel) prefetches will
     57   // be performed. Host lookups will be issued through |host_resolver|.
     58   Predictor(net::HostResolver* host_resolver,
     59             base::TimeDelta max_queue_delay_ms, size_t max_concurrent,
     60             bool preconnect_enabled);
     61 
     62   // Cancel pending requests and prevent new ones from being made.
     63   void Shutdown();
     64 
     65   // In some circumstances, for privacy reasons, all results should be
     66   // discarded.  This method gracefully handles that activity.
     67   // Destroy all our internal state, which shows what names we've looked up, and
     68   // how long each has taken, etc. etc.  We also destroy records of suggesses
     69   // (cache hits etc.).
     70   void DiscardAllResults();
     71 
     72   // Add hostname(s) to the queue for processing.
     73   void ResolveList(const UrlList& urls,
     74                    UrlInfo::ResolutionMotivation motivation);
     75   void Resolve(const GURL& url,
     76                UrlInfo::ResolutionMotivation motivation);
     77 
     78   // Instigate pre-connection to any URLs, or pre-resolution of related host,
     79   // that we predict will be needed after this navigation (typically
     80   // more-embedded resources on a page).  This method will actually post a task
     81   // to do the actual work, so as not to jump ahead of the frame navigation that
     82   // instigated this activity.
     83   void PredictFrameSubresources(const GURL& url);
     84 
     85   // The Omnibox has proposed a given url to the user, and if it is a search
     86   // URL, then it also indicates that this is preconnectable (i.e., we could
     87   // preconnect to the search server).
     88   void AnticipateOmniboxUrl(const GURL& url, bool preconnectable);
     89 
     90   // Preconnect a URL and all of its subresource domains.
     91   void PreconnectUrlAndSubresources(const GURL& url);
     92 
     93   // Record details of a navigation so that we can preresolve the host name
     94   // ahead of time the next time the users navigates to the indicated host.
     95   // Should only be called when urls are distinct, and they should already be
     96   // canonicalized to not have a path.
     97   void LearnFromNavigation(const GURL& referring_url, const GURL& target_url);
     98 
     99   // Dump HTML table containing list of referrers for about:dns.
    100   void GetHtmlReferrerLists(std::string* output);
    101 
    102   // Dump the list of currently known referrer domains and related prefetchable
    103   // domains.
    104   void GetHtmlInfo(std::string* output);
    105 
    106   // Discards any referrer for which all the suggested host names are currently
    107   // annotated with negligible expected-use.  Scales down (diminishes) the
    108   // expected-use of those that remain, so that their use will go down by a
    109   // factor each time we trim (moving the referrer closer to being discarded in
    110   // a future call).
    111   // The task is performed synchronously and completes before returing.
    112   void TrimReferrersNow();
    113 
    114   // Construct a ListValue object that contains all the data in the referrers_
    115   // so that it can be persisted in a pref.
    116   void SerializeReferrers(ListValue* referral_list);
    117 
    118   // Process a ListValue that contains all the data from a previous reference
    119   // list, as constructed by SerializeReferrers(), and add all the identified
    120   // values into the current referrer list.
    121   void DeserializeReferrers(const ListValue& referral_list);
    122 
    123   void DeserializeReferrersThenDelete(ListValue* referral_list);
    124 
    125   // For unit test code only.
    126   size_t max_concurrent_dns_lookups() const {
    127     return max_concurrent_dns_lookups_;
    128   }
    129 
    130   // Flag setting to use preconnection instead of just DNS pre-fetching.
    131   bool preconnect_enabled() const { return preconnect_enabled_; }
    132 
    133   // Put URL in canonical form, including a scheme, host, and port.
    134   // Returns GURL::EmptyGURL() if the scheme is not http/https or if the url
    135   // cannot be otherwise canonicalized.
    136   static GURL CanonicalizeUrl(const GURL& url);
    137 
    138  private:
    139   friend class base::RefCountedThreadSafe<Predictor>;
    140   FRIEND_TEST_ALL_PREFIXES(PredictorTest, BenefitLookupTest);
    141   FRIEND_TEST_ALL_PREFIXES(PredictorTest, ShutdownWhenResolutionIsPendingTest);
    142   FRIEND_TEST_ALL_PREFIXES(PredictorTest, SingleLookupTest);
    143   FRIEND_TEST_ALL_PREFIXES(PredictorTest, ConcurrentLookupTest);
    144   FRIEND_TEST_ALL_PREFIXES(PredictorTest, MassiveConcurrentLookupTest);
    145   FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueuePushPopTest);
    146   FRIEND_TEST_ALL_PREFIXES(PredictorTest, PriorityQueueReorderTest);
    147   FRIEND_TEST_ALL_PREFIXES(PredictorTest, ReferrerSerializationTrimTest);
    148   friend class WaitForResolutionHelper;  // For testing.
    149 
    150   class LookupRequest;
    151 
    152   // A simple priority queue for handling host names.
    153   // Some names that are queued up have |motivation| that requires very rapid
    154   // handling.  For example, a sub-resource name lookup MUST be done before the
    155   // actual sub-resource is fetched.  In contrast, a name that was speculatively
    156   // noted in a page has to be resolved before the user "gets around to"
    157   // clicking on a link.  By tagging (with a motivation) each push we make into
    158   // this FIFO queue, the queue can re-order the more important names to service
    159   // them sooner (relative to some low priority background resolutions).
    160   class HostNameQueue {
    161    public:
    162     HostNameQueue();
    163     ~HostNameQueue();
    164     void Push(const GURL& url,
    165               UrlInfo::ResolutionMotivation motivation);
    166     bool IsEmpty() const;
    167     GURL Pop();
    168 
    169    private:
    170     // The names in the queue that should be serviced (popped) ASAP.
    171     std::queue<GURL> rush_queue_;
    172     // The names in the queue that should only be serviced when rush_queue is
    173     // empty.
    174     std::queue<GURL> background_queue_;
    175 
    176   DISALLOW_COPY_AND_ASSIGN(HostNameQueue);
    177   };
    178 
    179   // A map that is keyed with the host/port that we've learned were the cause
    180   // of loading additional URLs.  The list of additional targets is held
    181   // in a Referrer instance, which is a value in this map.
    182   typedef std::map<GURL, Referrer> Referrers;
    183 
    184   // Depending on the expected_subresource_use_, we may either make a TCP/IP
    185   // preconnection, or merely pre-resolve the hostname via DNS (or even do
    186   // nothing).  The following are the threasholds for taking those actions.
    187   static const double kPreconnectWorthyExpectedValue;
    188   static const double kDNSPreresolutionWorthyExpectedValue;
    189   // Referred hosts with a subresource_use_rate_ that are less than the
    190   // following threshold will be discarded when we Trim() the list.
    191   static const double kDiscardableExpectedValue;
    192   // During trimming operation to discard hosts for which we don't have likely
    193   // subresources, we multiply the expected_subresource_use_ value by the
    194   // following ratio until that value is less than kDiscardableExpectedValue.
    195   // This number should always be less than 1, an more than 0.
    196   static const double kReferrerTrimRatio;
    197 
    198   // Interval between periodic trimming of our whole referrer list.
    199   // We only do a major trimming about once an hour, and then only when the user
    200   // is actively browsing.
    201   static const base::TimeDelta kDurationBetweenTrimmings;
    202   // Interval between incremental trimmings (to avoid inducing Jank).
    203   static const base::TimeDelta kDurationBetweenTrimmingIncrements;
    204   // Number of referring URLs processed in an incremental trimming.
    205   static const size_t kUrlsTrimmedPerIncrement;
    206 
    207   ~Predictor();
    208 
    209   // Perform actual resolution or preconnection to subresources now.  This is
    210   // an internal worker method that is reached via a post task from
    211   // PredictFrameSubresources().
    212   void PrepareFrameSubresources(const GURL& url);
    213 
    214   // Only for testing. Returns true if hostname has been successfully resolved
    215   // (name found).
    216   bool WasFound(const GURL& url) const {
    217     Results::const_iterator it(results_.find(url));
    218     return (it != results_.end()) &&
    219             it->second.was_found();
    220   }
    221 
    222   // Only for testing. Return how long was the resolution
    223   // or UrlInfo::kNullDuration if it hasn't been resolved yet.
    224   base::TimeDelta GetResolutionDuration(const GURL& url) {
    225     if (results_.find(url) == results_.end())
    226       return UrlInfo::kNullDuration;
    227     return results_[url].resolve_duration();
    228   }
    229 
    230   // Only for testing;
    231   size_t peak_pending_lookups() const { return peak_pending_lookups_; }
    232 
    233   // Access method for use by async lookup request to pass resolution result.
    234   void OnLookupFinished(LookupRequest* request, const GURL& url, bool found);
    235 
    236   // Underlying method for both async and synchronous lookup to update state.
    237   void LookupFinished(LookupRequest* request,
    238                       const GURL& url, bool found);
    239 
    240   // Queue hostname for resolution.  If queueing was done, return the pointer
    241   // to the queued instance, otherwise return NULL.
    242   UrlInfo* AppendToResolutionQueue(const GURL& url,
    243       UrlInfo::ResolutionMotivation motivation);
    244 
    245   // Check to see if too much queuing delay has been noted for the given info,
    246   // which indicates that there is "congestion" or growing delay in handling the
    247   // resolution of names.  Rather than letting this congestion potentially grow
    248   // without bounds, we abandon our queued efforts at pre-resolutions in such a
    249   // case.
    250   // To do this, we will recycle |info|, as well as all queued items, back to
    251   // the state they had before they were queued up.  We can't do anything about
    252   // the resolutions we've already sent off for processing on another thread, so
    253   // we just let them complete.  On a slow system, subject to congestion, this
    254   // will greatly reduce the number of resolutions done, but it will assure that
    255   // any resolutions that are done, are in a timely and hence potentially
    256   // helpful manner.
    257   bool CongestionControlPerformed(UrlInfo* info);
    258 
    259   // Take lookup requests from work_queue_ and tell HostResolver to look them up
    260   // asynchronously, provided we don't exceed concurrent resolution limit.
    261   void StartSomeQueuedResolutions();
    262 
    263   // Performs trimming similar to TrimReferrersNow(), except it does it as a
    264   // series of short tasks by posting continuations again an again until done.
    265   void TrimReferrers();
    266 
    267   // Loads urls_being_trimmed_ from keys of current referrers_.
    268   void LoadUrlsForTrimming();
    269 
    270   // Posts a task to do additional incremental trimming of referrers_.
    271   void PostIncrementalTrimTask();
    272 
    273   // Calls Trim() on some or all of urls_being_trimmed_.
    274   // If it does not process all the URLs in that vector, it posts a task to
    275   // continue with them shortly (i.e., it yeilds and continues).
    276   void IncrementalTrimReferrers(bool trim_all_now);
    277 
    278   // work_queue_ holds a list of names we need to look up.
    279   HostNameQueue work_queue_;
    280 
    281   // results_ contains information for existing/prior prefetches.
    282   Results results_;
    283 
    284   std::set<LookupRequest*> pending_lookups_;
    285 
    286   // For testing, to verify that we don't exceed the limit.
    287   size_t peak_pending_lookups_;
    288 
    289   // When true, we don't make new lookup requests.
    290   bool shutdown_;
    291 
    292   // The number of concurrent speculative lookups currently allowed to be sent
    293   // to the resolver.  Any additional lookups will be queued to avoid exceeding
    294   // this value.  The queue is a priority queue that will accelerate
    295   // sub-resource speculation, and retard resolutions suggested by page scans.
    296   const size_t max_concurrent_dns_lookups_;
    297 
    298   // The maximum queueing delay that is acceptable before we enter congestion
    299   // reduction mode, and discard all queued (but not yet assigned) resolutions.
    300   const base::TimeDelta max_dns_queue_delay_;
    301 
    302   // The host resolver we warm DNS entries for.
    303   net::HostResolver* const host_resolver_;
    304 
    305   // Are we currently using preconnection, rather than just DNS resolution, for
    306   // subresources and omni-box search URLs.
    307   bool preconnect_enabled_;
    308 
    309   // Most recent suggestion from Omnibox provided via AnticipateOmniboxUrl().
    310   std::string last_omnibox_host_;
    311 
    312   // The time when the last preresolve was done for last_omnibox_host_.
    313   base::TimeTicks last_omnibox_preresolve_;
    314 
    315   // The number of consecutive requests to AnticipateOmniboxUrl() that suggested
    316   // preconnecting (because it was to a search service).
    317   int consecutive_omnibox_preconnect_count_;
    318 
    319   // The time when the last preconnection was requested to a search service.
    320   base::TimeTicks last_omnibox_preconnect_;
    321 
    322   // For each URL that we might navigate to (that we've "learned about")
    323   // we have a Referrer list. Each Referrer list has all hostnames we might
    324   // need to pre-resolve or pre-connect to when there is a navigation to the
    325   // orginial hostname.
    326   Referrers referrers_;
    327 
    328   // List of URLs in referrers_ currently being trimmed (scaled down to
    329   // eventually be aged out of use).
    330   std::vector<GURL> urls_being_trimmed_;
    331 
    332   // A time after which we need to do more trimming of referrers.
    333   base::TimeTicks next_trim_time_;
    334 
    335   ScopedRunnableMethodFactory<Predictor> trim_task_factory_;
    336 
    337   DISALLOW_COPY_AND_ASSIGN(Predictor);
    338 };
    339 
    340 }  // namespace chrome_browser_net
    341 
    342 #endif  // CHROME_BROWSER_NET_PREDICTOR_H_
    343