Home | History | Annotate | Download | only in net
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/net/predictor.h"
      6 
      7 #include <algorithm>
      8 #include <cmath>
      9 #include <set>
     10 #include <sstream>
     11 
     12 #include "base/basictypes.h"
     13 #include "base/bind.h"
     14 #include "base/command_line.h"
     15 #include "base/compiler_specific.h"
     16 #include "base/containers/mru_cache.h"
     17 #include "base/metrics/histogram.h"
     18 #include "base/prefs/pref_service.h"
     19 #include "base/stl_util.h"
     20 #include "base/strings/string_split.h"
     21 #include "base/strings/string_util.h"
     22 #include "base/strings/stringprintf.h"
     23 #include "base/synchronization/waitable_event.h"
     24 #include "base/threading/thread_restrictions.h"
     25 #include "base/time/time.h"
     26 #include "base/values.h"
     27 #include "chrome/browser/io_thread.h"
     28 #include "chrome/browser/net/preconnect.h"
     29 #include "chrome/browser/prefs/scoped_user_pref_update.h"
     30 #include "chrome/browser/prefs/session_startup_pref.h"
     31 #include "chrome/common/chrome_switches.h"
     32 #include "chrome/common/pref_names.h"
     33 #include "components/user_prefs/pref_registry_syncable.h"
     34 #include "content/public/browser/browser_thread.h"
     35 #include "net/base/address_list.h"
     36 #include "net/base/completion_callback.h"
     37 #include "net/base/host_port_pair.h"
     38 #include "net/base/net_errors.h"
     39 #include "net/base/net_log.h"
     40 #include "net/dns/host_resolver.h"
     41 #include "net/dns/single_request_host_resolver.h"
     42 #include "net/url_request/url_request_context_getter.h"
     43 
     44 using base::TimeDelta;
     45 using content::BrowserThread;
     46 
     47 namespace chrome_browser_net {
     48 
     49 // static
     50 const int Predictor::kPredictorReferrerVersion = 2;
     51 const double Predictor::kPreconnectWorthyExpectedValue = 0.8;
     52 const double Predictor::kDNSPreresolutionWorthyExpectedValue = 0.1;
     53 const double Predictor::kDiscardableExpectedValue = 0.05;
     54 // The goal is of trimming is to to reduce the importance (number of expected
     55 // subresources needed) by a factor of 2 after about 24 hours of uptime. We will
     56 // trim roughly once-an-hour of uptime.  The ratio to use in each trim operation
     57 // is then the 24th root of 0.5.  If a user only surfs for 4 hours a day, then
     58 // after about 6 days they will have halved all their estimates of subresource
     59 // connections.  Once this falls below kDiscardableExpectedValue the referrer
     60 // will be discarded.
     61 // TODO(jar): Measure size of referrer lists in the field.  Consider an adaptive
     62 // system that uses a higher trim ratio when the list is large.
     63 // static
     64 const double Predictor::kReferrerTrimRatio = 0.97153;
     65 const int64 Predictor::kDurationBetweenTrimmingsHours = 1;
     66 const int64 Predictor::kDurationBetweenTrimmingIncrementsSeconds = 15;
     67 const size_t Predictor::kUrlsTrimmedPerIncrement = 5u;
     68 const size_t Predictor::kMaxSpeculativeParallelResolves = 3;
     69 const int Predictor::kMaxUnusedSocketLifetimeSecondsWithoutAGet = 10;
     70 // To control our congestion avoidance system, which discards a queue when
     71 // resolutions are "taking too long," we need an expected resolution time.
     72 // Common average is in the range of 300-500ms.
     73 const int kExpectedResolutionTimeMs = 500;
     74 const int Predictor::kTypicalSpeculativeGroupSize = 8;
     75 const int Predictor::kMaxSpeculativeResolveQueueDelayMs =
     76     (kExpectedResolutionTimeMs * Predictor::kTypicalSpeculativeGroupSize) /
     77     Predictor::kMaxSpeculativeParallelResolves;
     78 
     79 static int g_max_queueing_delay_ms =
     80     Predictor::kMaxSpeculativeResolveQueueDelayMs;
     81 static size_t g_max_parallel_resolves =
     82     Predictor::kMaxSpeculativeParallelResolves;
     83 
     84 // A version number for prefs that are saved. This should be incremented when
     85 // we change the format so that we discard old data.
     86 static const int kPredictorStartupFormatVersion = 1;
     87 
     88 class Predictor::LookupRequest {
     89  public:
     90   LookupRequest(Predictor* predictor,
     91                 net::HostResolver* host_resolver,
     92                 const GURL& url)
     93       : predictor_(predictor),
     94         url_(url),
     95         resolver_(host_resolver) {
     96   }
     97 
     98   // Return underlying network resolver status.
     99   // net::OK ==> Host was found synchronously.
    100   // net:ERR_IO_PENDING ==> Network will callback later with result.
    101   // anything else ==> Host was not found synchronously.
    102   int Start() {
    103     net::HostResolver::RequestInfo resolve_info(
    104         net::HostPortPair::FromURL(url_));
    105 
    106     // Make a note that this is a speculative resolve request. This allows us
    107     // to separate it from real navigations in the observer's callback, and
    108     // lets the HostResolver know it can de-prioritize it.
    109     resolve_info.set_is_speculative(true);
    110     return resolver_.Resolve(
    111         resolve_info, &addresses_,
    112         base::Bind(&LookupRequest::OnLookupFinished, base::Unretained(this)),
    113         net::BoundNetLog());
    114   }
    115 
    116  private:
    117   void OnLookupFinished(int result) {
    118     predictor_->OnLookupFinished(this, url_, result == net::OK);
    119   }
    120 
    121   Predictor* predictor_;  // The predictor which started us.
    122 
    123   const GURL url_;  // Hostname to resolve.
    124   net::SingleRequestHostResolver resolver_;
    125   net::AddressList addresses_;
    126 
    127   DISALLOW_COPY_AND_ASSIGN(LookupRequest);
    128 };
    129 
    130 // This records UMAs for preconnect usage based on navigation URLs to
    131 // gather precision/recall for user-event based preconnect triggers.
    132 // Stats are gathered via a LRU cache that remembers all preconnect within the
    133 // last N seconds.
    134 // A preconnect trigger is considered as used iff a navigation including
    135 // access to the preconnected host occurs within a time period specified by
    136 // kMaxUnusedSocketLifetimeSecondsWithoutAGet.
    137 class Predictor::PreconnectUsage {
    138  public:
    139   PreconnectUsage();
    140   ~PreconnectUsage();
    141 
    142   // Record a preconnect trigger to |url|.
    143   void ObservePreconnect(const GURL& url);
    144 
    145   // Record a user navigation with its redirect history, |url_chain|.
    146   // We are uncertain if this is actually a link navigation.
    147   void ObserveNavigationChain(const std::vector<GURL>& url_chain,
    148                               bool is_subresource);
    149 
    150   // Record a user link navigation to |final_url|.
    151   // We are certain that this is a user-triggered link navigation.
    152   void ObserveLinkNavigation(const GURL& final_url);
    153 
    154  private:
    155   // This tracks whether a preconnect was used in some navigation or not
    156   class PreconnectPrecisionStat {
    157    public:
    158     PreconnectPrecisionStat()
    159         : timestamp_(base::TimeTicks::Now()),
    160           was_used_(false) {
    161     }
    162 
    163     const base::TimeTicks& timestamp() { return timestamp_; }
    164 
    165     void set_was_used() { was_used_ = true; }
    166     bool was_used() const { return was_used_; }
    167 
    168    private:
    169     base::TimeTicks timestamp_;
    170     bool was_used_;
    171   };
    172 
    173   typedef base::MRUCache<GURL, PreconnectPrecisionStat> MRUPreconnects;
    174   MRUPreconnects mru_preconnects_;
    175 
    176   // The longest time an entry can persist in mru_preconnect_
    177   const base::TimeDelta max_duration_;
    178 
    179   std::vector<GURL> recent_navigation_chain_;
    180 
    181   DISALLOW_COPY_AND_ASSIGN(PreconnectUsage);
    182 };
    183 
    184 Predictor::PreconnectUsage::PreconnectUsage()
    185     : mru_preconnects_(MRUPreconnects::NO_AUTO_EVICT),
    186       max_duration_(base::TimeDelta::FromSeconds(
    187           Predictor::kMaxUnusedSocketLifetimeSecondsWithoutAGet)) {
    188 }
    189 
    190 Predictor::PreconnectUsage::~PreconnectUsage() {}
    191 
    192 void Predictor::PreconnectUsage::ObservePreconnect(const GURL& url) {
    193   // Evict any overly old entries and record stats.
    194   base::TimeTicks now = base::TimeTicks::Now();
    195 
    196   MRUPreconnects::reverse_iterator eldest_preconnect =
    197       mru_preconnects_.rbegin();
    198   while (!mru_preconnects_.empty()) {
    199     DCHECK(eldest_preconnect == mru_preconnects_.rbegin());
    200     if (now - eldest_preconnect->second.timestamp() < max_duration_)
    201       break;
    202 
    203     UMA_HISTOGRAM_BOOLEAN("Net.PreconnectTriggerUsed",
    204                           eldest_preconnect->second.was_used());
    205     eldest_preconnect = mru_preconnects_.Erase(eldest_preconnect);
    206   }
    207 
    208   // Add new entry.
    209   GURL canonical_url(Predictor::CanonicalizeUrl(url));
    210   mru_preconnects_.Put(canonical_url, PreconnectPrecisionStat());
    211 }
    212 
    213 void Predictor::PreconnectUsage::ObserveNavigationChain(
    214     const std::vector<GURL>& url_chain,
    215     bool is_subresource) {
    216   if (url_chain.empty())
    217     return;
    218 
    219   if (!is_subresource)
    220     recent_navigation_chain_ = url_chain;
    221 
    222   GURL canonical_url(Predictor::CanonicalizeUrl(url_chain.back()));
    223 
    224   MRUPreconnects::iterator itPreconnect = mru_preconnects_.Peek(canonical_url);
    225   bool was_preconnected = (itPreconnect != mru_preconnects_.end());
    226 
    227   // This is an UMA which was named incorrectly. This actually measures the
    228   // ratio of URLRequests which have used a preconnected session.
    229   UMA_HISTOGRAM_BOOLEAN("Net.PreconnectedNavigation", was_preconnected);
    230 }
    231 
    232 void Predictor::PreconnectUsage::ObserveLinkNavigation(const GURL& url) {
    233   if (recent_navigation_chain_.empty() ||
    234       url != recent_navigation_chain_.back()) {
    235     // The navigation chain is not available for this navigation.
    236     recent_navigation_chain_.clear();
    237     recent_navigation_chain_.push_back(url);
    238   }
    239 
    240   // See if the link navigation involved preconnected session.
    241   bool did_use_preconnect = false;
    242   for (std::vector<GURL>::const_iterator it = recent_navigation_chain_.begin();
    243        it != recent_navigation_chain_.end();
    244        ++it) {
    245     GURL canonical_url(Predictor::CanonicalizeUrl(*it));
    246 
    247     // Record the preconnect trigger for the url as used if exist
    248     MRUPreconnects::iterator itPreconnect =
    249         mru_preconnects_.Peek(canonical_url);
    250     bool was_preconnected = (itPreconnect != mru_preconnects_.end());
    251     if (was_preconnected) {
    252       itPreconnect->second.set_was_used();
    253       did_use_preconnect = true;
    254     }
    255   }
    256 
    257   UMA_HISTOGRAM_BOOLEAN("Net.PreconnectedLinkNavigations", did_use_preconnect);
    258 }
    259 
    260 Predictor::Predictor(bool preconnect_enabled)
    261     : url_request_context_getter_(NULL),
    262       predictor_enabled_(true),
    263       peak_pending_lookups_(0),
    264       shutdown_(false),
    265       max_concurrent_dns_lookups_(g_max_parallel_resolves),
    266       max_dns_queue_delay_(
    267           TimeDelta::FromMilliseconds(g_max_queueing_delay_ms)),
    268       host_resolver_(NULL),
    269       preconnect_enabled_(preconnect_enabled),
    270       consecutive_omnibox_preconnect_count_(0),
    271       next_trim_time_(base::TimeTicks::Now() +
    272                       TimeDelta::FromHours(kDurationBetweenTrimmingsHours)) {
    273   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    274 }
    275 
    276 Predictor::~Predictor() {
    277   // TODO(rlp): Add DCHECK for CurrentlyOn(BrowserThread::IO) when the
    278   // ProfileManagerTest has been updated with a mock profile.
    279   DCHECK(shutdown_);
    280 }
    281 
    282 // static
    283 Predictor* Predictor::CreatePredictor(bool preconnect_enabled,
    284                                       bool simple_shutdown) {
    285   if (simple_shutdown)
    286     return new SimplePredictor(preconnect_enabled);
    287   return new Predictor(preconnect_enabled);
    288 }
    289 
    290 void Predictor::RegisterProfilePrefs(
    291     user_prefs::PrefRegistrySyncable* registry) {
    292   registry->RegisterListPref(prefs::kDnsPrefetchingStartupList,
    293                              user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
    294   registry->RegisterListPref(prefs::kDnsPrefetchingHostReferralList,
    295                              user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
    296 }
    297 
    298 // --------------------- Start UI methods. ------------------------------------
    299 
    300 void Predictor::InitNetworkPredictor(PrefService* user_prefs,
    301                                      PrefService* local_state,
    302                                      IOThread* io_thread,
    303                                      net::URLRequestContextGetter* getter) {
    304   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    305 
    306   bool predictor_enabled =
    307       user_prefs->GetBoolean(prefs::kNetworkPredictionEnabled);
    308 
    309   url_request_context_getter_ = getter;
    310 
    311   // Gather the list of hostnames to prefetch on startup.
    312   UrlList urls = GetPredictedUrlListAtStartup(user_prefs, local_state);
    313 
    314   base::ListValue* referral_list =
    315       static_cast<base::ListValue*>(user_prefs->GetList(
    316           prefs::kDnsPrefetchingHostReferralList)->DeepCopy());
    317 
    318   // Now that we have the statistics in memory, wipe them from the Preferences
    319   // file. They will be serialized back on a clean shutdown. This way we only
    320   // have to worry about clearing our in-memory state when Clearing Browsing
    321   // Data.
    322   user_prefs->ClearPref(prefs::kDnsPrefetchingStartupList);
    323   user_prefs->ClearPref(prefs::kDnsPrefetchingHostReferralList);
    324 
    325   BrowserThread::PostTask(
    326       BrowserThread::IO,
    327       FROM_HERE,
    328       base::Bind(
    329           &Predictor::FinalizeInitializationOnIOThread,
    330           base::Unretained(this),
    331           urls, referral_list,
    332           io_thread, predictor_enabled));
    333 }
    334 
    335 void Predictor::AnticipateOmniboxUrl(const GURL& url, bool preconnectable) {
    336   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    337   if (!predictor_enabled_)
    338     return;
    339   if (!url.is_valid() || !url.has_host())
    340     return;
    341   std::string host = url.HostNoBrackets();
    342   bool is_new_host_request = (host != last_omnibox_host_);
    343   last_omnibox_host_ = host;
    344 
    345   UrlInfo::ResolutionMotivation motivation(UrlInfo::OMNIBOX_MOTIVATED);
    346   base::TimeTicks now = base::TimeTicks::Now();
    347 
    348   if (preconnect_enabled()) {
    349     if (preconnectable && !is_new_host_request) {
    350       ++consecutive_omnibox_preconnect_count_;
    351       // The omnibox suggests a search URL (for which we can preconnect) after
    352       // one or two characters are typed, even though such typing often (1 in
    353       // 3?) becomes a real URL.  This code waits till is has more evidence of a
    354       // preconnectable URL (search URL) before forming a preconnection, so as
    355       // to reduce the useless preconnect rate.
    356       // Perchance this logic should be pushed back into the omnibox, where the
    357       // actual characters typed, such as a space, can better forcast whether
    358       // we need to search/preconnect or not.  By waiting for at least 4
    359       // characters in a row that have lead to a search proposal, we avoid
    360       // preconnections for a prefix like "www." and we also wait until we have
    361       // at least a 4 letter word to search for.
    362       // Each character typed appears to induce 2 calls to
    363       // AnticipateOmniboxUrl(), so we double 4 characters and limit at 8
    364       // requests.
    365       // TODO(jar): Use an A/B test to optimize this.
    366       const int kMinConsecutiveRequests = 8;
    367       if (consecutive_omnibox_preconnect_count_ >= kMinConsecutiveRequests) {
    368         // TODO(jar): Perhaps we should do a GET to leave the socket open in the
    369         // pool.  Currently, we just do a connect, which MAY be reset if we
    370         // don't use it in 10 secondes!!!  As a result, we may do more
    371         // connections, and actually cost the server more than if we did a real
    372         // get with a fake request (/gen_204 might be the good path on Google).
    373         const int kMaxSearchKeepaliveSeconds(10);
    374         if ((now - last_omnibox_preconnect_).InSeconds() <
    375              kMaxSearchKeepaliveSeconds)
    376           return;  // We've done a preconnect recently.
    377         last_omnibox_preconnect_ = now;
    378         const int kConnectionsNeeded = 1;
    379         PreconnectUrl(CanonicalizeUrl(url), GURL(), motivation,
    380                       kConnectionsNeeded);
    381         return;  // Skip pre-resolution, since we'll open a connection.
    382       }
    383     } else {
    384       consecutive_omnibox_preconnect_count_ = 0;
    385     }
    386   }
    387 
    388   // Fall through and consider pre-resolution.
    389 
    390   // Omnibox tends to call in pairs (just a few milliseconds apart), and we
    391   // really don't need to keep resolving a name that often.
    392   // TODO(jar): A/B tests could check for perf impact of the early returns.
    393   if (!is_new_host_request) {
    394     const int kMinPreresolveSeconds(10);
    395     if (kMinPreresolveSeconds > (now - last_omnibox_preresolve_).InSeconds())
    396       return;
    397   }
    398   last_omnibox_preresolve_ = now;
    399 
    400   // Perform at least DNS pre-resolution.
    401   BrowserThread::PostTask(
    402       BrowserThread::IO,
    403       FROM_HERE,
    404       base::Bind(&Predictor::Resolve, base::Unretained(this),
    405                  CanonicalizeUrl(url), motivation));
    406 }
    407 
    408 void Predictor::PreconnectUrlAndSubresources(const GURL& url,
    409     const GURL& first_party_for_cookies) {
    410   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI) ||
    411          BrowserThread::CurrentlyOn(BrowserThread::IO));
    412   if (!predictor_enabled_ || !preconnect_enabled() ||
    413       !url.is_valid() || !url.has_host())
    414     return;
    415 
    416   UrlInfo::ResolutionMotivation motivation(UrlInfo::EARLY_LOAD_MOTIVATED);
    417   const int kConnectionsNeeded = 1;
    418   PreconnectUrl(CanonicalizeUrl(url), first_party_for_cookies,
    419                 motivation, kConnectionsNeeded);
    420   PredictFrameSubresources(url.GetWithEmptyPath(), first_party_for_cookies);
    421 }
    422 
    423 UrlList Predictor::GetPredictedUrlListAtStartup(
    424     PrefService* user_prefs,
    425     PrefService* local_state) {
    426   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    427   UrlList urls;
    428   // Recall list of URLs we learned about during last session.
    429   // This may catch secondary hostnames, pulled in by the homepages.  It will
    430   // also catch more of the "primary" home pages, since that was (presumably)
    431   // rendered first (and will be rendered first this time too).
    432   const ListValue* startup_list =
    433       user_prefs->GetList(prefs::kDnsPrefetchingStartupList);
    434 
    435   if (startup_list) {
    436     base::ListValue::const_iterator it = startup_list->begin();
    437     int format_version = -1;
    438     if (it != startup_list->end() &&
    439         (*it)->GetAsInteger(&format_version) &&
    440         format_version == kPredictorStartupFormatVersion) {
    441       ++it;
    442       for (; it != startup_list->end(); ++it) {
    443         std::string url_spec;
    444         if (!(*it)->GetAsString(&url_spec)) {
    445           LOG(DFATAL);
    446           break;  // Format incompatibility.
    447         }
    448         GURL url(url_spec);
    449         if (!url.has_host() || !url.has_scheme()) {
    450           LOG(DFATAL);
    451           break;  // Format incompatibility.
    452         }
    453 
    454         urls.push_back(url);
    455       }
    456     }
    457   }
    458 
    459   // Prepare for any static home page(s) the user has in prefs.  The user may
    460   // have a LOT of tab's specified, so we may as well try to warm them all.
    461   SessionStartupPref tab_start_pref =
    462       SessionStartupPref::GetStartupPref(user_prefs);
    463   if (SessionStartupPref::URLS == tab_start_pref.type) {
    464     for (size_t i = 0; i < tab_start_pref.urls.size(); i++) {
    465       GURL gurl = tab_start_pref.urls[i];
    466       if (!gurl.is_valid() || gurl.SchemeIsFile() || gurl.host().empty())
    467         continue;
    468       if (gurl.SchemeIs("http") || gurl.SchemeIs("https"))
    469         urls.push_back(gurl.GetWithEmptyPath());
    470     }
    471   }
    472 
    473   if (urls.empty())
    474     urls.push_back(GURL("http://www.google.com:80"));
    475 
    476   return urls;
    477 }
    478 
    479 void Predictor::set_max_queueing_delay(int max_queueing_delay_ms) {
    480   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    481   g_max_queueing_delay_ms = max_queueing_delay_ms;
    482 }
    483 
    484 void Predictor::set_max_parallel_resolves(size_t max_parallel_resolves) {
    485   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    486   g_max_parallel_resolves = max_parallel_resolves;
    487 }
    488 
    489 void Predictor::ShutdownOnUIThread(PrefService* user_prefs) {
    490   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    491 
    492   SaveStateForNextStartupAndTrim(user_prefs);
    493 
    494   BrowserThread::PostTask(
    495       BrowserThread::IO,
    496       FROM_HERE,
    497       base::Bind(&Predictor::Shutdown, base::Unretained(this)));
    498 }
    499 
    500 // ---------------------- End UI methods. -------------------------------------
    501 
    502 // --------------------- Start IO methods. ------------------------------------
    503 
    504 void Predictor::Shutdown() {
    505   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    506   DCHECK(!shutdown_);
    507   shutdown_ = true;
    508 
    509   STLDeleteElements(&pending_lookups_);
    510 }
    511 
    512 void Predictor::DiscardAllResults() {
    513   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    514   // Delete anything listed so far in this session that shows in about:dns.
    515   referrers_.clear();
    516 
    517 
    518   // Try to delete anything in our work queue.
    519   while (!work_queue_.IsEmpty()) {
    520     // Emulate processing cycle as though host was not found.
    521     GURL url = work_queue_.Pop();
    522     UrlInfo* info = &results_[url];
    523     DCHECK(info->HasUrl(url));
    524     info->SetAssignedState();
    525     info->SetNoSuchNameState();
    526   }
    527   // Now every result_ is either resolved, or is being resolved
    528   // (see LookupRequest).
    529 
    530   // Step through result_, recording names of all hosts that can't be erased.
    531   // We can't erase anything being worked on.
    532   Results assignees;
    533   for (Results::iterator it = results_.begin(); results_.end() != it; ++it) {
    534     GURL url(it->first);
    535     UrlInfo* info = &it->second;
    536     DCHECK(info->HasUrl(url));
    537     if (info->is_assigned()) {
    538       info->SetPendingDeleteState();
    539       assignees[url] = *info;
    540     }
    541   }
    542   DCHECK_LE(assignees.size(), max_concurrent_dns_lookups_);
    543   results_.clear();
    544   // Put back in the names being worked on.
    545   for (Results::iterator it = assignees.begin(); assignees.end() != it; ++it) {
    546     DCHECK(it->second.is_marked_to_delete());
    547     results_[it->first] = it->second;
    548   }
    549 }
    550 
    551 // Overloaded Resolve() to take a vector of names.
    552 void Predictor::ResolveList(const UrlList& urls,
    553                             UrlInfo::ResolutionMotivation motivation) {
    554   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    555 
    556   for (UrlList::const_iterator it = urls.begin(); it < urls.end(); ++it) {
    557     AppendToResolutionQueue(*it, motivation);
    558   }
    559 }
    560 
    561 // Basic Resolve() takes an invidual name, and adds it
    562 // to the queue.
    563 void Predictor::Resolve(const GURL& url,
    564                         UrlInfo::ResolutionMotivation motivation) {
    565   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    566   if (!url.has_host())
    567     return;
    568   AppendToResolutionQueue(url, motivation);
    569 }
    570 
    571 void Predictor::LearnFromNavigation(const GURL& referring_url,
    572                                     const GURL& target_url) {
    573   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    574   if (!predictor_enabled_)
    575     return;
    576   DCHECK_EQ(referring_url, Predictor::CanonicalizeUrl(referring_url));
    577   DCHECK_NE(referring_url, GURL::EmptyGURL());
    578   DCHECK_EQ(target_url, Predictor::CanonicalizeUrl(target_url));
    579   DCHECK_NE(target_url, GURL::EmptyGURL());
    580 
    581   referrers_[referring_url].SuggestHost(target_url);
    582   // Possibly do some referrer trimming.
    583   TrimReferrers();
    584 }
    585 
    586 //-----------------------------------------------------------------------------
    587 // This section supports the about:dns page.
    588 
    589 void Predictor::PredictorGetHtmlInfo(Predictor* predictor,
    590                                      std::string* output) {
    591   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    592 
    593   output->append("<html><head><title>About DNS</title>"
    594                  // We'd like the following no-cache... but it doesn't work.
    595                  // "<META HTTP-EQUIV=\"Pragma\" CONTENT=\"no-cache\">"
    596                  "</head><body>");
    597   if (predictor && predictor->predictor_enabled()) {
    598     predictor->GetHtmlInfo(output);
    599   } else {
    600     output->append("DNS pre-resolution and TCP pre-connection is disabled.");
    601   }
    602   output->append("</body></html>");
    603 }
    604 
    605 // Provide sort order so all .com's are together, etc.
    606 struct RightToLeftStringSorter {
    607   bool operator()(const GURL& left, const GURL& right) const {
    608     return ReverseComponents(left) < ReverseComponents(right);
    609   }
    610 
    611  private:
    612   // Transforms something like "http://www.google.com/xyz" to
    613   // "http://com.google.www/xyz".
    614   static std::string ReverseComponents(const GURL& url) {
    615     // Reverse the components in the hostname.
    616     std::vector<std::string> parts;
    617     base::SplitString(url.host(), '.', &parts);
    618     std::reverse(parts.begin(), parts.end());
    619     std::string reversed_host = JoinString(parts, '.');
    620 
    621     // Return the new URL.
    622     GURL::Replacements url_components;
    623     url_components.SetHostStr(reversed_host);
    624     return url.ReplaceComponents(url_components).spec();
    625   }
    626 };
    627 
    628 void Predictor::GetHtmlReferrerLists(std::string* output) {
    629   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    630   if (referrers_.empty())
    631     return;
    632 
    633   // TODO(jar): Remove any plausible JavaScript from names before displaying.
    634 
    635   typedef std::set<GURL, struct RightToLeftStringSorter>
    636       SortedNames;
    637   SortedNames sorted_names;
    638 
    639   for (Referrers::iterator it = referrers_.begin();
    640        referrers_.end() != it; ++it)
    641     sorted_names.insert(it->first);
    642 
    643   output->append("<br><table border>");
    644   output->append(
    645       "<tr><th>Host for Page</th>"
    646       "<th>Page Load<br>Count</th>"
    647       "<th>Subresource<br>Navigations</th>"
    648       "<th>Subresource<br>PreConnects</th>"
    649       "<th>Subresource<br>PreResolves</th>"
    650       "<th>Expected<br>Connects</th>"
    651       "<th>Subresource Spec</th></tr>");
    652 
    653   for (SortedNames::iterator it = sorted_names.begin();
    654        sorted_names.end() != it; ++it) {
    655     Referrer* referrer = &(referrers_[*it]);
    656     bool first_set_of_futures = true;
    657     for (Referrer::iterator future_url = referrer->begin();
    658          future_url != referrer->end(); ++future_url) {
    659       output->append("<tr align=right>");
    660       if (first_set_of_futures) {
    661         base::StringAppendF(output,
    662                             "<td rowspan=%d>%s</td><td rowspan=%d>%d</td>",
    663                             static_cast<int>(referrer->size()),
    664                             it->spec().c_str(),
    665                             static_cast<int>(referrer->size()),
    666                             static_cast<int>(referrer->use_count()));
    667       }
    668       first_set_of_futures = false;
    669       base::StringAppendF(output,
    670           "<td>%d</td><td>%d</td><td>%d</td><td>%2.3f</td><td>%s</td></tr>",
    671           static_cast<int>(future_url->second.navigation_count()),
    672           static_cast<int>(future_url->second.preconnection_count()),
    673           static_cast<int>(future_url->second.preresolution_count()),
    674           static_cast<double>(future_url->second.subresource_use_rate()),
    675           future_url->first.spec().c_str());
    676     }
    677   }
    678   output->append("</table>");
    679 }
    680 
    681 void Predictor::GetHtmlInfo(std::string* output) {
    682   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    683   if (initial_observer_.get())
    684     initial_observer_->GetFirstResolutionsHtml(output);
    685   // Show list of subresource predictions and stats.
    686   GetHtmlReferrerLists(output);
    687 
    688   // Local lists for calling UrlInfo
    689   UrlInfo::UrlInfoTable name_not_found;
    690   UrlInfo::UrlInfoTable name_preresolved;
    691 
    692   // Get copies of all useful data.
    693   typedef std::map<GURL, UrlInfo, RightToLeftStringSorter> SortedUrlInfo;
    694   SortedUrlInfo snapshot;
    695   // UrlInfo supports value semantics, so we can do a shallow copy.
    696   for (Results::iterator it(results_.begin()); it != results_.end(); it++)
    697     snapshot[it->first] = it->second;
    698 
    699   // Partition the UrlInfo's into categories.
    700   for (SortedUrlInfo::iterator it(snapshot.begin());
    701        it != snapshot.end(); it++) {
    702     if (it->second.was_nonexistent()) {
    703       name_not_found.push_back(it->second);
    704       continue;
    705     }
    706     if (!it->second.was_found())
    707       continue;  // Still being processed.
    708     name_preresolved.push_back(it->second);
    709   }
    710 
    711   bool brief = false;
    712 #ifdef NDEBUG
    713   brief = true;
    714 #endif  // NDEBUG
    715 
    716   // Call for display of each table, along with title.
    717   UrlInfo::GetHtmlTable(name_preresolved,
    718       "Preresolution DNS records performed for ", brief, output);
    719   UrlInfo::GetHtmlTable(name_not_found,
    720       "Preresolving DNS records revealed non-existence for ", brief, output);
    721 }
    722 
    723 void Predictor::TrimReferrersNow() {
    724   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    725   // Just finish up work if an incremental trim is in progress.
    726   if (urls_being_trimmed_.empty())
    727     LoadUrlsForTrimming();
    728   IncrementalTrimReferrers(true);  // Do everything now.
    729 }
    730 
    731 void Predictor::SerializeReferrers(base::ListValue* referral_list) {
    732   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    733   referral_list->Clear();
    734   referral_list->Append(new base::FundamentalValue(kPredictorReferrerVersion));
    735   for (Referrers::const_iterator it = referrers_.begin();
    736        it != referrers_.end(); ++it) {
    737     // Serialize the list of subresource names.
    738     Value* subresource_list(it->second.Serialize());
    739 
    740     // Create a list for each referer.
    741     ListValue* motivator(new ListValue);
    742     motivator->Append(new StringValue(it->first.spec()));
    743     motivator->Append(subresource_list);
    744 
    745     referral_list->Append(motivator);
    746   }
    747 }
    748 
    749 void Predictor::DeserializeReferrers(const base::ListValue& referral_list) {
    750   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    751   int format_version = -1;
    752   if (referral_list.GetSize() > 0 &&
    753       referral_list.GetInteger(0, &format_version) &&
    754       format_version == kPredictorReferrerVersion) {
    755     for (size_t i = 1; i < referral_list.GetSize(); ++i) {
    756       const base::ListValue* motivator;
    757       if (!referral_list.GetList(i, &motivator)) {
    758         NOTREACHED();
    759         return;
    760       }
    761       std::string motivating_url_spec;
    762       if (!motivator->GetString(0, &motivating_url_spec)) {
    763         NOTREACHED();
    764         return;
    765       }
    766 
    767       const Value* subresource_list;
    768       if (!motivator->Get(1, &subresource_list)) {
    769         NOTREACHED();
    770         return;
    771       }
    772 
    773       referrers_[GURL(motivating_url_spec)].Deserialize(*subresource_list);
    774     }
    775   }
    776 }
    777 
    778 void Predictor::DeserializeReferrersThenDelete(
    779     base::ListValue* referral_list) {
    780   DeserializeReferrers(*referral_list);
    781   delete referral_list;
    782 }
    783 
    784 void Predictor::DiscardInitialNavigationHistory() {
    785   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    786   if (initial_observer_.get())
    787     initial_observer_->DiscardInitialNavigationHistory();
    788 }
    789 
    790 void Predictor::FinalizeInitializationOnIOThread(
    791     const UrlList& startup_urls,
    792     base::ListValue* referral_list,
    793     IOThread* io_thread,
    794     bool predictor_enabled) {
    795   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    796 
    797   predictor_enabled_ = predictor_enabled;
    798   initial_observer_.reset(new InitialObserver());
    799   host_resolver_ = io_thread->globals()->host_resolver.get();
    800   preconnect_usage_.reset(new PreconnectUsage());
    801 
    802   // base::WeakPtrFactory instances need to be created and destroyed
    803   // on the same thread. The predictor lives on the IO thread and will die
    804   // from there so now that we're on the IO thread we need to properly
    805   // initialize the base::WeakPtrFactory.
    806   // TODO(groby): Check if WeakPtrFactory has the same constraint.
    807   weak_factory_.reset(new base::WeakPtrFactory<Predictor>(this));
    808 
    809   // Prefetch these hostnames on startup.
    810   DnsPrefetchMotivatedList(startup_urls, UrlInfo::STARTUP_LIST_MOTIVATED);
    811   DeserializeReferrersThenDelete(referral_list);
    812 }
    813 
    814 //-----------------------------------------------------------------------------
    815 // This section intermingles prefetch results with actual browser HTTP
    816 // network activity.  It supports calculating of the benefit of a prefetch, as
    817 // well as recording what prefetched hostname resolutions might be potentially
    818 // helpful during the next chrome-startup.
    819 //-----------------------------------------------------------------------------
    820 
    821 void Predictor::LearnAboutInitialNavigation(const GURL& url) {
    822   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    823   if (!predictor_enabled_ || NULL == initial_observer_.get() )
    824     return;
    825   initial_observer_->Append(url, this);
    826 }
    827 
    828 // This API is only used in the browser process.
    829 // It is called from an IPC message originating in the renderer.  It currently
    830 // includes both Page-Scan, and Link-Hover prefetching.
    831 // TODO(jar): Separate out link-hover prefetching, and page-scan results.
    832 void Predictor::DnsPrefetchList(const NameList& hostnames) {
    833   // TODO(jar): Push GURL transport further back into renderer, but this will
    834   // require a Webkit change in the observer :-/.
    835   UrlList urls;
    836   for (NameList::const_iterator it = hostnames.begin();
    837        it < hostnames.end();
    838        ++it) {
    839     urls.push_back(GURL("http://" + *it + ":80"));
    840   }
    841 
    842   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    843   DnsPrefetchMotivatedList(urls, UrlInfo::PAGE_SCAN_MOTIVATED);
    844 }
    845 
    846 void Predictor::DnsPrefetchMotivatedList(
    847     const UrlList& urls,
    848     UrlInfo::ResolutionMotivation motivation) {
    849   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI) ||
    850          BrowserThread::CurrentlyOn(BrowserThread::IO));
    851   if (!predictor_enabled_)
    852     return;
    853 
    854   if (BrowserThread::CurrentlyOn(BrowserThread::IO)) {
    855     ResolveList(urls, motivation);
    856   } else {
    857     BrowserThread::PostTask(
    858         BrowserThread::IO,
    859         FROM_HERE,
    860         base::Bind(&Predictor::ResolveList, base::Unretained(this),
    861                    urls, motivation));
    862   }
    863 }
    864 
    865 //-----------------------------------------------------------------------------
    866 // Functions to handle saving of hostnames from one session to the next, to
    867 // expedite startup times.
    868 
    869 static void SaveDnsPrefetchStateForNextStartupAndTrimOnIOThread(
    870     base::ListValue* startup_list,
    871     base::ListValue* referral_list,
    872     base::WaitableEvent* completion,
    873     Predictor* predictor) {
    874   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    875 
    876   if (NULL == predictor) {
    877     completion->Signal();
    878     return;
    879   }
    880   predictor->SaveDnsPrefetchStateForNextStartupAndTrim(
    881       startup_list, referral_list, completion);
    882 }
    883 
    884 void Predictor::SaveStateForNextStartupAndTrim(PrefService* prefs) {
    885   if (!predictor_enabled_)
    886     return;
    887 
    888   base::WaitableEvent completion(true, false);
    889 
    890   ListPrefUpdate update_startup_list(prefs, prefs::kDnsPrefetchingStartupList);
    891   ListPrefUpdate update_referral_list(prefs,
    892                                       prefs::kDnsPrefetchingHostReferralList);
    893   if (BrowserThread::CurrentlyOn(BrowserThread::IO)) {
    894     SaveDnsPrefetchStateForNextStartupAndTrimOnIOThread(
    895         update_startup_list.Get(),
    896         update_referral_list.Get(),
    897         &completion,
    898         this);
    899   } else {
    900     bool posted = BrowserThread::PostTask(
    901         BrowserThread::IO,
    902         FROM_HERE,
    903         base::Bind(
    904             &SaveDnsPrefetchStateForNextStartupAndTrimOnIOThread,
    905             update_startup_list.Get(),
    906             update_referral_list.Get(),
    907             &completion,
    908             this));
    909 
    910     // TODO(jar): Synchronous waiting for the IO thread is a potential source
    911     // to deadlocks and should be investigated. See http://crbug.com/78451.
    912     DCHECK(posted);
    913     if (posted) {
    914       // http://crbug.com/124954
    915       base::ThreadRestrictions::ScopedAllowWait allow_wait;
    916       completion.Wait();
    917     }
    918   }
    919 }
    920 
    921 void Predictor::SaveDnsPrefetchStateForNextStartupAndTrim(
    922     base::ListValue* startup_list,
    923     base::ListValue* referral_list,
    924     base::WaitableEvent* completion) {
    925   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    926   if (initial_observer_.get())
    927     initial_observer_->GetInitialDnsResolutionList(startup_list);
    928 
    929   // Do at least one trim at shutdown, in case the user wasn't running long
    930   // enough to do any regular trimming of referrers.
    931   TrimReferrersNow();
    932   SerializeReferrers(referral_list);
    933 
    934   completion->Signal();
    935 }
    936 
    937 void Predictor::EnablePredictor(bool enable) {
    938   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI) ||
    939          BrowserThread::CurrentlyOn(BrowserThread::IO));
    940 
    941   if (BrowserThread::CurrentlyOn(BrowserThread::IO)) {
    942     EnablePredictorOnIOThread(enable);
    943   } else {
    944     BrowserThread::PostTask(
    945         BrowserThread::IO,
    946         FROM_HERE,
    947         base::Bind(&Predictor::EnablePredictorOnIOThread,
    948                    base::Unretained(this), enable));
    949   }
    950 }
    951 
    952 void Predictor::EnablePredictorOnIOThread(bool enable) {
    953   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    954   predictor_enabled_ = enable;
    955 }
    956 
    957 void Predictor::PreconnectUrl(const GURL& url,
    958                               const GURL& first_party_for_cookies,
    959                               UrlInfo::ResolutionMotivation motivation,
    960                               int count) {
    961   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI) ||
    962          BrowserThread::CurrentlyOn(BrowserThread::IO));
    963 
    964   if (BrowserThread::CurrentlyOn(BrowserThread::IO)) {
    965     PreconnectUrlOnIOThread(url, first_party_for_cookies, motivation, count);
    966   } else {
    967     BrowserThread::PostTask(
    968         BrowserThread::IO,
    969         FROM_HERE,
    970         base::Bind(&Predictor::PreconnectUrlOnIOThread,
    971                    base::Unretained(this), url, first_party_for_cookies,
    972                    motivation, count));
    973   }
    974 }
    975 
    976 void Predictor::PreconnectUrlOnIOThread(
    977     const GURL& url,
    978     const GURL& first_party_for_cookies,
    979     UrlInfo::ResolutionMotivation motivation,
    980     int count) {
    981   if (motivation == UrlInfo::MOUSE_OVER_MOTIVATED)
    982     RecordPreconnectTrigger(url);
    983 
    984   PreconnectOnIOThread(url,
    985                        first_party_for_cookies,
    986                        motivation,
    987                        count,
    988                        url_request_context_getter_.get());
    989 }
    990 
    991 void Predictor::RecordPreconnectTrigger(const GURL& url) {
    992   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    993   if (preconnect_usage_)
    994     preconnect_usage_->ObservePreconnect(url);
    995 }
    996 
    997 void Predictor::RecordPreconnectNavigationStat(
    998     const std::vector<GURL>& url_chain,
    999     bool is_subresource) {
   1000   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
   1001 
   1002   if (preconnect_usage_)
   1003     preconnect_usage_->ObserveNavigationChain(url_chain, is_subresource);
   1004 }
   1005 
   1006 void Predictor::RecordLinkNavigation(const GURL& url) {
   1007   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
   1008   if (preconnect_usage_)
   1009     preconnect_usage_->ObserveLinkNavigation(url);
   1010 }
   1011 
   1012 void Predictor::PredictFrameSubresources(const GURL& url,
   1013                                          const GURL& first_party_for_cookies) {
   1014   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI) ||
   1015          BrowserThread::CurrentlyOn(BrowserThread::IO));
   1016   if (!predictor_enabled_)
   1017     return;
   1018   DCHECK_EQ(url.GetWithEmptyPath(), url);
   1019   // Add one pass through the message loop to allow current navigation to
   1020   // proceed.
   1021   if (BrowserThread::CurrentlyOn(BrowserThread::IO)) {
   1022     PrepareFrameSubresources(url, first_party_for_cookies);
   1023   } else {
   1024     BrowserThread::PostTask(
   1025         BrowserThread::IO,
   1026         FROM_HERE,
   1027         base::Bind(&Predictor::PrepareFrameSubresources,
   1028                    base::Unretained(this), url, first_party_for_cookies));
   1029   }
   1030 }
   1031 
   1032 enum SubresourceValue {
   1033   PRECONNECTION,
   1034   PRERESOLUTION,
   1035   TOO_NEW,
   1036   SUBRESOURCE_VALUE_MAX
   1037 };
   1038 
   1039 void Predictor::PrepareFrameSubresources(const GURL& url,
   1040                                          const GURL& first_party_for_cookies) {
   1041   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
   1042   DCHECK_EQ(url.GetWithEmptyPath(), url);
   1043   Referrers::iterator it = referrers_.find(url);
   1044   if (referrers_.end() == it) {
   1045     // Only when we don't know anything about this url, make 2 connections
   1046     // available.  We could do this completely via learning (by prepopulating
   1047     // the referrer_ list with this expected value), but it would swell the
   1048     // size of the list with all the "Leaf" nodes in the tree (nodes that don't
   1049     // load any subresources).  If we learn about this resource, we will instead
   1050     // provide a more carefully estimated preconnection count.
   1051     if (preconnect_enabled_) {
   1052       PreconnectUrlOnIOThread(url, first_party_for_cookies,
   1053                               UrlInfo::SELF_REFERAL_MOTIVATED, 2);
   1054     }
   1055     return;
   1056   }
   1057 
   1058   Referrer* referrer = &(it->second);
   1059   referrer->IncrementUseCount();
   1060   const UrlInfo::ResolutionMotivation motivation =
   1061       UrlInfo::LEARNED_REFERAL_MOTIVATED;
   1062   for (Referrer::iterator future_url = referrer->begin();
   1063        future_url != referrer->end(); ++future_url) {
   1064     SubresourceValue evalution(TOO_NEW);
   1065     double connection_expectation = future_url->second.subresource_use_rate();
   1066     UMA_HISTOGRAM_CUSTOM_COUNTS("Net.PreconnectSubresourceExpectation",
   1067                                 static_cast<int>(connection_expectation * 100),
   1068                                 10, 5000, 50);
   1069     future_url->second.ReferrerWasObserved();
   1070     if (preconnect_enabled_ &&
   1071         connection_expectation > kPreconnectWorthyExpectedValue) {
   1072       evalution = PRECONNECTION;
   1073       future_url->second.IncrementPreconnectionCount();
   1074       int count = static_cast<int>(std::ceil(connection_expectation));
   1075       if (url.host() == future_url->first.host())
   1076         ++count;
   1077       PreconnectUrlOnIOThread(future_url->first, first_party_for_cookies,
   1078                               motivation, count);
   1079     } else if (connection_expectation > kDNSPreresolutionWorthyExpectedValue) {
   1080       evalution = PRERESOLUTION;
   1081       future_url->second.preresolution_increment();
   1082       UrlInfo* queued_info = AppendToResolutionQueue(future_url->first,
   1083                                                      motivation);
   1084       if (queued_info)
   1085         queued_info->SetReferringHostname(url);
   1086     }
   1087     UMA_HISTOGRAM_ENUMERATION("Net.PreconnectSubresourceEval", evalution,
   1088                               SUBRESOURCE_VALUE_MAX);
   1089   }
   1090 }
   1091 
   1092 void Predictor::OnLookupFinished(LookupRequest* request, const GURL& url,
   1093                                  bool found) {
   1094   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
   1095 
   1096   LookupFinished(request, url, found);
   1097   pending_lookups_.erase(request);
   1098   delete request;
   1099 
   1100   StartSomeQueuedResolutions();
   1101 }
   1102 
   1103 void Predictor::LookupFinished(LookupRequest* request, const GURL& url,
   1104                                bool found) {
   1105   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
   1106   UrlInfo* info = &results_[url];
   1107   DCHECK(info->HasUrl(url));
   1108   if (info->is_marked_to_delete()) {
   1109     results_.erase(url);
   1110   } else {
   1111     if (found)
   1112       info->SetFoundState();
   1113     else
   1114       info->SetNoSuchNameState();
   1115   }
   1116 }
   1117 
   1118 UrlInfo* Predictor::AppendToResolutionQueue(
   1119     const GURL& url,
   1120     UrlInfo::ResolutionMotivation motivation) {
   1121   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
   1122   DCHECK(url.has_host());
   1123 
   1124   if (shutdown_)
   1125     return NULL;
   1126 
   1127   UrlInfo* info = &results_[url];
   1128   info->SetUrl(url);  // Initialize or DCHECK.
   1129   // TODO(jar):  I need to discard names that have long since expired.
   1130   // Currently we only add to the domain map :-/
   1131 
   1132   DCHECK(info->HasUrl(url));
   1133 
   1134   if (!info->NeedsDnsUpdate()) {
   1135     info->DLogResultsStats("DNS PrefetchNotUpdated");
   1136     return NULL;
   1137   }
   1138 
   1139   info->SetQueuedState(motivation);
   1140   work_queue_.Push(url, motivation);
   1141   StartSomeQueuedResolutions();
   1142   return info;
   1143 }
   1144 
   1145 bool Predictor::CongestionControlPerformed(UrlInfo* info) {
   1146   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
   1147   // Note: queue_duration is ONLY valid after we go to assigned state.
   1148   if (info->queue_duration() < max_dns_queue_delay_)
   1149     return false;
   1150   // We need to discard all entries in our queue, as we're keeping them waiting
   1151   // too long.  By doing this, we'll have a chance to quickly service urgent
   1152   // resolutions, and not have a bogged down system.
   1153   while (true) {
   1154     info->RemoveFromQueue();
   1155     if (work_queue_.IsEmpty())
   1156       break;
   1157     info = &results_[work_queue_.Pop()];
   1158     info->SetAssignedState();
   1159   }
   1160   return true;
   1161 }
   1162 
   1163 void Predictor::StartSomeQueuedResolutions() {
   1164   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
   1165 
   1166   while (!work_queue_.IsEmpty() &&
   1167          pending_lookups_.size() < max_concurrent_dns_lookups_) {
   1168     const GURL url(work_queue_.Pop());
   1169     UrlInfo* info = &results_[url];
   1170     DCHECK(info->HasUrl(url));
   1171     info->SetAssignedState();
   1172 
   1173     if (CongestionControlPerformed(info)) {
   1174       DCHECK(work_queue_.IsEmpty());
   1175       return;
   1176     }
   1177 
   1178     LookupRequest* request = new LookupRequest(this, host_resolver_, url);
   1179     int status = request->Start();
   1180     if (status == net::ERR_IO_PENDING) {
   1181       // Will complete asynchronously.
   1182       pending_lookups_.insert(request);
   1183       peak_pending_lookups_ = std::max(peak_pending_lookups_,
   1184                                        pending_lookups_.size());
   1185     } else {
   1186       // Completed synchronously (was already cached by HostResolver), or else
   1187       // there was (equivalently) some network error that prevents us from
   1188       // finding the name.  Status net::OK means it was "found."
   1189       LookupFinished(request, url, status == net::OK);
   1190       delete request;
   1191     }
   1192   }
   1193 }
   1194 
   1195 void Predictor::TrimReferrers() {
   1196   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
   1197   if (!urls_being_trimmed_.empty())
   1198     return;   // There is incremental trimming in progress already.
   1199 
   1200   // Check to see if it is time to trim yet.
   1201   base::TimeTicks now = base::TimeTicks::Now();
   1202   if (now < next_trim_time_)
   1203     return;
   1204   next_trim_time_ = now + TimeDelta::FromHours(kDurationBetweenTrimmingsHours);
   1205 
   1206   LoadUrlsForTrimming();
   1207   PostIncrementalTrimTask();
   1208 }
   1209 
   1210 void Predictor::LoadUrlsForTrimming() {
   1211   DCHECK(urls_being_trimmed_.empty());
   1212   for (Referrers::const_iterator it = referrers_.begin();
   1213        it != referrers_.end(); ++it)
   1214     urls_being_trimmed_.push_back(it->first);
   1215   UMA_HISTOGRAM_COUNTS("Net.PredictionTrimSize", urls_being_trimmed_.size());
   1216 }
   1217 
   1218 void Predictor::PostIncrementalTrimTask() {
   1219   if (urls_being_trimmed_.empty())
   1220     return;
   1221   const TimeDelta kDurationBetweenTrimmingIncrements =
   1222       TimeDelta::FromSeconds(kDurationBetweenTrimmingIncrementsSeconds);
   1223   base::MessageLoop::current()->PostDelayedTask(
   1224       FROM_HERE,
   1225       base::Bind(&Predictor::IncrementalTrimReferrers,
   1226                  weak_factory_->GetWeakPtr(), false),
   1227       kDurationBetweenTrimmingIncrements);
   1228 }
   1229 
   1230 void Predictor::IncrementalTrimReferrers(bool trim_all_now) {
   1231   size_t trim_count = urls_being_trimmed_.size();
   1232   if (!trim_all_now)
   1233     trim_count = std::min(trim_count, kUrlsTrimmedPerIncrement);
   1234   while (trim_count-- != 0) {
   1235     Referrers::iterator it = referrers_.find(urls_being_trimmed_.back());
   1236     urls_being_trimmed_.pop_back();
   1237     if (it == referrers_.end())
   1238       continue;  // Defensive code: It got trimmed away already.
   1239     if (!it->second.Trim(kReferrerTrimRatio, kDiscardableExpectedValue))
   1240       referrers_.erase(it);
   1241   }
   1242   PostIncrementalTrimTask();
   1243 }
   1244 
   1245 // ---------------------- End IO methods. -------------------------------------
   1246 
   1247 //-----------------------------------------------------------------------------
   1248 
   1249 Predictor::HostNameQueue::HostNameQueue() {
   1250 }
   1251 
   1252 Predictor::HostNameQueue::~HostNameQueue() {
   1253 }
   1254 
   1255 void Predictor::HostNameQueue::Push(const GURL& url,
   1256     UrlInfo::ResolutionMotivation motivation) {
   1257   switch (motivation) {
   1258     case UrlInfo::STATIC_REFERAL_MOTIVATED:
   1259     case UrlInfo::LEARNED_REFERAL_MOTIVATED:
   1260     case UrlInfo::MOUSE_OVER_MOTIVATED:
   1261       rush_queue_.push(url);
   1262       break;
   1263 
   1264     default:
   1265       background_queue_.push(url);
   1266       break;
   1267   }
   1268 }
   1269 
   1270 bool Predictor::HostNameQueue::IsEmpty() const {
   1271   return rush_queue_.empty() && background_queue_.empty();
   1272 }
   1273 
   1274 GURL Predictor::HostNameQueue::Pop() {
   1275   DCHECK(!IsEmpty());
   1276   std::queue<GURL> *queue(rush_queue_.empty() ? &background_queue_
   1277                                               : &rush_queue_);
   1278   GURL url(queue->front());
   1279   queue->pop();
   1280   return url;
   1281 }
   1282 
   1283 //-----------------------------------------------------------------------------
   1284 // Member definitions for InitialObserver class.
   1285 
   1286 Predictor::InitialObserver::InitialObserver() {
   1287 }
   1288 
   1289 Predictor::InitialObserver::~InitialObserver() {
   1290 }
   1291 
   1292 void Predictor::InitialObserver::Append(const GURL& url,
   1293                                         Predictor* predictor) {
   1294   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
   1295 
   1296   // TODO(rlp): Do we really need the predictor check here?
   1297   if (NULL == predictor)
   1298     return;
   1299   if (kStartupResolutionCount <= first_navigations_.size())
   1300     return;
   1301 
   1302   DCHECK(url.SchemeIs("http") || url.SchemeIs("https"));
   1303   DCHECK_EQ(url, Predictor::CanonicalizeUrl(url));
   1304   if (first_navigations_.find(url) == first_navigations_.end())
   1305     first_navigations_[url] = base::TimeTicks::Now();
   1306 }
   1307 
   1308 void Predictor::InitialObserver::GetInitialDnsResolutionList(
   1309     base::ListValue* startup_list) {
   1310   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
   1311   DCHECK(startup_list);
   1312   startup_list->Clear();
   1313   DCHECK_EQ(0u, startup_list->GetSize());
   1314   startup_list->Append(
   1315       new base::FundamentalValue(kPredictorStartupFormatVersion));
   1316   for (FirstNavigations::iterator it = first_navigations_.begin();
   1317        it != first_navigations_.end();
   1318        ++it) {
   1319     DCHECK(it->first == Predictor::CanonicalizeUrl(it->first));
   1320     startup_list->Append(new StringValue(it->first.spec()));
   1321   }
   1322 }
   1323 
   1324 void Predictor::InitialObserver::GetFirstResolutionsHtml(
   1325     std::string* output) {
   1326   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
   1327 
   1328   UrlInfo::UrlInfoTable resolution_list;
   1329   {
   1330     for (FirstNavigations::iterator it(first_navigations_.begin());
   1331          it != first_navigations_.end();
   1332          it++) {
   1333       UrlInfo info;
   1334       info.SetUrl(it->first);
   1335       info.set_time(it->second);
   1336       resolution_list.push_back(info);
   1337     }
   1338   }
   1339   UrlInfo::GetHtmlTable(resolution_list,
   1340       "Future startups will prefetch DNS records for ", false, output);
   1341 }
   1342 
   1343 //-----------------------------------------------------------------------------
   1344 // Helper functions
   1345 //-----------------------------------------------------------------------------
   1346 
   1347 // static
   1348 GURL Predictor::CanonicalizeUrl(const GURL& url) {
   1349   if (!url.has_host())
   1350      return GURL::EmptyGURL();
   1351 
   1352   std::string scheme;
   1353   if (url.has_scheme()) {
   1354     scheme = url.scheme();
   1355     if (scheme != "http" && scheme != "https")
   1356       return GURL::EmptyGURL();
   1357     if (url.has_port())
   1358       return url.GetWithEmptyPath();
   1359   } else {
   1360     scheme = "http";
   1361   }
   1362 
   1363   // If we omit a port, it will default to 80 or 443 as appropriate.
   1364   std::string colon_plus_port;
   1365   if (url.has_port())
   1366     colon_plus_port = ":" + url.port();
   1367 
   1368   return GURL(scheme + "://" + url.host() + colon_plus_port);
   1369 }
   1370 
   1371 void SimplePredictor::InitNetworkPredictor(
   1372     PrefService* user_prefs,
   1373     PrefService* local_state,
   1374     IOThread* io_thread,
   1375     net::URLRequestContextGetter* getter) {
   1376   // Empty function for unittests.
   1377 }
   1378 
   1379 void SimplePredictor::ShutdownOnUIThread(PrefService* user_prefs) {
   1380   SetShutdown(true);
   1381 }
   1382 
   1383 }  // namespace chrome_browser_net
   1384