// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/net/predictor.h"

#include <algorithm>
#include <cmath>
#include <set>
#include <sstream>

#include "base/compiler_specific.h"
#include "base/metrics/histogram.h"
#include "base/string_util.h"
#include "base/time.h"
#include "base/values.h"
#include "chrome/browser/net/preconnect.h"
#include "content/browser/browser_thread.h"
#include "net/base/address_list.h"
#include "net/base/completion_callback.h"
#include "net/base/host_port_pair.h"
#include "net/base/host_resolver.h"
#include "net/base/net_errors.h"
#include "net/base/net_log.h"

using base::TimeDelta;

namespace chrome_browser_net {

// static
const double Predictor::kPreconnectWorthyExpectedValue = 0.8;
// static
const double Predictor::kDNSPreresolutionWorthyExpectedValue = 0.1;
// static
const double Predictor::kDiscardableExpectedValue = 0.05;
// The goal of trimming is to reduce the importance (number of expected
// subresources needed) by a factor of 2 after about 24 hours of uptime. We
// trim roughly once per hour of uptime. The ratio to use in each trim
// operation is then the 24th root of 0.5. If a user only surfs for 4 hours a
// day, then after about 6 days they will have halved all their estimates of
// subresource connections. Once this falls below kDiscardableExpectedValue
// the referrer will be discarded.
// TODO(jar): Measure size of referrer lists in the field. Consider an adaptive
// system that uses a higher trim ratio when the list is large.
// static
const double Predictor::kReferrerTrimRatio = 0.97153;

// static
const TimeDelta Predictor::kDurationBetweenTrimmings = TimeDelta::FromHours(1);
// static
const TimeDelta Predictor::kDurationBetweenTrimmingIncrements =
    TimeDelta::FromSeconds(15);
// static
const size_t Predictor::kUrlsTrimmedPerIncrement = 5u;

class Predictor::LookupRequest {
 public:
  LookupRequest(Predictor* predictor,
                net::HostResolver* host_resolver,
                const GURL& url)
      : ALLOW_THIS_IN_INITIALIZER_LIST(
            net_callback_(this, &LookupRequest::OnLookupFinished)),
        predictor_(predictor),
        url_(url),
        resolver_(host_resolver) {
  }

  // Returns the underlying network resolver status.
  // net::OK ==> Host was found synchronously.
  // net::ERR_IO_PENDING ==> Network will callback later with result.
  // anything else ==> Host was not found synchronously.
  int Start() {
    net::HostResolver::RequestInfo resolve_info(
        net::HostPortPair::FromURL(url_));

    // Make a note that this is a speculative resolve request. This allows us
    // to separate it from real navigations in the observer's callback, and
    // lets the HostResolver know it can de-prioritize it.
    resolve_info.set_is_speculative(true);
    return resolver_.Resolve(
        resolve_info, &addresses_, &net_callback_, net::BoundNetLog());
  }

 private:
  void OnLookupFinished(int result) {
    predictor_->OnLookupFinished(this, url_, result == net::OK);
  }

  // HostResolver will call us using this callback when resolution is complete.
  net::CompletionCallbackImpl<LookupRequest> net_callback_;

  Predictor* predictor_;  // The predictor which started us.

  const GURL url_;  // Hostname to resolve.
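  // Does the actual resolution. Since this is a SingleRequestHostResolver,
  // destroying the LookupRequest (e.g., in Predictor::Shutdown) should also
  // cancel any resolution that is still in flight.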
  net::SingleRequestHostResolver resolver_;
  net::AddressList addresses_;

  DISALLOW_COPY_AND_ASSIGN(LookupRequest);
};

Predictor::Predictor(net::HostResolver* host_resolver,
                     TimeDelta max_dns_queue_delay,
                     size_t max_concurrent,
                     bool preconnect_enabled)
    : peak_pending_lookups_(0),
      shutdown_(false),
      max_concurrent_dns_lookups_(max_concurrent),
      max_dns_queue_delay_(max_dns_queue_delay),
      host_resolver_(host_resolver),
      preconnect_enabled_(preconnect_enabled),
      consecutive_omnibox_preconnect_count_(0),
      next_trim_time_(base::TimeTicks::Now() + kDurationBetweenTrimmings),
      ALLOW_THIS_IN_INITIALIZER_LIST(trim_task_factory_(this)) {
}

Predictor::~Predictor() {
  DCHECK(shutdown_);
}

void Predictor::Shutdown() {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  DCHECK(!shutdown_);
  shutdown_ = true;

  std::set<LookupRequest*>::iterator it;
  for (it = pending_lookups_.begin(); it != pending_lookups_.end(); ++it)
    delete *it;
}

// Overloaded Resolve() to take a vector of names.
void Predictor::ResolveList(const UrlList& urls,
                            UrlInfo::ResolutionMotivation motivation) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

  for (UrlList::const_iterator it = urls.begin(); it < urls.end(); ++it) {
    AppendToResolutionQueue(*it, motivation);
  }
}

// Basic Resolve() takes an individual name, and adds it to the queue.
void Predictor::Resolve(const GURL& url,
                        UrlInfo::ResolutionMotivation motivation) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  if (!url.has_host())
    return;
  AppendToResolutionQueue(url, motivation);
}

void Predictor::LearnFromNavigation(const GURL& referring_url,
                                    const GURL& target_url) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  DCHECK(referring_url == referring_url.GetWithEmptyPath());
  DCHECK(target_url == target_url.GetWithEmptyPath());
  if (referring_url.has_host()) {
    referrers_[referring_url].SuggestHost(target_url);
    // Possibly do some referrer trimming.
    TrimReferrers();
  }
}

enum SubresourceValue {
  PRECONNECTION,
  PRERESOLUTION,
  TOO_NEW,
  SUBRESOURCE_VALUE_MAX
};

void Predictor::AnticipateOmniboxUrl(const GURL& url, bool preconnectable) {
  std::string host = url.HostNoBrackets();
  bool is_new_host_request = (host != last_omnibox_host_);
  last_omnibox_host_ = host;

  UrlInfo::ResolutionMotivation motivation(UrlInfo::OMNIBOX_MOTIVATED);
  base::TimeTicks now = base::TimeTicks::Now();

  if (preconnect_enabled()) {
    if (preconnectable && !is_new_host_request) {
      ++consecutive_omnibox_preconnect_count_;
      // The omnibox suggests a search URL (for which we can preconnect) after
      // one or two characters are typed, even though such typing often (1 in
      // 3?) becomes a real URL. This code waits until it has more evidence of
      // a preconnectable URL (search URL) before forming a preconnection, so
      // as to reduce the useless preconnect rate.
      // Perchance this logic should be pushed back into the omnibox, where the
      // actual characters typed, such as a space, can better forecast whether
      // we need to search/preconnect or not. By waiting for at least 4
      // characters in a row that have led to a search proposal, we avoid
      // preconnections for a prefix like "www." and we also wait until we have
      // at least a 4 letter word to search for.
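      // (Note that consecutive_omnibox_preconnect_count_ is reset, below, as
      // soon as the omnibox stops proposing a preconnectable URL, so only an
      // unbroken run of search proposals counts toward the threshold.)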
      // Each character typed appears to induce 2 calls to
      // AnticipateOmniboxUrl(), so we double 4 characters and limit at 8
      // requests.
      // TODO(jar): Use an A/B test to optimize this.
      const int kMinConsecutiveRequests = 8;
      if (consecutive_omnibox_preconnect_count_ >= kMinConsecutiveRequests) {
        // TODO(jar): The wild guess of 30 seconds could be tuned/tested, but
        // it currently is just a guess that most sockets will remain open for
        // at least 30 seconds. This avoids a lot of cross-thread posting, and
        // exercise of the network stack in this common case.
        const int kMaxSearchKeepaliveSeconds(30);
        if ((now - last_omnibox_preconnect_).InSeconds() <
            kMaxSearchKeepaliveSeconds)
          return;  // We've done a preconnect recently.
        last_omnibox_preconnect_ = now;
        const int kConnectionsNeeded = 1;
        PreconnectOnUIThread(CanonicalizeUrl(url), motivation,
                             kConnectionsNeeded);
        return;  // Skip pre-resolution, since we'll open a connection.
      }
    } else {
      consecutive_omnibox_preconnect_count_ = 0;
    }
  }

  // Fall through and consider pre-resolution.

  // Omnibox tends to call in pairs (just a few milliseconds apart), and we
  // really don't need to keep resolving a name that often.
  // TODO(jar): A/B tests could check for perf impact of the early returns.
  if (!is_new_host_request) {
    const int kMinPreresolveSeconds(10);
    if (kMinPreresolveSeconds > (now - last_omnibox_preresolve_).InSeconds())
      return;
  }
  last_omnibox_preresolve_ = now;

  // Perform at least DNS pre-resolution.
  BrowserThread::PostTask(
      BrowserThread::IO,
      FROM_HERE,
      NewRunnableMethod(this, &Predictor::Resolve, CanonicalizeUrl(url),
                        motivation));
}

void Predictor::PreconnectUrlAndSubresources(const GURL& url) {
  if (preconnect_enabled()) {
    std::string host = url.HostNoBrackets();
    UrlInfo::ResolutionMotivation motivation(UrlInfo::EARLY_LOAD_MOTIVATED);
    const int kConnectionsNeeded = 1;
    PreconnectOnUIThread(CanonicalizeUrl(url), motivation,
                         kConnectionsNeeded);
    PredictFrameSubresources(url.GetWithEmptyPath());
  }
}

void Predictor::PredictFrameSubresources(const GURL& url) {
  DCHECK(url.GetWithEmptyPath() == url);
  // Add one pass through the message loop to allow current navigation to
  // proceed.
  BrowserThread::PostTask(
      BrowserThread::IO,
      FROM_HERE,
      NewRunnableMethod(this, &Predictor::PrepareFrameSubresources, url));
}

void Predictor::PrepareFrameSubresources(const GURL& url) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  DCHECK(url.GetWithEmptyPath() == url);
  Referrers::iterator it = referrers_.find(url);
  if (referrers_.end() == it) {
    // Only when we don't know anything about this url do we make 2
    // connections available. We could do this completely via learning (by
    // prepopulating the referrer_ list with this expected value), but it
    // would swell the size of the list with all the "Leaf" nodes in the tree
    // (nodes that don't load any subresources). If we learn about this
    // resource, we will instead provide a more carefully estimated
    // preconnection count.
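    // (The 2 used below is a heuristic default: presumably one warm socket
    // for the frame itself and one to get a head start on its subresources.)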
    if (preconnect_enabled_)
      PreconnectOnIOThread(url, UrlInfo::SELF_REFERAL_MOTIVATED, 2);
    return;
  }

  Referrer* referrer = &(it->second);
  referrer->IncrementUseCount();
  const UrlInfo::ResolutionMotivation motivation =
      UrlInfo::LEARNED_REFERAL_MOTIVATED;
  for (Referrer::iterator future_url = referrer->begin();
       future_url != referrer->end(); ++future_url) {
    SubresourceValue evaluation(TOO_NEW);
    double connection_expectation = future_url->second.subresource_use_rate();
    UMA_HISTOGRAM_CUSTOM_COUNTS("Net.PreconnectSubresourceExpectation",
                                static_cast<int>(connection_expectation * 100),
                                10, 5000, 50);
    future_url->second.ReferrerWasObserved();
    if (preconnect_enabled_ &&
        connection_expectation > kPreconnectWorthyExpectedValue) {
      evaluation = PRECONNECTION;
      future_url->second.IncrementPreconnectionCount();
      int count = static_cast<int>(std::ceil(connection_expectation));
      if (url.host() == future_url->first.host())
        ++count;
      PreconnectOnIOThread(future_url->first, motivation, count);
    } else if (connection_expectation > kDNSPreresolutionWorthyExpectedValue) {
      evaluation = PRERESOLUTION;
      future_url->second.preresolution_increment();
      UrlInfo* queued_info = AppendToResolutionQueue(future_url->first,
                                                     motivation);
      if (queued_info)
        queued_info->SetReferringHostname(url);
    }
    UMA_HISTOGRAM_ENUMERATION("Net.PreconnectSubresourceEval", evaluation,
                              SUBRESOURCE_VALUE_MAX);
  }
}
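
// Worked example for PrepareFrameSubresources() above (an illustrative
// sketch, not a spec): a learned subresource whose subresource_use_rate() is
// 1.7 exceeds kPreconnectWorthyExpectedValue (0.8), so it gets
// ceil(1.7) == 2 preconnected sockets, or 3 if it shares the frame's host; a
// rate of 0.3 only merits a DNS preresolution; a rate at or below
// kDNSPreresolutionWorthyExpectedValue (0.1) gets neither and is simply
// recorded as TOO_NEW in the histogram.
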
// Provide sort order so all .com's are together, etc.
struct RightToLeftStringSorter {
  bool operator()(const GURL& left,
                  const GURL& right) const {
    return string_compare(left.host(), right.host());
  }

  static bool string_compare(const std::string& left_host,
                             const std::string& right_host) {
    if (left_host == right_host) return true;
    size_t left_already_matched = left_host.size();
    size_t right_already_matched = right_host.size();

    // Ensure both strings have characters.
    if (!left_already_matched) return true;
    if (!right_already_matched) return false;

    // Watch for trailing dot, so we'll always be safe to go one beyond dot.
    if ('.' == left_host[left_already_matched - 1]) {
      if ('.' != right_host[right_already_matched - 1])
        return true;
      // Both have dots at end of string.
      --left_already_matched;
      --right_already_matched;
    } else {
      if ('.' == right_host[right_already_matched - 1])
        return false;
    }

    while (1) {
      if (!left_already_matched) return true;
      if (!right_already_matched) return false;

      size_t left_length, right_length;
      size_t left_start = left_host.find_last_of('.',
                                                 left_already_matched - 1);
      if (std::string::npos == left_start) {
        left_length = left_already_matched;
        left_already_matched = left_start = 0;
      } else {
        left_length = left_already_matched - left_start;
        left_already_matched = left_start;
        ++left_start;  // Don't compare the dot.
      }
      size_t right_start = right_host.find_last_of('.',
                                                   right_already_matched - 1);
      if (std::string::npos == right_start) {
        right_length = right_already_matched;
        right_already_matched = right_start = 0;
      } else {
        right_length = right_already_matched - right_start;
        right_already_matched = right_start;
        ++right_start;  // Don't compare the dot.
      }

      int diff = left_host.compare(left_start, left_host.size(),
                                   right_host, right_start, right_host.size());
      if (diff > 0) return false;
      if (diff < 0) return true;
    }
  }
};

void Predictor::GetHtmlReferrerLists(std::string* output) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  if (referrers_.empty())
    return;

  // TODO(jar): Remove any plausible JavaScript from names before displaying.

  typedef std::set<GURL, struct RightToLeftStringSorter> SortedNames;
  SortedNames sorted_names;

  for (Referrers::iterator it = referrers_.begin();
       referrers_.end() != it; ++it)
    sorted_names.insert(it->first);

  output->append("<br><table border>");
  output->append(
      "<tr><th>Host for Page</th>"
      "<th>Page Load<br>Count</th>"
      "<th>Subresource<br>Navigations</th>"
      "<th>Subresource<br>PreConnects</th>"
      "<th>Subresource<br>PreResolves</th>"
      "<th>Expected<br>Connects</th>"
      "<th>Subresource Spec</th></tr>");

  for (SortedNames::iterator it = sorted_names.begin();
       sorted_names.end() != it; ++it) {
    Referrer* referrer = &(referrers_[*it]);
    bool first_set_of_futures = true;
    for (Referrer::iterator future_url = referrer->begin();
         future_url != referrer->end(); ++future_url) {
      output->append("<tr align=right>");
      if (first_set_of_futures) {
        base::StringAppendF(output,
                            "<td rowspan=%d>%s</td><td rowspan=%d>%d</td>",
                            static_cast<int>(referrer->size()),
                            it->spec().c_str(),
                            static_cast<int>(referrer->size()),
                            static_cast<int>(referrer->use_count()));
      }
      first_set_of_futures = false;
      base::StringAppendF(output,
          "<td>%d</td><td>%d</td><td>%d</td><td>%2.3f</td><td>%s</td></tr>",
          static_cast<int>(future_url->second.navigation_count()),
          static_cast<int>(future_url->second.preconnection_count()),
          static_cast<int>(future_url->second.preresolution_count()),
          static_cast<double>(future_url->second.subresource_use_rate()),
          future_url->first.spec().c_str());
    }
  }
  output->append("</table>");
}

void Predictor::GetHtmlInfo(std::string* output) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  // Local lists for calling UrlInfo.
  UrlInfo::UrlInfoTable name_not_found;
  UrlInfo::UrlInfoTable name_preresolved;

  // Get copies of all useful data.
  typedef std::map<GURL, UrlInfo, RightToLeftStringSorter> SortedUrlInfo;
  SortedUrlInfo snapshot;
  // UrlInfo supports value semantics, so we can do a shallow copy.
  for (Results::iterator it(results_.begin()); it != results_.end(); it++)
    snapshot[it->first] = it->second;

  // Partition the UrlInfo's into categories.
  for (SortedUrlInfo::iterator it(snapshot.begin());
       it != snapshot.end(); it++) {
    if (it->second.was_nonexistent()) {
      name_not_found.push_back(it->second);
      continue;
    }
    if (!it->second.was_found())
      continue;  // Still being processed.
    name_preresolved.push_back(it->second);
  }

  bool brief = false;
#ifdef NDEBUG
  brief = true;
#endif  // NDEBUG

  // Call for display of each table, along with title.
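  // (brief is true in release builds, which presumably asks
  // UrlInfo::GetHtmlTable for a condensed listing so the about:dns output
  // stays small; debug builds get the full tables.)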
  UrlInfo::GetHtmlTable(name_preresolved,
      "Preresolution DNS records performed for ", brief, output);
  UrlInfo::GetHtmlTable(name_not_found,
      "Preresolving DNS records revealed non-existence for ", brief, output);
}

UrlInfo* Predictor::AppendToResolutionQueue(
    const GURL& url,
    UrlInfo::ResolutionMotivation motivation) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  DCHECK(url.has_host());

  if (shutdown_)
    return NULL;

  UrlInfo* info = &results_[url];
  info->SetUrl(url);  // Initialize or DCHECK.
  // TODO(jar): I need to discard names that have long since expired.
  // Currently we only add to the domain map :-/

  DCHECK(info->HasUrl(url));

  if (!info->NeedsDnsUpdate()) {
    info->DLogResultsStats("DNS PrefetchNotUpdated");
    return NULL;
  }

  info->SetQueuedState(motivation);
  work_queue_.Push(url, motivation);
  StartSomeQueuedResolutions();
  return info;
}

void Predictor::StartSomeQueuedResolutions() {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

  while (!work_queue_.IsEmpty() &&
         pending_lookups_.size() < max_concurrent_dns_lookups_) {
    const GURL url(work_queue_.Pop());
    UrlInfo* info = &results_[url];
    DCHECK(info->HasUrl(url));
    info->SetAssignedState();

    if (CongestionControlPerformed(info)) {
      DCHECK(work_queue_.IsEmpty());
      return;
    }

    LookupRequest* request = new LookupRequest(this, host_resolver_, url);
    int status = request->Start();
    if (status == net::ERR_IO_PENDING) {
      // Will complete asynchronously.
      pending_lookups_.insert(request);
      peak_pending_lookups_ = std::max(peak_pending_lookups_,
                                       pending_lookups_.size());
    } else {
      // Completed synchronously (was already cached by HostResolver), or else
      // there was (equivalently) some network error that prevents us from
      // finding the name. Status net::OK means it was "found."
      LookupFinished(request, url, status == net::OK);
      delete request;
    }
  }
}

bool Predictor::CongestionControlPerformed(UrlInfo* info) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  // Note: queue_duration is ONLY valid after we go to assigned state.
  if (info->queue_duration() < max_dns_queue_delay_)
    return false;
  // We need to discard all entries in our queue, as we're keeping them waiting
  // too long. By doing this, we'll have a chance to quickly service urgent
  // resolutions, and not have a bogged down system.
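  // Drain everything that is still queued: each entry below is marked
  // assigned and then immediately removed, presumably so its UrlInfo state
  // machine ends up in a consistent (non-queued) state rather than being
  // silently dropped.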
  while (true) {
    info->RemoveFromQueue();
    if (work_queue_.IsEmpty())
      break;
    info = &results_[work_queue_.Pop()];
    info->SetAssignedState();
  }
  return true;
}

void Predictor::OnLookupFinished(LookupRequest* request, const GURL& url,
                                 bool found) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

  LookupFinished(request, url, found);
  pending_lookups_.erase(request);
  delete request;

  StartSomeQueuedResolutions();
}

void Predictor::LookupFinished(LookupRequest* request, const GURL& url,
                               bool found) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  UrlInfo* info = &results_[url];
  DCHECK(info->HasUrl(url));
  if (info->is_marked_to_delete()) {
    results_.erase(url);
  } else {
    if (found)
      info->SetFoundState();
    else
      info->SetNoSuchNameState();
  }
}

void Predictor::DiscardAllResults() {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  // Delete anything listed so far in this session that shows in about:dns.
  referrers_.clear();

  // Try to delete anything in our work queue.
  while (!work_queue_.IsEmpty()) {
    // Emulate processing cycle as though host was not found.
    GURL url = work_queue_.Pop();
    UrlInfo* info = &results_[url];
    DCHECK(info->HasUrl(url));
    info->SetAssignedState();
    info->SetNoSuchNameState();
  }
  // Now every entry in results_ is either resolved, or is being resolved
  // (see LookupRequest).

  // Step through results_, recording names of all hosts that can't be erased.
  // We can't erase anything being worked on.
  Results assignees;
  for (Results::iterator it = results_.begin(); results_.end() != it; ++it) {
    GURL url(it->first);
    UrlInfo* info = &it->second;
    DCHECK(info->HasUrl(url));
    if (info->is_assigned()) {
      info->SetPendingDeleteState();
      assignees[url] = *info;
    }
  }
  DCHECK(assignees.size() <= max_concurrent_dns_lookups_);
  results_.clear();
  // Put back in the names being worked on.
  for (Results::iterator it = assignees.begin(); assignees.end() != it; ++it) {
    DCHECK(it->second.is_marked_to_delete());
    results_[it->first] = it->second;
  }
}

void Predictor::TrimReferrersNow() {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  // Just finish up work if an incremental trim is in progress.
  if (urls_being_trimmed_.empty())
    LoadUrlsForTrimming();
  IncrementalTrimReferrers(true);  // Do everything now.
}

void Predictor::SerializeReferrers(ListValue* referral_list) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  referral_list->Clear();
  referral_list->Append(new FundamentalValue(PREDICTOR_REFERRER_VERSION));
  for (Referrers::const_iterator it = referrers_.begin();
       it != referrers_.end(); ++it) {
    // Serialize the list of subresource names.
    Value* subresource_list(it->second.Serialize());

    // Create a list for each referrer.
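    // Each referrer becomes a two-element list [motivating URL spec,
    // serialized subresources], appended after the format version stored at
    // index 0; DeserializeReferrers() below expects exactly this layout.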
    ListValue* motivator(new ListValue);
    motivator->Append(new StringValue(it->first.spec()));
    motivator->Append(subresource_list);

    referral_list->Append(motivator);
  }
}

void Predictor::DeserializeReferrers(const ListValue& referral_list) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  int format_version = -1;
  if (referral_list.GetSize() > 0 &&
      referral_list.GetInteger(0, &format_version) &&
      format_version == PREDICTOR_REFERRER_VERSION) {
    for (size_t i = 1; i < referral_list.GetSize(); ++i) {
      ListValue* motivator;
      if (!referral_list.GetList(i, &motivator)) {
        NOTREACHED();
        return;
      }
      std::string motivating_url_spec;
      if (!motivator->GetString(0, &motivating_url_spec)) {
        NOTREACHED();
        return;
      }

      Value* subresource_list;
      if (!motivator->Get(1, &subresource_list)) {
        NOTREACHED();
        return;
      }

      referrers_[GURL(motivating_url_spec)].Deserialize(*subresource_list);
    }
  }
}

void Predictor::TrimReferrers() {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  if (!urls_being_trimmed_.empty())
    return;  // There is incremental trimming in progress already.

  // Check to see if it is time to trim yet.
  base::TimeTicks now = base::TimeTicks::Now();
  if (now < next_trim_time_)
    return;
  next_trim_time_ = now + kDurationBetweenTrimmings;

  LoadUrlsForTrimming();
  PostIncrementalTrimTask();
}

void Predictor::LoadUrlsForTrimming() {
  DCHECK(urls_being_trimmed_.empty());
  for (Referrers::const_iterator it = referrers_.begin();
       it != referrers_.end(); ++it)
    urls_being_trimmed_.push_back(it->first);
  UMA_HISTOGRAM_COUNTS("Net.PredictionTrimSize", urls_being_trimmed_.size());
}

void Predictor::PostIncrementalTrimTask() {
  if (urls_being_trimmed_.empty())
    return;
  MessageLoop::current()->PostDelayedTask(
      FROM_HERE,
      trim_task_factory_.NewRunnableMethod(
          &Predictor::IncrementalTrimReferrers, false),
      kDurationBetweenTrimmingIncrements.InMilliseconds());
}

void Predictor::IncrementalTrimReferrers(bool trim_all_now) {
  size_t trim_count = urls_being_trimmed_.size();
  if (!trim_all_now)
    trim_count = std::min(trim_count, kUrlsTrimmedPerIncrement);
  while (trim_count-- != 0) {
    Referrers::iterator it = referrers_.find(urls_being_trimmed_.back());
    urls_being_trimmed_.pop_back();
    if (it == referrers_.end())
      continue;  // Defensive code: It got trimmed away already.
    if (!it->second.Trim(kReferrerTrimRatio, kDiscardableExpectedValue))
      referrers_.erase(it);
  }
  PostIncrementalTrimTask();
}

//------------------------------------------------------------------------------

Predictor::HostNameQueue::HostNameQueue() {
}

Predictor::HostNameQueue::~HostNameQueue() {
}

void Predictor::HostNameQueue::Push(const GURL& url,
                                    UrlInfo::ResolutionMotivation motivation) {
  switch (motivation) {
    case UrlInfo::STATIC_REFERAL_MOTIVATED:
    case UrlInfo::LEARNED_REFERAL_MOTIVATED:
    case UrlInfo::MOUSE_OVER_MOTIVATED:
      rush_queue_.push(url);
      break;

    default:
      background_queue_.push(url);
      break;
  }
}

bool Predictor::HostNameQueue::IsEmpty() const {
  return rush_queue_.empty() && background_queue_.empty();
}
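
// Pop() services rush_queue_ ahead of background_queue_, so referral- and
// mouse-over-motivated names (see Push() above) jump ahead of other
// speculative resolution work.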
GURL Predictor::HostNameQueue::Pop() {
  DCHECK(!IsEmpty());
  std::queue<GURL>* queue(rush_queue_.empty() ? &background_queue_
                                              : &rush_queue_);
  GURL url(queue->front());
  queue->pop();
  return url;
}

void Predictor::DeserializeReferrersThenDelete(ListValue* referral_list) {
  DeserializeReferrers(*referral_list);
  delete referral_list;
}

//------------------------------------------------------------------------------
// Helper functions
//------------------------------------------------------------------------------

// static
GURL Predictor::CanonicalizeUrl(const GURL& url) {
  if (!url.has_host())
    return GURL::EmptyGURL();

  std::string scheme;
  if (url.has_scheme()) {
    scheme = url.scheme();
    if (scheme != "http" && scheme != "https")
      return GURL::EmptyGURL();
    if (url.has_port())
      return url.GetWithEmptyPath();
  } else {
    scheme = "http";
  }

  // If we omit a port, it will default to 80 or 443 as appropriate.
  std::string colon_plus_port;
  if (url.has_port())
    colon_plus_port = ":" + url.port();

  return GURL(scheme + "://" + url.host() + colon_plus_port);
}

}  // namespace chrome_browser_net