1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/history/top_sites_impl.h" 6 7 #include <algorithm> 8 #include <set> 9 10 #include "base/bind.h" 11 #include "base/bind_helpers.h" 12 #include "base/logging.h" 13 #include "base/md5.h" 14 #include "base/memory/ref_counted_memory.h" 15 #include "base/message_loop/message_loop_proxy.h" 16 #include "base/metrics/histogram.h" 17 #include "base/prefs/pref_service.h" 18 #include "base/prefs/scoped_user_pref_update.h" 19 #include "base/strings/string_util.h" 20 #include "base/strings/utf_string_conversions.h" 21 #include "base/task_runner.h" 22 #include "base/values.h" 23 #include "chrome/browser/chrome_notification_types.h" 24 #include "chrome/browser/history/history_backend.h" 25 #include "chrome/browser/history/history_db_task.h" 26 #include "chrome/browser/history/history_notifications.h" 27 #include "chrome/browser/history/history_service_factory.h" 28 #include "chrome/browser/history/top_sites_cache.h" 29 #include "chrome/browser/history/url_utils.h" 30 #include "chrome/browser/profiles/profile.h" 31 #include "chrome/common/pref_names.h" 32 #include "components/history/core/browser/page_usage_data.h" 33 #include "components/history/core/common/thumbnail_score.h" 34 #include "content/public/browser/browser_thread.h" 35 #include "content/public/browser/navigation_controller.h" 36 #include "content/public/browser/navigation_details.h" 37 #include "content/public/browser/navigation_entry.h" 38 #include "content/public/browser/notification_service.h" 39 #include "content/public/browser/web_contents.h" 40 #include "ui/base/l10n/l10n_util.h" 41 #include "ui/base/layout.h" 42 #include "ui/base/resource/resource_bundle.h" 43 #include "ui/gfx/image/image_util.h" 44 45 using base::DictionaryValue; 46 using content::BrowserThread; 47 using content::NavigationController; 48 49 namespace history { 50 51 namespace { 52 53 void RunOrPostGetMostVisitedURLsCallback( 54 base::TaskRunner* task_runner, 55 bool include_forced_urls, 56 const TopSitesImpl::GetMostVisitedURLsCallback& callback, 57 const MostVisitedURLList& all_urls, 58 const MostVisitedURLList& nonforced_urls) { 59 const MostVisitedURLList* urls = 60 include_forced_urls ? &all_urls : &nonforced_urls; 61 if (task_runner->RunsTasksOnCurrentThread()) 62 callback.Run(*urls); 63 else 64 task_runner->PostTask(FROM_HERE, base::Bind(callback, *urls)); 65 } 66 67 // Compares two MostVisitedURL having a non-null |last_forced_time|. 68 bool ForcedURLComparator(const MostVisitedURL& first, 69 const MostVisitedURL& second) { 70 DCHECK(!first.last_forced_time.is_null() && 71 !second.last_forced_time.is_null()); 72 return first.last_forced_time < second.last_forced_time; 73 } 74 75 } // namespace 76 77 // How many non-forced top sites to store in the cache. 78 static const size_t kNonForcedTopSitesNumber = 20; 79 80 // How many forced top sites to store in the cache. 81 static const size_t kForcedTopSitesNumber = 20; 82 83 // Max number of temporary images we'll cache. See comment above 84 // temp_images_ for details. 85 static const size_t kMaxTempTopImages = 8; 86 87 static const int kDaysOfHistory = 90; 88 // Time from startup to first HistoryService query. 89 static const int64 kUpdateIntervalSecs = 15; 90 // Intervals between requests to HistoryService. 91 static const int64 kMinUpdateIntervalMinutes = 1; 92 static const int64 kMaxUpdateIntervalMinutes = 60; 93 94 // Use 100 quality (highest quality) because we're very sensitive to 95 // artifacts for these small sized, highly detailed images. 96 static const int kTopSitesImageQuality = 100; 97 98 TopSitesImpl::TopSitesImpl(Profile* profile) 99 : backend_(NULL), 100 cache_(new TopSitesCache()), 101 thread_safe_cache_(new TopSitesCache()), 102 profile_(profile), 103 last_num_urls_changed_(0), 104 loaded_(false) { 105 if (!profile_) 106 return; 107 108 if (content::NotificationService::current()) { 109 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED, 110 content::Source<Profile>(profile_)); 111 // Listen for any nav commits. We'll ignore those not related to this 112 // profile when we get the notification. 113 registrar_.Add(this, content::NOTIFICATION_NAV_ENTRY_COMMITTED, 114 content::NotificationService::AllSources()); 115 } 116 for (size_t i = 0; i < arraysize(kPrepopulatedPages); i++) { 117 int url_id = kPrepopulatedPages[i].url_id; 118 prepopulated_page_urls_.push_back( 119 GURL(l10n_util::GetStringUTF8(url_id))); 120 } 121 } 122 123 void TopSitesImpl::Init(const base::FilePath& db_name) { 124 // Create the backend here, rather than in the constructor, so that 125 // unit tests that do not need the backend can run without a problem. 126 backend_ = new TopSitesBackend; 127 backend_->Init(db_name); 128 backend_->GetMostVisitedThumbnails( 129 base::Bind(&TopSitesImpl::OnGotMostVisitedThumbnails, 130 base::Unretained(this)), 131 &cancelable_task_tracker_); 132 } 133 134 bool TopSitesImpl::SetPageThumbnail(const GURL& url, 135 const gfx::Image& thumbnail, 136 const ThumbnailScore& score) { 137 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 138 139 if (!loaded_) { 140 // TODO(sky): I need to cache these and apply them after the load 141 // completes. 142 return false; 143 } 144 145 bool add_temp_thumbnail = false; 146 if (!IsKnownURL(url)) { 147 if (!IsNonForcedFull()) { 148 add_temp_thumbnail = true; 149 } else { 150 return false; // This URL is not known to us. 151 } 152 } 153 154 if (!HistoryService::CanAddURL(url)) 155 return false; // It's not a real webpage. 156 157 scoped_refptr<base::RefCountedBytes> thumbnail_data; 158 if (!EncodeBitmap(thumbnail, &thumbnail_data)) 159 return false; 160 161 if (add_temp_thumbnail) { 162 // Always remove the existing entry and then add it back. That way if we end 163 // up with too many temp thumbnails we'll prune the oldest first. 164 RemoveTemporaryThumbnailByURL(url); 165 AddTemporaryThumbnail(url, thumbnail_data.get(), score); 166 return true; 167 } 168 169 return SetPageThumbnailEncoded(url, thumbnail_data.get(), score); 170 } 171 172 bool TopSitesImpl::SetPageThumbnailToJPEGBytes( 173 const GURL& url, 174 const base::RefCountedMemory* memory, 175 const ThumbnailScore& score) { 176 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 177 178 if (!loaded_) { 179 // TODO(sky): I need to cache these and apply them after the load 180 // completes. 181 return false; 182 } 183 184 bool add_temp_thumbnail = false; 185 if (!IsKnownURL(url)) { 186 if (!IsNonForcedFull()) { 187 add_temp_thumbnail = true; 188 } else { 189 return false; // This URL is not known to us. 190 } 191 } 192 193 if (!HistoryService::CanAddURL(url)) 194 return false; // It's not a real webpage. 195 196 if (add_temp_thumbnail) { 197 // Always remove the existing entry and then add it back. That way if we end 198 // up with too many temp thumbnails we'll prune the oldest first. 199 RemoveTemporaryThumbnailByURL(url); 200 AddTemporaryThumbnail(url, memory, score); 201 return true; 202 } 203 204 return SetPageThumbnailEncoded(url, memory, score); 205 } 206 207 // WARNING: this function may be invoked on any thread. 208 void TopSitesImpl::GetMostVisitedURLs( 209 const GetMostVisitedURLsCallback& callback, 210 bool include_forced_urls) { 211 MostVisitedURLList filtered_urls; 212 { 213 base::AutoLock lock(lock_); 214 if (!loaded_) { 215 // A request came in before we finished loading. Store the callback and 216 // we'll run it on current thread when we finish loading. 217 pending_callbacks_.push_back( 218 base::Bind(&RunOrPostGetMostVisitedURLsCallback, 219 base::MessageLoopProxy::current(), 220 include_forced_urls, 221 callback)); 222 return; 223 } 224 if (include_forced_urls) { 225 filtered_urls = thread_safe_cache_->top_sites(); 226 } else { 227 filtered_urls.assign(thread_safe_cache_->top_sites().begin() + 228 thread_safe_cache_->GetNumForcedURLs(), 229 thread_safe_cache_->top_sites().end()); 230 } 231 } 232 callback.Run(filtered_urls); 233 } 234 235 bool TopSitesImpl::GetPageThumbnail( 236 const GURL& url, 237 bool prefix_match, 238 scoped_refptr<base::RefCountedMemory>* bytes) { 239 // WARNING: this may be invoked on any thread. 240 // Perform exact match. 241 { 242 base::AutoLock lock(lock_); 243 if (thread_safe_cache_->GetPageThumbnail(url, bytes)) 244 return true; 245 } 246 247 // Resource bundle is thread safe. 248 for (size_t i = 0; i < arraysize(kPrepopulatedPages); i++) { 249 if (url == prepopulated_page_urls_[i]) { 250 *bytes = ResourceBundle::GetSharedInstance(). 251 LoadDataResourceBytesForScale( 252 kPrepopulatedPages[i].thumbnail_id, 253 ui::SCALE_FACTOR_100P); 254 return true; 255 } 256 } 257 258 if (prefix_match) { 259 // If http or https, search with |url| first, then try the other one. 260 std::vector<GURL> url_list; 261 url_list.push_back(url); 262 if (url.SchemeIsHTTPOrHTTPS()) 263 url_list.push_back(ToggleHTTPAndHTTPS(url)); 264 265 for (std::vector<GURL>::iterator it = url_list.begin(); 266 it != url_list.end(); ++it) { 267 base::AutoLock lock(lock_); 268 269 GURL canonical_url; 270 // Test whether any stored URL is a prefix of |url|. 271 canonical_url = thread_safe_cache_->GetGeneralizedCanonicalURL(*it); 272 if (!canonical_url.is_empty() && 273 thread_safe_cache_->GetPageThumbnail(canonical_url, bytes)) { 274 return true; 275 } 276 } 277 } 278 279 return false; 280 } 281 282 bool TopSitesImpl::GetPageThumbnailScore(const GURL& url, 283 ThumbnailScore* score) { 284 // WARNING: this may be invoked on any thread. 285 base::AutoLock lock(lock_); 286 return thread_safe_cache_->GetPageThumbnailScore(url, score); 287 } 288 289 bool TopSitesImpl::GetTemporaryPageThumbnailScore(const GURL& url, 290 ThumbnailScore* score) { 291 for (TempImages::iterator i = temp_images_.begin(); i != temp_images_.end(); 292 ++i) { 293 if (i->first == url) { 294 *score = i->second.thumbnail_score; 295 return true; 296 } 297 } 298 return false; 299 } 300 301 302 // Returns the index of |url| in |urls|, or -1 if not found. 303 static int IndexOf(const MostVisitedURLList& urls, const GURL& url) { 304 for (size_t i = 0; i < urls.size(); i++) { 305 if (urls[i].url == url) 306 return i; 307 } 308 return -1; 309 } 310 311 void TopSitesImpl::SyncWithHistory() { 312 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 313 if (loaded_ && temp_images_.size()) { 314 // If we have temporary thumbnails it means there isn't much data, and most 315 // likely the user is first running Chrome. During this time we throttle 316 // updating from history by 30 seconds. If the user creates a new tab page 317 // during this window of time we force updating from history so that the new 318 // tab page isn't so far out of date. 319 timer_.Stop(); 320 StartQueryForMostVisited(); 321 } 322 } 323 324 bool TopSitesImpl::HasBlacklistedItems() const { 325 const base::DictionaryValue* blacklist = 326 profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist); 327 return blacklist && !blacklist->empty(); 328 } 329 330 void TopSitesImpl::AddBlacklistedURL(const GURL& url) { 331 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 332 333 base::Value* dummy = base::Value::CreateNullValue(); 334 { 335 DictionaryPrefUpdate update(profile_->GetPrefs(), 336 prefs::kNtpMostVisitedURLsBlacklist); 337 base::DictionaryValue* blacklist = update.Get(); 338 blacklist->SetWithoutPathExpansion(GetURLHash(url), dummy); 339 } 340 341 ResetThreadSafeCache(); 342 NotifyTopSitesChanged(); 343 } 344 345 void TopSitesImpl::RemoveBlacklistedURL(const GURL& url) { 346 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 347 { 348 DictionaryPrefUpdate update(profile_->GetPrefs(), 349 prefs::kNtpMostVisitedURLsBlacklist); 350 base::DictionaryValue* blacklist = update.Get(); 351 blacklist->RemoveWithoutPathExpansion(GetURLHash(url), NULL); 352 } 353 ResetThreadSafeCache(); 354 NotifyTopSitesChanged(); 355 } 356 357 bool TopSitesImpl::IsBlacklisted(const GURL& url) { 358 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 359 const base::DictionaryValue* blacklist = 360 profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist); 361 return blacklist && blacklist->HasKey(GetURLHash(url)); 362 } 363 364 void TopSitesImpl::ClearBlacklistedURLs() { 365 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 366 { 367 DictionaryPrefUpdate update(profile_->GetPrefs(), 368 prefs::kNtpMostVisitedURLsBlacklist); 369 base::DictionaryValue* blacklist = update.Get(); 370 blacklist->Clear(); 371 } 372 ResetThreadSafeCache(); 373 NotifyTopSitesChanged(); 374 } 375 376 void TopSitesImpl::Shutdown() { 377 profile_ = NULL; 378 // Cancel all requests so that the service doesn't callback to us after we've 379 // invoked Shutdown (this could happen if we have a pending request and 380 // Shutdown is invoked). 381 cancelable_task_tracker_.TryCancelAll(); 382 backend_->Shutdown(); 383 } 384 385 // static 386 void TopSitesImpl::DiffMostVisited(const MostVisitedURLList& old_list, 387 const MostVisitedURLList& new_list, 388 TopSitesDelta* delta) { 389 390 // Add all the old URLs for quick lookup. This maps URLs to the corresponding 391 // index in the input. 392 std::map<GURL, size_t> all_old_urls; 393 size_t num_old_forced = 0; 394 for (size_t i = 0; i < old_list.size(); i++) { 395 if (!old_list[i].last_forced_time.is_null()) 396 num_old_forced++; 397 DCHECK(old_list[i].last_forced_time.is_null() || i < num_old_forced) 398 << "Forced URLs must all appear before non-forced URLs."; 399 all_old_urls[old_list[i].url] = i; 400 } 401 402 // Check all the URLs in the new set to see which ones are new or just moved. 403 // When we find a match in the old set, we'll reset its index to our special 404 // marker. This allows us to quickly identify the deleted ones in a later 405 // pass. 406 const size_t kAlreadyFoundMarker = static_cast<size_t>(-1); 407 int rank = -1; // Forced URLs have a rank of -1. 408 for (size_t i = 0; i < new_list.size(); i++) { 409 // Increase the rank if we're going through forced URLs. This works because 410 // non-forced URLs all come after forced URLs. 411 if (new_list[i].last_forced_time.is_null()) 412 rank++; 413 DCHECK(new_list[i].last_forced_time.is_null() == (rank != -1)) 414 << "Forced URLs must all appear before non-forced URLs."; 415 std::map<GURL, size_t>::iterator found = all_old_urls.find(new_list[i].url); 416 if (found == all_old_urls.end()) { 417 MostVisitedURLWithRank added; 418 added.url = new_list[i]; 419 added.rank = rank; 420 delta->added.push_back(added); 421 } else { 422 DCHECK(found->second != kAlreadyFoundMarker) 423 << "Same URL appears twice in the new list."; 424 int old_rank = found->second >= num_old_forced ? 425 found->second - num_old_forced : -1; 426 if (old_rank != rank || 427 old_list[found->second].last_forced_time != 428 new_list[i].last_forced_time) { 429 MostVisitedURLWithRank moved; 430 moved.url = new_list[i]; 431 moved.rank = rank; 432 delta->moved.push_back(moved); 433 } 434 found->second = kAlreadyFoundMarker; 435 } 436 } 437 438 // Any member without the special marker in the all_old_urls list means that 439 // there wasn't a "new" URL that mapped to it, so it was deleted. 440 for (std::map<GURL, size_t>::const_iterator i = all_old_urls.begin(); 441 i != all_old_urls.end(); ++i) { 442 if (i->second != kAlreadyFoundMarker) 443 delta->deleted.push_back(old_list[i->second]); 444 } 445 } 446 447 base::CancelableTaskTracker::TaskId TopSitesImpl::StartQueryForMostVisited() { 448 DCHECK(loaded_); 449 if (!profile_) 450 return base::CancelableTaskTracker::kBadTaskId; 451 452 HistoryService* hs = HistoryServiceFactory::GetForProfile( 453 profile_, Profile::EXPLICIT_ACCESS); 454 // |hs| may be null during unit tests. 455 if (hs) { 456 return hs->QueryMostVisitedURLs( 457 num_results_to_request_from_history(), 458 kDaysOfHistory, 459 base::Bind(&TopSitesImpl::OnTopSitesAvailableFromHistory, 460 base::Unretained(this)), 461 &cancelable_task_tracker_); 462 } 463 return base::CancelableTaskTracker::kBadTaskId; 464 } 465 466 bool TopSitesImpl::IsKnownURL(const GURL& url) { 467 return loaded_ && cache_->IsKnownURL(url); 468 } 469 470 const std::string& TopSitesImpl::GetCanonicalURLString(const GURL& url) const { 471 return cache_->GetCanonicalURL(url).spec(); 472 } 473 474 bool TopSitesImpl::IsNonForcedFull() { 475 return loaded_ && cache_->GetNumNonForcedURLs() >= kNonForcedTopSitesNumber; 476 } 477 478 bool TopSitesImpl::IsForcedFull() { 479 return loaded_ && cache_->GetNumForcedURLs() >= kForcedTopSitesNumber; 480 } 481 482 TopSitesImpl::~TopSitesImpl() { 483 } 484 485 bool TopSitesImpl::SetPageThumbnailNoDB( 486 const GURL& url, 487 const base::RefCountedMemory* thumbnail_data, 488 const ThumbnailScore& score) { 489 // This should only be invoked when we know about the url. 490 DCHECK(cache_->IsKnownURL(url)); 491 492 const MostVisitedURL& most_visited = 493 cache_->top_sites()[cache_->GetURLIndex(url)]; 494 Images* image = cache_->GetImage(url); 495 496 // When comparing the thumbnail scores, we need to take into account the 497 // redirect hops, which are not generated when the thumbnail is because the 498 // redirects weren't known. We fill that in here since we know the redirects. 499 ThumbnailScore new_score_with_redirects(score); 500 new_score_with_redirects.redirect_hops_from_dest = 501 GetRedirectDistanceForURL(most_visited, url); 502 503 if (!ShouldReplaceThumbnailWith(image->thumbnail_score, 504 new_score_with_redirects) && 505 image->thumbnail.get()) 506 return false; // The one we already have is better. 507 508 image->thumbnail = const_cast<base::RefCountedMemory*>(thumbnail_data); 509 image->thumbnail_score = new_score_with_redirects; 510 511 ResetThreadSafeImageCache(); 512 return true; 513 } 514 515 bool TopSitesImpl::SetPageThumbnailEncoded( 516 const GURL& url, 517 const base::RefCountedMemory* thumbnail, 518 const ThumbnailScore& score) { 519 if (!SetPageThumbnailNoDB(url, thumbnail, score)) 520 return false; 521 522 // Update the database. 523 if (!cache_->IsKnownURL(url)) 524 return false; 525 526 size_t index = cache_->GetURLIndex(url); 527 int url_rank = index - cache_->GetNumForcedURLs(); 528 const MostVisitedURL& most_visited = cache_->top_sites()[index]; 529 backend_->SetPageThumbnail(most_visited, 530 url_rank < 0 ? -1 : url_rank, 531 *(cache_->GetImage(most_visited.url))); 532 return true; 533 } 534 535 // static 536 bool TopSitesImpl::EncodeBitmap(const gfx::Image& bitmap, 537 scoped_refptr<base::RefCountedBytes>* bytes) { 538 if (bitmap.IsEmpty()) 539 return false; 540 *bytes = new base::RefCountedBytes(); 541 std::vector<unsigned char> data; 542 if (!gfx::JPEG1xEncodedDataFromImage(bitmap, kTopSitesImageQuality, &data)) 543 return false; 544 545 // As we're going to cache this data, make sure the vector is only as big as 546 // it needs to be, as JPEGCodec::Encode() over-allocates data.capacity(). 547 // (In a C++0x future, we can just call shrink_to_fit() in Encode()) 548 (*bytes)->data() = data; 549 return true; 550 } 551 552 void TopSitesImpl::RemoveTemporaryThumbnailByURL(const GURL& url) { 553 for (TempImages::iterator i = temp_images_.begin(); i != temp_images_.end(); 554 ++i) { 555 if (i->first == url) { 556 temp_images_.erase(i); 557 return; 558 } 559 } 560 } 561 562 void TopSitesImpl::AddTemporaryThumbnail( 563 const GURL& url, 564 const base::RefCountedMemory* thumbnail, 565 const ThumbnailScore& score) { 566 if (temp_images_.size() == kMaxTempTopImages) 567 temp_images_.erase(temp_images_.begin()); 568 569 TempImage image; 570 image.first = url; 571 image.second.thumbnail = const_cast<base::RefCountedMemory*>(thumbnail); 572 image.second.thumbnail_score = score; 573 temp_images_.push_back(image); 574 } 575 576 void TopSitesImpl::TimerFired() { 577 StartQueryForMostVisited(); 578 } 579 580 // static 581 int TopSitesImpl::GetRedirectDistanceForURL(const MostVisitedURL& most_visited, 582 const GURL& url) { 583 for (size_t i = 0; i < most_visited.redirects.size(); i++) { 584 if (most_visited.redirects[i] == url) 585 return static_cast<int>(most_visited.redirects.size() - i - 1); 586 } 587 NOTREACHED() << "URL should always be found."; 588 return 0; 589 } 590 591 MostVisitedURLList TopSitesImpl::GetPrepopulatePages() { 592 MostVisitedURLList urls; 593 urls.resize(arraysize(kPrepopulatedPages)); 594 for (size_t i = 0; i < urls.size(); ++i) { 595 MostVisitedURL& url = urls[i]; 596 url.url = GURL(prepopulated_page_urls_[i]); 597 url.redirects.push_back(url.url); 598 url.title = l10n_util::GetStringUTF16(kPrepopulatedPages[i].title_id); 599 } 600 return urls; 601 } 602 603 bool TopSitesImpl::loaded() const { 604 return loaded_; 605 } 606 607 bool TopSitesImpl::AddForcedURL(const GURL& url, const base::Time& time) { 608 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 609 size_t num_forced = cache_->GetNumForcedURLs(); 610 MostVisitedURLList new_list(cache_->top_sites()); 611 MostVisitedURL new_url; 612 613 if (cache_->IsKnownURL(url)) { 614 size_t index = cache_->GetURLIndex(url); 615 // Do nothing if we currently have that URL as non-forced. 616 if (new_list[index].last_forced_time.is_null()) 617 return false; 618 619 // Update the |last_forced_time| of the already existing URL. Delete it and 620 // reinsert it at the right location. 621 new_url = new_list[index]; 622 new_list.erase(new_list.begin() + index); 623 num_forced--; 624 } else { 625 new_url.url = url; 626 new_url.redirects.push_back(url); 627 } 628 new_url.last_forced_time = time; 629 // Add forced URLs and sort. Added to the end of the list of forced URLs 630 // since this is almost always where it needs to go, unless the user's local 631 // clock is fiddled with. 632 MostVisitedURLList::iterator mid = new_list.begin() + num_forced; 633 new_list.insert(mid, new_url); 634 mid = new_list.begin() + num_forced; // Mid was invalidated. 635 std::inplace_merge(new_list.begin(), mid, mid + 1, ForcedURLComparator); 636 SetTopSites(new_list); 637 return true; 638 } 639 640 bool TopSitesImpl::AddPrepopulatedPages(MostVisitedURLList* urls, 641 size_t num_forced_urls) { 642 bool added = false; 643 MostVisitedURLList prepopulate_urls = GetPrepopulatePages(); 644 for (size_t i = 0; i < prepopulate_urls.size(); ++i) { 645 if (urls->size() - num_forced_urls < kNonForcedTopSitesNumber && 646 IndexOf(*urls, prepopulate_urls[i].url) == -1) { 647 urls->push_back(prepopulate_urls[i]); 648 added = true; 649 } 650 } 651 return added; 652 } 653 654 size_t TopSitesImpl::MergeCachedForcedURLs(MostVisitedURLList* new_list) { 655 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 656 // Add all the new URLs for quick lookup. Take that opportunity to count the 657 // number of forced URLs in |new_list|. 658 std::set<GURL> all_new_urls; 659 size_t num_forced = 0; 660 for (size_t i = 0; i < new_list->size(); ++i) { 661 for (size_t j = 0; j < (*new_list)[i].redirects.size(); j++) { 662 all_new_urls.insert((*new_list)[i].redirects[j]); 663 } 664 if (!(*new_list)[i].last_forced_time.is_null()) 665 ++num_forced; 666 } 667 668 // Keep the forced URLs from |cache_| that are not found in |new_list|. 669 MostVisitedURLList filtered_forced_urls; 670 for (size_t i = 0; i < cache_->GetNumForcedURLs(); ++i) { 671 if (all_new_urls.find(cache_->top_sites()[i].url) == all_new_urls.end()) 672 filtered_forced_urls.push_back(cache_->top_sites()[i]); 673 } 674 num_forced += filtered_forced_urls.size(); 675 676 // Prepend forced URLs and sort in order of ascending |last_forced_time|. 677 new_list->insert(new_list->begin(), filtered_forced_urls.begin(), 678 filtered_forced_urls.end()); 679 std::inplace_merge( 680 new_list->begin(), new_list->begin() + filtered_forced_urls.size(), 681 new_list->begin() + num_forced, ForcedURLComparator); 682 683 // Drop older forced URLs if the list overflows. Since forced URLs are always 684 // sort in increasing order of |last_forced_time|, drop the first ones. 685 if (num_forced > kForcedTopSitesNumber) { 686 new_list->erase(new_list->begin(), 687 new_list->begin() + (num_forced - kForcedTopSitesNumber)); 688 num_forced = kForcedTopSitesNumber; 689 } 690 691 return num_forced; 692 } 693 694 void TopSitesImpl::ApplyBlacklist(const MostVisitedURLList& urls, 695 MostVisitedURLList* out) { 696 // Log the number of times ApplyBlacklist is called so we can compute the 697 // average number of blacklisted items per user. 698 const base::DictionaryValue* blacklist = 699 profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist); 700 UMA_HISTOGRAM_BOOLEAN("TopSites.NumberOfApplyBlacklist", true); 701 UMA_HISTOGRAM_COUNTS_100("TopSites.NumberOfBlacklistedItems", 702 (blacklist ? blacklist->size() : 0)); 703 size_t num_non_forced_urls = 0; 704 size_t num_forced_urls = 0; 705 for (size_t i = 0; i < urls.size(); ++i) { 706 if (!IsBlacklisted(urls[i].url)) { 707 if (urls[i].last_forced_time.is_null()) { 708 // Non-forced URL. 709 if (num_non_forced_urls >= kNonForcedTopSitesNumber) 710 continue; 711 num_non_forced_urls++; 712 } else { 713 // Forced URL. 714 if (num_forced_urls >= kForcedTopSitesNumber) 715 continue; 716 num_forced_urls++; 717 } 718 out->push_back(urls[i]); 719 } 720 } 721 } 722 723 std::string TopSitesImpl::GetURLHash(const GURL& url) { 724 // We don't use canonical URLs here to be able to blacklist only one of 725 // the two 'duplicate' sites, e.g. 'gmail.com' and 'mail.google.com'. 726 return base::MD5String(url.spec()); 727 } 728 729 base::TimeDelta TopSitesImpl::GetUpdateDelay() { 730 if (cache_->top_sites().size() <= arraysize(kPrepopulatedPages)) 731 return base::TimeDelta::FromSeconds(30); 732 733 int64 range = kMaxUpdateIntervalMinutes - kMinUpdateIntervalMinutes; 734 int64 minutes = kMaxUpdateIntervalMinutes - 735 last_num_urls_changed_ * range / cache_->top_sites().size(); 736 return base::TimeDelta::FromMinutes(minutes); 737 } 738 739 void TopSitesImpl::Observe(int type, 740 const content::NotificationSource& source, 741 const content::NotificationDetails& details) { 742 if (!loaded_) 743 return; 744 745 if (type == chrome::NOTIFICATION_HISTORY_URLS_DELETED) { 746 content::Details<history::URLsDeletedDetails> deleted_details(details); 747 if (deleted_details->all_history) { 748 SetTopSites(MostVisitedURLList()); 749 backend_->ResetDatabase(); 750 } else { 751 std::set<size_t> indices_to_delete; // Indices into top_sites_. 752 for (URLRows::const_iterator i = deleted_details->rows.begin(); 753 i != deleted_details->rows.end(); ++i) { 754 if (cache_->IsKnownURL(i->url())) 755 indices_to_delete.insert(cache_->GetURLIndex(i->url())); 756 } 757 758 if (indices_to_delete.empty()) 759 return; 760 761 MostVisitedURLList new_top_sites(cache_->top_sites()); 762 for (std::set<size_t>::reverse_iterator i = indices_to_delete.rbegin(); 763 i != indices_to_delete.rend(); i++) { 764 new_top_sites.erase(new_top_sites.begin() + *i); 765 } 766 SetTopSites(new_top_sites); 767 } 768 StartQueryForMostVisited(); 769 } else if (type == content::NOTIFICATION_NAV_ENTRY_COMMITTED) { 770 NavigationController* controller = 771 content::Source<NavigationController>(source).ptr(); 772 Profile* profile = Profile::FromBrowserContext( 773 controller->GetWebContents()->GetBrowserContext()); 774 if (profile == profile_ && !IsNonForcedFull()) { 775 content::LoadCommittedDetails* load_details = 776 content::Details<content::LoadCommittedDetails>(details).ptr(); 777 if (!load_details) 778 return; 779 const GURL& url = load_details->entry->GetURL(); 780 if (!cache_->IsKnownURL(url) && HistoryService::CanAddURL(url)) { 781 // To avoid slamming history we throttle requests when the url updates. 782 // To do otherwise negatively impacts perf tests. 783 RestartQueryForTopSitesTimer(GetUpdateDelay()); 784 } 785 } 786 } 787 } 788 789 void TopSitesImpl::SetTopSites(const MostVisitedURLList& new_top_sites) { 790 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 791 792 MostVisitedURLList top_sites(new_top_sites); 793 size_t num_forced_urls = MergeCachedForcedURLs(&top_sites); 794 AddPrepopulatedPages(&top_sites, num_forced_urls); 795 796 TopSitesDelta delta; 797 DiffMostVisited(cache_->top_sites(), top_sites, &delta); 798 if (!delta.deleted.empty() || !delta.added.empty() || !delta.moved.empty()) { 799 backend_->UpdateTopSites(delta); 800 } 801 802 last_num_urls_changed_ = delta.added.size() + delta.moved.size(); 803 804 // We always do the following steps (setting top sites in cache, and resetting 805 // thread safe cache ...) as this method is invoked during startup at which 806 // point the caches haven't been updated yet. 807 cache_->SetTopSites(top_sites); 808 809 // See if we have any tmp thumbnails for the new sites. 810 if (!temp_images_.empty()) { 811 for (size_t i = 0; i < top_sites.size(); ++i) { 812 const MostVisitedURL& mv = top_sites[i]; 813 GURL canonical_url = cache_->GetCanonicalURL(mv.url); 814 // At the time we get the thumbnail redirects aren't known, so we have to 815 // iterate through all the images. 816 for (TempImages::iterator it = temp_images_.begin(); 817 it != temp_images_.end(); ++it) { 818 if (canonical_url == cache_->GetCanonicalURL(it->first)) { 819 SetPageThumbnailEncoded( 820 mv.url, it->second.thumbnail.get(), it->second.thumbnail_score); 821 temp_images_.erase(it); 822 break; 823 } 824 } 825 } 826 } 827 828 if (top_sites.size() - num_forced_urls >= kNonForcedTopSitesNumber) 829 temp_images_.clear(); 830 831 ResetThreadSafeCache(); 832 ResetThreadSafeImageCache(); 833 NotifyTopSitesChanged(); 834 835 // Restart the timer that queries history for top sites. This is done to 836 // ensure we stay in sync with history. 837 RestartQueryForTopSitesTimer(GetUpdateDelay()); 838 } 839 840 int TopSitesImpl::num_results_to_request_from_history() const { 841 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 842 843 const base::DictionaryValue* blacklist = 844 profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist); 845 return kNonForcedTopSitesNumber + (blacklist ? blacklist->size() : 0); 846 } 847 848 void TopSitesImpl::MoveStateToLoaded() { 849 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 850 851 MostVisitedURLList filtered_urls_all; 852 MostVisitedURLList filtered_urls_nonforced; 853 PendingCallbacks pending_callbacks; 854 { 855 base::AutoLock lock(lock_); 856 857 if (loaded_) 858 return; // Don't do anything if we're already loaded. 859 loaded_ = true; 860 861 // Now that we're loaded we can service the queued up callbacks. Copy them 862 // here and service them outside the lock. 863 if (!pending_callbacks_.empty()) { 864 // We always filter out forced URLs because callers of GetMostVisitedURLs 865 // are not interested in them. 866 filtered_urls_all = thread_safe_cache_->top_sites(); 867 filtered_urls_nonforced.assign(thread_safe_cache_->top_sites().begin() + 868 thread_safe_cache_->GetNumForcedURLs(), 869 thread_safe_cache_->top_sites().end()); 870 pending_callbacks.swap(pending_callbacks_); 871 } 872 } 873 874 for (size_t i = 0; i < pending_callbacks.size(); i++) 875 pending_callbacks[i].Run(filtered_urls_all, filtered_urls_nonforced); 876 877 NotifyTopSitesLoaded(); 878 } 879 880 void TopSitesImpl::ResetThreadSafeCache() { 881 base::AutoLock lock(lock_); 882 MostVisitedURLList cached; 883 ApplyBlacklist(cache_->top_sites(), &cached); 884 thread_safe_cache_->SetTopSites(cached); 885 } 886 887 void TopSitesImpl::ResetThreadSafeImageCache() { 888 base::AutoLock lock(lock_); 889 thread_safe_cache_->SetThumbnails(cache_->images()); 890 } 891 892 void TopSitesImpl::RestartQueryForTopSitesTimer(base::TimeDelta delta) { 893 if (timer_.IsRunning() && ((timer_start_time_ + timer_.GetCurrentDelay()) < 894 (base::TimeTicks::Now() + delta))) { 895 return; 896 } 897 898 timer_start_time_ = base::TimeTicks::Now(); 899 timer_.Stop(); 900 timer_.Start(FROM_HERE, delta, this, &TopSitesImpl::TimerFired); 901 } 902 903 void TopSitesImpl::OnGotMostVisitedThumbnails( 904 const scoped_refptr<MostVisitedThumbnails>& thumbnails) { 905 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 906 907 // Set the top sites directly in the cache so that SetTopSites diffs 908 // correctly. 909 cache_->SetTopSites(thumbnails->most_visited); 910 SetTopSites(thumbnails->most_visited); 911 cache_->SetThumbnails(thumbnails->url_to_images_map); 912 913 ResetThreadSafeImageCache(); 914 915 MoveStateToLoaded(); 916 917 // Start a timer that refreshes top sites from history. 918 RestartQueryForTopSitesTimer( 919 base::TimeDelta::FromSeconds(kUpdateIntervalSecs)); 920 } 921 922 void TopSitesImpl::OnTopSitesAvailableFromHistory( 923 const MostVisitedURLList* pages) { 924 DCHECK(pages); 925 SetTopSites(*pages); 926 } 927 928 } // namespace history 929