1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/history/top_sites_impl.h" 6 7 #include <algorithm> 8 #include <set> 9 10 #include "base/bind.h" 11 #include "base/bind_helpers.h" 12 #include "base/logging.h" 13 #include "base/md5.h" 14 #include "base/memory/ref_counted_memory.h" 15 #include "base/message_loop/message_loop_proxy.h" 16 #include "base/metrics/histogram.h" 17 #include "base/prefs/pref_service.h" 18 #include "base/prefs/scoped_user_pref_update.h" 19 #include "base/strings/string_util.h" 20 #include "base/strings/utf_string_conversions.h" 21 #include "base/task_runner.h" 22 #include "base/values.h" 23 #include "chrome/browser/chrome_notification_types.h" 24 #include "chrome/browser/history/history_backend.h" 25 #include "chrome/browser/history/history_db_task.h" 26 #include "chrome/browser/history/history_notifications.h" 27 #include "chrome/browser/history/history_service_factory.h" 28 #include "chrome/browser/history/page_usage_data.h" 29 #include "chrome/browser/history/top_sites_cache.h" 30 #include "chrome/browser/history/url_utils.h" 31 #include "chrome/browser/profiles/profile.h" 32 #include "chrome/browser/ui/webui/ntp/most_visited_handler.h" 33 #include "chrome/browser/ui/webui/ntp/new_tab_ui.h" 34 #include "chrome/common/pref_names.h" 35 #include "chrome/common/thumbnail_score.h" 36 #include "content/public/browser/browser_thread.h" 37 #include "content/public/browser/navigation_controller.h" 38 #include "content/public/browser/navigation_details.h" 39 #include "content/public/browser/navigation_entry.h" 40 #include "content/public/browser/notification_service.h" 41 #include "content/public/browser/web_contents.h" 42 #include "grit/locale_settings.h" 43 #include "ui/base/l10n/l10n_util.h" 44 #include "ui/base/layout.h" 45 #include "ui/base/resource/resource_bundle.h" 46 #include "ui/gfx/image/image_util.h" 47 48 using base::DictionaryValue; 49 using content::BrowserThread; 50 using content::NavigationController; 51 52 namespace history { 53 54 namespace { 55 56 void RunOrPostGetMostVisitedURLsCallback( 57 base::TaskRunner* task_runner, 58 bool include_forced_urls, 59 const TopSitesImpl::GetMostVisitedURLsCallback& callback, 60 const MostVisitedURLList& all_urls, 61 const MostVisitedURLList& nonforced_urls) { 62 const MostVisitedURLList* urls = 63 include_forced_urls ? &all_urls : &nonforced_urls; 64 if (task_runner->RunsTasksOnCurrentThread()) 65 callback.Run(*urls); 66 else 67 task_runner->PostTask(FROM_HERE, base::Bind(callback, *urls)); 68 } 69 70 // Compares two MostVisitedURL having a non-null |last_forced_time|. 71 bool ForcedURLComparator(const MostVisitedURL& first, 72 const MostVisitedURL& second) { 73 DCHECK(!first.last_forced_time.is_null() && 74 !second.last_forced_time.is_null()); 75 return first.last_forced_time < second.last_forced_time; 76 } 77 78 } // namespace 79 80 // How many non-forced top sites to store in the cache. 81 static const size_t kNonForcedTopSitesNumber = 20; 82 83 // How many forced top sites to store in the cache. 84 static const size_t kForcedTopSitesNumber = 20; 85 86 // Max number of temporary images we'll cache. See comment above 87 // temp_images_ for details. 88 static const size_t kMaxTempTopImages = 8; 89 90 static const int kDaysOfHistory = 90; 91 // Time from startup to first HistoryService query. 92 static const int64 kUpdateIntervalSecs = 15; 93 // Intervals between requests to HistoryService. 94 static const int64 kMinUpdateIntervalMinutes = 1; 95 static const int64 kMaxUpdateIntervalMinutes = 60; 96 97 // Use 100 quality (highest quality) because we're very sensitive to 98 // artifacts for these small sized, highly detailed images. 99 static const int kTopSitesImageQuality = 100; 100 101 TopSitesImpl::TopSitesImpl(Profile* profile) 102 : backend_(NULL), 103 cache_(new TopSitesCache()), 104 thread_safe_cache_(new TopSitesCache()), 105 profile_(profile), 106 last_num_urls_changed_(0), 107 loaded_(false) { 108 if (!profile_) 109 return; 110 111 if (content::NotificationService::current()) { 112 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED, 113 content::Source<Profile>(profile_)); 114 // Listen for any nav commits. We'll ignore those not related to this 115 // profile when we get the notification. 116 registrar_.Add(this, content::NOTIFICATION_NAV_ENTRY_COMMITTED, 117 content::NotificationService::AllSources()); 118 } 119 for (size_t i = 0; i < arraysize(kPrepopulatedPages); i++) { 120 int url_id = kPrepopulatedPages[i].url_id; 121 prepopulated_page_urls_.push_back( 122 GURL(l10n_util::GetStringUTF8(url_id))); 123 } 124 } 125 126 void TopSitesImpl::Init(const base::FilePath& db_name) { 127 // Create the backend here, rather than in the constructor, so that 128 // unit tests that do not need the backend can run without a problem. 129 backend_ = new TopSitesBackend; 130 backend_->Init(db_name); 131 backend_->GetMostVisitedThumbnails( 132 base::Bind(&TopSitesImpl::OnGotMostVisitedThumbnails, 133 base::Unretained(this)), 134 &cancelable_task_tracker_); 135 } 136 137 bool TopSitesImpl::SetPageThumbnail(const GURL& url, 138 const gfx::Image& thumbnail, 139 const ThumbnailScore& score) { 140 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 141 142 if (!loaded_) { 143 // TODO(sky): I need to cache these and apply them after the load 144 // completes. 145 return false; 146 } 147 148 bool add_temp_thumbnail = false; 149 if (!IsKnownURL(url)) { 150 if (!IsNonForcedFull()) { 151 add_temp_thumbnail = true; 152 } else { 153 return false; // This URL is not known to us. 154 } 155 } 156 157 if (!HistoryService::CanAddURL(url)) 158 return false; // It's not a real webpage. 159 160 scoped_refptr<base::RefCountedBytes> thumbnail_data; 161 if (!EncodeBitmap(thumbnail, &thumbnail_data)) 162 return false; 163 164 if (add_temp_thumbnail) { 165 // Always remove the existing entry and then add it back. That way if we end 166 // up with too many temp thumbnails we'll prune the oldest first. 167 RemoveTemporaryThumbnailByURL(url); 168 AddTemporaryThumbnail(url, thumbnail_data.get(), score); 169 return true; 170 } 171 172 return SetPageThumbnailEncoded(url, thumbnail_data.get(), score); 173 } 174 175 bool TopSitesImpl::SetPageThumbnailToJPEGBytes( 176 const GURL& url, 177 const base::RefCountedMemory* memory, 178 const ThumbnailScore& score) { 179 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 180 181 if (!loaded_) { 182 // TODO(sky): I need to cache these and apply them after the load 183 // completes. 184 return false; 185 } 186 187 bool add_temp_thumbnail = false; 188 if (!IsKnownURL(url)) { 189 if (!IsNonForcedFull()) { 190 add_temp_thumbnail = true; 191 } else { 192 return false; // This URL is not known to us. 193 } 194 } 195 196 if (!HistoryService::CanAddURL(url)) 197 return false; // It's not a real webpage. 198 199 if (add_temp_thumbnail) { 200 // Always remove the existing entry and then add it back. That way if we end 201 // up with too many temp thumbnails we'll prune the oldest first. 202 RemoveTemporaryThumbnailByURL(url); 203 AddTemporaryThumbnail(url, memory, score); 204 return true; 205 } 206 207 return SetPageThumbnailEncoded(url, memory, score); 208 } 209 210 // WARNING: this function may be invoked on any thread. 211 void TopSitesImpl::GetMostVisitedURLs( 212 const GetMostVisitedURLsCallback& callback, 213 bool include_forced_urls) { 214 MostVisitedURLList filtered_urls; 215 { 216 base::AutoLock lock(lock_); 217 if (!loaded_) { 218 // A request came in before we finished loading. Store the callback and 219 // we'll run it on current thread when we finish loading. 220 pending_callbacks_.push_back( 221 base::Bind(&RunOrPostGetMostVisitedURLsCallback, 222 base::MessageLoopProxy::current(), 223 include_forced_urls, 224 callback)); 225 return; 226 } 227 if (include_forced_urls) { 228 filtered_urls = thread_safe_cache_->top_sites(); 229 } else { 230 filtered_urls.assign(thread_safe_cache_->top_sites().begin() + 231 thread_safe_cache_->GetNumForcedURLs(), 232 thread_safe_cache_->top_sites().end()); 233 } 234 } 235 callback.Run(filtered_urls); 236 } 237 238 bool TopSitesImpl::GetPageThumbnail( 239 const GURL& url, 240 bool prefix_match, 241 scoped_refptr<base::RefCountedMemory>* bytes) { 242 // WARNING: this may be invoked on any thread. 243 // Perform exact match. 244 { 245 base::AutoLock lock(lock_); 246 if (thread_safe_cache_->GetPageThumbnail(url, bytes)) 247 return true; 248 } 249 250 // Resource bundle is thread safe. 251 for (size_t i = 0; i < arraysize(kPrepopulatedPages); i++) { 252 if (url == prepopulated_page_urls_[i]) { 253 *bytes = ResourceBundle::GetSharedInstance(). 254 LoadDataResourceBytesForScale( 255 kPrepopulatedPages[i].thumbnail_id, 256 ui::SCALE_FACTOR_100P); 257 return true; 258 } 259 } 260 261 if (prefix_match) { 262 // If http or https, search with |url| first, then try the other one. 263 std::vector<GURL> url_list; 264 url_list.push_back(url); 265 if (url.SchemeIsHTTPOrHTTPS()) 266 url_list.push_back(ToggleHTTPAndHTTPS(url)); 267 268 for (std::vector<GURL>::iterator it = url_list.begin(); 269 it != url_list.end(); ++it) { 270 base::AutoLock lock(lock_); 271 272 GURL canonical_url; 273 // Test whether any stored URL is a prefix of |url|. 274 canonical_url = thread_safe_cache_->GetGeneralizedCanonicalURL(*it); 275 if (!canonical_url.is_empty() && 276 thread_safe_cache_->GetPageThumbnail(canonical_url, bytes)) { 277 return true; 278 } 279 } 280 } 281 282 return false; 283 } 284 285 bool TopSitesImpl::GetPageThumbnailScore(const GURL& url, 286 ThumbnailScore* score) { 287 // WARNING: this may be invoked on any thread. 288 base::AutoLock lock(lock_); 289 return thread_safe_cache_->GetPageThumbnailScore(url, score); 290 } 291 292 bool TopSitesImpl::GetTemporaryPageThumbnailScore(const GURL& url, 293 ThumbnailScore* score) { 294 for (TempImages::iterator i = temp_images_.begin(); i != temp_images_.end(); 295 ++i) { 296 if (i->first == url) { 297 *score = i->second.thumbnail_score; 298 return true; 299 } 300 } 301 return false; 302 } 303 304 305 // Returns the index of |url| in |urls|, or -1 if not found. 306 static int IndexOf(const MostVisitedURLList& urls, const GURL& url) { 307 for (size_t i = 0; i < urls.size(); i++) { 308 if (urls[i].url == url) 309 return i; 310 } 311 return -1; 312 } 313 314 void TopSitesImpl::SyncWithHistory() { 315 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 316 if (loaded_ && temp_images_.size()) { 317 // If we have temporary thumbnails it means there isn't much data, and most 318 // likely the user is first running Chrome. During this time we throttle 319 // updating from history by 30 seconds. If the user creates a new tab page 320 // during this window of time we force updating from history so that the new 321 // tab page isn't so far out of date. 322 timer_.Stop(); 323 StartQueryForMostVisited(); 324 } 325 } 326 327 bool TopSitesImpl::HasBlacklistedItems() const { 328 const DictionaryValue* blacklist = 329 profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist); 330 return blacklist && !blacklist->empty(); 331 } 332 333 void TopSitesImpl::AddBlacklistedURL(const GURL& url) { 334 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 335 336 Value* dummy = Value::CreateNullValue(); 337 { 338 DictionaryPrefUpdate update(profile_->GetPrefs(), 339 prefs::kNtpMostVisitedURLsBlacklist); 340 DictionaryValue* blacklist = update.Get(); 341 blacklist->SetWithoutPathExpansion(GetURLHash(url), dummy); 342 } 343 344 ResetThreadSafeCache(); 345 NotifyTopSitesChanged(); 346 } 347 348 void TopSitesImpl::RemoveBlacklistedURL(const GURL& url) { 349 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 350 { 351 DictionaryPrefUpdate update(profile_->GetPrefs(), 352 prefs::kNtpMostVisitedURLsBlacklist); 353 DictionaryValue* blacklist = update.Get(); 354 blacklist->RemoveWithoutPathExpansion(GetURLHash(url), NULL); 355 } 356 ResetThreadSafeCache(); 357 NotifyTopSitesChanged(); 358 } 359 360 bool TopSitesImpl::IsBlacklisted(const GURL& url) { 361 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 362 const DictionaryValue* blacklist = 363 profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist); 364 return blacklist && blacklist->HasKey(GetURLHash(url)); 365 } 366 367 void TopSitesImpl::ClearBlacklistedURLs() { 368 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 369 { 370 DictionaryPrefUpdate update(profile_->GetPrefs(), 371 prefs::kNtpMostVisitedURLsBlacklist); 372 DictionaryValue* blacklist = update.Get(); 373 blacklist->Clear(); 374 } 375 ResetThreadSafeCache(); 376 NotifyTopSitesChanged(); 377 } 378 379 void TopSitesImpl::Shutdown() { 380 profile_ = NULL; 381 // Cancel all requests so that the service doesn't callback to us after we've 382 // invoked Shutdown (this could happen if we have a pending request and 383 // Shutdown is invoked). 384 history_consumer_.CancelAllRequests(); 385 backend_->Shutdown(); 386 } 387 388 // static 389 void TopSitesImpl::DiffMostVisited(const MostVisitedURLList& old_list, 390 const MostVisitedURLList& new_list, 391 TopSitesDelta* delta) { 392 393 // Add all the old URLs for quick lookup. This maps URLs to the corresponding 394 // index in the input. 395 std::map<GURL, size_t> all_old_urls; 396 size_t num_old_forced = 0; 397 for (size_t i = 0; i < old_list.size(); i++) { 398 if (!old_list[i].last_forced_time.is_null()) 399 num_old_forced++; 400 DCHECK(old_list[i].last_forced_time.is_null() || i < num_old_forced) 401 << "Forced URLs must all appear before non-forced URLs."; 402 all_old_urls[old_list[i].url] = i; 403 } 404 405 // Check all the URLs in the new set to see which ones are new or just moved. 406 // When we find a match in the old set, we'll reset its index to our special 407 // marker. This allows us to quickly identify the deleted ones in a later 408 // pass. 409 const size_t kAlreadyFoundMarker = static_cast<size_t>(-1); 410 int rank = -1; // Forced URLs have a rank of -1. 411 for (size_t i = 0; i < new_list.size(); i++) { 412 // Increase the rank if we're going through forced URLs. This works because 413 // non-forced URLs all come after forced URLs. 414 if (new_list[i].last_forced_time.is_null()) 415 rank++; 416 DCHECK(new_list[i].last_forced_time.is_null() == (rank != -1)) 417 << "Forced URLs must all appear before non-forced URLs."; 418 std::map<GURL, size_t>::iterator found = all_old_urls.find(new_list[i].url); 419 if (found == all_old_urls.end()) { 420 MostVisitedURLWithRank added; 421 added.url = new_list[i]; 422 added.rank = rank; 423 delta->added.push_back(added); 424 } else { 425 DCHECK(found->second != kAlreadyFoundMarker) 426 << "Same URL appears twice in the new list."; 427 int old_rank = found->second >= num_old_forced ? 428 found->second - num_old_forced : -1; 429 if (old_rank != rank || 430 old_list[found->second].last_forced_time != 431 new_list[i].last_forced_time) { 432 MostVisitedURLWithRank moved; 433 moved.url = new_list[i]; 434 moved.rank = rank; 435 delta->moved.push_back(moved); 436 } 437 found->second = kAlreadyFoundMarker; 438 } 439 } 440 441 // Any member without the special marker in the all_old_urls list means that 442 // there wasn't a "new" URL that mapped to it, so it was deleted. 443 for (std::map<GURL, size_t>::const_iterator i = all_old_urls.begin(); 444 i != all_old_urls.end(); ++i) { 445 if (i->second != kAlreadyFoundMarker) 446 delta->deleted.push_back(old_list[i->second]); 447 } 448 } 449 450 CancelableRequestProvider::Handle TopSitesImpl::StartQueryForMostVisited() { 451 DCHECK(loaded_); 452 if (!profile_) 453 return 0; 454 455 HistoryService* hs = HistoryServiceFactory::GetForProfile( 456 profile_, Profile::EXPLICIT_ACCESS); 457 // |hs| may be null during unit tests. 458 if (hs) { 459 return hs->QueryMostVisitedURLs( 460 num_results_to_request_from_history(), 461 kDaysOfHistory, 462 &history_consumer_, 463 base::Bind(&TopSitesImpl::OnTopSitesAvailableFromHistory, 464 base::Unretained(this))); 465 } 466 return 0; 467 } 468 469 bool TopSitesImpl::IsKnownURL(const GURL& url) { 470 return loaded_ && cache_->IsKnownURL(url); 471 } 472 473 const std::string& TopSitesImpl::GetCanonicalURLString(const GURL& url) const { 474 return cache_->GetCanonicalURL(url).spec(); 475 } 476 477 bool TopSitesImpl::IsNonForcedFull() { 478 return loaded_ && cache_->GetNumNonForcedURLs() >= kNonForcedTopSitesNumber; 479 } 480 481 bool TopSitesImpl::IsForcedFull() { 482 return loaded_ && cache_->GetNumForcedURLs() >= kForcedTopSitesNumber; 483 } 484 485 TopSitesImpl::~TopSitesImpl() { 486 } 487 488 bool TopSitesImpl::SetPageThumbnailNoDB( 489 const GURL& url, 490 const base::RefCountedMemory* thumbnail_data, 491 const ThumbnailScore& score) { 492 // This should only be invoked when we know about the url. 493 DCHECK(cache_->IsKnownURL(url)); 494 495 const MostVisitedURL& most_visited = 496 cache_->top_sites()[cache_->GetURLIndex(url)]; 497 Images* image = cache_->GetImage(url); 498 499 // When comparing the thumbnail scores, we need to take into account the 500 // redirect hops, which are not generated when the thumbnail is because the 501 // redirects weren't known. We fill that in here since we know the redirects. 502 ThumbnailScore new_score_with_redirects(score); 503 new_score_with_redirects.redirect_hops_from_dest = 504 GetRedirectDistanceForURL(most_visited, url); 505 506 if (!ShouldReplaceThumbnailWith(image->thumbnail_score, 507 new_score_with_redirects) && 508 image->thumbnail.get()) 509 return false; // The one we already have is better. 510 511 image->thumbnail = const_cast<base::RefCountedMemory*>(thumbnail_data); 512 image->thumbnail_score = new_score_with_redirects; 513 514 ResetThreadSafeImageCache(); 515 return true; 516 } 517 518 bool TopSitesImpl::SetPageThumbnailEncoded( 519 const GURL& url, 520 const base::RefCountedMemory* thumbnail, 521 const ThumbnailScore& score) { 522 if (!SetPageThumbnailNoDB(url, thumbnail, score)) 523 return false; 524 525 // Update the database. 526 if (!cache_->IsKnownURL(url)) 527 return false; 528 529 size_t index = cache_->GetURLIndex(url); 530 int url_rank = index - cache_->GetNumForcedURLs(); 531 const MostVisitedURL& most_visited = cache_->top_sites()[index]; 532 backend_->SetPageThumbnail(most_visited, 533 url_rank < 0 ? -1 : url_rank, 534 *(cache_->GetImage(most_visited.url))); 535 return true; 536 } 537 538 // static 539 bool TopSitesImpl::EncodeBitmap(const gfx::Image& bitmap, 540 scoped_refptr<base::RefCountedBytes>* bytes) { 541 if (bitmap.IsEmpty()) 542 return false; 543 *bytes = new base::RefCountedBytes(); 544 std::vector<unsigned char> data; 545 if (!gfx::JPEG1xEncodedDataFromImage(bitmap, kTopSitesImageQuality, &data)) 546 return false; 547 548 // As we're going to cache this data, make sure the vector is only as big as 549 // it needs to be, as JPEGCodec::Encode() over-allocates data.capacity(). 550 // (In a C++0x future, we can just call shrink_to_fit() in Encode()) 551 (*bytes)->data() = data; 552 return true; 553 } 554 555 void TopSitesImpl::RemoveTemporaryThumbnailByURL(const GURL& url) { 556 for (TempImages::iterator i = temp_images_.begin(); i != temp_images_.end(); 557 ++i) { 558 if (i->first == url) { 559 temp_images_.erase(i); 560 return; 561 } 562 } 563 } 564 565 void TopSitesImpl::AddTemporaryThumbnail( 566 const GURL& url, 567 const base::RefCountedMemory* thumbnail, 568 const ThumbnailScore& score) { 569 if (temp_images_.size() == kMaxTempTopImages) 570 temp_images_.erase(temp_images_.begin()); 571 572 TempImage image; 573 image.first = url; 574 image.second.thumbnail = const_cast<base::RefCountedMemory*>(thumbnail); 575 image.second.thumbnail_score = score; 576 temp_images_.push_back(image); 577 } 578 579 void TopSitesImpl::TimerFired() { 580 StartQueryForMostVisited(); 581 } 582 583 // static 584 int TopSitesImpl::GetRedirectDistanceForURL(const MostVisitedURL& most_visited, 585 const GURL& url) { 586 for (size_t i = 0; i < most_visited.redirects.size(); i++) { 587 if (most_visited.redirects[i] == url) 588 return static_cast<int>(most_visited.redirects.size() - i - 1); 589 } 590 NOTREACHED() << "URL should always be found."; 591 return 0; 592 } 593 594 MostVisitedURLList TopSitesImpl::GetPrepopulatePages() { 595 MostVisitedURLList urls; 596 urls.resize(arraysize(kPrepopulatedPages)); 597 for (size_t i = 0; i < urls.size(); ++i) { 598 MostVisitedURL& url = urls[i]; 599 url.url = GURL(prepopulated_page_urls_[i]); 600 url.redirects.push_back(url.url); 601 url.title = l10n_util::GetStringUTF16(kPrepopulatedPages[i].title_id); 602 } 603 return urls; 604 } 605 606 bool TopSitesImpl::loaded() const { 607 return loaded_; 608 } 609 610 bool TopSitesImpl::AddForcedURL(const GURL& url, const base::Time& time) { 611 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 612 size_t num_forced = cache_->GetNumForcedURLs(); 613 MostVisitedURLList new_list(cache_->top_sites()); 614 MostVisitedURL new_url; 615 616 if (cache_->IsKnownURL(url)) { 617 size_t index = cache_->GetURLIndex(url); 618 // Do nothing if we currently have that URL as non-forced. 619 if (new_list[index].last_forced_time.is_null()) 620 return false; 621 622 // Update the |last_forced_time| of the already existing URL. Delete it and 623 // reinsert it at the right location. 624 new_url = new_list[index]; 625 new_list.erase(new_list.begin() + index); 626 num_forced--; 627 } else { 628 new_url.url = url; 629 new_url.redirects.push_back(url); 630 } 631 new_url.last_forced_time = time; 632 // Add forced URLs and sort. Added to the end of the list of forced URLs 633 // since this is almost always where it needs to go, unless the user's local 634 // clock is fiddled with. 635 MostVisitedURLList::iterator mid = new_list.begin() + num_forced; 636 new_list.insert(mid, new_url); 637 mid = new_list.begin() + num_forced; // Mid was invalidated. 638 std::inplace_merge(new_list.begin(), mid, mid + 1, ForcedURLComparator); 639 SetTopSites(new_list); 640 return true; 641 } 642 643 bool TopSitesImpl::AddPrepopulatedPages(MostVisitedURLList* urls, 644 size_t num_forced_urls) { 645 bool added = false; 646 MostVisitedURLList prepopulate_urls = GetPrepopulatePages(); 647 for (size_t i = 0; i < prepopulate_urls.size(); ++i) { 648 if (urls->size() - num_forced_urls < kNonForcedTopSitesNumber && 649 IndexOf(*urls, prepopulate_urls[i].url) == -1) { 650 urls->push_back(prepopulate_urls[i]); 651 added = true; 652 } 653 } 654 return added; 655 } 656 657 size_t TopSitesImpl::MergeCachedForcedURLs(MostVisitedURLList* new_list) { 658 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 659 // Add all the new URLs for quick lookup. Take that opportunity to count the 660 // number of forced URLs in |new_list|. 661 std::set<GURL> all_new_urls; 662 size_t num_forced = 0; 663 for (size_t i = 0; i < new_list->size(); ++i) { 664 all_new_urls.insert((*new_list)[i].url); 665 if (!(*new_list)[i].last_forced_time.is_null()) 666 ++num_forced; 667 } 668 669 // Keep the forced URLs from |cache_| that are not found in |new_list|. 670 MostVisitedURLList filtered_forced_urls; 671 for (size_t i = 0; i < cache_->GetNumForcedURLs(); ++i) { 672 if (all_new_urls.find(cache_->top_sites()[i].url) == all_new_urls.end()) 673 filtered_forced_urls.push_back(cache_->top_sites()[i]); 674 } 675 num_forced += filtered_forced_urls.size(); 676 677 // Prepend forced URLs and sort in order of ascending |last_forced_time|. 678 new_list->insert(new_list->begin(), filtered_forced_urls.begin(), 679 filtered_forced_urls.end()); 680 std::inplace_merge( 681 new_list->begin(), new_list->begin() + filtered_forced_urls.size(), 682 new_list->begin() + num_forced, ForcedURLComparator); 683 684 // Drop older forced URLs if the list overflows. Since forced URLs are always 685 // sort in increasing order of |last_forced_time|, drop the first ones. 686 if (num_forced > kForcedTopSitesNumber) { 687 new_list->erase(new_list->begin(), 688 new_list->begin() + (num_forced - kForcedTopSitesNumber)); 689 num_forced = kForcedTopSitesNumber; 690 } 691 692 return num_forced; 693 } 694 695 void TopSitesImpl::ApplyBlacklist(const MostVisitedURLList& urls, 696 MostVisitedURLList* out) { 697 // Log the number of times ApplyBlacklist is called so we can compute the 698 // average number of blacklisted items per user. 699 const DictionaryValue* blacklist = 700 profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist); 701 UMA_HISTOGRAM_BOOLEAN("TopSites.NumberOfApplyBlacklist", true); 702 UMA_HISTOGRAM_COUNTS_100("TopSites.NumberOfBlacklistedItems", 703 (blacklist ? blacklist->size() : 0)); 704 size_t num_non_forced_urls = 0; 705 size_t num_forced_urls = 0; 706 for (size_t i = 0; i < urls.size(); ++i) { 707 if (!IsBlacklisted(urls[i].url)) { 708 if (urls[i].last_forced_time.is_null()) { 709 // Non-forced URL. 710 if (num_non_forced_urls >= kNonForcedTopSitesNumber) 711 continue; 712 num_non_forced_urls++; 713 } else { 714 // Forced URL. 715 if (num_forced_urls >= kForcedTopSitesNumber) 716 continue; 717 num_forced_urls++; 718 } 719 out->push_back(urls[i]); 720 } 721 } 722 } 723 724 std::string TopSitesImpl::GetURLHash(const GURL& url) { 725 // We don't use canonical URLs here to be able to blacklist only one of 726 // the two 'duplicate' sites, e.g. 'gmail.com' and 'mail.google.com'. 727 return base::MD5String(url.spec()); 728 } 729 730 base::TimeDelta TopSitesImpl::GetUpdateDelay() { 731 if (cache_->top_sites().size() <= arraysize(kPrepopulatedPages)) 732 return base::TimeDelta::FromSeconds(30); 733 734 int64 range = kMaxUpdateIntervalMinutes - kMinUpdateIntervalMinutes; 735 int64 minutes = kMaxUpdateIntervalMinutes - 736 last_num_urls_changed_ * range / cache_->top_sites().size(); 737 return base::TimeDelta::FromMinutes(minutes); 738 } 739 740 void TopSitesImpl::Observe(int type, 741 const content::NotificationSource& source, 742 const content::NotificationDetails& details) { 743 if (!loaded_) 744 return; 745 746 if (type == chrome::NOTIFICATION_HISTORY_URLS_DELETED) { 747 content::Details<history::URLsDeletedDetails> deleted_details(details); 748 if (deleted_details->all_history) { 749 SetTopSites(MostVisitedURLList()); 750 backend_->ResetDatabase(); 751 } else { 752 std::set<size_t> indices_to_delete; // Indices into top_sites_. 753 for (URLRows::const_iterator i = deleted_details->rows.begin(); 754 i != deleted_details->rows.end(); ++i) { 755 if (cache_->IsKnownURL(i->url())) 756 indices_to_delete.insert(cache_->GetURLIndex(i->url())); 757 } 758 759 if (indices_to_delete.empty()) 760 return; 761 762 MostVisitedURLList new_top_sites(cache_->top_sites()); 763 for (std::set<size_t>::reverse_iterator i = indices_to_delete.rbegin(); 764 i != indices_to_delete.rend(); i++) { 765 new_top_sites.erase(new_top_sites.begin() + *i); 766 } 767 SetTopSites(new_top_sites); 768 } 769 StartQueryForMostVisited(); 770 } else if (type == content::NOTIFICATION_NAV_ENTRY_COMMITTED) { 771 NavigationController* controller = 772 content::Source<NavigationController>(source).ptr(); 773 Profile* profile = Profile::FromBrowserContext( 774 controller->GetWebContents()->GetBrowserContext()); 775 if (profile == profile_ && !IsNonForcedFull()) { 776 content::LoadCommittedDetails* load_details = 777 content::Details<content::LoadCommittedDetails>(details).ptr(); 778 if (!load_details) 779 return; 780 const GURL& url = load_details->entry->GetURL(); 781 if (!cache_->IsKnownURL(url) && HistoryService::CanAddURL(url)) { 782 // To avoid slamming history we throttle requests when the url updates. 783 // To do otherwise negatively impacts perf tests. 784 RestartQueryForTopSitesTimer(GetUpdateDelay()); 785 } 786 } 787 } 788 } 789 790 void TopSitesImpl::SetTopSites(const MostVisitedURLList& new_top_sites) { 791 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 792 793 MostVisitedURLList top_sites(new_top_sites); 794 size_t num_forced_urls = MergeCachedForcedURLs(&top_sites); 795 AddPrepopulatedPages(&top_sites, num_forced_urls); 796 797 TopSitesDelta delta; 798 DiffMostVisited(cache_->top_sites(), top_sites, &delta); 799 if (!delta.deleted.empty() || !delta.added.empty() || !delta.moved.empty()) { 800 backend_->UpdateTopSites(delta); 801 } 802 803 last_num_urls_changed_ = delta.added.size() + delta.moved.size(); 804 805 // We always do the following steps (setting top sites in cache, and resetting 806 // thread safe cache ...) as this method is invoked during startup at which 807 // point the caches haven't been updated yet. 808 cache_->SetTopSites(top_sites); 809 810 // See if we have any tmp thumbnails for the new sites. 811 if (!temp_images_.empty()) { 812 for (size_t i = 0; i < top_sites.size(); ++i) { 813 const MostVisitedURL& mv = top_sites[i]; 814 GURL canonical_url = cache_->GetCanonicalURL(mv.url); 815 // At the time we get the thumbnail redirects aren't known, so we have to 816 // iterate through all the images. 817 for (TempImages::iterator it = temp_images_.begin(); 818 it != temp_images_.end(); ++it) { 819 if (canonical_url == cache_->GetCanonicalURL(it->first)) { 820 SetPageThumbnailEncoded( 821 mv.url, it->second.thumbnail.get(), it->second.thumbnail_score); 822 temp_images_.erase(it); 823 break; 824 } 825 } 826 } 827 } 828 829 if (top_sites.size() - num_forced_urls >= kNonForcedTopSitesNumber) 830 temp_images_.clear(); 831 832 ResetThreadSafeCache(); 833 ResetThreadSafeImageCache(); 834 NotifyTopSitesChanged(); 835 836 // Restart the timer that queries history for top sites. This is done to 837 // ensure we stay in sync with history. 838 RestartQueryForTopSitesTimer(GetUpdateDelay()); 839 } 840 841 int TopSitesImpl::num_results_to_request_from_history() const { 842 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 843 844 const DictionaryValue* blacklist = 845 profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist); 846 return kNonForcedTopSitesNumber + (blacklist ? blacklist->size() : 0); 847 } 848 849 void TopSitesImpl::MoveStateToLoaded() { 850 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 851 852 MostVisitedURLList filtered_urls_all; 853 MostVisitedURLList filtered_urls_nonforced; 854 PendingCallbacks pending_callbacks; 855 { 856 base::AutoLock lock(lock_); 857 858 if (loaded_) 859 return; // Don't do anything if we're already loaded. 860 loaded_ = true; 861 862 // Now that we're loaded we can service the queued up callbacks. Copy them 863 // here and service them outside the lock. 864 if (!pending_callbacks_.empty()) { 865 // We always filter out forced URLs because callers of GetMostVisitedURLs 866 // are not interested in them. 867 filtered_urls_all = thread_safe_cache_->top_sites(); 868 filtered_urls_nonforced.assign(thread_safe_cache_->top_sites().begin() + 869 thread_safe_cache_->GetNumForcedURLs(), 870 thread_safe_cache_->top_sites().end()); 871 pending_callbacks.swap(pending_callbacks_); 872 } 873 } 874 875 for (size_t i = 0; i < pending_callbacks.size(); i++) 876 pending_callbacks[i].Run(filtered_urls_all, filtered_urls_nonforced); 877 878 content::NotificationService::current()->Notify( 879 chrome::NOTIFICATION_TOP_SITES_LOADED, 880 content::Source<Profile>(profile_), 881 content::Details<TopSites>(this)); 882 } 883 884 void TopSitesImpl::ResetThreadSafeCache() { 885 base::AutoLock lock(lock_); 886 MostVisitedURLList cached; 887 ApplyBlacklist(cache_->top_sites(), &cached); 888 thread_safe_cache_->SetTopSites(cached); 889 } 890 891 void TopSitesImpl::ResetThreadSafeImageCache() { 892 base::AutoLock lock(lock_); 893 thread_safe_cache_->SetThumbnails(cache_->images()); 894 } 895 896 void TopSitesImpl::NotifyTopSitesChanged() { 897 content::NotificationService::current()->Notify( 898 chrome::NOTIFICATION_TOP_SITES_CHANGED, 899 content::Source<TopSites>(this), 900 content::NotificationService::NoDetails()); 901 } 902 903 void TopSitesImpl::RestartQueryForTopSitesTimer(base::TimeDelta delta) { 904 if (timer_.IsRunning() && ((timer_start_time_ + timer_.GetCurrentDelay()) < 905 (base::TimeTicks::Now() + delta))) { 906 return; 907 } 908 909 timer_start_time_ = base::TimeTicks::Now(); 910 timer_.Stop(); 911 timer_.Start(FROM_HERE, delta, this, &TopSitesImpl::TimerFired); 912 } 913 914 void TopSitesImpl::OnGotMostVisitedThumbnails( 915 const scoped_refptr<MostVisitedThumbnails>& thumbnails) { 916 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 917 918 // Set the top sites directly in the cache so that SetTopSites diffs 919 // correctly. 920 cache_->SetTopSites(thumbnails->most_visited); 921 SetTopSites(thumbnails->most_visited); 922 cache_->SetThumbnails(thumbnails->url_to_images_map); 923 924 ResetThreadSafeImageCache(); 925 926 MoveStateToLoaded(); 927 928 // Start a timer that refreshes top sites from history. 929 RestartQueryForTopSitesTimer( 930 base::TimeDelta::FromSeconds(kUpdateIntervalSecs)); 931 } 932 933 void TopSitesImpl::OnTopSitesAvailableFromHistory( 934 CancelableRequestProvider::Handle handle, 935 MostVisitedURLList pages) { 936 SetTopSites(pages); 937 938 // Used only in testing. 939 content::NotificationService::current()->Notify( 940 chrome::NOTIFICATION_TOP_SITES_UPDATED, 941 content::Source<TopSitesImpl>(this), 942 content::Details<CancelableRequestProvider::Handle>(&handle)); 943 } 944 945 } // namespace history 946