Home | History | Annotate | Download | only in history
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/history/top_sites_impl.h"
      6 
      7 #include <algorithm>
      8 #include <set>
      9 
     10 #include "base/bind.h"
     11 #include "base/bind_helpers.h"
     12 #include "base/logging.h"
     13 #include "base/md5.h"
     14 #include "base/memory/ref_counted_memory.h"
     15 #include "base/message_loop/message_loop_proxy.h"
     16 #include "base/metrics/histogram.h"
     17 #include "base/prefs/pref_service.h"
     18 #include "base/prefs/scoped_user_pref_update.h"
     19 #include "base/strings/string_util.h"
     20 #include "base/strings/utf_string_conversions.h"
     21 #include "base/task_runner.h"
     22 #include "base/values.h"
     23 #include "chrome/browser/chrome_notification_types.h"
     24 #include "chrome/browser/history/history_backend.h"
     25 #include "chrome/browser/history/history_db_task.h"
     26 #include "chrome/browser/history/history_notifications.h"
     27 #include "chrome/browser/history/history_service_factory.h"
     28 #include "chrome/browser/history/top_sites_cache.h"
     29 #include "chrome/browser/history/url_utils.h"
     30 #include "chrome/browser/profiles/profile.h"
     31 #include "chrome/common/pref_names.h"
     32 #include "components/history/core/browser/page_usage_data.h"
     33 #include "components/history/core/common/thumbnail_score.h"
     34 #include "content/public/browser/browser_thread.h"
     35 #include "content/public/browser/navigation_controller.h"
     36 #include "content/public/browser/navigation_details.h"
     37 #include "content/public/browser/navigation_entry.h"
     38 #include "content/public/browser/notification_service.h"
     39 #include "content/public/browser/web_contents.h"
     40 #include "ui/base/l10n/l10n_util.h"
     41 #include "ui/base/layout.h"
     42 #include "ui/base/resource/resource_bundle.h"
     43 #include "ui/gfx/image/image_util.h"
     44 
     45 using base::DictionaryValue;
     46 using content::BrowserThread;
     47 using content::NavigationController;
     48 
     49 namespace history {
     50 
     51 namespace {
     52 
     53 void RunOrPostGetMostVisitedURLsCallback(
     54     base::TaskRunner* task_runner,
     55     bool include_forced_urls,
     56     const TopSitesImpl::GetMostVisitedURLsCallback& callback,
     57     const MostVisitedURLList& all_urls,
     58     const MostVisitedURLList& nonforced_urls) {
     59   const MostVisitedURLList* urls =
     60       include_forced_urls ? &all_urls : &nonforced_urls;
     61   if (task_runner->RunsTasksOnCurrentThread())
     62     callback.Run(*urls);
     63   else
     64     task_runner->PostTask(FROM_HERE, base::Bind(callback, *urls));
     65 }
     66 
     67 // Compares two MostVisitedURL having a non-null |last_forced_time|.
     68 bool ForcedURLComparator(const MostVisitedURL& first,
     69                          const MostVisitedURL& second) {
     70   DCHECK(!first.last_forced_time.is_null() &&
     71          !second.last_forced_time.is_null());
     72   return first.last_forced_time < second.last_forced_time;
     73 }
     74 
     75 }  // namespace
     76 
     77 // How many non-forced top sites to store in the cache.
     78 static const size_t kNonForcedTopSitesNumber = 20;
     79 
     80 // How many forced top sites to store in the cache.
     81 static const size_t kForcedTopSitesNumber = 20;
     82 
     83 // Max number of temporary images we'll cache. See comment above
     84 // temp_images_ for details.
     85 static const size_t kMaxTempTopImages = 8;
     86 
     87 static const int kDaysOfHistory = 90;
     88 // Time from startup to first HistoryService query.
     89 static const int64 kUpdateIntervalSecs = 15;
     90 // Intervals between requests to HistoryService.
     91 static const int64 kMinUpdateIntervalMinutes = 1;
     92 static const int64 kMaxUpdateIntervalMinutes = 60;
     93 
     94 // Use 100 quality (highest quality) because we're very sensitive to
     95 // artifacts for these small sized, highly detailed images.
     96 static const int kTopSitesImageQuality = 100;
     97 
     98 TopSitesImpl::TopSitesImpl(Profile* profile)
     99     : backend_(NULL),
    100       cache_(new TopSitesCache()),
    101       thread_safe_cache_(new TopSitesCache()),
    102       profile_(profile),
    103       last_num_urls_changed_(0),
    104       loaded_(false) {
    105   if (!profile_)
    106     return;
    107 
    108   if (content::NotificationService::current()) {
    109     registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED,
    110                    content::Source<Profile>(profile_));
    111     // Listen for any nav commits. We'll ignore those not related to this
    112     // profile when we get the notification.
    113     registrar_.Add(this, content::NOTIFICATION_NAV_ENTRY_COMMITTED,
    114                    content::NotificationService::AllSources());
    115   }
    116   for (size_t i = 0; i < arraysize(kPrepopulatedPages); i++) {
    117     int url_id = kPrepopulatedPages[i].url_id;
    118     prepopulated_page_urls_.push_back(
    119         GURL(l10n_util::GetStringUTF8(url_id)));
    120   }
    121 }
    122 
    123 void TopSitesImpl::Init(const base::FilePath& db_name) {
    124   // Create the backend here, rather than in the constructor, so that
    125   // unit tests that do not need the backend can run without a problem.
    126   backend_ = new TopSitesBackend;
    127   backend_->Init(db_name);
    128   backend_->GetMostVisitedThumbnails(
    129       base::Bind(&TopSitesImpl::OnGotMostVisitedThumbnails,
    130                  base::Unretained(this)),
    131       &cancelable_task_tracker_);
    132 }
    133 
    134 bool TopSitesImpl::SetPageThumbnail(const GURL& url,
    135                                     const gfx::Image& thumbnail,
    136                                     const ThumbnailScore& score) {
    137   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    138 
    139   if (!loaded_) {
    140     // TODO(sky): I need to cache these and apply them after the load
    141     // completes.
    142     return false;
    143   }
    144 
    145   bool add_temp_thumbnail = false;
    146   if (!IsKnownURL(url)) {
    147     if (!IsNonForcedFull()) {
    148       add_temp_thumbnail = true;
    149     } else {
    150       return false;  // This URL is not known to us.
    151     }
    152   }
    153 
    154   if (!HistoryService::CanAddURL(url))
    155     return false;  // It's not a real webpage.
    156 
    157   scoped_refptr<base::RefCountedBytes> thumbnail_data;
    158   if (!EncodeBitmap(thumbnail, &thumbnail_data))
    159     return false;
    160 
    161   if (add_temp_thumbnail) {
    162     // Always remove the existing entry and then add it back. That way if we end
    163     // up with too many temp thumbnails we'll prune the oldest first.
    164     RemoveTemporaryThumbnailByURL(url);
    165     AddTemporaryThumbnail(url, thumbnail_data.get(), score);
    166     return true;
    167   }
    168 
    169   return SetPageThumbnailEncoded(url, thumbnail_data.get(), score);
    170 }
    171 
    172 bool TopSitesImpl::SetPageThumbnailToJPEGBytes(
    173     const GURL& url,
    174     const base::RefCountedMemory* memory,
    175     const ThumbnailScore& score) {
    176   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    177 
    178   if (!loaded_) {
    179     // TODO(sky): I need to cache these and apply them after the load
    180     // completes.
    181     return false;
    182   }
    183 
    184   bool add_temp_thumbnail = false;
    185   if (!IsKnownURL(url)) {
    186     if (!IsNonForcedFull()) {
    187       add_temp_thumbnail = true;
    188     } else {
    189       return false;  // This URL is not known to us.
    190     }
    191   }
    192 
    193   if (!HistoryService::CanAddURL(url))
    194     return false;  // It's not a real webpage.
    195 
    196   if (add_temp_thumbnail) {
    197     // Always remove the existing entry and then add it back. That way if we end
    198     // up with too many temp thumbnails we'll prune the oldest first.
    199     RemoveTemporaryThumbnailByURL(url);
    200     AddTemporaryThumbnail(url, memory, score);
    201     return true;
    202   }
    203 
    204   return SetPageThumbnailEncoded(url, memory, score);
    205 }
    206 
    207 // WARNING: this function may be invoked on any thread.
    208 void TopSitesImpl::GetMostVisitedURLs(
    209     const GetMostVisitedURLsCallback& callback,
    210     bool include_forced_urls) {
    211   MostVisitedURLList filtered_urls;
    212   {
    213     base::AutoLock lock(lock_);
    214     if (!loaded_) {
    215       // A request came in before we finished loading. Store the callback and
    216       // we'll run it on current thread when we finish loading.
    217       pending_callbacks_.push_back(
    218           base::Bind(&RunOrPostGetMostVisitedURLsCallback,
    219                      base::MessageLoopProxy::current(),
    220                      include_forced_urls,
    221                      callback));
    222       return;
    223     }
    224     if (include_forced_urls) {
    225       filtered_urls = thread_safe_cache_->top_sites();
    226     } else {
    227       filtered_urls.assign(thread_safe_cache_->top_sites().begin() +
    228                               thread_safe_cache_->GetNumForcedURLs(),
    229                            thread_safe_cache_->top_sites().end());
    230     }
    231   }
    232   callback.Run(filtered_urls);
    233 }
    234 
    235 bool TopSitesImpl::GetPageThumbnail(
    236     const GURL& url,
    237     bool prefix_match,
    238     scoped_refptr<base::RefCountedMemory>* bytes) {
    239   // WARNING: this may be invoked on any thread.
    240   // Perform exact match.
    241   {
    242     base::AutoLock lock(lock_);
    243     if (thread_safe_cache_->GetPageThumbnail(url, bytes))
    244       return true;
    245   }
    246 
    247   // Resource bundle is thread safe.
    248   for (size_t i = 0; i < arraysize(kPrepopulatedPages); i++) {
    249     if (url == prepopulated_page_urls_[i]) {
    250       *bytes = ResourceBundle::GetSharedInstance().
    251           LoadDataResourceBytesForScale(
    252               kPrepopulatedPages[i].thumbnail_id,
    253               ui::SCALE_FACTOR_100P);
    254       return true;
    255     }
    256   }
    257 
    258   if (prefix_match) {
    259     // If http or https, search with |url| first, then try the other one.
    260     std::vector<GURL> url_list;
    261     url_list.push_back(url);
    262     if (url.SchemeIsHTTPOrHTTPS())
    263       url_list.push_back(ToggleHTTPAndHTTPS(url));
    264 
    265     for (std::vector<GURL>::iterator it = url_list.begin();
    266          it != url_list.end(); ++it) {
    267       base::AutoLock lock(lock_);
    268 
    269       GURL canonical_url;
    270       // Test whether any stored URL is a prefix of |url|.
    271       canonical_url = thread_safe_cache_->GetGeneralizedCanonicalURL(*it);
    272       if (!canonical_url.is_empty() &&
    273           thread_safe_cache_->GetPageThumbnail(canonical_url, bytes)) {
    274         return true;
    275       }
    276     }
    277   }
    278 
    279   return false;
    280 }
    281 
    282 bool TopSitesImpl::GetPageThumbnailScore(const GURL& url,
    283                                          ThumbnailScore* score) {
    284   // WARNING: this may be invoked on any thread.
    285   base::AutoLock lock(lock_);
    286   return thread_safe_cache_->GetPageThumbnailScore(url, score);
    287 }
    288 
    289 bool TopSitesImpl::GetTemporaryPageThumbnailScore(const GURL& url,
    290                                                   ThumbnailScore* score) {
    291   for (TempImages::iterator i = temp_images_.begin(); i != temp_images_.end();
    292        ++i) {
    293     if (i->first == url) {
    294       *score = i->second.thumbnail_score;
    295       return true;
    296     }
    297   }
    298   return false;
    299 }
    300 
    301 
    302 // Returns the index of |url| in |urls|, or -1 if not found.
    303 static int IndexOf(const MostVisitedURLList& urls, const GURL& url) {
    304   for (size_t i = 0; i < urls.size(); i++) {
    305     if (urls[i].url == url)
    306       return i;
    307   }
    308   return -1;
    309 }
    310 
    311 void TopSitesImpl::SyncWithHistory() {
    312   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    313   if (loaded_ && temp_images_.size()) {
    314     // If we have temporary thumbnails it means there isn't much data, and most
    315     // likely the user is first running Chrome. During this time we throttle
    316     // updating from history by 30 seconds. If the user creates a new tab page
    317     // during this window of time we force updating from history so that the new
    318     // tab page isn't so far out of date.
    319     timer_.Stop();
    320     StartQueryForMostVisited();
    321   }
    322 }
    323 
    324 bool TopSitesImpl::HasBlacklistedItems() const {
    325   const base::DictionaryValue* blacklist =
    326       profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
    327   return blacklist && !blacklist->empty();
    328 }
    329 
    330 void TopSitesImpl::AddBlacklistedURL(const GURL& url) {
    331   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    332 
    333   base::Value* dummy = base::Value::CreateNullValue();
    334   {
    335     DictionaryPrefUpdate update(profile_->GetPrefs(),
    336                                 prefs::kNtpMostVisitedURLsBlacklist);
    337     base::DictionaryValue* blacklist = update.Get();
    338     blacklist->SetWithoutPathExpansion(GetURLHash(url), dummy);
    339   }
    340 
    341   ResetThreadSafeCache();
    342   NotifyTopSitesChanged();
    343 }
    344 
    345 void TopSitesImpl::RemoveBlacklistedURL(const GURL& url) {
    346   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    347   {
    348     DictionaryPrefUpdate update(profile_->GetPrefs(),
    349                                 prefs::kNtpMostVisitedURLsBlacklist);
    350     base::DictionaryValue* blacklist = update.Get();
    351     blacklist->RemoveWithoutPathExpansion(GetURLHash(url), NULL);
    352   }
    353   ResetThreadSafeCache();
    354   NotifyTopSitesChanged();
    355 }
    356 
    357 bool TopSitesImpl::IsBlacklisted(const GURL& url) {
    358   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    359   const base::DictionaryValue* blacklist =
    360       profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
    361   return blacklist && blacklist->HasKey(GetURLHash(url));
    362 }
    363 
    364 void TopSitesImpl::ClearBlacklistedURLs() {
    365   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    366   {
    367     DictionaryPrefUpdate update(profile_->GetPrefs(),
    368                                 prefs::kNtpMostVisitedURLsBlacklist);
    369     base::DictionaryValue* blacklist = update.Get();
    370     blacklist->Clear();
    371   }
    372   ResetThreadSafeCache();
    373   NotifyTopSitesChanged();
    374 }
    375 
    376 void TopSitesImpl::Shutdown() {
    377   profile_ = NULL;
    378   // Cancel all requests so that the service doesn't callback to us after we've
    379   // invoked Shutdown (this could happen if we have a pending request and
    380   // Shutdown is invoked).
    381   cancelable_task_tracker_.TryCancelAll();
    382   backend_->Shutdown();
    383 }
    384 
    385 // static
    386 void TopSitesImpl::DiffMostVisited(const MostVisitedURLList& old_list,
    387                                    const MostVisitedURLList& new_list,
    388                                    TopSitesDelta* delta) {
    389 
    390   // Add all the old URLs for quick lookup. This maps URLs to the corresponding
    391   // index in the input.
    392   std::map<GURL, size_t> all_old_urls;
    393   size_t num_old_forced = 0;
    394   for (size_t i = 0; i < old_list.size(); i++) {
    395     if (!old_list[i].last_forced_time.is_null())
    396       num_old_forced++;
    397     DCHECK(old_list[i].last_forced_time.is_null() || i < num_old_forced)
    398         << "Forced URLs must all appear before non-forced URLs.";
    399     all_old_urls[old_list[i].url] = i;
    400   }
    401 
    402   // Check all the URLs in the new set to see which ones are new or just moved.
    403   // When we find a match in the old set, we'll reset its index to our special
    404   // marker. This allows us to quickly identify the deleted ones in a later
    405   // pass.
    406   const size_t kAlreadyFoundMarker = static_cast<size_t>(-1);
    407   int rank = -1;  // Forced URLs have a rank of -1.
    408   for (size_t i = 0; i < new_list.size(); i++) {
    409     // Increase the rank if we're going through forced URLs. This works because
    410     // non-forced URLs all come after forced URLs.
    411     if (new_list[i].last_forced_time.is_null())
    412       rank++;
    413     DCHECK(new_list[i].last_forced_time.is_null() == (rank != -1))
    414         << "Forced URLs must all appear before non-forced URLs.";
    415     std::map<GURL, size_t>::iterator found = all_old_urls.find(new_list[i].url);
    416     if (found == all_old_urls.end()) {
    417       MostVisitedURLWithRank added;
    418       added.url = new_list[i];
    419       added.rank = rank;
    420       delta->added.push_back(added);
    421     } else {
    422       DCHECK(found->second != kAlreadyFoundMarker)
    423           << "Same URL appears twice in the new list.";
    424       int old_rank = found->second >= num_old_forced ?
    425           found->second - num_old_forced : -1;
    426       if (old_rank != rank ||
    427           old_list[found->second].last_forced_time !=
    428               new_list[i].last_forced_time) {
    429         MostVisitedURLWithRank moved;
    430         moved.url = new_list[i];
    431         moved.rank = rank;
    432         delta->moved.push_back(moved);
    433       }
    434       found->second = kAlreadyFoundMarker;
    435     }
    436   }
    437 
    438   // Any member without the special marker in the all_old_urls list means that
    439   // there wasn't a "new" URL that mapped to it, so it was deleted.
    440   for (std::map<GURL, size_t>::const_iterator i = all_old_urls.begin();
    441        i != all_old_urls.end(); ++i) {
    442     if (i->second != kAlreadyFoundMarker)
    443       delta->deleted.push_back(old_list[i->second]);
    444   }
    445 }
    446 
    447 base::CancelableTaskTracker::TaskId TopSitesImpl::StartQueryForMostVisited() {
    448   DCHECK(loaded_);
    449   if (!profile_)
    450     return base::CancelableTaskTracker::kBadTaskId;
    451 
    452   HistoryService* hs = HistoryServiceFactory::GetForProfile(
    453       profile_, Profile::EXPLICIT_ACCESS);
    454   // |hs| may be null during unit tests.
    455   if (hs) {
    456     return hs->QueryMostVisitedURLs(
    457         num_results_to_request_from_history(),
    458         kDaysOfHistory,
    459         base::Bind(&TopSitesImpl::OnTopSitesAvailableFromHistory,
    460                    base::Unretained(this)),
    461         &cancelable_task_tracker_);
    462   }
    463   return base::CancelableTaskTracker::kBadTaskId;
    464 }
    465 
    466 bool TopSitesImpl::IsKnownURL(const GURL& url) {
    467   return loaded_ && cache_->IsKnownURL(url);
    468 }
    469 
    470 const std::string& TopSitesImpl::GetCanonicalURLString(const GURL& url) const {
    471   return cache_->GetCanonicalURL(url).spec();
    472 }
    473 
    474 bool TopSitesImpl::IsNonForcedFull() {
    475   return loaded_ && cache_->GetNumNonForcedURLs() >= kNonForcedTopSitesNumber;
    476 }
    477 
    478 bool TopSitesImpl::IsForcedFull() {
    479   return loaded_ && cache_->GetNumForcedURLs() >= kForcedTopSitesNumber;
    480 }
    481 
    482 TopSitesImpl::~TopSitesImpl() {
    483 }
    484 
    485 bool TopSitesImpl::SetPageThumbnailNoDB(
    486     const GURL& url,
    487     const base::RefCountedMemory* thumbnail_data,
    488     const ThumbnailScore& score) {
    489   // This should only be invoked when we know about the url.
    490   DCHECK(cache_->IsKnownURL(url));
    491 
    492   const MostVisitedURL& most_visited =
    493       cache_->top_sites()[cache_->GetURLIndex(url)];
    494   Images* image = cache_->GetImage(url);
    495 
    496   // When comparing the thumbnail scores, we need to take into account the
    497   // redirect hops, which are not generated when the thumbnail is because the
    498   // redirects weren't known. We fill that in here since we know the redirects.
    499   ThumbnailScore new_score_with_redirects(score);
    500   new_score_with_redirects.redirect_hops_from_dest =
    501       GetRedirectDistanceForURL(most_visited, url);
    502 
    503   if (!ShouldReplaceThumbnailWith(image->thumbnail_score,
    504                                   new_score_with_redirects) &&
    505       image->thumbnail.get())
    506     return false;  // The one we already have is better.
    507 
    508   image->thumbnail = const_cast<base::RefCountedMemory*>(thumbnail_data);
    509   image->thumbnail_score = new_score_with_redirects;
    510 
    511   ResetThreadSafeImageCache();
    512   return true;
    513 }
    514 
    515 bool TopSitesImpl::SetPageThumbnailEncoded(
    516     const GURL& url,
    517     const base::RefCountedMemory* thumbnail,
    518     const ThumbnailScore& score) {
    519   if (!SetPageThumbnailNoDB(url, thumbnail, score))
    520     return false;
    521 
    522   // Update the database.
    523   if (!cache_->IsKnownURL(url))
    524     return false;
    525 
    526   size_t index = cache_->GetURLIndex(url);
    527   int url_rank = index - cache_->GetNumForcedURLs();
    528   const MostVisitedURL& most_visited = cache_->top_sites()[index];
    529   backend_->SetPageThumbnail(most_visited,
    530                              url_rank < 0 ? -1 : url_rank,
    531                              *(cache_->GetImage(most_visited.url)));
    532   return true;
    533 }
    534 
    535 // static
    536 bool TopSitesImpl::EncodeBitmap(const gfx::Image& bitmap,
    537                                 scoped_refptr<base::RefCountedBytes>* bytes) {
    538   if (bitmap.IsEmpty())
    539     return false;
    540   *bytes = new base::RefCountedBytes();
    541   std::vector<unsigned char> data;
    542   if (!gfx::JPEG1xEncodedDataFromImage(bitmap, kTopSitesImageQuality, &data))
    543     return false;
    544 
    545   // As we're going to cache this data, make sure the vector is only as big as
    546   // it needs to be, as JPEGCodec::Encode() over-allocates data.capacity().
    547   // (In a C++0x future, we can just call shrink_to_fit() in Encode())
    548   (*bytes)->data() = data;
    549   return true;
    550 }
    551 
    552 void TopSitesImpl::RemoveTemporaryThumbnailByURL(const GURL& url) {
    553   for (TempImages::iterator i = temp_images_.begin(); i != temp_images_.end();
    554        ++i) {
    555     if (i->first == url) {
    556       temp_images_.erase(i);
    557       return;
    558     }
    559   }
    560 }
    561 
    562 void TopSitesImpl::AddTemporaryThumbnail(
    563     const GURL& url,
    564     const base::RefCountedMemory* thumbnail,
    565     const ThumbnailScore& score) {
    566   if (temp_images_.size() == kMaxTempTopImages)
    567     temp_images_.erase(temp_images_.begin());
    568 
    569   TempImage image;
    570   image.first = url;
    571   image.second.thumbnail = const_cast<base::RefCountedMemory*>(thumbnail);
    572   image.second.thumbnail_score = score;
    573   temp_images_.push_back(image);
    574 }
    575 
    576 void TopSitesImpl::TimerFired() {
    577   StartQueryForMostVisited();
    578 }
    579 
    580 // static
    581 int TopSitesImpl::GetRedirectDistanceForURL(const MostVisitedURL& most_visited,
    582                                             const GURL& url) {
    583   for (size_t i = 0; i < most_visited.redirects.size(); i++) {
    584     if (most_visited.redirects[i] == url)
    585       return static_cast<int>(most_visited.redirects.size() - i - 1);
    586   }
    587   NOTREACHED() << "URL should always be found.";
    588   return 0;
    589 }
    590 
    591 MostVisitedURLList TopSitesImpl::GetPrepopulatePages() {
    592   MostVisitedURLList urls;
    593   urls.resize(arraysize(kPrepopulatedPages));
    594   for (size_t i = 0; i < urls.size(); ++i) {
    595     MostVisitedURL& url = urls[i];
    596     url.url = GURL(prepopulated_page_urls_[i]);
    597     url.redirects.push_back(url.url);
    598     url.title = l10n_util::GetStringUTF16(kPrepopulatedPages[i].title_id);
    599   }
    600   return urls;
    601 }
    602 
    603 bool TopSitesImpl::loaded() const {
    604   return loaded_;
    605 }
    606 
    607 bool TopSitesImpl::AddForcedURL(const GURL& url, const base::Time& time) {
    608   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    609   size_t num_forced = cache_->GetNumForcedURLs();
    610   MostVisitedURLList new_list(cache_->top_sites());
    611   MostVisitedURL new_url;
    612 
    613   if (cache_->IsKnownURL(url)) {
    614     size_t index = cache_->GetURLIndex(url);
    615     // Do nothing if we currently have that URL as non-forced.
    616     if (new_list[index].last_forced_time.is_null())
    617       return false;
    618 
    619     // Update the |last_forced_time| of the already existing URL. Delete it and
    620     // reinsert it at the right location.
    621     new_url = new_list[index];
    622     new_list.erase(new_list.begin() + index);
    623     num_forced--;
    624   } else {
    625     new_url.url = url;
    626     new_url.redirects.push_back(url);
    627   }
    628   new_url.last_forced_time = time;
    629   // Add forced URLs and sort. Added to the end of the list of forced URLs
    630   // since this is almost always where it needs to go, unless the user's local
    631   // clock is fiddled with.
    632   MostVisitedURLList::iterator mid = new_list.begin() + num_forced;
    633   new_list.insert(mid, new_url);
    634   mid = new_list.begin() + num_forced;  // Mid was invalidated.
    635   std::inplace_merge(new_list.begin(), mid, mid + 1, ForcedURLComparator);
    636   SetTopSites(new_list);
    637   return true;
    638 }
    639 
    640 bool TopSitesImpl::AddPrepopulatedPages(MostVisitedURLList* urls,
    641                                         size_t num_forced_urls) {
    642   bool added = false;
    643   MostVisitedURLList prepopulate_urls = GetPrepopulatePages();
    644   for (size_t i = 0; i < prepopulate_urls.size(); ++i) {
    645     if (urls->size() - num_forced_urls < kNonForcedTopSitesNumber &&
    646         IndexOf(*urls, prepopulate_urls[i].url) == -1) {
    647       urls->push_back(prepopulate_urls[i]);
    648       added = true;
    649     }
    650   }
    651   return added;
    652 }
    653 
    654 size_t TopSitesImpl::MergeCachedForcedURLs(MostVisitedURLList* new_list) {
    655   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    656   // Add all the new URLs for quick lookup. Take that opportunity to count the
    657   // number of forced URLs in |new_list|.
    658   std::set<GURL> all_new_urls;
    659   size_t num_forced = 0;
    660   for (size_t i = 0; i < new_list->size(); ++i) {
    661     for (size_t j = 0; j < (*new_list)[i].redirects.size(); j++) {
    662       all_new_urls.insert((*new_list)[i].redirects[j]);
    663     }
    664     if (!(*new_list)[i].last_forced_time.is_null())
    665       ++num_forced;
    666   }
    667 
    668   // Keep the forced URLs from |cache_| that are not found in |new_list|.
    669   MostVisitedURLList filtered_forced_urls;
    670   for (size_t i = 0; i < cache_->GetNumForcedURLs(); ++i) {
    671     if (all_new_urls.find(cache_->top_sites()[i].url) == all_new_urls.end())
    672       filtered_forced_urls.push_back(cache_->top_sites()[i]);
    673   }
    674   num_forced += filtered_forced_urls.size();
    675 
    676   // Prepend forced URLs and sort in order of ascending |last_forced_time|.
    677   new_list->insert(new_list->begin(), filtered_forced_urls.begin(),
    678                    filtered_forced_urls.end());
    679   std::inplace_merge(
    680       new_list->begin(), new_list->begin() + filtered_forced_urls.size(),
    681       new_list->begin() + num_forced, ForcedURLComparator);
    682 
    683   // Drop older forced URLs if the list overflows. Since forced URLs are always
    684   // sort in increasing order of |last_forced_time|, drop the first ones.
    685   if (num_forced > kForcedTopSitesNumber) {
    686     new_list->erase(new_list->begin(),
    687                     new_list->begin() + (num_forced - kForcedTopSitesNumber));
    688     num_forced = kForcedTopSitesNumber;
    689   }
    690 
    691   return num_forced;
    692 }
    693 
    694 void TopSitesImpl::ApplyBlacklist(const MostVisitedURLList& urls,
    695                                   MostVisitedURLList* out) {
    696   // Log the number of times ApplyBlacklist is called so we can compute the
    697   // average number of blacklisted items per user.
    698   const base::DictionaryValue* blacklist =
    699       profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
    700   UMA_HISTOGRAM_BOOLEAN("TopSites.NumberOfApplyBlacklist", true);
    701   UMA_HISTOGRAM_COUNTS_100("TopSites.NumberOfBlacklistedItems",
    702       (blacklist ? blacklist->size() : 0));
    703   size_t num_non_forced_urls = 0;
    704   size_t num_forced_urls = 0;
    705   for (size_t i = 0; i < urls.size(); ++i) {
    706     if (!IsBlacklisted(urls[i].url)) {
    707       if (urls[i].last_forced_time.is_null()) {
    708         // Non-forced URL.
    709         if (num_non_forced_urls >= kNonForcedTopSitesNumber)
    710           continue;
    711         num_non_forced_urls++;
    712       } else {
    713         // Forced URL.
    714         if (num_forced_urls >= kForcedTopSitesNumber)
    715           continue;
    716         num_forced_urls++;
    717       }
    718       out->push_back(urls[i]);
    719     }
    720   }
    721 }
    722 
    723 std::string TopSitesImpl::GetURLHash(const GURL& url) {
    724   // We don't use canonical URLs here to be able to blacklist only one of
    725   // the two 'duplicate' sites, e.g. 'gmail.com' and 'mail.google.com'.
    726   return base::MD5String(url.spec());
    727 }
    728 
    729 base::TimeDelta TopSitesImpl::GetUpdateDelay() {
    730   if (cache_->top_sites().size() <= arraysize(kPrepopulatedPages))
    731     return base::TimeDelta::FromSeconds(30);
    732 
    733   int64 range = kMaxUpdateIntervalMinutes - kMinUpdateIntervalMinutes;
    734   int64 minutes = kMaxUpdateIntervalMinutes -
    735       last_num_urls_changed_ * range / cache_->top_sites().size();
    736   return base::TimeDelta::FromMinutes(minutes);
    737 }
    738 
    739 void TopSitesImpl::Observe(int type,
    740                            const content::NotificationSource& source,
    741                            const content::NotificationDetails& details) {
    742   if (!loaded_)
    743     return;
    744 
    745   if (type == chrome::NOTIFICATION_HISTORY_URLS_DELETED) {
    746     content::Details<history::URLsDeletedDetails> deleted_details(details);
    747     if (deleted_details->all_history) {
    748       SetTopSites(MostVisitedURLList());
    749       backend_->ResetDatabase();
    750     } else {
    751       std::set<size_t> indices_to_delete;  // Indices into top_sites_.
    752       for (URLRows::const_iterator i = deleted_details->rows.begin();
    753            i != deleted_details->rows.end(); ++i) {
    754         if (cache_->IsKnownURL(i->url()))
    755           indices_to_delete.insert(cache_->GetURLIndex(i->url()));
    756       }
    757 
    758       if (indices_to_delete.empty())
    759         return;
    760 
    761       MostVisitedURLList new_top_sites(cache_->top_sites());
    762       for (std::set<size_t>::reverse_iterator i = indices_to_delete.rbegin();
    763            i != indices_to_delete.rend(); i++) {
    764         new_top_sites.erase(new_top_sites.begin() + *i);
    765       }
    766       SetTopSites(new_top_sites);
    767     }
    768     StartQueryForMostVisited();
    769   } else if (type == content::NOTIFICATION_NAV_ENTRY_COMMITTED) {
    770     NavigationController* controller =
    771         content::Source<NavigationController>(source).ptr();
    772     Profile* profile = Profile::FromBrowserContext(
    773         controller->GetWebContents()->GetBrowserContext());
    774     if (profile == profile_ && !IsNonForcedFull()) {
    775       content::LoadCommittedDetails* load_details =
    776           content::Details<content::LoadCommittedDetails>(details).ptr();
    777       if (!load_details)
    778         return;
    779       const GURL& url = load_details->entry->GetURL();
    780       if (!cache_->IsKnownURL(url) && HistoryService::CanAddURL(url)) {
    781         // To avoid slamming history we throttle requests when the url updates.
    782         // To do otherwise negatively impacts perf tests.
    783         RestartQueryForTopSitesTimer(GetUpdateDelay());
    784       }
    785     }
    786   }
    787 }
    788 
    789 void TopSitesImpl::SetTopSites(const MostVisitedURLList& new_top_sites) {
    790   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    791 
    792   MostVisitedURLList top_sites(new_top_sites);
    793   size_t num_forced_urls = MergeCachedForcedURLs(&top_sites);
    794   AddPrepopulatedPages(&top_sites, num_forced_urls);
    795 
    796   TopSitesDelta delta;
    797   DiffMostVisited(cache_->top_sites(), top_sites, &delta);
    798   if (!delta.deleted.empty() || !delta.added.empty() || !delta.moved.empty()) {
    799     backend_->UpdateTopSites(delta);
    800   }
    801 
    802   last_num_urls_changed_ = delta.added.size() + delta.moved.size();
    803 
    804   // We always do the following steps (setting top sites in cache, and resetting
    805   // thread safe cache ...) as this method is invoked during startup at which
    806   // point the caches haven't been updated yet.
    807   cache_->SetTopSites(top_sites);
    808 
    809   // See if we have any tmp thumbnails for the new sites.
    810   if (!temp_images_.empty()) {
    811     for (size_t i = 0; i < top_sites.size(); ++i) {
    812       const MostVisitedURL& mv = top_sites[i];
    813       GURL canonical_url = cache_->GetCanonicalURL(mv.url);
    814       // At the time we get the thumbnail redirects aren't known, so we have to
    815       // iterate through all the images.
    816       for (TempImages::iterator it = temp_images_.begin();
    817            it != temp_images_.end(); ++it) {
    818         if (canonical_url == cache_->GetCanonicalURL(it->first)) {
    819           SetPageThumbnailEncoded(
    820               mv.url, it->second.thumbnail.get(), it->second.thumbnail_score);
    821           temp_images_.erase(it);
    822           break;
    823         }
    824       }
    825     }
    826   }
    827 
    828   if (top_sites.size() - num_forced_urls >= kNonForcedTopSitesNumber)
    829     temp_images_.clear();
    830 
    831   ResetThreadSafeCache();
    832   ResetThreadSafeImageCache();
    833   NotifyTopSitesChanged();
    834 
    835   // Restart the timer that queries history for top sites. This is done to
    836   // ensure we stay in sync with history.
    837   RestartQueryForTopSitesTimer(GetUpdateDelay());
    838 }
    839 
    840 int TopSitesImpl::num_results_to_request_from_history() const {
    841   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    842 
    843   const base::DictionaryValue* blacklist =
    844       profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
    845   return kNonForcedTopSitesNumber + (blacklist ? blacklist->size() : 0);
    846 }
    847 
    848 void TopSitesImpl::MoveStateToLoaded() {
    849   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    850 
    851   MostVisitedURLList filtered_urls_all;
    852   MostVisitedURLList filtered_urls_nonforced;
    853   PendingCallbacks pending_callbacks;
    854   {
    855     base::AutoLock lock(lock_);
    856 
    857     if (loaded_)
    858       return;  // Don't do anything if we're already loaded.
    859     loaded_ = true;
    860 
    861     // Now that we're loaded we can service the queued up callbacks. Copy them
    862     // here and service them outside the lock.
    863     if (!pending_callbacks_.empty()) {
    864       // We always filter out forced URLs because callers of GetMostVisitedURLs
    865       // are not interested in them.
    866       filtered_urls_all = thread_safe_cache_->top_sites();
    867       filtered_urls_nonforced.assign(thread_safe_cache_->top_sites().begin() +
    868                                        thread_safe_cache_->GetNumForcedURLs(),
    869                                      thread_safe_cache_->top_sites().end());
    870       pending_callbacks.swap(pending_callbacks_);
    871     }
    872   }
    873 
    874   for (size_t i = 0; i < pending_callbacks.size(); i++)
    875     pending_callbacks[i].Run(filtered_urls_all, filtered_urls_nonforced);
    876 
    877   NotifyTopSitesLoaded();
    878 }
    879 
    880 void TopSitesImpl::ResetThreadSafeCache() {
    881   base::AutoLock lock(lock_);
    882   MostVisitedURLList cached;
    883   ApplyBlacklist(cache_->top_sites(), &cached);
    884   thread_safe_cache_->SetTopSites(cached);
    885 }
    886 
    887 void TopSitesImpl::ResetThreadSafeImageCache() {
    888   base::AutoLock lock(lock_);
    889   thread_safe_cache_->SetThumbnails(cache_->images());
    890 }
    891 
    892 void TopSitesImpl::RestartQueryForTopSitesTimer(base::TimeDelta delta) {
    893   if (timer_.IsRunning() && ((timer_start_time_ + timer_.GetCurrentDelay()) <
    894                              (base::TimeTicks::Now() + delta))) {
    895     return;
    896   }
    897 
    898   timer_start_time_ = base::TimeTicks::Now();
    899   timer_.Stop();
    900   timer_.Start(FROM_HERE, delta, this, &TopSitesImpl::TimerFired);
    901 }
    902 
    903 void TopSitesImpl::OnGotMostVisitedThumbnails(
    904     const scoped_refptr<MostVisitedThumbnails>& thumbnails) {
    905   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    906 
    907   // Set the top sites directly in the cache so that SetTopSites diffs
    908   // correctly.
    909   cache_->SetTopSites(thumbnails->most_visited);
    910   SetTopSites(thumbnails->most_visited);
    911   cache_->SetThumbnails(thumbnails->url_to_images_map);
    912 
    913   ResetThreadSafeImageCache();
    914 
    915   MoveStateToLoaded();
    916 
    917   // Start a timer that refreshes top sites from history.
    918   RestartQueryForTopSitesTimer(
    919       base::TimeDelta::FromSeconds(kUpdateIntervalSecs));
    920 }
    921 
    922 void TopSitesImpl::OnTopSitesAvailableFromHistory(
    923     const MostVisitedURLList* pages) {
    924   DCHECK(pages);
    925   SetTopSites(*pages);
    926 }
    927 
    928 }  // namespace history
    929