Home | History | Annotate | Download | only in history
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/history/top_sites_impl.h"
      6 
      7 #include <algorithm>
      8 #include <set>
      9 
     10 #include "base/bind.h"
     11 #include "base/bind_helpers.h"
     12 #include "base/logging.h"
     13 #include "base/md5.h"
     14 #include "base/memory/ref_counted_memory.h"
     15 #include "base/message_loop/message_loop_proxy.h"
     16 #include "base/metrics/histogram.h"
     17 #include "base/prefs/pref_service.h"
     18 #include "base/prefs/scoped_user_pref_update.h"
     19 #include "base/strings/string_util.h"
     20 #include "base/strings/utf_string_conversions.h"
     21 #include "base/task_runner.h"
     22 #include "base/values.h"
     23 #include "chrome/browser/chrome_notification_types.h"
     24 #include "chrome/browser/history/history_backend.h"
     25 #include "chrome/browser/history/history_db_task.h"
     26 #include "chrome/browser/history/history_notifications.h"
     27 #include "chrome/browser/history/history_service_factory.h"
     28 #include "chrome/browser/history/page_usage_data.h"
     29 #include "chrome/browser/history/top_sites_cache.h"
     30 #include "chrome/browser/history/url_utils.h"
     31 #include "chrome/browser/profiles/profile.h"
     32 #include "chrome/browser/ui/webui/ntp/most_visited_handler.h"
     33 #include "chrome/browser/ui/webui/ntp/new_tab_ui.h"
     34 #include "chrome/common/pref_names.h"
     35 #include "chrome/common/thumbnail_score.h"
     36 #include "content/public/browser/browser_thread.h"
     37 #include "content/public/browser/navigation_controller.h"
     38 #include "content/public/browser/navigation_details.h"
     39 #include "content/public/browser/navigation_entry.h"
     40 #include "content/public/browser/notification_service.h"
     41 #include "content/public/browser/web_contents.h"
     42 #include "grit/locale_settings.h"
     43 #include "ui/base/l10n/l10n_util.h"
     44 #include "ui/base/layout.h"
     45 #include "ui/base/resource/resource_bundle.h"
     46 #include "ui/gfx/image/image_util.h"
     47 
     48 using base::DictionaryValue;
     49 using content::BrowserThread;
     50 using content::NavigationController;
     51 
     52 namespace history {
     53 
     54 namespace {
     55 
     56 void RunOrPostGetMostVisitedURLsCallback(
     57     base::TaskRunner* task_runner,
     58     bool include_forced_urls,
     59     const TopSitesImpl::GetMostVisitedURLsCallback& callback,
     60     const MostVisitedURLList& all_urls,
     61     const MostVisitedURLList& nonforced_urls) {
     62   const MostVisitedURLList* urls =
     63       include_forced_urls ? &all_urls : &nonforced_urls;
     64   if (task_runner->RunsTasksOnCurrentThread())
     65     callback.Run(*urls);
     66   else
     67     task_runner->PostTask(FROM_HERE, base::Bind(callback, *urls));
     68 }
     69 
     70 // Compares two MostVisitedURL having a non-null |last_forced_time|.
     71 bool ForcedURLComparator(const MostVisitedURL& first,
     72                          const MostVisitedURL& second) {
     73   DCHECK(!first.last_forced_time.is_null() &&
     74          !second.last_forced_time.is_null());
     75   return first.last_forced_time < second.last_forced_time;
     76 }
     77 
     78 }  // namespace
     79 
     80 // How many non-forced top sites to store in the cache.
     81 static const size_t kNonForcedTopSitesNumber = 20;
     82 
     83 // How many forced top sites to store in the cache.
     84 static const size_t kForcedTopSitesNumber = 20;
     85 
     86 // Max number of temporary images we'll cache. See comment above
     87 // temp_images_ for details.
     88 static const size_t kMaxTempTopImages = 8;
     89 
     90 static const int kDaysOfHistory = 90;
     91 // Time from startup to first HistoryService query.
     92 static const int64 kUpdateIntervalSecs = 15;
     93 // Intervals between requests to HistoryService.
     94 static const int64 kMinUpdateIntervalMinutes = 1;
     95 static const int64 kMaxUpdateIntervalMinutes = 60;
     96 
     97 // Use 100 quality (highest quality) because we're very sensitive to
     98 // artifacts for these small sized, highly detailed images.
     99 static const int kTopSitesImageQuality = 100;
    100 
    101 TopSitesImpl::TopSitesImpl(Profile* profile)
    102     : backend_(NULL),
    103       cache_(new TopSitesCache()),
    104       thread_safe_cache_(new TopSitesCache()),
    105       profile_(profile),
    106       last_num_urls_changed_(0),
    107       loaded_(false) {
    108   if (!profile_)
    109     return;
    110 
    111   if (content::NotificationService::current()) {
    112     registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED,
    113                    content::Source<Profile>(profile_));
    114     // Listen for any nav commits. We'll ignore those not related to this
    115     // profile when we get the notification.
    116     registrar_.Add(this, content::NOTIFICATION_NAV_ENTRY_COMMITTED,
    117                    content::NotificationService::AllSources());
    118   }
    119   for (size_t i = 0; i < arraysize(kPrepopulatedPages); i++) {
    120     int url_id = kPrepopulatedPages[i].url_id;
    121     prepopulated_page_urls_.push_back(
    122         GURL(l10n_util::GetStringUTF8(url_id)));
    123   }
    124 }
    125 
    126 void TopSitesImpl::Init(const base::FilePath& db_name) {
    127   // Create the backend here, rather than in the constructor, so that
    128   // unit tests that do not need the backend can run without a problem.
    129   backend_ = new TopSitesBackend;
    130   backend_->Init(db_name);
    131   backend_->GetMostVisitedThumbnails(
    132       base::Bind(&TopSitesImpl::OnGotMostVisitedThumbnails,
    133                  base::Unretained(this)),
    134       &cancelable_task_tracker_);
    135 }
    136 
    137 bool TopSitesImpl::SetPageThumbnail(const GURL& url,
    138                                     const gfx::Image& thumbnail,
    139                                     const ThumbnailScore& score) {
    140   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    141 
    142   if (!loaded_) {
    143     // TODO(sky): I need to cache these and apply them after the load
    144     // completes.
    145     return false;
    146   }
    147 
    148   bool add_temp_thumbnail = false;
    149   if (!IsKnownURL(url)) {
    150     if (!IsNonForcedFull()) {
    151       add_temp_thumbnail = true;
    152     } else {
    153       return false;  // This URL is not known to us.
    154     }
    155   }
    156 
    157   if (!HistoryService::CanAddURL(url))
    158     return false;  // It's not a real webpage.
    159 
    160   scoped_refptr<base::RefCountedBytes> thumbnail_data;
    161   if (!EncodeBitmap(thumbnail, &thumbnail_data))
    162     return false;
    163 
    164   if (add_temp_thumbnail) {
    165     // Always remove the existing entry and then add it back. That way if we end
    166     // up with too many temp thumbnails we'll prune the oldest first.
    167     RemoveTemporaryThumbnailByURL(url);
    168     AddTemporaryThumbnail(url, thumbnail_data.get(), score);
    169     return true;
    170   }
    171 
    172   return SetPageThumbnailEncoded(url, thumbnail_data.get(), score);
    173 }
    174 
    175 bool TopSitesImpl::SetPageThumbnailToJPEGBytes(
    176     const GURL& url,
    177     const base::RefCountedMemory* memory,
    178     const ThumbnailScore& score) {
    179   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    180 
    181   if (!loaded_) {
    182     // TODO(sky): I need to cache these and apply them after the load
    183     // completes.
    184     return false;
    185   }
    186 
    187   bool add_temp_thumbnail = false;
    188   if (!IsKnownURL(url)) {
    189     if (!IsNonForcedFull()) {
    190       add_temp_thumbnail = true;
    191     } else {
    192       return false;  // This URL is not known to us.
    193     }
    194   }
    195 
    196   if (!HistoryService::CanAddURL(url))
    197     return false;  // It's not a real webpage.
    198 
    199   if (add_temp_thumbnail) {
    200     // Always remove the existing entry and then add it back. That way if we end
    201     // up with too many temp thumbnails we'll prune the oldest first.
    202     RemoveTemporaryThumbnailByURL(url);
    203     AddTemporaryThumbnail(url, memory, score);
    204     return true;
    205   }
    206 
    207   return SetPageThumbnailEncoded(url, memory, score);
    208 }
    209 
    210 // WARNING: this function may be invoked on any thread.
    211 void TopSitesImpl::GetMostVisitedURLs(
    212     const GetMostVisitedURLsCallback& callback,
    213     bool include_forced_urls) {
    214   MostVisitedURLList filtered_urls;
    215   {
    216     base::AutoLock lock(lock_);
    217     if (!loaded_) {
    218       // A request came in before we finished loading. Store the callback and
    219       // we'll run it on current thread when we finish loading.
    220       pending_callbacks_.push_back(
    221           base::Bind(&RunOrPostGetMostVisitedURLsCallback,
    222                      base::MessageLoopProxy::current(),
    223                      include_forced_urls,
    224                      callback));
    225       return;
    226     }
    227     if (include_forced_urls) {
    228       filtered_urls = thread_safe_cache_->top_sites();
    229     } else {
    230       filtered_urls.assign(thread_safe_cache_->top_sites().begin() +
    231                               thread_safe_cache_->GetNumForcedURLs(),
    232                            thread_safe_cache_->top_sites().end());
    233     }
    234   }
    235   callback.Run(filtered_urls);
    236 }
    237 
    238 bool TopSitesImpl::GetPageThumbnail(
    239     const GURL& url,
    240     bool prefix_match,
    241     scoped_refptr<base::RefCountedMemory>* bytes) {
    242   // WARNING: this may be invoked on any thread.
    243   // Perform exact match.
    244   {
    245     base::AutoLock lock(lock_);
    246     if (thread_safe_cache_->GetPageThumbnail(url, bytes))
    247       return true;
    248   }
    249 
    250   // Resource bundle is thread safe.
    251   for (size_t i = 0; i < arraysize(kPrepopulatedPages); i++) {
    252     if (url == prepopulated_page_urls_[i]) {
    253       *bytes = ResourceBundle::GetSharedInstance().
    254           LoadDataResourceBytesForScale(
    255               kPrepopulatedPages[i].thumbnail_id,
    256               ui::SCALE_FACTOR_100P);
    257       return true;
    258     }
    259   }
    260 
    261   if (prefix_match) {
    262     // If http or https, search with |url| first, then try the other one.
    263     std::vector<GURL> url_list;
    264     url_list.push_back(url);
    265     if (url.SchemeIsHTTPOrHTTPS())
    266       url_list.push_back(ToggleHTTPAndHTTPS(url));
    267 
    268     for (std::vector<GURL>::iterator it = url_list.begin();
    269          it != url_list.end(); ++it) {
    270       base::AutoLock lock(lock_);
    271 
    272       GURL canonical_url;
    273       // Test whether any stored URL is a prefix of |url|.
    274       canonical_url = thread_safe_cache_->GetGeneralizedCanonicalURL(*it);
    275       if (!canonical_url.is_empty() &&
    276           thread_safe_cache_->GetPageThumbnail(canonical_url, bytes)) {
    277         return true;
    278       }
    279     }
    280   }
    281 
    282   return false;
    283 }
    284 
    285 bool TopSitesImpl::GetPageThumbnailScore(const GURL& url,
    286                                          ThumbnailScore* score) {
    287   // WARNING: this may be invoked on any thread.
    288   base::AutoLock lock(lock_);
    289   return thread_safe_cache_->GetPageThumbnailScore(url, score);
    290 }
    291 
    292 bool TopSitesImpl::GetTemporaryPageThumbnailScore(const GURL& url,
    293                                                   ThumbnailScore* score) {
    294   for (TempImages::iterator i = temp_images_.begin(); i != temp_images_.end();
    295        ++i) {
    296     if (i->first == url) {
    297       *score = i->second.thumbnail_score;
    298       return true;
    299     }
    300   }
    301   return false;
    302 }
    303 
    304 
    305 // Returns the index of |url| in |urls|, or -1 if not found.
    306 static int IndexOf(const MostVisitedURLList& urls, const GURL& url) {
    307   for (size_t i = 0; i < urls.size(); i++) {
    308     if (urls[i].url == url)
    309       return i;
    310   }
    311   return -1;
    312 }
    313 
    314 void TopSitesImpl::SyncWithHistory() {
    315   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    316   if (loaded_ && temp_images_.size()) {
    317     // If we have temporary thumbnails it means there isn't much data, and most
    318     // likely the user is first running Chrome. During this time we throttle
    319     // updating from history by 30 seconds. If the user creates a new tab page
    320     // during this window of time we force updating from history so that the new
    321     // tab page isn't so far out of date.
    322     timer_.Stop();
    323     StartQueryForMostVisited();
    324   }
    325 }
    326 
    327 bool TopSitesImpl::HasBlacklistedItems() const {
    328   const DictionaryValue* blacklist =
    329       profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
    330   return blacklist && !blacklist->empty();
    331 }
    332 
    333 void TopSitesImpl::AddBlacklistedURL(const GURL& url) {
    334   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    335 
    336   Value* dummy = Value::CreateNullValue();
    337   {
    338     DictionaryPrefUpdate update(profile_->GetPrefs(),
    339                                 prefs::kNtpMostVisitedURLsBlacklist);
    340     DictionaryValue* blacklist = update.Get();
    341     blacklist->SetWithoutPathExpansion(GetURLHash(url), dummy);
    342   }
    343 
    344   ResetThreadSafeCache();
    345   NotifyTopSitesChanged();
    346 }
    347 
    348 void TopSitesImpl::RemoveBlacklistedURL(const GURL& url) {
    349   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    350   {
    351     DictionaryPrefUpdate update(profile_->GetPrefs(),
    352                                 prefs::kNtpMostVisitedURLsBlacklist);
    353     DictionaryValue* blacklist = update.Get();
    354     blacklist->RemoveWithoutPathExpansion(GetURLHash(url), NULL);
    355   }
    356   ResetThreadSafeCache();
    357   NotifyTopSitesChanged();
    358 }
    359 
    360 bool TopSitesImpl::IsBlacklisted(const GURL& url) {
    361   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    362   const DictionaryValue* blacklist =
    363       profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
    364   return blacklist && blacklist->HasKey(GetURLHash(url));
    365 }
    366 
    367 void TopSitesImpl::ClearBlacklistedURLs() {
    368   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    369   {
    370     DictionaryPrefUpdate update(profile_->GetPrefs(),
    371                                 prefs::kNtpMostVisitedURLsBlacklist);
    372     DictionaryValue* blacklist = update.Get();
    373     blacklist->Clear();
    374   }
    375   ResetThreadSafeCache();
    376   NotifyTopSitesChanged();
    377 }
    378 
    379 void TopSitesImpl::Shutdown() {
    380   profile_ = NULL;
    381   // Cancel all requests so that the service doesn't callback to us after we've
    382   // invoked Shutdown (this could happen if we have a pending request and
    383   // Shutdown is invoked).
    384   history_consumer_.CancelAllRequests();
    385   backend_->Shutdown();
    386 }
    387 
    388 // static
    389 void TopSitesImpl::DiffMostVisited(const MostVisitedURLList& old_list,
    390                                    const MostVisitedURLList& new_list,
    391                                    TopSitesDelta* delta) {
    392 
    393   // Add all the old URLs for quick lookup. This maps URLs to the corresponding
    394   // index in the input.
    395   std::map<GURL, size_t> all_old_urls;
    396   size_t num_old_forced = 0;
    397   for (size_t i = 0; i < old_list.size(); i++) {
    398     if (!old_list[i].last_forced_time.is_null())
    399       num_old_forced++;
    400     DCHECK(old_list[i].last_forced_time.is_null() || i < num_old_forced)
    401         << "Forced URLs must all appear before non-forced URLs.";
    402     all_old_urls[old_list[i].url] = i;
    403   }
    404 
    405   // Check all the URLs in the new set to see which ones are new or just moved.
    406   // When we find a match in the old set, we'll reset its index to our special
    407   // marker. This allows us to quickly identify the deleted ones in a later
    408   // pass.
    409   const size_t kAlreadyFoundMarker = static_cast<size_t>(-1);
    410   int rank = -1;  // Forced URLs have a rank of -1.
    411   for (size_t i = 0; i < new_list.size(); i++) {
    412     // Increase the rank if we're going through forced URLs. This works because
    413     // non-forced URLs all come after forced URLs.
    414     if (new_list[i].last_forced_time.is_null())
    415       rank++;
    416     DCHECK(new_list[i].last_forced_time.is_null() == (rank != -1))
    417         << "Forced URLs must all appear before non-forced URLs.";
    418     std::map<GURL, size_t>::iterator found = all_old_urls.find(new_list[i].url);
    419     if (found == all_old_urls.end()) {
    420       MostVisitedURLWithRank added;
    421       added.url = new_list[i];
    422       added.rank = rank;
    423       delta->added.push_back(added);
    424     } else {
    425       DCHECK(found->second != kAlreadyFoundMarker)
    426           << "Same URL appears twice in the new list.";
    427       int old_rank = found->second >= num_old_forced ?
    428           found->second - num_old_forced : -1;
    429       if (old_rank != rank ||
    430           old_list[found->second].last_forced_time !=
    431               new_list[i].last_forced_time) {
    432         MostVisitedURLWithRank moved;
    433         moved.url = new_list[i];
    434         moved.rank = rank;
    435         delta->moved.push_back(moved);
    436       }
    437       found->second = kAlreadyFoundMarker;
    438     }
    439   }
    440 
    441   // Any member without the special marker in the all_old_urls list means that
    442   // there wasn't a "new" URL that mapped to it, so it was deleted.
    443   for (std::map<GURL, size_t>::const_iterator i = all_old_urls.begin();
    444        i != all_old_urls.end(); ++i) {
    445     if (i->second != kAlreadyFoundMarker)
    446       delta->deleted.push_back(old_list[i->second]);
    447   }
    448 }
    449 
    450 CancelableRequestProvider::Handle TopSitesImpl::StartQueryForMostVisited() {
    451   DCHECK(loaded_);
    452   if (!profile_)
    453     return 0;
    454 
    455   HistoryService* hs = HistoryServiceFactory::GetForProfile(
    456       profile_, Profile::EXPLICIT_ACCESS);
    457   // |hs| may be null during unit tests.
    458   if (hs) {
    459     return hs->QueryMostVisitedURLs(
    460         num_results_to_request_from_history(),
    461         kDaysOfHistory,
    462         &history_consumer_,
    463         base::Bind(&TopSitesImpl::OnTopSitesAvailableFromHistory,
    464                    base::Unretained(this)));
    465   }
    466   return 0;
    467 }
    468 
    469 bool TopSitesImpl::IsKnownURL(const GURL& url) {
    470   return loaded_ && cache_->IsKnownURL(url);
    471 }
    472 
    473 const std::string& TopSitesImpl::GetCanonicalURLString(const GURL& url) const {
    474   return cache_->GetCanonicalURL(url).spec();
    475 }
    476 
    477 bool TopSitesImpl::IsNonForcedFull() {
    478   return loaded_ && cache_->GetNumNonForcedURLs() >= kNonForcedTopSitesNumber;
    479 }
    480 
    481 bool TopSitesImpl::IsForcedFull() {
    482   return loaded_ && cache_->GetNumForcedURLs() >= kForcedTopSitesNumber;
    483 }
    484 
    485 TopSitesImpl::~TopSitesImpl() {
    486 }
    487 
    488 bool TopSitesImpl::SetPageThumbnailNoDB(
    489     const GURL& url,
    490     const base::RefCountedMemory* thumbnail_data,
    491     const ThumbnailScore& score) {
    492   // This should only be invoked when we know about the url.
    493   DCHECK(cache_->IsKnownURL(url));
    494 
    495   const MostVisitedURL& most_visited =
    496       cache_->top_sites()[cache_->GetURLIndex(url)];
    497   Images* image = cache_->GetImage(url);
    498 
    499   // When comparing the thumbnail scores, we need to take into account the
    500   // redirect hops, which are not generated when the thumbnail is because the
    501   // redirects weren't known. We fill that in here since we know the redirects.
    502   ThumbnailScore new_score_with_redirects(score);
    503   new_score_with_redirects.redirect_hops_from_dest =
    504       GetRedirectDistanceForURL(most_visited, url);
    505 
    506   if (!ShouldReplaceThumbnailWith(image->thumbnail_score,
    507                                   new_score_with_redirects) &&
    508       image->thumbnail.get())
    509     return false;  // The one we already have is better.
    510 
    511   image->thumbnail = const_cast<base::RefCountedMemory*>(thumbnail_data);
    512   image->thumbnail_score = new_score_with_redirects;
    513 
    514   ResetThreadSafeImageCache();
    515   return true;
    516 }
    517 
    518 bool TopSitesImpl::SetPageThumbnailEncoded(
    519     const GURL& url,
    520     const base::RefCountedMemory* thumbnail,
    521     const ThumbnailScore& score) {
    522   if (!SetPageThumbnailNoDB(url, thumbnail, score))
    523     return false;
    524 
    525   // Update the database.
    526   if (!cache_->IsKnownURL(url))
    527     return false;
    528 
    529   size_t index = cache_->GetURLIndex(url);
    530   int url_rank = index - cache_->GetNumForcedURLs();
    531   const MostVisitedURL& most_visited = cache_->top_sites()[index];
    532   backend_->SetPageThumbnail(most_visited,
    533                              url_rank < 0 ? -1 : url_rank,
    534                              *(cache_->GetImage(most_visited.url)));
    535   return true;
    536 }
    537 
    538 // static
    539 bool TopSitesImpl::EncodeBitmap(const gfx::Image& bitmap,
    540                                 scoped_refptr<base::RefCountedBytes>* bytes) {
    541   if (bitmap.IsEmpty())
    542     return false;
    543   *bytes = new base::RefCountedBytes();
    544   std::vector<unsigned char> data;
    545   if (!gfx::JPEG1xEncodedDataFromImage(bitmap, kTopSitesImageQuality, &data))
    546     return false;
    547 
    548   // As we're going to cache this data, make sure the vector is only as big as
    549   // it needs to be, as JPEGCodec::Encode() over-allocates data.capacity().
    550   // (In a C++0x future, we can just call shrink_to_fit() in Encode())
    551   (*bytes)->data() = data;
    552   return true;
    553 }
    554 
    555 void TopSitesImpl::RemoveTemporaryThumbnailByURL(const GURL& url) {
    556   for (TempImages::iterator i = temp_images_.begin(); i != temp_images_.end();
    557        ++i) {
    558     if (i->first == url) {
    559       temp_images_.erase(i);
    560       return;
    561     }
    562   }
    563 }
    564 
    565 void TopSitesImpl::AddTemporaryThumbnail(
    566     const GURL& url,
    567     const base::RefCountedMemory* thumbnail,
    568     const ThumbnailScore& score) {
    569   if (temp_images_.size() == kMaxTempTopImages)
    570     temp_images_.erase(temp_images_.begin());
    571 
    572   TempImage image;
    573   image.first = url;
    574   image.second.thumbnail = const_cast<base::RefCountedMemory*>(thumbnail);
    575   image.second.thumbnail_score = score;
    576   temp_images_.push_back(image);
    577 }
    578 
    579 void TopSitesImpl::TimerFired() {
    580   StartQueryForMostVisited();
    581 }
    582 
    583 // static
    584 int TopSitesImpl::GetRedirectDistanceForURL(const MostVisitedURL& most_visited,
    585                                             const GURL& url) {
    586   for (size_t i = 0; i < most_visited.redirects.size(); i++) {
    587     if (most_visited.redirects[i] == url)
    588       return static_cast<int>(most_visited.redirects.size() - i - 1);
    589   }
    590   NOTREACHED() << "URL should always be found.";
    591   return 0;
    592 }
    593 
    594 MostVisitedURLList TopSitesImpl::GetPrepopulatePages() {
    595   MostVisitedURLList urls;
    596   urls.resize(arraysize(kPrepopulatedPages));
    597   for (size_t i = 0; i < urls.size(); ++i) {
    598     MostVisitedURL& url = urls[i];
    599     url.url = GURL(prepopulated_page_urls_[i]);
    600     url.redirects.push_back(url.url);
    601     url.title = l10n_util::GetStringUTF16(kPrepopulatedPages[i].title_id);
    602   }
    603   return urls;
    604 }
    605 
    606 bool TopSitesImpl::loaded() const {
    607   return loaded_;
    608 }
    609 
    610 bool TopSitesImpl::AddForcedURL(const GURL& url, const base::Time& time) {
    611   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    612   size_t num_forced = cache_->GetNumForcedURLs();
    613   MostVisitedURLList new_list(cache_->top_sites());
    614   MostVisitedURL new_url;
    615 
    616   if (cache_->IsKnownURL(url)) {
    617     size_t index = cache_->GetURLIndex(url);
    618     // Do nothing if we currently have that URL as non-forced.
    619     if (new_list[index].last_forced_time.is_null())
    620       return false;
    621 
    622     // Update the |last_forced_time| of the already existing URL. Delete it and
    623     // reinsert it at the right location.
    624     new_url = new_list[index];
    625     new_list.erase(new_list.begin() + index);
    626     num_forced--;
    627   } else {
    628     new_url.url = url;
    629     new_url.redirects.push_back(url);
    630   }
    631   new_url.last_forced_time = time;
    632   // Add forced URLs and sort. Added to the end of the list of forced URLs
    633   // since this is almost always where it needs to go, unless the user's local
    634   // clock is fiddled with.
    635   MostVisitedURLList::iterator mid = new_list.begin() + num_forced;
    636   new_list.insert(mid, new_url);
    637   mid = new_list.begin() + num_forced;  // Mid was invalidated.
    638   std::inplace_merge(new_list.begin(), mid, mid + 1, ForcedURLComparator);
    639   SetTopSites(new_list);
    640   return true;
    641 }
    642 
    643 bool TopSitesImpl::AddPrepopulatedPages(MostVisitedURLList* urls,
    644                                         size_t num_forced_urls) {
    645   bool added = false;
    646   MostVisitedURLList prepopulate_urls = GetPrepopulatePages();
    647   for (size_t i = 0; i < prepopulate_urls.size(); ++i) {
    648     if (urls->size() - num_forced_urls < kNonForcedTopSitesNumber &&
    649         IndexOf(*urls, prepopulate_urls[i].url) == -1) {
    650       urls->push_back(prepopulate_urls[i]);
    651       added = true;
    652     }
    653   }
    654   return added;
    655 }
    656 
    657 size_t TopSitesImpl::MergeCachedForcedURLs(MostVisitedURLList* new_list) {
    658   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    659   // Add all the new URLs for quick lookup. Take that opportunity to count the
    660   // number of forced URLs in |new_list|.
    661   std::set<GURL> all_new_urls;
    662   size_t num_forced = 0;
    663   for (size_t i = 0; i < new_list->size(); ++i) {
    664     all_new_urls.insert((*new_list)[i].url);
    665     if (!(*new_list)[i].last_forced_time.is_null())
    666       ++num_forced;
    667   }
    668 
    669   // Keep the forced URLs from |cache_| that are not found in |new_list|.
    670   MostVisitedURLList filtered_forced_urls;
    671   for (size_t i = 0; i < cache_->GetNumForcedURLs(); ++i) {
    672     if (all_new_urls.find(cache_->top_sites()[i].url) == all_new_urls.end())
    673       filtered_forced_urls.push_back(cache_->top_sites()[i]);
    674   }
    675   num_forced += filtered_forced_urls.size();
    676 
    677   // Prepend forced URLs and sort in order of ascending |last_forced_time|.
    678   new_list->insert(new_list->begin(), filtered_forced_urls.begin(),
    679                    filtered_forced_urls.end());
    680   std::inplace_merge(
    681       new_list->begin(), new_list->begin() + filtered_forced_urls.size(),
    682       new_list->begin() + num_forced, ForcedURLComparator);
    683 
    684   // Drop older forced URLs if the list overflows. Since forced URLs are always
    685   // sort in increasing order of |last_forced_time|, drop the first ones.
    686   if (num_forced > kForcedTopSitesNumber) {
    687     new_list->erase(new_list->begin(),
    688                     new_list->begin() + (num_forced - kForcedTopSitesNumber));
    689     num_forced = kForcedTopSitesNumber;
    690   }
    691 
    692   return num_forced;
    693 }
    694 
    695 void TopSitesImpl::ApplyBlacklist(const MostVisitedURLList& urls,
    696                                   MostVisitedURLList* out) {
    697   // Log the number of times ApplyBlacklist is called so we can compute the
    698   // average number of blacklisted items per user.
    699   const DictionaryValue* blacklist =
    700       profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
    701   UMA_HISTOGRAM_BOOLEAN("TopSites.NumberOfApplyBlacklist", true);
    702   UMA_HISTOGRAM_COUNTS_100("TopSites.NumberOfBlacklistedItems",
    703       (blacklist ? blacklist->size() : 0));
    704   size_t num_non_forced_urls = 0;
    705   size_t num_forced_urls = 0;
    706   for (size_t i = 0; i < urls.size(); ++i) {
    707     if (!IsBlacklisted(urls[i].url)) {
    708       if (urls[i].last_forced_time.is_null()) {
    709         // Non-forced URL.
    710         if (num_non_forced_urls >= kNonForcedTopSitesNumber)
    711           continue;
    712         num_non_forced_urls++;
    713       } else {
    714         // Forced URL.
    715         if (num_forced_urls >= kForcedTopSitesNumber)
    716           continue;
    717         num_forced_urls++;
    718       }
    719       out->push_back(urls[i]);
    720     }
    721   }
    722 }
    723 
    724 std::string TopSitesImpl::GetURLHash(const GURL& url) {
    725   // We don't use canonical URLs here to be able to blacklist only one of
    726   // the two 'duplicate' sites, e.g. 'gmail.com' and 'mail.google.com'.
    727   return base::MD5String(url.spec());
    728 }
    729 
    730 base::TimeDelta TopSitesImpl::GetUpdateDelay() {
    731   if (cache_->top_sites().size() <= arraysize(kPrepopulatedPages))
    732     return base::TimeDelta::FromSeconds(30);
    733 
    734   int64 range = kMaxUpdateIntervalMinutes - kMinUpdateIntervalMinutes;
    735   int64 minutes = kMaxUpdateIntervalMinutes -
    736       last_num_urls_changed_ * range / cache_->top_sites().size();
    737   return base::TimeDelta::FromMinutes(minutes);
    738 }
    739 
    740 void TopSitesImpl::Observe(int type,
    741                            const content::NotificationSource& source,
    742                            const content::NotificationDetails& details) {
    743   if (!loaded_)
    744     return;
    745 
    746   if (type == chrome::NOTIFICATION_HISTORY_URLS_DELETED) {
    747     content::Details<history::URLsDeletedDetails> deleted_details(details);
    748     if (deleted_details->all_history) {
    749       SetTopSites(MostVisitedURLList());
    750       backend_->ResetDatabase();
    751     } else {
    752       std::set<size_t> indices_to_delete;  // Indices into top_sites_.
    753       for (URLRows::const_iterator i = deleted_details->rows.begin();
    754            i != deleted_details->rows.end(); ++i) {
    755         if (cache_->IsKnownURL(i->url()))
    756           indices_to_delete.insert(cache_->GetURLIndex(i->url()));
    757       }
    758 
    759       if (indices_to_delete.empty())
    760         return;
    761 
    762       MostVisitedURLList new_top_sites(cache_->top_sites());
    763       for (std::set<size_t>::reverse_iterator i = indices_to_delete.rbegin();
    764            i != indices_to_delete.rend(); i++) {
    765         new_top_sites.erase(new_top_sites.begin() + *i);
    766       }
    767       SetTopSites(new_top_sites);
    768     }
    769     StartQueryForMostVisited();
    770   } else if (type == content::NOTIFICATION_NAV_ENTRY_COMMITTED) {
    771     NavigationController* controller =
    772         content::Source<NavigationController>(source).ptr();
    773     Profile* profile = Profile::FromBrowserContext(
    774         controller->GetWebContents()->GetBrowserContext());
    775     if (profile == profile_ && !IsNonForcedFull()) {
    776       content::LoadCommittedDetails* load_details =
    777           content::Details<content::LoadCommittedDetails>(details).ptr();
    778       if (!load_details)
    779         return;
    780       const GURL& url = load_details->entry->GetURL();
    781       if (!cache_->IsKnownURL(url) && HistoryService::CanAddURL(url)) {
    782         // To avoid slamming history we throttle requests when the url updates.
    783         // To do otherwise negatively impacts perf tests.
    784         RestartQueryForTopSitesTimer(GetUpdateDelay());
    785       }
    786     }
    787   }
    788 }
    789 
    790 void TopSitesImpl::SetTopSites(const MostVisitedURLList& new_top_sites) {
    791   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    792 
    793   MostVisitedURLList top_sites(new_top_sites);
    794   size_t num_forced_urls = MergeCachedForcedURLs(&top_sites);
    795   AddPrepopulatedPages(&top_sites, num_forced_urls);
    796 
    797   TopSitesDelta delta;
    798   DiffMostVisited(cache_->top_sites(), top_sites, &delta);
    799   if (!delta.deleted.empty() || !delta.added.empty() || !delta.moved.empty()) {
    800     backend_->UpdateTopSites(delta);
    801   }
    802 
    803   last_num_urls_changed_ = delta.added.size() + delta.moved.size();
    804 
    805   // We always do the following steps (setting top sites in cache, and resetting
    806   // thread safe cache ...) as this method is invoked during startup at which
    807   // point the caches haven't been updated yet.
    808   cache_->SetTopSites(top_sites);
    809 
    810   // See if we have any tmp thumbnails for the new sites.
    811   if (!temp_images_.empty()) {
    812     for (size_t i = 0; i < top_sites.size(); ++i) {
    813       const MostVisitedURL& mv = top_sites[i];
    814       GURL canonical_url = cache_->GetCanonicalURL(mv.url);
    815       // At the time we get the thumbnail redirects aren't known, so we have to
    816       // iterate through all the images.
    817       for (TempImages::iterator it = temp_images_.begin();
    818            it != temp_images_.end(); ++it) {
    819         if (canonical_url == cache_->GetCanonicalURL(it->first)) {
    820           SetPageThumbnailEncoded(
    821               mv.url, it->second.thumbnail.get(), it->second.thumbnail_score);
    822           temp_images_.erase(it);
    823           break;
    824         }
    825       }
    826     }
    827   }
    828 
    829   if (top_sites.size() - num_forced_urls >= kNonForcedTopSitesNumber)
    830     temp_images_.clear();
    831 
    832   ResetThreadSafeCache();
    833   ResetThreadSafeImageCache();
    834   NotifyTopSitesChanged();
    835 
    836   // Restart the timer that queries history for top sites. This is done to
    837   // ensure we stay in sync with history.
    838   RestartQueryForTopSitesTimer(GetUpdateDelay());
    839 }
    840 
    841 int TopSitesImpl::num_results_to_request_from_history() const {
    842   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    843 
    844   const DictionaryValue* blacklist =
    845       profile_->GetPrefs()->GetDictionary(prefs::kNtpMostVisitedURLsBlacklist);
    846   return kNonForcedTopSitesNumber + (blacklist ? blacklist->size() : 0);
    847 }
    848 
    849 void TopSitesImpl::MoveStateToLoaded() {
    850   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    851 
    852   MostVisitedURLList filtered_urls_all;
    853   MostVisitedURLList filtered_urls_nonforced;
    854   PendingCallbacks pending_callbacks;
    855   {
    856     base::AutoLock lock(lock_);
    857 
    858     if (loaded_)
    859       return;  // Don't do anything if we're already loaded.
    860     loaded_ = true;
    861 
    862     // Now that we're loaded we can service the queued up callbacks. Copy them
    863     // here and service them outside the lock.
    864     if (!pending_callbacks_.empty()) {
    865       // We always filter out forced URLs because callers of GetMostVisitedURLs
    866       // are not interested in them.
    867       filtered_urls_all = thread_safe_cache_->top_sites();
    868       filtered_urls_nonforced.assign(thread_safe_cache_->top_sites().begin() +
    869                                        thread_safe_cache_->GetNumForcedURLs(),
    870                                      thread_safe_cache_->top_sites().end());
    871       pending_callbacks.swap(pending_callbacks_);
    872     }
    873   }
    874 
    875   for (size_t i = 0; i < pending_callbacks.size(); i++)
    876     pending_callbacks[i].Run(filtered_urls_all, filtered_urls_nonforced);
    877 
    878   content::NotificationService::current()->Notify(
    879       chrome::NOTIFICATION_TOP_SITES_LOADED,
    880       content::Source<Profile>(profile_),
    881       content::Details<TopSites>(this));
    882 }
    883 
    884 void TopSitesImpl::ResetThreadSafeCache() {
    885   base::AutoLock lock(lock_);
    886   MostVisitedURLList cached;
    887   ApplyBlacklist(cache_->top_sites(), &cached);
    888   thread_safe_cache_->SetTopSites(cached);
    889 }
    890 
    891 void TopSitesImpl::ResetThreadSafeImageCache() {
    892   base::AutoLock lock(lock_);
    893   thread_safe_cache_->SetThumbnails(cache_->images());
    894 }
    895 
    896 void TopSitesImpl::NotifyTopSitesChanged() {
    897   content::NotificationService::current()->Notify(
    898       chrome::NOTIFICATION_TOP_SITES_CHANGED,
    899       content::Source<TopSites>(this),
    900       content::NotificationService::NoDetails());
    901 }
    902 
    903 void TopSitesImpl::RestartQueryForTopSitesTimer(base::TimeDelta delta) {
    904   if (timer_.IsRunning() && ((timer_start_time_ + timer_.GetCurrentDelay()) <
    905                              (base::TimeTicks::Now() + delta))) {
    906     return;
    907   }
    908 
    909   timer_start_time_ = base::TimeTicks::Now();
    910   timer_.Stop();
    911   timer_.Start(FROM_HERE, delta, this, &TopSitesImpl::TimerFired);
    912 }
    913 
    914 void TopSitesImpl::OnGotMostVisitedThumbnails(
    915     const scoped_refptr<MostVisitedThumbnails>& thumbnails) {
    916   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    917 
    918   // Set the top sites directly in the cache so that SetTopSites diffs
    919   // correctly.
    920   cache_->SetTopSites(thumbnails->most_visited);
    921   SetTopSites(thumbnails->most_visited);
    922   cache_->SetThumbnails(thumbnails->url_to_images_map);
    923 
    924   ResetThreadSafeImageCache();
    925 
    926   MoveStateToLoaded();
    927 
    928   // Start a timer that refreshes top sites from history.
    929   RestartQueryForTopSitesTimer(
    930       base::TimeDelta::FromSeconds(kUpdateIntervalSecs));
    931 }
    932 
    933 void TopSitesImpl::OnTopSitesAvailableFromHistory(
    934     CancelableRequestProvider::Handle handle,
    935     MostVisitedURLList pages) {
    936   SetTopSites(pages);
    937 
    938   // Used only in testing.
    939   content::NotificationService::current()->Notify(
    940       chrome::NOTIFICATION_TOP_SITES_UPDATED,
    941       content::Source<TopSitesImpl>(this),
    942       content::Details<CancelableRequestProvider::Handle>(&handle));
    943 }
    944 
    945 }  // namespace history
    946