Home | History | Annotate | Download | only in history
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_HISTORY_TOP_SITES_H_
      6 #define CHROME_BROWSER_HISTORY_TOP_SITES_H_
      7 #pragma once
      8 
      9 #include <list>
     10 #include <set>
     11 #include <string>
     12 #include <utility>
     13 
     14 #include "base/basictypes.h"
     15 #include "base/gtest_prod_util.h"
     16 #include "base/memory/ref_counted.h"
     17 #include "base/memory/ref_counted_memory.h"
     18 #include "base/synchronization/lock.h"
     19 #include "base/time.h"
     20 #include "base/timer.h"
     21 #include "chrome/browser/history/history_types.h"
     22 #include "chrome/browser/history/history.h"
     23 #include "chrome/browser/history/page_usage_data.h"
     24 #include "chrome/common/thumbnail_score.h"
     25 #include "content/browser/cancelable_request.h"
     26 #include "googleurl/src/gurl.h"
     27 
     28 class DictionaryValue;
     29 class FilePath;
     30 class SkBitmap;
     31 class Profile;
     32 
     33 namespace history {
     34 
     35 class TopSitesCache;
     36 class TopSitesBackend;
     37 class TopSitesTest;
     38 
     39 // Stores the data for the top "most visited" sites. This includes a cache of
     40 // the most visited data from history, as well as the corresponding thumbnails
     41 // of those sites.
     42 //
     43 // This class allows requests for most visited urls and thumbnails on any
     44 // thread. All other methods must be invoked on the UI thread. All mutations
     45 // to internal state happen on the UI thread and are scheduled to update the
     46 // db using TopSitesBackend.
     47 class TopSites
     48     : public base::RefCountedThreadSafe<TopSites>,
     49       public NotificationObserver,
     50       public CancelableRequestProvider {
     51  public:
     52   explicit TopSites(Profile* profile);
     53 
     54   // Initializes TopSites.
     55   void Init(const FilePath& db_name);
     56 
     57   // Sets the given thumbnail for the given URL. Returns true if the thumbnail
     58   // was updated. False means either the URL wasn't known to us, or we felt
     59   // that our current thumbnail was superior to the given one.
     60   bool SetPageThumbnail(const GURL& url,
     61                         const SkBitmap& thumbnail,
     62                         const ThumbnailScore& score);
     63 
     64   // Callback for GetMostVisitedURLs.
     65   typedef Callback1<const MostVisitedURLList&>::Type GetTopSitesCallback;
     66   typedef std::set<scoped_refptr<CancelableRequest<GetTopSitesCallback> > >
     67       PendingCallbackSet;
     68 
     69   // Returns a list of most visited URLs via a callback.
     70   // This may be invoked on any thread.
     71   // NOTE: the callback may be called immediately if we have the data cached.
     72   void GetMostVisitedURLs(CancelableRequestConsumer* consumer,
     73                           GetTopSitesCallback* callback);
     74 
     75   // Get a thumbnail for a given page. Returns true iff we have the thumbnail.
     76   // This may be invoked on any thread.
     77   // As this method may be invoked on any thread the ref count needs to be
     78   // upped before this method returns, so this takes a scoped_refptr*.
     79   bool GetPageThumbnail(const GURL& url,
     80                         scoped_refptr<RefCountedBytes>* bytes);
     81 
     82   // Get a thumbnail score for a given page. Returns true iff we have the
     83   // thumbnail score.  This may be invoked on any thread. The score will
     84   // be copied to |score|.
     85   virtual bool GetPageThumbnailScore(const GURL& url, ThumbnailScore* score);
     86 
     87   // Get a temporary thumbnail score for a given page. Returns true iff we
     88   // have the thumbnail score. Useful when checking if we should update a
     89   // thumbnail for a given page. The score will be copied to |score|.
     90   bool GetTemporaryPageThumbnailScore(const GURL& url, ThumbnailScore* score);
     91 
     92   // Invoked from History if migration is needed. If this is invoked it will
     93   // be before HistoryLoaded is invoked.
     94   void MigrateFromHistory();
     95 
     96   // Invoked with data from migrating thumbnails out of history.
     97   void FinishHistoryMigration(const ThumbnailMigration& data);
     98 
     99   // Invoked from history when it finishes loading. If MigrateFromHistory was
    100   // not invoked at this point then we load from the top sites service.
    101   void HistoryLoaded();
    102 
    103   // Blacklisted URLs
    104 
    105   // Returns true if there is at least one item in the blacklist.
    106   bool HasBlacklistedItems() const;
    107 
    108   // Adds a URL to the blacklist.
    109   void AddBlacklistedURL(const GURL& url);
    110 
    111   // Removes a URL from the blacklist.
    112   void RemoveBlacklistedURL(const GURL& url);
    113 
    114   // Returns true if the URL is blacklisted.
    115   bool IsBlacklisted(const GURL& url);
    116 
    117   // Clear the blacklist.
    118   void ClearBlacklistedURLs();
    119 
    120   // Pinned URLs
    121 
    122   // Pin a URL at |index|.
    123   void AddPinnedURL(const GURL& url, size_t index);
    124 
    125   // Returns true if a URL is pinned.
    126   bool IsURLPinned(const GURL& url);
    127 
    128   // Unpin a URL.
    129   void RemovePinnedURL(const GURL& url);
    130 
    131   // Return a URL pinned at |index| via |out|. Returns true if there
    132   // is a URL pinned at |index|.
    133   bool GetPinnedURLAtIndex(size_t index, GURL* out);
    134 
    135   // Shuts down top sites.
    136   void Shutdown();
    137 
    138   // Generates the diff of things that happened between "old" and "new."
    139   //
    140   // The URLs that are in "new" but not "old" will have their index into
    141   // "new" put in |added_urls|. The URLs that are in "old" but not "new" will
    142   // have their index into "old" put into |deleted_urls|.
    143   //
    144   // URLs appearing in both old and new lists but having different indices will
    145   // have their index into "new" be put into |moved_urls|.
    146   static void DiffMostVisited(const MostVisitedURLList& old_list,
    147                               const MostVisitedURLList& new_list,
    148                               TopSitesDelta* delta);
    149 
    150   // Query history service for the list of available thumbnails. Returns the
    151   // handle for the request, or NULL if a request could not be made.
    152   // Public only for testing purposes.
    153   CancelableRequestProvider::Handle StartQueryForMostVisited();
    154 
    155   bool loaded() const { return loaded_; }
    156 
    157   // Returns true if the given URL is known to the top sites service.
    158   // This function also returns false if TopSites isn't loaded yet.
    159   virtual bool IsKnownURL(const GURL& url);
    160 
    161   // Returns true if the top sites list is full (i.e. we already have the
    162   // maximum number of top sites).  This function also returns false if
    163   // TopSites isn't loaded yet.
    164   virtual bool IsFull();
    165 
    166  protected:
    167   // For allowing inheritance.
    168   virtual ~TopSites();
    169 
    170  private:
    171   friend class base::RefCountedThreadSafe<TopSites>;
    172   friend class TopSitesTest;
    173 
    174   typedef std::pair<GURL, Images> TempImage;
    175   typedef std::list<TempImage> TempImages;
    176 
    177   // Enumeration of the possible states history can be in.
    178   enum HistoryLoadState {
    179     // We're waiting for history to finish loading.
    180     HISTORY_LOADING,
    181 
    182     // History finished loading and we need to migrate top sites out of history.
    183     HISTORY_MIGRATING,
    184 
    185     // History is loaded.
    186     HISTORY_LOADED
    187   };
    188 
    189   // Enumeration of possible states the top sites backend can be in.
    190   enum TopSitesLoadState {
    191     // We're waiting for the backend to finish loading.
    192     TOP_SITES_LOADING,
    193 
    194     // The backend finished loading, but we may need to migrate. This is true if
    195     // the top sites db didn't exist, or if the db existed but is from an old
    196     // version.
    197     TOP_SITES_LOADED_WAITING_FOR_HISTORY,
    198 
    199     // Top sites is loaded.
    200     TOP_SITES_LOADED
    201   };
    202 
    203   // Sets the thumbnail without writing to the database. Useful when
    204   // reading last known top sites from the DB.
    205   // Returns true if the thumbnail was set, false if the existing one is better.
    206   bool SetPageThumbnailNoDB(const GURL& url,
    207                             const RefCountedBytes* thumbnail_data,
    208                             const ThumbnailScore& score);
    209 
    210   // A version of SetPageThumbnail that takes RefCountedBytes as
    211   // returned by HistoryService.
    212   bool SetPageThumbnailEncoded(const GURL& url,
    213                                const RefCountedBytes* thumbnail,
    214                                const ThumbnailScore& score);
    215 
    216   // Encodes the bitmap to bytes for storage to the db. Returns true if the
    217   // bitmap was successfully encoded.
    218   static bool EncodeBitmap(const SkBitmap& bitmap,
    219                            scoped_refptr<RefCountedBytes>* bytes);
    220 
    221   // Removes the cached thumbnail for url. Does nothing if |url| is not cached
    222   // in |temp_images_|.
    223   void RemoveTemporaryThumbnailByURL(const GURL& url);
    224 
    225   // Add a thumbnail for an unknown url. See |temp_images_|.
    226   void AddTemporaryThumbnail(const GURL& url,
    227                              const RefCountedBytes* thumbnail,
    228                              const ThumbnailScore& score);
    229 
    230   // Called by our timer. Starts the query for the most visited sites.
    231   void TimerFired();
    232 
    233   // Finds the given URL in the redirect chain for the given TopSite, and
    234   // returns the distance from the destination in hops that the given URL is.
    235   // The URL is assumed to be in the list. The destination is 0.
    236   static int GetRedirectDistanceForURL(const MostVisitedURL& most_visited,
    237                                        const GURL& url);
    238 
    239   // Returns the set of prepopulate pages.
    240   static MostVisitedURLList GetPrepopulatePages();
    241 
    242   // Add prepopulated pages: 'welcome to Chrome' and themes gallery to |urls|.
    243   // Returns true if any pages were added.
    244   static bool AddPrepopulatedPages(MostVisitedURLList* urls);
    245 
    246   // Convert pinned_urls_ dictionary to the new format. Use URLs as
    247   // dictionary keys.
    248   void MigratePinnedURLs();
    249 
    250   // Takes |urls|, produces a copy of it in |out| after removing
    251   // blacklisted URLs and reordering pinned URLs.
    252   void ApplyBlacklistAndPinnedURLs(const MostVisitedURLList& urls,
    253                                    MostVisitedURLList* out);
    254 
    255   // Converts a url into a canonical string representation.
    256   std::string GetURLString(const GURL& url);
    257 
    258   // Returns an MD5 hash of the URL. Hashing is required for blacklisted URLs.
    259   std::string GetURLHash(const GURL& url);
    260 
    261   // Returns the delay until the next update of history is needed.
    262   // Uses num_urls_changed (presumably |last_num_urls_changed_| -- confirm).
    263   base::TimeDelta GetUpdateDelay();
    264 
    265   // Executes all of the callbacks in |pending_callbacks|. This is used after
    266   // we finish loading if any requests came in before we loaded.
    267   static void ProcessPendingCallbacks(
    268       const PendingCallbackSet& pending_callbacks,
    269       const MostVisitedURLList& urls);
    270 
    271   // Implementation of NotificationObserver.
    272   virtual void Observe(NotificationType type,
    273                        const NotificationSource& source,
    274                        const NotificationDetails& details);
    275 
    276   // Resets top_sites_ and updates the db (in the background). All mutations to
    277   // top_sites_ *must* go through this.
    278   void SetTopSites(const MostVisitedURLList& new_top_sites);
    279 
    280   // Returns the number of most visited results to request from history. This
    281   // changes depending upon how many urls have been blacklisted.
    282   int num_results_to_request_from_history() const;
    283 
    284   // Invoked when transitioning to LOADED. Notifies any queued up callbacks.
    285   void MoveStateToLoaded();
    286 
    287   void ResetThreadSafeCache();
    288 
    289   void ResetThreadSafeImageCache();
    290 
    291   // Stops and starts timer with a delay of |delta|.
    292   void RestartQueryForTopSitesTimer(base::TimeDelta delta);
    293 
    294   // Callback after TopSitesBackend has finished migration. This tells history
    295   // to finish its side of migration (nuking thumbnails on disk).
    296   void OnHistoryMigrationWrittenToDisk(
    297       CancelableRequestProvider::Handle handle);
    298 
    299   // Callback with the top sites/thumbnails (presumably from TopSitesBackend).
    300   void OnGotMostVisitedThumbnails(CancelableRequestProvider::Handle handle,
    301                                   scoped_refptr<MostVisitedThumbnails> data,
    302                                   bool may_need_history_migration);
    303 
    304   // Called when history service returns a list of top URLs.
    305   void OnTopSitesAvailableFromHistory(CancelableRequestProvider::Handle handle,
    306                                       MostVisitedURLList data);
    307 
    308   scoped_refptr<TopSitesBackend> backend_;
    309 
    310   // The top sites data.
    311   scoped_ptr<TopSitesCache> cache_;
    312 
    313   // Copy of the top sites data that may be accessed on any thread (assuming
    314   // you hold |lock_|). The data in |thread_safe_cache_| has blacklisted and
    315   // pinned urls applied (|cache_| does not).
    316   scoped_ptr<TopSitesCache> thread_safe_cache_;
    317 
    318   Profile* profile_;
    319 
    320   // Lock used to access |thread_safe_cache_|.
    321   mutable base::Lock lock_;
    322 
    323   CancelableRequestConsumer cancelable_consumer_;
    324 
    325   // Timer that asks history for the top sites. This is used to make sure our
    326   // data stays in sync with history.
    327   base::OneShotTimer<TopSites> timer_;
    328 
    329   // The time we started |timer_| at. Only valid if |timer_| is running.
    330   base::TimeTicks timer_start_time_;
    331 
    332   NotificationRegistrar registrar_;
    333 
    334   // The number of URLs changed on the last update.
    335   size_t last_num_urls_changed_;
    336 
    337   // The set of pending requests for the top sites list. Can only be
    338   // non-empty at startup. After we read the top sites from the DB, we'll
    339   // always have a cached list.
    340   PendingCallbackSet pending_callbacks_;
    341 
    342   // Stores thumbnails for unknown pages. When SetPageThumbnail is
    343   // called, if we don't know about that URL yet and we don't have
    344   // enough Top Sites (new profile), we store it until the next
    345   // SetTopSites call.
    346   TempImages temp_images_;
    347 
    348   // Blacklisted and pinned URLs are stored in Preferences.
    349 
    350   // Blacklisted URLs. They are filtered out from the list of Top
    351   // Sites when GetMostVisitedURLs is called. Note that we are still
    352   // storing all URLs, but filtering on access. It is a dictionary,
    353   // key is the URL, value is a dummy value. This is owned by the
    354   // PrefService.
    355   const DictionaryValue* blacklist_;
    356 
    357   // This is a dictionary for the pinned URLs for the most visited part of
    358   // the new tab page. Key is the URL, value is index where it is pinned at (may
    359   // be the same as key). This is owned by the PrefService.
    360   const DictionaryValue* pinned_urls_;
    361 
    362   // See description above HistoryLoadState.
    363   HistoryLoadState history_state_;
    364 
    365   // See description above TopSitesLoadState.
    366   TopSitesLoadState top_sites_state_;
    367 
    368   // Are we loaded?
    369   bool loaded_;
    370 
    371   DISALLOW_COPY_AND_ASSIGN(TopSites);
    372 };
    373 
    374 }  // namespace history
    375 
    376 #endif  // CHROME_BROWSER_HISTORY_TOP_SITES_H_
    377