Home | History | Annotate | Download | only in history
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
      6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
      7 
      8 #include <functional>
      9 #include <map>
     10 #include <set>
     11 #include <string>
     12 #include <vector>
     13 
     14 #include "base/basictypes.h"
     15 #include "base/files/file_path.h"
     16 #include "base/gtest_prod_util.h"
     17 #include "base/memory/ref_counted.h"
     18 #include "base/memory/weak_ptr.h"
     19 #include "base/strings/string16.h"
     20 #include "chrome/browser/autocomplete/autocomplete_match.h"
     21 #include "chrome/browser/autocomplete/history_provider_util.h"
     22 #include "chrome/browser/common/cancelable_request.h"
     23 #include "chrome/browser/history/history_db_task.h"
     24 #include "chrome/browser/history/history_types.h"
     25 #include "chrome/browser/history/in_memory_url_index_types.h"
     26 #include "chrome/browser/history/scored_history_match.h"
     27 #include "content/public/browser/notification_observer.h"
     28 #include "content/public/browser/notification_registrar.h"
     29 #include "sql/connection.h"
     30 
     31 class HistoryQuickProviderTest;  // Befriended by InMemoryURLIndex.
     32 class Profile;  // Passed to the InMemoryURLIndex constructor.
     33 
     34 namespace base {
     35 class Time;  // NOTE(review): not referenced elsewhere in this header.
     36 }  // namespace base
     37 
     38 namespace in_memory_url_index {
     39 class InMemoryURLIndexCacheItem;  // NOTE(review): unused in this header.
     40 }  // namespace in_memory_url_index
     41 
     42 namespace history {
     43 
     44 namespace imui = in_memory_url_index;  // NOTE(review): unused in this header.
     45 
     46 class HistoryDatabase;  // Source for rebuilding the index.
     47 class URLIndexPrivateData;  // The index's private data.
     48 struct URLsDeletedDetails;  // Argument to OnURLsDeleted().
     49 struct URLsModifiedDetails;  // Argument to OnURLsModified().
     50 struct URLVisitedDetails;  // Argument to OnURLVisited().
     51 
     52 // The URL history source.
     53 // Holds portions of the URL database in memory in an indexed form.  Used to
     54 // quickly look up matching URLs for a given query string.  Used by
     55 // the HistoryURLProvider for inline autocomplete and to provide URL
     56 // matches to the omnibox.
     57 //
     58 // Note about multi-byte codepoints and the data structures in the
     59 // InMemoryURLIndex class: One will quickly notice that no effort is made to
     60 // ensure that multi-byte character boundaries are detected when indexing the
     61 // words and characters in the URL history database except when converting
     62 // URL strings to lowercase. Multi-byte-edness makes no difference when
     63 // indexing or when searching the index as the final filtering of results
     64 // is dependent on the comparison of a string of bytes, not individual
     65 // characters. While the lookup of those bytes during a search in the
     66 // |char_word_map_| could serve up words in which the individual char16
     67 // occurs as a portion of a composite character, the next filtering step
     68 // will eliminate such words except in the case where a single character
     69 // is being searched on and that character occurs as the second char16 of a
     70 // multi-char16 instance.
     71 class InMemoryURLIndex : public content::NotificationObserver,
     72                          public base::SupportsWeakPtr<InMemoryURLIndex> {
     73  public:
     74   // Defines an abstract class which is notified upon completion of restoring
     75   // the index's private data either by reading from the cache file or by
     76   // rebuilding from the history database.
     77   class RestoreCacheObserver {
     78    public:
     79     virtual ~RestoreCacheObserver();
     80 
     81     // Callback that lets the observer know that the restore operation has
     82     // completed. |succeeded| indicates if the restore was successful. This is
     83     // called on the UI thread.
     84     virtual void OnCacheRestoreFinished(bool succeeded) = 0;
     85   };
     86 
     87   // Defines an abstract class which is notified upon completion of saving
     88   // the index's private data to the cache file.
     89   class SaveCacheObserver {
     90    public:
     91     virtual ~SaveCacheObserver();
     92 
     93     // Callback reporting that the save has completed; |succeeded| indicates
     94     // if the save was successful. This is called on the UI thread.
     95     virtual void OnCacheSaveFinished(bool succeeded) = 0;
     96   };
     97 
     98   // |profile|, which may be NULL during unit testing, is used to register for
     99   // history changes. |history_dir| is a path to the directory containing the
    100   // history database within the profile wherein the cache and transaction
    101   // journals will be stored. |languages| gives a list of language encodings by
    102   // which URLs and omnibox searches are broken down into words and characters.
    103   InMemoryURLIndex(Profile* profile,
    104                    const base::FilePath& history_dir,
    105                    const std::string& languages);
    106   virtual ~InMemoryURLIndex();
    107 
    108   // Opens and prepares the index of historical URL visits. If the index private
    109   // data cannot be restored from its cache file then it is rebuilt from the
    110   // history database.
    111   void Init();
    112 
    113   // Signals that any outstanding initialization should be canceled and
    114   // flushes the cache to disk.
    115   void ShutDown();
    116 
    117   // Scans the history index and returns a vector with all scored, matching
    118   // history items. This entry point simply forwards the call on to the
    119   // URLIndexPrivateData class. For a complete description of this function
    120   // refer to that class.  If |cursor_position| is string16::npos, the
    121   // function doesn't do anything special with the cursor; this is equivalent
    122   // to the cursor being at the end.
    123   ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string,
    124                                             size_t cursor_position);
    125 
    126   // Deletes the index entry, if any, for the given |url|.
    127   void DeleteURL(const GURL& url);
    128 
    129   // Sets the optional observers for completion of restoring and saving of the
    130   // index's private data.
    131   void set_restore_cache_observer(
    132       RestoreCacheObserver* restore_cache_observer) {
    133     restore_cache_observer_ = restore_cache_observer;
    134   }
    135   void set_save_cache_observer(SaveCacheObserver* save_cache_observer) {
    136     save_cache_observer_ = save_cache_observer;
    137   }
    138 
    139   // Returns true once the index restoration is complete.
    140   bool restored() const {
    141     return restored_;
    142   }
    143 
    144  private:
    145   friend class ::HistoryQuickProviderTest;
    146   friend class InMemoryURLIndexTest;
    147   friend class InMemoryURLIndexCacheTest;
    148   FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization);
    149 
    150   // Creating one of me without a history path is not allowed (tests excepted).
    151   InMemoryURLIndex();
    152 
    153   // HistoryDBTask used to rebuild our private data from the history database.
    154   class RebuildPrivateDataFromHistoryDBTask : public HistoryDBTask {
    155    public:
    156     explicit RebuildPrivateDataFromHistoryDBTask(
    157         InMemoryURLIndex* index,
    158         const std::string& languages,
    159         const std::set<std::string>& scheme_whitelist);
    160 
    161     virtual bool RunOnDBThread(HistoryBackend* backend,
    162                                history::HistoryDatabase* db) OVERRIDE;
    163     virtual void DoneRunOnMainThread() OVERRIDE;
    164 
    165    private:
    166     virtual ~RebuildPrivateDataFromHistoryDBTask();
    167 
    168     InMemoryURLIndex* index_;  // Call back to this index at completion.
    169     std::string languages_;  // Languages for word-breaking.
    170     std::set<std::string> scheme_whitelist_;  // Schemes to be indexed.
    171     bool succeeded_;  // Indicates if the rebuild was successful.
    172     scoped_refptr<URLIndexPrivateData> data_;  // The rebuilt private data.
    173 
    174     DISALLOW_COPY_AND_ASSIGN(RebuildPrivateDataFromHistoryDBTask);
    175   };
    176 
    177   // Initializes all index data members in preparation for restoring the index
    178   // from the cache or a complete rebuild from the history database.
    179   void ClearPrivateData();
    180 
    181   // Constructs a file path for the cache file within the same directory where
    182   // the history database is kept and saves that path to |file_path|. Returns
    183   // true if |file_path| can be successfully constructed. (This function
    184   // provided as a hook for unit testing.)
    185   bool GetCacheFilePath(base::FilePath* file_path);
    186 
    187   // Restores the index's private data from the cache file stored in the
    188   // profile directory.
    189   void PostRestoreFromCacheFileTask();
    190 
    191   // Schedules a history task to rebuild our private data from the history
    192   // database.
    193   void ScheduleRebuildFromHistory();
    194 
    195   // Callback used by RebuildPrivateDataFromHistoryDBTask to signal completion
    196   // of rebuilding our private data from the history database. |succeeded|
    197   // will be true if the rebuild was successful. |private_data| will point to
    198   // a new instance of the private data just rebuilt.
    199   void DoneRebuidingPrivateDataFromHistoryDB(
    200       bool succeeded,
    201       scoped_refptr<URLIndexPrivateData> private_data);
    202 
    203   // Rebuilds the history index from the history database in |history_db|.
    204   // Used for unit testing only.
    205   void RebuildFromHistory(HistoryDatabase* history_db);
    206 
    207   // Determines if the private data was successfully reloaded from the cache
    208   // file or if the private data must be rebuilt from the history database.
    209   // |private_data_ptr|'s data will be NULL if the cache file load failed. If
    210   // successful, sets the private data and notifies any
    211   // |restore_cache_observer_|. Otherwise, kicks off a rebuild from the history
    212   // database.
    213   void OnCacheLoadDone(scoped_refptr<URLIndexPrivateData> private_data_ptr);
    214 
    215   // Callback function that sets the private data from the just-restored-from-
    216   // file |private_data|. Notifies any |restore_cache_observer_| that the
    217   // restore has succeeded.
    218   void OnCacheRestored(URLIndexPrivateData* private_data);
    219 
    220   // Posts a task to cache the index private data and write the cache file to
    221   // the profile directory.
    222   void PostSaveToCacheFileTask();
    223 
    224   // Saves |private_data_| to the given |path|. Runs on the UI thread.
    225   // Provided for unit testing so that a test cache file can be used.
    226   void DoSaveToCacheFile(const base::FilePath& path);
    227 
    228   // Notifies the observer, if any, of the success of the private data caching.
    229   // |succeeded| is true on a successful save.
    230   void OnCacheSaveDone(bool succeeded);
    231 
    232   // Handles notifications of history changes.
    233   virtual void Observe(int notification_type,
    234                        const content::NotificationSource& source,
    235                        const content::NotificationDetails& details) OVERRIDE;
    236 
    237   // Notification handlers.
    238   void OnURLVisited(const URLVisitedDetails* details);
    239   void OnURLsModified(const URLsModifiedDetails* details);
    240   void OnURLsDeleted(const URLsDeletedDetails* details);
    241 
    242   // Sets the directory wherein the cache file will be maintained.
    243   // For unit test usage only.
    244   void set_history_dir(const base::FilePath& dir_path) {
    245     history_dir_ = dir_path;
    246   }
    247 
    248   // Returns a pointer to our private data. For unit testing only.
    249   URLIndexPrivateData* private_data() { return private_data_.get(); }
    250 
    251   // Returns the set of whitelisted schemes. For unit testing only.
    252   const std::set<std::string>& scheme_whitelist() { return scheme_whitelist_; }
    253 
    254   // The profile, may be null when testing.
    255   Profile* profile_;
    256 
    257   // Directory where cache file resides. This is, except when unit testing,
    258   // the same directory in which the profile's history database is found. It
    259   // should never be empty.
    260   base::FilePath history_dir_;
    261 
    262   // Languages used during the word-breaking process during indexing.
    263   std::string languages_;
    264 
    265   // Only URLs with a whitelisted scheme are indexed.
    266   std::set<std::string> scheme_whitelist_;
    267 
    268   // The index's durable private data.
    269   scoped_refptr<URLIndexPrivateData> private_data_;
    270 
    271   // Observers to notify upon restoration or save of the private data cache.
    272   RestoreCacheObserver* restore_cache_observer_;
    273   SaveCacheObserver* save_cache_observer_;
    274 
    275   CancelableRequestConsumer cache_reader_consumer_;
    276   content::NotificationRegistrar registrar_;
    277 
    278   // Set to true once the shutdown process has begun.
    279   bool shutdown_;
    280 
    281   // Set to true once the index restoration is complete.
    282   bool restored_;
    283 
    284   // Set to true when changes to the index have been made and the index needs
    285   // to be cached. Set to false when the index has been cached. Used as a
    286   // temporary safety check to ensure that the cache is saved before the
    287   // index has been destructed.
    288   // TODO(mrossetti): Eliminate once the transition to SQLite has been done.
    289   // http://crbug.com/83659
    290   bool needs_to_be_cached_;
    291 
    292   DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex);
    293 };
    294 
    295 }  // namespace history
    296 
    297 #endif  // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
    298