// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_
#define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_

#include <functional>
#include <map>
#include <set>
#include <string>
#include <vector>

#include "base/basictypes.h"
#include "base/files/file_path.h"
#include "base/gtest_prod_util.h"
#include "base/memory/ref_counted.h"
#include "base/memory/weak_ptr.h"
#include "base/strings/string16.h"
#include "chrome/browser/autocomplete/autocomplete_match.h"
#include "chrome/browser/autocomplete/history_provider_util.h"
#include "chrome/browser/common/cancelable_request.h"
#include "chrome/browser/history/history_db_task.h"
#include "chrome/browser/history/history_types.h"
#include "chrome/browser/history/in_memory_url_index_types.h"
#include "chrome/browser/history/scored_history_match.h"
#include "content/public/browser/notification_observer.h"
#include "content/public/browser/notification_registrar.h"
#include "sql/connection.h"

class HistoryQuickProviderTest;
class Profile;

namespace base {
class Time;
}

namespace in_memory_url_index {
class InMemoryURLIndexCacheItem;
}

namespace history {

namespace imui = in_memory_url_index;

class HistoryDatabase;
class URLIndexPrivateData;
struct URLsDeletedDetails;
struct URLsModifiedDetails;
struct URLVisitedDetails;

// The URL history source.
// Holds portions of the URL database in memory in an indexed form.  Used to
// quickly look up matching URLs for a given query string.  Used by
// the HistoryURLProvider for inline autocomplete and to provide URL
// matches to the omnibox.
//
// Note about multi-byte codepoints and the data structures in the
// InMemoryURLIndex class: One will quickly notice that no effort is made to
// ensure that multi-byte character boundaries are detected when indexing the
// words and characters in the URL history database except when converting
// URL strings to lowercase. Multi-byte-edness makes no difference when
// indexing or when searching the index as the final filtering of results
// is dependent on the comparison of a string of bytes, not individual
// characters. While the lookup of those bytes during a search in the
// |char_word_map_| could serve up words in which the individual char16
// occurs as a portion of a composite character, the next filtering step
// will eliminate such words except in the case where a single character
// is being searched on and which character occurs as the second char16 of a
// multi-char16 instance.
class InMemoryURLIndex : public content::NotificationObserver,
                         public base::SupportsWeakPtr<InMemoryURLIndex> {
 public:
  // Defines an abstract class which is notified upon completion of restoring
  // the index's private data either by reading from the cache file or by
  // rebuilding from the history database.
  class RestoreCacheObserver {
   public:
    virtual ~RestoreCacheObserver();

    // Callback that lets the observer know that the restore operation has
    // completed. |succeeded| indicates if the restore was successful. This is
    // called on the UI thread.
    virtual void OnCacheRestoreFinished(bool succeeded) = 0;
  };

  // Defines an abstract class which is notified upon completion of saving
  // the index's private data to the cache file.
  class SaveCacheObserver {
   public:
    virtual ~SaveCacheObserver();

    // Callback that lets the observer know that the save operation has
    // completed. |succeeded| indicates if the save was successful. This is
    // called on the UI thread.
    virtual void OnCacheSaveFinished(bool succeeded) = 0;
  };

  // |profile|, which may be NULL during unit testing, is used to register for
  // history changes. |history_dir| is a path to the directory containing the
  // history database within the profile wherein the cache and transaction
  // journals will be stored. |languages| gives a list of language encodings by
  // which URLs and omnibox searches are broken down into words and characters.
  InMemoryURLIndex(Profile* profile,
                   const base::FilePath& history_dir,
                   const std::string& languages);
  virtual ~InMemoryURLIndex();

  // Opens and prepares the index of historical URL visits. If the index private
  // data cannot be restored from its cache file then it is rebuilt from the
  // history database.
  void Init();

  // Signals that any outstanding initialization should be canceled and
  // flushes the cache to disk.
  void ShutDown();

  // Scans the history index and returns a vector with all scored, matching
  // history items. This entry point simply forwards the call on to the
  // URLIndexPrivateData class. For a complete description of this function
  // refer to that class.  If |cursor_position| is string16::npos, the
  // function doesn't do anything special with the cursor; this is equivalent
  // to the cursor being at the end.
  ScoredHistoryMatches HistoryItemsForTerms(const string16& term_string,
                                            size_t cursor_position);

  // Deletes the index entry, if any, for the given |url|.
  void DeleteURL(const GURL& url);

  // Sets the optional observers for completion of restoral and saving of the
  // index's private data. The observers are stored as raw pointers.
  void set_restore_cache_observer(
      RestoreCacheObserver* restore_cache_observer) {
    restore_cache_observer_ = restore_cache_observer;
  }
  void set_save_cache_observer(SaveCacheObserver* save_cache_observer) {
    save_cache_observer_ = save_cache_observer;
  }

  // Indicates that the index restoration is complete.
  bool restored() const {
    return restored_;
  }

 private:
  friend class ::HistoryQuickProviderTest;
  friend class InMemoryURLIndexTest;
  friend class InMemoryURLIndexCacheTest;
  FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization);

  // Creating one of me without a history path is not allowed (tests excepted).
  InMemoryURLIndex();

  // HistoryDBTask used to rebuild our private data from the history database.
  class RebuildPrivateDataFromHistoryDBTask : public HistoryDBTask {
   public:
    // |index| is the index to call back at completion, |languages| is used
    // for word-breaking, and |scheme_whitelist| names the schemes to index.
    explicit RebuildPrivateDataFromHistoryDBTask(
        InMemoryURLIndex* index,
        const std::string& languages,
        const std::set<std::string>& scheme_whitelist);

    virtual bool RunOnDBThread(HistoryBackend* backend,
                               history::HistoryDatabase* db) OVERRIDE;
    virtual void DoneRunOnMainThread() OVERRIDE;

   private:
    // NOTE(review): the destructor is private, presumably because the task is
    // destroyed through HistoryDBTask's own lifetime management -- confirm
    // against history_db_task.h.
    virtual ~RebuildPrivateDataFromHistoryDBTask();

    InMemoryURLIndex* index_;  // Call back to this index at completion.
    std::string languages_;  // Languages for word-breaking.
    std::set<std::string> scheme_whitelist_;  // Schemes to be indexed.
    bool succeeded_;  // Indicates if the rebuild was successful.
    scoped_refptr<URLIndexPrivateData> data_;  // The rebuilt private data.

    DISALLOW_COPY_AND_ASSIGN(RebuildPrivateDataFromHistoryDBTask);
  };

  // Initializes all index data members in preparation for restoring the index
  // from the cache or a complete rebuild from the history database.
  void ClearPrivateData();

  // Constructs a file path for the cache file within the same directory where
  // the history database is kept and saves that path to |file_path|. Returns
  // true if |file_path| can be successfully constructed. (This function
  // provided as a hook for unit testing.)
  bool GetCacheFilePath(base::FilePath* file_path);

  // Restores the index's private data from the cache file stored in the
  // profile directory.
  void PostRestoreFromCacheFileTask();

  // Schedules a history task to rebuild our private data from the history
  // database.
  void ScheduleRebuildFromHistory();

  // Callback used by RebuildPrivateDataFromHistoryDBTask to signal completion
  // of rebuilding our private data from the history database. |succeeded|
  // will be true if the rebuild was successful. |private_data| will point to
  // a new instance of the private data just rebuilt.
  // NOTE(review): "Rebuiding" is misspelled in the method name; renaming it
  // requires updating the definition and callers outside this header.
  void DoneRebuidingPrivateDataFromHistoryDB(
      bool succeeded,
      scoped_refptr<URLIndexPrivateData> private_data);

  // Rebuilds the history index from the history database in |history_db|.
  // Used for unit testing only.
  void RebuildFromHistory(HistoryDatabase* history_db);

  // Determines if the private data was successfully reloaded from the cache
  // file or if the private data must be rebuilt from the history database.
  // |private_data_ptr|'s data will be NULL if the cache file load failed. If
  // successful, sets the private data and notifies any
  // |restore_cache_observer_|. Otherwise, kicks off a rebuild from the history
  // database.
  void OnCacheLoadDone(scoped_refptr<URLIndexPrivateData> private_data_ptr);

  // Callback function that sets the private data from the just-restored-from-
  // file |private_data|. Notifies any |restore_cache_observer_| that the
  // restore has succeeded.
  void OnCacheRestored(URLIndexPrivateData* private_data);

  // Posts a task to cache the index private data and write the cache file to
  // the profile directory.
  void PostSaveToCacheFileTask();

  // Saves private_data_ to the given |path|. Runs on the UI thread.
  // Provided for unit testing so that a test cache file can be used.
  void DoSaveToCacheFile(const base::FilePath& path);

  // Notifies the observer, if any, of the success of the private data caching.
  // |succeeded| is true on a successful save.
  void OnCacheSaveDone(bool succeeded);

  // Handles notifications of history changes.
  virtual void Observe(int notification_type,
                       const content::NotificationSource& source,
                       const content::NotificationDetails& details) OVERRIDE;

  // Notification handlers, one per kind of history change detail.
  void OnURLVisited(const URLVisitedDetails* details);
  void OnURLsModified(const URLsModifiedDetails* details);
  void OnURLsDeleted(const URLsDeletedDetails* details);

  // Sets the directory wherein the cache file will be maintained.
  // For unit test usage only.
  void set_history_dir(const base::FilePath& dir_path) {
    history_dir_ = dir_path;
  }

  // Returns a pointer to our private data. For unit testing only.
  URLIndexPrivateData* private_data() { return private_data_.get(); }

  // Returns the set of whitelisted schemes. For unit testing only.
  const std::set<std::string>& scheme_whitelist() { return scheme_whitelist_; }

  // The profile, may be null when testing.
  Profile* profile_;

  // Directory where cache file resides. This is, except when unit testing,
  // the same directory in which the profile's history database is found. It
  // should never be empty.
  base::FilePath history_dir_;

  // Languages used during the word-breaking process during indexing.
  std::string languages_;

  // Only URLs with a whitelisted scheme are indexed.
  std::set<std::string> scheme_whitelist_;

  // The index's durable private data.
  scoped_refptr<URLIndexPrivateData> private_data_;

  // Observers to notify upon restoral or save of the private data cache.
  // Both are optional and are set through the public setters above.
  RestoreCacheObserver* restore_cache_observer_;
  SaveCacheObserver* save_cache_observer_;

  // NOTE(review): presumably tracks outstanding cache-read requests so that
  // they can be canceled -- confirm against the .cc file.
  CancelableRequestConsumer cache_reader_consumer_;
  // NOTE(review): presumably holds the registrations for the history change
  // notifications delivered to Observe() -- confirm against the .cc file.
  content::NotificationRegistrar registrar_;

  // Set to true once the shutdown process has begun.
  bool shutdown_;

  // Set to true once the index restoration is complete.
  bool restored_;

  // Set to true when changes to the index have been made and the index needs
  // to be cached. Set to false when the index has been cached. Used as a
  // temporary safety check to ensure that the cache is saved before the
  // index has been destructed.
  // TODO(mrossetti): Eliminate once the transition to SQLite has been done.
  // http://crbug.com/83659
  bool needs_to_be_cached_;

  DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex);
};

}  // namespace history

#endif  // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_