1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ 6 #define CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ 7 8 #include <functional> 9 #include <map> 10 #include <set> 11 #include <string> 12 #include <vector> 13 14 #include "base/basictypes.h" 15 #include "base/files/file_path.h" 16 #include "base/gtest_prod_util.h" 17 #include "base/memory/ref_counted.h" 18 #include "base/memory/weak_ptr.h" 19 #include "base/strings/string16.h" 20 #include "chrome/browser/common/cancelable_request.h" 21 #include "chrome/browser/history/history_db_task.h" 22 #include "chrome/browser/history/history_types.h" 23 #include "chrome/browser/history/in_memory_url_index_types.h" 24 #include "chrome/browser/history/scored_history_match.h" 25 #include "content/public/browser/notification_observer.h" 26 #include "content/public/browser/notification_registrar.h" 27 #include "sql/connection.h" 28 29 class HistoryQuickProviderTest; 30 class Profile; 31 32 namespace base { 33 class Time; 34 } 35 36 namespace in_memory_url_index { 37 class InMemoryURLIndexCacheItem; 38 } 39 40 namespace history { 41 42 namespace imui = in_memory_url_index; 43 44 class HistoryClient; 45 class HistoryDatabase; 46 class URLIndexPrivateData; 47 struct URLsDeletedDetails; 48 struct URLsModifiedDetails; 49 struct URLVisitedDetails; 50 51 // The URL history source. 52 // Holds portions of the URL database in memory in an indexed form. Used to 53 // quickly look up matching URLs for a given query string. Used by 54 // the HistoryURLProvider for inline autocomplete and to provide URL 55 // matches to the omnibox. 56 // 57 // Note about multi-byte codepoints and the data structures in the 58 // InMemoryURLIndex class: One will quickly notice that no effort is made to 59 // insure that multi-byte character boundaries are detected when indexing the 60 // words and characters in the URL history database except when converting 61 // URL strings to lowercase. Multi-byte-edness makes no difference when 62 // indexing or when searching the index as the final filtering of results 63 // is dependent on the comparison of a string of bytes, not individual 64 // characters. While the lookup of those bytes during a search in the 65 // |char_word_map_| could serve up words in which the individual char16 66 // occurs as a portion of a composite character the next filtering step 67 // will eliminate such words except in the case where a single character 68 // is being searched on and which character occurs as the second char16 of a 69 // multi-char16 instance. 70 class InMemoryURLIndex : public content::NotificationObserver, 71 public base::SupportsWeakPtr<InMemoryURLIndex> { 72 public: 73 // Defines an abstract class which is notified upon completion of restoring 74 // the index's private data either by reading from the cache file or by 75 // rebuilding from the history database. 76 class RestoreCacheObserver { 77 public: 78 virtual ~RestoreCacheObserver(); 79 80 // Callback that lets the observer know that the restore operation has 81 // completed. |succeeded| indicates if the restore was successful. This is 82 // called on the UI thread. 83 virtual void OnCacheRestoreFinished(bool succeeded) = 0; 84 }; 85 86 // Defines an abstract class which is notified upon completion of saving 87 // the index's private data to the cache file. 88 class SaveCacheObserver { 89 public: 90 virtual ~SaveCacheObserver(); 91 92 // Callback that lets the observer know that the save succeeded. 93 // This is called on the UI thread. 94 virtual void OnCacheSaveFinished(bool succeeded) = 0; 95 }; 96 97 // |profile|, which may be NULL during unit testing, is used to register for 98 // history changes. |history_dir| is a path to the directory containing the 99 // history database within the profile wherein the cache and transaction 100 // journals will be stored. |languages| gives a list of language encodings by 101 // which URLs and omnibox searches are broken down into words and characters. 102 InMemoryURLIndex(Profile* profile, 103 const base::FilePath& history_dir, 104 const std::string& languages, 105 HistoryClient* client); 106 virtual ~InMemoryURLIndex(); 107 108 // Opens and prepares the index of historical URL visits. If the index private 109 // data cannot be restored from its cache file then it is rebuilt from the 110 // history database. 111 void Init(); 112 113 // Signals that any outstanding initialization should be canceled and 114 // flushes the cache to disk. 115 void ShutDown(); 116 117 // Scans the history index and returns a vector with all scored, matching 118 // history items. This entry point simply forwards the call on to the 119 // URLIndexPrivateData class. For a complete description of this function 120 // refer to that class. If |cursor_position| is base::string16::npos, the 121 // function doesn't do anything special with the cursor; this is equivalent 122 // to the cursor being at the end. In total, |max_matches| of items will be 123 // returned in the |ScoredHistoryMatches| vector. 124 ScoredHistoryMatches HistoryItemsForTerms(const base::string16& term_string, 125 size_t cursor_position, 126 size_t max_matches); 127 128 // Deletes the index entry, if any, for the given |url|. 129 void DeleteURL(const GURL& url); 130 131 // Sets the optional observers for completion of restoral and saving of the 132 // index's private data. 133 void set_restore_cache_observer( 134 RestoreCacheObserver* restore_cache_observer) { 135 restore_cache_observer_ = restore_cache_observer; 136 } 137 void set_save_cache_observer(SaveCacheObserver* save_cache_observer) { 138 save_cache_observer_ = save_cache_observer; 139 } 140 141 // Indicates that the index restoration is complete. 142 bool restored() const { 143 return restored_; 144 } 145 146 private: 147 friend class ::HistoryQuickProviderTest; 148 friend class InMemoryURLIndexTest; 149 friend class InMemoryURLIndexCacheTest; 150 FRIEND_TEST_ALL_PREFIXES(LimitedInMemoryURLIndexTest, Initialization); 151 152 // Creating one of me without a history path is not allowed (tests excepted). 153 InMemoryURLIndex(); 154 155 // HistoryDBTask used to rebuild our private data from the history database. 156 class RebuildPrivateDataFromHistoryDBTask : public HistoryDBTask { 157 public: 158 explicit RebuildPrivateDataFromHistoryDBTask( 159 InMemoryURLIndex* index, 160 const std::string& languages, 161 const std::set<std::string>& scheme_whitelist); 162 163 virtual bool RunOnDBThread(HistoryBackend* backend, 164 history::HistoryDatabase* db) OVERRIDE; 165 virtual void DoneRunOnMainThread() OVERRIDE; 166 167 private: 168 virtual ~RebuildPrivateDataFromHistoryDBTask(); 169 170 InMemoryURLIndex* index_; // Call back to this index at completion. 171 std::string languages_; // Languages for word-breaking. 172 std::set<std::string> scheme_whitelist_; // Schemes to be indexed. 173 bool succeeded_; // Indicates if the rebuild was successful. 174 scoped_refptr<URLIndexPrivateData> data_; // The rebuilt private data. 175 176 DISALLOW_COPY_AND_ASSIGN(RebuildPrivateDataFromHistoryDBTask); 177 }; 178 179 // Initializes all index data members in preparation for restoring the index 180 // from the cache or a complete rebuild from the history database. 181 void ClearPrivateData(); 182 183 // Constructs a file path for the cache file within the same directory where 184 // the history database is kept and saves that path to |file_path|. Returns 185 // true if |file_path| can be successfully constructed. (This function 186 // provided as a hook for unit testing.) 187 bool GetCacheFilePath(base::FilePath* file_path); 188 189 // Restores the index's private data from the cache file stored in the 190 // profile directory. 191 void PostRestoreFromCacheFileTask(); 192 193 // Schedules a history task to rebuild our private data from the history 194 // database. 195 void ScheduleRebuildFromHistory(); 196 197 // Callback used by RebuildPrivateDataFromHistoryDBTask to signal completion 198 // or rebuilding our private data from the history database. |succeeded| 199 // will be true if the rebuild was successful. |data| will point to a new 200 // instanceof the private data just rebuilt. 201 void DoneRebuidingPrivateDataFromHistoryDB( 202 bool succeeded, 203 scoped_refptr<URLIndexPrivateData> private_data); 204 205 // Rebuilds the history index from the history database in |history_db|. 206 // Used for unit testing only. 207 void RebuildFromHistory(HistoryDatabase* history_db); 208 209 // Determines if the private data was successfully reloaded from the cache 210 // file or if the private data must be rebuilt from the history database. 211 // |private_data_ptr|'s data will be NULL if the cache file load failed. If 212 // successful, sets the private data and notifies any 213 // |restore_cache_observer_|. Otherwise, kicks off a rebuild from the history 214 // database. 215 void OnCacheLoadDone(scoped_refptr<URLIndexPrivateData> private_data_ptr); 216 217 // Callback function that sets the private data from the just-restored-from- 218 // file |private_data|. Notifies any |restore_cache_observer_| that the 219 // restore has succeeded. 220 void OnCacheRestored(URLIndexPrivateData* private_data); 221 222 // Posts a task to cache the index private data and write the cache file to 223 // the profile directory. 224 void PostSaveToCacheFileTask(); 225 226 // Saves private_data_ to the given |path|. Runs on the UI thread. 227 // Provided for unit testing so that a test cache file can be used. 228 void DoSaveToCacheFile(const base::FilePath& path); 229 230 // Notifies the observer, if any, of the success of the private data caching. 231 // |succeeded| is true on a successful save. 232 void OnCacheSaveDone(bool succeeded); 233 234 // Handles notifications of history changes. 235 virtual void Observe(int notification_type, 236 const content::NotificationSource& source, 237 const content::NotificationDetails& details) OVERRIDE; 238 239 // Notification handlers. 240 void OnURLVisited(const URLVisitedDetails* details); 241 void OnURLsModified(const URLsModifiedDetails* details); 242 void OnURLsDeleted(const URLsDeletedDetails* details); 243 244 // Sets the directory wherein the cache file will be maintained. 245 // For unit test usage only. 246 void set_history_dir(const base::FilePath& dir_path) { 247 history_dir_ = dir_path; 248 } 249 250 // Returns a pointer to our private data. For unit testing only. 251 URLIndexPrivateData* private_data() { return private_data_.get(); } 252 253 // Returns the set of whitelisted schemes. For unit testing only. 254 const std::set<std::string>& scheme_whitelist() { return scheme_whitelist_; } 255 256 // The profile, may be null when testing. 257 Profile* profile_; 258 259 // The HistoryClient; may be NULL when testing. 260 HistoryClient* history_client_; 261 262 // Directory where cache file resides. This is, except when unit testing, 263 // the same directory in which the profile's history database is found. It 264 // should never be empty. 265 base::FilePath history_dir_; 266 267 // Languages used during the word-breaking process during indexing. 268 std::string languages_; 269 270 // Only URLs with a whitelisted scheme are indexed. 271 std::set<std::string> scheme_whitelist_; 272 273 // The index's durable private data. 274 scoped_refptr<URLIndexPrivateData> private_data_; 275 276 // Observers to notify upon restoral or save of the private data cache. 277 RestoreCacheObserver* restore_cache_observer_; 278 SaveCacheObserver* save_cache_observer_; 279 280 CancelableRequestConsumer cache_reader_consumer_; 281 content::NotificationRegistrar registrar_; 282 283 // Set to true once the shutdown process has begun. 284 bool shutdown_; 285 286 // Set to true once the index restoration is complete. 287 bool restored_; 288 289 // Set to true when changes to the index have been made and the index needs 290 // to be cached. Set to false when the index has been cached. Used as a 291 // temporary safety check to insure that the cache is saved before the 292 // index has been destructed. 293 bool needs_to_be_cached_; 294 295 DISALLOW_COPY_AND_ASSIGN(InMemoryURLIndex); 296 }; 297 298 } // namespace history 299 300 #endif // CHROME_BROWSER_HISTORY_IN_MEMORY_URL_INDEX_H_ 301