1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef COMPONENTS_HISTORY_CORE_BROWSER_URL_DATABASE_H_ 6 #define COMPONENTS_HISTORY_CORE_BROWSER_URL_DATABASE_H_ 7 8 #include "base/basictypes.h" 9 #include "components/history/core/browser/keyword_id.h" 10 #include "components/history/core/browser/url_row.h" 11 #include "components/query_parser/query_parser.h" 12 #include "sql/statement.h" 13 14 class GURL; 15 16 namespace sql { 17 class Connection; 18 } 19 20 namespace history { 21 22 struct KeywordSearchTermRow; 23 struct KeywordSearchTermVisit; 24 class VisitDatabase; // For friend statement. 25 26 // Encapsulates an SQL database that holds URL info. This is a subset of the 27 // full history data. We split this class' functionality out from the larger 28 // HistoryDatabase class to support maintaining separate databases of URLs with 29 // different capabilities (for example, the in-memory database). 30 // 31 // This is refcounted to support calling InvokeLater() with some of its methods 32 // (necessary to maintain ordering of DB operations). 33 class URLDatabase { 34 public: 35 // Must call CreateURLTable() and CreateURLIndexes() before using to make 36 // sure the database is initialized. 37 URLDatabase(); 38 39 // This object must be destroyed on the thread where all accesses are 40 // happening to avoid thread-safety problems. 41 virtual ~URLDatabase(); 42 43 // Converts a GURL to a string used in the history database. We plan to 44 // do more complex operations than just getting the spec out involving 45 // punycode, so this function should be used instead of url.spec() when 46 // interacting with the database. 47 // 48 // TODO(brettw) this should be moved out of the public section and the 49 // entire public HistoryDatabase interface should use GURL. This should 50 // also probably return a string instead since that is what the DB uses 51 // internally and we can avoid the extra conversion. 52 static std::string GURLToDatabaseURL(const GURL& url); 53 54 // URL table functions ------------------------------------------------------- 55 56 // Looks up a url given an id. Fills info with the data. Returns true on 57 // success and false otherwise. 58 bool GetURLRow(URLID url_id, URLRow* info); 59 60 // Looks up all urls that were typed in manually. Fills info with the data. 61 // Returns true on success and false otherwise. 62 bool GetAllTypedUrls(URLRows* urls); 63 64 // Looks up the given URL and if it exists, fills the given pointers with the 65 // associated info and returns the ID of that URL. If the info pointer is 66 // NULL, no information about the URL will be filled in, only the ID will be 67 // returned. Returns 0 if the URL was not found. 68 URLID GetRowForURL(const GURL& url, URLRow* info); 69 70 // Given an already-existing row in the URL table, updates that URL's stats. 71 // This can not change the URL. Returns true on success. 72 // 73 // This will NOT update the title used for full text indexing. If you are 74 // setting the title, call SetPageIndexedData with the new title. 75 bool UpdateURLRow(URLID url_id, const URLRow& info); 76 77 // Adds a line to the URL database with the given information and returns the 78 // newly generated ID for the row (the |id| in |info| is ignored). A row with 79 // the given URL must not exist. Returns 0 on error. 80 // 81 // This does NOT add a row to the full text search database. Use 82 // HistoryDatabase::SetPageIndexedData to do this. 83 URLID AddURL(const URLRow& info) { 84 return AddURLInternal(info, false); 85 } 86 87 // Either adds a new row to the URL table with the given information (with the 88 // the |id| as specified in |info|), or updates the pre-existing row with this 89 // |id| if there is one already. This is also known as an "upsert" or "merge" 90 // operation. Returns true on success. 91 bool InsertOrUpdateURLRowByID(const URLRow& info); 92 93 // Delete the row of the corresponding URL. Only the row in the URL table and 94 // corresponding keyword search terms will be deleted, not any other data that 95 // may refer to the URL row. Returns true if the row existed and was deleted. 96 bool DeleteURLRow(URLID id); 97 98 // URL mass-deleting --------------------------------------------------------- 99 100 // Begins the mass-deleting operation by creating a temporary URL table. 101 // The caller than adds the URLs it wants to preseve to the temporary table, 102 // and then deletes everything else by calling CommitTemporaryURLTable(). 103 // Returns true on success. 104 bool CreateTemporaryURLTable(); 105 106 // Adds a row to the temporary URL table. This must be called between 107 // CreateTemporaryURLTable() and CommitTemporaryURLTable() (see those for more 108 // info). The ID of the URL will change in the temporary table, so the new ID 109 // is returned. Returns 0 on failure. 110 URLID AddTemporaryURL(const URLRow& row) { 111 return AddURLInternal(row, true); 112 } 113 114 // Ends the mass-deleting by replacing the original URL table with the 115 // temporary one created in CreateTemporaryURLTable. Returns true on success. 116 bool CommitTemporaryURLTable(); 117 118 // Enumeration --------------------------------------------------------------- 119 120 // A basic enumerator to enumerate urls database. 121 class URLEnumeratorBase { 122 public: 123 URLEnumeratorBase(); 124 virtual ~URLEnumeratorBase(); 125 126 private: 127 friend class URLDatabase; 128 129 bool initialized_; 130 sql::Statement statement_; 131 132 DISALLOW_COPY_AND_ASSIGN(URLEnumeratorBase); 133 }; 134 135 // A basic enumerator to enumerate urls 136 class URLEnumerator : public URLEnumeratorBase { 137 public: 138 URLEnumerator(); 139 140 // Retreives the next url. Returns false if no more urls are available 141 bool GetNextURL(history::URLRow* r); 142 143 private: 144 DISALLOW_COPY_AND_ASSIGN(URLEnumerator); 145 }; 146 147 // Initializes the given enumerator to enumerator all URLs in the database. 148 bool InitURLEnumeratorForEverything(URLEnumerator* enumerator); 149 150 // Initializes the given enumerator to enumerator all URLs in the database 151 // that are historically significant: ones having been visited within 3 days, 152 // having their URL manually typed more than once, or having been visited 153 // more than 3 times. 154 bool InitURLEnumeratorForSignificant(URLEnumerator* enumerator); 155 156 // Favicons ------------------------------------------------------------------ 157 158 // Autocomplete -------------------------------------------------------------- 159 160 // Fills the given array with URLs matching the given prefix. They will be 161 // sorted by typed count, then by visit count, then by visit date (most recent 162 // first) up to the given maximum number. If |typed_only| is true, only urls 163 // that have been typed once are returned. For caller convenience, returns 164 // whether any results were found. 165 bool AutocompleteForPrefix(const std::string& prefix, 166 size_t max_results, 167 bool typed_only, 168 URLRows* results); 169 170 // Returns true if the database holds some past typed navigation to a URL on 171 // the provided hostname. 172 bool IsTypedHost(const std::string& host); 173 174 // Tries to find the shortest URL beginning with |base| that strictly 175 // prefixes |url|, and has minimum visit_ and typed_counts as specified. 176 // If found, fills in |info| and returns true; otherwise returns false, 177 // leaving |info| unchanged. 178 // We allow matches of exactly |base| iff |allow_base| is true. 179 bool FindShortestURLFromBase(const std::string& base, 180 const std::string& url, 181 int min_visits, 182 int min_typed, 183 bool allow_base, 184 history::URLRow* info); 185 186 // History search ------------------------------------------------------------ 187 188 // Performs a brute force search over the database to find any URLs or titles 189 // which match the |query| string. Returns any matches in |results|. 190 bool GetTextMatches(const base::string16& query, URLRows* results); 191 192 // Keyword Search Terms ------------------------------------------------------ 193 194 // Sets the search terms for the specified url/keyword pair. 195 bool SetKeywordSearchTermsForURL(URLID url_id, 196 KeywordID keyword_id, 197 const base::string16& term); 198 199 // Looks up a keyword search term given a url id. Returns all the search terms 200 // in |rows|. Returns true on success. 201 bool GetKeywordSearchTermRow(URLID url_id, KeywordSearchTermRow* row); 202 203 // Looks up all keyword search terms given a term, Fills the rows with data. 204 // Returns true on success and false otherwise. 205 bool GetKeywordSearchTermRows(const base::string16& term, 206 std::vector<KeywordSearchTermRow>* rows); 207 208 // Deletes all search terms for the specified keyword that have been added by 209 // way of SetKeywordSearchTermsForURL. 210 void DeleteAllSearchTermsForKeyword(KeywordID keyword_id); 211 212 // Returns up to max_count of the most recent search terms for the specified 213 // keyword. 214 void GetMostRecentKeywordSearchTerms( 215 KeywordID keyword_id, 216 const base::string16& prefix, 217 int max_count, 218 std::vector<KeywordSearchTermVisit>* matches); 219 220 // Deletes all searches matching |term|. 221 bool DeleteKeywordSearchTerm(const base::string16& term); 222 223 // Deletes any search corresponding to |url_id|. 224 bool DeleteKeywordSearchTermForURL(URLID url_id); 225 226 // Migration ----------------------------------------------------------------- 227 228 // Do to a bug we were setting the favicon of about:blank. This forces 229 // about:blank to have no icon or title. Returns true on success, false if 230 // the favicon couldn't be updated. 231 bool MigrateFromVersion11ToVersion12(); 232 233 protected: 234 friend class VisitDatabase; 235 236 // See HISTORY_URL_ROW_FIELDS below. 237 static const char kURLRowFields[]; 238 239 // The number of fiends in kURLRowFields. If callers need additional 240 // fields, they can add their 0-based index to this value to get the index of 241 // fields following kURLRowFields. 242 static const int kNumURLRowFields; 243 244 // Drops the starred_id column from urls, returning true on success. This does 245 // nothing (and returns true) if the urls doesn't contain the starred_id 246 // column. 247 bool DropStarredIDFromURLs(); 248 249 // Initialization functions. The indexing functions are separate from the 250 // table creation functions so the in-memory database and the temporary tables 251 // used when clearing history can populate the table and then create the 252 // index, which is faster than the reverse. 253 // 254 // is_temporary is false when generating the "regular" URLs table. The expirer 255 // sets this to true to generate the temporary table, which will have a 256 // different name but the same schema. 257 bool CreateURLTable(bool is_temporary); 258 259 // Creates the index over URLs so we can quickly look up based on URL. 260 bool CreateMainURLIndex(); 261 262 // Ensures the keyword search terms table exists. 263 bool InitKeywordSearchTermsTable(); 264 265 // Creates the indices used for keyword search terms. 266 bool CreateKeywordSearchTermsIndices(); 267 268 // Deletes the keyword search terms table. 269 bool DropKeywordSearchTermsTable(); 270 271 // Inserts the given URL row into the URLs table, using the regular table 272 // if is_temporary is false, or the temporary URL table if is temporary is 273 // true. The current |id| of |info| will be ignored in both cases and a new ID 274 // will be generated, which will also constitute the return value, except in 275 // case of an error, when the return value is 0. The temporary table may only 276 // be used in between CreateTemporaryURLTable() and CommitTemporaryURLTable(). 277 URLID AddURLInternal(const URLRow& info, bool is_temporary); 278 279 // Convenience to fill a history::URLRow. Must be in sync with the fields in 280 // kHistoryURLRowFields. 281 static void FillURLRow(sql::Statement& s, URLRow* i); 282 283 // Returns the database for the functions in this interface. The decendent of 284 // this class implements these functions to return its objects. 285 virtual sql::Connection& GetDB() = 0; 286 287 private: 288 // True if InitKeywordSearchTermsTable() has been invoked. Not all subclasses 289 // have keyword search terms. 290 bool has_keyword_search_terms_; 291 292 query_parser::QueryParser query_parser_; 293 294 DISALLOW_COPY_AND_ASSIGN(URLDatabase); 295 }; 296 297 // The fields and order expected by FillURLRow(). ID is guaranteed to be first 298 // so that DISTINCT can be prepended to get distinct URLs. 299 // 300 // This is available BOTH as a macro and a static string (kURLRowFields). Use 301 // the macro if you want to put this in the middle of an otherwise constant 302 // string, it will save time doing string appends. If you have to build a SQL 303 // string dynamically anyway, use the constant, it will save space. 304 #define HISTORY_URL_ROW_FIELDS \ 305 " urls.id, urls.url, urls.title, urls.visit_count, urls.typed_count, " \ 306 "urls.last_visit_time, urls.hidden " 307 308 // Constants which specify, when considered altogether, 'significant' 309 // history items. These are used to filter out insignificant items 310 // for consideration as autocomplete candidates. 311 extern const int kLowQualityMatchTypedLimit; 312 extern const int kLowQualityMatchVisitLimit; 313 extern const int kLowQualityMatchAgeLimitInDays; 314 315 // Returns the date threshold for considering an history item as significant. 316 base::Time AutocompleteAgeThreshold(); 317 318 // Return true if |row| qualifies as an autocomplete candidate. If |time_cache| 319 // is_null() then this function determines a new time threshold each time it is 320 // called. Since getting system time can be costly (such as for cases where 321 // this function will be called in a loop over many history items), you can 322 // provide a non-null |time_cache| by simply initializing |time_cache| with 323 // AutocompleteAgeThreshold() (or any other desired time in the past). 324 bool RowQualifiesAsSignificant(const URLRow& row, const base::Time& threshold); 325 326 } // namespace history 327 328 #endif // COMPONENTS_HISTORY_CORE_BROWSER_URL_DATABASE_H_ 329