Home | History | Annotate | Download | only in browser
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef COMPONENTS_HISTORY_CORE_BROWSER_URL_DATABASE_H_
      6 #define COMPONENTS_HISTORY_CORE_BROWSER_URL_DATABASE_H_
      7 
      8 #include "base/basictypes.h"
      9 #include "components/history/core/browser/keyword_id.h"
     10 #include "components/history/core/browser/url_row.h"
     11 #include "components/query_parser/query_parser.h"
     12 #include "sql/statement.h"
     13 
     14 class GURL;
     15 
     16 namespace sql {
     17 class Connection;
     18 }
     19 
     20 namespace history {
     21 
     22 struct KeywordSearchTermRow;
     23 struct KeywordSearchTermVisit;
     24 class VisitDatabase;  // For friend statement.
     25 
     26 // Encapsulates an SQL database that holds URL info.  This is a subset of the
     27 // full history data.  We split this class' functionality out from the larger
     28 // HistoryDatabase class to support maintaining separate databases of URLs with
     29 // different capabilities (for example, the in-memory database).
     30 //
     31 // This is refcounted to support calling InvokeLater() with some of its methods
     32 // (necessary to maintain ordering of DB operations).
     33 class URLDatabase {
     34  public:
     35   // Must call CreateURLTable() and CreateMainURLIndex() before using to make
     36   // sure the database is initialized.
     37   URLDatabase();
     38 
     39   // This object must be destroyed on the thread where all accesses are
     40   // happening to avoid thread-safety problems.
     41   virtual ~URLDatabase();
     42 
     43   // Converts a GURL to a string used in the history database. We plan to
     44   // do more complex operations than just getting the spec out involving
     45   // punycode, so this function should be used instead of url.spec() when
     46   // interacting with the database.
     47   //
     48   // TODO(brettw) this should be moved out of the public section and the
     49   // entire public HistoryDatabase interface should use GURL. This should
     50   // also probably return a string instead since that is what the DB uses
     51   // internally and we can avoid the extra conversion.
     52   static std::string GURLToDatabaseURL(const GURL& url);
     53 
     54   // URL table functions -------------------------------------------------------
     55 
     56   // Looks up a url given an id. Fills info with the data. Returns true on
     57   // success and false otherwise.
     58   bool GetURLRow(URLID url_id, URLRow* info);
     59 
     60   // Looks up all urls that were typed in manually. Fills |urls| with the data.
     61   // Returns true on success and false otherwise.
     62   bool GetAllTypedUrls(URLRows* urls);
     63 
     64   // Looks up the given URL and if it exists, fills the given pointers with the
     65   // associated info and returns the ID of that URL. If the info pointer is
     66   // NULL, no information about the URL will be filled in, only the ID will be
     67   // returned. Returns 0 if the URL was not found.
     68   URLID GetRowForURL(const GURL& url, URLRow* info);
     69 
     70   // Given an already-existing row in the URL table, updates that URL's stats.
     71   // This can not change the URL.  Returns true on success.
     72   //
     73   // This will NOT update the title used for full text indexing. If you are
     74   // setting the title, call SetPageIndexedData with the new title.
     75   bool UpdateURLRow(URLID url_id, const URLRow& info);
     76 
     77   // Adds a line to the URL database with the given information and returns the
     78   // newly generated ID for the row (the |id| in |info| is ignored). A row with
     79   // the given URL must not exist. Returns 0 on error.
     80   //
     81   // This does NOT add a row to the full text search database. Use
     82   // HistoryDatabase::SetPageIndexedData to do this.
     83   URLID AddURL(const URLRow& info) {
     84     return AddURLInternal(info, false);
     85   }
     86 
     87   // Either adds a new row to the URL table with the given information (with the
     88   // |id| as specified in |info|), or updates the pre-existing row with this
     89   // |id| if there is one already. This is also known as an "upsert" or "merge"
     90   // operation. Returns true on success.
     91   bool InsertOrUpdateURLRowByID(const URLRow& info);
     92 
     93   // Delete the row of the corresponding URL. Only the row in the URL table and
     94   // corresponding keyword search terms will be deleted, not any other data that
     95   // may refer to the URL row. Returns true if the row existed and was deleted.
     96   bool DeleteURLRow(URLID id);
     97 
     98   // URL mass-deleting ---------------------------------------------------------
     99 
    100   // Begins the mass-deleting operation by creating a temporary URL table.
    101   // The caller then adds the URLs it wants to preserve to the temporary table,
    102   // and then deletes everything else by calling CommitTemporaryURLTable().
    103   // Returns true on success.
    104   bool CreateTemporaryURLTable();
    105 
    106   // Adds a row to the temporary URL table. This must be called between
    107   // CreateTemporaryURLTable() and CommitTemporaryURLTable() (see those for more
    108   // info). The ID of the URL will change in the temporary table, so the new ID
    109   // is returned. Returns 0 on failure.
    110   URLID AddTemporaryURL(const URLRow& row) {
    111     return AddURLInternal(row, true);
    112   }
    113 
    114   // Ends the mass-deleting by replacing the original URL table with the
    115   // temporary one created in CreateTemporaryURLTable. Returns true on success.
    116   bool CommitTemporaryURLTable();
    117 
    118   // Enumeration ---------------------------------------------------------------
    119 
    120   // A basic enumerator to enumerate the urls database.
    121   class URLEnumeratorBase {
    122    public:
    123     URLEnumeratorBase();
    124     virtual ~URLEnumeratorBase();
    125 
    126    private:
    127     friend class URLDatabase;
    128 
    129     bool initialized_;  // NOTE(review): presumably set by URLDatabase's Init*() functions - confirm in the .cc.
    130     sql::Statement statement_;  // NOTE(review): presumably the query being stepped through - confirm in the .cc.
    131 
    132     DISALLOW_COPY_AND_ASSIGN(URLEnumeratorBase);
    133   };
    134 
    135   // A basic enumerator to enumerate urls.
    136   class URLEnumerator : public URLEnumeratorBase {
    137    public:
    138     URLEnumerator();
    139 
    140     // Retrieves the next url. Returns false if no more urls are available.
    141     bool GetNextURL(history::URLRow* r);
    142 
    143    private:
    144     DISALLOW_COPY_AND_ASSIGN(URLEnumerator);
    145   };
    146 
    147   // Initializes the given enumerator to enumerate all URLs in the database.
    148   bool InitURLEnumeratorForEverything(URLEnumerator* enumerator);
    149 
    150   // Initializes the given enumerator to enumerate all URLs in the database
    151   // that are historically significant: ones having been visited within 3 days,
    152   // having their URL manually typed more than once, or having been visited
    153   // more than 3 times.
    154   bool InitURLEnumeratorForSignificant(URLEnumerator* enumerator);
    155 
    156   // Favicons ------------------------------------------------------------------
    157 
    158   // Autocomplete --------------------------------------------------------------
    159 
    160   // Fills the given array with URLs matching the given prefix.  They will be
    161   // sorted by typed count, then by visit count, then by visit date (most recent
    162   // first) up to the given maximum number.  If |typed_only| is true, only urls
    163   // that have been typed once are returned.  For caller convenience, returns
    164   // whether any results were found.
    165   bool AutocompleteForPrefix(const std::string& prefix,
    166                              size_t max_results,
    167                              bool typed_only,
    168                              URLRows* results);
    169 
    170   // Returns true if the database holds some past typed navigation to a URL on
    171   // the provided hostname.
    172   bool IsTypedHost(const std::string& host);
    173 
    174   // Tries to find the shortest URL beginning with |base| that strictly
    175   // prefixes |url|, and has minimum visit_ and typed_counts as specified.
    176   // If found, fills in |info| and returns true; otherwise returns false,
    177   // leaving |info| unchanged.
    178   // We allow matches of exactly |base| iff |allow_base| is true.
    179   bool FindShortestURLFromBase(const std::string& base,
    180                                const std::string& url,
    181                                int min_visits,
    182                                int min_typed,
    183                                bool allow_base,
    184                                history::URLRow* info);
    185 
    186   // History search ------------------------------------------------------------
    187 
    188   // Performs a brute force search over the database to find any URLs or titles
    189   // which match the |query| string.  Returns any matches in |results|.
    190   bool GetTextMatches(const base::string16& query, URLRows* results);
    191 
    192   // Keyword Search Terms ------------------------------------------------------
    193 
    194   // Sets the search terms for the specified url/keyword pair.
    195   bool SetKeywordSearchTermsForURL(URLID url_id,
    196                                    KeywordID keyword_id,
    197                                    const base::string16& term);
    198 
    199   // Looks up a keyword search term given a url id. Fills |row| with the
    200   // data. Returns true on success.
    201   bool GetKeywordSearchTermRow(URLID url_id, KeywordSearchTermRow* row);
    202 
    203   // Looks up all keyword search terms given a term. Fills |rows| with the data.
    204   // Returns true on success and false otherwise.
    205   bool GetKeywordSearchTermRows(const base::string16& term,
    206                                 std::vector<KeywordSearchTermRow>* rows);
    207 
    208   // Deletes all search terms for the specified keyword that have been added by
    209   // way of SetKeywordSearchTermsForURL.
    210   void DeleteAllSearchTermsForKeyword(KeywordID keyword_id);
    211 
    212   // Returns up to max_count of the most recent search terms for the specified
    213   // keyword.
    214   void GetMostRecentKeywordSearchTerms(
    215       KeywordID keyword_id,
    216       const base::string16& prefix,
    217       int max_count,
    218       std::vector<KeywordSearchTermVisit>* matches);
    219 
    220   // Deletes all searches matching |term|.
    221   bool DeleteKeywordSearchTerm(const base::string16& term);
    222 
    223   // Deletes any search corresponding to |url_id|.
    224   bool DeleteKeywordSearchTermForURL(URLID url_id);
    225 
    226   // Migration -----------------------------------------------------------------
    227 
    228   // Due to a bug we were setting the favicon of about:blank. This forces
    229   // about:blank to have no icon or title. Returns true on success, false if
    230   // the favicon couldn't be updated.
    231   bool MigrateFromVersion11ToVersion12();
    232 
    233  protected:
    234   friend class VisitDatabase;
    235 
    236   // See HISTORY_URL_ROW_FIELDS below.
    237   static const char kURLRowFields[];
    238 
    239   // The number of fields in kURLRowFields. If callers need additional
    240   // fields, they can add their 0-based index to this value to get the index of
    241   // fields following kURLRowFields.
    242   static const int kNumURLRowFields;
    243 
    244   // Drops the starred_id column from urls, returning true on success. This does
    245   // nothing (and returns true) if the urls table doesn't contain the starred_id
    246   // column.
    247   bool DropStarredIDFromURLs();
    248 
    249   // Initialization functions. The indexing functions are separate from the
    250   // table creation functions so the in-memory database and the temporary tables
    251   // used when clearing history can populate the table and then create the
    252   // index, which is faster than the reverse.
    253   //
    254   // is_temporary is false when generating the "regular" URLs table. The expirer
    255   // sets this to true to generate the temporary table, which will have a
    256   // different name but the same schema.
    257   bool CreateURLTable(bool is_temporary);
    258 
    259   // Creates the index over URLs so we can quickly look up based on URL.
    260   bool CreateMainURLIndex();
    261 
    262   // Ensures the keyword search terms table exists.
    263   bool InitKeywordSearchTermsTable();
    264 
    265   // Creates the indices used for keyword search terms.
    266   bool CreateKeywordSearchTermsIndices();
    267 
    268   // Deletes the keyword search terms table.
    269   bool DropKeywordSearchTermsTable();
    270 
    271   // Inserts the given URL row into the URLs table, using the regular table
    272   // if is_temporary is false, or the temporary URL table if is_temporary is
    273   // true. The current |id| of |info| will be ignored in both cases and a new ID
    274   // will be generated, which will also constitute the return value, except in
    275   // case of an error, when the return value is 0. The temporary table may only
    276   // be used in between CreateTemporaryURLTable() and CommitTemporaryURLTable().
    277   URLID AddURLInternal(const URLRow& info, bool is_temporary);
    278 
    279   // Convenience to fill a history::URLRow. Must be in sync with the fields in
    280   // kURLRowFields.
    281   static void FillURLRow(sql::Statement& s, URLRow* i);
    282 
    283   // Returns the database for the functions in this interface. The descendant of
    284   // this class implements these functions to return its objects.
    285   virtual sql::Connection& GetDB() = 0;
    286 
    287  private:
    288   // True if InitKeywordSearchTermsTable() has been invoked. Not all subclasses
    289   // have keyword search terms.
    290   bool has_keyword_search_terms_;
    291 
    292   query_parser::QueryParser query_parser_;  // NOTE(review): presumably used by GetTextMatches() - confirm in the .cc.
    293 
    294   DISALLOW_COPY_AND_ASSIGN(URLDatabase);
    295 };
    296 
    297 // The fields and order expected by FillURLRow(). ID is guaranteed to be first
    298 // so that DISTINCT can be prepended to get distinct URLs.
    299 //
    300 // This is available BOTH as a macro and a static string (kURLRowFields). Use
    301 // the macro if you want to put this in the middle of an otherwise constant
    302 // string; it will save time doing string appends. If you have to build a SQL
    303 // string dynamically anyway, use the constant; it will save space.
    304 #define HISTORY_URL_ROW_FIELDS \
    305     " urls.id, urls.url, urls.title, urls.visit_count, urls.typed_count, " \
    306     "urls.last_visit_time, urls.hidden "
    307 
    308 // Constants which specify, when considered together, 'significant'
    309 // history items. These are used to filter out insignificant items
    310 // for consideration as autocomplete candidates.
    311 extern const int kLowQualityMatchTypedLimit;
    312 extern const int kLowQualityMatchVisitLimit;
    313 extern const int kLowQualityMatchAgeLimitInDays;
    314 
    315 // Returns the date threshold for considering a history item as significant.
    316 base::Time AutocompleteAgeThreshold();
    317 
    318 // Return true if |row| qualifies as an autocomplete candidate. If |threshold|
    319 // is_null() then this function determines a new time threshold each time it is
    320 // called. Since getting system time can be costly (such as for cases where
    321 // this function will be called in a loop over many history items), you can
    322 // provide a non-null |threshold| by simply initializing |threshold| with
    323 // AutocompleteAgeThreshold() (or any other desired time in the past).
    324 bool RowQualifiesAsSignificant(const URLRow& row, const base::Time& threshold);
    325 
    326 }  // namespace history
    327 
    328 #endif  // COMPONENTS_HISTORY_CORE_BROWSER_URL_DATABASE_H_
    329