Home | History | Annotate | Download | only in history
      1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_
      6 #define CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_
      7 #pragma once
      8 
      9 #include <set>
     10 #include <vector>
     11 
     12 #include "app/sql/connection.h"
     13 #include "app/sql/meta_table.h"
     14 #include "base/basictypes.h"
     15 #include "base/file_path.h"
     16 #include "base/string16.h"
     17 #include "chrome/browser/history/history_types.h"
     18 #include "googleurl/src/gurl.h"
     19 
     20 namespace history {
     21 
     22 // Encapsulation of a full-text indexed database file.
     23 class TextDatabase {
     24  public:
          // Integer identifier of one database file. FileNameToID() and
          // IDToFileName() convert between identifiers and file names.
     25   typedef int DBIdent;
     26 
          // Set of URLs; GetTextMatches() uses one to skip already-seen URLs.
     27   typedef std::set<GURL> URLSet;
     28 
     29   // Returned from the search function (see GetTextMatches()).
     30   struct Match {
     31     Match();
     32     ~Match();
     33 
     34     // URL of the match.
     35     GURL url;
     36 
     37     // The title is returned because the title in the text database and the URL
     38     // database may differ. This happens because we capture the title when the
     39     // body is captured, and don't update it later.
     40     string16 title;
     41 
     42     // Time the page that was returned was visited.
     43     base::Time time;
     44 
     45     // Identifies any found matches in the title of the document. These are not
     46     // included in the snippet.
     47     Snippet::MatchPositions title_match_positions;
     48 
     49     // Snippet of the match we generated from the body.
     50     Snippet snippet;
     51   };
     52 
     53   // Note: You must call Init(), and it must succeed, before using this class.
     54   //
     55   // NOTE(review): "Computes the matches for the query, returning results in
     56   // decreasing order of visit time." looks misplaced here; it describes a
     57   // search, so it presumably belongs on GetTextMatches() -- confirm.
     58   // NOTE(review): the original text also says the database is attached to a
     59   // given connection so that one sqlite3 object shares many TextDatabases and
     60   // their cache, limiting the total size that text indexing databases can
     61   // take up. No connection is passed in and the class owns |db_|; likely stale.
     62   //
     63   // |path|: the original comment calls this |file_name|, "the name of the file
     64   // on disk". TODO confirm whether it is the file itself or its directory.
     65   // |id| is the identifier for the database. It should uniquely identify it
     66   // among other databases on disk and in the sqlite connection.
     67   //
     68   // |allow_create| indicates if we want to allow creation of the file if it
     69   // doesn't exist. For files associated with older time periods, we don't want
     70   // to create them if they don't exist, so this flag would be false.
     71   TextDatabase(const FilePath& path,
     72                DBIdent id,
     73                bool allow_create);
     74   ~TextDatabase();
     75 
     76   // Initializes the database connection and creates the file if the class
     77   // was created with |allow_create|. If the file couldn't be opened or
     78   // created, this will return false, and no other functions should be called
     79   // on this object afterwards.
     80   bool Init();
     81 
     82   // Allows updates to be batched. This gives higher performance when multiple
     83   // updates are happening because every insert doesn't require a sync to disk.
     84   // Transactions can be nested; only the outermost one will actually count.
     85   void BeginTransaction();
     86   void CommitTransaction();
     87 
     88   // For testing, returns the file name of the database so it can be deleted
     89   // after the test. This is valid even before Init() is called.
          // NOTE(review): the comment on |file_name_| says it is computed in Init(),
          // which conflicts with "valid even before Init()" -- confirm which holds.
     90   const FilePath& file_name() const { return file_name_; }
     91 
     92   // Returns a NULL-terminated string that is the base of history index files,
     93   // which is the part before the database identifier. For example
     94   // "History Index *". This is for finding existing database files.
     95   static const FilePath::CharType* file_base();
     96 
     97   // Converts a filename on disk (optionally including a path) to a database
     98   // identifier. If the filename doesn't have the correct format, returns 0.
     99   static DBIdent FileNameToID(const FilePath& file_path);
    100 
    101   // Changing operations -------------------------------------------------------
    102 
    103   // Adds the given data (|url|, |title|, |contents|, visited at |time|) for a
    104   // page. Returns true on success. The strings should already be UTF-8.
    105   bool AddPageData(base::Time time,
    106                    const std::string& url,
    107                    const std::string& title,
    108                    const std::string& contents);
    109 
    110   // Deletes the indexed data exactly matching the given URL/time pair.
    111   void DeletePageData(base::Time time, const std::string& url);
    112 
    113   // Optimizes the tree inside the database. This will, in addition to making
    114   // access faster, remove any deleted data from the database (normally it is
    115   // added again as "removed" and it is manually cleaned up when it decides to
    116   // optimize it naturally). It is bad for privacy if a user is deleting a
    117   // page from history but it still exists in the full text database in some
    118   // form. This function will clean that up.
    119   void Optimize();
    120 
    121   // Querying ------------------------------------------------------------------
    122 
    123   // Executes the given query. See QueryOptions for more info on input.
    124   //
    125   // The results are appended to any existing ones in |*results|, and the first
    126   // time considered for the output is in |first_time_searched|
    127   // (see QueryResults for more).
    128   //
    129   // Any URLs found will be added to |unique_urls|. If a URL is already in the
    130   // set, additional results will not be added (giving the ability to uniquify
    131   // URL results).
    132   //
    133   // Callers must run QueryParser on the user text and pass the results of the
    134   // QueryParser to this method as the query string.
    135   void GetTextMatches(const std::string& query,
    136                       const QueryOptions& options,
    137                       std::vector<Match>* results,
    138                       URLSet* unique_urls,
    139                       base::Time* first_time_searched);
    140 
    141   // Converts the given database identifier to a filename. This does not include
    142   // the path, just the file and extension.
    143   static FilePath IDToFileName(DBIdent id);
    144 
    145  private:
    146   // Ensures that the tables and indices are created. Returns true on success.
    147   bool CreateTables();
    148 
    149   // The sql database. Not valid until Init() is called.
    150   sql::Connection db_;
    151 
          // Presumably the constructor's |path|, |id| and |allow_create| arguments
          // (the constructor definition is not visible in this header) -- confirm.
    152   const FilePath path_;
    153   const DBIdent ident_;
    154   const bool allow_create_;
    155 
    156   // Full file name of the file on disk, computed in Init().
    157   FilePath file_name_;
    158 
          // NOTE(review): presumably the meta table belonging to |db_|; its use is
          // not visible in this header.
    159   sql::MetaTable meta_table_;
    160 
    161   DISALLOW_COPY_AND_ASSIGN(TextDatabase);
    162 };
    163 
    164 }  // namespace history
    165 
    166 #endif  // CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_
    167