Home | History | Annotate | Download | only in history
      1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/history/visit_database.h"
      6 
      7 #include <algorithm>
      8 #include <limits>
      9 #include <map>
     10 #include <set>
     11 
     12 #include "app/sql/statement.h"
     13 #include "base/logging.h"
     14 #include "base/string_number_conversions.h"
     15 #include "chrome/browser/history/url_database.h"
     16 #include "chrome/common/url_constants.h"
     17 #include "content/common/page_transition_types.h"
     18 
     19 // Rows, in order, of the visit table.
     20 #define HISTORY_VISIT_ROW_FIELDS \
     21   " id,url,visit_time,from_visit,transition,segment_id,is_indexed "
     22 
     23 namespace history {
     24 
     25 VisitDatabase::VisitDatabase() {
     26 }
     27 
     28 VisitDatabase::~VisitDatabase() {
     29 }
     30 
     31 bool VisitDatabase::InitVisitTable() {
     32   if (!GetDB().DoesTableExist("visits")) {
     33     if (!GetDB().Execute("CREATE TABLE visits("
     34         "id INTEGER PRIMARY KEY,"
     35         "url INTEGER NOT NULL," // key of the URL this corresponds to
     36         "visit_time INTEGER NOT NULL,"
     37         "from_visit INTEGER,"
     38         "transition INTEGER DEFAULT 0 NOT NULL,"
     39         "segment_id INTEGER,"
     40         // True when we have indexed data for this visit.
     41         "is_indexed BOOLEAN)"))
     42       return false;
     43   } else if (!GetDB().DoesColumnExist("visits", "is_indexed")) {
     44     // Old versions don't have the is_indexed column, we can just add that and
     45     // not worry about different database revisions, since old ones will
     46     // continue to work.
     47     //
     48     // TODO(brettw) this should be removed once we think everybody has been
     49     // updated (added early Mar 2008).
     50     if (!GetDB().Execute("ALTER TABLE visits ADD COLUMN is_indexed BOOLEAN"))
     51       return false;
     52   }
     53 
     54   // Visit source table contains the source information for all the visits. To
     55   // save space, we do not record those user browsed visits which would be the
     56   // majority in this table. Only other sources are recorded.
     57   // Due to the tight relationship between visit_source and visits table, they
     58   // should be created and dropped at the same time.
     59   if (!GetDB().DoesTableExist("visit_source")) {
     60     if (!GetDB().Execute("CREATE TABLE visit_source("
     61                          "id INTEGER PRIMARY KEY,source INTEGER NOT NULL)"))
     62         return false;
     63   }
     64 
     65   // Index over url so we can quickly find visits for a page. This will just
     66   // fail if it already exists and we'll ignore it.
     67   GetDB().Execute("CREATE INDEX visits_url_index ON visits (url)");
     68 
     69   // Create an index over from visits so that we can efficiently find
     70   // referrers and redirects. Ignore failures because it likely already exists.
     71   GetDB().Execute("CREATE INDEX visits_from_index ON visits (from_visit)");
     72 
     73   // Create an index over time so that we can efficiently find the visits in a
     74   // given time range (most history views are time-based). Ignore failures
     75   // because it likely already exists.
     76   GetDB().Execute("CREATE INDEX visits_time_index ON visits (visit_time)");
     77 
     78   return true;
     79 }
     80 
     81 bool VisitDatabase::DropVisitTable() {
     82   GetDB().Execute("DROP TABLE visit_source");
     83   // This will also drop the indices over the table.
     84   return GetDB().Execute("DROP TABLE visits");
     85 }
     86 
     87 // Must be in sync with HISTORY_VISIT_ROW_FIELDS.
     88 // static
     89 void VisitDatabase::FillVisitRow(sql::Statement& statement, VisitRow* visit) {
     90   visit->visit_id = statement.ColumnInt64(0);
     91   visit->url_id = statement.ColumnInt64(1);
     92   visit->visit_time = base::Time::FromInternalValue(statement.ColumnInt64(2));
     93   visit->referring_visit = statement.ColumnInt64(3);
     94   visit->transition = PageTransition::FromInt(statement.ColumnInt(4));
     95   visit->segment_id = statement.ColumnInt64(5);
     96   visit->is_indexed = !!statement.ColumnInt(6);
     97 }
     98 
     99 // static
    100 void VisitDatabase::FillVisitVector(sql::Statement& statement,
    101                                     VisitVector* visits) {
    102   while (statement.Step()) {
    103     history::VisitRow visit;
    104     FillVisitRow(statement, &visit);
    105     visits->push_back(visit);
    106   }
    107 }
    108 
    109 VisitID VisitDatabase::AddVisit(VisitRow* visit, VisitSource source) {
    110   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
    111       "INSERT INTO visits "
    112       "(url, visit_time, from_visit, transition, segment_id, is_indexed) "
    113       "VALUES (?,?,?,?,?,?)"));
    114   if (!statement) {
    115     VLOG(0) << "Failed to build visit insert statement:  "
    116             << "url_id = " << visit->url_id;
    117     return 0;
    118   }
    119 
    120   statement.BindInt64(0, visit->url_id);
    121   statement.BindInt64(1, visit->visit_time.ToInternalValue());
    122   statement.BindInt64(2, visit->referring_visit);
    123   statement.BindInt64(3, visit->transition);
    124   statement.BindInt64(4, visit->segment_id);
    125   statement.BindInt64(5, visit->is_indexed);
    126 
    127   if (!statement.Run()) {
    128     VLOG(0) << "Failed to execute visit insert statement:  "
    129             << "url_id = " << visit->url_id;
    130     return 0;
    131   }
    132 
    133   visit->visit_id = GetDB().GetLastInsertRowId();
    134 
    135   if (source != SOURCE_BROWSED) {
    136     // Record the source of this visit when it is not browsed.
    137     sql::Statement statement1(GetDB().GetCachedStatement(SQL_FROM_HERE,
    138         "INSERT INTO visit_source (id, source) VALUES (?,?)"));
    139     if (!statement1.is_valid()) {
    140       VLOG(0) << "Failed to build visit_source insert statement:  "
    141               << "url_id = " << visit->visit_id;
    142       return 0;
    143     }
    144 
    145     statement1.BindInt64(0, visit->visit_id);
    146     statement1.BindInt64(1, source);
    147     if (!statement1.Run()) {
    148       VLOG(0) << "Failed to execute visit_source insert statement:  "
    149               << "url_id = " << visit->visit_id;
    150       return 0;
    151     }
    152   }
    153 
    154   return visit->visit_id;
    155 }
    156 
    157 void VisitDatabase::DeleteVisit(const VisitRow& visit) {
    158   // Patch around this visit. Any visits that this went to will now have their
    159   // "source" be the deleted visit's source.
    160   sql::Statement update_chain(GetDB().GetCachedStatement(SQL_FROM_HERE,
    161       "UPDATE visits SET from_visit=? WHERE from_visit=?"));
    162   if (!update_chain)
    163     return;
    164   update_chain.BindInt64(0, visit.referring_visit);
    165   update_chain.BindInt64(1, visit.visit_id);
    166   update_chain.Run();
    167 
    168   // Now delete the actual visit.
    169   sql::Statement del(GetDB().GetCachedStatement(SQL_FROM_HERE,
    170       "DELETE FROM visits WHERE id=?"));
    171   if (!del)
    172     return;
    173   del.BindInt64(0, visit.visit_id);
    174   del.Run();
    175 
    176   // Try to delete the entry in visit_source table as well.
    177   // If the visit was browsed, there is no corresponding entry in visit_source
    178   // table, and nothing will be deleted.
    179   del.Assign(GetDB().GetCachedStatement(SQL_FROM_HERE,
    180              "DELETE FROM visit_source WHERE id=?"));
    181   if (!del.is_valid())
    182     return;
    183   del.BindInt64(0, visit.visit_id);
    184   del.Run();
    185 }
    186 
    187 bool VisitDatabase::GetRowForVisit(VisitID visit_id, VisitRow* out_visit) {
    188   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
    189       "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits WHERE id=?"));
    190   if (!statement)
    191     return false;
    192 
    193   statement.BindInt64(0, visit_id);
    194   if (!statement.Step())
    195     return false;
    196 
    197   FillVisitRow(statement, out_visit);
    198 
    199   // We got a different visit than we asked for, something is wrong.
    200   DCHECK_EQ(visit_id, out_visit->visit_id);
    201   if (visit_id != out_visit->visit_id)
    202     return false;
    203 
    204   return true;
    205 }
    206 
    207 bool VisitDatabase::UpdateVisitRow(const VisitRow& visit) {
    208   // Don't store inconsistent data to the database.
    209   DCHECK_NE(visit.visit_id, visit.referring_visit);
    210   if (visit.visit_id == visit.referring_visit)
    211     return false;
    212 
    213   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
    214       "UPDATE visits SET "
    215       "url=?,visit_time=?,from_visit=?,transition=?,segment_id=?,is_indexed=? "
    216       "WHERE id=?"));
    217   if (!statement)
    218     return false;
    219 
    220   statement.BindInt64(0, visit.url_id);
    221   statement.BindInt64(1, visit.visit_time.ToInternalValue());
    222   statement.BindInt64(2, visit.referring_visit);
    223   statement.BindInt64(3, visit.transition);
    224   statement.BindInt64(4, visit.segment_id);
    225   statement.BindInt64(5, visit.is_indexed);
    226   statement.BindInt64(6, visit.visit_id);
    227   return statement.Run();
    228 }
    229 
    230 bool VisitDatabase::GetVisitsForURL(URLID url_id, VisitVector* visits) {
    231   visits->clear();
    232 
    233   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
    234       "SELECT" HISTORY_VISIT_ROW_FIELDS
    235       "FROM visits "
    236       "WHERE url=? "
    237       "ORDER BY visit_time ASC"));
    238   if (!statement)
    239     return false;
    240 
    241   statement.BindInt64(0, url_id);
    242   FillVisitVector(statement, visits);
    243   return true;
    244 }
    245 
    246 void VisitDatabase::GetAllVisitsInRange(base::Time begin_time,
    247                                         base::Time end_time,
    248                                         int max_results,
    249                                         VisitVector* visits) {
    250   visits->clear();
    251 
    252   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
    253       "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
    254       "WHERE visit_time >= ? AND visit_time < ?"
    255       "ORDER BY visit_time LIMIT ?"));
    256   if (!statement)
    257     return;
    258 
    259   // See GetVisibleVisitsInRange for more info on how these times are bound.
    260   int64 end = end_time.ToInternalValue();
    261   statement.BindInt64(0, begin_time.ToInternalValue());
    262   statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max());
    263   statement.BindInt64(2,
    264       max_results ? max_results : std::numeric_limits<int64>::max());
    265 
    266   FillVisitVector(statement, visits);
    267 }
    268 
    269 void VisitDatabase::GetVisitsInRangeForTransition(
    270     base::Time begin_time,
    271     base::Time end_time,
    272     int max_results,
    273     PageTransition::Type transition,
    274     VisitVector* visits) {
    275   DCHECK(visits);
    276   visits->clear();
    277 
    278   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
    279       "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
    280       "WHERE visit_time >= ? AND visit_time < ? "
    281       "AND (transition & ?) == ?"
    282       "ORDER BY visit_time LIMIT ?"));
    283   if (!statement)
    284     return;
    285 
    286   // See GetVisibleVisitsInRange for more info on how these times are bound.
    287   int64 end = end_time.ToInternalValue();
    288   statement.BindInt64(0, begin_time.ToInternalValue());
    289   statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max());
    290   statement.BindInt(2, PageTransition::CORE_MASK);
    291   statement.BindInt(3, transition);
    292   statement.BindInt64(4,
    293       max_results ? max_results : std::numeric_limits<int64>::max());
    294 
    295   FillVisitVector(statement, visits);
    296 }
    297 
    298 void VisitDatabase::GetVisibleVisitsInRange(base::Time begin_time,
    299                                             base::Time end_time,
    300                                             int max_count,
    301                                             VisitVector* visits) {
    302   visits->clear();
    303   // The visit_time values can be duplicated in a redirect chain, so we sort
    304   // by id too, to ensure a consistent ordering just in case.
    305   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
    306       "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
    307       "WHERE visit_time >= ? AND visit_time < ? "
    308       "AND (transition & ?) != 0 "  // CHAIN_END
    309       "AND (transition & ?) NOT IN (?, ?, ?) "  // NO SUBFRAME or
    310                                                 // KEYWORD_GENERATED
    311       "ORDER BY visit_time DESC, id DESC"));
    312   if (!statement)
    313     return;
    314 
    315   // Note that we use min/max values for querying unlimited ranges of time using
    316   // the same statement. Since the time has an index, this will be about the
    317   // same amount of work as just doing a query for everything with no qualifier.
    318   int64 end = end_time.ToInternalValue();
    319   statement.BindInt64(0, begin_time.ToInternalValue());
    320   statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max());
    321   statement.BindInt(2, PageTransition::CHAIN_END);
    322   statement.BindInt(3, PageTransition::CORE_MASK);
    323   statement.BindInt(4, PageTransition::AUTO_SUBFRAME);
    324   statement.BindInt(5, PageTransition::MANUAL_SUBFRAME);
    325   statement.BindInt(6, PageTransition::KEYWORD_GENERATED);
    326 
    327   std::set<URLID> found_urls;
    328   while (statement.Step()) {
    329     VisitRow visit;
    330     FillVisitRow(statement, &visit);
    331     // Make sure the URL this visit corresponds to is unique.
    332     if (found_urls.find(visit.url_id) != found_urls.end())
    333       continue;
    334     found_urls.insert(visit.url_id);
    335     visits->push_back(visit);
    336 
    337     if (max_count > 0 && static_cast<int>(visits->size()) >= max_count)
    338       break;
    339   }
    340 }
    341 
    342 VisitID VisitDatabase::GetMostRecentVisitForURL(URLID url_id,
    343                                                 VisitRow* visit_row) {
    344   // The visit_time values can be duplicated in a redirect chain, so we sort
    345   // by id too, to ensure a consistent ordering just in case.
    346   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
    347       "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
    348       "WHERE url=? "
    349       "ORDER BY visit_time DESC, id DESC "
    350       "LIMIT 1"));
    351   if (!statement)
    352     return 0;
    353 
    354   statement.BindInt64(0, url_id);
    355   if (!statement.Step())
    356     return 0;  // No visits for this URL.
    357 
    358   if (visit_row) {
    359     FillVisitRow(statement, visit_row);
    360     return visit_row->visit_id;
    361   }
    362   return statement.ColumnInt64(0);
    363 }
    364 
    365 bool VisitDatabase::GetMostRecentVisitsForURL(URLID url_id,
    366                                               int max_results,
    367                                               VisitVector* visits) {
    368   visits->clear();
    369 
    370   // The visit_time values can be duplicated in a redirect chain, so we sort
    371   // by id too, to ensure a consistent ordering just in case.
    372   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
    373       "SELECT" HISTORY_VISIT_ROW_FIELDS
    374       "FROM visits "
    375       "WHERE url=? "
    376       "ORDER BY visit_time DESC, id DESC "
    377       "LIMIT ?"));
    378   if (!statement)
    379     return false;
    380 
    381   statement.BindInt64(0, url_id);
    382   statement.BindInt(1, max_results);
    383   FillVisitVector(statement, visits);
    384   return true;
    385 }
    386 
    387 bool VisitDatabase::GetRedirectFromVisit(VisitID from_visit,
    388                                          VisitID* to_visit,
    389                                          GURL* to_url) {
    390   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
    391       "SELECT v.id,u.url "
    392       "FROM visits v JOIN urls u ON v.url = u.id "
    393       "WHERE v.from_visit = ? "
    394       "AND (v.transition & ?) != 0"));  // IS_REDIRECT_MASK
    395   if (!statement)
    396     return false;
    397 
    398   statement.BindInt64(0, from_visit);
    399   statement.BindInt(1, PageTransition::IS_REDIRECT_MASK);
    400 
    401   if (!statement.Step())
    402     return false;  // No redirect from this visit.
    403   if (to_visit)
    404     *to_visit = statement.ColumnInt64(0);
    405   if (to_url)
    406     *to_url = GURL(statement.ColumnString(1));
    407   return true;
    408 }
    409 
    410 bool VisitDatabase::GetRedirectToVisit(VisitID to_visit,
    411                                        VisitID* from_visit,
    412                                        GURL* from_url) {
    413   VisitRow row;
    414   if (!GetRowForVisit(to_visit, &row))
    415     return false;
    416 
    417   if (from_visit)
    418     *from_visit = row.referring_visit;
    419 
    420   if (from_url) {
    421     sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
    422         "SELECT u.url "
    423         "FROM visits v JOIN urls u ON v.url = u.id "
    424         "WHERE v.id = ?"));
    425     statement.BindInt64(0, row.referring_visit);
    426 
    427     if (!statement.Step())
    428       return false;
    429 
    430     *from_url = GURL(statement.ColumnString(0));
    431   }
    432   return true;
    433 }
    434 
    435 bool VisitDatabase::GetVisitCountToHost(const GURL& url,
    436                                         int* count,
    437                                         base::Time* first_visit) {
    438   if (!url.SchemeIs(chrome::kHttpScheme) && !url.SchemeIs(chrome::kHttpsScheme))
    439     return false;
    440 
    441   // We need to search for URLs with a matching host/port. One way to query for
    442   // this is to use the LIKE operator, eg 'url LIKE http://google.com/%'. This
    443   // is inefficient though in that it doesn't use the index and each entry must
    444   // be visited. The same query can be executed by using >= and < operator.
    445   // The query becomes:
    446   // 'url >= http://google.com/' and url < http://google.com0'.
    447   // 0 is used as it is one character greater than '/'.
    448   GURL search_url(url);
    449   const std::string host_query_min = search_url.GetOrigin().spec();
    450 
    451   if (host_query_min.empty())
    452     return false;
    453 
    454   std::string host_query_max = host_query_min;
    455   host_query_max[host_query_max.size() - 1] = '0';
    456 
    457   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
    458       "SELECT MIN(v.visit_time), COUNT(*) "
    459       "FROM visits v INNER JOIN urls u ON v.url = u.id "
    460       "WHERE (u.url >= ? AND u.url < ?)"));
    461   if (!statement)
    462     return false;
    463 
    464   statement.BindString(0, host_query_min);
    465   statement.BindString(1, host_query_max);
    466 
    467   if (!statement.Step()) {
    468     // We've never been to this page before.
    469     *count = 0;
    470     return true;
    471   }
    472 
    473   *first_visit = base::Time::FromInternalValue(statement.ColumnInt64(0));
    474   *count = statement.ColumnInt(1);
    475   return true;
    476 }
    477 
    478 bool VisitDatabase::GetStartDate(base::Time* first_visit) {
    479   sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
    480       "SELECT MIN(visit_time) FROM visits WHERE visit_time != 0"));
    481   if (!statement || !statement.Step() || statement.ColumnInt64(0) == 0) {
    482     *first_visit = base::Time::Now();
    483     return false;
    484   }
    485   *first_visit = base::Time::FromInternalValue(statement.ColumnInt64(0));
    486   return true;
    487 }
    488 
    489 void VisitDatabase::GetVisitsSource(const VisitVector& visits,
    490                                     VisitSourceMap* sources) {
    491   DCHECK(sources);
    492   sources->clear();
    493 
    494   // We query the source in batch. Here defines the batch size.
    495   const size_t batch_size = 500;
    496   size_t visits_size = visits.size();
    497 
    498   size_t start_index = 0, end_index = 0;
    499   while (end_index < visits_size) {
    500     start_index = end_index;
    501     end_index = end_index + batch_size < visits_size ? end_index + batch_size
    502                                                      : visits_size;
    503 
    504     // Compose the sql statement with a list of ids.
    505     std::string sql = "SELECT id,source FROM visit_source ";
    506     sql.append("WHERE id IN (");
    507     // Append all the ids in the statement.
    508     for (size_t j = start_index; j < end_index; j++) {
    509       if (j != start_index)
    510         sql.push_back(',');
    511       sql.append(base::Int64ToString(visits[j].visit_id));
    512     }
    513     sql.append(") ORDER BY id");
    514     sql::Statement statement(GetDB().GetUniqueStatement(sql.c_str()));
    515     if (!statement)
    516       return;
    517 
    518     // Get the source entries out of the query result.
    519     while (statement.Step()) {
    520       std::pair<VisitID, VisitSource> source_entry(statement.ColumnInt64(0),
    521           static_cast<VisitSource>(statement.ColumnInt(1)));
    522       sources->insert(source_entry);
    523     }
    524   }
    525 }
    526 
    527 }  // namespace history
    528