Home | History | Annotate | Download | only in history
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/history/history_database.h"
      6 
      7 #include <algorithm>
      8 #include <set>
      9 #include <string>
     10 
     11 #include "base/command_line.h"
     12 #include "base/file_util.h"
     13 #include "base/metrics/histogram.h"
     14 #include "base/rand_util.h"
     15 #include "base/strings/string_util.h"
     16 #include "base/time/time.h"
     17 #include "sql/transaction.h"
     18 
     19 #if defined(OS_MACOSX)
     20 #include "base/mac/mac_util.h"
     21 #endif
     22 
     23 namespace history {
     24 
     25 namespace {
     26 
// Current version number. We write databases at the "current" version number,
// but any previous version that can read the "compatible" one can make do with
// our database without *too* many bad effects.
     30 const int kCurrentVersionNumber = 28;
     31 const int kCompatibleVersionNumber = 16;
     32 const char kEarlyExpirationThresholdKey[] = "early_expiration_threshold";
     33 
     34 }  // namespace
     35 
     36 HistoryDatabase::HistoryDatabase()
     37     : needs_version_17_migration_(false) {
     38 }
     39 
     40 HistoryDatabase::~HistoryDatabase() {
     41 }
     42 
     43 sql::InitStatus HistoryDatabase::Init(const base::FilePath& history_name) {
     44   db_.set_histogram_tag("History");
     45 
     46   // Set the exceptional sqlite error handler.
     47   db_.set_error_callback(error_callback_);
     48 
     49   // Set the database page size to something a little larger to give us
     50   // better performance (we're typically seek rather than bandwidth limited).
     51   // This only has an effect before any tables have been created, otherwise
     52   // this is a NOP. Must be a power of 2 and a max of 8192.
     53   db_.set_page_size(4096);
     54 
     55   // Set the cache size. The page size, plus a little extra, times this
     56   // value, tells us how much memory the cache will use maximum.
     57   // 1000 * 4kB = 4MB
     58   // TODO(brettw) scale this value to the amount of available memory.
     59   db_.set_cache_size(1000);
     60 
     61   // Note that we don't set exclusive locking here. That's done by
     62   // BeginExclusiveMode below which is called later (we have to be in shared
     63   // mode to start out for the in-memory backend to read the data).
     64 
     65   if (!db_.Open(history_name))
     66     return sql::INIT_FAILURE;
     67 
     68   // Wrap the rest of init in a tranaction. This will prevent the database from
     69   // getting corrupted if we crash in the middle of initialization or migration.
     70   sql::Transaction committer(&db_);
     71   if (!committer.Begin())
     72     return sql::INIT_FAILURE;
     73 
     74 #if defined(OS_MACOSX)
     75   // Exclude the history file from backups.
     76   base::mac::SetFileBackupExclusion(history_name);
     77 #endif
     78 
     79   // Prime the cache.
     80   db_.Preload();
     81 
     82   // Create the tables and indices.
     83   // NOTE: If you add something here, also add it to
     84   //       RecreateAllButStarAndURLTables.
     85   if (!meta_table_.Init(&db_, GetCurrentVersion(), kCompatibleVersionNumber))
     86     return sql::INIT_FAILURE;
     87   if (!CreateURLTable(false) || !InitVisitTable() ||
     88       !InitKeywordSearchTermsTable() || !InitDownloadTable() ||
     89       !InitSegmentTables())
     90     return sql::INIT_FAILURE;
     91   CreateMainURLIndex();
     92   CreateKeywordSearchTermsIndices();
     93 
     94   // TODO(benjhayden) Remove at some point.
     95   meta_table_.DeleteKey("next_download_id");
     96 
     97   // Version check.
     98   sql::InitStatus version_status = EnsureCurrentVersion();
     99   if (version_status != sql::INIT_OK)
    100     return version_status;
    101 
    102   return committer.Commit() ? sql::INIT_OK : sql::INIT_FAILURE;
    103 }
    104 
    105 void HistoryDatabase::ComputeDatabaseMetrics(
    106     const base::FilePath& history_name) {
    107     base::TimeTicks start_time = base::TimeTicks::Now();
    108   int64 file_size = 0;
    109   if (!base::GetFileSize(history_name, &file_size))
    110     return;
    111   int file_mb = static_cast<int>(file_size / (1024 * 1024));
    112   UMA_HISTOGRAM_MEMORY_MB("History.DatabaseFileMB", file_mb);
    113 
    114   sql::Statement url_count(db_.GetUniqueStatement("SELECT count(*) FROM urls"));
    115   if (!url_count.Step())
    116     return;
    117   UMA_HISTOGRAM_COUNTS("History.URLTableCount", url_count.ColumnInt(0));
    118 
    119   sql::Statement visit_count(db_.GetUniqueStatement(
    120       "SELECT count(*) FROM visits"));
    121   if (!visit_count.Step())
    122     return;
    123   UMA_HISTOGRAM_COUNTS("History.VisitTableCount", visit_count.ColumnInt(0));
    124 
    125   base::Time one_week_ago = base::Time::Now() - base::TimeDelta::FromDays(7);
    126   sql::Statement weekly_visit_sql(db_.GetUniqueStatement(
    127       "SELECT count(*) FROM visits WHERE visit_time > ?"));
    128   weekly_visit_sql.BindInt64(0, one_week_ago.ToInternalValue());
    129   int weekly_visit_count = 0;
    130   if (weekly_visit_sql.Step())
    131     weekly_visit_count = weekly_visit_sql.ColumnInt(0);
    132   UMA_HISTOGRAM_COUNTS("History.WeeklyVisitCount", weekly_visit_count);
    133 
    134   base::Time one_month_ago = base::Time::Now() - base::TimeDelta::FromDays(30);
    135   sql::Statement monthly_visit_sql(db_.GetUniqueStatement(
    136       "SELECT count(*) FROM visits WHERE visit_time > ? AND visit_time <= ?"));
    137   monthly_visit_sql.BindInt64(0, one_month_ago.ToInternalValue());
    138   monthly_visit_sql.BindInt64(1, one_week_ago.ToInternalValue());
    139   int older_visit_count = 0;
    140   if (monthly_visit_sql.Step())
    141     older_visit_count = monthly_visit_sql.ColumnInt(0);
    142   UMA_HISTOGRAM_COUNTS("History.MonthlyVisitCount",
    143                        older_visit_count + weekly_visit_count);
    144 
    145   UMA_HISTOGRAM_TIMES("History.DatabaseBasicMetricsTime",
    146                       base::TimeTicks::Now() - start_time);
    147 
    148   // Compute the advanced metrics even less often, pending timing data showing
    149   // that's not necessary.
    150   if (base::RandInt(1, 3) == 3) {
    151     start_time = base::TimeTicks::Now();
    152 
    153     // Collect all URLs visited within the last month.
    154     sql::Statement url_sql(db_.GetUniqueStatement(
    155         "SELECT url, last_visit_time FROM urls WHERE last_visit_time > ?"));
    156     url_sql.BindInt64(0, one_month_ago.ToInternalValue());
    157 
    158     // Count URLs (which will always be unique) and unique hosts within the last
    159     // week and last month.
    160     int week_url_count = 0;
    161     int month_url_count = 0;
    162     std::set<std::string> week_hosts;
    163     std::set<std::string> month_hosts;
    164     while (url_sql.Step()) {
    165       GURL url(url_sql.ColumnString(0));
    166       base::Time visit_time =
    167           base::Time::FromInternalValue(url_sql.ColumnInt64(1));
    168       ++month_url_count;
    169       month_hosts.insert(url.host());
    170       if (visit_time > one_week_ago) {
    171         ++week_url_count;
    172         week_hosts.insert(url.host());
    173       }
    174     }
    175     UMA_HISTOGRAM_COUNTS("History.WeeklyURLCount", week_url_count);
    176     UMA_HISTOGRAM_COUNTS_10000("History.WeeklyHostCount", week_hosts.size());
    177     UMA_HISTOGRAM_COUNTS("History.MonthlyURLCount", month_url_count);
    178     UMA_HISTOGRAM_COUNTS_10000("History.MonthlyHostCount", month_hosts.size());
    179     UMA_HISTOGRAM_TIMES("History.DatabaseAdvancedMetricsTime",
    180                         base::TimeTicks::Now() - start_time);
    181   }
    182 }
    183 
    184 void HistoryDatabase::BeginExclusiveMode() {
    185   // We can't use set_exclusive_locking() since that only has an effect before
    186   // the DB is opened.
    187   ignore_result(db_.Execute("PRAGMA locking_mode=EXCLUSIVE"));
    188 }
    189 
    190 // static
    191 int HistoryDatabase::GetCurrentVersion() {
    192   return kCurrentVersionNumber;
    193 }
    194 
    195 void HistoryDatabase::BeginTransaction() {
    196   db_.BeginTransaction();
    197 }
    198 
    199 void HistoryDatabase::CommitTransaction() {
    200   db_.CommitTransaction();
    201 }
    202 
    203 void HistoryDatabase::RollbackTransaction() {
    204   db_.RollbackTransaction();
    205 }
    206 
    207 bool HistoryDatabase::RecreateAllTablesButURL() {
    208   if (!DropVisitTable())
    209     return false;
    210   if (!InitVisitTable())
    211     return false;
    212 
    213   if (!DropKeywordSearchTermsTable())
    214     return false;
    215   if (!InitKeywordSearchTermsTable())
    216     return false;
    217 
    218   if (!DropSegmentTables())
    219     return false;
    220   if (!InitSegmentTables())
    221     return false;
    222 
    223   // We also add the supplementary URL indices at this point. This index is
    224   // over parts of the URL table that weren't automatically created when the
    225   // temporary URL table was
    226   CreateKeywordSearchTermsIndices();
    227   return true;
    228 }
    229 
    230 void HistoryDatabase::Vacuum() {
    231   DCHECK_EQ(0, db_.transaction_nesting()) <<
    232       "Can not have a transaction when vacuuming.";
    233   ignore_result(db_.Execute("VACUUM"));
    234 }
    235 
    236 void HistoryDatabase::TrimMemory(bool aggressively) {
    237   db_.TrimMemory(aggressively);
    238 }
    239 
    240 bool HistoryDatabase::Raze() {
    241   return db_.Raze();
    242 }
    243 
    244 bool HistoryDatabase::SetSegmentID(VisitID visit_id, SegmentID segment_id) {
    245   sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE,
    246       "UPDATE visits SET segment_id = ? WHERE id = ?"));
    247   s.BindInt64(0, segment_id);
    248   s.BindInt64(1, visit_id);
    249   DCHECK(db_.GetLastChangeCount() == 1);
    250 
    251   return s.Run();
    252 }
    253 
    254 SegmentID HistoryDatabase::GetSegmentID(VisitID visit_id) {
    255   sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE,
    256       "SELECT segment_id FROM visits WHERE id = ?"));
    257   s.BindInt64(0, visit_id);
    258 
    259   if (s.Step()) {
    260     if (s.ColumnType(0) == sql::COLUMN_TYPE_NULL)
    261       return 0;
    262     else
    263       return s.ColumnInt64(0);
    264   }
    265   return 0;
    266 }
    267 
    268 base::Time HistoryDatabase::GetEarlyExpirationThreshold() {
    269   if (!cached_early_expiration_threshold_.is_null())
    270     return cached_early_expiration_threshold_;
    271 
    272   int64 threshold;
    273   if (!meta_table_.GetValue(kEarlyExpirationThresholdKey, &threshold)) {
    274     // Set to a very early non-zero time, so it's before all history, but not
    275     // zero to avoid re-retrieval.
    276     threshold = 1L;
    277   }
    278 
    279   cached_early_expiration_threshold_ = base::Time::FromInternalValue(threshold);
    280   return cached_early_expiration_threshold_;
    281 }
    282 
    283 void HistoryDatabase::UpdateEarlyExpirationThreshold(base::Time threshold) {
    284   meta_table_.SetValue(kEarlyExpirationThresholdKey,
    285                        threshold.ToInternalValue());
    286   cached_early_expiration_threshold_ = threshold;
    287 }
    288 
    289 sql::Connection& HistoryDatabase::GetDB() {
    290   return db_;
    291 }
    292 
    293 // Migration -------------------------------------------------------------------
    294 
    295 sql::InitStatus HistoryDatabase::EnsureCurrentVersion() {
    296   // We can't read databases newer than we were designed for.
    297   if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) {
    298     LOG(WARNING) << "History database is too new.";
    299     return sql::INIT_TOO_NEW;
    300   }
    301 
    302   // NOTICE: If you are changing structures for things shared with the archived
    303   // history file like URLs, visits, or downloads, that will need migration as
    304   // well. Instead of putting such migration code in this class, it should be
    305   // in the corresponding file (url_database.cc, etc.) and called from here and
    306   // from the archived_database.cc.
    307 
    308   int cur_version = meta_table_.GetVersionNumber();
    309 
    310   // Put migration code here
    311 
    312   if (cur_version == 15) {
    313     if (!db_.Execute("DROP TABLE starred") || !DropStarredIDFromURLs()) {
    314       LOG(WARNING) << "Unable to update history database to version 16.";
    315       return sql::INIT_FAILURE;
    316     }
    317     ++cur_version;
    318     meta_table_.SetVersionNumber(cur_version);
    319     meta_table_.SetCompatibleVersionNumber(
    320         std::min(cur_version, kCompatibleVersionNumber));
    321   }
    322 
    323   if (cur_version == 16) {
    324 #if !defined(OS_WIN)
    325     // In this version we bring the time format on Mac & Linux in sync with the
    326     // Windows version so that profiles can be moved between computers.
    327     MigrateTimeEpoch();
    328 #endif
    329     // On all platforms we bump the version number, so on Windows this
    330     // migration is a NOP. We keep the compatible version at 16 since things
    331     // will basically still work, just history will be in the future if an
    332     // old version reads it.
    333     ++cur_version;
    334     meta_table_.SetVersionNumber(cur_version);
    335   }
    336 
    337   if (cur_version == 17) {
    338     // Version 17 was for thumbnails to top sites migration. We ended up
    339     // disabling it though, so 17->18 does nothing.
    340     ++cur_version;
    341     meta_table_.SetVersionNumber(cur_version);
    342   }
    343 
    344   if (cur_version == 18) {
    345     // This is the version prior to adding url_source column. We need to
    346     // migrate the database.
    347     cur_version = 19;
    348     meta_table_.SetVersionNumber(cur_version);
    349   }
    350 
    351   if (cur_version == 19) {
    352     cur_version++;
    353     meta_table_.SetVersionNumber(cur_version);
    354     // This was the thumbnail migration.  Obsolete.
    355   }
    356 
    357   if (cur_version == 20) {
    358     // This is the version prior to adding the visit_duration field in visits
    359     // database. We need to migrate the database.
    360     if (!MigrateVisitsWithoutDuration()) {
    361       LOG(WARNING) << "Unable to update history database to version 21.";
    362       return sql::INIT_FAILURE;
    363     }
    364     ++cur_version;
    365     meta_table_.SetVersionNumber(cur_version);
    366   }
    367 
    368   if (cur_version == 21) {
    369     // The android_urls table's data schemal was changed in version 21.
    370 #if defined(OS_ANDROID)
    371     if (!MigrateToVersion22()) {
    372       LOG(WARNING) << "Unable to migrate the android_urls table to version 22";
    373     }
    374 #endif
    375     ++cur_version;
    376     meta_table_.SetVersionNumber(cur_version);
    377   }
    378 
    379   if (cur_version == 22) {
    380     if (!MigrateDownloadsState()) {
    381       LOG(WARNING) << "Unable to fix invalid downloads state values";
    382       // Invalid state values may cause crashes.
    383       return sql::INIT_FAILURE;
    384     }
    385     cur_version++;
    386     meta_table_.SetVersionNumber(cur_version);
    387   }
    388 
    389   if (cur_version == 23) {
    390     if (!MigrateDownloadsReasonPathsAndDangerType()) {
    391       LOG(WARNING) << "Unable to upgrade download interrupt reason and paths";
    392       // Invalid state values may cause crashes.
    393       return sql::INIT_FAILURE;
    394     }
    395     cur_version++;
    396     meta_table_.SetVersionNumber(cur_version);
    397   }
    398 
    399   if (cur_version == 24) {
    400     if (!MigratePresentationIndex()) {
    401       LOG(WARNING) << "Unable to migrate history to version 25";
    402       return sql::INIT_FAILURE;
    403     }
    404     cur_version++;
    405     meta_table_.SetVersionNumber(cur_version);
    406   }
    407 
    408   if (cur_version == 25) {
    409     if (!MigrateReferrer()) {
    410       LOG(WARNING) << "Unable to migrate history to version 26";
    411       return sql::INIT_FAILURE;
    412     }
    413     cur_version++;
    414     meta_table_.SetVersionNumber(cur_version);
    415   }
    416 
    417   if (cur_version == 26) {
    418     if (!MigrateDownloadedByExtension()) {
    419       LOG(WARNING) << "Unable to migrate history to version 27";
    420       return sql::INIT_FAILURE;
    421     }
    422     cur_version++;
    423     meta_table_.SetVersionNumber(cur_version);
    424   }
    425 
    426   if (cur_version == 27) {
    427     if (!MigrateDownloadValidators()) {
    428       LOG(WARNING) << "Unable to migrate history to version 28";
    429       return sql::INIT_FAILURE;
    430     }
    431     cur_version++;
    432     meta_table_.SetVersionNumber(cur_version);
    433   }
    434 
    435   // When the version is too old, we just try to continue anyway, there should
    436   // not be a released product that makes a database too old for us to handle.
    437   LOG_IF(WARNING, cur_version < GetCurrentVersion()) <<
    438          "History database version " << cur_version << " is too old to handle.";
    439 
    440   return sql::INIT_OK;
    441 }
    442 
    443 #if !defined(OS_WIN)
    444 void HistoryDatabase::MigrateTimeEpoch() {
    445   // Update all the times in the URLs and visits table in the main database.
    446   ignore_result(db_.Execute(
    447       "UPDATE urls "
    448       "SET last_visit_time = last_visit_time + 11644473600000000 "
    449       "WHERE id IN (SELECT id FROM urls WHERE last_visit_time > 0);"));
    450   ignore_result(db_.Execute(
    451       "UPDATE visits "
    452       "SET visit_time = visit_time + 11644473600000000 "
    453       "WHERE id IN (SELECT id FROM visits WHERE visit_time > 0);"));
    454   ignore_result(db_.Execute(
    455       "UPDATE segment_usage "
    456       "SET time_slot = time_slot + 11644473600000000 "
    457       "WHERE id IN (SELECT id FROM segment_usage WHERE time_slot > 0);"));
    458 
    459   // Erase all the full text index files. These will take a while to update and
    460   // are less important, so we just blow them away. Same with the archived
    461   // database.
    462   needs_version_17_migration_ = true;
    463 }
    464 #endif
    465 
    466 }  // namespace history
    467