Home | History | Annotate | Download | only in history
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/history/history_database.h"
      6 
      7 #include <algorithm>
      8 #include <set>
      9 #include <string>
     10 
     11 #include "base/command_line.h"
     12 #include "base/files/file_util.h"
     13 #include "base/metrics/histogram.h"
     14 #include "base/rand_util.h"
     15 #include "base/strings/string_util.h"
     16 #include "base/time/time.h"
     17 #include "sql/transaction.h"
     18 
     19 #if defined(OS_MACOSX)
     20 #include "base/mac/mac_util.h"
     21 #endif
     22 
     23 namespace history {
     24 
namespace {

// Current version number. We write databases at the "current" version number,
// but any previous version that can read the "compatible" one can make do with
// our database without *too* many bad effects.
const int kCurrentVersionNumber = 29;
// Compatible version number handed to the meta table in Init(); the 15->16
// migration step also uses it as an upper bound for the stored compatible
// version.
const int kCompatibleVersionNumber = 16;
// Meta-table key under which the early expiration threshold is stored (see
// GetEarlyExpirationThreshold / UpdateEarlyExpirationThreshold).
const char kEarlyExpirationThresholdKey[] = "early_expiration_threshold";

}  // namespace
     35 
     36 HistoryDatabase::HistoryDatabase() {
     37 }
     38 
     39 HistoryDatabase::~HistoryDatabase() {
     40 }
     41 
     42 sql::InitStatus HistoryDatabase::Init(const base::FilePath& history_name) {
     43   db_.set_histogram_tag("History");
     44 
     45   // Set the exceptional sqlite error handler.
     46   db_.set_error_callback(error_callback_);
     47 
     48   // Set the database page size to something a little larger to give us
     49   // better performance (we're typically seek rather than bandwidth limited).
     50   // This only has an effect before any tables have been created, otherwise
     51   // this is a NOP. Must be a power of 2 and a max of 8192.
     52   db_.set_page_size(4096);
     53 
     54   // Set the cache size. The page size, plus a little extra, times this
     55   // value, tells us how much memory the cache will use maximum.
     56   // 1000 * 4kB = 4MB
     57   // TODO(brettw) scale this value to the amount of available memory.
     58   db_.set_cache_size(1000);
     59 
     60   // Note that we don't set exclusive locking here. That's done by
     61   // BeginExclusiveMode below which is called later (we have to be in shared
     62   // mode to start out for the in-memory backend to read the data).
     63 
     64   if (!db_.Open(history_name))
     65     return sql::INIT_FAILURE;
     66 
     67   // Wrap the rest of init in a tranaction. This will prevent the database from
     68   // getting corrupted if we crash in the middle of initialization or migration.
     69   sql::Transaction committer(&db_);
     70   if (!committer.Begin())
     71     return sql::INIT_FAILURE;
     72 
     73 #if defined(OS_MACOSX)
     74   // Exclude the history file from backups.
     75   base::mac::SetFileBackupExclusion(history_name);
     76 #endif
     77 
     78   // Prime the cache.
     79   db_.Preload();
     80 
     81   // Create the tables and indices.
     82   // NOTE: If you add something here, also add it to
     83   //       RecreateAllButStarAndURLTables.
     84   if (!meta_table_.Init(&db_, GetCurrentVersion(), kCompatibleVersionNumber))
     85     return sql::INIT_FAILURE;
     86   if (!CreateURLTable(false) || !InitVisitTable() ||
     87       !InitKeywordSearchTermsTable() || !InitDownloadTable() ||
     88       !InitSegmentTables())
     89     return sql::INIT_FAILURE;
     90   CreateMainURLIndex();
     91   CreateKeywordSearchTermsIndices();
     92 
     93   // TODO(benjhayden) Remove at some point.
     94   meta_table_.DeleteKey("next_download_id");
     95 
     96   // Version check.
     97   sql::InitStatus version_status = EnsureCurrentVersion();
     98   if (version_status != sql::INIT_OK)
     99     return version_status;
    100 
    101   return committer.Commit() ? sql::INIT_OK : sql::INIT_FAILURE;
    102 }
    103 
    104 void HistoryDatabase::ComputeDatabaseMetrics(
    105     const base::FilePath& history_name) {
    106     base::TimeTicks start_time = base::TimeTicks::Now();
    107   int64 file_size = 0;
    108   if (!base::GetFileSize(history_name, &file_size))
    109     return;
    110   int file_mb = static_cast<int>(file_size / (1024 * 1024));
    111   UMA_HISTOGRAM_MEMORY_MB("History.DatabaseFileMB", file_mb);
    112 
    113   sql::Statement url_count(db_.GetUniqueStatement("SELECT count(*) FROM urls"));
    114   if (!url_count.Step())
    115     return;
    116   UMA_HISTOGRAM_COUNTS("History.URLTableCount", url_count.ColumnInt(0));
    117 
    118   sql::Statement visit_count(db_.GetUniqueStatement(
    119       "SELECT count(*) FROM visits"));
    120   if (!visit_count.Step())
    121     return;
    122   UMA_HISTOGRAM_COUNTS("History.VisitTableCount", visit_count.ColumnInt(0));
    123 
    124   base::Time one_week_ago = base::Time::Now() - base::TimeDelta::FromDays(7);
    125   sql::Statement weekly_visit_sql(db_.GetUniqueStatement(
    126       "SELECT count(*) FROM visits WHERE visit_time > ?"));
    127   weekly_visit_sql.BindInt64(0, one_week_ago.ToInternalValue());
    128   int weekly_visit_count = 0;
    129   if (weekly_visit_sql.Step())
    130     weekly_visit_count = weekly_visit_sql.ColumnInt(0);
    131   UMA_HISTOGRAM_COUNTS("History.WeeklyVisitCount", weekly_visit_count);
    132 
    133   base::Time one_month_ago = base::Time::Now() - base::TimeDelta::FromDays(30);
    134   sql::Statement monthly_visit_sql(db_.GetUniqueStatement(
    135       "SELECT count(*) FROM visits WHERE visit_time > ? AND visit_time <= ?"));
    136   monthly_visit_sql.BindInt64(0, one_month_ago.ToInternalValue());
    137   monthly_visit_sql.BindInt64(1, one_week_ago.ToInternalValue());
    138   int older_visit_count = 0;
    139   if (monthly_visit_sql.Step())
    140     older_visit_count = monthly_visit_sql.ColumnInt(0);
    141   UMA_HISTOGRAM_COUNTS("History.MonthlyVisitCount",
    142                        older_visit_count + weekly_visit_count);
    143 
    144   UMA_HISTOGRAM_TIMES("History.DatabaseBasicMetricsTime",
    145                       base::TimeTicks::Now() - start_time);
    146 
    147   // Compute the advanced metrics even less often, pending timing data showing
    148   // that's not necessary.
    149   if (base::RandInt(1, 3) == 3) {
    150     start_time = base::TimeTicks::Now();
    151 
    152     // Collect all URLs visited within the last month.
    153     sql::Statement url_sql(db_.GetUniqueStatement(
    154         "SELECT url, last_visit_time FROM urls WHERE last_visit_time > ?"));
    155     url_sql.BindInt64(0, one_month_ago.ToInternalValue());
    156 
    157     // Count URLs (which will always be unique) and unique hosts within the last
    158     // week and last month.
    159     int week_url_count = 0;
    160     int month_url_count = 0;
    161     std::set<std::string> week_hosts;
    162     std::set<std::string> month_hosts;
    163     while (url_sql.Step()) {
    164       GURL url(url_sql.ColumnString(0));
    165       base::Time visit_time =
    166           base::Time::FromInternalValue(url_sql.ColumnInt64(1));
    167       ++month_url_count;
    168       month_hosts.insert(url.host());
    169       if (visit_time > one_week_ago) {
    170         ++week_url_count;
    171         week_hosts.insert(url.host());
    172       }
    173     }
    174     UMA_HISTOGRAM_COUNTS("History.WeeklyURLCount", week_url_count);
    175     UMA_HISTOGRAM_COUNTS_10000("History.WeeklyHostCount", week_hosts.size());
    176     UMA_HISTOGRAM_COUNTS("History.MonthlyURLCount", month_url_count);
    177     UMA_HISTOGRAM_COUNTS_10000("History.MonthlyHostCount", month_hosts.size());
    178     UMA_HISTOGRAM_TIMES("History.DatabaseAdvancedMetricsTime",
    179                         base::TimeTicks::Now() - start_time);
    180   }
    181 }
    182 
    183 void HistoryDatabase::BeginExclusiveMode() {
    184   // We can't use set_exclusive_locking() since that only has an effect before
    185   // the DB is opened.
    186   ignore_result(db_.Execute("PRAGMA locking_mode=EXCLUSIVE"));
    187 }
    188 
    189 // static
    190 int HistoryDatabase::GetCurrentVersion() {
    191   return kCurrentVersionNumber;
    192 }
    193 
    194 void HistoryDatabase::BeginTransaction() {
    195   db_.BeginTransaction();
    196 }
    197 
    198 void HistoryDatabase::CommitTransaction() {
    199   db_.CommitTransaction();
    200 }
    201 
    202 void HistoryDatabase::RollbackTransaction() {
    203   db_.RollbackTransaction();
    204 }
    205 
    206 bool HistoryDatabase::RecreateAllTablesButURL() {
    207   if (!DropVisitTable())
    208     return false;
    209   if (!InitVisitTable())
    210     return false;
    211 
    212   if (!DropKeywordSearchTermsTable())
    213     return false;
    214   if (!InitKeywordSearchTermsTable())
    215     return false;
    216 
    217   if (!DropSegmentTables())
    218     return false;
    219   if (!InitSegmentTables())
    220     return false;
    221 
    222   CreateKeywordSearchTermsIndices();
    223   return true;
    224 }
    225 
    226 void HistoryDatabase::Vacuum() {
    227   DCHECK_EQ(0, db_.transaction_nesting()) <<
    228       "Can not have a transaction when vacuuming.";
    229   ignore_result(db_.Execute("VACUUM"));
    230 }
    231 
    232 void HistoryDatabase::TrimMemory(bool aggressively) {
    233   db_.TrimMemory(aggressively);
    234 }
    235 
    236 bool HistoryDatabase::Raze() {
    237   return db_.Raze();
    238 }
    239 
    240 bool HistoryDatabase::SetSegmentID(VisitID visit_id, SegmentID segment_id) {
    241   sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE,
    242       "UPDATE visits SET segment_id = ? WHERE id = ?"));
    243   s.BindInt64(0, segment_id);
    244   s.BindInt64(1, visit_id);
    245   DCHECK(db_.GetLastChangeCount() == 1);
    246 
    247   return s.Run();
    248 }
    249 
    250 SegmentID HistoryDatabase::GetSegmentID(VisitID visit_id) {
    251   sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE,
    252       "SELECT segment_id FROM visits WHERE id = ?"));
    253   s.BindInt64(0, visit_id);
    254 
    255   if (s.Step()) {
    256     if (s.ColumnType(0) == sql::COLUMN_TYPE_NULL)
    257       return 0;
    258     else
    259       return s.ColumnInt64(0);
    260   }
    261   return 0;
    262 }
    263 
    264 base::Time HistoryDatabase::GetEarlyExpirationThreshold() {
    265   if (!cached_early_expiration_threshold_.is_null())
    266     return cached_early_expiration_threshold_;
    267 
    268   int64 threshold;
    269   if (!meta_table_.GetValue(kEarlyExpirationThresholdKey, &threshold)) {
    270     // Set to a very early non-zero time, so it's before all history, but not
    271     // zero to avoid re-retrieval.
    272     threshold = 1L;
    273   }
    274 
    275   cached_early_expiration_threshold_ = base::Time::FromInternalValue(threshold);
    276   return cached_early_expiration_threshold_;
    277 }
    278 
    279 void HistoryDatabase::UpdateEarlyExpirationThreshold(base::Time threshold) {
    280   meta_table_.SetValue(kEarlyExpirationThresholdKey,
    281                        threshold.ToInternalValue());
    282   cached_early_expiration_threshold_ = threshold;
    283 }
    284 
    285 sql::Connection& HistoryDatabase::GetDB() {
    286   return db_;
    287 }
    288 
    289 // Migration -------------------------------------------------------------------
    290 
// Brings the database schema up to the current version by applying the
// migration steps below one after another, starting from the version stored
// in the meta table. Each step bumps |cur_version| and writes it back to the
// meta table, so the next `if` picks up where the previous one left off.
//
// Returns sql::INIT_TOO_NEW if the stored compatible version is newer than
// kCurrentVersionNumber, sql::INIT_FAILURE if a required migration step
// fails, and sql::INIT_OK otherwise (including when the starting version is
// older than any step handled here, which is only logged).
sql::InitStatus HistoryDatabase::EnsureCurrentVersion() {
  // We can't read databases newer than we were designed for.
  if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) {
    LOG(WARNING) << "History database is too new.";
    return sql::INIT_TOO_NEW;
  }

  int cur_version = meta_table_.GetVersionNumber();

  // Put migration code here

  // 15 -> 16: drop the starred table and the starred id from urls. This is
  // the only step that also rewrites the compatible version number.
  if (cur_version == 15) {
    if (!db_.Execute("DROP TABLE starred") || !DropStarredIDFromURLs()) {
      LOG(WARNING) << "Unable to update history database to version 16.";
      return sql::INIT_FAILURE;
    }
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
    meta_table_.SetCompatibleVersionNumber(
        std::min(cur_version, kCompatibleVersionNumber));
  }

  if (cur_version == 16) {
#if !defined(OS_WIN)
    // In this version we bring the time format on Mac & Linux in sync with the
    // Windows version so that profiles can be moved between computers.
    MigrateTimeEpoch();
#endif
    // On all platforms we bump the version number, so on Windows this
    // migration is a NOP. We keep the compatible version at 16 since things
    // will basically still work, just history will be in the future if an
    // old version reads it.
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 17) {
    // Version 17 was for thumbnails to top sites migration. We ended up
    // disabling it though, so 17->18 does nothing.
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 18) {
    // This is the version prior to adding url_source column. We need to
    // migrate the database.
    // Note: no schema work is performed in this step; only the version
    // number is advanced.
    cur_version = 19;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 19) {
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
    // This was the thumbnail migration.  Obsolete.
  }

  if (cur_version == 20) {
    // This is the version prior to adding the visit_duration field in visits
    // database. We need to migrate the database.
    if (!MigrateVisitsWithoutDuration()) {
      LOG(WARNING) << "Unable to update history database to version 21.";
      return sql::INIT_FAILURE;
    }
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 21) {
    // The android_urls table's data schema was changed in version 21.
    // A failure here is only logged; the version is bumped regardless.
#if defined(OS_ANDROID)
    if (!MigrateToVersion22()) {
      LOG(WARNING) << "Unable to migrate the android_urls table to version 22";
    }
#endif
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 22 -> 23: fix up downloads state values.
  if (cur_version == 22) {
    if (!MigrateDownloadsState()) {
      LOG(WARNING) << "Unable to fix invalid downloads state values";
      // Invalid state values may cause crashes.
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 23 -> 24: downloads interrupt reason, paths and danger type.
  if (cur_version == 23) {
    if (!MigrateDownloadsReasonPathsAndDangerType()) {
      LOG(WARNING) << "Unable to upgrade download interrupt reason and paths";
      // Invalid state values may cause crashes.
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 24 -> 25: presentation index migration.
  if (cur_version == 24) {
    if (!MigratePresentationIndex()) {
      LOG(WARNING) << "Unable to migrate history to version 25";
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 25 -> 26: referrer migration.
  if (cur_version == 25) {
    if (!MigrateReferrer()) {
      LOG(WARNING) << "Unable to migrate history to version 26";
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 26 -> 27: downloaded-by-extension migration.
  if (cur_version == 26) {
    if (!MigrateDownloadedByExtension()) {
      LOG(WARNING) << "Unable to migrate history to version 27";
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 27 -> 28: download validators migration.
  if (cur_version == 27) {
    if (!MigrateDownloadValidators()) {
      LOG(WARNING) << "Unable to migrate history to version 28";
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // 28 -> 29: mime type migration.
  if (cur_version == 28) {
    if (!MigrateMimeType()) {
      LOG(WARNING) << "Unable to migrate history to version 29";
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // When the version is too old, we just try to continue anyway, there should
  // not be a released product that makes a database too old for us to handle.
  LOG_IF(WARNING, cur_version < GetCurrentVersion()) <<
         "History database version " << cur_version << " is too old to handle.";

  return sql::INIT_OK;
}
    441 
    442 #if !defined(OS_WIN)
    443 void HistoryDatabase::MigrateTimeEpoch() {
    444   // Update all the times in the URLs and visits table in the main database.
    445   ignore_result(db_.Execute(
    446       "UPDATE urls "
    447       "SET last_visit_time = last_visit_time + 11644473600000000 "
    448       "WHERE id IN (SELECT id FROM urls WHERE last_visit_time > 0);"));
    449   ignore_result(db_.Execute(
    450       "UPDATE visits "
    451       "SET visit_time = visit_time + 11644473600000000 "
    452       "WHERE id IN (SELECT id FROM visits WHERE visit_time > 0);"));
    453   ignore_result(db_.Execute(
    454       "UPDATE segment_usage "
    455       "SET time_slot = time_slot + 11644473600000000 "
    456       "WHERE id IN (SELECT id FROM segment_usage WHERE time_slot > 0);"));
    457 }
    458 #endif
    459 
    460 }  // namespace history
    461