// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/history/history_database.h"

#include <algorithm>
#include <set>
#include <string>

#include "base/command_line.h"
#include "base/file_util.h"
#include "base/metrics/histogram.h"
#include "base/rand_util.h"
#include "base/strings/string_util.h"
#include "base/time/time.h"
#include "sql/transaction.h"

#if defined(OS_MACOSX)
#include "base/mac/mac_util.h"
#endif

namespace history {

namespace {

// Current version number. We write databases at the "current" version number,
// but any previous version that can read the "compatible" one can make do with
// our database without *too* many bad effects.
const int kCurrentVersionNumber = 28;
const int kCompatibleVersionNumber = 16;
const char kEarlyExpirationThresholdKey[] = "early_expiration_threshold";

}  // namespace

HistoryDatabase::HistoryDatabase()
    : needs_version_17_migration_(false) {
}

HistoryDatabase::~HistoryDatabase() {
}

sql::InitStatus HistoryDatabase::Init(const base::FilePath& history_name) {
  db_.set_histogram_tag("History");

  // Set the exceptional sqlite error handler.
  db_.set_error_callback(error_callback_);

  // Set the database page size to something a little larger to give us
  // better performance (we're typically seek-limited rather than
  // bandwidth-limited). This only has an effect before any tables have been
  // created, otherwise this is a NOP. Must be a power of 2 and a max of 8192.
  db_.set_page_size(4096);

  // Set the cache size. The page size, plus a little extra, times this
  // value, tells us how much memory the cache will use at most.
  // 1000 * 4kB = 4MB
  // TODO(brettw) scale this value to the amount of available memory.
  db_.set_cache_size(1000);

  // Note that we don't set exclusive locking here. That's done by
  // BeginExclusiveMode below, which is called later (we have to be in shared
  // mode to start out for the in-memory backend to read the data).

  if (!db_.Open(history_name))
    return sql::INIT_FAILURE;

  // Wrap the rest of init in a transaction. This will prevent the database
  // from getting corrupted if we crash in the middle of initialization or
  // migration.
  sql::Transaction committer(&db_);
  if (!committer.Begin())
    return sql::INIT_FAILURE;

#if defined(OS_MACOSX)
  // Exclude the history file from backups.
  base::mac::SetFileBackupExclusion(history_name);
#endif

  // Prime the cache.
  db_.Preload();

  // Create the tables and indices.
  // NOTE: If you add something here, also add it to RecreateAllTablesButURL.
  if (!meta_table_.Init(&db_, GetCurrentVersion(), kCompatibleVersionNumber))
    return sql::INIT_FAILURE;
  if (!CreateURLTable(false) || !InitVisitTable() ||
      !InitKeywordSearchTermsTable() || !InitDownloadTable() ||
      !InitSegmentTables())
    return sql::INIT_FAILURE;
  CreateMainURLIndex();
  CreateKeywordSearchTermsIndices();

  // TODO(benjhayden) Remove at some point.
  meta_table_.DeleteKey("next_download_id");

  // Version check.
  sql::InitStatus version_status = EnsureCurrentVersion();
  if (version_status != sql::INIT_OK)
    return version_status;

  return committer.Commit() ? sql::INIT_OK : sql::INIT_FAILURE;
}
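
// Illustrative usage sketch (not part of this file's logic): a caller such as
// the history backend would typically construct the database, run Init() on
// the profile's History file, and only then switch to exclusive locking. The
// profile_dir variable below is hypothetical.
//
//   HistoryDatabase db;
//   sql::InitStatus status =
//       db.Init(profile_dir.Append(FILE_PATH_LITERAL("History")));
//   if (status != sql::INIT_OK) {
//     // Recovery (e.g. razing the file) is the caller's responsibility.
//   }
//   db.BeginExclusiveMode();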

void HistoryDatabase::ComputeDatabaseMetrics(
    const base::FilePath& history_name) {
  base::TimeTicks start_time = base::TimeTicks::Now();
  int64 file_size = 0;
  if (!base::GetFileSize(history_name, &file_size))
    return;
  int file_mb = static_cast<int>(file_size / (1024 * 1024));
  UMA_HISTOGRAM_MEMORY_MB("History.DatabaseFileMB", file_mb);

  sql::Statement url_count(db_.GetUniqueStatement(
      "SELECT count(*) FROM urls"));
  if (!url_count.Step())
    return;
  UMA_HISTOGRAM_COUNTS("History.URLTableCount", url_count.ColumnInt(0));

  sql::Statement visit_count(db_.GetUniqueStatement(
      "SELECT count(*) FROM visits"));
  if (!visit_count.Step())
    return;
  UMA_HISTOGRAM_COUNTS("History.VisitTableCount", visit_count.ColumnInt(0));

  base::Time one_week_ago = base::Time::Now() - base::TimeDelta::FromDays(7);
  sql::Statement weekly_visit_sql(db_.GetUniqueStatement(
      "SELECT count(*) FROM visits WHERE visit_time > ?"));
  weekly_visit_sql.BindInt64(0, one_week_ago.ToInternalValue());
  int weekly_visit_count = 0;
  if (weekly_visit_sql.Step())
    weekly_visit_count = weekly_visit_sql.ColumnInt(0);
  UMA_HISTOGRAM_COUNTS("History.WeeklyVisitCount", weekly_visit_count);

  base::Time one_month_ago = base::Time::Now() - base::TimeDelta::FromDays(30);
  sql::Statement monthly_visit_sql(db_.GetUniqueStatement(
      "SELECT count(*) FROM visits WHERE visit_time > ? AND visit_time <= ?"));
  monthly_visit_sql.BindInt64(0, one_month_ago.ToInternalValue());
  monthly_visit_sql.BindInt64(1, one_week_ago.ToInternalValue());
  int older_visit_count = 0;
  if (monthly_visit_sql.Step())
    older_visit_count = monthly_visit_sql.ColumnInt(0);
  UMA_HISTOGRAM_COUNTS("History.MonthlyVisitCount",
                       older_visit_count + weekly_visit_count);

  UMA_HISTOGRAM_TIMES("History.DatabaseBasicMetricsTime",
                      base::TimeTicks::Now() - start_time);

  // Compute the advanced metrics even less often, pending timing data showing
  // that's not necessary.
  if (base::RandInt(1, 3) == 3) {
    start_time = base::TimeTicks::Now();

    // Collect all URLs visited within the last month.
    sql::Statement url_sql(db_.GetUniqueStatement(
        "SELECT url, last_visit_time FROM urls WHERE last_visit_time > ?"));
    url_sql.BindInt64(0, one_month_ago.ToInternalValue());

    // Count URLs (which will always be unique) and unique hosts within the
    // last week and last month.
    int week_url_count = 0;
    int month_url_count = 0;
    std::set<std::string> week_hosts;
    std::set<std::string> month_hosts;
    while (url_sql.Step()) {
      GURL url(url_sql.ColumnString(0));
      base::Time visit_time =
          base::Time::FromInternalValue(url_sql.ColumnInt64(1));
      ++month_url_count;
      month_hosts.insert(url.host());
      if (visit_time > one_week_ago) {
        ++week_url_count;
        week_hosts.insert(url.host());
      }
    }
    UMA_HISTOGRAM_COUNTS("History.WeeklyURLCount", week_url_count);
    UMA_HISTOGRAM_COUNTS_10000("History.WeeklyHostCount", week_hosts.size());
    UMA_HISTOGRAM_COUNTS("History.MonthlyURLCount", month_url_count);
    UMA_HISTOGRAM_COUNTS_10000("History.MonthlyHostCount", month_hosts.size());
    UMA_HISTOGRAM_TIMES("History.DatabaseAdvancedMetricsTime",
                        base::TimeTicks::Now() - start_time);
  }
}
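
// For reference when reading the queries above: visit_time and
// last_visit_time are stored as base::Time internal values, i.e. microseconds
// since the Windows epoch (1601-01-01 UTC), which is what ToInternalValue()
// returns. A hand-run check in the sqlite3 shell must therefore use the same
// units; the cutoff value below is illustrative only, not real data.
//
//   sqlite> SELECT count(*) FROM visits WHERE visit_time > 13000000000000000;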

void HistoryDatabase::BeginExclusiveMode() {
  // We can't use set_exclusive_locking() since that only has an effect before
  // the DB is opened.
  ignore_result(db_.Execute("PRAGMA locking_mode=EXCLUSIVE"));
}

// static
int HistoryDatabase::GetCurrentVersion() {
  return kCurrentVersionNumber;
}

void HistoryDatabase::BeginTransaction() {
  db_.BeginTransaction();
}

void HistoryDatabase::CommitTransaction() {
  db_.CommitTransaction();
}

void HistoryDatabase::RollbackTransaction() {
  db_.RollbackTransaction();
}

bool HistoryDatabase::RecreateAllTablesButURL() {
  if (!DropVisitTable())
    return false;
  if (!InitVisitTable())
    return false;

  if (!DropKeywordSearchTermsTable())
    return false;
  if (!InitKeywordSearchTermsTable())
    return false;

  if (!DropSegmentTables())
    return false;
  if (!InitSegmentTables())
    return false;

  // We also add the supplementary URL indices at this point. These indices are
  // over parts of the URL table that weren't automatically created when the
  // temporary URL table was created.
  CreateKeywordSearchTermsIndices();
  return true;
}

void HistoryDatabase::Vacuum() {
  DCHECK_EQ(0, db_.transaction_nesting()) <<
      "Can not have a transaction when vacuuming.";
  ignore_result(db_.Execute("VACUUM"));
}

void HistoryDatabase::TrimMemory(bool aggressively) {
  db_.TrimMemory(aggressively);
}

bool HistoryDatabase::Raze() {
  return db_.Raze();
}

bool HistoryDatabase::SetSegmentID(VisitID visit_id, SegmentID segment_id) {
  sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE,
      "UPDATE visits SET segment_id = ? WHERE id = ?"));
  s.BindInt64(0, segment_id);
  s.BindInt64(1, visit_id);
  bool result = s.Run();
  // Exactly one row should have been updated.
  DCHECK_EQ(1, db_.GetLastChangeCount());
  return result;
}

SegmentID HistoryDatabase::GetSegmentID(VisitID visit_id) {
  sql::Statement s(db_.GetCachedStatement(SQL_FROM_HERE,
      "SELECT segment_id FROM visits WHERE id = ?"));
  s.BindInt64(0, visit_id);

  if (s.Step()) {
    if (s.ColumnType(0) == sql::COLUMN_TYPE_NULL)
      return 0;
    else
      return s.ColumnInt64(0);
  }
  return 0;
}

base::Time HistoryDatabase::GetEarlyExpirationThreshold() {
  if (!cached_early_expiration_threshold_.is_null())
    return cached_early_expiration_threshold_;

  int64 threshold;
  if (!meta_table_.GetValue(kEarlyExpirationThresholdKey, &threshold)) {
    // Set to a very early non-zero time, so it's before all history, but not
    // zero to avoid re-retrieval.
    threshold = 1L;
  }

  cached_early_expiration_threshold_ =
      base::Time::FromInternalValue(threshold);
  return cached_early_expiration_threshold_;
}

void HistoryDatabase::UpdateEarlyExpirationThreshold(base::Time threshold) {
  meta_table_.SetValue(kEarlyExpirationThresholdKey,
                       threshold.ToInternalValue());
  cached_early_expiration_threshold_ = threshold;
}

sql::Connection& HistoryDatabase::GetDB() {
  return db_;
}
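
// Illustrative note: meta_table_ persists the threshold above in the "meta"
// key/value table managed by sql::MetaTable, so the stored value can be
// inspected directly in the sqlite3 shell (sketch only):
//
//   sqlite> SELECT value FROM meta
//      ...>   WHERE key = 'early_expiration_threshold';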

// Migration -------------------------------------------------------------------

sql::InitStatus HistoryDatabase::EnsureCurrentVersion() {
  // We can't read databases newer than we were designed for.
  if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) {
    LOG(WARNING) << "History database is too new.";
    return sql::INIT_TOO_NEW;
  }

  // NOTICE: If you are changing structures for things shared with the archived
  // history file like URLs, visits, or downloads, those will need migration as
  // well. Instead of putting such migration code in this class, it should be
  // in the corresponding file (url_database.cc, etc.) and called from here and
  // from archived_database.cc.

  int cur_version = meta_table_.GetVersionNumber();

  // Put migration code here.

  if (cur_version == 15) {
    if (!db_.Execute("DROP TABLE starred") || !DropStarredIDFromURLs()) {
      LOG(WARNING) << "Unable to update history database to version 16.";
      return sql::INIT_FAILURE;
    }
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
    meta_table_.SetCompatibleVersionNumber(
        std::min(cur_version, kCompatibleVersionNumber));
  }

  if (cur_version == 16) {
#if !defined(OS_WIN)
    // In this version we bring the time format on Mac & Linux in sync with the
    // Windows version so that profiles can be moved between computers.
    MigrateTimeEpoch();
#endif
    // On all platforms we bump the version number, so on Windows this
    // migration is a NOP. We keep the compatible version at 16 since things
    // will basically still work, just history will be in the future if an
    // old version reads it.
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 17) {
    // Version 17 was for the thumbnails-to-top-sites migration. We ended up
    // disabling it though, so 17->18 does nothing.
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 18) {
    // This is the version prior to adding the url_source column. We need to
    // migrate the database.
    cur_version = 19;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 19) {
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
    // This was the thumbnail migration. Obsolete.
  }

  if (cur_version == 20) {
    // This is the version prior to adding the visit_duration field to the
    // visits table. We need to migrate the database.
    if (!MigrateVisitsWithoutDuration()) {
      LOG(WARNING) << "Unable to update history database to version 21.";
      return sql::INIT_FAILURE;
    }
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 21) {
    // The android_urls table's data schema was changed in version 21.
#if defined(OS_ANDROID)
    if (!MigrateToVersion22()) {
      LOG(WARNING) << "Unable to migrate the android_urls table to version 22";
    }
#endif
    ++cur_version;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 22) {
    if (!MigrateDownloadsState()) {
      LOG(WARNING) << "Unable to fix invalid downloads state values";
      // Invalid state values may cause crashes.
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 23) {
    if (!MigrateDownloadsReasonPathsAndDangerType()) {
      LOG(WARNING) << "Unable to upgrade download interrupt reason and paths";
      // Invalid state values may cause crashes.
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 24) {
    if (!MigratePresentationIndex()) {
      LOG(WARNING) << "Unable to migrate history to version 25";
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 25) {
    if (!MigrateReferrer()) {
      LOG(WARNING) << "Unable to migrate history to version 26";
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 26) {
    if (!MigrateDownloadedByExtension()) {
      LOG(WARNING) << "Unable to migrate history to version 27";
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 27) {
    if (!MigrateDownloadValidators()) {
      LOG(WARNING) << "Unable to migrate history to version 28";
      return sql::INIT_FAILURE;
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // When the version is too old, we just try to continue anyway; there should
  // not be a released product that makes a database too old for us to handle.
  LOG_IF(WARNING, cur_version < GetCurrentVersion()) <<
      "History database version " << cur_version << " is too old to handle.";

  return sql::INIT_OK;
}
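
// Illustrative sketch of how a future schema bump would slot into the chain
// above (the migration helper named here is hypothetical, not a real method):
//
//   if (cur_version == 28) {
//     if (!MigrateSomeNewColumn()) {
//       LOG(WARNING) << "Unable to migrate history to version 29";
//       return sql::INIT_FAILURE;
//     }
//     cur_version++;
//     meta_table_.SetVersionNumber(cur_version);
//   }
//
// along with bumping kCurrentVersionNumber at the top of this file.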

#if !defined(OS_WIN)
void HistoryDatabase::MigrateTimeEpoch() {
  // Update all the times in the URLs and visits tables in the main database.
  // 11644473600000000 is the number of microseconds between the Windows epoch
  // (1601-01-01 UTC, used by base::Time) and the Unix epoch (1970-01-01 UTC).
  ignore_result(db_.Execute(
      "UPDATE urls "
      "SET last_visit_time = last_visit_time + 11644473600000000 "
      "WHERE id IN (SELECT id FROM urls WHERE last_visit_time > 0);"));
  ignore_result(db_.Execute(
      "UPDATE visits "
      "SET visit_time = visit_time + 11644473600000000 "
      "WHERE id IN (SELECT id FROM visits WHERE visit_time > 0);"));
  ignore_result(db_.Execute(
      "UPDATE segment_usage "
      "SET time_slot = time_slot + 11644473600000000 "
      "WHERE id IN (SELECT id FROM segment_usage WHERE time_slot > 0);"));

  // Erase all the full text index files. These will take a while to update and
  // are less important, so we just blow them away. Same with the archived
  // database.
  needs_version_17_migration_ = true;
}
#endif

}  // namespace history