1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/history/visit_database.h" 6 7 #include <algorithm> 8 #include <limits> 9 #include <map> 10 #include <set> 11 12 #include "app/sql/statement.h" 13 #include "base/logging.h" 14 #include "base/string_number_conversions.h" 15 #include "chrome/browser/history/url_database.h" 16 #include "chrome/common/url_constants.h" 17 #include "content/common/page_transition_types.h" 18 19 // Rows, in order, of the visit table. 20 #define HISTORY_VISIT_ROW_FIELDS \ 21 " id,url,visit_time,from_visit,transition,segment_id,is_indexed " 22 23 namespace history { 24 25 VisitDatabase::VisitDatabase() { 26 } 27 28 VisitDatabase::~VisitDatabase() { 29 } 30 31 bool VisitDatabase::InitVisitTable() { 32 if (!GetDB().DoesTableExist("visits")) { 33 if (!GetDB().Execute("CREATE TABLE visits(" 34 "id INTEGER PRIMARY KEY," 35 "url INTEGER NOT NULL," // key of the URL this corresponds to 36 "visit_time INTEGER NOT NULL," 37 "from_visit INTEGER," 38 "transition INTEGER DEFAULT 0 NOT NULL," 39 "segment_id INTEGER," 40 // True when we have indexed data for this visit. 41 "is_indexed BOOLEAN)")) 42 return false; 43 } else if (!GetDB().DoesColumnExist("visits", "is_indexed")) { 44 // Old versions don't have the is_indexed column, we can just add that and 45 // not worry about different database revisions, since old ones will 46 // continue to work. 47 // 48 // TODO(brettw) this should be removed once we think everybody has been 49 // updated (added early Mar 2008). 50 if (!GetDB().Execute("ALTER TABLE visits ADD COLUMN is_indexed BOOLEAN")) 51 return false; 52 } 53 54 // Visit source table contains the source information for all the visits. To 55 // save space, we do not record those user browsed visits which would be the 56 // majority in this table. Only other sources are recorded. 57 // Due to the tight relationship between visit_source and visits table, they 58 // should be created and dropped at the same time. 59 if (!GetDB().DoesTableExist("visit_source")) { 60 if (!GetDB().Execute("CREATE TABLE visit_source(" 61 "id INTEGER PRIMARY KEY,source INTEGER NOT NULL)")) 62 return false; 63 } 64 65 // Index over url so we can quickly find visits for a page. This will just 66 // fail if it already exists and we'll ignore it. 67 GetDB().Execute("CREATE INDEX visits_url_index ON visits (url)"); 68 69 // Create an index over from visits so that we can efficiently find 70 // referrers and redirects. Ignore failures because it likely already exists. 71 GetDB().Execute("CREATE INDEX visits_from_index ON visits (from_visit)"); 72 73 // Create an index over time so that we can efficiently find the visits in a 74 // given time range (most history views are time-based). Ignore failures 75 // because it likely already exists. 76 GetDB().Execute("CREATE INDEX visits_time_index ON visits (visit_time)"); 77 78 return true; 79 } 80 81 bool VisitDatabase::DropVisitTable() { 82 GetDB().Execute("DROP TABLE visit_source"); 83 // This will also drop the indices over the table. 84 return GetDB().Execute("DROP TABLE visits"); 85 } 86 87 // Must be in sync with HISTORY_VISIT_ROW_FIELDS. 88 // static 89 void VisitDatabase::FillVisitRow(sql::Statement& statement, VisitRow* visit) { 90 visit->visit_id = statement.ColumnInt64(0); 91 visit->url_id = statement.ColumnInt64(1); 92 visit->visit_time = base::Time::FromInternalValue(statement.ColumnInt64(2)); 93 visit->referring_visit = statement.ColumnInt64(3); 94 visit->transition = PageTransition::FromInt(statement.ColumnInt(4)); 95 visit->segment_id = statement.ColumnInt64(5); 96 visit->is_indexed = !!statement.ColumnInt(6); 97 } 98 99 // static 100 void VisitDatabase::FillVisitVector(sql::Statement& statement, 101 VisitVector* visits) { 102 while (statement.Step()) { 103 history::VisitRow visit; 104 FillVisitRow(statement, &visit); 105 visits->push_back(visit); 106 } 107 } 108 109 VisitID VisitDatabase::AddVisit(VisitRow* visit, VisitSource source) { 110 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 111 "INSERT INTO visits " 112 "(url, visit_time, from_visit, transition, segment_id, is_indexed) " 113 "VALUES (?,?,?,?,?,?)")); 114 if (!statement) { 115 VLOG(0) << "Failed to build visit insert statement: " 116 << "url_id = " << visit->url_id; 117 return 0; 118 } 119 120 statement.BindInt64(0, visit->url_id); 121 statement.BindInt64(1, visit->visit_time.ToInternalValue()); 122 statement.BindInt64(2, visit->referring_visit); 123 statement.BindInt64(3, visit->transition); 124 statement.BindInt64(4, visit->segment_id); 125 statement.BindInt64(5, visit->is_indexed); 126 127 if (!statement.Run()) { 128 VLOG(0) << "Failed to execute visit insert statement: " 129 << "url_id = " << visit->url_id; 130 return 0; 131 } 132 133 visit->visit_id = GetDB().GetLastInsertRowId(); 134 135 if (source != SOURCE_BROWSED) { 136 // Record the source of this visit when it is not browsed. 137 sql::Statement statement1(GetDB().GetCachedStatement(SQL_FROM_HERE, 138 "INSERT INTO visit_source (id, source) VALUES (?,?)")); 139 if (!statement1.is_valid()) { 140 VLOG(0) << "Failed to build visit_source insert statement: " 141 << "url_id = " << visit->visit_id; 142 return 0; 143 } 144 145 statement1.BindInt64(0, visit->visit_id); 146 statement1.BindInt64(1, source); 147 if (!statement1.Run()) { 148 VLOG(0) << "Failed to execute visit_source insert statement: " 149 << "url_id = " << visit->visit_id; 150 return 0; 151 } 152 } 153 154 return visit->visit_id; 155 } 156 157 void VisitDatabase::DeleteVisit(const VisitRow& visit) { 158 // Patch around this visit. Any visits that this went to will now have their 159 // "source" be the deleted visit's source. 160 sql::Statement update_chain(GetDB().GetCachedStatement(SQL_FROM_HERE, 161 "UPDATE visits SET from_visit=? WHERE from_visit=?")); 162 if (!update_chain) 163 return; 164 update_chain.BindInt64(0, visit.referring_visit); 165 update_chain.BindInt64(1, visit.visit_id); 166 update_chain.Run(); 167 168 // Now delete the actual visit. 169 sql::Statement del(GetDB().GetCachedStatement(SQL_FROM_HERE, 170 "DELETE FROM visits WHERE id=?")); 171 if (!del) 172 return; 173 del.BindInt64(0, visit.visit_id); 174 del.Run(); 175 176 // Try to delete the entry in visit_source table as well. 177 // If the visit was browsed, there is no corresponding entry in visit_source 178 // table, and nothing will be deleted. 179 del.Assign(GetDB().GetCachedStatement(SQL_FROM_HERE, 180 "DELETE FROM visit_source WHERE id=?")); 181 if (!del.is_valid()) 182 return; 183 del.BindInt64(0, visit.visit_id); 184 del.Run(); 185 } 186 187 bool VisitDatabase::GetRowForVisit(VisitID visit_id, VisitRow* out_visit) { 188 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 189 "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits WHERE id=?")); 190 if (!statement) 191 return false; 192 193 statement.BindInt64(0, visit_id); 194 if (!statement.Step()) 195 return false; 196 197 FillVisitRow(statement, out_visit); 198 199 // We got a different visit than we asked for, something is wrong. 200 DCHECK_EQ(visit_id, out_visit->visit_id); 201 if (visit_id != out_visit->visit_id) 202 return false; 203 204 return true; 205 } 206 207 bool VisitDatabase::UpdateVisitRow(const VisitRow& visit) { 208 // Don't store inconsistent data to the database. 209 DCHECK_NE(visit.visit_id, visit.referring_visit); 210 if (visit.visit_id == visit.referring_visit) 211 return false; 212 213 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 214 "UPDATE visits SET " 215 "url=?,visit_time=?,from_visit=?,transition=?,segment_id=?,is_indexed=? " 216 "WHERE id=?")); 217 if (!statement) 218 return false; 219 220 statement.BindInt64(0, visit.url_id); 221 statement.BindInt64(1, visit.visit_time.ToInternalValue()); 222 statement.BindInt64(2, visit.referring_visit); 223 statement.BindInt64(3, visit.transition); 224 statement.BindInt64(4, visit.segment_id); 225 statement.BindInt64(5, visit.is_indexed); 226 statement.BindInt64(6, visit.visit_id); 227 return statement.Run(); 228 } 229 230 bool VisitDatabase::GetVisitsForURL(URLID url_id, VisitVector* visits) { 231 visits->clear(); 232 233 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 234 "SELECT" HISTORY_VISIT_ROW_FIELDS 235 "FROM visits " 236 "WHERE url=? " 237 "ORDER BY visit_time ASC")); 238 if (!statement) 239 return false; 240 241 statement.BindInt64(0, url_id); 242 FillVisitVector(statement, visits); 243 return true; 244 } 245 246 void VisitDatabase::GetAllVisitsInRange(base::Time begin_time, 247 base::Time end_time, 248 int max_results, 249 VisitVector* visits) { 250 visits->clear(); 251 252 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 253 "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " 254 "WHERE visit_time >= ? AND visit_time < ?" 255 "ORDER BY visit_time LIMIT ?")); 256 if (!statement) 257 return; 258 259 // See GetVisibleVisitsInRange for more info on how these times are bound. 260 int64 end = end_time.ToInternalValue(); 261 statement.BindInt64(0, begin_time.ToInternalValue()); 262 statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max()); 263 statement.BindInt64(2, 264 max_results ? max_results : std::numeric_limits<int64>::max()); 265 266 FillVisitVector(statement, visits); 267 } 268 269 void VisitDatabase::GetVisitsInRangeForTransition( 270 base::Time begin_time, 271 base::Time end_time, 272 int max_results, 273 PageTransition::Type transition, 274 VisitVector* visits) { 275 DCHECK(visits); 276 visits->clear(); 277 278 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 279 "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " 280 "WHERE visit_time >= ? AND visit_time < ? " 281 "AND (transition & ?) == ?" 282 "ORDER BY visit_time LIMIT ?")); 283 if (!statement) 284 return; 285 286 // See GetVisibleVisitsInRange for more info on how these times are bound. 287 int64 end = end_time.ToInternalValue(); 288 statement.BindInt64(0, begin_time.ToInternalValue()); 289 statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max()); 290 statement.BindInt(2, PageTransition::CORE_MASK); 291 statement.BindInt(3, transition); 292 statement.BindInt64(4, 293 max_results ? max_results : std::numeric_limits<int64>::max()); 294 295 FillVisitVector(statement, visits); 296 } 297 298 void VisitDatabase::GetVisibleVisitsInRange(base::Time begin_time, 299 base::Time end_time, 300 int max_count, 301 VisitVector* visits) { 302 visits->clear(); 303 // The visit_time values can be duplicated in a redirect chain, so we sort 304 // by id too, to ensure a consistent ordering just in case. 305 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 306 "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " 307 "WHERE visit_time >= ? AND visit_time < ? " 308 "AND (transition & ?) != 0 " // CHAIN_END 309 "AND (transition & ?) NOT IN (?, ?, ?) " // NO SUBFRAME or 310 // KEYWORD_GENERATED 311 "ORDER BY visit_time DESC, id DESC")); 312 if (!statement) 313 return; 314 315 // Note that we use min/max values for querying unlimited ranges of time using 316 // the same statement. Since the time has an index, this will be about the 317 // same amount of work as just doing a query for everything with no qualifier. 318 int64 end = end_time.ToInternalValue(); 319 statement.BindInt64(0, begin_time.ToInternalValue()); 320 statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max()); 321 statement.BindInt(2, PageTransition::CHAIN_END); 322 statement.BindInt(3, PageTransition::CORE_MASK); 323 statement.BindInt(4, PageTransition::AUTO_SUBFRAME); 324 statement.BindInt(5, PageTransition::MANUAL_SUBFRAME); 325 statement.BindInt(6, PageTransition::KEYWORD_GENERATED); 326 327 std::set<URLID> found_urls; 328 while (statement.Step()) { 329 VisitRow visit; 330 FillVisitRow(statement, &visit); 331 // Make sure the URL this visit corresponds to is unique. 332 if (found_urls.find(visit.url_id) != found_urls.end()) 333 continue; 334 found_urls.insert(visit.url_id); 335 visits->push_back(visit); 336 337 if (max_count > 0 && static_cast<int>(visits->size()) >= max_count) 338 break; 339 } 340 } 341 342 VisitID VisitDatabase::GetMostRecentVisitForURL(URLID url_id, 343 VisitRow* visit_row) { 344 // The visit_time values can be duplicated in a redirect chain, so we sort 345 // by id too, to ensure a consistent ordering just in case. 346 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 347 "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits " 348 "WHERE url=? " 349 "ORDER BY visit_time DESC, id DESC " 350 "LIMIT 1")); 351 if (!statement) 352 return 0; 353 354 statement.BindInt64(0, url_id); 355 if (!statement.Step()) 356 return 0; // No visits for this URL. 357 358 if (visit_row) { 359 FillVisitRow(statement, visit_row); 360 return visit_row->visit_id; 361 } 362 return statement.ColumnInt64(0); 363 } 364 365 bool VisitDatabase::GetMostRecentVisitsForURL(URLID url_id, 366 int max_results, 367 VisitVector* visits) { 368 visits->clear(); 369 370 // The visit_time values can be duplicated in a redirect chain, so we sort 371 // by id too, to ensure a consistent ordering just in case. 372 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 373 "SELECT" HISTORY_VISIT_ROW_FIELDS 374 "FROM visits " 375 "WHERE url=? " 376 "ORDER BY visit_time DESC, id DESC " 377 "LIMIT ?")); 378 if (!statement) 379 return false; 380 381 statement.BindInt64(0, url_id); 382 statement.BindInt(1, max_results); 383 FillVisitVector(statement, visits); 384 return true; 385 } 386 387 bool VisitDatabase::GetRedirectFromVisit(VisitID from_visit, 388 VisitID* to_visit, 389 GURL* to_url) { 390 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 391 "SELECT v.id,u.url " 392 "FROM visits v JOIN urls u ON v.url = u.id " 393 "WHERE v.from_visit = ? " 394 "AND (v.transition & ?) != 0")); // IS_REDIRECT_MASK 395 if (!statement) 396 return false; 397 398 statement.BindInt64(0, from_visit); 399 statement.BindInt(1, PageTransition::IS_REDIRECT_MASK); 400 401 if (!statement.Step()) 402 return false; // No redirect from this visit. 403 if (to_visit) 404 *to_visit = statement.ColumnInt64(0); 405 if (to_url) 406 *to_url = GURL(statement.ColumnString(1)); 407 return true; 408 } 409 410 bool VisitDatabase::GetRedirectToVisit(VisitID to_visit, 411 VisitID* from_visit, 412 GURL* from_url) { 413 VisitRow row; 414 if (!GetRowForVisit(to_visit, &row)) 415 return false; 416 417 if (from_visit) 418 *from_visit = row.referring_visit; 419 420 if (from_url) { 421 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 422 "SELECT u.url " 423 "FROM visits v JOIN urls u ON v.url = u.id " 424 "WHERE v.id = ?")); 425 statement.BindInt64(0, row.referring_visit); 426 427 if (!statement.Step()) 428 return false; 429 430 *from_url = GURL(statement.ColumnString(0)); 431 } 432 return true; 433 } 434 435 bool VisitDatabase::GetVisitCountToHost(const GURL& url, 436 int* count, 437 base::Time* first_visit) { 438 if (!url.SchemeIs(chrome::kHttpScheme) && !url.SchemeIs(chrome::kHttpsScheme)) 439 return false; 440 441 // We need to search for URLs with a matching host/port. One way to query for 442 // this is to use the LIKE operator, eg 'url LIKE http://google.com/%'. This 443 // is inefficient though in that it doesn't use the index and each entry must 444 // be visited. The same query can be executed by using >= and < operator. 445 // The query becomes: 446 // 'url >= http://google.com/' and url < http://google.com0'. 447 // 0 is used as it is one character greater than '/'. 448 GURL search_url(url); 449 const std::string host_query_min = search_url.GetOrigin().spec(); 450 451 if (host_query_min.empty()) 452 return false; 453 454 std::string host_query_max = host_query_min; 455 host_query_max[host_query_max.size() - 1] = '0'; 456 457 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 458 "SELECT MIN(v.visit_time), COUNT(*) " 459 "FROM visits v INNER JOIN urls u ON v.url = u.id " 460 "WHERE (u.url >= ? AND u.url < ?)")); 461 if (!statement) 462 return false; 463 464 statement.BindString(0, host_query_min); 465 statement.BindString(1, host_query_max); 466 467 if (!statement.Step()) { 468 // We've never been to this page before. 469 *count = 0; 470 return true; 471 } 472 473 *first_visit = base::Time::FromInternalValue(statement.ColumnInt64(0)); 474 *count = statement.ColumnInt(1); 475 return true; 476 } 477 478 bool VisitDatabase::GetStartDate(base::Time* first_visit) { 479 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 480 "SELECT MIN(visit_time) FROM visits WHERE visit_time != 0")); 481 if (!statement || !statement.Step() || statement.ColumnInt64(0) == 0) { 482 *first_visit = base::Time::Now(); 483 return false; 484 } 485 *first_visit = base::Time::FromInternalValue(statement.ColumnInt64(0)); 486 return true; 487 } 488 489 void VisitDatabase::GetVisitsSource(const VisitVector& visits, 490 VisitSourceMap* sources) { 491 DCHECK(sources); 492 sources->clear(); 493 494 // We query the source in batch. Here defines the batch size. 495 const size_t batch_size = 500; 496 size_t visits_size = visits.size(); 497 498 size_t start_index = 0, end_index = 0; 499 while (end_index < visits_size) { 500 start_index = end_index; 501 end_index = end_index + batch_size < visits_size ? end_index + batch_size 502 : visits_size; 503 504 // Compose the sql statement with a list of ids. 505 std::string sql = "SELECT id,source FROM visit_source "; 506 sql.append("WHERE id IN ("); 507 // Append all the ids in the statement. 508 for (size_t j = start_index; j < end_index; j++) { 509 if (j != start_index) 510 sql.push_back(','); 511 sql.append(base::Int64ToString(visits[j].visit_id)); 512 } 513 sql.append(") ORDER BY id"); 514 sql::Statement statement(GetDB().GetUniqueStatement(sql.c_str())); 515 if (!statement) 516 return; 517 518 // Get the source entries out of the query result. 519 while (statement.Step()) { 520 std::pair<VisitID, VisitSource> source_entry(statement.ColumnInt64(0), 521 static_cast<VisitSource>(statement.ColumnInt(1))); 522 sources->insert(source_entry); 523 } 524 } 525 } 526 527 } // namespace history 528