1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/history/url_database.h" 6 7 #include <algorithm> 8 #include <limits> 9 #include <string> 10 #include <vector> 11 12 #include "base/i18n/case_conversion.h" 13 #include "base/strings/utf_string_conversions.h" 14 #include "chrome/common/url_constants.h" 15 #include "sql/statement.h" 16 #include "ui/base/l10n/l10n_util.h" 17 #include "url/gurl.h" 18 19 namespace history { 20 21 const char URLDatabase::kURLRowFields[] = HISTORY_URL_ROW_FIELDS; 22 const int URLDatabase::kNumURLRowFields = 9; 23 24 URLDatabase::URLEnumeratorBase::URLEnumeratorBase() 25 : initialized_(false) { 26 } 27 28 URLDatabase::URLEnumeratorBase::~URLEnumeratorBase() { 29 } 30 31 URLDatabase::URLEnumerator::URLEnumerator() { 32 } 33 34 URLDatabase::IconMappingEnumerator::IconMappingEnumerator() { 35 } 36 37 bool URLDatabase::URLEnumerator::GetNextURL(URLRow* r) { 38 if (statement_.Step()) { 39 FillURLRow(statement_, r); 40 return true; 41 } 42 return false; 43 } 44 45 bool URLDatabase::IconMappingEnumerator::GetNextIconMapping(IconMapping* r) { 46 if (!statement_.Step()) 47 return false; 48 49 r->page_url = GURL(statement_.ColumnString(0)); 50 r->icon_id = statement_.ColumnInt64(1); 51 return true; 52 } 53 54 URLDatabase::URLDatabase() 55 : has_keyword_search_terms_(false) { 56 } 57 58 URLDatabase::~URLDatabase() { 59 } 60 61 // static 62 std::string URLDatabase::GURLToDatabaseURL(const GURL& gurl) { 63 // TODO(brettw): do something fancy here with encoding, etc. 64 65 // Strip username and password from URL before sending to DB. 66 GURL::Replacements replacements; 67 replacements.ClearUsername(); 68 replacements.ClearPassword(); 69 70 return (gurl.ReplaceComponents(replacements)).spec(); 71 } 72 73 // Convenience to fill a history::URLRow. Must be in sync with the fields in 74 // kURLRowFields. 75 void URLDatabase::FillURLRow(sql::Statement& s, history::URLRow* i) { 76 DCHECK(i); 77 i->id_ = s.ColumnInt64(0); 78 i->url_ = GURL(s.ColumnString(1)); 79 i->title_ = s.ColumnString16(2); 80 i->visit_count_ = s.ColumnInt(3); 81 i->typed_count_ = s.ColumnInt(4); 82 i->last_visit_ = base::Time::FromInternalValue(s.ColumnInt64(5)); 83 i->hidden_ = s.ColumnInt(6) != 0; 84 } 85 86 bool URLDatabase::GetURLRow(URLID url_id, URLRow* info) { 87 // TODO(brettw) We need check for empty URLs to handle the case where 88 // there are old URLs in the database that are empty that got in before 89 // we added any checks. We should eventually be able to remove it 90 // when all inputs are using GURL (which prohibit empty input). 91 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 92 "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE id=?")); 93 statement.BindInt64(0, url_id); 94 95 if (statement.Step()) { 96 FillURLRow(statement, info); 97 return true; 98 } 99 return false; 100 } 101 102 bool URLDatabase::GetAllTypedUrls(URLRows* urls) { 103 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 104 "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE typed_count > 0")); 105 106 while (statement.Step()) { 107 URLRow info; 108 FillURLRow(statement, &info); 109 urls->push_back(info); 110 } 111 return true; 112 } 113 114 URLID URLDatabase::GetRowForURL(const GURL& url, history::URLRow* info) { 115 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 116 "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE url=?")); 117 std::string url_string = GURLToDatabaseURL(url); 118 statement.BindString(0, url_string); 119 120 if (!statement.Step()) 121 return 0; // no data 122 123 if (info) 124 FillURLRow(statement, info); 125 return statement.ColumnInt64(0); 126 } 127 128 bool URLDatabase::UpdateURLRow(URLID url_id, 129 const history::URLRow& info) { 130 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 131 "UPDATE urls SET title=?,visit_count=?,typed_count=?,last_visit_time=?," 132 "hidden=?" 133 "WHERE id=?")); 134 statement.BindString16(0, info.title()); 135 statement.BindInt(1, info.visit_count()); 136 statement.BindInt(2, info.typed_count()); 137 statement.BindInt64(3, info.last_visit().ToInternalValue()); 138 statement.BindInt(4, info.hidden() ? 1 : 0); 139 statement.BindInt64(5, url_id); 140 141 return statement.Run(); 142 } 143 144 URLID URLDatabase::AddURLInternal(const history::URLRow& info, 145 bool is_temporary) { 146 // This function is used to insert into two different tables, so we have to 147 // do some shuffling. Unfortinately, we can't use the macro 148 // HISTORY_URL_ROW_FIELDS because that specifies the table name which is 149 // invalid in the insert syntax. 150 #define ADDURL_COMMON_SUFFIX \ 151 " (url, title, visit_count, typed_count, "\ 152 "last_visit_time, hidden) "\ 153 "VALUES (?,?,?,?,?,?)" 154 const char* statement_name; 155 const char* statement_sql; 156 if (is_temporary) { 157 statement_name = "AddURLTemporary"; 158 statement_sql = "INSERT INTO temp_urls" ADDURL_COMMON_SUFFIX; 159 } else { 160 statement_name = "AddURL"; 161 statement_sql = "INSERT INTO urls" ADDURL_COMMON_SUFFIX; 162 } 163 #undef ADDURL_COMMON_SUFFIX 164 165 sql::Statement statement(GetDB().GetCachedStatement( 166 sql::StatementID(statement_name), statement_sql)); 167 statement.BindString(0, GURLToDatabaseURL(info.url())); 168 statement.BindString16(1, info.title()); 169 statement.BindInt(2, info.visit_count()); 170 statement.BindInt(3, info.typed_count()); 171 statement.BindInt64(4, info.last_visit().ToInternalValue()); 172 statement.BindInt(5, info.hidden() ? 1 : 0); 173 174 if (!statement.Run()) { 175 VLOG(0) << "Failed to add url " << info.url().possibly_invalid_spec() 176 << " to table history.urls."; 177 return 0; 178 } 179 return GetDB().GetLastInsertRowId(); 180 } 181 182 bool URLDatabase::DeleteURLRow(URLID id) { 183 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 184 "DELETE FROM urls WHERE id = ?")); 185 statement.BindInt64(0, id); 186 187 if (!statement.Run()) 188 return false; 189 190 // And delete any keyword visits. 191 if (!has_keyword_search_terms_) 192 return true; 193 194 sql::Statement del_keyword_visit(GetDB().GetCachedStatement(SQL_FROM_HERE, 195 "DELETE FROM keyword_search_terms WHERE url_id=?")); 196 del_keyword_visit.BindInt64(0, id); 197 198 return del_keyword_visit.Run(); 199 } 200 201 bool URLDatabase::CreateTemporaryURLTable() { 202 return CreateURLTable(true); 203 } 204 205 bool URLDatabase::CommitTemporaryURLTable() { 206 // See the comments in the header file as well as 207 // HistoryBackend::DeleteAllHistory() for more information on how this works 208 // and why it does what it does. 209 // 210 // Note that the main database overrides this to additionally create the 211 // supplimentary indices that the archived database doesn't need. 212 213 // Swap the url table out and replace it with the temporary one. 214 if (!GetDB().Execute("DROP TABLE urls")) { 215 NOTREACHED() << GetDB().GetErrorMessage(); 216 return false; 217 } 218 if (!GetDB().Execute("ALTER TABLE temp_urls RENAME TO urls")) { 219 NOTREACHED() << GetDB().GetErrorMessage(); 220 return false; 221 } 222 223 // Create the index over URLs. This is needed for the main, in-memory, and 224 // archived databases, so we always do it. The supplimentary indices used by 225 // the main database are not created here. When deleting all history, they 226 // are created by HistoryDatabase::RecreateAllButStarAndURLTables(). 227 CreateMainURLIndex(); 228 229 return true; 230 } 231 232 bool URLDatabase::InitURLEnumeratorForEverything(URLEnumerator* enumerator) { 233 DCHECK(!enumerator->initialized_); 234 std::string sql("SELECT "); 235 sql.append(kURLRowFields); 236 sql.append(" FROM urls"); 237 enumerator->statement_.Assign(GetDB().GetUniqueStatement(sql.c_str())); 238 enumerator->initialized_ = enumerator->statement_.is_valid(); 239 return enumerator->statement_.is_valid(); 240 } 241 242 bool URLDatabase::InitURLEnumeratorForSignificant(URLEnumerator* enumerator) { 243 DCHECK(!enumerator->initialized_); 244 std::string sql("SELECT "); 245 sql.append(kURLRowFields); 246 sql.append(" FROM urls WHERE last_visit_time >= ? OR visit_count >= ? OR " 247 "typed_count >= ?"); 248 enumerator->statement_.Assign(GetDB().GetUniqueStatement(sql.c_str())); 249 enumerator->statement_.BindInt64( 250 0, AutocompleteAgeThreshold().ToInternalValue()); 251 enumerator->statement_.BindInt(1, kLowQualityMatchVisitLimit); 252 enumerator->statement_.BindInt(2, kLowQualityMatchTypedLimit); 253 enumerator->initialized_ = enumerator->statement_.is_valid(); 254 return enumerator->statement_.is_valid(); 255 } 256 257 bool URLDatabase::InitIconMappingEnumeratorForEverything( 258 IconMappingEnumerator* enumerator) { 259 DCHECK(!enumerator->initialized_); 260 enumerator->statement_.Assign(GetDB().GetUniqueStatement( 261 "SELECT url, favicon_id FROM urls WHERE favicon_id <> 0")); 262 enumerator->initialized_ = enumerator->statement_.is_valid(); 263 return enumerator->statement_.is_valid(); 264 } 265 266 bool URLDatabase::AutocompleteForPrefix(const std::string& prefix, 267 size_t max_results, 268 bool typed_only, 269 URLRows* results) { 270 // NOTE: this query originally sorted by starred as the second parameter. But 271 // as bookmarks is no longer part of the db we no longer include the order 272 // by clause. 273 results->clear(); 274 const char* sql; 275 int line; 276 if (typed_only) { 277 sql = "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls " 278 "WHERE url >= ? AND url < ? AND hidden = 0 AND typed_count > 0 " 279 "ORDER BY typed_count DESC, visit_count DESC, last_visit_time DESC " 280 "LIMIT ?"; 281 line = __LINE__; 282 } else { 283 sql = "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls " 284 "WHERE url >= ? AND url < ? AND hidden = 0 " 285 "ORDER BY typed_count DESC, visit_count DESC, last_visit_time DESC " 286 "LIMIT ?"; 287 line = __LINE__; 288 } 289 sql::Statement statement( 290 GetDB().GetCachedStatement(sql::StatementID(__FILE__, line), sql)); 291 292 // We will find all strings between "prefix" and this string, which is prefix 293 // followed by the maximum character size. Use 8-bit strings for everything 294 // so we can be sure sqlite is comparing everything in 8-bit mode. Otherwise, 295 // it will have to convert strings either to UTF-8 or UTF-16 for comparison. 296 std::string end_query(prefix); 297 end_query.push_back(std::numeric_limits<unsigned char>::max()); 298 299 statement.BindString(0, prefix); 300 statement.BindString(1, end_query); 301 statement.BindInt(2, static_cast<int>(max_results)); 302 303 while (statement.Step()) { 304 history::URLRow info; 305 FillURLRow(statement, &info); 306 if (info.url().is_valid()) 307 results->push_back(info); 308 } 309 return !results->empty(); 310 } 311 312 bool URLDatabase::IsTypedHost(const std::string& host) { 313 const char* schemes[] = { 314 chrome::kHttpScheme, 315 chrome::kHttpsScheme, 316 chrome::kFtpScheme 317 }; 318 URLRows dummy; 319 for (size_t i = 0; i < arraysize(schemes); ++i) { 320 std::string scheme_and_host(schemes[i]); 321 scheme_and_host += content::kStandardSchemeSeparator + host; 322 if (AutocompleteForPrefix(scheme_and_host + '/', 1, true, &dummy) || 323 AutocompleteForPrefix(scheme_and_host + ':', 1, true, &dummy)) 324 return true; 325 } 326 return false; 327 } 328 329 bool URLDatabase::FindShortestURLFromBase(const std::string& base, 330 const std::string& url, 331 int min_visits, 332 int min_typed, 333 bool allow_base, 334 history::URLRow* info) { 335 // Select URLs that start with |base| and are prefixes of |url|. All parts 336 // of this query except the substr() call can be done using the index. We 337 // could do this query with a couple of LIKE or GLOB statements as well, but 338 // those wouldn't use the index, and would run into problems with "wildcard" 339 // characters that appear in URLs (% for LIKE, or *, ? for GLOB). 340 std::string sql("SELECT "); 341 sql.append(kURLRowFields); 342 sql.append(" FROM urls WHERE url "); 343 sql.append(allow_base ? ">=" : ">"); 344 sql.append(" ? AND url < :end AND url = substr(:end, 1, length(url)) " 345 "AND hidden = 0 AND visit_count >= ? AND typed_count >= ? " 346 "ORDER BY url LIMIT 1"); 347 sql::Statement statement(GetDB().GetUniqueStatement(sql.c_str())); 348 statement.BindString(0, base); 349 statement.BindString(1, url); // :end 350 statement.BindInt(2, min_visits); 351 statement.BindInt(3, min_typed); 352 353 if (!statement.Step()) 354 return false; 355 356 DCHECK(info); 357 FillURLRow(statement, info); 358 return true; 359 } 360 361 bool URLDatabase::GetTextMatches(const string16& query, 362 URLRows* results) { 363 ScopedVector<QueryNode> query_nodes; 364 query_parser_.ParseQueryNodes(query, &query_nodes.get()); 365 366 results->clear(); 367 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 368 "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE hidden = 0")); 369 370 while (statement.Step()) { 371 std::vector<QueryWord> query_words; 372 string16 url = base::i18n::ToLower(statement.ColumnString16(1)); 373 query_parser_.ExtractQueryWords(url, &query_words); 374 string16 title = base::i18n::ToLower(statement.ColumnString16(2)); 375 query_parser_.ExtractQueryWords(title, &query_words); 376 377 if (query_parser_.DoesQueryMatch(query_words, query_nodes.get())) { 378 history::URLResult info; 379 FillURLRow(statement, &info); 380 if (info.url().is_valid()) 381 results->push_back(info); 382 } 383 } 384 return !results->empty(); 385 } 386 387 bool URLDatabase::InitKeywordSearchTermsTable() { 388 has_keyword_search_terms_ = true; 389 if (!GetDB().DoesTableExist("keyword_search_terms")) { 390 if (!GetDB().Execute("CREATE TABLE keyword_search_terms (" 391 "keyword_id INTEGER NOT NULL," // ID of the TemplateURL. 392 "url_id INTEGER NOT NULL," // ID of the url. 393 "lower_term LONGVARCHAR NOT NULL," // The search term, in lower case. 394 "term LONGVARCHAR NOT NULL)")) // The actual search term. 395 return false; 396 } 397 return true; 398 } 399 400 bool URLDatabase::CreateKeywordSearchTermsIndices() { 401 // For searching. 402 if (!GetDB().Execute( 403 "CREATE INDEX IF NOT EXISTS keyword_search_terms_index1 ON " 404 "keyword_search_terms (keyword_id, lower_term)")) { 405 return false; 406 } 407 408 // For deletion. 409 if (!GetDB().Execute( 410 "CREATE INDEX IF NOT EXISTS keyword_search_terms_index2 ON " 411 "keyword_search_terms (url_id)")) { 412 return false; 413 } 414 415 // For query or deletion by term. 416 if (!GetDB().Execute( 417 "CREATE INDEX IF NOT EXISTS keyword_search_terms_index3 ON " 418 "keyword_search_terms (term)")) { 419 return false; 420 } 421 return true; 422 } 423 424 bool URLDatabase::DropKeywordSearchTermsTable() { 425 // This will implicitly delete the indices over the table. 426 return GetDB().Execute("DROP TABLE keyword_search_terms"); 427 } 428 429 bool URLDatabase::SetKeywordSearchTermsForURL(URLID url_id, 430 TemplateURLID keyword_id, 431 const string16& term) { 432 DCHECK(url_id && keyword_id && !term.empty()); 433 434 sql::Statement exist_statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 435 "SELECT term FROM keyword_search_terms " 436 "WHERE keyword_id = ? AND url_id = ?")); 437 exist_statement.BindInt64(0, keyword_id); 438 exist_statement.BindInt64(1, url_id); 439 440 if (exist_statement.Step()) 441 return true; // Term already exists, no need to add it. 442 443 if (!exist_statement.Succeeded()) 444 return false; 445 446 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 447 "INSERT INTO keyword_search_terms (keyword_id, url_id, lower_term, term) " 448 "VALUES (?,?,?,?)")); 449 statement.BindInt64(0, keyword_id); 450 statement.BindInt64(1, url_id); 451 statement.BindString16(2, base::i18n::ToLower(term)); 452 statement.BindString16(3, term); 453 return statement.Run(); 454 } 455 456 bool URLDatabase::GetKeywordSearchTermRow(URLID url_id, 457 KeywordSearchTermRow* row) { 458 DCHECK(url_id); 459 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 460 "SELECT keyword_id, term FROM keyword_search_terms WHERE url_id=?")); 461 statement.BindInt64(0, url_id); 462 463 if (!statement.Step()) 464 return false; 465 466 if (row) { 467 row->url_id = url_id; 468 row->keyword_id = statement.ColumnInt64(0); 469 row->term = statement.ColumnString16(1); 470 } 471 return true; 472 } 473 474 bool URLDatabase::GetKeywordSearchTermRows( 475 const string16& term, 476 std::vector<KeywordSearchTermRow>* rows) { 477 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 478 "SELECT keyword_id, url_id FROM keyword_search_terms WHERE term=?")); 479 statement.BindString16(0, term); 480 481 if (!statement.is_valid()) 482 return false; 483 484 while (statement.Step()) { 485 KeywordSearchTermRow row; 486 row.url_id = statement.ColumnInt64(1); 487 row.keyword_id = statement.ColumnInt64(0); 488 row.term = term; 489 rows->push_back(row); 490 } 491 return true; 492 } 493 494 void URLDatabase::DeleteAllSearchTermsForKeyword( 495 TemplateURLID keyword_id) { 496 DCHECK(keyword_id); 497 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 498 "DELETE FROM keyword_search_terms WHERE keyword_id=?")); 499 statement.BindInt64(0, keyword_id); 500 501 statement.Run(); 502 } 503 504 void URLDatabase::GetMostRecentKeywordSearchTerms( 505 TemplateURLID keyword_id, 506 const string16& prefix, 507 int max_count, 508 std::vector<KeywordSearchTermVisit>* matches) { 509 // NOTE: the keyword_id can be zero if on first run the user does a query 510 // before the TemplateURLService has finished loading. As the chances of this 511 // occurring are small, we ignore it. 512 if (!keyword_id) 513 return; 514 515 DCHECK(!prefix.empty()); 516 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 517 "SELECT DISTINCT kv.term, u.visit_count, u.last_visit_time " 518 "FROM keyword_search_terms kv " 519 "JOIN urls u ON kv.url_id = u.id " 520 "WHERE kv.keyword_id = ? AND kv.lower_term >= ? AND kv.lower_term < ? " 521 "ORDER BY u.last_visit_time DESC LIMIT ?")); 522 523 // NOTE: Keep this ToLower() call in sync with search_provider.cc. 524 string16 lower_prefix = base::i18n::ToLower(prefix); 525 // This magic gives us a prefix search. 526 string16 next_prefix = lower_prefix; 527 next_prefix[next_prefix.size() - 1] = 528 next_prefix[next_prefix.size() - 1] + 1; 529 statement.BindInt64(0, keyword_id); 530 statement.BindString16(1, lower_prefix); 531 statement.BindString16(2, next_prefix); 532 statement.BindInt(3, max_count); 533 534 KeywordSearchTermVisit visit; 535 while (statement.Step()) { 536 visit.term = statement.ColumnString16(0); 537 visit.visits = statement.ColumnInt(1); 538 visit.time = base::Time::FromInternalValue(statement.ColumnInt64(2)); 539 matches->push_back(visit); 540 } 541 } 542 543 bool URLDatabase::DeleteKeywordSearchTerm(const string16& term) { 544 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 545 "DELETE FROM keyword_search_terms WHERE term=?")); 546 statement.BindString16(0, term); 547 548 return statement.Run(); 549 } 550 551 bool URLDatabase::DropStarredIDFromURLs() { 552 if (!GetDB().DoesColumnExist("urls", "starred_id")) 553 return true; // urls is already updated, no need to continue. 554 555 // Create a temporary table to contain the new URLs table. 556 if (!CreateTemporaryURLTable()) { 557 NOTREACHED(); 558 return false; 559 } 560 561 // Copy the contents. 562 if (!GetDB().Execute( 563 "INSERT INTO temp_urls (id, url, title, visit_count, typed_count, " 564 "last_visit_time, hidden, favicon_id) " 565 "SELECT id, url, title, visit_count, typed_count, last_visit_time, " 566 "hidden, favicon_id FROM urls")) { 567 NOTREACHED() << GetDB().GetErrorMessage(); 568 return false; 569 } 570 571 // Rename/commit the tmp table. 572 CommitTemporaryURLTable(); 573 574 return true; 575 } 576 577 bool URLDatabase::CreateURLTable(bool is_temporary) { 578 const char* name = is_temporary ? "temp_urls" : "urls"; 579 if (GetDB().DoesTableExist(name)) 580 return true; 581 582 std::string sql; 583 sql.append("CREATE TABLE "); 584 sql.append(name); 585 sql.append("(" 586 "id INTEGER PRIMARY KEY," 587 "url LONGVARCHAR," 588 "title LONGVARCHAR," 589 "visit_count INTEGER DEFAULT 0 NOT NULL," 590 "typed_count INTEGER DEFAULT 0 NOT NULL," 591 "last_visit_time INTEGER NOT NULL," 592 "hidden INTEGER DEFAULT 0 NOT NULL," 593 "favicon_id INTEGER DEFAULT 0 NOT NULL)"); // favicon_id is not used now. 594 595 return GetDB().Execute(sql.c_str()); 596 } 597 598 bool URLDatabase::CreateMainURLIndex() { 599 // Index over URLs so we can quickly look up based on URL. 600 return GetDB().Execute( 601 "CREATE INDEX IF NOT EXISTS urls_url_index ON urls (url)"); 602 } 603 604 } // namespace history 605