1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/history/url_database.h" 6 7 #include <algorithm> 8 #include <limits> 9 #include <string> 10 #include <vector> 11 12 #include "base/i18n/case_conversion.h" 13 #include "base/strings/utf_string_conversions.h" 14 #include "chrome/common/url_constants.h" 15 #include "net/base/net_util.h" 16 #include "sql/statement.h" 17 #include "ui/base/l10n/l10n_util.h" 18 #include "url/gurl.h" 19 20 namespace history { 21 22 const char URLDatabase::kURLRowFields[] = HISTORY_URL_ROW_FIELDS; 23 const int URLDatabase::kNumURLRowFields = 9; 24 25 URLDatabase::URLEnumeratorBase::URLEnumeratorBase() 26 : initialized_(false) { 27 } 28 29 URLDatabase::URLEnumeratorBase::~URLEnumeratorBase() { 30 } 31 32 URLDatabase::URLEnumerator::URLEnumerator() { 33 } 34 35 bool URLDatabase::URLEnumerator::GetNextURL(URLRow* r) { 36 if (statement_.Step()) { 37 FillURLRow(statement_, r); 38 return true; 39 } 40 return false; 41 } 42 43 URLDatabase::URLDatabase() 44 : has_keyword_search_terms_(false) { 45 } 46 47 URLDatabase::~URLDatabase() { 48 } 49 50 // static 51 std::string URLDatabase::GURLToDatabaseURL(const GURL& gurl) { 52 // TODO(brettw): do something fancy here with encoding, etc. 53 54 // Strip username and password from URL before sending to DB. 55 GURL::Replacements replacements; 56 replacements.ClearUsername(); 57 replacements.ClearPassword(); 58 59 return (gurl.ReplaceComponents(replacements)).spec(); 60 } 61 62 // Convenience to fill a history::URLRow. Must be in sync with the fields in 63 // kURLRowFields. 64 void URLDatabase::FillURLRow(sql::Statement& s, history::URLRow* i) { 65 DCHECK(i); 66 i->id_ = s.ColumnInt64(0); 67 i->url_ = GURL(s.ColumnString(1)); 68 i->title_ = s.ColumnString16(2); 69 i->visit_count_ = s.ColumnInt(3); 70 i->typed_count_ = s.ColumnInt(4); 71 i->last_visit_ = base::Time::FromInternalValue(s.ColumnInt64(5)); 72 i->hidden_ = s.ColumnInt(6) != 0; 73 } 74 75 bool URLDatabase::GetURLRow(URLID url_id, URLRow* info) { 76 // TODO(brettw) We need check for empty URLs to handle the case where 77 // there are old URLs in the database that are empty that got in before 78 // we added any checks. We should eventually be able to remove it 79 // when all inputs are using GURL (which prohibit empty input). 80 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 81 "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE id=?")); 82 statement.BindInt64(0, url_id); 83 84 if (statement.Step()) { 85 FillURLRow(statement, info); 86 return true; 87 } 88 return false; 89 } 90 91 bool URLDatabase::GetAllTypedUrls(URLRows* urls) { 92 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 93 "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE typed_count > 0")); 94 95 while (statement.Step()) { 96 URLRow info; 97 FillURLRow(statement, &info); 98 urls->push_back(info); 99 } 100 return true; 101 } 102 103 URLID URLDatabase::GetRowForURL(const GURL& url, history::URLRow* info) { 104 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 105 "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE url=?")); 106 std::string url_string = GURLToDatabaseURL(url); 107 statement.BindString(0, url_string); 108 109 if (!statement.Step()) 110 return 0; // no data 111 112 if (info) 113 FillURLRow(statement, info); 114 return statement.ColumnInt64(0); 115 } 116 117 bool URLDatabase::UpdateURLRow(URLID url_id, 118 const history::URLRow& info) { 119 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 120 "UPDATE urls SET title=?,visit_count=?,typed_count=?,last_visit_time=?," 121 "hidden=?" 122 "WHERE id=?")); 123 statement.BindString16(0, info.title()); 124 statement.BindInt(1, info.visit_count()); 125 statement.BindInt(2, info.typed_count()); 126 statement.BindInt64(3, info.last_visit().ToInternalValue()); 127 statement.BindInt(4, info.hidden() ? 1 : 0); 128 statement.BindInt64(5, url_id); 129 130 return statement.Run(); 131 } 132 133 URLID URLDatabase::AddURLInternal(const history::URLRow& info, 134 bool is_temporary) { 135 // This function is used to insert into two different tables, so we have to 136 // do some shuffling. Unfortinately, we can't use the macro 137 // HISTORY_URL_ROW_FIELDS because that specifies the table name which is 138 // invalid in the insert syntax. 139 #define ADDURL_COMMON_SUFFIX \ 140 " (url, title, visit_count, typed_count, "\ 141 "last_visit_time, hidden) "\ 142 "VALUES (?,?,?,?,?,?)" 143 const char* statement_name; 144 const char* statement_sql; 145 if (is_temporary) { 146 statement_name = "AddURLTemporary"; 147 statement_sql = "INSERT INTO temp_urls" ADDURL_COMMON_SUFFIX; 148 } else { 149 statement_name = "AddURL"; 150 statement_sql = "INSERT INTO urls" ADDURL_COMMON_SUFFIX; 151 } 152 #undef ADDURL_COMMON_SUFFIX 153 154 sql::Statement statement(GetDB().GetCachedStatement( 155 sql::StatementID(statement_name), statement_sql)); 156 statement.BindString(0, GURLToDatabaseURL(info.url())); 157 statement.BindString16(1, info.title()); 158 statement.BindInt(2, info.visit_count()); 159 statement.BindInt(3, info.typed_count()); 160 statement.BindInt64(4, info.last_visit().ToInternalValue()); 161 statement.BindInt(5, info.hidden() ? 1 : 0); 162 163 if (!statement.Run()) { 164 VLOG(0) << "Failed to add url " << info.url().possibly_invalid_spec() 165 << " to table history.urls."; 166 return 0; 167 } 168 return GetDB().GetLastInsertRowId(); 169 } 170 171 bool URLDatabase::DeleteURLRow(URLID id) { 172 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 173 "DELETE FROM urls WHERE id = ?")); 174 statement.BindInt64(0, id); 175 176 if (!statement.Run()) 177 return false; 178 179 // And delete any keyword visits. 180 return !has_keyword_search_terms_ || DeleteKeywordSearchTermForURL(id); 181 } 182 183 bool URLDatabase::CreateTemporaryURLTable() { 184 return CreateURLTable(true); 185 } 186 187 bool URLDatabase::CommitTemporaryURLTable() { 188 // See the comments in the header file as well as 189 // HistoryBackend::DeleteAllHistory() for more information on how this works 190 // and why it does what it does. 191 // 192 // Note that the main database overrides this to additionally create the 193 // supplimentary indices that the archived database doesn't need. 194 195 // Swap the url table out and replace it with the temporary one. 196 if (!GetDB().Execute("DROP TABLE urls")) { 197 NOTREACHED() << GetDB().GetErrorMessage(); 198 return false; 199 } 200 if (!GetDB().Execute("ALTER TABLE temp_urls RENAME TO urls")) { 201 NOTREACHED() << GetDB().GetErrorMessage(); 202 return false; 203 } 204 205 // Create the index over URLs. This is needed for the main, in-memory, and 206 // archived databases, so we always do it. The supplimentary indices used by 207 // the main database are not created here. When deleting all history, they 208 // are created by HistoryDatabase::RecreateAllButStarAndURLTables(). 209 CreateMainURLIndex(); 210 211 return true; 212 } 213 214 bool URLDatabase::InitURLEnumeratorForEverything(URLEnumerator* enumerator) { 215 DCHECK(!enumerator->initialized_); 216 std::string sql("SELECT "); 217 sql.append(kURLRowFields); 218 sql.append(" FROM urls"); 219 enumerator->statement_.Assign(GetDB().GetUniqueStatement(sql.c_str())); 220 enumerator->initialized_ = enumerator->statement_.is_valid(); 221 return enumerator->statement_.is_valid(); 222 } 223 224 bool URLDatabase::InitURLEnumeratorForSignificant(URLEnumerator* enumerator) { 225 DCHECK(!enumerator->initialized_); 226 std::string sql("SELECT "); 227 sql.append(kURLRowFields); 228 sql.append(" FROM urls WHERE last_visit_time >= ? OR visit_count >= ? OR " 229 "typed_count >= ?"); 230 enumerator->statement_.Assign(GetDB().GetUniqueStatement(sql.c_str())); 231 enumerator->statement_.BindInt64( 232 0, AutocompleteAgeThreshold().ToInternalValue()); 233 enumerator->statement_.BindInt(1, kLowQualityMatchVisitLimit); 234 enumerator->statement_.BindInt(2, kLowQualityMatchTypedLimit); 235 enumerator->initialized_ = enumerator->statement_.is_valid(); 236 return enumerator->statement_.is_valid(); 237 } 238 239 bool URLDatabase::AutocompleteForPrefix(const std::string& prefix, 240 size_t max_results, 241 bool typed_only, 242 URLRows* results) { 243 // NOTE: this query originally sorted by starred as the second parameter. But 244 // as bookmarks is no longer part of the db we no longer include the order 245 // by clause. 246 results->clear(); 247 const char* sql; 248 int line; 249 if (typed_only) { 250 sql = "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls " 251 "WHERE url >= ? AND url < ? AND hidden = 0 AND typed_count > 0 " 252 "ORDER BY typed_count DESC, visit_count DESC, last_visit_time DESC " 253 "LIMIT ?"; 254 line = __LINE__; 255 } else { 256 sql = "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls " 257 "WHERE url >= ? AND url < ? AND hidden = 0 " 258 "ORDER BY typed_count DESC, visit_count DESC, last_visit_time DESC " 259 "LIMIT ?"; 260 line = __LINE__; 261 } 262 sql::Statement statement( 263 GetDB().GetCachedStatement(sql::StatementID(__FILE__, line), sql)); 264 265 // We will find all strings between "prefix" and this string, which is prefix 266 // followed by the maximum character size. Use 8-bit strings for everything 267 // so we can be sure sqlite is comparing everything in 8-bit mode. Otherwise, 268 // it will have to convert strings either to UTF-8 or UTF-16 for comparison. 269 std::string end_query(prefix); 270 end_query.push_back(std::numeric_limits<unsigned char>::max()); 271 272 statement.BindString(0, prefix); 273 statement.BindString(1, end_query); 274 statement.BindInt(2, static_cast<int>(max_results)); 275 276 while (statement.Step()) { 277 history::URLRow info; 278 FillURLRow(statement, &info); 279 if (info.url().is_valid()) 280 results->push_back(info); 281 } 282 return !results->empty(); 283 } 284 285 bool URLDatabase::IsTypedHost(const std::string& host) { 286 const char* schemes[] = { 287 content::kHttpScheme, 288 content::kHttpsScheme, 289 content::kFtpScheme 290 }; 291 URLRows dummy; 292 for (size_t i = 0; i < arraysize(schemes); ++i) { 293 std::string scheme_and_host(schemes[i]); 294 scheme_and_host += content::kStandardSchemeSeparator + host; 295 if (AutocompleteForPrefix(scheme_and_host + '/', 1, true, &dummy) || 296 AutocompleteForPrefix(scheme_and_host + ':', 1, true, &dummy)) 297 return true; 298 } 299 return false; 300 } 301 302 bool URLDatabase::FindShortestURLFromBase(const std::string& base, 303 const std::string& url, 304 int min_visits, 305 int min_typed, 306 bool allow_base, 307 history::URLRow* info) { 308 // Select URLs that start with |base| and are prefixes of |url|. All parts 309 // of this query except the substr() call can be done using the index. We 310 // could do this query with a couple of LIKE or GLOB statements as well, but 311 // those wouldn't use the index, and would run into problems with "wildcard" 312 // characters that appear in URLs (% for LIKE, or *, ? for GLOB). 313 std::string sql("SELECT "); 314 sql.append(kURLRowFields); 315 sql.append(" FROM urls WHERE url "); 316 sql.append(allow_base ? ">=" : ">"); 317 sql.append(" ? AND url < :end AND url = substr(:end, 1, length(url)) " 318 "AND hidden = 0 AND visit_count >= ? AND typed_count >= ? " 319 "ORDER BY url LIMIT 1"); 320 sql::Statement statement(GetDB().GetUniqueStatement(sql.c_str())); 321 statement.BindString(0, base); 322 statement.BindString(1, url); // :end 323 statement.BindInt(2, min_visits); 324 statement.BindInt(3, min_typed); 325 326 if (!statement.Step()) 327 return false; 328 329 DCHECK(info); 330 FillURLRow(statement, info); 331 return true; 332 } 333 334 bool URLDatabase::GetTextMatches(const base::string16& query, 335 URLRows* results) { 336 ScopedVector<QueryNode> query_nodes; 337 query_parser_.ParseQueryNodes(query, &query_nodes.get()); 338 339 results->clear(); 340 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 341 "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE hidden = 0")); 342 343 while (statement.Step()) { 344 std::vector<QueryWord> query_words; 345 base::string16 url = base::i18n::ToLower(statement.ColumnString16(1)); 346 query_parser_.ExtractQueryWords(url, &query_words); 347 GURL gurl(url); 348 if (gurl.is_valid()) { 349 // Decode punycode to match IDN. 350 // |query_words| won't be shown to user - therefore we can use empty 351 // |languages| to reduce dependency (no need to call PrefService). 352 base::string16 ascii = base::ASCIIToUTF16(gurl.host()); 353 base::string16 utf = net::IDNToUnicode(gurl.host(), std::string()); 354 if (ascii != utf) 355 query_parser_.ExtractQueryWords(utf, &query_words); 356 } 357 base::string16 title = base::i18n::ToLower(statement.ColumnString16(2)); 358 query_parser_.ExtractQueryWords(title, &query_words); 359 360 if (query_parser_.DoesQueryMatch(query_words, query_nodes.get())) { 361 history::URLResult info; 362 FillURLRow(statement, &info); 363 if (info.url().is_valid()) 364 results->push_back(info); 365 } 366 } 367 return !results->empty(); 368 } 369 370 bool URLDatabase::InitKeywordSearchTermsTable() { 371 has_keyword_search_terms_ = true; 372 if (!GetDB().DoesTableExist("keyword_search_terms")) { 373 if (!GetDB().Execute("CREATE TABLE keyword_search_terms (" 374 "keyword_id INTEGER NOT NULL," // ID of the TemplateURL. 375 "url_id INTEGER NOT NULL," // ID of the url. 376 "lower_term LONGVARCHAR NOT NULL," // The search term, in lower case. 377 "term LONGVARCHAR NOT NULL)")) // The actual search term. 378 return false; 379 } 380 return true; 381 } 382 383 bool URLDatabase::CreateKeywordSearchTermsIndices() { 384 // For searching. 385 if (!GetDB().Execute( 386 "CREATE INDEX IF NOT EXISTS keyword_search_terms_index1 ON " 387 "keyword_search_terms (keyword_id, lower_term)")) { 388 return false; 389 } 390 391 // For deletion. 392 if (!GetDB().Execute( 393 "CREATE INDEX IF NOT EXISTS keyword_search_terms_index2 ON " 394 "keyword_search_terms (url_id)")) { 395 return false; 396 } 397 398 // For query or deletion by term. 399 if (!GetDB().Execute( 400 "CREATE INDEX IF NOT EXISTS keyword_search_terms_index3 ON " 401 "keyword_search_terms (term)")) { 402 return false; 403 } 404 return true; 405 } 406 407 bool URLDatabase::DropKeywordSearchTermsTable() { 408 // This will implicitly delete the indices over the table. 409 return GetDB().Execute("DROP TABLE keyword_search_terms"); 410 } 411 412 bool URLDatabase::SetKeywordSearchTermsForURL(URLID url_id, 413 TemplateURLID keyword_id, 414 const base::string16& term) { 415 DCHECK(url_id && keyword_id && !term.empty()); 416 417 sql::Statement exist_statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 418 "SELECT term FROM keyword_search_terms " 419 "WHERE keyword_id = ? AND url_id = ?")); 420 exist_statement.BindInt64(0, keyword_id); 421 exist_statement.BindInt64(1, url_id); 422 423 if (exist_statement.Step()) 424 return true; // Term already exists, no need to add it. 425 426 if (!exist_statement.Succeeded()) 427 return false; 428 429 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 430 "INSERT INTO keyword_search_terms (keyword_id, url_id, lower_term, term) " 431 "VALUES (?,?,?,?)")); 432 statement.BindInt64(0, keyword_id); 433 statement.BindInt64(1, url_id); 434 statement.BindString16(2, base::i18n::ToLower(term)); 435 statement.BindString16(3, term); 436 return statement.Run(); 437 } 438 439 bool URLDatabase::GetKeywordSearchTermRow(URLID url_id, 440 KeywordSearchTermRow* row) { 441 DCHECK(url_id); 442 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 443 "SELECT keyword_id, term FROM keyword_search_terms WHERE url_id=?")); 444 statement.BindInt64(0, url_id); 445 446 if (!statement.Step()) 447 return false; 448 449 if (row) { 450 row->url_id = url_id; 451 row->keyword_id = statement.ColumnInt64(0); 452 row->term = statement.ColumnString16(1); 453 } 454 return true; 455 } 456 457 bool URLDatabase::GetKeywordSearchTermRows( 458 const base::string16& term, 459 std::vector<KeywordSearchTermRow>* rows) { 460 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 461 "SELECT keyword_id, url_id FROM keyword_search_terms WHERE term=?")); 462 statement.BindString16(0, term); 463 464 if (!statement.is_valid()) 465 return false; 466 467 while (statement.Step()) { 468 KeywordSearchTermRow row; 469 row.url_id = statement.ColumnInt64(1); 470 row.keyword_id = statement.ColumnInt64(0); 471 row.term = term; 472 rows->push_back(row); 473 } 474 return true; 475 } 476 477 void URLDatabase::DeleteAllSearchTermsForKeyword( 478 TemplateURLID keyword_id) { 479 DCHECK(keyword_id); 480 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 481 "DELETE FROM keyword_search_terms WHERE keyword_id=?")); 482 statement.BindInt64(0, keyword_id); 483 484 statement.Run(); 485 } 486 487 void URLDatabase::GetMostRecentKeywordSearchTerms( 488 TemplateURLID keyword_id, 489 const base::string16& prefix, 490 int max_count, 491 std::vector<KeywordSearchTermVisit>* matches) { 492 // NOTE: the keyword_id can be zero if on first run the user does a query 493 // before the TemplateURLService has finished loading. As the chances of this 494 // occurring are small, we ignore it. 495 if (!keyword_id) 496 return; 497 498 DCHECK(!prefix.empty()); 499 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 500 "SELECT DISTINCT kv.term, u.visit_count, u.last_visit_time " 501 "FROM keyword_search_terms kv " 502 "JOIN urls u ON kv.url_id = u.id " 503 "WHERE kv.keyword_id = ? AND kv.lower_term >= ? AND kv.lower_term < ? " 504 "ORDER BY u.last_visit_time DESC LIMIT ?")); 505 506 // NOTE: Keep this ToLower() call in sync with search_provider.cc. 507 base::string16 lower_prefix = base::i18n::ToLower(prefix); 508 // This magic gives us a prefix search. 509 base::string16 next_prefix = lower_prefix; 510 next_prefix[next_prefix.size() - 1] = 511 next_prefix[next_prefix.size() - 1] + 1; 512 statement.BindInt64(0, keyword_id); 513 statement.BindString16(1, lower_prefix); 514 statement.BindString16(2, next_prefix); 515 statement.BindInt(3, max_count); 516 517 KeywordSearchTermVisit visit; 518 while (statement.Step()) { 519 visit.term = statement.ColumnString16(0); 520 visit.visits = statement.ColumnInt(1); 521 visit.time = base::Time::FromInternalValue(statement.ColumnInt64(2)); 522 matches->push_back(visit); 523 } 524 } 525 526 bool URLDatabase::DeleteKeywordSearchTerm(const base::string16& term) { 527 sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, 528 "DELETE FROM keyword_search_terms WHERE term=?")); 529 statement.BindString16(0, term); 530 531 return statement.Run(); 532 } 533 534 bool URLDatabase::DeleteKeywordSearchTermForURL(URLID url_id) { 535 sql::Statement statement(GetDB().GetCachedStatement( 536 SQL_FROM_HERE, "DELETE FROM keyword_search_terms WHERE url_id=?")); 537 statement.BindInt64(0, url_id); 538 return statement.Run(); 539 } 540 541 bool URLDatabase::DropStarredIDFromURLs() { 542 if (!GetDB().DoesColumnExist("urls", "starred_id")) 543 return true; // urls is already updated, no need to continue. 544 545 // Create a temporary table to contain the new URLs table. 546 if (!CreateTemporaryURLTable()) { 547 NOTREACHED(); 548 return false; 549 } 550 551 // Copy the contents. 552 if (!GetDB().Execute( 553 "INSERT INTO temp_urls (id, url, title, visit_count, typed_count, " 554 "last_visit_time, hidden, favicon_id) " 555 "SELECT id, url, title, visit_count, typed_count, last_visit_time, " 556 "hidden, favicon_id FROM urls")) { 557 NOTREACHED() << GetDB().GetErrorMessage(); 558 return false; 559 } 560 561 // Rename/commit the tmp table. 562 CommitTemporaryURLTable(); 563 564 return true; 565 } 566 567 bool URLDatabase::CreateURLTable(bool is_temporary) { 568 const char* name = is_temporary ? "temp_urls" : "urls"; 569 if (GetDB().DoesTableExist(name)) 570 return true; 571 572 std::string sql; 573 sql.append("CREATE TABLE "); 574 sql.append(name); 575 sql.append("(" 576 "id INTEGER PRIMARY KEY," 577 "url LONGVARCHAR," 578 "title LONGVARCHAR," 579 "visit_count INTEGER DEFAULT 0 NOT NULL," 580 "typed_count INTEGER DEFAULT 0 NOT NULL," 581 "last_visit_time INTEGER NOT NULL," 582 "hidden INTEGER DEFAULT 0 NOT NULL," 583 "favicon_id INTEGER DEFAULT 0 NOT NULL)"); // favicon_id is not used now. 584 585 return GetDB().Execute(sql.c_str()); 586 } 587 588 bool URLDatabase::CreateMainURLIndex() { 589 // Index over URLs so we can quickly look up based on URL. 590 return GetDB().Execute( 591 "CREATE INDEX IF NOT EXISTS urls_url_index ON urls (url)"); 592 } 593 594 } // namespace history 595