1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/autocomplete/history_url_provider.h" 6 7 #include <algorithm> 8 9 #include "base/basictypes.h" 10 #include "base/bind.h" 11 #include "base/command_line.h" 12 #include "base/message_loop/message_loop.h" 13 #include "base/metrics/histogram.h" 14 #include "base/prefs/pref_service.h" 15 #include "base/strings/string_util.h" 16 #include "base/strings/utf_string_conversions.h" 17 #include "chrome/browser/autocomplete/autocomplete_match.h" 18 #include "chrome/browser/autocomplete/autocomplete_provider_listener.h" 19 #include "chrome/browser/history/history_backend.h" 20 #include "chrome/browser/history/history_database.h" 21 #include "chrome/browser/history/history_service.h" 22 #include "chrome/browser/history/history_service_factory.h" 23 #include "chrome/browser/history/history_types.h" 24 #include "chrome/browser/omnibox/omnibox_field_trial.h" 25 #include "chrome/browser/profiles/profile.h" 26 #include "chrome/browser/search_engines/template_url_service.h" 27 #include "chrome/browser/search_engines/template_url_service_factory.h" 28 #include "chrome/common/chrome_switches.h" 29 #include "chrome/common/net/url_fixer_upper.h" 30 #include "chrome/common/pref_names.h" 31 #include "chrome/common/url_constants.h" 32 #include "net/base/net_util.h" 33 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 34 #include "url/gurl.h" 35 #include "url/url_parse.h" 36 #include "url/url_util.h" 37 38 namespace { 39 40 // If |create_if_necessary| is true, ensures that |matches| contains an 41 // entry for |info|, creating a new such entry if necessary (using 42 // |input_location| and |match_in_scheme|). 43 // 44 // If |promote| is true, this also ensures the entry is the first element in 45 // |matches|, moving or adding it to the front as appropriate. When |promote| 46 // is false, existing matches are left in place, and newly added matches are 47 // placed at the back. 48 // 49 // It's OK to call this function with both |create_if_necessary| and 50 // |promote| false, in which case we'll do nothing. 51 void CreateOrPromoteMatch(const history::URLRow& info, 52 size_t input_location, 53 bool match_in_scheme, 54 history::HistoryMatches* matches, 55 bool create_if_necessary, 56 bool promote) { 57 // |matches| may already have an entry for this. 58 for (history::HistoryMatches::iterator i(matches->begin()); 59 i != matches->end(); ++i) { 60 if (i->url_info.url() == info.url()) { 61 // Rotate it to the front if the caller wishes. 62 if (promote) 63 std::rotate(matches->begin(), i, i + 1); 64 return; 65 } 66 } 67 68 if (!create_if_necessary) 69 return; 70 71 // No entry, so create one. 72 history::HistoryMatch match(info, input_location, match_in_scheme, true); 73 if (promote) 74 matches->push_front(match); 75 else 76 matches->push_back(match); 77 } 78 79 // Given the user's |input| and a |match| created from it, reduce the match's 80 // URL to just a host. If this host still matches the user input, return it. 81 // Returns the empty string on failure. 82 GURL ConvertToHostOnly(const history::HistoryMatch& match, 83 const string16& input) { 84 // See if we should try to do host-only suggestions for this URL. Nonstandard 85 // schemes means there's no authority section, so suggesting the host name 86 // is useless. File URLs are standard, but host suggestion is not useful for 87 // them either. 88 const GURL& url = match.url_info.url(); 89 if (!url.is_valid() || !url.IsStandard() || url.SchemeIsFile()) 90 return GURL(); 91 92 // Transform to a host-only match. Bail if the host no longer matches the 93 // user input (e.g. because the user typed more than just a host). 94 GURL host = url.GetWithEmptyPath(); 95 if ((host.spec().length() < (match.input_location + input.length()))) 96 return GURL(); // User typing is longer than this host suggestion. 97 98 const string16 spec = UTF8ToUTF16(host.spec()); 99 if (spec.compare(match.input_location, input.length(), input)) 100 return GURL(); // User typing is no longer a prefix. 101 102 return host; 103 } 104 105 // Acts like the > operator for URLInfo classes. 106 bool CompareHistoryMatch(const history::HistoryMatch& a, 107 const history::HistoryMatch& b) { 108 // A URL that has been typed at all is better than one that has never been 109 // typed. (Note "!"s on each side) 110 if (!a.url_info.typed_count() != !b.url_info.typed_count()) 111 return a.url_info.typed_count() > b.url_info.typed_count(); 112 113 // Innermost matches (matches after any scheme or "www.") are better than 114 // non-innermost matches. 115 if (a.innermost_match != b.innermost_match) 116 return a.innermost_match; 117 118 // URLs that have been typed more often are better. 119 if (a.url_info.typed_count() != b.url_info.typed_count()) 120 return a.url_info.typed_count() > b.url_info.typed_count(); 121 122 // For URLs that have each been typed once, a host (alone) is better than a 123 // page inside. 124 if ((a.url_info.typed_count() == 1) && (a.IsHostOnly() != b.IsHostOnly())) 125 return a.IsHostOnly(); 126 127 // URLs that have been visited more often are better. 128 if (a.url_info.visit_count() != b.url_info.visit_count()) 129 return a.url_info.visit_count() > b.url_info.visit_count(); 130 131 // URLs that have been visited more recently are better. 132 return a.url_info.last_visit() > b.url_info.last_visit(); 133 } 134 135 // Extracts typed_count, visit_count, and last_visited time from the 136 // URLRow and puts them in the additional info field of the |match| 137 // for display in about:omnibox. 138 void RecordAdditionalInfoFromUrlRow(const history::URLRow& info, 139 AutocompleteMatch* match) { 140 match->RecordAdditionalInfo("typed count", info.typed_count()); 141 match->RecordAdditionalInfo("visit count", info.visit_count()); 142 match->RecordAdditionalInfo("last visit", info.last_visit()); 143 } 144 145 } // namespace 146 147 // ----------------------------------------------------------------- 148 // SearchTermsDataSnapshot 149 150 // Implementation of SearchTermsData that takes a snapshot of another 151 // SearchTermsData by copying all the responses to the different getters into 152 // member strings, then returning those strings when its own getters are called. 153 // This will typically be constructed on the UI thread from 154 // UIThreadSearchTermsData but is subsequently safe to use on any thread. 155 class SearchTermsDataSnapshot : public SearchTermsData { 156 public: 157 explicit SearchTermsDataSnapshot(const SearchTermsData& search_terms_data); 158 virtual ~SearchTermsDataSnapshot(); 159 160 virtual std::string GoogleBaseURLValue() const OVERRIDE; 161 virtual std::string GetApplicationLocale() const OVERRIDE; 162 virtual string16 GetRlzParameterValue() const OVERRIDE; 163 virtual std::string GetSearchClient() const OVERRIDE; 164 virtual std::string InstantEnabledParam() const OVERRIDE; 165 virtual std::string InstantExtendedEnabledParam() const OVERRIDE; 166 virtual std::string NTPIsThemedParam() const OVERRIDE; 167 168 private: 169 std::string google_base_url_value_; 170 std::string application_locale_; 171 string16 rlz_parameter_value_; 172 std::string search_client_; 173 std::string instant_enabled_param_; 174 std::string instant_extended_enabled_param_; 175 std::string ntp_is_themed_param_; 176 177 DISALLOW_COPY_AND_ASSIGN(SearchTermsDataSnapshot); 178 }; 179 180 SearchTermsDataSnapshot::SearchTermsDataSnapshot( 181 const SearchTermsData& search_terms_data) 182 : google_base_url_value_(search_terms_data.GoogleBaseURLValue()), 183 application_locale_(search_terms_data.GetApplicationLocale()), 184 rlz_parameter_value_(search_terms_data.GetRlzParameterValue()), 185 search_client_(search_terms_data.GetSearchClient()), 186 instant_enabled_param_(search_terms_data.InstantEnabledParam()), 187 instant_extended_enabled_param_( 188 search_terms_data.InstantExtendedEnabledParam()), 189 ntp_is_themed_param_(search_terms_data.NTPIsThemedParam()) {} 190 191 SearchTermsDataSnapshot::~SearchTermsDataSnapshot() { 192 } 193 194 std::string SearchTermsDataSnapshot::GoogleBaseURLValue() const { 195 return google_base_url_value_; 196 } 197 198 std::string SearchTermsDataSnapshot::GetApplicationLocale() const { 199 return application_locale_; 200 } 201 202 string16 SearchTermsDataSnapshot::GetRlzParameterValue() const { 203 return rlz_parameter_value_; 204 } 205 206 std::string SearchTermsDataSnapshot::GetSearchClient() const { 207 return search_client_; 208 } 209 210 std::string SearchTermsDataSnapshot::InstantEnabledParam() const { 211 return instant_enabled_param_; 212 } 213 214 std::string SearchTermsDataSnapshot::InstantExtendedEnabledParam() const { 215 return instant_extended_enabled_param_; 216 } 217 218 std::string SearchTermsDataSnapshot::NTPIsThemedParam() const { 219 return ntp_is_themed_param_; 220 } 221 222 // ----------------------------------------------------------------- 223 // HistoryURLProvider 224 225 // These ugly magic numbers will go away once we switch all scoring 226 // behavior (including URL-what-you-typed) to HistoryQuick provider. 227 const int HistoryURLProvider::kScoreForBestInlineableResult = 1413; 228 const int HistoryURLProvider::kScoreForUnvisitedIntranetResult = 1403; 229 const int HistoryURLProvider::kScoreForWhatYouTypedResult = 1203; 230 const int HistoryURLProvider::kBaseScoreForNonInlineableResult = 900; 231 232 // VisitClassifier is used to classify the type of visit to a particular url. 233 class HistoryURLProvider::VisitClassifier { 234 public: 235 enum Type { 236 INVALID, // Navigations to the URL are not allowed. 237 UNVISITED_INTRANET, // A navigable URL for which we have no visit data but 238 // which is known to refer to a visited intranet host. 239 VISITED, // The site has been previously visited. 240 }; 241 242 VisitClassifier(HistoryURLProvider* provider, 243 const AutocompleteInput& input, 244 history::URLDatabase* db); 245 246 // Returns the type of visit for the specified input. 247 Type type() const { return type_; } 248 249 // Returns the URLRow for the visit. 250 const history::URLRow& url_row() const { return url_row_; } 251 252 private: 253 HistoryURLProvider* provider_; 254 history::URLDatabase* db_; 255 Type type_; 256 history::URLRow url_row_; 257 258 DISALLOW_COPY_AND_ASSIGN(VisitClassifier); 259 }; 260 261 HistoryURLProvider::VisitClassifier::VisitClassifier( 262 HistoryURLProvider* provider, 263 const AutocompleteInput& input, 264 history::URLDatabase* db) 265 : provider_(provider), 266 db_(db), 267 type_(INVALID) { 268 const GURL& url = input.canonicalized_url(); 269 // Detect email addresses. These cases will look like "http://user@site/", 270 // and because the history backend strips auth creds, we'll get a bogus exact 271 // match below if the user has visited "site". 272 if (!url.is_valid() || 273 ((input.type() == AutocompleteInput::UNKNOWN) && 274 input.parts().username.is_nonempty() && 275 !input.parts().password.is_nonempty() && 276 !input.parts().path.is_nonempty())) 277 return; 278 279 if (db_->GetRowForURL(url, &url_row_)) { 280 type_ = VISITED; 281 return; 282 } 283 284 if (provider_->CanFindIntranetURL(db_, input)) { 285 // The user typed an intranet hostname that they've visited (albeit with a 286 // different port and/or path) before. 287 url_row_ = history::URLRow(url); 288 type_ = UNVISITED_INTRANET; 289 } 290 } 291 292 HistoryURLProviderParams::HistoryURLProviderParams( 293 const AutocompleteInput& input, 294 bool trim_http, 295 const std::string& languages, 296 TemplateURL* default_search_provider, 297 const SearchTermsData& search_terms_data) 298 : message_loop(base::MessageLoop::current()), 299 input(input), 300 prevent_inline_autocomplete(input.prevent_inline_autocomplete()), 301 trim_http(trim_http), 302 failed(false), 303 languages(languages), 304 dont_suggest_exact_input(false), 305 default_search_provider(default_search_provider ? 306 new TemplateURL(default_search_provider->profile(), 307 default_search_provider->data()) : NULL), 308 search_terms_data(new SearchTermsDataSnapshot(search_terms_data)) { 309 } 310 311 HistoryURLProviderParams::~HistoryURLProviderParams() { 312 } 313 314 HistoryURLProvider::HistoryURLProvider(AutocompleteProviderListener* listener, 315 Profile* profile) 316 : HistoryProvider(listener, profile, 317 AutocompleteProvider::TYPE_HISTORY_URL), 318 params_(NULL), 319 cull_redirects_( 320 !OmniboxFieldTrial::InHUPCullRedirectsFieldTrial() || 321 !OmniboxFieldTrial::InHUPCullRedirectsFieldTrialExperimentGroup()), 322 create_shorter_match_( 323 !OmniboxFieldTrial::InHUPCreateShorterMatchFieldTrial() || 324 !OmniboxFieldTrial:: 325 InHUPCreateShorterMatchFieldTrialExperimentGroup()), 326 search_url_database_(true) { 327 } 328 329 // static 330 AutocompleteMatch HistoryURLProvider::SuggestExactInput( 331 AutocompleteProvider* provider, 332 const AutocompleteInput& input, 333 bool trim_http) { 334 AutocompleteMatch match(provider, 0, false, 335 AutocompleteMatchType::URL_WHAT_YOU_TYPED); 336 337 const GURL& url = input.canonicalized_url(); 338 if (url.is_valid()) { 339 match.destination_url = url; 340 341 // Trim off "http://" if the user didn't type it. 342 // NOTE: We use TrimHttpPrefix() here rather than StringForURLDisplay() to 343 // strip the scheme as we need to know the offset so we can adjust the 344 // |match_location| below. StringForURLDisplay() and TrimHttpPrefix() have 345 // slightly different behavior as well (the latter will strip even without 346 // two slashes after the scheme). 347 string16 display_string(provider->StringForURLDisplay(url, false, false)); 348 const size_t offset = trim_http ? TrimHttpPrefix(&display_string) : 0; 349 match.fill_into_edit = 350 AutocompleteInput::FormattedStringWithEquivalentMeaning(url, 351 display_string); 352 match.allowed_to_be_default_match = true; 353 // NOTE: Don't set match.inline_autocompletion to something non-empty here; 354 // it's surprising and annoying. 355 356 // Try to highlight "innermost" match location. If we fix up "w" into 357 // "www.w.com", we want to highlight the fifth character, not the first. 358 // This relies on match.destination_url being the non-prefix-trimmed version 359 // of match.contents. 360 match.contents = display_string; 361 const URLPrefix* best_prefix = URLPrefix::BestURLPrefix( 362 UTF8ToUTF16(match.destination_url.spec()), input.text()); 363 364 // We only want to trim the HTTP scheme off our match if the user didn't 365 // explicitly type "http:". 366 DCHECK(!trim_http || !HasHTTPScheme(input.text())); 367 368 // Because of the vagaries of GURL, it's possible for match.destination_url 369 // to not contain the user's input at all (so |best_prefix| is NULL). 370 // In this case don't mark anything as a match. 371 const size_t match_location = (best_prefix == NULL) ? 372 string16::npos : best_prefix->prefix.length() - offset; 373 AutocompleteMatch::ClassifyLocationInString(match_location, 374 input.text().length(), 375 match.contents.length(), 376 ACMatchClassification::URL, 377 &match.contents_class); 378 379 match.is_history_what_you_typed_match = true; 380 } 381 382 return match; 383 } 384 385 void HistoryURLProvider::Start(const AutocompleteInput& input, 386 bool minimal_changes) { 387 // NOTE: We could try hard to do less work in the |minimal_changes| case 388 // here; some clever caching would let us reuse the raw matches from the 389 // history DB without re-querying. However, we'd still have to go back to 390 // the history thread to mark these up properly, and if pass 2 is currently 391 // running, we'd need to wait for it to return to the main thread before 392 // doing this (we can't just write new data for it to read due to thread 393 // safety issues). At that point it's just as fast, and easier, to simply 394 // re-run the query from scratch and ignore |minimal_changes|. 395 396 // Cancel any in-progress query. 397 Stop(false); 398 399 RunAutocompletePasses(input, true); 400 } 401 402 void HistoryURLProvider::Stop(bool clear_cached_results) { 403 done_ = true; 404 405 if (params_) 406 params_->cancel_flag.Set(); 407 } 408 409 // Called on the history thread. 410 void HistoryURLProvider::ExecuteWithDB(history::HistoryBackend* backend, 411 history::URLDatabase* db, 412 HistoryURLProviderParams* params) { 413 // We may get called with a NULL database if it couldn't be properly 414 // initialized. 415 if (!db) { 416 params->failed = true; 417 } else if (!params->cancel_flag.IsSet()) { 418 base::TimeTicks beginning_time = base::TimeTicks::Now(); 419 420 DoAutocomplete(backend, db, params); 421 422 UMA_HISTOGRAM_TIMES("Autocomplete.HistoryAsyncQueryTime", 423 base::TimeTicks::Now() - beginning_time); 424 } 425 426 // Return the results (if any) to the main thread. 427 params->message_loop->PostTask(FROM_HERE, base::Bind( 428 &HistoryURLProvider::QueryComplete, this, params)); 429 } 430 431 // Used by both autocomplete passes, and therefore called on multiple different 432 // threads (though not simultaneously). 433 void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend, 434 history::URLDatabase* db, 435 HistoryURLProviderParams* params) { 436 VisitClassifier classifier(this, params->input, db); 437 // Create a What You Typed match, which we'll need below. 438 // 439 // We display this to the user when there's a reasonable chance they actually 440 // care: 441 // * Their input can be opened as a URL, and 442 // * We parsed the input as a URL, or it starts with an explicit "http:" or 443 // "https:". 444 // that is when their input can be opened as a URL. 445 // Otherwise, this is just low-quality noise. In the cases where we've parsed 446 // as UNKNOWN, we'll still show an accidental search infobar if need be. 447 bool have_what_you_typed_match = 448 params->input.canonicalized_url().is_valid() && 449 (params->input.type() != AutocompleteInput::QUERY) && 450 ((params->input.type() != AutocompleteInput::UNKNOWN) || 451 (classifier.type() == VisitClassifier::UNVISITED_INTRANET) || 452 !params->trim_http || 453 (AutocompleteInput::NumNonHostComponents(params->input.parts()) > 0)); 454 AutocompleteMatch what_you_typed_match( 455 SuggestExactInput(this, params->input, params->trim_http)); 456 what_you_typed_match.relevance = CalculateRelevance(WHAT_YOU_TYPED, 0); 457 458 // Get the matching URLs from the DB 459 history::URLRows url_matches; 460 history::HistoryMatches history_matches; 461 462 if (search_url_database_) { 463 const URLPrefixes& prefixes = URLPrefix::GetURLPrefixes(); 464 for (URLPrefixes::const_iterator i(prefixes.begin()); i != prefixes.end(); 465 ++i) { 466 if (params->cancel_flag.IsSet()) 467 return; // Canceled in the middle of a query, give up. 468 // We only need kMaxMatches results in the end, but before we 469 // get there we need to promote lower-quality matches that are 470 // prefixes of higher-quality matches, and remove lower-quality 471 // redirects. So we ask for more results than we need, of every 472 // prefix type, in hopes this will give us far more than enough 473 // to work with. CullRedirects() will then reduce the list to 474 // the best kMaxMatches results. 475 db->AutocompleteForPrefix( 476 UTF16ToUTF8(i->prefix + params->input.text()), 477 kMaxMatches * 2, 478 (backend == NULL), 479 &url_matches); 480 for (history::URLRows::const_iterator j(url_matches.begin()); 481 j != url_matches.end(); ++j) { 482 const URLPrefix* best_prefix = 483 URLPrefix::BestURLPrefix(UTF8ToUTF16(j->url().spec()), string16()); 484 DCHECK(best_prefix != NULL); 485 history_matches.push_back(history::HistoryMatch(*j, i->prefix.length(), 486 i->num_components == 0, 487 i->num_components >= best_prefix->num_components)); 488 } 489 } 490 } 491 492 // Create sorted list of suggestions. 493 CullPoorMatches(&history_matches, params); 494 SortMatches(&history_matches); 495 PromoteOrCreateShorterSuggestion(db, *params, have_what_you_typed_match, 496 what_you_typed_match, &history_matches); 497 498 // Try to promote a match as an exact/inline autocomplete match. This also 499 // moves it to the front of |history_matches|, so skip over it when 500 // converting the rest of the matches. 501 size_t first_match = 1; 502 size_t exact_suggestion = 0; 503 // Checking |is_history_what_you_typed_match| tells us whether 504 // SuggestExactInput() succeeded in constructing a valid match. 505 if (what_you_typed_match.is_history_what_you_typed_match && 506 (!backend || !params->dont_suggest_exact_input) && 507 FixupExactSuggestion(db, params->input, classifier, &what_you_typed_match, 508 &history_matches)) { 509 // Got an exact match for the user's input. Treat it as the best match 510 // regardless of the input type. 511 exact_suggestion = 1; 512 params->matches.push_back(what_you_typed_match); 513 } else if (params->prevent_inline_autocomplete || 514 history_matches.empty() || 515 !PromoteMatchForInlineAutocomplete(params, history_matches.front())) { 516 // Failed to promote any URLs for inline autocompletion. Use the What You 517 // Typed match, if we have it. 518 first_match = 0; 519 if (have_what_you_typed_match) 520 params->matches.push_back(what_you_typed_match); 521 } 522 523 // This is the end of the synchronous pass. 524 if (!backend) 525 return; 526 // If search_url_database_ is false, we shouldn't have scheduled a second 527 // pass. 528 DCHECK(search_url_database_); 529 530 // Determine relevancy of highest scoring match, if any. 531 int relevance = -1; 532 for (ACMatches::const_iterator it = params->matches.begin(); 533 it != params->matches.end(); ++it) { 534 relevance = std::max(relevance, it->relevance); 535 } 536 537 if (cull_redirects_) { 538 // Remove redirects and trim list to size. We want to provide up to 539 // kMaxMatches results plus the What You Typed result, if it was added to 540 // |history_matches| above. 541 CullRedirects(backend, &history_matches, kMaxMatches + exact_suggestion); 542 } else { 543 // Simply trim the list to size. 544 if (history_matches.size() > kMaxMatches + exact_suggestion) 545 history_matches.resize(kMaxMatches + exact_suggestion); 546 } 547 548 // Convert the history matches to autocomplete matches. 549 for (size_t i = first_match; i < history_matches.size(); ++i) { 550 const history::HistoryMatch& match = history_matches[i]; 551 DCHECK(!have_what_you_typed_match || 552 (match.url_info.url() != 553 GURL(params->matches.front().destination_url))); 554 // If we've assigned a score already, all later matches score one 555 // less than the previous match. 556 relevance = (relevance > 0) ? (relevance - 1) : 557 CalculateRelevance(NORMAL, history_matches.size() - 1 - i); 558 AutocompleteMatch ac_match = HistoryMatchToACMatch(params, match, 559 NORMAL, relevance); 560 params->matches.push_back(ac_match); 561 } 562 } 563 564 // Called on the main thread when the query is complete. 565 void HistoryURLProvider::QueryComplete( 566 HistoryURLProviderParams* params_gets_deleted) { 567 // Ensure |params_gets_deleted| gets deleted on exit. 568 scoped_ptr<HistoryURLProviderParams> params(params_gets_deleted); 569 570 // If the user hasn't already started another query, clear our member pointer 571 // so we can't write into deleted memory. 572 if (params_ == params_gets_deleted) 573 params_ = NULL; 574 575 // Don't send responses for queries that have been canceled. 576 if (params->cancel_flag.IsSet()) 577 return; // Already set done_ when we canceled, no need to set it again. 578 579 // Don't modify |matches_| if the query failed, since it might have a default 580 // match in it, whereas |params->matches| will be empty. 581 if (!params->failed) { 582 matches_.swap(params->matches); 583 UpdateStarredStateOfMatches(); 584 } 585 586 done_ = true; 587 listener_->OnProviderUpdate(true); 588 } 589 590 HistoryURLProvider::~HistoryURLProvider() { 591 // Note: This object can get leaked on shutdown if there are pending 592 // requests on the database (which hold a reference to us). Normally, these 593 // messages get flushed for each thread. We do a round trip from main, to 594 // history, back to main while holding a reference. If the main thread 595 // completes before the history thread, the message to delegate back to the 596 // main thread will not run and the reference will leak. Therefore, don't do 597 // anything on destruction. 598 } 599 600 int HistoryURLProvider::CalculateRelevance(MatchType match_type, 601 size_t match_number) const { 602 switch (match_type) { 603 case INLINE_AUTOCOMPLETE: 604 return kScoreForBestInlineableResult; 605 606 case UNVISITED_INTRANET: 607 return kScoreForUnvisitedIntranetResult; 608 609 case WHAT_YOU_TYPED: 610 return kScoreForWhatYouTypedResult; 611 612 default: // NORMAL 613 return kBaseScoreForNonInlineableResult + 614 static_cast<int>(match_number); 615 } 616 } 617 618 void HistoryURLProvider::RunAutocompletePasses( 619 const AutocompleteInput& input, 620 bool fixup_input_and_run_pass_1) { 621 matches_.clear(); 622 623 if ((input.type() == AutocompleteInput::INVALID) || 624 (input.type() == AutocompleteInput::FORCED_QUERY)) 625 return; 626 627 // Create a match for exactly what the user typed. This will only be used as 628 // a fallback in case we can't get the history service or URL DB; otherwise, 629 // we'll run this again in DoAutocomplete() and use that result instead. 630 const bool trim_http = !HasHTTPScheme(input.text()); 631 // Don't do this for queries -- while we can sometimes mark up a match for 632 // this, it's not what the user wants, and just adds noise. 633 if ((input.type() != AutocompleteInput::QUERY) && 634 input.canonicalized_url().is_valid()) { 635 AutocompleteMatch what_you_typed(SuggestExactInput(this, input, trim_http)); 636 what_you_typed.relevance = CalculateRelevance(WHAT_YOU_TYPED, 0); 637 matches_.push_back(what_you_typed); 638 } 639 640 // We'll need the history service to run both passes, so try to obtain it. 641 if (!profile_) 642 return; 643 HistoryService* const history_service = 644 HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS); 645 if (!history_service) 646 return; 647 648 // Get the default search provider and search terms data now since we have to 649 // retrieve these on the UI thread, and the second pass runs on the history 650 // thread. |template_url_service| can be NULL when testing. 651 TemplateURLService* template_url_service = 652 TemplateURLServiceFactory::GetForProfile(profile_); 653 TemplateURL* default_search_provider = template_url_service ? 654 template_url_service->GetDefaultSearchProvider() : NULL; 655 UIThreadSearchTermsData data(profile_); 656 657 // Create the data structure for the autocomplete passes. We'll save this off 658 // onto the |params_| member for later deletion below if we need to run pass 659 // 2. 660 std::string languages(languages_); 661 if (languages.empty()) { 662 languages = 663 profile_->GetPrefs()->GetString(prefs::kAcceptLanguages); 664 } 665 scoped_ptr<HistoryURLProviderParams> params( 666 new HistoryURLProviderParams(input, trim_http, languages, 667 default_search_provider, data)); 668 669 params->prevent_inline_autocomplete = 670 PreventInlineAutocomplete(input); 671 672 if (fixup_input_and_run_pass_1) { 673 // Do some fixup on the user input before matching against it, so we provide 674 // good results for local file paths, input with spaces, etc. 675 if (!FixupUserInput(¶ms->input)) 676 return; 677 678 // Pass 1: Get the in-memory URL database, and use it to find and promote 679 // the inline autocomplete match, if any. 680 history::URLDatabase* url_db = history_service->InMemoryDatabase(); 681 // url_db can be NULL if it hasn't finished initializing (or failed to 682 // initialize). In this case all we can do is fall back on the second 683 // pass. 684 // 685 // TODO(pkasting): We should just block here until this loads. Any time 686 // someone unloads the history backend, we'll get inconsistent inline 687 // autocomplete behavior here. 688 if (url_db) { 689 DoAutocomplete(NULL, url_db, params.get()); 690 // params->matches now has the matches we should expose to the provider. 691 // Pass 2 expects a "clean slate" set of matches. 692 matches_.clear(); 693 matches_.swap(params->matches); 694 UpdateStarredStateOfMatches(); 695 } 696 } 697 698 // Pass 2: Ask the history service to call us back on the history thread, 699 // where we can read the full on-disk DB. 700 if (search_url_database_ && 701 (input.matches_requested() == AutocompleteInput::ALL_MATCHES)) { 702 done_ = false; 703 params_ = params.release(); // This object will be destroyed in 704 // QueryComplete() once we're done with it. 705 history_service->ScheduleAutocomplete(this, params_); 706 } 707 } 708 709 bool HistoryURLProvider::FixupExactSuggestion( 710 history::URLDatabase* db, 711 const AutocompleteInput& input, 712 const VisitClassifier& classifier, 713 AutocompleteMatch* match, 714 history::HistoryMatches* matches) const { 715 DCHECK(match != NULL); 716 DCHECK(matches != NULL); 717 718 MatchType type = INLINE_AUTOCOMPLETE; 719 switch (classifier.type()) { 720 case VisitClassifier::INVALID: 721 return false; 722 case VisitClassifier::UNVISITED_INTRANET: 723 type = UNVISITED_INTRANET; 724 break; 725 default: 726 DCHECK_EQ(VisitClassifier::VISITED, classifier.type()); 727 // We have data for this match, use it. 728 match->deletable = true; 729 match->description = classifier.url_row().title(); 730 RecordAdditionalInfoFromUrlRow(classifier.url_row(), match); 731 AutocompleteMatch::ClassifyMatchInString( 732 input.text(), 733 classifier.url_row().title(), 734 ACMatchClassification::NONE, &match->description_class); 735 if (!classifier.url_row().typed_count()) { 736 // If we reach here, we must be in the second pass, and we must not have 737 // this row's data available during the first pass. That means we 738 // either scored it as WHAT_YOU_TYPED or UNVISITED_INTRANET, and to 739 // maintain the ordering between passes consistent, we need to score it 740 // the same way here. 741 type = CanFindIntranetURL(db, input) ? 742 UNVISITED_INTRANET : WHAT_YOU_TYPED; 743 } 744 break; 745 } 746 747 const GURL& url = match->destination_url; 748 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); 749 // If the what-you-typed result looks like a single word (which can be 750 // interpreted as an intranet address) followed by a pound sign ("#"), 751 // leave the score for the url-what-you-typed result as is. It will be 752 // outscored by a search query from the SearchProvider. This test fixes 753 // cases such as "c#" and "c# foo" where the user has visited an intranet 754 // site "c". We want the search-what-you-typed score to beat the 755 // URL-what-you-typed score in this case. Most of the below test tries to 756 // make sure that this code does not trigger if the user did anything to 757 // indicate the desired match is a URL. For instance, "c/# foo" will not 758 // pass the test because that will be classified as input type URL. The 759 // parsed.CountCharactersBefore() in the test looks for the presence of a 760 // reference fragment in the URL by checking whether the position differs 761 // included the delimiter (pound sign) versus not including the delimiter. 762 // (One cannot simply check url.ref() because it will not distinguish 763 // between the input "c" and the input "c#", both of which will have empty 764 // reference fragments.) 765 if ((type == UNVISITED_INTRANET) && 766 (input.type() != AutocompleteInput::URL) && 767 url.username().empty() && url.password().empty() && url.port().empty() && 768 (url.path() == "/") && url.query().empty() && 769 (parsed.CountCharactersBefore(url_parse::Parsed::REF, true) != 770 parsed.CountCharactersBefore(url_parse::Parsed::REF, false))) { 771 return false; 772 } 773 774 match->relevance = CalculateRelevance(type, 0); 775 776 // If there are any other matches, then don't promote this match here, in 777 // hopes the caller will be able to inline autocomplete a better suggestion. 778 // DoAutocomplete() will fall back on this match if inline autocompletion 779 // fails. This matches how we react to never-visited URL inputs in the non- 780 // intranet case. 781 if (type == UNVISITED_INTRANET && !matches->empty()) 782 return false; 783 784 // Put it on the front of the HistoryMatches for redirect culling. 785 CreateOrPromoteMatch(classifier.url_row(), string16::npos, false, matches, 786 true, true); 787 return true; 788 } 789 790 bool HistoryURLProvider::CanFindIntranetURL( 791 history::URLDatabase* db, 792 const AutocompleteInput& input) const { 793 // Normally passing the first two conditions below ought to guarantee the 794 // third condition, but because FixupUserInput() can run and modify the 795 // input's text and parts between Parse() and here, it seems better to be 796 // paranoid and check. 797 if ((input.type() != AutocompleteInput::UNKNOWN) || 798 !LowerCaseEqualsASCII(input.scheme(), chrome::kHttpScheme) || 799 !input.parts().host.is_nonempty()) 800 return false; 801 const std::string host(UTF16ToUTF8( 802 input.text().substr(input.parts().host.begin, input.parts().host.len))); 803 const size_t registry_length = 804 net::registry_controlled_domains::GetRegistryLength( 805 host, 806 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 807 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 808 return registry_length == 0 && db->IsTypedHost(host); 809 } 810 811 bool HistoryURLProvider::PromoteMatchForInlineAutocomplete( 812 HistoryURLProviderParams* params, 813 const history::HistoryMatch& match) { 814 // Promote the first match if it's been typed at least n times, where n == 1 815 // for "simple" (host-only) URLs and n == 2 for others. We set a higher bar 816 // for these long URLs because it's less likely that users will want to visit 817 // them again. Even though we don't increment the typed_count for pasted-in 818 // URLs, if the user manually edits the URL or types some long thing in by 819 // hand, we wouldn't want to immediately start autocompleting it. 820 if (!match.url_info.typed_count() || 821 ((match.url_info.typed_count() == 1) && 822 !match.IsHostOnly())) 823 return false; 824 825 // In the case where the user has typed "foo.com" and visited (but not typed) 826 // "foo/", and the input is "foo", we can reach here for "foo.com" during the 827 // first pass but have the second pass suggest the exact input as a better 828 // URL. Since we need both passes to agree, and since during the first pass 829 // there's no way to know about "foo/", make reaching this point prevent any 830 // future pass from suggesting the exact input as a better match. 831 if (params) { 832 params->dont_suggest_exact_input = true; 833 params->matches.push_back(HistoryMatchToACMatch(params, match, 834 INLINE_AUTOCOMPLETE, CalculateRelevance(INLINE_AUTOCOMPLETE, 0))); 835 } 836 return true; 837 } 838 839 // See if a shorter version of the best match should be created, and if so place 840 // it at the front of |matches|. This can suggest history URLs that are 841 // prefixes of the best match (if they've been visited enough, compared to the 842 // best match), or create host-only suggestions even when they haven't been 843 // visited before: if the user visited http://example.com/asdf once, we'll 844 // suggest http://example.com/ even if they've never been to it. 845 void HistoryURLProvider::PromoteOrCreateShorterSuggestion( 846 history::URLDatabase* db, 847 const HistoryURLProviderParams& params, 848 bool have_what_you_typed_match, 849 const AutocompleteMatch& what_you_typed_match, 850 history::HistoryMatches* matches) { 851 if (matches->empty()) 852 return; // No matches, nothing to do. 853 854 // Determine the base URL from which to search, and whether that URL could 855 // itself be added as a match. We can add the base iff it's not "effectively 856 // the same" as any "what you typed" match. 857 const history::HistoryMatch& match = matches->front(); 858 GURL search_base = ConvertToHostOnly(match, params.input.text()); 859 bool can_add_search_base_to_matches = !have_what_you_typed_match; 860 if (search_base.is_empty()) { 861 // Search from what the user typed when we couldn't reduce the best match 862 // to a host. Careful: use a substring of |match| here, rather than the 863 // first match in |params|, because they might have different prefixes. If 864 // the user typed "google.com", |what_you_typed_match| will hold 865 // "http://google.com/", but |match| might begin with 866 // "http://www.google.com/". 867 // TODO: this should be cleaned up, and is probably incorrect for IDN. 868 std::string new_match = match.url_info.url().possibly_invalid_spec(). 869 substr(0, match.input_location + params.input.text().length()); 870 search_base = GURL(new_match); 871 // TODO(mrossetti): There is a degenerate case where the following may 872 // cause a failure: http://www/~someword/fubar.html. Diagnose. 873 // See: http://crbug.com/50101 874 if (search_base.is_empty()) 875 return; // Can't construct a valid URL from which to start a search. 876 } else if (!can_add_search_base_to_matches) { 877 can_add_search_base_to_matches = 878 (search_base != what_you_typed_match.destination_url); 879 } 880 if (search_base == match.url_info.url()) 881 return; // Couldn't shorten |match|, so no range of URLs to search over. 882 883 // Search the DB for short URLs between our base and |match|. 884 history::URLRow info(search_base); 885 bool promote = true; 886 // A short URL is only worth suggesting if it's been visited at least a third 887 // as often as the longer URL. 888 const int min_visit_count = ((match.url_info.visit_count() - 1) / 3) + 1; 889 // For stability between the in-memory and on-disk autocomplete passes, when 890 // the long URL has been typed before, only suggest shorter URLs that have 891 // also been typed. Otherwise, the on-disk pass could suggest a shorter URL 892 // (which hasn't been typed) that the in-memory pass doesn't know about, 893 // thereby making the top match, and thus the behavior of inline 894 // autocomplete, unstable. 895 const int min_typed_count = match.url_info.typed_count() ? 1 : 0; 896 if (!db->FindShortestURLFromBase(search_base.possibly_invalid_spec(), 897 match.url_info.url().possibly_invalid_spec(), min_visit_count, 898 min_typed_count, can_add_search_base_to_matches, &info)) { 899 if (!can_add_search_base_to_matches) 900 return; // Couldn't find anything and can't add the search base, bail. 901 902 // Try to get info on the search base itself. Promote it to the top if the 903 // original best match isn't good enough to autocomplete. 904 db->GetRowForURL(search_base, &info); 905 promote = match.url_info.typed_count() <= 1; 906 } 907 908 // Promote or add the desired URL to the list of matches. 909 bool ensure_can_inline = 910 promote && PromoteMatchForInlineAutocomplete(NULL, match); 911 CreateOrPromoteMatch(info, match.input_location, match.match_in_scheme, 912 matches, create_shorter_match_, promote); 913 if (ensure_can_inline) { 914 // If |match| was inline-autocompletable and we're promoting something to 915 // replace it, make sure the promoted item is also inline-autocompletable. 916 // Setting the typed_count to 2 is sufficient to guarantee this (and is safe 917 // because by this point all sorting has already happened and the only thing 918 // checking the typed_count will be PromoteMatchForInlineAutocomplete()). 919 // 920 // We have to do this here rather than changing |info| before calling 921 // EnsureMatchPresent() because if EnsureMatchPresent() merely moves an 922 // existing match to the front, it will ignore the typed_count in |info|. 923 // But we set |ensure_can_inline| above because |match| is a reference and 924 // thus checking it here would examine the wrong match. 925 matches->front().url_info.set_typed_count(2); 926 } 927 } 928 929 void HistoryURLProvider::SortMatches(history::HistoryMatches* matches) const { 930 // Sort by quality, best first. 931 std::sort(matches->begin(), matches->end(), &CompareHistoryMatch); 932 933 // Remove duplicate matches (caused by the search string appearing in one of 934 // the prefixes as well as after it). Consider the following scenario: 935 // 936 // User has visited "http://http.com" once and "http://htaccess.com" twice. 937 // User types "http". The autocomplete search with prefix "http://" returns 938 // the first host, while the search with prefix "" returns both hosts. Now 939 // we sort them into rank order: 940 // http://http.com (innermost_match) 941 // http://htaccess.com (!innermost_match, url_info.visit_count == 2) 942 // http://http.com (!innermost_match, url_info.visit_count == 1) 943 // 944 // The above scenario tells us we can't use std::unique(), since our 945 // duplicates are not always sequential. It also tells us we should remove 946 // the lower-quality duplicate(s), since otherwise the returned results won't 947 // be ordered correctly. This is easy to do: we just always remove the later 948 // element of a duplicate pair. 949 // Be careful! Because the vector contents may change as we remove elements, 950 // we use an index instead of an iterator in the outer loop, and don't 951 // precalculate the ending position. 952 for (size_t i = 0; i < matches->size(); ++i) { 953 for (history::HistoryMatches::iterator j(matches->begin() + i + 1); 954 j != matches->end(); ) { 955 if ((*matches)[i].url_info.url() == j->url_info.url()) 956 j = matches->erase(j); 957 else 958 ++j; 959 } 960 } 961 } 962 963 void HistoryURLProvider::CullPoorMatches( 964 history::HistoryMatches* matches, 965 HistoryURLProviderParams* params) const { 966 const base::Time& threshold(history::AutocompleteAgeThreshold()); 967 for (history::HistoryMatches::iterator i(matches->begin()); 968 i != matches->end(); ) { 969 if (RowQualifiesAsSignificant(i->url_info, threshold) && 970 !(params->default_search_provider && 971 params->default_search_provider->IsSearchURLUsingTermsData( 972 i->url_info.url(), *params->search_terms_data.get()))) { 973 ++i; 974 } else { 975 i = matches->erase(i); 976 } 977 } 978 } 979 980 void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend, 981 history::HistoryMatches* matches, 982 size_t max_results) const { 983 for (size_t source = 0; 984 (source < matches->size()) && (source < max_results); ) { 985 const GURL& url = (*matches)[source].url_info.url(); 986 // TODO(brettw) this should go away when everything uses GURL. 987 history::RedirectList redirects; 988 backend->GetMostRecentRedirectsFrom(url, &redirects); 989 if (!redirects.empty()) { 990 // Remove all but the first occurrence of any of these redirects in the 991 // search results. We also must add the URL we queried for, since it may 992 // not be the first match and we'd want to remove it. 993 // 994 // For example, when A redirects to B and our matches are [A, X, B], 995 // we'll get B as the redirects from, and we want to remove the second 996 // item of that pair, removing B. If A redirects to B and our matches are 997 // [B, X, A], we'll want to remove A instead. 998 redirects.push_back(url); 999 source = RemoveSubsequentMatchesOf(matches, source, redirects); 1000 } else { 1001 // Advance to next item. 1002 source++; 1003 } 1004 } 1005 1006 if (matches->size() > max_results) 1007 matches->resize(max_results); 1008 } 1009 1010 size_t HistoryURLProvider::RemoveSubsequentMatchesOf( 1011 history::HistoryMatches* matches, 1012 size_t source_index, 1013 const std::vector<GURL>& remove) const { 1014 size_t next_index = source_index + 1; // return value = item after source 1015 1016 // Find the first occurrence of any URL in the redirect chain. We want to 1017 // keep this one since it is rated the highest. 1018 history::HistoryMatches::iterator first(std::find_first_of( 1019 matches->begin(), matches->end(), remove.begin(), remove.end(), 1020 history::HistoryMatch::EqualsGURL)); 1021 DCHECK(first != matches->end()) << "We should have always found at least the " 1022 "original URL."; 1023 1024 // Find any following occurrences of any URL in the redirect chain, these 1025 // should be deleted. 1026 for (history::HistoryMatches::iterator next(std::find_first_of(first + 1, 1027 matches->end(), remove.begin(), remove.end(), 1028 history::HistoryMatch::EqualsGURL)); 1029 next != matches->end(); next = std::find_first_of(next, matches->end(), 1030 remove.begin(), remove.end(), history::HistoryMatch::EqualsGURL)) { 1031 // Remove this item. When we remove an item before the source index, we 1032 // need to shift it to the right and remember that so we can return it. 1033 next = matches->erase(next); 1034 if (static_cast<size_t>(next - matches->begin()) < next_index) 1035 --next_index; 1036 } 1037 return next_index; 1038 } 1039 1040 AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch( 1041 HistoryURLProviderParams* params, 1042 const history::HistoryMatch& history_match, 1043 MatchType match_type, 1044 int relevance) { 1045 const history::URLRow& info = history_match.url_info; 1046 AutocompleteMatch match(this, relevance, 1047 !!info.visit_count(), AutocompleteMatchType::HISTORY_URL); 1048 match.typed_count = info.typed_count(); 1049 match.destination_url = info.url(); 1050 DCHECK(match.destination_url.is_valid()); 1051 size_t inline_autocomplete_offset = 1052 history_match.input_location + params->input.text().length(); 1053 std::string languages = (match_type == WHAT_YOU_TYPED) ? 1054 std::string() : params->languages; 1055 const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll & 1056 ~((params->trim_http && !history_match.match_in_scheme) ? 1057 0 : net::kFormatUrlOmitHTTP); 1058 match.fill_into_edit = 1059 AutocompleteInput::FormattedStringWithEquivalentMeaning(info.url(), 1060 net::FormatUrl(info.url(), languages, format_types, 1061 net::UnescapeRule::SPACES, NULL, NULL, 1062 &inline_autocomplete_offset)); 1063 if (!params->prevent_inline_autocomplete && 1064 (inline_autocomplete_offset != string16::npos)) { 1065 DCHECK(inline_autocomplete_offset <= match.fill_into_edit.length()); 1066 match.inline_autocompletion = 1067 match.fill_into_edit.substr(inline_autocomplete_offset); 1068 } 1069 // The latter part of the test effectively asks "is the inline completion 1070 // empty?" (i.e., is this match effectively the what-you-typed match?). 1071 match.allowed_to_be_default_match = !params->prevent_inline_autocomplete || 1072 ((inline_autocomplete_offset != string16::npos) && 1073 (inline_autocomplete_offset >= match.fill_into_edit.length())); 1074 1075 size_t match_start = history_match.input_location; 1076 match.contents = net::FormatUrl(info.url(), languages, 1077 format_types, net::UnescapeRule::SPACES, NULL, NULL, &match_start); 1078 if ((match_start != string16::npos) && 1079 (inline_autocomplete_offset != string16::npos) && 1080 (inline_autocomplete_offset != match_start)) { 1081 DCHECK(inline_autocomplete_offset > match_start); 1082 AutocompleteMatch::ClassifyLocationInString(match_start, 1083 inline_autocomplete_offset - match_start, match.contents.length(), 1084 ACMatchClassification::URL, &match.contents_class); 1085 } else { 1086 AutocompleteMatch::ClassifyLocationInString(string16::npos, 0, 1087 match.contents.length(), ACMatchClassification::URL, 1088 &match.contents_class); 1089 } 1090 match.description = info.title(); 1091 AutocompleteMatch::ClassifyMatchInString(params->input.text(), 1092 info.title(), 1093 ACMatchClassification::NONE, 1094 &match.description_class); 1095 RecordAdditionalInfoFromUrlRow(info, &match); 1096 return match; 1097 } 1098