1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/autocomplete/history_url_provider.h" 6 7 #include <algorithm> 8 9 #include "base/basictypes.h" 10 #include "base/bind.h" 11 #include "base/command_line.h" 12 #include "base/message_loop/message_loop.h" 13 #include "base/metrics/histogram.h" 14 #include "base/prefs/pref_service.h" 15 #include "base/strings/string_util.h" 16 #include "base/strings/utf_string_conversions.h" 17 #include "chrome/browser/autocomplete/autocomplete_match.h" 18 #include "chrome/browser/autocomplete/autocomplete_provider_listener.h" 19 #include "chrome/browser/history/history_backend.h" 20 #include "chrome/browser/history/history_database.h" 21 #include "chrome/browser/history/history_service.h" 22 #include "chrome/browser/history/history_service_factory.h" 23 #include "chrome/browser/history/history_types.h" 24 #include "chrome/browser/history/in_memory_url_index_types.h" 25 #include "chrome/browser/history/scored_history_match.h" 26 #include "chrome/browser/omnibox/omnibox_field_trial.h" 27 #include "chrome/browser/profiles/profile.h" 28 #include "chrome/browser/search_engines/template_url_service.h" 29 #include "chrome/browser/search_engines/template_url_service_factory.h" 30 #include "chrome/common/chrome_switches.h" 31 #include "chrome/common/net/url_fixer_upper.h" 32 #include "chrome/common/pref_names.h" 33 #include "chrome/common/url_constants.h" 34 #include "net/base/net_util.h" 35 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 36 #include "url/gurl.h" 37 #include "url/url_parse.h" 38 #include "url/url_util.h" 39 40 namespace { 41 42 // If |create_if_necessary| is true, ensures that |matches| contains an 43 // entry for |info|, creating a new such entry if necessary (using 44 // |input_location| and |match_in_scheme|). 45 // 46 // If |promote| is true, this also ensures the entry is the first element in 47 // |matches|, moving or adding it to the front as appropriate. When |promote| 48 // is false, existing matches are left in place, and newly added matches are 49 // placed at the back. 50 // 51 // It's OK to call this function with both |create_if_necessary| and 52 // |promote| false, in which case we'll do nothing. 53 // 54 // Returns whether the match exists regardless if it was promoted/created. 55 bool CreateOrPromoteMatch(const history::URLRow& info, 56 size_t input_location, 57 bool match_in_scheme, 58 history::HistoryMatches* matches, 59 bool create_if_necessary, 60 bool promote) { 61 // |matches| may already have an entry for this. 62 for (history::HistoryMatches::iterator i(matches->begin()); 63 i != matches->end(); ++i) { 64 if (i->url_info.url() == info.url()) { 65 // Rotate it to the front if the caller wishes. 66 if (promote) 67 std::rotate(matches->begin(), i, i + 1); 68 return true; 69 } 70 } 71 72 if (!create_if_necessary) 73 return false; 74 75 // No entry, so create one. 76 history::HistoryMatch match(info, input_location, match_in_scheme, true); 77 if (promote) 78 matches->push_front(match); 79 else 80 matches->push_back(match); 81 82 return true; 83 } 84 85 // Given the user's |input| and a |match| created from it, reduce the match's 86 // URL to just a host. If this host still matches the user input, return it. 87 // Returns the empty string on failure. 88 GURL ConvertToHostOnly(const history::HistoryMatch& match, 89 const base::string16& input) { 90 // See if we should try to do host-only suggestions for this URL. Nonstandard 91 // schemes means there's no authority section, so suggesting the host name 92 // is useless. File URLs are standard, but host suggestion is not useful for 93 // them either. 94 const GURL& url = match.url_info.url(); 95 if (!url.is_valid() || !url.IsStandard() || url.SchemeIsFile()) 96 return GURL(); 97 98 // Transform to a host-only match. Bail if the host no longer matches the 99 // user input (e.g. because the user typed more than just a host). 100 GURL host = url.GetWithEmptyPath(); 101 if ((host.spec().length() < (match.input_location + input.length()))) 102 return GURL(); // User typing is longer than this host suggestion. 103 104 const base::string16 spec = UTF8ToUTF16(host.spec()); 105 if (spec.compare(match.input_location, input.length(), input)) 106 return GURL(); // User typing is no longer a prefix. 107 108 return host; 109 } 110 111 // Acts like the > operator for URLInfo classes. 112 bool CompareHistoryMatch(const history::HistoryMatch& a, 113 const history::HistoryMatch& b) { 114 // A promoted match is better than non-promoted. 115 if (a.promoted != b.promoted) 116 return a.promoted; 117 118 // A URL that has been typed at all is better than one that has never been 119 // typed. (Note "!"s on each side) 120 if (!a.url_info.typed_count() != !b.url_info.typed_count()) 121 return a.url_info.typed_count() > b.url_info.typed_count(); 122 123 // Innermost matches (matches after any scheme or "www.") are better than 124 // non-innermost matches. 125 if (a.innermost_match != b.innermost_match) 126 return a.innermost_match; 127 128 // URLs that have been typed more often are better. 129 if (a.url_info.typed_count() != b.url_info.typed_count()) 130 return a.url_info.typed_count() > b.url_info.typed_count(); 131 132 // For URLs that have each been typed once, a host (alone) is better than a 133 // page inside. 134 if ((a.url_info.typed_count() == 1) && (a.IsHostOnly() != b.IsHostOnly())) 135 return a.IsHostOnly(); 136 137 // URLs that have been visited more often are better. 138 if (a.url_info.visit_count() != b.url_info.visit_count()) 139 return a.url_info.visit_count() > b.url_info.visit_count(); 140 141 // URLs that have been visited more recently are better. 142 return a.url_info.last_visit() > b.url_info.last_visit(); 143 } 144 145 // Sorts and dedups the given list of matches. 146 void SortAndDedupMatches(history::HistoryMatches* matches) { 147 // Sort by quality, best first. 148 std::sort(matches->begin(), matches->end(), &CompareHistoryMatch); 149 150 // Remove duplicate matches (caused by the search string appearing in one of 151 // the prefixes as well as after it). Consider the following scenario: 152 // 153 // User has visited "http://http.com" once and "http://htaccess.com" twice. 154 // User types "http". The autocomplete search with prefix "http://" returns 155 // the first host, while the search with prefix "" returns both hosts. Now 156 // we sort them into rank order: 157 // http://http.com (innermost_match) 158 // http://htaccess.com (!innermost_match, url_info.visit_count == 2) 159 // http://http.com (!innermost_match, url_info.visit_count == 1) 160 // 161 // The above scenario tells us we can't use std::unique(), since our 162 // duplicates are not always sequential. It also tells us we should remove 163 // the lower-quality duplicate(s), since otherwise the returned results won't 164 // be ordered correctly. This is easy to do: we just always remove the later 165 // element of a duplicate pair. 166 // Be careful! Because the vector contents may change as we remove elements, 167 // we use an index instead of an iterator in the outer loop, and don't 168 // precalculate the ending position. 169 for (size_t i = 0; i < matches->size(); ++i) { 170 for (history::HistoryMatches::iterator j(matches->begin() + i + 1); 171 j != matches->end(); ) { 172 if ((*matches)[i].url_info.url() == j->url_info.url()) 173 j = matches->erase(j); 174 else 175 ++j; 176 } 177 } 178 } 179 180 // Extracts typed_count, visit_count, and last_visited time from the 181 // URLRow and puts them in the additional info field of the |match| 182 // for display in about:omnibox. 183 void RecordAdditionalInfoFromUrlRow(const history::URLRow& info, 184 AutocompleteMatch* match) { 185 match->RecordAdditionalInfo("typed count", info.typed_count()); 186 match->RecordAdditionalInfo("visit count", info.visit_count()); 187 match->RecordAdditionalInfo("last visit", info.last_visit()); 188 } 189 190 } // namespace 191 192 // ----------------------------------------------------------------- 193 // SearchTermsDataSnapshot 194 195 // Implementation of SearchTermsData that takes a snapshot of another 196 // SearchTermsData by copying all the responses to the different getters into 197 // member strings, then returning those strings when its own getters are called. 198 // This will typically be constructed on the UI thread from 199 // UIThreadSearchTermsData but is subsequently safe to use on any thread. 200 class SearchTermsDataSnapshot : public SearchTermsData { 201 public: 202 explicit SearchTermsDataSnapshot(const SearchTermsData& search_terms_data); 203 virtual ~SearchTermsDataSnapshot(); 204 205 virtual std::string GoogleBaseURLValue() const OVERRIDE; 206 virtual std::string GetApplicationLocale() const OVERRIDE; 207 virtual base::string16 GetRlzParameterValue() const OVERRIDE; 208 virtual std::string GetSearchClient() const OVERRIDE; 209 virtual std::string ForceInstantResultsParam( 210 bool for_prerender) const OVERRIDE; 211 virtual std::string InstantExtendedEnabledParam() const OVERRIDE; 212 virtual std::string NTPIsThemedParam() const OVERRIDE; 213 214 private: 215 std::string google_base_url_value_; 216 std::string application_locale_; 217 base::string16 rlz_parameter_value_; 218 std::string search_client_; 219 std::string force_instant_results_param_; 220 std::string instant_extended_enabled_param_; 221 std::string ntp_is_themed_param_; 222 223 DISALLOW_COPY_AND_ASSIGN(SearchTermsDataSnapshot); 224 }; 225 226 SearchTermsDataSnapshot::SearchTermsDataSnapshot( 227 const SearchTermsData& search_terms_data) 228 : google_base_url_value_(search_terms_data.GoogleBaseURLValue()), 229 application_locale_(search_terms_data.GetApplicationLocale()), 230 rlz_parameter_value_(search_terms_data.GetRlzParameterValue()), 231 search_client_(search_terms_data.GetSearchClient()), 232 force_instant_results_param_( 233 search_terms_data.ForceInstantResultsParam(false)), 234 instant_extended_enabled_param_( 235 search_terms_data.InstantExtendedEnabledParam()), 236 ntp_is_themed_param_(search_terms_data.NTPIsThemedParam()) {} 237 238 SearchTermsDataSnapshot::~SearchTermsDataSnapshot() { 239 } 240 241 std::string SearchTermsDataSnapshot::GoogleBaseURLValue() const { 242 return google_base_url_value_; 243 } 244 245 std::string SearchTermsDataSnapshot::GetApplicationLocale() const { 246 return application_locale_; 247 } 248 249 base::string16 SearchTermsDataSnapshot::GetRlzParameterValue() const { 250 return rlz_parameter_value_; 251 } 252 253 std::string SearchTermsDataSnapshot::GetSearchClient() const { 254 return search_client_; 255 } 256 257 std::string SearchTermsDataSnapshot::ForceInstantResultsParam( 258 bool for_prerender) const { 259 return force_instant_results_param_; 260 } 261 262 std::string SearchTermsDataSnapshot::InstantExtendedEnabledParam() const { 263 return instant_extended_enabled_param_; 264 } 265 266 std::string SearchTermsDataSnapshot::NTPIsThemedParam() const { 267 return ntp_is_themed_param_; 268 } 269 270 // ----------------------------------------------------------------- 271 // HistoryURLProvider 272 273 // These ugly magic numbers will go away once we switch all scoring 274 // behavior (including URL-what-you-typed) to HistoryQuick provider. 275 const int HistoryURLProvider::kScoreForBestInlineableResult = 1413; 276 const int HistoryURLProvider::kScoreForUnvisitedIntranetResult = 1403; 277 const int HistoryURLProvider::kScoreForWhatYouTypedResult = 1203; 278 const int HistoryURLProvider::kBaseScoreForNonInlineableResult = 900; 279 280 // VisitClassifier is used to classify the type of visit to a particular url. 281 class HistoryURLProvider::VisitClassifier { 282 public: 283 enum Type { 284 INVALID, // Navigations to the URL are not allowed. 285 UNVISITED_INTRANET, // A navigable URL for which we have no visit data but 286 // which is known to refer to a visited intranet host. 287 VISITED, // The site has been previously visited. 288 }; 289 290 VisitClassifier(HistoryURLProvider* provider, 291 const AutocompleteInput& input, 292 history::URLDatabase* db); 293 294 // Returns the type of visit for the specified input. 295 Type type() const { return type_; } 296 297 // Returns the URLRow for the visit. 298 const history::URLRow& url_row() const { return url_row_; } 299 300 private: 301 HistoryURLProvider* provider_; 302 history::URLDatabase* db_; 303 Type type_; 304 history::URLRow url_row_; 305 306 DISALLOW_COPY_AND_ASSIGN(VisitClassifier); 307 }; 308 309 HistoryURLProvider::VisitClassifier::VisitClassifier( 310 HistoryURLProvider* provider, 311 const AutocompleteInput& input, 312 history::URLDatabase* db) 313 : provider_(provider), 314 db_(db), 315 type_(INVALID) { 316 const GURL& url = input.canonicalized_url(); 317 // Detect email addresses. These cases will look like "http://user@site/", 318 // and because the history backend strips auth creds, we'll get a bogus exact 319 // match below if the user has visited "site". 320 if (!url.is_valid() || 321 ((input.type() == AutocompleteInput::UNKNOWN) && 322 input.parts().username.is_nonempty() && 323 !input.parts().password.is_nonempty() && 324 !input.parts().path.is_nonempty())) 325 return; 326 327 if (db_->GetRowForURL(url, &url_row_)) { 328 type_ = VISITED; 329 return; 330 } 331 332 if (provider_->CanFindIntranetURL(db_, input)) { 333 // The user typed an intranet hostname that they've visited (albeit with a 334 // different port and/or path) before. 335 url_row_ = history::URLRow(url); 336 type_ = UNVISITED_INTRANET; 337 } 338 } 339 340 HistoryURLProviderParams::HistoryURLProviderParams( 341 const AutocompleteInput& input, 342 bool trim_http, 343 const std::string& languages, 344 TemplateURL* default_search_provider, 345 const SearchTermsData& search_terms_data) 346 : message_loop(base::MessageLoop::current()), 347 input(input), 348 prevent_inline_autocomplete(input.prevent_inline_autocomplete()), 349 trim_http(trim_http), 350 failed(false), 351 languages(languages), 352 dont_suggest_exact_input(false), 353 default_search_provider(default_search_provider ? 354 new TemplateURL(default_search_provider->profile(), 355 default_search_provider->data()) : NULL), 356 search_terms_data(new SearchTermsDataSnapshot(search_terms_data)) { 357 } 358 359 HistoryURLProviderParams::~HistoryURLProviderParams() { 360 } 361 362 HistoryURLProvider::HistoryURLProvider(AutocompleteProviderListener* listener, 363 Profile* profile) 364 : HistoryProvider(listener, profile, 365 AutocompleteProvider::TYPE_HISTORY_URL), 366 params_(NULL), 367 cull_redirects_( 368 !OmniboxFieldTrial::InHUPCullRedirectsFieldTrial() || 369 !OmniboxFieldTrial::InHUPCullRedirectsFieldTrialExperimentGroup()), 370 create_shorter_match_( 371 !OmniboxFieldTrial::InHUPCreateShorterMatchFieldTrial() || 372 !OmniboxFieldTrial:: 373 InHUPCreateShorterMatchFieldTrialExperimentGroup()), 374 search_url_database_(true) { 375 } 376 377 void HistoryURLProvider::Start(const AutocompleteInput& input, 378 bool minimal_changes) { 379 // NOTE: We could try hard to do less work in the |minimal_changes| case 380 // here; some clever caching would let us reuse the raw matches from the 381 // history DB without re-querying. However, we'd still have to go back to 382 // the history thread to mark these up properly, and if pass 2 is currently 383 // running, we'd need to wait for it to return to the main thread before 384 // doing this (we can't just write new data for it to read due to thread 385 // safety issues). At that point it's just as fast, and easier, to simply 386 // re-run the query from scratch and ignore |minimal_changes|. 387 388 // Cancel any in-progress query. 389 Stop(false); 390 391 RunAutocompletePasses(input, true); 392 } 393 394 void HistoryURLProvider::Stop(bool clear_cached_results) { 395 done_ = true; 396 397 if (params_) 398 params_->cancel_flag.Set(); 399 } 400 401 AutocompleteMatch HistoryURLProvider::SuggestExactInput( 402 const base::string16& text, 403 const GURL& destination_url, 404 bool trim_http) { 405 AutocompleteMatch match(this, 0, false, 406 AutocompleteMatchType::URL_WHAT_YOU_TYPED); 407 408 if (destination_url.is_valid()) { 409 match.destination_url = destination_url; 410 411 // Trim off "http://" if the user didn't type it. 412 // NOTE: We use TrimHttpPrefix() here rather than StringForURLDisplay() to 413 // strip the scheme as we need to know the offset so we can adjust the 414 // |match_location| below. StringForURLDisplay() and TrimHttpPrefix() have 415 // slightly different behavior as well (the latter will strip even without 416 // two slashes after the scheme). 417 DCHECK(!trim_http || !AutocompleteInput::HasHTTPScheme(text)); 418 base::string16 display_string( 419 StringForURLDisplay(destination_url, false, false)); 420 const size_t offset = trim_http ? TrimHttpPrefix(&display_string) : 0; 421 match.fill_into_edit = 422 AutocompleteInput::FormattedStringWithEquivalentMeaning(destination_url, 423 display_string); 424 match.allowed_to_be_default_match = true; 425 // NOTE: Don't set match.inline_autocompletion to something non-empty here; 426 // it's surprising and annoying. 427 428 // Try to highlight "innermost" match location. If we fix up "w" into 429 // "www.w.com", we want to highlight the fifth character, not the first. 430 // This relies on match.destination_url being the non-prefix-trimmed version 431 // of match.contents. 432 match.contents = display_string; 433 const URLPrefix* best_prefix = 434 URLPrefix::BestURLPrefix(UTF8ToUTF16(destination_url.spec()), text); 435 // It's possible for match.destination_url to not contain the user's input 436 // at all (so |best_prefix| is NULL), for example if the input is 437 // "view-source:x" and |destination_url| has an inserted "http://" in the 438 // middle. 439 if (best_prefix == NULL) { 440 AutocompleteMatch::ClassifyMatchInString(text, match.contents, 441 ACMatchClassification::URL, 442 &match.contents_class); 443 } else { 444 AutocompleteMatch::ClassifyLocationInString( 445 best_prefix->prefix.length() - offset, text.length(), 446 match.contents.length(), ACMatchClassification::URL, 447 &match.contents_class); 448 } 449 450 match.is_history_what_you_typed_match = true; 451 } 452 453 return match; 454 } 455 456 // Called on the history thread. 457 void HistoryURLProvider::ExecuteWithDB(history::HistoryBackend* backend, 458 history::URLDatabase* db, 459 HistoryURLProviderParams* params) { 460 // We may get called with a NULL database if it couldn't be properly 461 // initialized. 462 if (!db) { 463 params->failed = true; 464 } else if (!params->cancel_flag.IsSet()) { 465 base::TimeTicks beginning_time = base::TimeTicks::Now(); 466 467 DoAutocomplete(backend, db, params); 468 469 UMA_HISTOGRAM_TIMES("Autocomplete.HistoryAsyncQueryTime", 470 base::TimeTicks::Now() - beginning_time); 471 } 472 473 // Return the results (if any) to the main thread. 474 params->message_loop->PostTask(FROM_HERE, base::Bind( 475 &HistoryURLProvider::QueryComplete, this, params)); 476 } 477 478 // Used by both autocomplete passes, and therefore called on multiple different 479 // threads (though not simultaneously). 480 void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend, 481 history::URLDatabase* db, 482 HistoryURLProviderParams* params) { 483 VisitClassifier classifier(this, params->input, db); 484 // Create a What You Typed match, which we'll need below. 485 // 486 // We display this to the user when there's a reasonable chance they actually 487 // care: 488 // * Their input can be opened as a URL, and 489 // * We parsed the input as a URL, or it starts with an explicit "http:" or 490 // "https:". 491 // that is when their input can be opened as a URL. 492 // Otherwise, this is just low-quality noise. In the cases where we've parsed 493 // as UNKNOWN, we'll still show an accidental search infobar if need be. 494 bool have_what_you_typed_match = 495 params->input.canonicalized_url().is_valid() && 496 (params->input.type() != AutocompleteInput::QUERY) && 497 ((params->input.type() != AutocompleteInput::UNKNOWN) || 498 (classifier.type() == VisitClassifier::UNVISITED_INTRANET) || 499 !params->trim_http || 500 (AutocompleteInput::NumNonHostComponents(params->input.parts()) > 0)); 501 AutocompleteMatch what_you_typed_match(SuggestExactInput( 502 params->input.text(), params->input.canonicalized_url(), 503 params->trim_http)); 504 what_you_typed_match.relevance = CalculateRelevance(WHAT_YOU_TYPED, 0); 505 506 // Get the matching URLs from the DB 507 history::URLRows url_matches; 508 history::HistoryMatches history_matches; 509 510 if (search_url_database_) { 511 const URLPrefixes& prefixes = URLPrefix::GetURLPrefixes(); 512 for (URLPrefixes::const_iterator i(prefixes.begin()); i != prefixes.end(); 513 ++i) { 514 if (params->cancel_flag.IsSet()) 515 return; // Canceled in the middle of a query, give up. 516 // We only need kMaxMatches results in the end, but before we 517 // get there we need to promote lower-quality matches that are 518 // prefixes of higher-quality matches, and remove lower-quality 519 // redirects. So we ask for more results than we need, of every 520 // prefix type, in hopes this will give us far more than enough 521 // to work with. CullRedirects() will then reduce the list to 522 // the best kMaxMatches results. 523 db->AutocompleteForPrefix( 524 UTF16ToUTF8(i->prefix + params->input.text()), 525 kMaxMatches * 2, 526 (backend == NULL), 527 &url_matches); 528 for (history::URLRows::const_iterator j(url_matches.begin()); 529 j != url_matches.end(); ++j) { 530 const URLPrefix* best_prefix = 531 URLPrefix::BestURLPrefix(UTF8ToUTF16(j->url().spec()), 532 base::string16()); 533 DCHECK(best_prefix != NULL); 534 history_matches.push_back(history::HistoryMatch(*j, i->prefix.length(), 535 i->num_components == 0, 536 i->num_components >= best_prefix->num_components)); 537 } 538 } 539 } 540 541 // Create sorted list of suggestions. 542 CullPoorMatches(*params, &history_matches); 543 SortAndDedupMatches(&history_matches); 544 PromoteOrCreateShorterSuggestion(db, *params, have_what_you_typed_match, 545 what_you_typed_match, &history_matches); 546 547 // Try to promote a match as an exact/inline autocomplete match. This also 548 // moves it to the front of |history_matches|, so skip over it when 549 // converting the rest of the matches. 550 size_t first_match = 1; 551 size_t exact_suggestion = 0; 552 // Checking |is_history_what_you_typed_match| tells us whether 553 // SuggestExactInput() succeeded in constructing a valid match. 554 if (what_you_typed_match.is_history_what_you_typed_match && 555 (!backend || !params->dont_suggest_exact_input) && 556 FixupExactSuggestion(db, params->input, classifier, &what_you_typed_match, 557 &history_matches)) { 558 // Got an exact match for the user's input. Treat it as the best match 559 // regardless of the input type. 560 exact_suggestion = 1; 561 params->matches.push_back(what_you_typed_match); 562 } else if (params->prevent_inline_autocomplete || 563 history_matches.empty() || 564 !PromoteMatchForInlineAutocomplete(history_matches.front(), params)) { 565 // Failed to promote any URLs for inline autocompletion. Use the What You 566 // Typed match, if we have it. 567 first_match = 0; 568 if (have_what_you_typed_match) 569 params->matches.push_back(what_you_typed_match); 570 } 571 572 // This is the end of the synchronous pass. 573 if (!backend) 574 return; 575 // If search_url_database_ is false, we shouldn't have scheduled a second 576 // pass. 577 DCHECK(search_url_database_); 578 579 // Determine relevancy of highest scoring match, if any. 580 int relevance = -1; 581 for (ACMatches::const_iterator it = params->matches.begin(); 582 it != params->matches.end(); ++it) { 583 relevance = std::max(relevance, it->relevance); 584 } 585 586 if (cull_redirects_) { 587 // Remove redirects and trim list to size. We want to provide up to 588 // kMaxMatches results plus the What You Typed result, if it was added to 589 // |history_matches| above. 590 CullRedirects(backend, &history_matches, kMaxMatches + exact_suggestion); 591 } else { 592 // Simply trim the list to size. 593 if (history_matches.size() > kMaxMatches + exact_suggestion) 594 history_matches.resize(kMaxMatches + exact_suggestion); 595 } 596 597 // Convert the history matches to autocomplete matches. 598 for (size_t i = first_match; i < history_matches.size(); ++i) { 599 const history::HistoryMatch& match = history_matches[i]; 600 DCHECK(!have_what_you_typed_match || 601 (match.url_info.url() != 602 GURL(params->matches.front().destination_url))); 603 // If we've assigned a score already, all later matches score one 604 // less than the previous match. 605 relevance = (relevance > 0) ? (relevance - 1) : 606 CalculateRelevance(NORMAL, history_matches.size() - 1 - i); 607 AutocompleteMatch ac_match = HistoryMatchToACMatch(*params, match, 608 NORMAL, relevance); 609 params->matches.push_back(ac_match); 610 } 611 } 612 613 // Called on the main thread when the query is complete. 614 void HistoryURLProvider::QueryComplete( 615 HistoryURLProviderParams* params_gets_deleted) { 616 // Ensure |params_gets_deleted| gets deleted on exit. 617 scoped_ptr<HistoryURLProviderParams> params(params_gets_deleted); 618 619 // If the user hasn't already started another query, clear our member pointer 620 // so we can't write into deleted memory. 621 if (params_ == params_gets_deleted) 622 params_ = NULL; 623 624 // Don't send responses for queries that have been canceled. 625 if (params->cancel_flag.IsSet()) 626 return; // Already set done_ when we canceled, no need to set it again. 627 628 // Don't modify |matches_| if the query failed, since it might have a default 629 // match in it, whereas |params->matches| will be empty. 630 if (!params->failed) { 631 matches_.swap(params->matches); 632 UpdateStarredStateOfMatches(); 633 } 634 635 done_ = true; 636 listener_->OnProviderUpdate(true); 637 } 638 639 HistoryURLProvider::~HistoryURLProvider() { 640 // Note: This object can get leaked on shutdown if there are pending 641 // requests on the database (which hold a reference to us). Normally, these 642 // messages get flushed for each thread. We do a round trip from main, to 643 // history, back to main while holding a reference. If the main thread 644 // completes before the history thread, the message to delegate back to the 645 // main thread will not run and the reference will leak. Therefore, don't do 646 // anything on destruction. 647 } 648 649 int HistoryURLProvider::CalculateRelevance(MatchType match_type, 650 size_t match_number) const { 651 switch (match_type) { 652 case INLINE_AUTOCOMPLETE: 653 return kScoreForBestInlineableResult; 654 655 case UNVISITED_INTRANET: 656 return kScoreForUnvisitedIntranetResult; 657 658 case WHAT_YOU_TYPED: 659 return kScoreForWhatYouTypedResult; 660 661 default: // NORMAL 662 return kBaseScoreForNonInlineableResult + 663 static_cast<int>(match_number); 664 } 665 } 666 667 void HistoryURLProvider::RunAutocompletePasses( 668 const AutocompleteInput& input, 669 bool fixup_input_and_run_pass_1) { 670 matches_.clear(); 671 672 if ((input.type() == AutocompleteInput::INVALID) || 673 (input.type() == AutocompleteInput::FORCED_QUERY)) 674 return; 675 676 // Create a match for exactly what the user typed. This will only be used as 677 // a fallback in case we can't get the history service or URL DB; otherwise, 678 // we'll run this again in DoAutocomplete() and use that result instead. 679 const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text()); 680 // Don't do this for queries -- while we can sometimes mark up a match for 681 // this, it's not what the user wants, and just adds noise. 682 if ((input.type() != AutocompleteInput::QUERY) && 683 input.canonicalized_url().is_valid()) { 684 AutocompleteMatch what_you_typed(SuggestExactInput( 685 input.text(), input.canonicalized_url(), trim_http)); 686 what_you_typed.relevance = CalculateRelevance(WHAT_YOU_TYPED, 0); 687 matches_.push_back(what_you_typed); 688 } 689 690 // We'll need the history service to run both passes, so try to obtain it. 691 if (!profile_) 692 return; 693 HistoryService* const history_service = 694 HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS); 695 if (!history_service) 696 return; 697 698 // Get the default search provider and search terms data now since we have to 699 // retrieve these on the UI thread, and the second pass runs on the history 700 // thread. |template_url_service| can be NULL when testing. 701 TemplateURLService* template_url_service = 702 TemplateURLServiceFactory::GetForProfile(profile_); 703 TemplateURL* default_search_provider = template_url_service ? 704 template_url_service->GetDefaultSearchProvider() : NULL; 705 UIThreadSearchTermsData data(profile_); 706 707 // Create the data structure for the autocomplete passes. We'll save this off 708 // onto the |params_| member for later deletion below if we need to run pass 709 // 2. 710 scoped_ptr<HistoryURLProviderParams> params( 711 new HistoryURLProviderParams( 712 input, trim_http, 713 profile_->GetPrefs()->GetString(prefs::kAcceptLanguages), 714 default_search_provider, data)); 715 716 params->prevent_inline_autocomplete = 717 PreventInlineAutocomplete(input); 718 719 if (fixup_input_and_run_pass_1) { 720 // Do some fixup on the user input before matching against it, so we provide 721 // good results for local file paths, input with spaces, etc. 722 if (!FixupUserInput(¶ms->input)) 723 return; 724 725 // Pass 1: Get the in-memory URL database, and use it to find and promote 726 // the inline autocomplete match, if any. 727 history::URLDatabase* url_db = history_service->InMemoryDatabase(); 728 // url_db can be NULL if it hasn't finished initializing (or failed to 729 // initialize). In this case all we can do is fall back on the second 730 // pass. 731 // 732 // TODO(pkasting): We should just block here until this loads. Any time 733 // someone unloads the history backend, we'll get inconsistent inline 734 // autocomplete behavior here. 735 if (url_db) { 736 DoAutocomplete(NULL, url_db, params.get()); 737 // params->matches now has the matches we should expose to the provider. 738 // Pass 2 expects a "clean slate" set of matches. 739 matches_.clear(); 740 matches_.swap(params->matches); 741 UpdateStarredStateOfMatches(); 742 } 743 } 744 745 // Pass 2: Ask the history service to call us back on the history thread, 746 // where we can read the full on-disk DB. 747 if (search_url_database_ && 748 (input.matches_requested() == AutocompleteInput::ALL_MATCHES)) { 749 done_ = false; 750 params_ = params.release(); // This object will be destroyed in 751 // QueryComplete() once we're done with it. 752 history_service->ScheduleAutocomplete(this, params_); 753 } 754 } 755 756 bool HistoryURLProvider::FixupExactSuggestion( 757 history::URLDatabase* db, 758 const AutocompleteInput& input, 759 const VisitClassifier& classifier, 760 AutocompleteMatch* match, 761 history::HistoryMatches* matches) const { 762 DCHECK(match != NULL); 763 DCHECK(matches != NULL); 764 765 MatchType type = INLINE_AUTOCOMPLETE; 766 switch (classifier.type()) { 767 case VisitClassifier::INVALID: 768 return false; 769 case VisitClassifier::UNVISITED_INTRANET: 770 type = UNVISITED_INTRANET; 771 break; 772 default: 773 DCHECK_EQ(VisitClassifier::VISITED, classifier.type()); 774 // We have data for this match, use it. 775 match->deletable = true; 776 match->description = classifier.url_row().title(); 777 RecordAdditionalInfoFromUrlRow(classifier.url_row(), match); 778 match->description_class = 779 ClassifyDescription(input.text(), match->description); 780 if (!classifier.url_row().typed_count()) { 781 // If we reach here, we must be in the second pass, and we must not have 782 // this row's data available during the first pass. That means we 783 // either scored it as WHAT_YOU_TYPED or UNVISITED_INTRANET, and to 784 // maintain the ordering between passes consistent, we need to score it 785 // the same way here. 786 type = CanFindIntranetURL(db, input) ? 787 UNVISITED_INTRANET : WHAT_YOU_TYPED; 788 } 789 break; 790 } 791 792 const GURL& url = match->destination_url; 793 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); 794 // If the what-you-typed result looks like a single word (which can be 795 // interpreted as an intranet address) followed by a pound sign ("#"), 796 // leave the score for the url-what-you-typed result as is. It will be 797 // outscored by a search query from the SearchProvider. This test fixes 798 // cases such as "c#" and "c# foo" where the user has visited an intranet 799 // site "c". We want the search-what-you-typed score to beat the 800 // URL-what-you-typed score in this case. Most of the below test tries to 801 // make sure that this code does not trigger if the user did anything to 802 // indicate the desired match is a URL. For instance, "c/# foo" will not 803 // pass the test because that will be classified as input type URL. The 804 // parsed.CountCharactersBefore() in the test looks for the presence of a 805 // reference fragment in the URL by checking whether the position differs 806 // included the delimiter (pound sign) versus not including the delimiter. 807 // (One cannot simply check url.ref() because it will not distinguish 808 // between the input "c" and the input "c#", both of which will have empty 809 // reference fragments.) 810 if ((type == UNVISITED_INTRANET) && 811 (input.type() != AutocompleteInput::URL) && 812 url.username().empty() && url.password().empty() && url.port().empty() && 813 (url.path() == "/") && url.query().empty() && 814 (parsed.CountCharactersBefore(url_parse::Parsed::REF, true) != 815 parsed.CountCharactersBefore(url_parse::Parsed::REF, false))) { 816 return false; 817 } 818 819 match->relevance = CalculateRelevance(type, 0); 820 821 // If there are any other matches, then don't promote this match here, in 822 // hopes the caller will be able to inline autocomplete a better suggestion. 823 // DoAutocomplete() will fall back on this match if inline autocompletion 824 // fails. This matches how we react to never-visited URL inputs in the non- 825 // intranet case. 826 if (type == UNVISITED_INTRANET && !matches->empty()) 827 return false; 828 829 // Put it on the front of the HistoryMatches for redirect culling. 830 CreateOrPromoteMatch(classifier.url_row(), base::string16::npos, false, 831 matches, true, true); 832 return true; 833 } 834 835 bool HistoryURLProvider::CanFindIntranetURL( 836 history::URLDatabase* db, 837 const AutocompleteInput& input) const { 838 // Normally passing the first two conditions below ought to guarantee the 839 // third condition, but because FixupUserInput() can run and modify the 840 // input's text and parts between Parse() and here, it seems better to be 841 // paranoid and check. 842 if ((input.type() != AutocompleteInput::UNKNOWN) || 843 !LowerCaseEqualsASCII(input.scheme(), content::kHttpScheme) || 844 !input.parts().host.is_nonempty()) 845 return false; 846 const std::string host(UTF16ToUTF8( 847 input.text().substr(input.parts().host.begin, input.parts().host.len))); 848 const size_t registry_length = 849 net::registry_controlled_domains::GetRegistryLength( 850 host, 851 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 852 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 853 return registry_length == 0 && db->IsTypedHost(host); 854 } 855 856 bool HistoryURLProvider::PromoteMatchForInlineAutocomplete( 857 const history::HistoryMatch& match, 858 HistoryURLProviderParams* params) { 859 // Promote the first match if it's been marked for promotion or typed at least 860 // n times, where n == 1 for "simple" (host-only) URLs and n == 2 for others. 861 // We set a higher bar for these long URLs because it's less likely that users 862 // will want to visit them again. Even though we don't increment the 863 // typed_count for pasted-in URLs, if the user manually edits the URL or types 864 // some long thing in by hand, we wouldn't want to immediately start 865 // autocompleting it. 866 if (!match.promoted && 867 (!match.url_info.typed_count() || 868 ((match.url_info.typed_count() == 1) && 869 !match.IsHostOnly()))) 870 return false; 871 872 // In the case where the user has typed "foo.com" and visited (but not typed) 873 // "foo/", and the input is "foo", we can reach here for "foo.com" during the 874 // first pass but have the second pass suggest the exact input as a better 875 // URL. Since we need both passes to agree, and since during the first pass 876 // there's no way to know about "foo/", make reaching this point prevent any 877 // future pass from suggesting the exact input as a better match. 878 if (params) { 879 params->dont_suggest_exact_input = true; 880 params->matches.push_back(HistoryMatchToACMatch(*params, match, 881 INLINE_AUTOCOMPLETE, CalculateRelevance(INLINE_AUTOCOMPLETE, 0))); 882 } 883 return true; 884 } 885 886 // See if a shorter version of the best match should be created, and if so place 887 // it at the front of |matches|. This can suggest history URLs that are 888 // prefixes of the best match (if they've been visited enough, compared to the 889 // best match), or create host-only suggestions even when they haven't been 890 // visited before: if the user visited http://example.com/asdf once, we'll 891 // suggest http://example.com/ even if they've never been to it. 892 void HistoryURLProvider::PromoteOrCreateShorterSuggestion( 893 history::URLDatabase* db, 894 const HistoryURLProviderParams& params, 895 bool have_what_you_typed_match, 896 const AutocompleteMatch& what_you_typed_match, 897 history::HistoryMatches* matches) { 898 if (matches->empty()) 899 return; // No matches, nothing to do. 900 901 // Determine the base URL from which to search, and whether that URL could 902 // itself be added as a match. We can add the base iff it's not "effectively 903 // the same" as any "what you typed" match. 904 const history::HistoryMatch& match = matches->front(); 905 GURL search_base = ConvertToHostOnly(match, params.input.text()); 906 bool can_add_search_base_to_matches = !have_what_you_typed_match; 907 if (search_base.is_empty()) { 908 // Search from what the user typed when we couldn't reduce the best match 909 // to a host. Careful: use a substring of |match| here, rather than the 910 // first match in |params|, because they might have different prefixes. If 911 // the user typed "google.com", |what_you_typed_match| will hold 912 // "http://google.com/", but |match| might begin with 913 // "http://www.google.com/". 914 // TODO: this should be cleaned up, and is probably incorrect for IDN. 915 std::string new_match = match.url_info.url().possibly_invalid_spec(). 916 substr(0, match.input_location + params.input.text().length()); 917 search_base = GURL(new_match); 918 // TODO(mrossetti): There is a degenerate case where the following may 919 // cause a failure: http://www/~someword/fubar.html. Diagnose. 920 // See: http://crbug.com/50101 921 if (search_base.is_empty()) 922 return; // Can't construct a valid URL from which to start a search. 923 } else if (!can_add_search_base_to_matches) { 924 can_add_search_base_to_matches = 925 (search_base != what_you_typed_match.destination_url); 926 } 927 if (search_base == match.url_info.url()) 928 return; // Couldn't shorten |match|, so no range of URLs to search over. 929 930 // Search the DB for short URLs between our base and |match|. 931 history::URLRow info(search_base); 932 bool promote = true; 933 // A short URL is only worth suggesting if it's been visited at least a third 934 // as often as the longer URL. 935 const int min_visit_count = ((match.url_info.visit_count() - 1) / 3) + 1; 936 // For stability between the in-memory and on-disk autocomplete passes, when 937 // the long URL has been typed before, only suggest shorter URLs that have 938 // also been typed. Otherwise, the on-disk pass could suggest a shorter URL 939 // (which hasn't been typed) that the in-memory pass doesn't know about, 940 // thereby making the top match, and thus the behavior of inline 941 // autocomplete, unstable. 942 const int min_typed_count = match.url_info.typed_count() ? 1 : 0; 943 if (!db->FindShortestURLFromBase(search_base.possibly_invalid_spec(), 944 match.url_info.url().possibly_invalid_spec(), min_visit_count, 945 min_typed_count, can_add_search_base_to_matches, &info)) { 946 if (!can_add_search_base_to_matches) 947 return; // Couldn't find anything and can't add the search base, bail. 948 949 // Try to get info on the search base itself. Promote it to the top if the 950 // original best match isn't good enough to autocomplete. 951 db->GetRowForURL(search_base, &info); 952 promote = match.url_info.typed_count() <= 1; 953 } 954 955 // Promote or add the desired URL to the list of matches. 956 bool ensure_can_inline = 957 promote && PromoteMatchForInlineAutocomplete(match, NULL); 958 ensure_can_inline &= CreateOrPromoteMatch(info, match.input_location, 959 match.match_in_scheme, matches, create_shorter_match_, promote); 960 if (ensure_can_inline) 961 matches->front().promoted = true; 962 } 963 964 void HistoryURLProvider::CullPoorMatches( 965 const HistoryURLProviderParams& params, 966 history::HistoryMatches* matches) const { 967 const base::Time& threshold(history::AutocompleteAgeThreshold()); 968 for (history::HistoryMatches::iterator i(matches->begin()); 969 i != matches->end(); ) { 970 if (RowQualifiesAsSignificant(i->url_info, threshold) && 971 !(params.default_search_provider && 972 params.default_search_provider->IsSearchURLUsingTermsData( 973 i->url_info.url(), *params.search_terms_data.get()))) { 974 ++i; 975 } else { 976 i = matches->erase(i); 977 } 978 } 979 } 980 981 void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend, 982 history::HistoryMatches* matches, 983 size_t max_results) const { 984 for (size_t source = 0; 985 (source < matches->size()) && (source < max_results); ) { 986 const GURL& url = (*matches)[source].url_info.url(); 987 // TODO(brettw) this should go away when everything uses GURL. 988 history::RedirectList redirects; 989 backend->GetMostRecentRedirectsFrom(url, &redirects); 990 if (!redirects.empty()) { 991 // Remove all but the first occurrence of any of these redirects in the 992 // search results. We also must add the URL we queried for, since it may 993 // not be the first match and we'd want to remove it. 994 // 995 // For example, when A redirects to B and our matches are [A, X, B], 996 // we'll get B as the redirects from, and we want to remove the second 997 // item of that pair, removing B. If A redirects to B and our matches are 998 // [B, X, A], we'll want to remove A instead. 999 redirects.push_back(url); 1000 source = RemoveSubsequentMatchesOf(matches, source, redirects); 1001 } else { 1002 // Advance to next item. 1003 source++; 1004 } 1005 } 1006 1007 if (matches->size() > max_results) 1008 matches->resize(max_results); 1009 } 1010 1011 size_t HistoryURLProvider::RemoveSubsequentMatchesOf( 1012 history::HistoryMatches* matches, 1013 size_t source_index, 1014 const std::vector<GURL>& remove) const { 1015 size_t next_index = source_index + 1; // return value = item after source 1016 1017 // Find the first occurrence of any URL in the redirect chain. We want to 1018 // keep this one since it is rated the highest. 1019 history::HistoryMatches::iterator first(std::find_first_of( 1020 matches->begin(), matches->end(), remove.begin(), remove.end(), 1021 history::HistoryMatch::EqualsGURL)); 1022 DCHECK(first != matches->end()) << "We should have always found at least the " 1023 "original URL."; 1024 1025 // Find any following occurrences of any URL in the redirect chain, these 1026 // should be deleted. 1027 for (history::HistoryMatches::iterator next(std::find_first_of(first + 1, 1028 matches->end(), remove.begin(), remove.end(), 1029 history::HistoryMatch::EqualsGURL)); 1030 next != matches->end(); next = std::find_first_of(next, matches->end(), 1031 remove.begin(), remove.end(), history::HistoryMatch::EqualsGURL)) { 1032 // Remove this item. When we remove an item before the source index, we 1033 // need to shift it to the right and remember that so we can return it. 1034 next = matches->erase(next); 1035 if (static_cast<size_t>(next - matches->begin()) < next_index) 1036 --next_index; 1037 } 1038 return next_index; 1039 } 1040 1041 AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch( 1042 const HistoryURLProviderParams& params, 1043 const history::HistoryMatch& history_match, 1044 MatchType match_type, 1045 int relevance) { 1046 const history::URLRow& info = history_match.url_info; 1047 AutocompleteMatch match(this, relevance, 1048 !!info.visit_count(), AutocompleteMatchType::HISTORY_URL); 1049 match.typed_count = info.typed_count(); 1050 match.destination_url = info.url(); 1051 DCHECK(match.destination_url.is_valid()); 1052 size_t inline_autocomplete_offset = 1053 history_match.input_location + params.input.text().length(); 1054 std::string languages = (match_type == WHAT_YOU_TYPED) ? 1055 std::string() : params.languages; 1056 const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll & 1057 ~((params.trim_http && !history_match.match_in_scheme) ? 1058 0 : net::kFormatUrlOmitHTTP); 1059 match.fill_into_edit = 1060 AutocompleteInput::FormattedStringWithEquivalentMeaning(info.url(), 1061 net::FormatUrl(info.url(), languages, format_types, 1062 net::UnescapeRule::SPACES, NULL, NULL, 1063 &inline_autocomplete_offset)); 1064 if (!params.prevent_inline_autocomplete && 1065 (inline_autocomplete_offset != base::string16::npos)) { 1066 DCHECK(inline_autocomplete_offset <= match.fill_into_edit.length()); 1067 match.inline_autocompletion = 1068 match.fill_into_edit.substr(inline_autocomplete_offset); 1069 } 1070 // The latter part of the test effectively asks "is the inline completion 1071 // empty?" (i.e., is this match effectively the what-you-typed match?). 1072 match.allowed_to_be_default_match = !params.prevent_inline_autocomplete || 1073 ((inline_autocomplete_offset != base::string16::npos) && 1074 (inline_autocomplete_offset >= match.fill_into_edit.length())); 1075 1076 size_t match_start = history_match.input_location; 1077 match.contents = net::FormatUrl(info.url(), languages, 1078 format_types, net::UnescapeRule::SPACES, NULL, NULL, &match_start); 1079 if ((match_start != base::string16::npos) && 1080 (inline_autocomplete_offset != base::string16::npos) && 1081 (inline_autocomplete_offset != match_start)) { 1082 DCHECK(inline_autocomplete_offset > match_start); 1083 AutocompleteMatch::ClassifyLocationInString(match_start, 1084 inline_autocomplete_offset - match_start, match.contents.length(), 1085 ACMatchClassification::URL, &match.contents_class); 1086 } else { 1087 AutocompleteMatch::ClassifyLocationInString(base::string16::npos, 0, 1088 match.contents.length(), ACMatchClassification::URL, 1089 &match.contents_class); 1090 } 1091 match.description = info.title(); 1092 match.description_class = 1093 ClassifyDescription(params.input.text(), match.description); 1094 RecordAdditionalInfoFromUrlRow(info, &match); 1095 return match; 1096 } 1097 1098 // static 1099 ACMatchClassifications HistoryURLProvider::ClassifyDescription( 1100 const base::string16& input_text, 1101 const base::string16& description) { 1102 base::string16 clean_description = history::CleanUpTitleForMatching(description); 1103 history::TermMatches description_matches(SortAndDeoverlapMatches( 1104 history::MatchTermInString(input_text, clean_description, 0))); 1105 history::WordStarts description_word_starts; 1106 history::String16VectorFromString16( 1107 clean_description, false, &description_word_starts); 1108 description_matches = 1109 history::ScoredHistoryMatch::FilterTermMatchesByWordStarts( 1110 description_matches, description_word_starts, 0u, std::string::npos); 1111 return SpansFromTermMatch( 1112 description_matches, clean_description.length(), false); 1113 } 1114