Home | History | Annotate | Download | only in autocomplete
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/autocomplete/history_quick_provider.h"
      6 
      7 #include <vector>
      8 
      9 #include "base/basictypes.h"
     10 #include "base/command_line.h"
     11 #include "base/i18n/break_iterator.h"
     12 #include "base/logging.h"
     13 #include "base/metrics/field_trial.h"
     14 #include "base/metrics/histogram.h"
     15 #include "base/prefs/pref_service.h"
     16 #include "base/strings/string_number_conversions.h"
     17 #include "base/strings/string_util.h"
     18 #include "base/strings/utf_string_conversions.h"
     19 #include "base/time/time.h"
     20 #include "chrome/browser/autocomplete/autocomplete_result.h"
     21 #include "chrome/browser/autocomplete/history_url_provider.h"
     22 #include "chrome/browser/history/history_database.h"
     23 #include "chrome/browser/history/history_service.h"
     24 #include "chrome/browser/history/history_service_factory.h"
     25 #include "chrome/browser/history/in_memory_url_index.h"
     26 #include "chrome/browser/history/in_memory_url_index_types.h"
     27 #include "chrome/browser/history/scored_history_match.h"
     28 #include "chrome/browser/omnibox/omnibox_field_trial.h"
     29 #include "chrome/browser/profiles/profile.h"
     30 #include "chrome/browser/search/search.h"
     31 #include "chrome/browser/search_engines/template_url.h"
     32 #include "chrome/browser/search_engines/template_url_service.h"
     33 #include "chrome/browser/search_engines/template_url_service_factory.h"
     34 #include "chrome/common/autocomplete_match_type.h"
     35 #include "chrome/common/chrome_switches.h"
     36 #include "chrome/common/net/url_fixer_upper.h"
     37 #include "chrome/common/pref_names.h"
     38 #include "chrome/common/url_constants.h"
     39 #include "content/public/browser/browser_thread.h"
     40 #include "content/public/browser/notification_source.h"
     41 #include "content/public/browser/notification_types.h"
     42 #include "net/base/escape.h"
     43 #include "net/base/net_util.h"
     44 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
     45 #include "url/url_parse.h"
     46 #include "url/url_util.h"
     47 
     48 using history::InMemoryURLIndex;
     49 using history::ScoredHistoryMatch;
     50 using history::ScoredHistoryMatches;
     51 
     52 bool HistoryQuickProvider::disabled_ = false;
     53 
     54 HistoryQuickProvider::HistoryQuickProvider(
     55     AutocompleteProviderListener* listener,
     56     Profile* profile)
     57     : HistoryProvider(listener, profile,
     58           AutocompleteProvider::TYPE_HISTORY_QUICK),
     59       languages_(profile_->GetPrefs()->GetString(prefs::kAcceptLanguages)) {
     60 }
     61 
     62 void HistoryQuickProvider::Start(const AutocompleteInput& input,
     63                                  bool minimal_changes) {
     64   matches_.clear();
     65   if (disabled_)
     66     return;
     67 
     68   // Don't bother with INVALID and FORCED_QUERY.  Also pass when looking for
     69   // BEST_MATCH and there is no inline autocompletion because none of the HQP
     70   // matches can score highly enough to qualify.
     71   if ((input.type() == AutocompleteInput::INVALID) ||
     72       (input.type() == AutocompleteInput::FORCED_QUERY) ||
     73       (input.matches_requested() == AutocompleteInput::BEST_MATCH &&
     74        input.prevent_inline_autocomplete()))
     75     return;
     76 
     77   autocomplete_input_ = input;
     78 
     79   // TODO(pkasting): We should just block here until this loads.  Any time
     80   // someone unloads the history backend, we'll get inconsistent inline
     81   // autocomplete behavior here.
     82   if (GetIndex()) {
     83     base::TimeTicks start_time = base::TimeTicks::Now();
     84     DoAutocomplete();
     85     if (input.text().length() < 6) {
     86       base::TimeTicks end_time = base::TimeTicks::Now();
     87       std::string name = "HistoryQuickProvider.QueryIndexTime." +
     88           base::IntToString(input.text().length());
     89       base::HistogramBase* counter = base::Histogram::FactoryGet(
     90           name, 1, 1000, 50, base::Histogram::kUmaTargetedHistogramFlag);
     91       counter->Add(static_cast<int>((end_time - start_time).InMilliseconds()));
     92     }
     93     UpdateStarredStateOfMatches();
     94   }
     95 }
     96 
     97 void HistoryQuickProvider::DeleteMatch(const AutocompleteMatch& match) {
     98   DCHECK(match.deletable);
     99   DCHECK(match.destination_url.is_valid());
    100   // Delete the match from the InMemoryURLIndex.
    101   GetIndex()->DeleteURL(match.destination_url);
    102   DeleteMatchFromMatches(match);
    103 }
    104 
    105 HistoryQuickProvider::~HistoryQuickProvider() {}
    106 
    107 void HistoryQuickProvider::DoAutocomplete() {
    108   // Get the matching URLs from the DB.
    109   ScoredHistoryMatches matches = GetIndex()->HistoryItemsForTerms(
    110       autocomplete_input_.text(),
    111       autocomplete_input_.cursor_position());
    112   if (matches.empty())
    113     return;
    114 
    115   // Figure out if HistoryURL provider has a URL-what-you-typed match
    116   // that ought to go first and what its score will be.
    117   bool will_have_url_what_you_typed_match_first = false;
    118   int url_what_you_typed_match_score = -1;  // undefined
    119   // These are necessary (but not sufficient) conditions for the omnibox
    120   // input to be a URL-what-you-typed match.  The username test checks that
    121   // either the username does not exist (a regular URL such as http://site/)
    122   // or, if the username exists (http://user@site/), there must be either
    123   // a password or a port.  Together these exclude pure username@site
    124   // inputs because these are likely to be an e-mail address.  HistoryURL
    125   // provider won't promote the URL-what-you-typed match to first
    126   // for these inputs.
    127   const bool can_have_url_what_you_typed_match_first =
    128       autocomplete_input_.canonicalized_url().is_valid() &&
    129       (autocomplete_input_.type() != AutocompleteInput::QUERY) &&
    130       (autocomplete_input_.type() != AutocompleteInput::FORCED_QUERY) &&
    131       (!autocomplete_input_.parts().username.is_nonempty() ||
    132        autocomplete_input_.parts().password.is_nonempty() ||
    133        autocomplete_input_.parts().path.is_nonempty());
    134   if (can_have_url_what_you_typed_match_first) {
    135     HistoryService* const history_service =
    136         HistoryServiceFactory::GetForProfile(profile_,
    137                                              Profile::EXPLICIT_ACCESS);
    138     // We expect HistoryService to be available.  In case it's not,
    139     // (e.g., due to Profile corruption) we let HistoryQuick provider
    140     // completions (which may be available because it's a different
    141     // data structure) compete with the URL-what-you-typed match as
    142     // normal.
    143     if (history_service) {
    144       history::URLDatabase* url_db = history_service->InMemoryDatabase();
    145       // url_db can be NULL if it hasn't finished initializing (or
    146       // failed to to initialize).  In this case, we let HistoryQuick
    147       // provider completions compete with the URL-what-you-typed
    148       // match as normal.
    149       if (url_db) {
    150         const std::string host(UTF16ToUTF8(autocomplete_input_.text().substr(
    151             autocomplete_input_.parts().host.begin,
    152             autocomplete_input_.parts().host.len)));
    153         // We want to put the URL-what-you-typed match first if either
    154         // * the user visited the URL before (intranet or internet).
    155         // * it's a URL on a host that user visited before and this
    156         //   is the root path of the host.  (If the user types some
    157         //   of a path--more than a simple "/"--we let autocomplete compete
    158         //   normally with the URL-what-you-typed match.)
    159         // TODO(mpearson): Remove this hacky code and simply score URL-what-
    160         // you-typed in some sane way relative to possible completions:
    161         // URL-what-you-typed should get some sort of a boost relative
    162         // to completions, but completions should naturally win if
    163         // they're a lot more popular.  In this process, if the input
    164         // is a bare intranet hostname that has been visited before, we
    165         // may want to enforce that the only completions that can outscore
    166         // the URL-what-you-typed match are on the same host (i.e., aren't
    167         // from a longer internet hostname for which the omnibox input is
    168         // a prefix).
    169         if (url_db->GetRowForURL(
    170             autocomplete_input_.canonicalized_url(), NULL) != 0) {
    171           // We visited this URL before.
    172           will_have_url_what_you_typed_match_first = true;
    173           // HistoryURLProvider gives visited what-you-typed URLs a high score.
    174           url_what_you_typed_match_score =
    175               HistoryURLProvider::kScoreForBestInlineableResult;
    176         } else if (url_db->IsTypedHost(host) &&
    177              (!autocomplete_input_.parts().path.is_nonempty() ||
    178               ((autocomplete_input_.parts().path.len == 1) &&
    179                (autocomplete_input_.text()[
    180                    autocomplete_input_.parts().path.begin] == '/'))) &&
    181              !autocomplete_input_.parts().query.is_nonempty() &&
    182              !autocomplete_input_.parts().ref.is_nonempty()) {
    183           // Not visited, but we've seen the host before.
    184           will_have_url_what_you_typed_match_first = true;
    185           const size_t registry_length =
    186               net::registry_controlled_domains::GetRegistryLength(
    187                   host,
    188                   net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
    189                   net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
    190           if (registry_length == 0) {
    191             // Known intranet hosts get one score.
    192             url_what_you_typed_match_score =
    193                 HistoryURLProvider::kScoreForUnvisitedIntranetResult;
    194           } else {
    195             // Known internet hosts get another.
    196             url_what_you_typed_match_score =
    197                 HistoryURLProvider::kScoreForWhatYouTypedResult;
    198           }
    199         }
    200       }
    201     }
    202   }
    203 
    204   // Loop over every result and add it to matches_.  In the process,
    205   // guarantee that scores are decreasing.  |max_match_score| keeps
    206   // track of the highest score we can assign to any later results we
    207   // see.  Also, if we're not allowing inline autocompletions in
    208   // general or the current best suggestion isn't inlineable,
    209   // artificially reduce the starting |max_match_score| (which
    210   // therefore applies to all results) to something low enough that
    211   // guarantees no result will be offered as an inline autocomplete
    212   // suggestion.  Also do a similar reduction if we think there will be
    213   // a URL-what-you-typed match.  (We want URL-what-you-typed matches for
    214   // visited URLs to beat out any longer URLs, no matter how frequently
    215   // they're visited.)  The strength of this last reduction depends on the
    216   // likely score for the URL-what-you-typed result.
    217 
    218   // |template_url_service| or |template_url| can be NULL in unit tests.
    219   TemplateURLService* template_url_service =
    220       TemplateURLServiceFactory::GetForProfile(profile_);
    221   TemplateURL* template_url = template_url_service ?
    222       template_url_service->GetDefaultSearchProvider() : NULL;
    223   int max_match_score =
    224       (OmniboxFieldTrial::ReorderForLegalDefaultMatch(
    225          autocomplete_input_.current_page_classification()) ||
    226        (!PreventInlineAutocomplete(autocomplete_input_) &&
    227         matches.begin()->can_inline)) ?
    228       matches.begin()->raw_score :
    229       (AutocompleteResult::kLowestDefaultScore - 1);
    230   if (will_have_url_what_you_typed_match_first) {
    231     max_match_score = std::min(max_match_score,
    232         url_what_you_typed_match_score - 1);
    233   }
    234   for (ScoredHistoryMatches::const_iterator match_iter = matches.begin();
    235        match_iter != matches.end(); ++match_iter) {
    236     const ScoredHistoryMatch& history_match(*match_iter);
    237     // Culls results corresponding to queries from the default search engine.
    238     // These are low-quality, difficult-to-understand matches for users, and the
    239     // SearchProvider should surface past queries in a better way anyway.
    240     if (!template_url ||
    241         !template_url->IsSearchURL(history_match.url_info.url())) {
    242       // Set max_match_score to the score we'll assign this result:
    243       max_match_score = std::min(max_match_score, history_match.raw_score);
    244       matches_.push_back(QuickMatchToACMatch(history_match, max_match_score));
    245       // Mark this max_match_score as being used:
    246       max_match_score--;
    247     }
    248   }
    249 }
    250 
    251 AutocompleteMatch HistoryQuickProvider::QuickMatchToACMatch(
    252     const ScoredHistoryMatch& history_match,
    253     int score) {
    254   const history::URLRow& info = history_match.url_info;
    255   AutocompleteMatch match(this, score, !!info.visit_count(),
    256       history_match.url_matches.empty() ? AutocompleteMatchType::HISTORY_TITLE :
    257           AutocompleteMatchType::HISTORY_URL);
    258   match.typed_count = info.typed_count();
    259   match.destination_url = info.url();
    260   DCHECK(match.destination_url.is_valid());
    261 
    262   // Format the URL autocomplete presentation.
    263   std::vector<size_t> offsets =
    264       OffsetsFromTermMatches(history_match.url_matches);
    265   const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll &
    266       ~(!history_match.match_in_scheme ? 0 : net::kFormatUrlOmitHTTP);
    267   match.fill_into_edit =
    268       AutocompleteInput::FormattedStringWithEquivalentMeaning(info.url(),
    269           net::FormatUrlWithOffsets(info.url(), languages_, format_types,
    270               net::UnescapeRule::SPACES, NULL, NULL, &offsets));
    271   history::TermMatches new_matches =
    272       ReplaceOffsetsInTermMatches(history_match.url_matches, offsets);
    273   match.contents = net::FormatUrl(info.url(), languages_, format_types,
    274               net::UnescapeRule::SPACES, NULL, NULL, NULL);
    275   match.contents_class =
    276       SpansFromTermMatch(new_matches, match.contents.length(), true);
    277 
    278   match.allowed_to_be_default_match = history_match.can_inline &&
    279       !PreventInlineAutocomplete(autocomplete_input_);
    280   if (match.allowed_to_be_default_match) {
    281     DCHECK(!new_matches.empty());
    282     size_t inline_autocomplete_offset = new_matches[0].offset +
    283         new_matches[0].length;
    284     // |inline_autocomplete_offset| may be beyond the end of the
    285     // |fill_into_edit| if the user has typed an URL with a scheme and the
    286     // last character typed is a slash.  That slash is removed by the
    287     // FormatURLWithOffsets call above.
    288     if (inline_autocomplete_offset < match.fill_into_edit.length()) {
    289       match.inline_autocompletion =
    290           match.fill_into_edit.substr(inline_autocomplete_offset);
    291     }
    292   }
    293 
    294   // Format the description autocomplete presentation.
    295   match.description = info.title();
    296   match.description_class = SpansFromTermMatch(
    297       history_match.title_matches, match.description.length(), false);
    298 
    299   match.RecordAdditionalInfo("typed count", info.typed_count());
    300   match.RecordAdditionalInfo("visit count", info.visit_count());
    301   match.RecordAdditionalInfo("last visit", info.last_visit());
    302 
    303   return match;
    304 }
    305 
    306 history::InMemoryURLIndex* HistoryQuickProvider::GetIndex() {
    307   if (index_for_testing_.get())
    308     return index_for_testing_.get();
    309 
    310   HistoryService* const history_service =
    311       HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS);
    312   if (!history_service)
    313     return NULL;
    314 
    315   return history_service->InMemoryIndex();
    316 }
    317 
    318 // static
    319 ACMatchClassifications HistoryQuickProvider::SpansFromTermMatch(
    320     const history::TermMatches& matches,
    321     size_t text_length,
    322     bool is_url) {
    323   ACMatchClassification::Style url_style =
    324       is_url ? ACMatchClassification::URL : ACMatchClassification::NONE;
    325   ACMatchClassifications spans;
    326   if (matches.empty()) {
    327     if (text_length)
    328       spans.push_back(ACMatchClassification(0, url_style));
    329     return spans;
    330   }
    331   if (matches[0].offset)
    332     spans.push_back(ACMatchClassification(0, url_style));
    333   size_t match_count = matches.size();
    334   for (size_t i = 0; i < match_count;) {
    335     size_t offset = matches[i].offset;
    336     spans.push_back(ACMatchClassification(offset,
    337         ACMatchClassification::MATCH | url_style));
    338     // Skip all adjacent matches.
    339     do {
    340       offset += matches[i].length;
    341       ++i;
    342     } while ((i < match_count) && (offset == matches[i].offset));
    343     if (offset < text_length)
    344       spans.push_back(ACMatchClassification(offset, url_style));
    345   }
    346 
    347   return spans;
    348 }
    349