Home | History | Annotate | Download | only in autocomplete
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/autocomplete/history_quick_provider.h"
      6 
      7 #include <vector>
      8 
      9 #include "base/basictypes.h"
     10 #include "base/command_line.h"
     11 #include "base/i18n/break_iterator.h"
     12 #include "base/logging.h"
     13 #include "base/metrics/field_trial.h"
     14 #include "base/metrics/histogram.h"
     15 #include "base/prefs/pref_service.h"
     16 #include "base/strings/string_number_conversions.h"
     17 #include "base/strings/string_util.h"
     18 #include "base/strings/utf_string_conversions.h"
     19 #include "base/time/time.h"
     20 #include "chrome/browser/autocomplete/chrome_autocomplete_scheme_classifier.h"
     21 #include "chrome/browser/autocomplete/history_url_provider.h"
     22 #include "chrome/browser/history/history_database.h"
     23 #include "chrome/browser/history/history_service.h"
     24 #include "chrome/browser/history/history_service_factory.h"
     25 #include "chrome/browser/history/in_memory_url_index.h"
     26 #include "chrome/browser/history/in_memory_url_index_types.h"
     27 #include "chrome/browser/history/scored_history_match.h"
     28 #include "chrome/browser/profiles/profile.h"
     29 #include "chrome/browser/search_engines/template_url_service_factory.h"
     30 #include "chrome/common/chrome_switches.h"
     31 #include "chrome/common/pref_names.h"
     32 #include "chrome/common/url_constants.h"
     33 #include "components/metrics/proto/omnibox_input_type.pb.h"
     34 #include "components/omnibox/autocomplete_match_type.h"
     35 #include "components/omnibox/autocomplete_result.h"
     36 #include "components/omnibox/omnibox_field_trial.h"
     37 #include "components/search_engines/template_url.h"
     38 #include "components/search_engines/template_url_service.h"
     39 #include "content/public/browser/notification_source.h"
     40 #include "content/public/browser/notification_types.h"
     41 #include "net/base/escape.h"
     42 #include "net/base/net_util.h"
     43 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
     44 #include "url/url_parse.h"
     45 #include "url/url_util.h"
     46 
     47 using history::InMemoryURLIndex;
     48 using history::ScoredHistoryMatch;
     49 using history::ScoredHistoryMatches;
     50 
     51 bool HistoryQuickProvider::disabled_ = false;
     52 
     53 HistoryQuickProvider::HistoryQuickProvider(Profile* profile)
     54     : HistoryProvider(profile, AutocompleteProvider::TYPE_HISTORY_QUICK),
     55       languages_(profile_->GetPrefs()->GetString(prefs::kAcceptLanguages)) {
     56 }
     57 
     58 void HistoryQuickProvider::Start(const AutocompleteInput& input,
     59                                  bool minimal_changes) {
     60   matches_.clear();
     61   if (disabled_)
     62     return;
     63 
     64   // Don't bother with INVALID and FORCED_QUERY.
     65   if ((input.type() == metrics::OmniboxInputType::INVALID) ||
     66       (input.type() == metrics::OmniboxInputType::FORCED_QUERY))
     67     return;
     68 
     69   autocomplete_input_ = input;
     70 
     71   // TODO(pkasting): We should just block here until this loads.  Any time
     72   // someone unloads the history backend, we'll get inconsistent inline
     73   // autocomplete behavior here.
     74   if (GetIndex()) {
     75     base::TimeTicks start_time = base::TimeTicks::Now();
     76     DoAutocomplete();
     77     if (input.text().length() < 6) {
     78       base::TimeTicks end_time = base::TimeTicks::Now();
     79       std::string name = "HistoryQuickProvider.QueryIndexTime." +
     80           base::IntToString(input.text().length());
     81       base::HistogramBase* counter = base::Histogram::FactoryGet(
     82           name, 1, 1000, 50, base::Histogram::kUmaTargetedHistogramFlag);
     83       counter->Add(static_cast<int>((end_time - start_time).InMilliseconds()));
     84     }
     85   }
     86 }
     87 
     88 HistoryQuickProvider::~HistoryQuickProvider() {}
     89 
     90 void HistoryQuickProvider::DoAutocomplete() {
     91   // Get the matching URLs from the DB.
     92   ScoredHistoryMatches matches = GetIndex()->HistoryItemsForTerms(
     93       autocomplete_input_.text(),
     94       autocomplete_input_.cursor_position(),
     95       AutocompleteProvider::kMaxMatches);
     96   if (matches.empty())
     97     return;
     98 
     99   // Figure out if HistoryURL provider has a URL-what-you-typed match
    100   // that ought to go first and what its score will be.
    101   bool will_have_url_what_you_typed_match_first = false;
    102   int url_what_you_typed_match_score = -1;  // undefined
    103   // These are necessary (but not sufficient) conditions for the omnibox
    104   // input to be a URL-what-you-typed match.  The username test checks that
    105   // either the username does not exist (a regular URL such as http://site/)
    106   // or, if the username exists (http://user@site/), there must be either
    107   // a password or a port.  Together these exclude pure username@site
    108   // inputs because these are likely to be an e-mail address.  HistoryURL
    109   // provider won't promote the URL-what-you-typed match to first
    110   // for these inputs.
    111   const bool can_have_url_what_you_typed_match_first =
    112       (autocomplete_input_.type() != metrics::OmniboxInputType::QUERY) &&
    113       (!autocomplete_input_.parts().username.is_nonempty() ||
    114        autocomplete_input_.parts().password.is_nonempty() ||
    115        autocomplete_input_.parts().path.is_nonempty());
    116   if (can_have_url_what_you_typed_match_first) {
    117     HistoryService* const history_service =
    118         HistoryServiceFactory::GetForProfile(profile_,
    119                                              Profile::EXPLICIT_ACCESS);
    120     // We expect HistoryService to be available.  In case it's not,
    121     // (e.g., due to Profile corruption) we let HistoryQuick provider
    122     // completions (which may be available because it's a different
    123     // data structure) compete with the URL-what-you-typed match as
    124     // normal.
    125     if (history_service) {
    126       history::URLDatabase* url_db = history_service->InMemoryDatabase();
    127       // url_db can be NULL if it hasn't finished initializing (or
    128       // failed to to initialize).  In this case, we let HistoryQuick
    129       // provider completions compete with the URL-what-you-typed
    130       // match as normal.
    131       if (url_db) {
    132         const std::string host(base::UTF16ToUTF8(
    133             autocomplete_input_.text().substr(
    134                 autocomplete_input_.parts().host.begin,
    135                 autocomplete_input_.parts().host.len)));
    136         // We want to put the URL-what-you-typed match first if either
    137         // * the user visited the URL before (intranet or internet).
    138         // * it's a URL on a host that user visited before and this
    139         //   is the root path of the host.  (If the user types some
    140         //   of a path--more than a simple "/"--we let autocomplete compete
    141         //   normally with the URL-what-you-typed match.)
    142         // TODO(mpearson): Remove this hacky code and simply score URL-what-
    143         // you-typed in some sane way relative to possible completions:
    144         // URL-what-you-typed should get some sort of a boost relative
    145         // to completions, but completions should naturally win if
    146         // they're a lot more popular.  In this process, if the input
    147         // is a bare intranet hostname that has been visited before, we
    148         // may want to enforce that the only completions that can outscore
    149         // the URL-what-you-typed match are on the same host (i.e., aren't
    150         // from a longer internet hostname for which the omnibox input is
    151         // a prefix).
    152         if (url_db->GetRowForURL(
    153             autocomplete_input_.canonicalized_url(), NULL) != 0) {
    154           // We visited this URL before.
    155           will_have_url_what_you_typed_match_first = true;
    156           // HistoryURLProvider gives visited what-you-typed URLs a high score.
    157           url_what_you_typed_match_score =
    158               HistoryURLProvider::kScoreForBestInlineableResult;
    159         } else if (url_db->IsTypedHost(host) &&
    160              (!autocomplete_input_.parts().path.is_nonempty() ||
    161               ((autocomplete_input_.parts().path.len == 1) &&
    162                (autocomplete_input_.text()[
    163                    autocomplete_input_.parts().path.begin] == '/'))) &&
    164              !autocomplete_input_.parts().query.is_nonempty() &&
    165              !autocomplete_input_.parts().ref.is_nonempty()) {
    166           // Not visited, but we've seen the host before.
    167           will_have_url_what_you_typed_match_first = true;
    168           const size_t registry_length =
    169               net::registry_controlled_domains::GetRegistryLength(
    170                   host,
    171                   net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
    172                   net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
    173           if (registry_length == 0) {
    174             // Known intranet hosts get one score.
    175             url_what_you_typed_match_score =
    176                 HistoryURLProvider::kScoreForUnvisitedIntranetResult;
    177           } else {
    178             // Known internet hosts get another.
    179             url_what_you_typed_match_score =
    180                 HistoryURLProvider::kScoreForWhatYouTypedResult;
    181           }
    182         }
    183       }
    184     }
    185   }
    186 
    187   // Loop over every result and add it to matches_.  In the process,
    188   // guarantee that scores are decreasing.  |max_match_score| keeps
    189   // track of the highest score we can assign to any later results we
    190   // see.  Also, reduce |max_match_score| if we think there will be
    191   // a URL-what-you-typed match.  (We want URL-what-you-typed matches for
    192   // visited URLs to beat out any longer URLs, no matter how frequently
    193   // they're visited.)  The strength of this reduction depends on the
    194   // likely score for the URL-what-you-typed result.
    195 
    196   // |template_url_service| or |template_url| can be NULL in unit tests.
    197   TemplateURLService* template_url_service =
    198       TemplateURLServiceFactory::GetForProfile(profile_);
    199   TemplateURL* template_url = template_url_service ?
    200       template_url_service->GetDefaultSearchProvider() : NULL;
    201   int max_match_score = matches.begin()->raw_score();
    202   if (will_have_url_what_you_typed_match_first) {
    203     max_match_score = std::min(max_match_score,
    204         url_what_you_typed_match_score - 1);
    205   }
    206   for (ScoredHistoryMatches::const_iterator match_iter = matches.begin();
    207        match_iter != matches.end(); ++match_iter) {
    208     const ScoredHistoryMatch& history_match(*match_iter);
    209     // Culls results corresponding to queries from the default search engine.
    210     // These are low-quality, difficult-to-understand matches for users, and the
    211     // SearchProvider should surface past queries in a better way anyway.
    212     if (!template_url ||
    213         !template_url->IsSearchURL(history_match.url_info.url(),
    214                                    template_url_service->search_terms_data())) {
    215       // Set max_match_score to the score we'll assign this result:
    216       max_match_score = std::min(max_match_score, history_match.raw_score());
    217       matches_.push_back(QuickMatchToACMatch(history_match, max_match_score));
    218       // Mark this max_match_score as being used:
    219       max_match_score--;
    220     }
    221   }
    222 }
    223 
    224 AutocompleteMatch HistoryQuickProvider::QuickMatchToACMatch(
    225     const ScoredHistoryMatch& history_match,
    226     int score) {
    227   const history::URLRow& info = history_match.url_info;
    228   AutocompleteMatch match(
    229       this, score, !!info.visit_count(),
    230       history_match.url_matches().empty() ?
    231           AutocompleteMatchType::HISTORY_TITLE :
    232           AutocompleteMatchType::HISTORY_URL);
    233   match.typed_count = info.typed_count();
    234   match.destination_url = info.url();
    235   DCHECK(match.destination_url.is_valid());
    236 
    237   // Format the URL autocomplete presentation.
    238   const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll &
    239       ~(!history_match.match_in_scheme ? 0 : net::kFormatUrlOmitHTTP);
    240   match.fill_into_edit =
    241       AutocompleteInput::FormattedStringWithEquivalentMeaning(
    242           info.url(),
    243           net::FormatUrl(info.url(), languages_, format_types,
    244                          net::UnescapeRule::SPACES, NULL, NULL, NULL),
    245           ChromeAutocompleteSchemeClassifier(profile_));
    246   std::vector<size_t> offsets =
    247       OffsetsFromTermMatches(history_match.url_matches());
    248   base::OffsetAdjuster::Adjustments adjustments;
    249   match.contents = net::FormatUrlWithAdjustments(
    250       info.url(), languages_, format_types, net::UnescapeRule::SPACES, NULL,
    251       NULL, &adjustments);
    252   base::OffsetAdjuster::AdjustOffsets(adjustments, &offsets);
    253   history::TermMatches new_matches =
    254       ReplaceOffsetsInTermMatches(history_match.url_matches(), offsets);
    255   match.contents_class =
    256       SpansFromTermMatch(new_matches, match.contents.length(), true);
    257 
    258   // Set |inline_autocompletion| and |allowed_to_be_default_match| if possible.
    259   if (history_match.can_inline()) {
    260     DCHECK(!new_matches.empty());
    261     size_t inline_autocomplete_offset = new_matches[0].offset +
    262         new_matches[0].length;
    263     // |inline_autocomplete_offset| may be beyond the end of the
    264     // |fill_into_edit| if the user has typed an URL with a scheme and the
    265     // last character typed is a slash.  That slash is removed by the
    266     // FormatURLWithOffsets call above.
    267     if (inline_autocomplete_offset < match.fill_into_edit.length()) {
    268       match.inline_autocompletion =
    269           match.fill_into_edit.substr(inline_autocomplete_offset);
    270     }
    271     match.allowed_to_be_default_match = match.inline_autocompletion.empty() ||
    272         !PreventInlineAutocomplete(autocomplete_input_);
    273   }
    274   match.EnsureUWYTIsAllowedToBeDefault(
    275       autocomplete_input_.canonicalized_url(),
    276       TemplateURLServiceFactory::GetForProfile(profile_));
    277 
    278   // Format the description autocomplete presentation.
    279   match.description = info.title();
    280   match.description_class = SpansFromTermMatch(
    281       history_match.title_matches(), match.description.length(), false);
    282 
    283   match.RecordAdditionalInfo("typed count", info.typed_count());
    284   match.RecordAdditionalInfo("visit count", info.visit_count());
    285   match.RecordAdditionalInfo("last visit", info.last_visit());
    286 
    287   return match;
    288 }
    289 
    290 history::InMemoryURLIndex* HistoryQuickProvider::GetIndex() {
    291   if (index_for_testing_.get())
    292     return index_for_testing_.get();
    293 
    294   HistoryService* const history_service =
    295       HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS);
    296   if (!history_service)
    297     return NULL;
    298 
    299   return history_service->InMemoryIndex();
    300 }
    301