Home | History | Annotate | Download | only in autocomplete
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/autocomplete/shortcuts_provider.h"
      6 
      7 #include <algorithm>
      8 #include <cmath>
      9 #include <map>
     10 #include <vector>
     11 
     12 #include "base/i18n/break_iterator.h"
     13 #include "base/i18n/case_conversion.h"
     14 #include "base/logging.h"
     15 #include "base/metrics/histogram.h"
     16 #include "base/prefs/pref_service.h"
     17 #include "base/strings/string_number_conversions.h"
     18 #include "base/strings/string_util.h"
     19 #include "base/strings/utf_string_conversions.h"
     20 #include "base/time/time.h"
     21 #include "chrome/browser/autocomplete/autocomplete_input.h"
     22 #include "chrome/browser/autocomplete/autocomplete_provider_listener.h"
     23 #include "chrome/browser/autocomplete/autocomplete_result.h"
     24 #include "chrome/browser/history/history_notifications.h"
     25 #include "chrome/browser/history/history_service.h"
     26 #include "chrome/browser/history/history_service_factory.h"
     27 #include "chrome/browser/history/shortcuts_backend_factory.h"
     28 #include "chrome/browser/omnibox/omnibox_field_trial.h"
     29 #include "chrome/browser/profiles/profile.h"
     30 #include "chrome/common/pref_names.h"
     31 #include "chrome/common/url_constants.h"
     32 #include "url/url_parse.h"
     33 
     34 namespace {
     35 
     36 class RemoveMatchPredicate {
     37  public:
     38   explicit RemoveMatchPredicate(const std::set<GURL>& urls)
     39       : urls_(urls) {
     40   }
     41   bool operator()(const AutocompleteMatch& match) {
     42     return urls_.find(match.destination_url) != urls_.end();
     43   }
     44  private:
     45   // Lifetime of the object is less than the lifetime of passed |urls|, so
     46   // it is safe to store reference.
     47   const std::set<GURL>& urls_;
     48 };
     49 
     50 }  // namespace
     51 
     52 ShortcutsProvider::ShortcutsProvider(AutocompleteProviderListener* listener,
     53                                      Profile* profile)
     54     : AutocompleteProvider(listener, profile,
     55           AutocompleteProvider::TYPE_SHORTCUTS),
     56       languages_(profile_->GetPrefs()->GetString(prefs::kAcceptLanguages)),
     57       initialized_(false) {
     58   scoped_refptr<history::ShortcutsBackend> backend =
     59       ShortcutsBackendFactory::GetForProfile(profile_);
     60   if (backend.get()) {
     61     backend->AddObserver(this);
     62     if (backend->initialized())
     63       initialized_ = true;
     64   }
     65 }
     66 
     67 void ShortcutsProvider::Start(const AutocompleteInput& input,
     68                               bool minimal_changes) {
     69   matches_.clear();
     70 
     71   if ((input.type() == AutocompleteInput::INVALID) ||
     72       (input.type() == AutocompleteInput::FORCED_QUERY))
     73     return;
     74 
     75   // None of our results are applicable for best match.
     76   if (input.matches_requested() == AutocompleteInput::BEST_MATCH)
     77     return;
     78 
     79   if (input.text().empty())
     80     return;
     81 
     82   if (!initialized_)
     83     return;
     84 
     85   base::TimeTicks start_time = base::TimeTicks::Now();
     86   GetMatches(input);
     87   if (input.text().length() < 6) {
     88     base::TimeTicks end_time = base::TimeTicks::Now();
     89     std::string name = "ShortcutsProvider.QueryIndexTime." +
     90         base::IntToString(input.text().size());
     91     base::HistogramBase* counter = base::Histogram::FactoryGet(
     92         name, 1, 1000, 50, base::Histogram::kUmaTargetedHistogramFlag);
     93     counter->Add(static_cast<int>((end_time - start_time).InMilliseconds()));
     94   }
     95   UpdateStarredStateOfMatches();
     96 }
     97 
     98 void ShortcutsProvider::DeleteMatch(const AutocompleteMatch& match) {
     99   // Copy the URL since DeleteMatchesWithURLs() will invalidate |match|.
    100   GURL url(match.destination_url);
    101 
    102   // When a user deletes a match, he probably means for the URL to disappear out
    103   // of history entirely. So nuke all shortcuts that map to this URL.
    104   std::set<GURL> urls;
    105   urls.insert(url);
    106   // Immediately delete matches and shortcuts with the URL, so we can update the
    107   // controller synchronously.
    108   DeleteShortcutsWithURLs(urls);
    109   DeleteMatchesWithURLs(urls);  // NOTE: |match| is now dead!
    110 
    111   // Delete the match from the history DB. This will eventually result in a
    112   // second call to DeleteShortcutsWithURLs(), which is harmless.
    113   HistoryService* const history_service =
    114       HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS);
    115 
    116   DCHECK(history_service && url.is_valid());
    117   history_service->DeleteURL(url);
    118 }
    119 
    120 ShortcutsProvider::~ShortcutsProvider() {
    121   scoped_refptr<history::ShortcutsBackend> backend =
    122       ShortcutsBackendFactory::GetForProfileIfExists(profile_);
    123   if (backend.get())
    124     backend->RemoveObserver(this);
    125 }
    126 
    127 void ShortcutsProvider::OnShortcutsLoaded() {
    128   initialized_ = true;
    129 }
    130 
    131 void ShortcutsProvider::DeleteMatchesWithURLs(const std::set<GURL>& urls) {
    132   std::remove_if(matches_.begin(), matches_.end(), RemoveMatchPredicate(urls));
    133   listener_->OnProviderUpdate(true);
    134 }
    135 
    136 void ShortcutsProvider::DeleteShortcutsWithURLs(const std::set<GURL>& urls) {
    137   scoped_refptr<history::ShortcutsBackend> backend =
    138       ShortcutsBackendFactory::GetForProfileIfExists(profile_);
    139   if (!backend.get())
    140     return;  // We are off the record.
    141   for (std::set<GURL>::const_iterator url = urls.begin(); url != urls.end();
    142        ++url)
    143     backend->DeleteShortcutsWithUrl(*url);
    144 }
    145 
    146 void ShortcutsProvider::GetMatches(const AutocompleteInput& input) {
    147   scoped_refptr<history::ShortcutsBackend> backend =
    148       ShortcutsBackendFactory::GetForProfileIfExists(profile_);
    149   if (!backend.get())
    150     return;
    151   // Get the URLs from the shortcuts database with keys that partially or
    152   // completely match the search term.
    153   string16 term_string(base::i18n::ToLower(input.text()));
    154   DCHECK(!term_string.empty());
    155 
    156   int max_relevance;
    157   if (!OmniboxFieldTrial::ShortcutsScoringMaxRelevance(
    158       input.current_page_classification(), &max_relevance))
    159     max_relevance = AutocompleteResult::kLowestDefaultScore - 1;
    160 
    161   for (history::ShortcutsBackend::ShortcutMap::const_iterator it =
    162            FindFirstMatch(term_string, backend.get());
    163        it != backend->shortcuts_map().end() &&
    164            StartsWith(it->first, term_string, true); ++it) {
    165     // Don't return shortcuts with zero relevance.
    166     int relevance = CalculateScore(term_string, it->second, max_relevance);
    167     if (relevance)
    168       matches_.push_back(ShortcutToACMatch(relevance, term_string, it->second));
    169   }
    170   std::partial_sort(matches_.begin(),
    171       matches_.begin() +
    172           std::min(AutocompleteProvider::kMaxMatches, matches_.size()),
    173       matches_.end(), &AutocompleteMatch::MoreRelevant);
    174   if (matches_.size() > AutocompleteProvider::kMaxMatches) {
    175     matches_.erase(matches_.begin() + AutocompleteProvider::kMaxMatches,
    176                    matches_.end());
    177   }
    178   // Reset relevance scores to guarantee no match is given a score that may
    179   // allow it to become the highest ranked match (i.e., the default match)
    180   // unless the omnibox will reorder matches as necessary to correct the
    181   // problem.  (Shortcuts matches are sometimes not inline-autocompletable
    182   // and, even when they are, the ShortcutsProvider does not bother to set
    183   // |inline_autocompletion|.  Hence these matches can never be displayed
    184   // corectly in the omnibox as the default match.)  In the process of
    185   // resetting scores, guarantee that all scores are decreasing (but do
    186   // not assign any scores below 1).
    187   if (!OmniboxFieldTrial::ReorderForLegalDefaultMatch(
    188       input.current_page_classification())) {
    189     int max_relevance = AutocompleteResult::kLowestDefaultScore - 1;
    190     for (ACMatches::iterator it = matches_.begin(); it != matches_.end();
    191         ++it) {
    192       max_relevance = std::min(max_relevance, it->relevance);
    193       it->relevance = max_relevance;
    194       if (max_relevance > 1)
    195         --max_relevance;
    196     }
    197   }
    198 }
    199 
    200 AutocompleteMatch ShortcutsProvider::ShortcutToACMatch(
    201     int relevance,
    202     const string16& term_string,
    203     const history::ShortcutsBackend::Shortcut& shortcut) {
    204   DCHECK(!term_string.empty());
    205   AutocompleteMatch match(this, relevance, true,
    206                           AutocompleteMatchType::HISTORY_TITLE);
    207   match.destination_url = shortcut.url;
    208   DCHECK(match.destination_url.is_valid());
    209   match.fill_into_edit = UTF8ToUTF16(shortcut.url.spec());
    210   match.contents = shortcut.contents;
    211   match.contents_class = shortcut.contents_class;
    212   match.description = shortcut.description;
    213   match.description_class = shortcut.description_class;
    214   match.RecordAdditionalInfo("number of hits", shortcut.number_of_hits);
    215   match.RecordAdditionalInfo("last access time", shortcut.last_access_time);
    216   match.RecordAdditionalInfo("original input text", UTF16ToUTF8(shortcut.text));
    217 
    218   // Try to mark pieces of the contents and description as matches if they
    219   // appear in |term_string|.
    220   WordMap terms_map(CreateWordMapForString(term_string));
    221   if (!terms_map.empty()) {
    222     match.contents_class = ClassifyAllMatchesInString(term_string, terms_map,
    223         match.contents, match.contents_class);
    224     match.description_class = ClassifyAllMatchesInString(term_string, terms_map,
    225         match.description, match.description_class);
    226   }
    227   return match;
    228 }
    229 
    230 // static
    231 ShortcutsProvider::WordMap ShortcutsProvider::CreateWordMapForString(
    232     const string16& text) {
    233   // First, convert |text| to a vector of the unique words in it.
    234   WordMap word_map;
    235   base::i18n::BreakIterator word_iter(text,
    236                                       base::i18n::BreakIterator::BREAK_WORD);
    237   if (!word_iter.Init())
    238     return word_map;
    239   std::vector<string16> words;
    240   while (word_iter.Advance()) {
    241     if (word_iter.IsWord())
    242       words.push_back(word_iter.GetString());
    243   }
    244   if (words.empty())
    245     return word_map;
    246   std::sort(words.begin(), words.end());
    247   words.erase(std::unique(words.begin(), words.end()), words.end());
    248 
    249   // Now create a map from (first character) to (words beginning with that
    250   // character).  We insert in reverse lexicographical order and rely on the
    251   // multimap preserving insertion order for values with the same key.  (This
    252   // is mandated in C++11, and part of that decision was based on a survey of
    253   // existing implementations that found that it was already true everywhere.)
    254   std::reverse(words.begin(), words.end());
    255   for (std::vector<string16>::const_iterator i(words.begin()); i != words.end();
    256        ++i)
    257     word_map.insert(std::make_pair((*i)[0], *i));
    258   return word_map;
    259 }
    260 
    261 // static
    262 ACMatchClassifications ShortcutsProvider::ClassifyAllMatchesInString(
    263     const string16& find_text,
    264     const WordMap& find_words,
    265     const string16& text,
    266     const ACMatchClassifications& original_class) {
    267   DCHECK(!find_text.empty());
    268   DCHECK(!find_words.empty());
    269 
    270   // The code below assumes |text| is nonempty and therefore the resulting
    271   // classification vector should always be nonempty as well.  Returning early
    272   // if |text| is empty assures we'll return the (correct) empty vector rather
    273   // than a vector with a single (0, NONE) match.
    274   if (text.empty())
    275     return original_class;
    276 
    277   // First check whether |text| begins with |find_text| and mark that whole
    278   // section as a match if so.
    279   string16 text_lowercase(base::i18n::ToLower(text));
    280   ACMatchClassifications match_class;
    281   size_t last_position = 0;
    282   if (StartsWith(text_lowercase, find_text, true)) {
    283     match_class.push_back(
    284         ACMatchClassification(0, ACMatchClassification::MATCH));
    285     last_position = find_text.length();
    286     // If |text_lowercase| is actually equal to |find_text|, we don't need to
    287     // (and in fact shouldn't) put a trailing NONE classification after the end
    288     // of the string.
    289     if (last_position < text_lowercase.length()) {
    290       match_class.push_back(
    291           ACMatchClassification(last_position, ACMatchClassification::NONE));
    292     }
    293   } else {
    294     // |match_class| should start at position 0.  If the first matching word is
    295     // found at position 0, this will be popped from the vector further down.
    296     match_class.push_back(
    297         ACMatchClassification(0, ACMatchClassification::NONE));
    298   }
    299 
    300   // Now, starting with |last_position|, check each character in
    301   // |text_lowercase| to see if we have words starting with that character in
    302   // |find_words|.  If so, check each of them to see if they match the portion
    303   // of |text_lowercase| beginning with |last_position|.  Accept the first
    304   // matching word found (which should be the longest possible match at this
    305   // location, given the construction of |find_words|) and add a MATCH region to
    306   // |match_class|, moving |last_position| to be after the matching word.  If we
    307   // found no matching words, move to the next character and repeat.
    308   while (last_position < text_lowercase.length()) {
    309     std::pair<WordMap::const_iterator, WordMap::const_iterator> range(
    310         find_words.equal_range(text_lowercase[last_position]));
    311     size_t next_character = last_position + 1;
    312     for (WordMap::const_iterator i(range.first); i != range.second; ++i) {
    313       const string16& word = i->second;
    314       size_t word_end = last_position + word.length();
    315       if ((word_end <= text_lowercase.length()) &&
    316           !text_lowercase.compare(last_position, word.length(), word)) {
    317         // Collapse adjacent ranges into one.
    318         if (match_class.back().offset == last_position)
    319           match_class.pop_back();
    320 
    321         AutocompleteMatch::AddLastClassificationIfNecessary(&match_class,
    322             last_position, ACMatchClassification::MATCH);
    323         if (word_end < text_lowercase.length()) {
    324           match_class.push_back(
    325               ACMatchClassification(word_end, ACMatchClassification::NONE));
    326         }
    327         last_position = word_end;
    328         break;
    329       }
    330     }
    331     last_position = std::max(last_position, next_character);
    332   }
    333 
    334   return AutocompleteMatch::MergeClassifications(original_class, match_class);
    335 }
    336 
    337 history::ShortcutsBackend::ShortcutMap::const_iterator
    338     ShortcutsProvider::FindFirstMatch(const string16& keyword,
    339                                       history::ShortcutsBackend* backend) {
    340   DCHECK(backend);
    341   history::ShortcutsBackend::ShortcutMap::const_iterator it =
    342       backend->shortcuts_map().lower_bound(keyword);
    343   // Lower bound not necessarily matches the keyword, check for item pointed by
    344   // the lower bound iterator to at least start with keyword.
    345   return ((it == backend->shortcuts_map().end()) ||
    346     StartsWith(it->first, keyword, true)) ? it :
    347     backend->shortcuts_map().end();
    348 }
    349 
    350 int ShortcutsProvider::CalculateScore(
    351     const string16& terms,
    352     const history::ShortcutsBackend::Shortcut& shortcut,
    353     int max_relevance) {
    354   DCHECK(!terms.empty());
    355   DCHECK_LE(terms.length(), shortcut.text.length());
    356 
    357   // The initial score is based on how much of the shortcut the user has typed.
    358   // Using the square root of the typed fraction boosts the base score rapidly
    359   // as characters are typed, compared with simply using the typed fraction
    360   // directly. This makes sense since the first characters typed are much more
    361   // important for determining how likely it is a user wants a particular
    362   // shortcut than are the remaining continued characters.
    363   double base_score = max_relevance *
    364       sqrt(static_cast<double>(terms.length()) / shortcut.text.length());
    365 
    366   // Then we decay this by half each week.
    367   const double kLn2 = 0.6931471805599453;
    368   base::TimeDelta time_passed = base::Time::Now() - shortcut.last_access_time;
    369   // Clamp to 0 in case time jumps backwards (e.g. due to DST).
    370   double decay_exponent = std::max(0.0, kLn2 * static_cast<double>(
    371       time_passed.InMicroseconds()) / base::Time::kMicrosecondsPerWeek);
    372 
    373   // We modulate the decay factor based on how many times the shortcut has been
    374   // used. Newly created shortcuts decay at full speed; otherwise, decaying by
    375   // half takes |n| times as much time, where n increases by
    376   // (1.0 / each 5 additional hits), up to a maximum of 5x as long.
    377   const double kMaxDecaySpeedDivisor = 5.0;
    378   const double kNumUsesPerDecaySpeedDivisorIncrement = 5.0;
    379   double decay_divisor = std::min(kMaxDecaySpeedDivisor,
    380       (shortcut.number_of_hits + kNumUsesPerDecaySpeedDivisorIncrement - 1) /
    381       kNumUsesPerDecaySpeedDivisorIncrement);
    382 
    383   return static_cast<int>((base_score / exp(decay_exponent / decay_divisor)) +
    384       0.5);
    385 }
    386