Home | History | Annotate | Download | only in autocomplete
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/autocomplete/bookmark_provider.h"
      6 
      7 #include <algorithm>
      8 #include <functional>
      9 #include <vector>
     10 
     11 #include "base/prefs/pref_service.h"
     12 #include "base/strings/utf_string_conversions.h"
     13 #include "chrome/browser/autocomplete/autocomplete_result.h"
     14 #include "chrome/browser/autocomplete/history_provider.h"
     15 #include "chrome/browser/bookmarks/bookmark_model_factory.h"
     16 #include "chrome/browser/omnibox/omnibox_field_trial.h"
     17 #include "chrome/browser/profiles/profile.h"
     18 #include "chrome/common/pref_names.h"
     19 #include "components/autocomplete/url_prefix.h"
     20 #include "components/bookmarks/browser/bookmark_match.h"
     21 #include "components/bookmarks/browser/bookmark_model.h"
     22 #include "components/metrics/proto/omnibox_input_type.pb.h"
     23 #include "net/base/net_util.h"
     24 
     25 using bookmarks::BookmarkMatch;
     26 
     27 typedef std::vector<BookmarkMatch> BookmarkMatches;
     28 
     29 // BookmarkProvider ------------------------------------------------------------
     30 
     31 BookmarkProvider::BookmarkProvider(
     32     AutocompleteProviderListener* listener,
     33     Profile* profile)
     34     : AutocompleteProvider(listener, profile,
     35                            AutocompleteProvider::TYPE_BOOKMARK),
     36       bookmark_model_(NULL),
     37       score_using_url_matches_(OmniboxFieldTrial::BookmarksIndexURLsValue()) {
     38   if (profile) {
     39     bookmark_model_ = BookmarkModelFactory::GetForProfile(profile);
     40     languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages);
     41   }
     42 }
     43 
     44 void BookmarkProvider::Start(const AutocompleteInput& input,
     45                              bool minimal_changes) {
     46   if (minimal_changes)
     47     return;
     48   matches_.clear();
     49 
     50   if (input.text().empty() ||
     51       (input.type() == metrics::OmniboxInputType::FORCED_QUERY))
     52     return;
     53 
     54   DoAutocomplete(input);
     55 }
     56 
     57 BookmarkProvider::~BookmarkProvider() {}
     58 
     59 void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input) {
     60   // We may not have a bookmark model for some unit tests.
     61   if (!bookmark_model_)
     62     return;
     63 
     64   BookmarkMatches matches;
     65   // Retrieve enough bookmarks so that we have a reasonable probability of
     66   // suggesting the one that the user desires.
     67   const size_t kMaxBookmarkMatches = 50;
     68 
     69   // GetBookmarksMatching returns bookmarks matching the user's
     70   // search terms using the following rules:
     71   //  - The search text is broken up into search terms. Each term is searched
     72   //    for separately.
     73   //  - Term matches are always performed against the start of a word. 'def'
     74   //    will match against 'define' but not against 'indefinite'.
     75   //  - Terms must be at least three characters in length in order to perform
     76   //    partial word matches. Any term of lesser length will only be used as an
     77   //    exact match. 'def' will match against 'define' but 'de' will not match.
     78   //  - A search containing multiple terms will return results with those words
     79   //    occuring in any order.
     80   //  - Terms enclosed in quotes comprises a phrase that must match exactly.
     81   //  - Multiple terms enclosed in quotes will require those exact words in that
     82   //    exact order to match.
     83   //
     84   // Please refer to the code for BookmarkIndex::GetBookmarksMatching for
     85   // complete details of how searches are performed against the user's
     86   // bookmarks.
     87   bookmark_model_->GetBookmarksMatching(input.text(),
     88                                         kMaxBookmarkMatches,
     89                                         &matches);
     90   if (matches.empty())
     91     return;  // There were no matches.
     92   const base::string16 fixed_up_input(FixupUserInput(input).second);
     93   for (BookmarkMatches::const_iterator i = matches.begin(); i != matches.end();
     94        ++i) {
     95     // Create and score the AutocompleteMatch. If its score is 0 then the
     96     // match is discarded.
     97     AutocompleteMatch match(BookmarkMatchToACMatch(input, fixed_up_input, *i));
     98     if (match.relevance > 0)
     99       matches_.push_back(match);
    100   }
    101 
    102   // Sort and clip the resulting matches.
    103   size_t num_matches =
    104       std::min(matches_.size(), AutocompleteProvider::kMaxMatches);
    105   std::partial_sort(matches_.begin(), matches_.begin() + num_matches,
    106                     matches_.end(), AutocompleteMatch::MoreRelevant);
    107   matches_.resize(num_matches);
    108 }
    109 
    110 namespace {
    111 
    112 // for_each helper functor that calculates a match factor for each query term
    113 // when calculating the final score.
    114 //
    115 // Calculate a 'factor' from 0 to the bookmark's title length for a match
    116 // based on 1) how many characters match and 2) where the match is positioned.
    117 class ScoringFunctor {
    118  public:
    119   // |title_length| is the length of the bookmark title against which this
    120   // match will be scored.
    121   explicit ScoringFunctor(size_t title_length)
    122       : title_length_(static_cast<double>(title_length)),
    123         scoring_factor_(0.0) {
    124   }
    125 
    126   void operator()(const query_parser::Snippet::MatchPosition& match) {
    127     double term_length = static_cast<double>(match.second - match.first);
    128     scoring_factor_ += term_length *
    129         (title_length_ - match.first) / title_length_;
    130   }
    131 
    132   double ScoringFactor() { return scoring_factor_; }
    133 
    134  private:
    135   double title_length_;
    136   double scoring_factor_;
    137 };
    138 
    139 }  // namespace
    140 
    141 AutocompleteMatch BookmarkProvider::BookmarkMatchToACMatch(
    142     const AutocompleteInput& input,
    143     const base::string16& fixed_up_input_text,
    144     const BookmarkMatch& bookmark_match) {
    145   // The AutocompleteMatch we construct is non-deletable because the only
    146   // way to support this would be to delete the underlying bookmark, which is
    147   // unlikely to be what the user intends.
    148   AutocompleteMatch match(this, 0, false,
    149                           AutocompleteMatchType::BOOKMARK_TITLE);
    150   base::string16 title(bookmark_match.node->GetTitle());
    151   const GURL& url(bookmark_match.node->url());
    152   const base::string16& url_utf16 = base::UTF8ToUTF16(url.spec());
    153   size_t inline_autocomplete_offset = URLPrefix::GetInlineAutocompleteOffset(
    154       input.text(), fixed_up_input_text, false, url_utf16);
    155   match.destination_url = url;
    156   const size_t match_start = bookmark_match.url_match_positions.empty() ?
    157       0 : bookmark_match.url_match_positions[0].first;
    158   const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text()) &&
    159       ((match_start == base::string16::npos) || (match_start != 0));
    160   std::vector<size_t> offsets = BookmarkMatch::OffsetsFromMatchPositions(
    161       bookmark_match.url_match_positions);
    162   // In addition to knowing how |offsets| is transformed, we need to know how
    163   // |inline_autocomplete_offset| is transformed.  We add it to the end of
    164   // |offsets|, compute how everything is transformed, then remove it from the
    165   // end.
    166   offsets.push_back(inline_autocomplete_offset);
    167   match.contents = net::FormatUrlWithOffsets(url, languages_,
    168       net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP),
    169       net::UnescapeRule::SPACES, NULL, NULL, &offsets);
    170   inline_autocomplete_offset = offsets.back();
    171   offsets.pop_back();
    172   BookmarkMatch::MatchPositions new_url_match_positions =
    173       BookmarkMatch::ReplaceOffsetsInMatchPositions(
    174           bookmark_match.url_match_positions, offsets);
    175   match.contents_class =
    176       ClassificationsFromMatch(new_url_match_positions,
    177                                match.contents.size(),
    178                                true);
    179   match.fill_into_edit =
    180       AutocompleteInput::FormattedStringWithEquivalentMeaning(url,
    181                                                               match.contents);
    182   if (inline_autocomplete_offset != base::string16::npos) {
    183     // |inline_autocomplete_offset| may be beyond the end of the
    184     // |fill_into_edit| if the user has typed an URL with a scheme and the
    185     // last character typed is a slash.  That slash is removed by the
    186     // FormatURLWithOffsets call above.
    187     if (inline_autocomplete_offset < match.fill_into_edit.length()) {
    188       match.inline_autocompletion =
    189           match.fill_into_edit.substr(inline_autocomplete_offset);
    190     }
    191     match.allowed_to_be_default_match = match.inline_autocompletion.empty() ||
    192         !HistoryProvider::PreventInlineAutocomplete(input);
    193   }
    194   match.description = title;
    195   match.description_class =
    196       ClassificationsFromMatch(bookmark_match.title_match_positions,
    197                                match.description.size(),
    198                                false);
    199   match.starred = true;
    200 
    201   // Summary on how a relevance score is determined for the match:
    202   //
    203   // For each match within the bookmark's title or URL (or both), calculate a
    204   // 'factor', sum up those factors, then use the sum to figure out a value
    205   // between the base score and the maximum score.
    206   //
    207   // The factor for each match is the product of:
    208   //
    209   //  1) how many characters in the bookmark's title/URL are part of this match.
    210   //     This is capped at the length of the bookmark's title
    211   //     to prevent terms that match in both the title and the URL from
    212   //     scoring too strongly.
    213   //
    214   //  2) where the match occurs within the bookmark's title or URL,
    215   //     giving more points for matches that appear earlier in the string:
    216   //       ((string_length - position of match start) / string_length).
    217   //
    218   //  Example: Given a bookmark title of 'abcde fghijklm', with a title length
    219   //     of 14, and two different search terms, 'abcde' and 'fghij', with
    220   //     start positions of 0 and 6, respectively, 'abcde' will score higher
    221   //     (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with
    222   //     a partial factor of (14-6)/14 = 0.571 ).  (In this example neither
    223   //     term matches in the URL.)
    224   //
    225   // Once all match factors have been calculated they are summed.  If URL
    226   // matches are not considered, the resulting sum will never be greater than
    227   // the length of the bookmark title because of the way the bookmark model
    228   // matches and removes overlaps.  (In particular, the bookmark model only
    229   // matches terms to the beginning of words and it removes all overlapping
    230   // matches, keeping only the longest.  Together these mean that each
    231   // character is included in at most one match.)  If URL matches are
    232   // considered, the sum can be greater.
    233   //
    234   // This sum is then normalized by the length of the bookmark title (if URL
    235   // matches are not considered) or by the length of the bookmark title + 10
    236   // (if URL matches are considered) and capped at 1.0.  (If URL matches
    237   // are considered, we want to expand the scoring range so fewer bookmarks
    238   // will hit the 1.0 cap and hence lose all ability to distinguish between
    239   // these high-quality bookmarks.)
    240   //
    241   // The normalized value is multiplied against the scoring range available,
    242   // which is 299.  The 299 is calculated by subtracting the minimum possible
    243   // score, 900, from the maximum possible score, 1199.  This product, ranging
    244   // from 0 to 299, is added to the minimum possible score, 900, giving the
    245   // preliminary score.
    246   //
    247   // If the preliminary score is less than the maximum possible score, 1199,
    248   // it can be boosted up to that maximum possible score if the URL referenced
    249   // by the bookmark is also referenced by any of the user's other bookmarks.
    250   // A count of how many times the bookmark's URL is referenced is determined
    251   // and, for each additional reference beyond the one for the bookmark being
    252   // scored up to a maximum of three, the score is boosted by a fixed amount
    253   // given by |kURLCountBoost|, below.
    254   //
    255   if (score_using_url_matches_) {
    256     // Pretend empty titles are identical to the URL.
    257     if (title.empty())
    258       title = base::ASCIIToUTF16(url.spec());
    259   } else {
    260     DCHECK(!title.empty());
    261   }
    262   ScoringFunctor title_position_functor =
    263       for_each(bookmark_match.title_match_positions.begin(),
    264                bookmark_match.title_match_positions.end(),
    265                ScoringFunctor(title.size()));
    266   ScoringFunctor url_position_functor =
    267       for_each(bookmark_match.url_match_positions.begin(),
    268                bookmark_match.url_match_positions.end(),
    269                ScoringFunctor(bookmark_match.node->url().spec().length()));
    270   const double summed_factors = title_position_functor.ScoringFactor() +
    271       (score_using_url_matches_ ? url_position_functor.ScoringFactor() : 0);
    272   const double normalized_sum = std::min(
    273       summed_factors / (title.size() + (score_using_url_matches_ ? 10 : 0)),
    274       1.0);
    275   const int kBaseBookmarkScore = 900;
    276   const int kMaxBookmarkScore = 1199;
    277   const double kBookmarkScoreRange =
    278       static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore);
    279   match.relevance = static_cast<int>(normalized_sum * kBookmarkScoreRange) +
    280       kBaseBookmarkScore;
    281   // Don't waste any time searching for additional referenced URLs if we
    282   // already have a perfect title match.
    283   if (match.relevance >= kMaxBookmarkScore)
    284     return match;
    285   // Boost the score if the bookmark's URL is referenced by other bookmarks.
    286   const int kURLCountBoost[4] = { 0, 75, 125, 150 };
    287   std::vector<const BookmarkNode*> nodes;
    288   bookmark_model_->GetNodesByURL(url, &nodes);
    289   DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U);
    290   match.relevance +=
    291       kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1];
    292   match.relevance = std::min(kMaxBookmarkScore, match.relevance);
    293   return match;
    294 }
    295 
    296 // static
    297 ACMatchClassifications BookmarkProvider::ClassificationsFromMatch(
    298     const query_parser::Snippet::MatchPositions& positions,
    299     size_t text_length,
    300     bool is_url) {
    301   ACMatchClassification::Style url_style =
    302       is_url ? ACMatchClassification::URL : ACMatchClassification::NONE;
    303   ACMatchClassifications classifications;
    304   if (positions.empty()) {
    305     classifications.push_back(
    306         ACMatchClassification(0, url_style));
    307     return classifications;
    308   }
    309 
    310   for (query_parser::Snippet::MatchPositions::const_iterator i =
    311            positions.begin();
    312        i != positions.end();
    313        ++i) {
    314     AutocompleteMatch::ACMatchClassifications new_class;
    315     AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first,
    316         text_length, url_style, &new_class);
    317     classifications = AutocompleteMatch::MergeClassifications(
    318         classifications, new_class);
    319   }
    320   return classifications;
    321 }
    322