Home | History | Annotate | Download | only in autocomplete
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/autocomplete/bookmark_provider.h"
      6 
      7 #include <algorithm>
      8 #include <functional>
      9 #include <vector>
     10 
     11 #include "base/metrics/histogram.h"
     12 #include "base/prefs/pref_service.h"
     13 #include "base/time/time.h"
     14 #include "chrome/browser/autocomplete/autocomplete_result.h"
     15 #include "chrome/browser/bookmarks/bookmark_model.h"
     16 #include "chrome/browser/bookmarks/bookmark_model_factory.h"
     17 #include "chrome/browser/bookmarks/bookmark_title_match.h"
     18 #include "chrome/browser/profiles/profile.h"
     19 #include "chrome/common/pref_names.h"
     20 #include "net/base/net_util.h"
     21 
     22 typedef std::vector<BookmarkTitleMatch> TitleMatches;
     23 
     24 // BookmarkProvider ------------------------------------------------------------
     25 
     26 BookmarkProvider::BookmarkProvider(
     27     AutocompleteProviderListener* listener,
     28     Profile* profile)
     29     : AutocompleteProvider(listener, profile,
     30                            AutocompleteProvider::TYPE_BOOKMARK),
     31       bookmark_model_(NULL) {
     32   if (profile) {
     33     bookmark_model_ = BookmarkModelFactory::GetForProfile(profile);
     34     languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages);
     35   }
     36 }
     37 
     38 void BookmarkProvider::Start(const AutocompleteInput& input,
     39                              bool minimal_changes) {
     40   if (minimal_changes)
     41     return;
     42   matches_.clear();
     43 
     44   // Short-circuit any matching when inline autocompletion is disabled and
     45   // we're looking for BEST_MATCH because none of the BookmarkProvider's
     46   // matches can score high enough to qualify.
     47   if (input.text().empty() ||
     48       ((input.type() != AutocompleteInput::UNKNOWN) &&
     49        (input.type() != AutocompleteInput::QUERY)) ||
     50       ((input.matches_requested() == AutocompleteInput::BEST_MATCH) &&
     51        input.prevent_inline_autocomplete()))
     52     return;
     53 
     54   base::TimeTicks start_time = base::TimeTicks::Now();
     55   DoAutocomplete(input,
     56                  input.matches_requested() == AutocompleteInput::BEST_MATCH);
     57   UMA_HISTOGRAM_TIMES("Autocomplete.BookmarkProviderMatchTime",
     58                       base::TimeTicks::Now() - start_time);
     59 }
     60 
     61 BookmarkProvider::~BookmarkProvider() {}
     62 
     63 void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input,
     64                                       bool best_match) {
     65   // We may not have a bookmark model for some unit tests.
     66   if (!bookmark_model_)
     67     return;
     68 
     69   TitleMatches matches;
     70   // Retrieve enough bookmarks so that we have a reasonable probability of
     71   // suggesting the one that the user desires.
     72   const size_t kMaxBookmarkMatches = 50;
     73 
     74   // GetBookmarksWithTitlesMatching returns bookmarks matching the user's
     75   // search terms using the following rules:
     76   //  - The search text is broken up into search terms. Each term is searched
     77   //    for separately.
     78   //  - Term matches are always performed against the start of a word. 'def'
     79   //    will match against 'define' but not against 'indefinite'.
     80   //  - Terms must be at least three characters in length in order to perform
     81   //    partial word matches. Any term of lesser length will only be used as an
     82   //    exact match. 'def' will match against 'define' but 'de' will not match.
     83   //  - A search containing multiple terms will return results with those words
     84   //    occuring in any order.
     85   //  - Terms enclosed in quotes comprises a phrase that must match exactly.
     86   //  - Multiple terms enclosed in quotes will require those exact words in that
     87   //    exact order to match.
     88   //
     89   // Note: GetBookmarksWithTitlesMatching() will never return a match span
     90   // greater than the length of the title against which it is being matched,
     91   // nor can those spans ever overlap because the match spans are coalesced
     92   // for all matched terms.
     93   //
     94   // Please refer to the code for BookmarkIndex::GetBookmarksWithTitlesMatching
     95   // for complete details of how title searches are performed against the user's
     96   // bookmarks.
     97   bookmark_model_->GetBookmarksWithTitlesMatching(input.text(),
     98                                                   kMaxBookmarkMatches,
     99                                                   &matches);
    100   if (matches.empty())
    101     return;  // There were no matches.
    102   for (TitleMatches::const_iterator i = matches.begin(); i != matches.end();
    103        ++i) {
    104     // Create and score the AutocompleteMatch. If its score is 0 then the
    105     // match is discarded.
    106     AutocompleteMatch match(TitleMatchToACMatch(*i));
    107     if (match.relevance > 0)
    108       matches_.push_back(match);
    109   }
    110 
    111   // Sort and clip the resulting matches.
    112   size_t max_matches = best_match ? 1 : AutocompleteProvider::kMaxMatches;
    113   if (matches_.size() > max_matches) {
    114     std::partial_sort(matches_.begin(), matches_.end(),
    115                       matches_.begin() + max_matches,
    116                       AutocompleteMatch::MoreRelevant);
    117     matches_.resize(max_matches);
    118   } else {
    119     std::sort(matches_.begin(), matches_.end(),
    120               AutocompleteMatch::MoreRelevant);
    121   }
    122 }
    123 
    124 namespace {
    125 
    126 // for_each helper functor that calculates a match factor for each query term
    127 // when calculating the final score.
    128 //
    129 // Calculate a 'factor' from 0.0 to 1.0 based on 1) how much of the bookmark's
    130 // title the term matches, and 2) where the match is positioned within the
    131 // bookmark's title. A full length match earns a 1.0. A half-length match earns
    132 // at most a 0.5 and at least a 0.25. A single character match against a title
    133 // that is 100 characters long where the match is at the first character will
    134 // earn a 0.01 and at the last character will earn a 0.0001.
    135 class ScoringFunctor {
    136  public:
    137   // |title_length| is the length of the bookmark title against which this
    138   // match will be scored.
    139   explicit ScoringFunctor(size_t title_length)
    140       : title_length_(static_cast<double>(title_length)),
    141         scoring_factor_(0.0) {
    142   }
    143 
    144   void operator()(const Snippet::MatchPosition& match) {
    145     double term_length = static_cast<double>(match.second - match.first);
    146     scoring_factor_ += term_length / title_length_ *
    147         (title_length_ - match.first) / title_length_;
    148   }
    149 
    150   double ScoringFactor() { return scoring_factor_; }
    151 
    152  private:
    153   double title_length_;
    154   double scoring_factor_;
    155 };
    156 
    157 }  // namespace
    158 
    159 AutocompleteMatch BookmarkProvider::TitleMatchToACMatch(
    160     const BookmarkTitleMatch& title_match) {
    161   // The AutocompleteMatch we construct is non-deletable because the only
    162   // way to support this would be to delete the underlying bookmark, which is
    163   // unlikely to be what the user intends.
    164   AutocompleteMatch match(this, 0, false,
    165                           AutocompleteMatchType::BOOKMARK_TITLE);
    166   const string16& title(title_match.node->GetTitle());
    167   DCHECK(!title.empty());
    168   const GURL& url(title_match.node->url());
    169   match.destination_url = url;
    170   match.contents = net::FormatUrl(url, languages_,
    171       net::kFormatUrlOmitAll & net::kFormatUrlOmitHTTP,
    172       net::UnescapeRule::SPACES, NULL, NULL, NULL);
    173   match.contents_class.push_back(
    174       ACMatchClassification(0, ACMatchClassification::NONE));
    175   match.fill_into_edit =
    176       AutocompleteInput::FormattedStringWithEquivalentMeaning(url,
    177                                                               match.contents);
    178   match.description = title;
    179   match.description_class =
    180       ClassificationsFromMatch(title_match.match_positions,
    181                                match.description.size());
    182   match.starred = true;
    183 
    184   // Summary on how a relevance score is determined for the match:
    185   //
    186   // For each term matching within the bookmark's title (as given by the set of
    187   // Snippet::MatchPositions) calculate a 'factor', sum up those factors, then
    188   // use the sum to figure out a value between the base score and the maximum
    189   // score.
    190   //
    191   // The factor for each term is the product of:
    192   //
    193   //  1) how much of the bookmark's title has been matched by the term:
    194   //       (term length / title length).
    195   //
    196   //  Example: Given a bookmark title 'abcde fghijklm', with a title length
    197   //     of 14, and two different search terms, 'abcde' and 'fghijklm', with
    198   //     term lengths of 5 and 8, respectively, 'fghijklm' will score higher
    199   //     (with a partial factor of 8/14 = 0.571) than 'abcde' (5/14 = 0.357).
    200   //
    201   //  2) where the term match occurs within the bookmark's title, giving more
    202   //     points for matches that appear earlier in the title:
    203   //       ((title length - position of match start) / title_length).
    204   //
    205   //  Example: Given a bookmark title of 'abcde fghijklm', with a title length
    206   //     of 14, and two different search terms, 'abcde' and 'fghij', with
    207   //     start positions of 0 and 6, respectively, 'abcde' will score higher
    208   //     (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with
    209   //     a partial factor of (14-6)/14 = 0.571 ).
    210   //
    211   // Once all term factors have been calculated they are summed. The resulting
    212   // sum will never be greater than 1.0 because of the way the bookmark model
    213   // matches and removes overlaps. (In particular, the bookmark model only
    214   // matches terms to the beginning of words and it removes all overlapping
    215   // matches, keeping only the longest. Together these mean that each
    216   // character is included in at most one match. This property ensures the
    217   // sum of factors is at most 1.) This sum is then multiplied against the
    218   // scoring range available, which is 299. The 299 is calculated by
    219   // subtracting the minimum possible score, 900, from the maximum possible
    220   // score, 1199. This product, ranging from 0 to 299, is added to the minimum
    221   // possible score, 900, giving the preliminary score.
    222   //
    223   // If the preliminary score is less than the maximum possible score, 1199,
    224   // it can be boosted up to that maximum possible score if the URL referenced
    225   // by the bookmark is also referenced by any of the user's other bookmarks.
    226   // A count of how many times the bookmark's URL is referenced is determined
    227   // and, for each additional reference beyond the one for the bookmark being
    228   // scored up to a maximum of three, the score is boosted by a fixed amount
    229   // given by |kURLCountBoost|, below.
    230   //
    231   ScoringFunctor position_functor =
    232       for_each(title_match.match_positions.begin(),
    233                title_match.match_positions.end(), ScoringFunctor(title.size()));
    234   const int kBaseBookmarkScore = 900;
    235   const int kMaxBookmarkScore = AutocompleteResult::kLowestDefaultScore - 1;
    236   const double kBookmarkScoreRange =
    237       static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore);
    238   // It's not likely that GetBookmarksWithTitlesMatching will return overlapping
    239   // matches but let's play it safe.
    240   match.relevance = std::min(kMaxBookmarkScore,
    241       static_cast<int>(position_functor.ScoringFactor() * kBookmarkScoreRange) +
    242       kBaseBookmarkScore);
    243   // Don't waste any time searching for additional referenced URLs if we
    244   // already have a perfect title match.
    245   if (match.relevance >= kMaxBookmarkScore)
    246     return match;
    247   // Boost the score if the bookmark's URL is referenced by other bookmarks.
    248   const int kURLCountBoost[4] = { 0, 75, 125, 150 };
    249   std::vector<const BookmarkNode*> nodes;
    250   bookmark_model_->GetNodesByURL(url, &nodes);
    251   DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U);
    252   match.relevance +=
    253       kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1];
    254   match.relevance = std::min(kMaxBookmarkScore, match.relevance);
    255   return match;
    256 }
    257 
    258 // static
    259 ACMatchClassifications BookmarkProvider::ClassificationsFromMatch(
    260     const Snippet::MatchPositions& positions,
    261     size_t text_length) {
    262   ACMatchClassifications classifications;
    263   if (positions.empty()) {
    264     classifications.push_back(
    265         ACMatchClassification(0, ACMatchClassification::NONE));
    266     return classifications;
    267   }
    268 
    269   for (Snippet::MatchPositions::const_iterator i = positions.begin();
    270        i != positions.end(); ++i) {
    271     AutocompleteMatch::ACMatchClassifications new_class;
    272     AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first,
    273         text_length, 0, &new_class);
    274     classifications = AutocompleteMatch::MergeClassifications(
    275         classifications, new_class);
    276   }
    277   return classifications;
    278 }
    279