1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/autocomplete/bookmark_provider.h" 6 7 #include <algorithm> 8 #include <functional> 9 #include <vector> 10 11 #include "base/metrics/histogram.h" 12 #include "base/prefs/pref_service.h" 13 #include "base/time/time.h" 14 #include "chrome/browser/autocomplete/autocomplete_result.h" 15 #include "chrome/browser/bookmarks/bookmark_model.h" 16 #include "chrome/browser/bookmarks/bookmark_model_factory.h" 17 #include "chrome/browser/bookmarks/bookmark_title_match.h" 18 #include "chrome/browser/profiles/profile.h" 19 #include "chrome/common/pref_names.h" 20 #include "net/base/net_util.h" 21 22 typedef std::vector<BookmarkTitleMatch> TitleMatches; 23 24 // BookmarkProvider ------------------------------------------------------------ 25 26 BookmarkProvider::BookmarkProvider( 27 AutocompleteProviderListener* listener, 28 Profile* profile) 29 : AutocompleteProvider(listener, profile, 30 AutocompleteProvider::TYPE_BOOKMARK), 31 bookmark_model_(NULL) { 32 if (profile) { 33 bookmark_model_ = BookmarkModelFactory::GetForProfile(profile); 34 languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages); 35 } 36 } 37 38 void BookmarkProvider::Start(const AutocompleteInput& input, 39 bool minimal_changes) { 40 if (minimal_changes) 41 return; 42 matches_.clear(); 43 44 // Short-circuit any matching when inline autocompletion is disabled and 45 // we're looking for BEST_MATCH because none of the BookmarkProvider's 46 // matches can score high enough to qualify. 47 if (input.text().empty() || 48 ((input.type() != AutocompleteInput::UNKNOWN) && 49 (input.type() != AutocompleteInput::QUERY)) || 50 ((input.matches_requested() == AutocompleteInput::BEST_MATCH) && 51 input.prevent_inline_autocomplete())) 52 return; 53 54 base::TimeTicks start_time = base::TimeTicks::Now(); 55 DoAutocomplete(input, 56 input.matches_requested() == AutocompleteInput::BEST_MATCH); 57 UMA_HISTOGRAM_TIMES("Autocomplete.BookmarkProviderMatchTime", 58 base::TimeTicks::Now() - start_time); 59 } 60 61 BookmarkProvider::~BookmarkProvider() {} 62 63 void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input, 64 bool best_match) { 65 // We may not have a bookmark model for some unit tests. 66 if (!bookmark_model_) 67 return; 68 69 TitleMatches matches; 70 // Retrieve enough bookmarks so that we have a reasonable probability of 71 // suggesting the one that the user desires. 72 const size_t kMaxBookmarkMatches = 50; 73 74 // GetBookmarksWithTitlesMatching returns bookmarks matching the user's 75 // search terms using the following rules: 76 // - The search text is broken up into search terms. Each term is searched 77 // for separately. 78 // - Term matches are always performed against the start of a word. 'def' 79 // will match against 'define' but not against 'indefinite'. 80 // - Terms must be at least three characters in length in order to perform 81 // partial word matches. Any term of lesser length will only be used as an 82 // exact match. 'def' will match against 'define' but 'de' will not match. 83 // - A search containing multiple terms will return results with those words 84 // occuring in any order. 85 // - Terms enclosed in quotes comprises a phrase that must match exactly. 86 // - Multiple terms enclosed in quotes will require those exact words in that 87 // exact order to match. 88 // 89 // Note: GetBookmarksWithTitlesMatching() will never return a match span 90 // greater than the length of the title against which it is being matched, 91 // nor can those spans ever overlap because the match spans are coalesced 92 // for all matched terms. 93 // 94 // Please refer to the code for BookmarkIndex::GetBookmarksWithTitlesMatching 95 // for complete details of how title searches are performed against the user's 96 // bookmarks. 97 bookmark_model_->GetBookmarksWithTitlesMatching(input.text(), 98 kMaxBookmarkMatches, 99 &matches); 100 if (matches.empty()) 101 return; // There were no matches. 102 for (TitleMatches::const_iterator i = matches.begin(); i != matches.end(); 103 ++i) { 104 // Create and score the AutocompleteMatch. If its score is 0 then the 105 // match is discarded. 106 AutocompleteMatch match(TitleMatchToACMatch(*i)); 107 if (match.relevance > 0) 108 matches_.push_back(match); 109 } 110 111 // Sort and clip the resulting matches. 112 size_t max_matches = best_match ? 1 : AutocompleteProvider::kMaxMatches; 113 if (matches_.size() > max_matches) { 114 std::partial_sort(matches_.begin(), matches_.end(), 115 matches_.begin() + max_matches, 116 AutocompleteMatch::MoreRelevant); 117 matches_.resize(max_matches); 118 } else { 119 std::sort(matches_.begin(), matches_.end(), 120 AutocompleteMatch::MoreRelevant); 121 } 122 } 123 124 namespace { 125 126 // for_each helper functor that calculates a match factor for each query term 127 // when calculating the final score. 128 // 129 // Calculate a 'factor' from 0.0 to 1.0 based on 1) how much of the bookmark's 130 // title the term matches, and 2) where the match is positioned within the 131 // bookmark's title. A full length match earns a 1.0. A half-length match earns 132 // at most a 0.5 and at least a 0.25. A single character match against a title 133 // that is 100 characters long where the match is at the first character will 134 // earn a 0.01 and at the last character will earn a 0.0001. 135 class ScoringFunctor { 136 public: 137 // |title_length| is the length of the bookmark title against which this 138 // match will be scored. 139 explicit ScoringFunctor(size_t title_length) 140 : title_length_(static_cast<double>(title_length)), 141 scoring_factor_(0.0) { 142 } 143 144 void operator()(const Snippet::MatchPosition& match) { 145 double term_length = static_cast<double>(match.second - match.first); 146 scoring_factor_ += term_length / title_length_ * 147 (title_length_ - match.first) / title_length_; 148 } 149 150 double ScoringFactor() { return scoring_factor_; } 151 152 private: 153 double title_length_; 154 double scoring_factor_; 155 }; 156 157 } // namespace 158 159 AutocompleteMatch BookmarkProvider::TitleMatchToACMatch( 160 const BookmarkTitleMatch& title_match) { 161 // The AutocompleteMatch we construct is non-deletable because the only 162 // way to support this would be to delete the underlying bookmark, which is 163 // unlikely to be what the user intends. 164 AutocompleteMatch match(this, 0, false, 165 AutocompleteMatchType::BOOKMARK_TITLE); 166 const string16& title(title_match.node->GetTitle()); 167 DCHECK(!title.empty()); 168 const GURL& url(title_match.node->url()); 169 match.destination_url = url; 170 match.contents = net::FormatUrl(url, languages_, 171 net::kFormatUrlOmitAll & net::kFormatUrlOmitHTTP, 172 net::UnescapeRule::SPACES, NULL, NULL, NULL); 173 match.contents_class.push_back( 174 ACMatchClassification(0, ACMatchClassification::NONE)); 175 match.fill_into_edit = 176 AutocompleteInput::FormattedStringWithEquivalentMeaning(url, 177 match.contents); 178 match.description = title; 179 match.description_class = 180 ClassificationsFromMatch(title_match.match_positions, 181 match.description.size()); 182 match.starred = true; 183 184 // Summary on how a relevance score is determined for the match: 185 // 186 // For each term matching within the bookmark's title (as given by the set of 187 // Snippet::MatchPositions) calculate a 'factor', sum up those factors, then 188 // use the sum to figure out a value between the base score and the maximum 189 // score. 190 // 191 // The factor for each term is the product of: 192 // 193 // 1) how much of the bookmark's title has been matched by the term: 194 // (term length / title length). 195 // 196 // Example: Given a bookmark title 'abcde fghijklm', with a title length 197 // of 14, and two different search terms, 'abcde' and 'fghijklm', with 198 // term lengths of 5 and 8, respectively, 'fghijklm' will score higher 199 // (with a partial factor of 8/14 = 0.571) than 'abcde' (5/14 = 0.357). 200 // 201 // 2) where the term match occurs within the bookmark's title, giving more 202 // points for matches that appear earlier in the title: 203 // ((title length - position of match start) / title_length). 204 // 205 // Example: Given a bookmark title of 'abcde fghijklm', with a title length 206 // of 14, and two different search terms, 'abcde' and 'fghij', with 207 // start positions of 0 and 6, respectively, 'abcde' will score higher 208 // (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with 209 // a partial factor of (14-6)/14 = 0.571 ). 210 // 211 // Once all term factors have been calculated they are summed. The resulting 212 // sum will never be greater than 1.0 because of the way the bookmark model 213 // matches and removes overlaps. (In particular, the bookmark model only 214 // matches terms to the beginning of words and it removes all overlapping 215 // matches, keeping only the longest. Together these mean that each 216 // character is included in at most one match. This property ensures the 217 // sum of factors is at most 1.) This sum is then multiplied against the 218 // scoring range available, which is 299. The 299 is calculated by 219 // subtracting the minimum possible score, 900, from the maximum possible 220 // score, 1199. This product, ranging from 0 to 299, is added to the minimum 221 // possible score, 900, giving the preliminary score. 222 // 223 // If the preliminary score is less than the maximum possible score, 1199, 224 // it can be boosted up to that maximum possible score if the URL referenced 225 // by the bookmark is also referenced by any of the user's other bookmarks. 226 // A count of how many times the bookmark's URL is referenced is determined 227 // and, for each additional reference beyond the one for the bookmark being 228 // scored up to a maximum of three, the score is boosted by a fixed amount 229 // given by |kURLCountBoost|, below. 230 // 231 ScoringFunctor position_functor = 232 for_each(title_match.match_positions.begin(), 233 title_match.match_positions.end(), ScoringFunctor(title.size())); 234 const int kBaseBookmarkScore = 900; 235 const int kMaxBookmarkScore = AutocompleteResult::kLowestDefaultScore - 1; 236 const double kBookmarkScoreRange = 237 static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore); 238 // It's not likely that GetBookmarksWithTitlesMatching will return overlapping 239 // matches but let's play it safe. 240 match.relevance = std::min(kMaxBookmarkScore, 241 static_cast<int>(position_functor.ScoringFactor() * kBookmarkScoreRange) + 242 kBaseBookmarkScore); 243 // Don't waste any time searching for additional referenced URLs if we 244 // already have a perfect title match. 245 if (match.relevance >= kMaxBookmarkScore) 246 return match; 247 // Boost the score if the bookmark's URL is referenced by other bookmarks. 248 const int kURLCountBoost[4] = { 0, 75, 125, 150 }; 249 std::vector<const BookmarkNode*> nodes; 250 bookmark_model_->GetNodesByURL(url, &nodes); 251 DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U); 252 match.relevance += 253 kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1]; 254 match.relevance = std::min(kMaxBookmarkScore, match.relevance); 255 return match; 256 } 257 258 // static 259 ACMatchClassifications BookmarkProvider::ClassificationsFromMatch( 260 const Snippet::MatchPositions& positions, 261 size_t text_length) { 262 ACMatchClassifications classifications; 263 if (positions.empty()) { 264 classifications.push_back( 265 ACMatchClassification(0, ACMatchClassification::NONE)); 266 return classifications; 267 } 268 269 for (Snippet::MatchPositions::const_iterator i = positions.begin(); 270 i != positions.end(); ++i) { 271 AutocompleteMatch::ACMatchClassifications new_class; 272 AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first, 273 text_length, 0, &new_class); 274 classifications = AutocompleteMatch::MergeClassifications( 275 classifications, new_class); 276 } 277 return classifications; 278 } 279