1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/autocomplete/bookmark_provider.h" 6 7 #include <algorithm> 8 #include <functional> 9 #include <vector> 10 11 #include "base/prefs/pref_service.h" 12 #include "base/strings/string_util.h" 13 #include "base/strings/utf_string_conversions.h" 14 #include "chrome/browser/autocomplete/chrome_autocomplete_scheme_classifier.h" 15 #include "chrome/browser/autocomplete/history_provider.h" 16 #include "chrome/browser/bookmarks/bookmark_model_factory.h" 17 #include "chrome/browser/profiles/profile.h" 18 #include "chrome/common/pref_names.h" 19 #include "components/bookmarks/browser/bookmark_match.h" 20 #include "components/bookmarks/browser/bookmark_model.h" 21 #include "components/metrics/proto/omnibox_input_type.pb.h" 22 #include "components/omnibox/autocomplete_result.h" 23 #include "components/omnibox/url_prefix.h" 24 #include "net/base/net_util.h" 25 26 using bookmarks::BookmarkMatch; 27 28 typedef std::vector<BookmarkMatch> BookmarkMatches; 29 30 namespace { 31 32 // Removes leading spaces from |title| before displaying, otherwise it looks 33 // funny. In the process, corrects |title_match_positions| so the correct 34 // characters are highlighted. 35 void CorrectTitleAndMatchPositions( 36 base::string16* title, 37 BookmarkMatch::MatchPositions* title_match_positions) { 38 size_t leading_whitespace_chars = title->length(); 39 base::TrimWhitespace(*title, base::TRIM_LEADING, title); 40 leading_whitespace_chars-= title->length(); 41 if (leading_whitespace_chars == 0) 42 return; 43 for (query_parser::Snippet::MatchPositions::iterator it = 44 title_match_positions->begin(); 45 it != title_match_positions->end(); ++it) { 46 (*it) = query_parser::Snippet::MatchPosition( 47 it->first - leading_whitespace_chars, 48 it->second - leading_whitespace_chars); 49 } 50 } 51 52 } // namespace 53 54 // BookmarkProvider ------------------------------------------------------------ 55 56 BookmarkProvider::BookmarkProvider(Profile* profile) 57 : AutocompleteProvider(AutocompleteProvider::TYPE_BOOKMARK), 58 profile_(profile), 59 bookmark_model_(NULL) { 60 if (profile) { 61 bookmark_model_ = BookmarkModelFactory::GetForProfile(profile); 62 languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages); 63 } 64 } 65 66 void BookmarkProvider::Start(const AutocompleteInput& input, 67 bool minimal_changes) { 68 if (minimal_changes) 69 return; 70 matches_.clear(); 71 72 if (input.text().empty() || 73 (input.type() == metrics::OmniboxInputType::FORCED_QUERY)) 74 return; 75 76 DoAutocomplete(input); 77 } 78 79 BookmarkProvider::~BookmarkProvider() {} 80 81 void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input) { 82 // We may not have a bookmark model for some unit tests. 83 if (!bookmark_model_) 84 return; 85 86 BookmarkMatches matches; 87 // Retrieve enough bookmarks so that we have a reasonable probability of 88 // suggesting the one that the user desires. 89 const size_t kMaxBookmarkMatches = 50; 90 91 // GetBookmarksMatching returns bookmarks matching the user's 92 // search terms using the following rules: 93 // - The search text is broken up into search terms. Each term is searched 94 // for separately. 95 // - Term matches are always performed against the start of a word. 'def' 96 // will match against 'define' but not against 'indefinite'. 97 // - Terms must be at least three characters in length in order to perform 98 // partial word matches. Any term of lesser length will only be used as an 99 // exact match. 'def' will match against 'define' but 'de' will not match. 100 // - A search containing multiple terms will return results with those words 101 // occuring in any order. 102 // - Terms enclosed in quotes comprises a phrase that must match exactly. 103 // - Multiple terms enclosed in quotes will require those exact words in that 104 // exact order to match. 105 // 106 // Please refer to the code for BookmarkIndex::GetBookmarksMatching for 107 // complete details of how searches are performed against the user's 108 // bookmarks. 109 bookmark_model_->GetBookmarksMatching(input.text(), 110 kMaxBookmarkMatches, 111 &matches); 112 if (matches.empty()) 113 return; // There were no matches. 114 const base::string16 fixed_up_input(FixupUserInput(input).second); 115 for (BookmarkMatches::const_iterator i = matches.begin(); i != matches.end(); 116 ++i) { 117 // Create and score the AutocompleteMatch. If its score is 0 then the 118 // match is discarded. 119 AutocompleteMatch match(BookmarkMatchToACMatch(input, fixed_up_input, *i)); 120 if (match.relevance > 0) 121 matches_.push_back(match); 122 } 123 124 // Sort and clip the resulting matches. 125 size_t num_matches = 126 std::min(matches_.size(), AutocompleteProvider::kMaxMatches); 127 std::partial_sort(matches_.begin(), matches_.begin() + num_matches, 128 matches_.end(), AutocompleteMatch::MoreRelevant); 129 matches_.resize(num_matches); 130 } 131 132 namespace { 133 134 // for_each helper functor that calculates a match factor for each query term 135 // when calculating the final score. 136 // 137 // Calculate a 'factor' from 0 to the bookmark's title length for a match 138 // based on 1) how many characters match and 2) where the match is positioned. 139 class ScoringFunctor { 140 public: 141 // |title_length| is the length of the bookmark title against which this 142 // match will be scored. 143 explicit ScoringFunctor(size_t title_length) 144 : title_length_(static_cast<double>(title_length)), 145 scoring_factor_(0.0) { 146 } 147 148 void operator()(const query_parser::Snippet::MatchPosition& match) { 149 double term_length = static_cast<double>(match.second - match.first); 150 scoring_factor_ += term_length * 151 (title_length_ - match.first) / title_length_; 152 } 153 154 double ScoringFactor() { return scoring_factor_; } 155 156 private: 157 double title_length_; 158 double scoring_factor_; 159 }; 160 161 } // namespace 162 163 AutocompleteMatch BookmarkProvider::BookmarkMatchToACMatch( 164 const AutocompleteInput& input, 165 const base::string16& fixed_up_input_text, 166 const BookmarkMatch& bookmark_match) { 167 // The AutocompleteMatch we construct is non-deletable because the only 168 // way to support this would be to delete the underlying bookmark, which is 169 // unlikely to be what the user intends. 170 AutocompleteMatch match(this, 0, false, 171 AutocompleteMatchType::BOOKMARK_TITLE); 172 base::string16 title(bookmark_match.node->GetTitle()); 173 BookmarkMatch::MatchPositions new_title_match_positions = 174 bookmark_match.title_match_positions; 175 CorrectTitleAndMatchPositions(&title, &new_title_match_positions); 176 const GURL& url(bookmark_match.node->url()); 177 const base::string16& url_utf16 = base::UTF8ToUTF16(url.spec()); 178 size_t inline_autocomplete_offset = URLPrefix::GetInlineAutocompleteOffset( 179 input.text(), fixed_up_input_text, false, url_utf16); 180 match.destination_url = url; 181 const size_t match_start = bookmark_match.url_match_positions.empty() ? 182 0 : bookmark_match.url_match_positions[0].first; 183 const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text()) && 184 ((match_start == base::string16::npos) || (match_start != 0)); 185 std::vector<size_t> offsets = BookmarkMatch::OffsetsFromMatchPositions( 186 bookmark_match.url_match_positions); 187 // In addition to knowing how |offsets| is transformed, we need to know how 188 // |inline_autocomplete_offset| is transformed. We add it to the end of 189 // |offsets|, compute how everything is transformed, then remove it from the 190 // end. 191 offsets.push_back(inline_autocomplete_offset); 192 match.contents = net::FormatUrlWithOffsets(url, languages_, 193 net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP), 194 net::UnescapeRule::SPACES, NULL, NULL, &offsets); 195 inline_autocomplete_offset = offsets.back(); 196 offsets.pop_back(); 197 BookmarkMatch::MatchPositions new_url_match_positions = 198 BookmarkMatch::ReplaceOffsetsInMatchPositions( 199 bookmark_match.url_match_positions, offsets); 200 match.contents_class = 201 ClassificationsFromMatch(new_url_match_positions, 202 match.contents.size(), 203 true); 204 match.fill_into_edit = 205 AutocompleteInput::FormattedStringWithEquivalentMeaning( 206 url, match.contents, ChromeAutocompleteSchemeClassifier(profile_)); 207 if (inline_autocomplete_offset != base::string16::npos) { 208 // |inline_autocomplete_offset| may be beyond the end of the 209 // |fill_into_edit| if the user has typed an URL with a scheme and the 210 // last character typed is a slash. That slash is removed by the 211 // FormatURLWithOffsets call above. 212 if (inline_autocomplete_offset < match.fill_into_edit.length()) { 213 match.inline_autocompletion = 214 match.fill_into_edit.substr(inline_autocomplete_offset); 215 } 216 match.allowed_to_be_default_match = match.inline_autocompletion.empty() || 217 !HistoryProvider::PreventInlineAutocomplete(input); 218 } 219 match.description = title; 220 match.description_class = 221 ClassificationsFromMatch(bookmark_match.title_match_positions, 222 match.description.size(), 223 false); 224 225 // Summary on how a relevance score is determined for the match: 226 // 227 // For each match within the bookmark's title or URL (or both), calculate a 228 // 'factor', sum up those factors, then use the sum to figure out a value 229 // between the base score and the maximum score. 230 // 231 // The factor for each match is the product of: 232 // 233 // 1) how many characters in the bookmark's title/URL are part of this match. 234 // This is capped at the length of the bookmark's title 235 // to prevent terms that match in both the title and the URL from 236 // scoring too strongly. 237 // 238 // 2) where the match occurs within the bookmark's title or URL, 239 // giving more points for matches that appear earlier in the string: 240 // ((string_length - position of match start) / string_length). 241 // 242 // Example: Given a bookmark title of 'abcde fghijklm', with a title length 243 // of 14, and two different search terms, 'abcde' and 'fghij', with 244 // start positions of 0 and 6, respectively, 'abcde' will score higher 245 // (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with 246 // a partial factor of (14-6)/14 = 0.571 ). (In this example neither 247 // term matches in the URL.) 248 // 249 // Once all match factors have been calculated they are summed. If there 250 // are no URL matches, the resulting sum will never be greater than the 251 // length of the bookmark title because of the way the bookmark model matches 252 // and removes overlaps. (In particular, the bookmark model only 253 // matches terms to the beginning of words and it removes all overlapping 254 // matches, keeping only the longest. Together these mean that each 255 // character is included in at most one match.) If there are matches in the 256 // URL, the sum can be greater. 257 // 258 // This sum is then normalized by the length of the bookmark title + 10 259 // and capped at 1.0. The +10 is to expand the scoring range so fewer 260 // bookmarks will hit the 1.0 cap and hence lose all ability to distinguish 261 // between these high-quality bookmarks. 262 // 263 // The normalized value is multiplied against the scoring range available, 264 // which is 299. The 299 is calculated by subtracting the minimum possible 265 // score, 900, from the maximum possible score, 1199. This product, ranging 266 // from 0 to 299, is added to the minimum possible score, 900, giving the 267 // preliminary score. 268 // 269 // If the preliminary score is less than the maximum possible score, 1199, 270 // it can be boosted up to that maximum possible score if the URL referenced 271 // by the bookmark is also referenced by any of the user's other bookmarks. 272 // A count of how many times the bookmark's URL is referenced is determined 273 // and, for each additional reference beyond the one for the bookmark being 274 // scored up to a maximum of three, the score is boosted by a fixed amount 275 // given by |kURLCountBoost|, below. 276 // 277 278 // Pretend empty titles are identical to the URL. 279 if (title.empty()) 280 title = base::ASCIIToUTF16(url.spec()); 281 ScoringFunctor title_position_functor = 282 for_each(bookmark_match.title_match_positions.begin(), 283 bookmark_match.title_match_positions.end(), 284 ScoringFunctor(title.size())); 285 ScoringFunctor url_position_functor = 286 for_each(bookmark_match.url_match_positions.begin(), 287 bookmark_match.url_match_positions.end(), 288 ScoringFunctor(bookmark_match.node->url().spec().length())); 289 const double summed_factors = title_position_functor.ScoringFactor() + 290 url_position_functor.ScoringFactor(); 291 const double normalized_sum = 292 std::min(summed_factors / (title.size() + 10), 1.0); 293 const int kBaseBookmarkScore = 900; 294 const int kMaxBookmarkScore = 1199; 295 const double kBookmarkScoreRange = 296 static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore); 297 match.relevance = static_cast<int>(normalized_sum * kBookmarkScoreRange) + 298 kBaseBookmarkScore; 299 // Don't waste any time searching for additional referenced URLs if we 300 // already have a perfect title match. 301 if (match.relevance >= kMaxBookmarkScore) 302 return match; 303 // Boost the score if the bookmark's URL is referenced by other bookmarks. 304 const int kURLCountBoost[4] = { 0, 75, 125, 150 }; 305 std::vector<const BookmarkNode*> nodes; 306 bookmark_model_->GetNodesByURL(url, &nodes); 307 DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U); 308 match.relevance += 309 kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1]; 310 match.relevance = std::min(kMaxBookmarkScore, match.relevance); 311 return match; 312 } 313 314 // static 315 ACMatchClassifications BookmarkProvider::ClassificationsFromMatch( 316 const query_parser::Snippet::MatchPositions& positions, 317 size_t text_length, 318 bool is_url) { 319 ACMatchClassification::Style url_style = 320 is_url ? ACMatchClassification::URL : ACMatchClassification::NONE; 321 ACMatchClassifications classifications; 322 if (positions.empty()) { 323 if (text_length > 0) 324 classifications.push_back(ACMatchClassification(0, url_style)); 325 return classifications; 326 } 327 328 for (query_parser::Snippet::MatchPositions::const_iterator i = 329 positions.begin(); 330 i != positions.end(); 331 ++i) { 332 AutocompleteMatch::ACMatchClassifications new_class; 333 AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first, 334 text_length, url_style, &new_class); 335 classifications = AutocompleteMatch::MergeClassifications( 336 classifications, new_class); 337 } 338 return classifications; 339 } 340