1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/autocomplete/bookmark_provider.h" 6 7 #include <algorithm> 8 #include <functional> 9 #include <vector> 10 11 #include "base/prefs/pref_service.h" 12 #include "base/strings/utf_string_conversions.h" 13 #include "chrome/browser/autocomplete/autocomplete_result.h" 14 #include "chrome/browser/autocomplete/history_provider.h" 15 #include "chrome/browser/bookmarks/bookmark_model_factory.h" 16 #include "chrome/browser/omnibox/omnibox_field_trial.h" 17 #include "chrome/browser/profiles/profile.h" 18 #include "chrome/common/pref_names.h" 19 #include "components/autocomplete/url_prefix.h" 20 #include "components/bookmarks/browser/bookmark_match.h" 21 #include "components/bookmarks/browser/bookmark_model.h" 22 #include "components/metrics/proto/omnibox_input_type.pb.h" 23 #include "net/base/net_util.h" 24 25 using bookmarks::BookmarkMatch; 26 27 typedef std::vector<BookmarkMatch> BookmarkMatches; 28 29 // BookmarkProvider ------------------------------------------------------------ 30 31 BookmarkProvider::BookmarkProvider( 32 AutocompleteProviderListener* listener, 33 Profile* profile) 34 : AutocompleteProvider(listener, profile, 35 AutocompleteProvider::TYPE_BOOKMARK), 36 bookmark_model_(NULL), 37 score_using_url_matches_(OmniboxFieldTrial::BookmarksIndexURLsValue()) { 38 if (profile) { 39 bookmark_model_ = BookmarkModelFactory::GetForProfile(profile); 40 languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages); 41 } 42 } 43 44 void BookmarkProvider::Start(const AutocompleteInput& input, 45 bool minimal_changes) { 46 if (minimal_changes) 47 return; 48 matches_.clear(); 49 50 if (input.text().empty() || 51 (input.type() == metrics::OmniboxInputType::FORCED_QUERY)) 52 return; 53 54 DoAutocomplete(input); 55 } 56 57 BookmarkProvider::~BookmarkProvider() {} 58 59 void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input) { 60 // We may not have a bookmark model for some unit tests. 61 if (!bookmark_model_) 62 return; 63 64 BookmarkMatches matches; 65 // Retrieve enough bookmarks so that we have a reasonable probability of 66 // suggesting the one that the user desires. 67 const size_t kMaxBookmarkMatches = 50; 68 69 // GetBookmarksMatching returns bookmarks matching the user's 70 // search terms using the following rules: 71 // - The search text is broken up into search terms. Each term is searched 72 // for separately. 73 // - Term matches are always performed against the start of a word. 'def' 74 // will match against 'define' but not against 'indefinite'. 75 // - Terms must be at least three characters in length in order to perform 76 // partial word matches. Any term of lesser length will only be used as an 77 // exact match. 'def' will match against 'define' but 'de' will not match. 78 // - A search containing multiple terms will return results with those words 79 // occuring in any order. 80 // - Terms enclosed in quotes comprises a phrase that must match exactly. 81 // - Multiple terms enclosed in quotes will require those exact words in that 82 // exact order to match. 83 // 84 // Please refer to the code for BookmarkIndex::GetBookmarksMatching for 85 // complete details of how searches are performed against the user's 86 // bookmarks. 87 bookmark_model_->GetBookmarksMatching(input.text(), 88 kMaxBookmarkMatches, 89 &matches); 90 if (matches.empty()) 91 return; // There were no matches. 92 const base::string16 fixed_up_input(FixupUserInput(input).second); 93 for (BookmarkMatches::const_iterator i = matches.begin(); i != matches.end(); 94 ++i) { 95 // Create and score the AutocompleteMatch. If its score is 0 then the 96 // match is discarded. 97 AutocompleteMatch match(BookmarkMatchToACMatch(input, fixed_up_input, *i)); 98 if (match.relevance > 0) 99 matches_.push_back(match); 100 } 101 102 // Sort and clip the resulting matches. 103 size_t num_matches = 104 std::min(matches_.size(), AutocompleteProvider::kMaxMatches); 105 std::partial_sort(matches_.begin(), matches_.begin() + num_matches, 106 matches_.end(), AutocompleteMatch::MoreRelevant); 107 matches_.resize(num_matches); 108 } 109 110 namespace { 111 112 // for_each helper functor that calculates a match factor for each query term 113 // when calculating the final score. 114 // 115 // Calculate a 'factor' from 0 to the bookmark's title length for a match 116 // based on 1) how many characters match and 2) where the match is positioned. 117 class ScoringFunctor { 118 public: 119 // |title_length| is the length of the bookmark title against which this 120 // match will be scored. 121 explicit ScoringFunctor(size_t title_length) 122 : title_length_(static_cast<double>(title_length)), 123 scoring_factor_(0.0) { 124 } 125 126 void operator()(const query_parser::Snippet::MatchPosition& match) { 127 double term_length = static_cast<double>(match.second - match.first); 128 scoring_factor_ += term_length * 129 (title_length_ - match.first) / title_length_; 130 } 131 132 double ScoringFactor() { return scoring_factor_; } 133 134 private: 135 double title_length_; 136 double scoring_factor_; 137 }; 138 139 } // namespace 140 141 AutocompleteMatch BookmarkProvider::BookmarkMatchToACMatch( 142 const AutocompleteInput& input, 143 const base::string16& fixed_up_input_text, 144 const BookmarkMatch& bookmark_match) { 145 // The AutocompleteMatch we construct is non-deletable because the only 146 // way to support this would be to delete the underlying bookmark, which is 147 // unlikely to be what the user intends. 148 AutocompleteMatch match(this, 0, false, 149 AutocompleteMatchType::BOOKMARK_TITLE); 150 base::string16 title(bookmark_match.node->GetTitle()); 151 const GURL& url(bookmark_match.node->url()); 152 const base::string16& url_utf16 = base::UTF8ToUTF16(url.spec()); 153 size_t inline_autocomplete_offset = URLPrefix::GetInlineAutocompleteOffset( 154 input.text(), fixed_up_input_text, false, url_utf16); 155 match.destination_url = url; 156 const size_t match_start = bookmark_match.url_match_positions.empty() ? 157 0 : bookmark_match.url_match_positions[0].first; 158 const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text()) && 159 ((match_start == base::string16::npos) || (match_start != 0)); 160 std::vector<size_t> offsets = BookmarkMatch::OffsetsFromMatchPositions( 161 bookmark_match.url_match_positions); 162 // In addition to knowing how |offsets| is transformed, we need to know how 163 // |inline_autocomplete_offset| is transformed. We add it to the end of 164 // |offsets|, compute how everything is transformed, then remove it from the 165 // end. 166 offsets.push_back(inline_autocomplete_offset); 167 match.contents = net::FormatUrlWithOffsets(url, languages_, 168 net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP), 169 net::UnescapeRule::SPACES, NULL, NULL, &offsets); 170 inline_autocomplete_offset = offsets.back(); 171 offsets.pop_back(); 172 BookmarkMatch::MatchPositions new_url_match_positions = 173 BookmarkMatch::ReplaceOffsetsInMatchPositions( 174 bookmark_match.url_match_positions, offsets); 175 match.contents_class = 176 ClassificationsFromMatch(new_url_match_positions, 177 match.contents.size(), 178 true); 179 match.fill_into_edit = 180 AutocompleteInput::FormattedStringWithEquivalentMeaning(url, 181 match.contents); 182 if (inline_autocomplete_offset != base::string16::npos) { 183 // |inline_autocomplete_offset| may be beyond the end of the 184 // |fill_into_edit| if the user has typed an URL with a scheme and the 185 // last character typed is a slash. That slash is removed by the 186 // FormatURLWithOffsets call above. 187 if (inline_autocomplete_offset < match.fill_into_edit.length()) { 188 match.inline_autocompletion = 189 match.fill_into_edit.substr(inline_autocomplete_offset); 190 } 191 match.allowed_to_be_default_match = match.inline_autocompletion.empty() || 192 !HistoryProvider::PreventInlineAutocomplete(input); 193 } 194 match.description = title; 195 match.description_class = 196 ClassificationsFromMatch(bookmark_match.title_match_positions, 197 match.description.size(), 198 false); 199 match.starred = true; 200 201 // Summary on how a relevance score is determined for the match: 202 // 203 // For each match within the bookmark's title or URL (or both), calculate a 204 // 'factor', sum up those factors, then use the sum to figure out a value 205 // between the base score and the maximum score. 206 // 207 // The factor for each match is the product of: 208 // 209 // 1) how many characters in the bookmark's title/URL are part of this match. 210 // This is capped at the length of the bookmark's title 211 // to prevent terms that match in both the title and the URL from 212 // scoring too strongly. 213 // 214 // 2) where the match occurs within the bookmark's title or URL, 215 // giving more points for matches that appear earlier in the string: 216 // ((string_length - position of match start) / string_length). 217 // 218 // Example: Given a bookmark title of 'abcde fghijklm', with a title length 219 // of 14, and two different search terms, 'abcde' and 'fghij', with 220 // start positions of 0 and 6, respectively, 'abcde' will score higher 221 // (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with 222 // a partial factor of (14-6)/14 = 0.571 ). (In this example neither 223 // term matches in the URL.) 224 // 225 // Once all match factors have been calculated they are summed. If URL 226 // matches are not considered, the resulting sum will never be greater than 227 // the length of the bookmark title because of the way the bookmark model 228 // matches and removes overlaps. (In particular, the bookmark model only 229 // matches terms to the beginning of words and it removes all overlapping 230 // matches, keeping only the longest. Together these mean that each 231 // character is included in at most one match.) If URL matches are 232 // considered, the sum can be greater. 233 // 234 // This sum is then normalized by the length of the bookmark title (if URL 235 // matches are not considered) or by the length of the bookmark title + 10 236 // (if URL matches are considered) and capped at 1.0. (If URL matches 237 // are considered, we want to expand the scoring range so fewer bookmarks 238 // will hit the 1.0 cap and hence lose all ability to distinguish between 239 // these high-quality bookmarks.) 240 // 241 // The normalized value is multiplied against the scoring range available, 242 // which is 299. The 299 is calculated by subtracting the minimum possible 243 // score, 900, from the maximum possible score, 1199. This product, ranging 244 // from 0 to 299, is added to the minimum possible score, 900, giving the 245 // preliminary score. 246 // 247 // If the preliminary score is less than the maximum possible score, 1199, 248 // it can be boosted up to that maximum possible score if the URL referenced 249 // by the bookmark is also referenced by any of the user's other bookmarks. 250 // A count of how many times the bookmark's URL is referenced is determined 251 // and, for each additional reference beyond the one for the bookmark being 252 // scored up to a maximum of three, the score is boosted by a fixed amount 253 // given by |kURLCountBoost|, below. 254 // 255 if (score_using_url_matches_) { 256 // Pretend empty titles are identical to the URL. 257 if (title.empty()) 258 title = base::ASCIIToUTF16(url.spec()); 259 } else { 260 DCHECK(!title.empty()); 261 } 262 ScoringFunctor title_position_functor = 263 for_each(bookmark_match.title_match_positions.begin(), 264 bookmark_match.title_match_positions.end(), 265 ScoringFunctor(title.size())); 266 ScoringFunctor url_position_functor = 267 for_each(bookmark_match.url_match_positions.begin(), 268 bookmark_match.url_match_positions.end(), 269 ScoringFunctor(bookmark_match.node->url().spec().length())); 270 const double summed_factors = title_position_functor.ScoringFactor() + 271 (score_using_url_matches_ ? url_position_functor.ScoringFactor() : 0); 272 const double normalized_sum = std::min( 273 summed_factors / (title.size() + (score_using_url_matches_ ? 10 : 0)), 274 1.0); 275 const int kBaseBookmarkScore = 900; 276 const int kMaxBookmarkScore = 1199; 277 const double kBookmarkScoreRange = 278 static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore); 279 match.relevance = static_cast<int>(normalized_sum * kBookmarkScoreRange) + 280 kBaseBookmarkScore; 281 // Don't waste any time searching for additional referenced URLs if we 282 // already have a perfect title match. 283 if (match.relevance >= kMaxBookmarkScore) 284 return match; 285 // Boost the score if the bookmark's URL is referenced by other bookmarks. 286 const int kURLCountBoost[4] = { 0, 75, 125, 150 }; 287 std::vector<const BookmarkNode*> nodes; 288 bookmark_model_->GetNodesByURL(url, &nodes); 289 DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U); 290 match.relevance += 291 kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1]; 292 match.relevance = std::min(kMaxBookmarkScore, match.relevance); 293 return match; 294 } 295 296 // static 297 ACMatchClassifications BookmarkProvider::ClassificationsFromMatch( 298 const query_parser::Snippet::MatchPositions& positions, 299 size_t text_length, 300 bool is_url) { 301 ACMatchClassification::Style url_style = 302 is_url ? ACMatchClassification::URL : ACMatchClassification::NONE; 303 ACMatchClassifications classifications; 304 if (positions.empty()) { 305 classifications.push_back( 306 ACMatchClassification(0, url_style)); 307 return classifications; 308 } 309 310 for (query_parser::Snippet::MatchPositions::const_iterator i = 311 positions.begin(); 312 i != positions.end(); 313 ++i) { 314 AutocompleteMatch::ACMatchClassifications new_class; 315 AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first, 316 text_length, url_style, &new_class); 317 classifications = AutocompleteMatch::MergeClassifications( 318 classifications, new_class); 319 } 320 return classifications; 321 } 322