1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/autocomplete/bookmark_provider.h" 6 7 #include <algorithm> 8 #include <string> 9 #include <vector> 10 11 #include "base/memory/ref_counted.h" 12 #include "base/memory/scoped_ptr.h" 13 #include "base/strings/string16.h" 14 #include "base/strings/string_number_conversions.h" 15 #include "base/strings/string_split.h" 16 #include "base/strings/utf_string_conversions.h" 17 #include "chrome/browser/autocomplete/chrome_autocomplete_scheme_classifier.h" 18 #include "chrome/test/base/testing_profile.h" 19 #include "components/bookmarks/browser/bookmark_match.h" 20 #include "components/bookmarks/browser/bookmark_model.h" 21 #include "components/bookmarks/test/test_bookmark_client.h" 22 #include "components/metrics/proto/omnibox_event.pb.h" 23 #include "components/omnibox/autocomplete_provider.h" 24 #include "testing/gtest/include/gtest/gtest.h" 25 26 using bookmarks::BookmarkMatch; 27 28 // The bookmark corpus against which we will simulate searches. 29 struct BookmarksTestInfo { 30 std::string title; 31 std::string url; 32 } bookmark_provider_test_data[] = { 33 { "abc def", "http://www.catsanddogs.com/a" }, 34 { "abcde", "http://www.catsanddogs.com/b" }, 35 { "abcdef", "http://www.catsanddogs.com/c" }, 36 { "carry carbon carefully", "http://www.catsanddogs.com/d" }, 37 { "a definition", "http://www.catsanddogs.com/e" }, 38 { "ghi jkl", "http://www.catsanddogs.com/f" }, 39 { "jkl ghi", "http://www.catsanddogs.com/g" }, 40 { "frankly frankly frank", "http://www.catsanddogs.com/h" }, 41 { "foobar foobar", "http://www.foobar.com/" }, 42 { "domain", "http://www.domain.com/http/" }, 43 { "repeat", "http://www.repeat.com/1/repeat/2/" }, 44 // For testing inline_autocompletion. 45 { "http://blah.com/", "http://blah.com/" }, 46 { "http://fiddle.com/", "http://fiddle.com/" }, 47 { "http://www.www.com/", "http://www.www.com/" }, 48 { "chrome://version", "chrome://version" }, 49 { "chrome://omnibox", "chrome://omnibox" }, 50 // For testing ranking with different URLs. 51 { "achlorhydric featherheads resuscitates mockingbirds", 52 "http://www.manylongwords.com/1a" }, 53 { "achlorhydric mockingbirds resuscitates featherhead", 54 "http://www.manylongwords.com/2b" }, 55 { "featherhead resuscitates achlorhydric mockingbirds", 56 "http://www.manylongwords.com/3c" }, 57 { "mockingbirds resuscitates featherheads achlorhydric", 58 "http://www.manylongwords.com/4d" }, 59 // For testing URL boosting. (URLs referenced multiple times are boosted.) 60 { "burning worms #1", "http://www.burns.com/" }, 61 { "burning worms #2", "http://www.worms.com/" }, 62 { "worming burns #10", "http://www.burns.com/" }, 63 // For testing strange spacing in bookmark titles. 64 { " hello1 hello2 ", "http://whatever.com/" }, 65 { "", "http://emptytitle.com/" }, 66 }; 67 68 class BookmarkProviderTest : public testing::Test { 69 public: 70 BookmarkProviderTest(); 71 72 protected: 73 virtual void SetUp() OVERRIDE; 74 75 bookmarks::TestBookmarkClient client_; 76 scoped_ptr<TestingProfile> profile_; 77 scoped_ptr<BookmarkModel> model_; 78 scoped_refptr<BookmarkProvider> provider_; 79 80 private: 81 DISALLOW_COPY_AND_ASSIGN(BookmarkProviderTest); 82 }; 83 84 BookmarkProviderTest::BookmarkProviderTest() { 85 model_ = client_.CreateModel(); 86 } 87 88 void BookmarkProviderTest::SetUp() { 89 profile_.reset(new TestingProfile()); 90 DCHECK(profile_.get()); 91 provider_ = new BookmarkProvider(profile_.get()); 92 DCHECK(provider_.get()); 93 provider_->set_bookmark_model_for_testing(model_.get()); 94 95 const BookmarkNode* other_node = model_->other_node(); 96 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(bookmark_provider_test_data); ++i) { 97 const BookmarksTestInfo& cur(bookmark_provider_test_data[i]); 98 const GURL url(cur.url); 99 model_->AddURL(other_node, other_node->child_count(), 100 base::ASCIIToUTF16(cur.title), url); 101 } 102 } 103 104 // Structures and functions supporting the BookmarkProviderTest.Positions 105 // unit test. 106 107 struct TestBookmarkPosition { 108 TestBookmarkPosition(size_t begin, size_t end) 109 : begin(begin), end(end) {} 110 111 size_t begin; 112 size_t end; 113 }; 114 typedef std::vector<TestBookmarkPosition> TestBookmarkPositions; 115 116 // Return |positions| as a formatted string for unit test diagnostic output. 117 std::string TestBookmarkPositionsAsString( 118 const TestBookmarkPositions& positions) { 119 std::string position_string("{"); 120 for (TestBookmarkPositions::const_iterator i = positions.begin(); 121 i != positions.end(); ++i) { 122 if (i != positions.begin()) 123 position_string += ", "; 124 position_string += "{" + base::IntToString(i->begin) + ", " + 125 base::IntToString(i->end) + "}"; 126 } 127 position_string += "}\n"; 128 return position_string; 129 } 130 131 // Return the positions in |matches| as a formatted string for unit test 132 // diagnostic output. 133 base::string16 MatchesAsString16(const ACMatches& matches) { 134 base::string16 matches_string; 135 for (ACMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) { 136 matches_string.append(base::ASCIIToUTF16(" '")); 137 matches_string.append(i->description); 138 matches_string.append(base::ASCIIToUTF16("'\n")); 139 } 140 return matches_string; 141 } 142 143 // Comparison function for sorting search terms by descending length. 144 bool TestBookmarkPositionsEqual(const TestBookmarkPosition& pos_a, 145 const TestBookmarkPosition& pos_b) { 146 return pos_a.begin == pos_b.begin && pos_a.end == pos_b.end; 147 } 148 149 // Convience function to make comparing ACMatchClassifications against the 150 // test expectations structure easier. 151 TestBookmarkPositions PositionsFromAutocompleteMatch( 152 const AutocompleteMatch& match) { 153 TestBookmarkPositions positions; 154 bool started = false; 155 size_t start = 0; 156 for (AutocompleteMatch::ACMatchClassifications::const_iterator 157 i = match.description_class.begin(); 158 i != match.description_class.end(); ++i) { 159 if (i->style & AutocompleteMatch::ACMatchClassification::MATCH) { 160 // We have found the start of a match. 161 EXPECT_FALSE(started); 162 started = true; 163 start = i->offset; 164 } else if (started) { 165 // We have found the end of a match. 166 started = false; 167 positions.push_back(TestBookmarkPosition(start, i->offset)); 168 start = 0; 169 } 170 } 171 // Record the final position if the last match goes to the end of the 172 // candidate string. 173 if (started) 174 positions.push_back(TestBookmarkPosition(start, match.description.size())); 175 return positions; 176 } 177 178 // Convience function to make comparing test expectations structure against the 179 // actual ACMatchClassifications easier. 180 TestBookmarkPositions PositionsFromExpectations( 181 const size_t expectations[9][2]) { 182 TestBookmarkPositions positions; 183 size_t i = 0; 184 // The array is zero-terminated in the [1]th element. 185 while (expectations[i][1]) { 186 positions.push_back( 187 TestBookmarkPosition(expectations[i][0], expectations[i][1])); 188 ++i; 189 } 190 return positions; 191 } 192 193 TEST_F(BookmarkProviderTest, Positions) { 194 // Simulate searches. 195 // Description of |positions|: 196 // The first index represents the collection of positions for each expected 197 // match. The count of the actual subarrays in each instance of |query_data| 198 // must equal |match_count|. The second index represents each expected 199 // match position. The third index represents the |start| and |end| of the 200 // expected match's position within the |test_data|. This array must be 201 // terminated by an entry with a value of '0' for |end|. 202 // Example: 203 // Consider the line for 'def' below: 204 // {"def", 2, {{{4, 7}, {XXX, 0}}, {{2, 5}, {11, 14}, {XXX, 0}}}}, 205 // There are two expected matches: 206 // 0. {{4, 7}, {XXX, 0}} 207 // 1. {{2, 5}, {11 ,14}, {XXX, 0}} 208 // For the first match, [0], there is one match within the bookmark's title 209 // expected, {4, 7}, which maps to the 'def' within "abc def". The 'XXX' 210 // value is ignored. The second match, [1], indicates that two matches are 211 // expected within the bookmark title "a definite definition". In each case, 212 // the {XXX, 0} indicates the end of the subarray. Or: 213 // Match #1 Match #2 214 // ------------------ ---------------------------- 215 // Pos1 Term Pos1 Pos2 Term 216 // ------ -------- ------ -------- -------- 217 // {"def", 2, {{{4, 7}, {999, 0}}, {{2, 5}, {11, 14}, {999, 0}}}}, 218 // 219 struct QueryData { 220 const std::string query; 221 const size_t match_count; // This count must match the number of major 222 // elements in the following |positions| array. 223 const size_t positions[99][9][2]; 224 } query_data[] = { 225 // This first set is primarily for position detection validation. 226 {"abc", 3, {{{0, 3}, {0, 0}}, 227 {{0, 3}, {0, 0}}, 228 {{0, 3}, {0, 0}}}}, 229 {"abcde", 2, {{{0, 5}, {0, 0}}, 230 {{0, 5}, {0, 0}}}}, 231 {"foo bar", 0, {{{0, 0}}}}, 232 {"fooey bark", 0, {{{0, 0}}}}, 233 {"def", 2, {{{2, 5}, {0, 0}}, 234 {{4, 7}, {0, 0}}}}, 235 {"ghi jkl", 2, {{{0, 3}, {4, 7}, {0, 0}}, 236 {{0, 3}, {4, 7}, {0, 0}}}}, 237 // NB: GetBookmarksMatching(...) uses exact match for "a" in title or URL. 238 {"a", 2, {{{0, 1}, {0, 0}}, 239 {{0, 0}}}}, 240 {"a d", 0, {{{0, 0}}}}, 241 {"carry carbon", 1, {{{0, 5}, {6, 12}, {0, 0}}}}, 242 // NB: GetBookmarksMatching(...) sorts the match positions. 243 {"carbon carry", 1, {{{0, 5}, {6, 12}, {0, 0}}}}, 244 {"arbon", 0, {{{0, 0}}}}, 245 {"ar", 0, {{{0, 0}}}}, 246 {"arry", 0, {{{0, 0}}}}, 247 // Quoted terms are single terms. 248 {"\"carry carbon\"", 1, {{{0, 12}, {0, 0}}}}, 249 {"\"carry carbon\" care", 1, {{{0, 12}, {13, 17}, {0, 0}}}}, 250 // Quoted terms require complete word matches. 251 {"\"carry carbo\"", 0, {{{0, 0}}}}, 252 // This set uses duplicated and/or overlaps search terms in the title. 253 {"frank", 1, {{{0, 5}, {8, 13}, {16, 21}, {0, 0}}}}, 254 {"frankly", 1, {{{0, 7}, {8, 15}, {0, 0}}}}, 255 {"frankly frankly", 1, {{{0, 7}, {8, 15}, {0, 0}}}}, 256 {"foobar foo", 1, {{{0, 6}, {7, 13}, {0, 0}}}}, 257 {"foo foobar", 1, {{{0, 6}, {7, 13}, {0, 0}}}}, 258 // This ensures that leading whitespace in the title is removed. 259 {"hello", 1, {{{0, 5}, {7, 12}, {0, 0}}}}, 260 // This ensures that empty titles yield empty classifications. 261 {"emptytitle", 1, {}}, 262 }; 263 264 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) { 265 AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query), 266 base::string16::npos, base::string16(), GURL(), 267 metrics::OmniboxEventProto::INVALID_SPEC, false, 268 false, false, true, 269 ChromeAutocompleteSchemeClassifier(profile_.get())); 270 provider_->Start(input, false); 271 const ACMatches& matches(provider_->matches()); 272 // Validate number of results is as expected. 273 EXPECT_LE(matches.size(), query_data[i].match_count) 274 << "One or more of the following matches were unexpected:\n" 275 << MatchesAsString16(matches) 276 << "For query '" << query_data[i].query << "'."; 277 EXPECT_GE(matches.size(), query_data[i].match_count) 278 << "One or more expected matches are missing. Matches found:\n" 279 << MatchesAsString16(matches) 280 << "for query '" << query_data[i].query << "'."; 281 // Validate positions within each match is as expected. 282 for (size_t j = 0; j < matches.size(); ++j) { 283 // Collect the expected positions as a vector, collect the match's 284 // classifications for match positions as a vector, then compare. 285 TestBookmarkPositions expected_positions( 286 PositionsFromExpectations(query_data[i].positions[j])); 287 TestBookmarkPositions actual_positions( 288 PositionsFromAutocompleteMatch(matches[j])); 289 EXPECT_TRUE(std::equal(expected_positions.begin(), 290 expected_positions.end(), 291 actual_positions.begin(), 292 TestBookmarkPositionsEqual)) 293 << "EXPECTED: " << TestBookmarkPositionsAsString(expected_positions) 294 << "ACTUAL: " << TestBookmarkPositionsAsString(actual_positions) 295 << " for query: '" << query_data[i].query << "'."; 296 } 297 } 298 } 299 300 TEST_F(BookmarkProviderTest, Rankings) { 301 // Simulate searches. 302 struct QueryData { 303 const std::string query; 304 // |match_count| must match the number of elements in the following 305 // |matches| array. 306 const size_t match_count; 307 // |matches| specifies the titles for all bookmarks expected to be matched 308 // by the |query| 309 const std::string matches[3]; 310 } query_data[] = { 311 // Basic ranking test. 312 {"abc", 3, {"abcde", // Most complete match. 313 "abcdef", 314 "abc def"}}, // Least complete match. 315 {"ghi", 2, {"ghi jkl", // Matched earlier. 316 "jkl ghi", // Matched later. 317 ""}}, 318 // Rankings of exact-word matches with different URLs. 319 {"achlorhydric", 320 3, {"achlorhydric mockingbirds resuscitates featherhead", 321 "achlorhydric featherheads resuscitates mockingbirds", 322 "featherhead resuscitates achlorhydric mockingbirds"}}, 323 {"achlorhydric featherheads", 324 2, {"achlorhydric featherheads resuscitates mockingbirds", 325 "mockingbirds resuscitates featherheads achlorhydric", 326 ""}}, 327 {"mockingbirds resuscitates", 328 3, {"mockingbirds resuscitates featherheads achlorhydric", 329 "achlorhydric mockingbirds resuscitates featherhead", 330 "featherhead resuscitates achlorhydric mockingbirds"}}, 331 // Ranking of exact-word matches with URL boosts. 332 {"worms", 2, {"burning worms #1", // boosted 333 "burning worms #2", // not boosted 334 ""}}, 335 // Ranking of prefix matches with URL boost. 336 {"burn worm", 3, {"burning worms #1", // boosted 337 "worming burns #10", // boosted but longer title 338 "burning worms #2"}}, // not boosted 339 // A query of "worm burn" will have the same results. 340 {"worm burn", 3, {"burning worms #1", // boosted 341 "worming burns #10", // boosted but longer title 342 "burning worms #2"}}, // not boosted 343 }; 344 345 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) { 346 AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query), 347 base::string16::npos, base::string16(), GURL(), 348 metrics::OmniboxEventProto::INVALID_SPEC, false, 349 false, false, true, 350 ChromeAutocompleteSchemeClassifier(profile_.get())); 351 provider_->Start(input, false); 352 const ACMatches& matches(provider_->matches()); 353 // Validate number and content of results is as expected. 354 for (size_t j = 0; j < std::max(query_data[i].match_count, matches.size()); 355 ++j) { 356 EXPECT_LT(j, query_data[i].match_count) << " Unexpected match '" 357 << base::UTF16ToUTF8(matches[j].description) << "' for query: '" 358 << query_data[i].query << "'."; 359 if (j >= query_data[i].match_count) 360 continue; 361 EXPECT_LT(j, matches.size()) << " Missing match '" 362 << query_data[i].matches[j] << "' for query: '" 363 << query_data[i].query << "'."; 364 if (j >= matches.size()) 365 continue; 366 EXPECT_EQ(query_data[i].matches[j], 367 base::UTF16ToUTF8(matches[j].description)) 368 << " Mismatch at [" << base::IntToString(j) << "] for query '" 369 << query_data[i].query << "'."; 370 } 371 } 372 } 373 374 TEST_F(BookmarkProviderTest, InlineAutocompletion) { 375 // Simulate searches. 376 struct QueryData { 377 const std::string query; 378 const std::string url; 379 const bool allowed_to_be_default_match; 380 const std::string inline_autocompletion; 381 } query_data[] = { 382 { "bla", "http://blah.com/", true, "h.com" }, 383 { "blah ", "http://blah.com/", false, ".com" }, 384 { "http://bl", "http://blah.com/", true, "ah.com" }, 385 { "fiddle.c", "http://fiddle.com/", true, "om" }, 386 { "www", "http://www.www.com/", true, ".com" }, 387 { "chro", "chrome://version", true, "me://version" }, 388 { "chrome://ve", "chrome://version", true, "rsion" }, 389 { "chrome ver", "chrome://version", false, "" }, 390 { "versi", "chrome://version", false, "" }, 391 { "abou", "chrome://omnibox", false, "" }, 392 { "about:om", "chrome://omnibox", true, "nibox" } 393 // Note: when adding a new URL to this test, be sure to add it to the list 394 // of bookmarks at the top of the file as well. All items in this list 395 // need to be in the bookmarks list because BookmarkProvider's 396 // TitleMatchToACMatch() has an assertion that verifies the URL is 397 // actually bookmarked. 398 }; 399 400 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) { 401 const std::string description = "for query=" + query_data[i].query + 402 " and url=" + query_data[i].url; 403 AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query), 404 base::string16::npos, base::string16(), GURL(), 405 metrics::OmniboxEventProto::INVALID_SPEC, false, 406 false, false, true, 407 ChromeAutocompleteSchemeClassifier(profile_.get())); 408 const base::string16 fixed_up_input( 409 provider_->FixupUserInput(input).second); 410 BookmarkNode node(GURL(query_data[i].url)); 411 node.SetTitle(base::ASCIIToUTF16(query_data[i].url)); 412 BookmarkMatch bookmark_match; 413 bookmark_match.node = &node; 414 const AutocompleteMatch& ac_match = provider_->BookmarkMatchToACMatch( 415 input, fixed_up_input, bookmark_match); 416 EXPECT_EQ(query_data[i].allowed_to_be_default_match, 417 ac_match.allowed_to_be_default_match) << description; 418 EXPECT_EQ(base::ASCIIToUTF16(query_data[i].inline_autocompletion), 419 ac_match.inline_autocompletion) << description; 420 } 421 } 422 423 TEST_F(BookmarkProviderTest, StripHttpAndAdjustOffsets) { 424 // Simulate searches. 425 struct QueryData { 426 const std::string query; 427 const std::string expected_contents; 428 // |expected_contents_class| is in format offset:style,offset:style,... 429 const std::string expected_contents_class; 430 } query_data[] = { 431 { "foo", "www.foobar.com", "0:1,4:3,7:1" }, 432 { "www foo", "www.foobar.com", "0:3,3:1,4:3,7:1" }, 433 { "foo www", "www.foobar.com", "0:3,3:1,4:3,7:1" }, 434 { "foo http", "http://www.foobar.com", "0:3,4:1,11:3,14:1" }, 435 { "blah", "blah.com", "0:3,4:1" }, 436 { "http blah", "http://blah.com", "0:3,4:1,7:3,11:1" }, 437 { "dom", "www.domain.com/http/", "0:1,4:3,7:1" }, 438 { "dom http", "http://www.domain.com/http/", 439 "0:3,4:1,11:3,14:1,22:3,26:1" }, 440 { "rep", "www.repeat.com/1/repeat/2/", "0:1,4:3,7:1,17:3,20:1" }, 441 { "versi", "chrome://version", "0:1,9:3,14:1" } 442 }; 443 444 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) { 445 std::string description = "for query=" + query_data[i].query; 446 AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query), 447 base::string16::npos, base::string16(), GURL(), 448 metrics::OmniboxEventProto::INVALID_SPEC, false, 449 false, false, true, 450 ChromeAutocompleteSchemeClassifier(profile_.get())); 451 provider_->Start(input, false); 452 const ACMatches& matches(provider_->matches()); 453 ASSERT_EQ(1U, matches.size()) << description; 454 const AutocompleteMatch& match = matches[0]; 455 EXPECT_EQ(base::ASCIIToUTF16(query_data[i].expected_contents), 456 match.contents) << description; 457 std::vector<std::string> class_strings; 458 base::SplitString( 459 query_data[i].expected_contents_class, ',', &class_strings); 460 ASSERT_EQ(class_strings.size(), match.contents_class.size()) 461 << description; 462 for (size_t i = 0; i < class_strings.size(); ++i) { 463 std::vector<std::string> chunks; 464 base::SplitString(class_strings[i], ':', &chunks); 465 ASSERT_EQ(2U, chunks.size()) << description; 466 size_t offset; 467 EXPECT_TRUE(base::StringToSizeT(chunks[0], &offset)) << description; 468 EXPECT_EQ(offset, match.contents_class[i].offset) << description; 469 int style; 470 EXPECT_TRUE(base::StringToInt(chunks[1], &style)) << description; 471 EXPECT_EQ(style, match.contents_class[i].style) << description; 472 } 473 } 474 } 475