Home | History | Annotate | Download | only in autocomplete
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/autocomplete/bookmark_provider.h"
      6 
      7 #include <algorithm>
      8 #include <string>
      9 #include <vector>
     10 
     11 #include "base/memory/ref_counted.h"
     12 #include "base/memory/scoped_ptr.h"
     13 #include "base/strings/string16.h"
     14 #include "base/strings/string_number_conversions.h"
     15 #include "base/strings/string_split.h"
     16 #include "base/strings/utf_string_conversions.h"
     17 #include "chrome/browser/autocomplete/chrome_autocomplete_scheme_classifier.h"
     18 #include "chrome/test/base/testing_profile.h"
     19 #include "components/bookmarks/browser/bookmark_match.h"
     20 #include "components/bookmarks/browser/bookmark_model.h"
     21 #include "components/bookmarks/test/test_bookmark_client.h"
     22 #include "components/metrics/proto/omnibox_event.pb.h"
     23 #include "components/omnibox/autocomplete_provider.h"
     24 #include "testing/gtest/include/gtest/gtest.h"
     25 
     26 using bookmarks::BookmarkMatch;
     27 
     28 // The bookmark corpus against which we will simulate searches.
     29 struct BookmarksTestInfo {
     30   std::string title;
     31   std::string url;
     32 } bookmark_provider_test_data[] = {
     33   { "abc def", "http://www.catsanddogs.com/a" },
     34   { "abcde", "http://www.catsanddogs.com/b" },
     35   { "abcdef", "http://www.catsanddogs.com/c" },
     36   { "carry carbon carefully", "http://www.catsanddogs.com/d" },
     37   { "a definition", "http://www.catsanddogs.com/e" },
     38   { "ghi jkl", "http://www.catsanddogs.com/f" },
     39   { "jkl ghi", "http://www.catsanddogs.com/g" },
     40   { "frankly frankly frank", "http://www.catsanddogs.com/h" },
     41   { "foobar foobar", "http://www.foobar.com/" },
     42   { "domain", "http://www.domain.com/http/" },
     43   { "repeat", "http://www.repeat.com/1/repeat/2/" },
     44   // For testing inline_autocompletion.
     45   { "http://blah.com/", "http://blah.com/" },
     46   { "http://fiddle.com/", "http://fiddle.com/" },
     47   { "http://www.www.com/", "http://www.www.com/" },
     48   { "chrome://version", "chrome://version" },
     49   { "chrome://omnibox", "chrome://omnibox" },
     50   // For testing ranking with different URLs.
     51   { "achlorhydric featherheads resuscitates mockingbirds",
     52     "http://www.manylongwords.com/1a" },
     53   { "achlorhydric mockingbirds resuscitates featherhead",
     54     "http://www.manylongwords.com/2b" },
     55   { "featherhead resuscitates achlorhydric mockingbirds",
     56     "http://www.manylongwords.com/3c" },
     57   { "mockingbirds resuscitates featherheads achlorhydric",
     58     "http://www.manylongwords.com/4d" },
     59   // For testing URL boosting.  (URLs referenced multiple times are boosted.)
     60   { "burning worms #1",  "http://www.burns.com/" },
     61   { "burning worms #2",  "http://www.worms.com/" },
     62   { "worming burns #10", "http://www.burns.com/" },
     63   // For testing strange spacing in bookmark titles.
     64   { " hello1  hello2  ", "http://whatever.com/" },
     65   { "",                  "http://emptytitle.com/" },
     66 };
     67 
// Test fixture shared by all tests in this file. The constructor creates
// |model_| from |client_|; SetUp() creates the profile and the provider, hands
// the model to the provider, and fills the model with the test corpus.
class BookmarkProviderTest : public testing::Test {
 public:
  BookmarkProviderTest();

 protected:
  // Runs before each test: builds |profile_| and |provider_| and adds every
  // entry of |bookmark_provider_test_data| to |model_|.
  virtual void SetUp() OVERRIDE;

  // NOTE: |client_| is declared before |model_| and is used to create it, so
  // the declaration order of these members should be preserved.
  bookmarks::TestBookmarkClient client_;
  // Profile passed to the provider and to the scheme classifier in the tests.
  scoped_ptr<TestingProfile> profile_;
  // The bookmark model searched by the provider.
  scoped_ptr<BookmarkModel> model_;
  // The provider under test.
  scoped_refptr<BookmarkProvider> provider_;

 private:
  DISALLOW_COPY_AND_ASSIGN(BookmarkProviderTest);
};
     83 
     84 BookmarkProviderTest::BookmarkProviderTest() {
     85   model_ = client_.CreateModel();
     86 }
     87 
     88 void BookmarkProviderTest::SetUp() {
     89   profile_.reset(new TestingProfile());
     90   DCHECK(profile_.get());
     91   provider_ = new BookmarkProvider(profile_.get());
     92   DCHECK(provider_.get());
     93   provider_->set_bookmark_model_for_testing(model_.get());
     94 
     95   const BookmarkNode* other_node = model_->other_node();
     96   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(bookmark_provider_test_data); ++i) {
     97     const BookmarksTestInfo& cur(bookmark_provider_test_data[i]);
     98     const GURL url(cur.url);
     99     model_->AddURL(other_node, other_node->child_count(),
    100                    base::ASCIIToUTF16(cur.title), url);
    101   }
    102 }
    103 
    104 // Structures and functions supporting the BookmarkProviderTest.Positions
    105 // unit test.
    106 
    107 struct TestBookmarkPosition {
    108   TestBookmarkPosition(size_t begin, size_t end)
    109       : begin(begin), end(end) {}
    110 
    111   size_t begin;
    112   size_t end;
    113 };
    114 typedef std::vector<TestBookmarkPosition> TestBookmarkPositions;
    115 
    116 // Return |positions| as a formatted string for unit test diagnostic output.
    117 std::string TestBookmarkPositionsAsString(
    118     const TestBookmarkPositions& positions) {
    119   std::string position_string("{");
    120   for (TestBookmarkPositions::const_iterator i = positions.begin();
    121        i != positions.end(); ++i) {
    122     if (i != positions.begin())
    123       position_string += ", ";
    124     position_string += "{" + base::IntToString(i->begin) + ", " +
    125         base::IntToString(i->end) + "}";
    126   }
    127   position_string += "}\n";
    128   return position_string;
    129 }
    130 
    131 // Return the positions in |matches| as a formatted string for unit test
    132 // diagnostic output.
    133 base::string16 MatchesAsString16(const ACMatches& matches) {
    134   base::string16 matches_string;
    135   for (ACMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) {
    136     matches_string.append(base::ASCIIToUTF16("    '"));
    137     matches_string.append(i->description);
    138     matches_string.append(base::ASCIIToUTF16("'\n"));
    139   }
    140   return matches_string;
    141 }
    142 
    143 // Comparison function for sorting search terms by descending length.
    144 bool TestBookmarkPositionsEqual(const TestBookmarkPosition& pos_a,
    145                                 const TestBookmarkPosition& pos_b) {
    146   return pos_a.begin == pos_b.begin && pos_a.end == pos_b.end;
    147 }
    148 
    149 // Convience function to make comparing ACMatchClassifications against the
    150 // test expectations structure easier.
    151 TestBookmarkPositions PositionsFromAutocompleteMatch(
    152     const AutocompleteMatch& match) {
    153   TestBookmarkPositions positions;
    154   bool started = false;
    155   size_t start = 0;
    156   for (AutocompleteMatch::ACMatchClassifications::const_iterator
    157        i = match.description_class.begin();
    158        i != match.description_class.end(); ++i) {
    159     if (i->style & AutocompleteMatch::ACMatchClassification::MATCH) {
    160       // We have found the start of a match.
    161       EXPECT_FALSE(started);
    162       started = true;
    163       start = i->offset;
    164     } else if (started) {
    165       // We have found the end of a match.
    166       started = false;
    167       positions.push_back(TestBookmarkPosition(start, i->offset));
    168       start = 0;
    169     }
    170   }
    171   // Record the final position if the last match goes to the end of the
    172   // candidate string.
    173   if (started)
    174     positions.push_back(TestBookmarkPosition(start, match.description.size()));
    175   return positions;
    176 }
    177 
    178 // Convience function to make comparing test expectations structure against the
    179 // actual ACMatchClassifications easier.
    180 TestBookmarkPositions PositionsFromExpectations(
    181     const size_t expectations[9][2]) {
    182   TestBookmarkPositions positions;
    183   size_t i = 0;
    184   // The array is zero-terminated in the [1]th element.
    185   while (expectations[i][1]) {
    186     positions.push_back(
    187         TestBookmarkPosition(expectations[i][0], expectations[i][1]));
    188     ++i;
    189   }
    190   return positions;
    191 }
    192 
    193 TEST_F(BookmarkProviderTest, Positions) {
    194   // Simulate searches.
    195   // Description of |positions|:
    196   //   The first index represents the collection of positions for each expected
    197   //   match. The count of the actual subarrays in each instance of |query_data|
    198   //   must equal |match_count|. The second index represents each expected
    199   //   match position. The third index represents the |start| and |end| of the
    200   //   expected match's position within the |test_data|. This array must be
    201   //   terminated by an entry with a value of '0' for |end|.
    202   // Example:
    203   //   Consider the line for 'def' below:
    204   //     {"def", 2, {{{4, 7}, {XXX, 0}}, {{2, 5}, {11, 14}, {XXX, 0}}}},
    205   //   There are two expected matches:
    206   //     0. {{4, 7}, {XXX, 0}}
    207   //     1. {{2, 5}, {11 ,14}, {XXX, 0}}
    208   //   For the first match, [0], there is one match within the bookmark's title
    209   //   expected, {4, 7}, which maps to the 'def' within "abc def". The 'XXX'
    210   //   value is ignored. The second match, [1], indicates that two matches are
    211   //   expected within the bookmark title "a definite definition". In each case,
    212   //   the {XXX, 0} indicates the end of the subarray. Or:
    213   //                 Match #1            Match #2
    214   //                 ------------------  ----------------------------
    215   //                  Pos1    Term        Pos1    Pos2      Term
    216   //                  ------  --------    ------  --------  --------
    217   //     {"def", 2, {{{4, 7}, {999, 0}}, {{2, 5}, {11, 14}, {999, 0}}}},
    218   //
    219   struct QueryData {
    220     const std::string query;
    221     const size_t match_count;  // This count must match the number of major
    222                                // elements in the following |positions| array.
    223     const size_t positions[99][9][2];
    224   } query_data[] = {
    225     // This first set is primarily for position detection validation.
    226     {"abc",                   3, {{{0, 3}, {0, 0}},
    227                                   {{0, 3}, {0, 0}},
    228                                   {{0, 3}, {0, 0}}}},
    229     {"abcde",                 2, {{{0, 5}, {0, 0}},
    230                                   {{0, 5}, {0, 0}}}},
    231     {"foo bar",               0, {{{0, 0}}}},
    232     {"fooey bark",            0, {{{0, 0}}}},
    233     {"def",                   2, {{{2, 5}, {0, 0}},
    234                                   {{4, 7}, {0, 0}}}},
    235     {"ghi jkl",               2, {{{0, 3}, {4, 7}, {0, 0}},
    236                                   {{0, 3}, {4, 7}, {0, 0}}}},
    237     // NB: GetBookmarksMatching(...) uses exact match for "a" in title or URL.
    238     {"a",                     2, {{{0, 1}, {0, 0}},
    239                                   {{0, 0}}}},
    240     {"a d",                   0, {{{0, 0}}}},
    241     {"carry carbon",          1, {{{0, 5}, {6, 12}, {0, 0}}}},
    242     // NB: GetBookmarksMatching(...) sorts the match positions.
    243     {"carbon carry",          1, {{{0, 5}, {6, 12}, {0, 0}}}},
    244     {"arbon",                 0, {{{0, 0}}}},
    245     {"ar",                    0, {{{0, 0}}}},
    246     {"arry",                  0, {{{0, 0}}}},
    247     // Quoted terms are single terms.
    248     {"\"carry carbon\"",      1, {{{0, 12}, {0, 0}}}},
    249     {"\"carry carbon\" care", 1, {{{0, 12}, {13, 17}, {0, 0}}}},
    250     // Quoted terms require complete word matches.
    251     {"\"carry carbo\"",       0, {{{0, 0}}}},
    252     // This set uses duplicated and/or overlaps search terms in the title.
    253     {"frank",                 1, {{{0, 5}, {8, 13}, {16, 21}, {0, 0}}}},
    254     {"frankly",               1, {{{0, 7}, {8, 15}, {0, 0}}}},
    255     {"frankly frankly",       1, {{{0, 7}, {8, 15}, {0, 0}}}},
    256     {"foobar foo",            1, {{{0, 6}, {7, 13}, {0, 0}}}},
    257     {"foo foobar",            1, {{{0, 6}, {7, 13}, {0, 0}}}},
    258     // This ensures that leading whitespace in the title is removed.
    259     {"hello",                 1, {{{0, 5}, {7, 12}, {0, 0}}}},
    260     // This ensures that empty titles yield empty classifications.
    261     {"emptytitle",            1, {}},
    262   };
    263 
    264   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
    265     AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query),
    266                             base::string16::npos, base::string16(), GURL(),
    267                             metrics::OmniboxEventProto::INVALID_SPEC, false,
    268                             false, false, true,
    269                             ChromeAutocompleteSchemeClassifier(profile_.get()));
    270     provider_->Start(input, false);
    271     const ACMatches& matches(provider_->matches());
    272     // Validate number of results is as expected.
    273     EXPECT_LE(matches.size(), query_data[i].match_count)
    274         << "One or more of the following matches were unexpected:\n"
    275         << MatchesAsString16(matches)
    276         << "For query '" << query_data[i].query << "'.";
    277     EXPECT_GE(matches.size(), query_data[i].match_count)
    278         << "One or more expected matches are missing. Matches found:\n"
    279         << MatchesAsString16(matches)
    280         << "for query '" << query_data[i].query << "'.";
    281     // Validate positions within each match is as expected.
    282     for (size_t j = 0; j < matches.size(); ++j) {
    283       // Collect the expected positions as a vector, collect the match's
    284       // classifications for match positions as a vector, then compare.
    285       TestBookmarkPositions expected_positions(
    286           PositionsFromExpectations(query_data[i].positions[j]));
    287       TestBookmarkPositions actual_positions(
    288           PositionsFromAutocompleteMatch(matches[j]));
    289       EXPECT_TRUE(std::equal(expected_positions.begin(),
    290                              expected_positions.end(),
    291                              actual_positions.begin(),
    292                              TestBookmarkPositionsEqual))
    293           << "EXPECTED: " << TestBookmarkPositionsAsString(expected_positions)
    294           << "ACTUAL:   " << TestBookmarkPositionsAsString(actual_positions)
    295           << "    for query: '" << query_data[i].query << "'.";
    296     }
    297   }
    298 }
    299 
    300 TEST_F(BookmarkProviderTest, Rankings) {
    301   // Simulate searches.
    302   struct QueryData {
    303     const std::string query;
    304     // |match_count| must match the number of elements in the following
    305     // |matches| array.
    306     const size_t match_count;
    307     // |matches| specifies the titles for all bookmarks expected to be matched
    308     // by the |query|
    309     const std::string matches[3];
    310   } query_data[] = {
    311     // Basic ranking test.
    312     {"abc",       3, {"abcde",      // Most complete match.
    313                       "abcdef",
    314                       "abc def"}},  // Least complete match.
    315     {"ghi",       2, {"ghi jkl",    // Matched earlier.
    316                       "jkl ghi",    // Matched later.
    317                       ""}},
    318     // Rankings of exact-word matches with different URLs.
    319     {"achlorhydric",
    320                   3, {"achlorhydric mockingbirds resuscitates featherhead",
    321                       "achlorhydric featherheads resuscitates mockingbirds",
    322                       "featherhead resuscitates achlorhydric mockingbirds"}},
    323     {"achlorhydric featherheads",
    324                   2, {"achlorhydric featherheads resuscitates mockingbirds",
    325                       "mockingbirds resuscitates featherheads achlorhydric",
    326                       ""}},
    327     {"mockingbirds resuscitates",
    328                   3, {"mockingbirds resuscitates featherheads achlorhydric",
    329                       "achlorhydric mockingbirds resuscitates featherhead",
    330                       "featherhead resuscitates achlorhydric mockingbirds"}},
    331     // Ranking of exact-word matches with URL boosts.
    332     {"worms",     2, {"burning worms #1",    // boosted
    333                       "burning worms #2",    // not boosted
    334                       ""}},
    335     // Ranking of prefix matches with URL boost.
    336     {"burn worm", 3, {"burning worms #1",    // boosted
    337                       "worming burns #10",   // boosted but longer title
    338                       "burning worms #2"}},  // not boosted
    339     // A query of "worm burn" will have the same results.
    340     {"worm burn", 3, {"burning worms #1",    // boosted
    341                       "worming burns #10",   // boosted but longer title
    342                       "burning worms #2"}},  // not boosted
    343   };
    344 
    345   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
    346     AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query),
    347                             base::string16::npos, base::string16(), GURL(),
    348                             metrics::OmniboxEventProto::INVALID_SPEC, false,
    349                             false, false, true,
    350                             ChromeAutocompleteSchemeClassifier(profile_.get()));
    351     provider_->Start(input, false);
    352     const ACMatches& matches(provider_->matches());
    353     // Validate number and content of results is as expected.
    354     for (size_t j = 0; j < std::max(query_data[i].match_count, matches.size());
    355          ++j) {
    356       EXPECT_LT(j, query_data[i].match_count) << "    Unexpected match '"
    357           << base::UTF16ToUTF8(matches[j].description) << "' for query: '"
    358           <<  query_data[i].query << "'.";
    359       if (j >= query_data[i].match_count)
    360         continue;
    361       EXPECT_LT(j, matches.size()) << "    Missing match '"
    362           << query_data[i].matches[j] << "' for query: '"
    363           << query_data[i].query << "'.";
    364       if (j >= matches.size())
    365         continue;
    366       EXPECT_EQ(query_data[i].matches[j],
    367                 base::UTF16ToUTF8(matches[j].description))
    368           << "    Mismatch at [" << base::IntToString(j) << "] for query '"
    369           << query_data[i].query << "'.";
    370     }
    371   }
    372 }
    373 
    374 TEST_F(BookmarkProviderTest, InlineAutocompletion) {
    375   // Simulate searches.
    376   struct QueryData {
    377     const std::string query;
    378     const std::string url;
    379     const bool allowed_to_be_default_match;
    380     const std::string inline_autocompletion;
    381   } query_data[] = {
    382     { "bla", "http://blah.com/", true, "h.com" },
    383     { "blah ", "http://blah.com/", false, ".com" },
    384     { "http://bl", "http://blah.com/", true, "ah.com" },
    385     { "fiddle.c", "http://fiddle.com/", true, "om" },
    386     { "www", "http://www.www.com/", true, ".com" },
    387     { "chro", "chrome://version", true, "me://version" },
    388     { "chrome://ve", "chrome://version", true, "rsion" },
    389     { "chrome ver", "chrome://version", false, "" },
    390     { "versi", "chrome://version", false, "" },
    391     { "abou", "chrome://omnibox", false, "" },
    392     { "about:om", "chrome://omnibox", true, "nibox" }
    393     // Note: when adding a new URL to this test, be sure to add it to the list
    394     // of bookmarks at the top of the file as well.  All items in this list
    395     // need to be in the bookmarks list because BookmarkProvider's
    396     // TitleMatchToACMatch() has an assertion that verifies the URL is
    397     // actually bookmarked.
    398   };
    399 
    400   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
    401     const std::string description = "for query=" + query_data[i].query +
    402         " and url=" + query_data[i].url;
    403     AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query),
    404                             base::string16::npos, base::string16(), GURL(),
    405                             metrics::OmniboxEventProto::INVALID_SPEC, false,
    406                             false, false, true,
    407                             ChromeAutocompleteSchemeClassifier(profile_.get()));
    408     const base::string16 fixed_up_input(
    409         provider_->FixupUserInput(input).second);
    410     BookmarkNode node(GURL(query_data[i].url));
    411     node.SetTitle(base::ASCIIToUTF16(query_data[i].url));
    412     BookmarkMatch bookmark_match;
    413     bookmark_match.node = &node;
    414     const AutocompleteMatch& ac_match = provider_->BookmarkMatchToACMatch(
    415         input, fixed_up_input, bookmark_match);
    416     EXPECT_EQ(query_data[i].allowed_to_be_default_match,
    417               ac_match.allowed_to_be_default_match) << description;
    418     EXPECT_EQ(base::ASCIIToUTF16(query_data[i].inline_autocompletion),
    419               ac_match.inline_autocompletion) << description;
    420   }
    421 }
    422 
    423 TEST_F(BookmarkProviderTest, StripHttpAndAdjustOffsets) {
    424   // Simulate searches.
    425   struct QueryData {
    426     const std::string query;
    427     const std::string expected_contents;
    428     // |expected_contents_class| is in format offset:style,offset:style,...
    429     const std::string expected_contents_class;
    430   } query_data[] = {
    431     { "foo",       "www.foobar.com",             "0:1,4:3,7:1"           },
    432     { "www foo",   "www.foobar.com",             "0:3,3:1,4:3,7:1"       },
    433     { "foo www",   "www.foobar.com",             "0:3,3:1,4:3,7:1"       },
    434     { "foo http",  "http://www.foobar.com",      "0:3,4:1,11:3,14:1"     },
    435     { "blah",      "blah.com",                   "0:3,4:1"               },
    436     { "http blah", "http://blah.com",            "0:3,4:1,7:3,11:1"      },
    437     { "dom",       "www.domain.com/http/",       "0:1,4:3,7:1"           },
    438     { "dom http",  "http://www.domain.com/http/",
    439       "0:3,4:1,11:3,14:1,22:3,26:1"                                      },
    440     { "rep",       "www.repeat.com/1/repeat/2/", "0:1,4:3,7:1,17:3,20:1" },
    441     { "versi",     "chrome://version",           "0:1,9:3,14:1"          }
    442   };
    443 
    444   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(query_data); ++i) {
    445     std::string description = "for query=" + query_data[i].query;
    446     AutocompleteInput input(base::ASCIIToUTF16(query_data[i].query),
    447                             base::string16::npos, base::string16(), GURL(),
    448                             metrics::OmniboxEventProto::INVALID_SPEC, false,
    449                             false, false, true,
    450                             ChromeAutocompleteSchemeClassifier(profile_.get()));
    451     provider_->Start(input, false);
    452     const ACMatches& matches(provider_->matches());
    453     ASSERT_EQ(1U, matches.size()) << description;
    454     const AutocompleteMatch& match = matches[0];
    455     EXPECT_EQ(base::ASCIIToUTF16(query_data[i].expected_contents),
    456               match.contents) << description;
    457     std::vector<std::string> class_strings;
    458     base::SplitString(
    459         query_data[i].expected_contents_class, ',', &class_strings);
    460     ASSERT_EQ(class_strings.size(), match.contents_class.size())
    461         << description;
    462     for (size_t i = 0; i < class_strings.size(); ++i) {
    463       std::vector<std::string> chunks;
    464       base::SplitString(class_strings[i], ':', &chunks);
    465       ASSERT_EQ(2U, chunks.size()) << description;
    466       size_t offset;
    467       EXPECT_TRUE(base::StringToSizeT(chunks[0], &offset)) << description;
    468       EXPECT_EQ(offset, match.contents_class[i].offset) << description;
    469       int style;
    470       EXPECT_TRUE(base::StringToInt(chunks[1], &style)) << description;
    471       EXPECT_EQ(style, match.contents_class[i].style) << description;
    472     }
    473   }
    474 }
    475