Home | History | Annotate | Download | only in autocomplete
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/file_util.h"
      6 #include "base/message_loop.h"
      7 #include "base/path_service.h"
      8 #include "base/string_util.h"
      9 #include "base/utf_string_conversions.h"
     10 #include "chrome/browser/autocomplete/autocomplete_match.h"
     11 #include "chrome/browser/autocomplete/history_url_provider.h"
     12 #include "chrome/browser/history/history.h"
     13 #include "chrome/test/testing_browser_process.h"
     14 #include "chrome/test/testing_browser_process_test.h"
     15 #include "chrome/test/testing_profile.h"
     16 #include "content/browser/browser_thread.h"
     17 #include "testing/gtest/include/gtest/gtest.h"
     18 
     19 using base::Time;
     20 using base::TimeDelta;
     21 
     22 struct TestURLInfo {
     23   std::string url;
     24   std::string title;
     25   int visit_count;
     26   int typed_count;
     27 } test_db[] = {
     28   {"http://www.google.com/", "Google", 3, 3},
     29 
     30   // High-quality pages should get a host synthesized as a lower-quality match.
     31   {"http://slashdot.org/favorite_page.html", "Favorite page", 200, 100},
     32 
     33   // Less popular pages should have hosts synthesized as higher-quality
     34   // matches.
     35   {"http://kerneltrap.org/not_very_popular.html", "Less popular", 4, 0},
     36 
     37   // Unpopular pages should not appear in the results at all.
     38   {"http://freshmeat.net/unpopular.html", "Unpopular", 1, 1},
     39 
     40   // If a host has a match, we should pick it up during host synthesis.
     41   {"http://news.google.com/?ned=us&topic=n", "Google News - U.S.", 2, 2},
     42   {"http://news.google.com/", "Google News", 1, 1},
     43 
     44   // Suggested short URLs must be "good enough" and must match user input.
     45   {"http://foo.com/", "Dir", 5, 5},
     46   {"http://foo.com/dir/", "Dir", 2, 2},
     47   {"http://foo.com/dir/another/", "Dir", 5, 1},
     48   {"http://foo.com/dir/another/again/", "Dir", 10, 0},
     49   {"http://foo.com/dir/another/again/myfile.html", "File", 10, 2},
     50 
     51   // We throw in a lot of extra URLs here to make sure we're testing the
     52   // history database's query, not just the autocomplete provider.
     53   {"http://startest.com/y/a", "A", 2, 2},
     54   {"http://startest.com/y/b", "B", 5, 2},
     55   {"http://startest.com/x/c", "C", 5, 2},
     56   {"http://startest.com/x/d", "D", 5, 5},
     57   {"http://startest.com/y/e", "E", 4, 2},
     58   {"http://startest.com/y/f", "F", 3, 2},
     59   {"http://startest.com/y/g", "G", 3, 2},
     60   {"http://startest.com/y/h", "H", 3, 2},
     61   {"http://startest.com/y/i", "I", 3, 2},
     62   {"http://startest.com/y/j", "J", 3, 2},
     63   {"http://startest.com/y/k", "K", 3, 2},
     64   {"http://startest.com/y/l", "L", 3, 2},
     65   {"http://startest.com/y/m", "M", 3, 2},
     66 
     67   // A file: URL is useful for testing that fixup does the right thing w.r.t.
     68   // the number of trailing slashes on the user's input.
     69   {"file:///C:/foo.txt", "", 2, 2},
     70 
     71   // Results with absurdly high typed_counts so that very generic queries like
     72   // "http" will give consistent results even if more data is added above.
     73   {"http://bogussite.com/a", "Bogus A", 10002, 10000},
     74   {"http://bogussite.com/b", "Bogus B", 10001, 10000},
     75   {"http://bogussite.com/c", "Bogus C", 10000, 10000},
     76 
     77   // Domain name with number.
     78   {"http://www.17173.com/", "Domain with number", 3, 3},
     79 
     80   // URLs to test exact-matching behavior.
     81   {"http://go/", "Intranet URL", 1, 1},
     82   {"http://gooey/", "Intranet URL 2", 5, 5},
     83 
     84   // URLs for testing offset adjustment.
     85   {"http://www.\xEA\xB5\x90\xEC\x9C\xA1.kr/", "Korean", 2, 2},
     86   {"http://spaces.com/path%20with%20spaces/foo.html", "Spaces", 2, 2},
     87   {"http://ms/c++%20style%20guide", "Style guide", 2, 2},
     88 
     89   // URLs for testing ctrl-enter behavior.
     90   {"http://binky/", "Intranet binky", 2, 2},
     91   {"http://winky/", "Intranet winky", 2, 2},
     92   {"http://www.winky.com/", "Internet winky", 5, 0},
     93 
     94   // URLs used by EmptyVisits.
     95   {"http://pandora.com/", "Pandora", 2, 2},
     96   {"http://p/", "p", 0, 0},
     97 };
     98 
     99 class HistoryURLProviderTest : public TestingBrowserProcessTest,
    100                                public ACProviderListener {
    101  public:
    102   HistoryURLProviderTest()
    103       : ui_thread_(BrowserThread::UI, &message_loop_),
    104         file_thread_(BrowserThread::FILE, &message_loop_) {}
    105 
    106   // ACProviderListener
    107   virtual void OnProviderUpdate(bool updated_matches);
    108 
    109  protected:
    110   // testing::Test
    111   virtual void SetUp() {
    112     SetUpImpl(false);
    113   }
    114   virtual void TearDown();
    115 
    116   // Does the real setup.
    117   void SetUpImpl(bool no_db);
    118 
    119   // Fills test data into the history system.
    120   void FillData();
    121 
    122   // Runs an autocomplete query on |text| and checks to see that the returned
    123   // results' destination URLs match those provided.
    124   void RunTest(const string16 text,
    125                const string16& desired_tld,
    126                bool prevent_inline_autocomplete,
    127                const std::string* expected_urls,
    128                size_t num_results);
    129 
    130   void RunAdjustOffsetTest(const string16 text, size_t expected_offset);
    131 
    132   MessageLoopForUI message_loop_;
    133   BrowserThread ui_thread_;
    134   BrowserThread file_thread_;
    135   ACMatches matches_;
    136   scoped_ptr<TestingProfile> profile_;
    137   HistoryService* history_service_;
    138   scoped_refptr<HistoryURLProvider> autocomplete_;
    139 };
    140 
    141 class HistoryURLProviderTestNoDB : public HistoryURLProviderTest {
    142  protected:
    143   virtual void SetUp() {
    144     SetUpImpl(true);
    145   }
    146 };
    147 
    148 void HistoryURLProviderTest::OnProviderUpdate(bool updated_matches) {
    149   if (autocomplete_->done())
    150     MessageLoop::current()->Quit();
    151 }
    152 
    153 void HistoryURLProviderTest::SetUpImpl(bool no_db) {
    154   profile_.reset(new TestingProfile());
    155   profile_->CreateHistoryService(true, no_db);
    156   history_service_ = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
    157 
    158   autocomplete_ = new HistoryURLProvider(this, profile_.get(), "en-US,en,ko");
    159 
    160   FillData();
    161 }
    162 
    163 void HistoryURLProviderTest::TearDown() {
    164   autocomplete_ = NULL;
    165 }
    166 
    167 void HistoryURLProviderTest::FillData() {
    168   // All visits are a long time ago (some tests require this since we do some
    169   // special logic for things visited very recently). Note that this time must
    170   // be more recent than the "archived history" threshold for the data to go
    171   // into the main database.
    172   //
    173   // TODO(brettw) It would be nice if we could test this behavior, in which
    174   // case the time would be specifed in the test_db structure.
    175   Time visit_time = Time::Now() - TimeDelta::FromDays(80);
    176 
    177   for (size_t i = 0; i < arraysize(test_db); ++i) {
    178     const TestURLInfo& cur = test_db[i];
    179     const GURL current_url(cur.url);
    180     history_service_->AddPageWithDetails(current_url, UTF8ToUTF16(cur.title),
    181                                          cur.visit_count, cur.typed_count,
    182                                          visit_time, false,
    183                                          history::SOURCE_BROWSED);
    184   }
    185 }
    186 
    187 void HistoryURLProviderTest::RunTest(const string16 text,
    188                                      const string16& desired_tld,
    189                                      bool prevent_inline_autocomplete,
    190                                      const std::string* expected_urls,
    191                                      size_t num_results) {
    192   AutocompleteInput input(text, desired_tld, prevent_inline_autocomplete,
    193                           false, true, AutocompleteInput::ALL_MATCHES);
    194   autocomplete_->Start(input, false);
    195   if (!autocomplete_->done())
    196     MessageLoop::current()->Run();
    197 
    198   matches_ = autocomplete_->matches();
    199   ASSERT_EQ(num_results, matches_.size()) << "Input text: " << text
    200                                           << "\nTLD: \"" << desired_tld << "\"";
    201   for (size_t i = 0; i < num_results; ++i)
    202     EXPECT_EQ(expected_urls[i], matches_[i].destination_url.spec());
    203 }
    204 
    205 void HistoryURLProviderTest::RunAdjustOffsetTest(const string16 text,
    206                                                  size_t expected_offset) {
    207   AutocompleteInput input(text, string16(), false, false, true,
    208                           AutocompleteInput::ALL_MATCHES);
    209   autocomplete_->Start(input, false);
    210   if (!autocomplete_->done())
    211     MessageLoop::current()->Run();
    212 
    213   matches_ = autocomplete_->matches();
    214   ASSERT_GE(matches_.size(), 1U) << "Input text: " << text;
    215   EXPECT_EQ(expected_offset, matches_[0].inline_autocomplete_offset);
    216 }
    217 
    218 TEST_F(HistoryURLProviderTest, PromoteShorterURLs) {
    219   // Test that hosts get synthesized below popular pages.
    220   const std::string expected_nonsynth[] = {
    221     "http://slashdot.org/favorite_page.html",
    222     "http://slashdot.org/",
    223   };
    224   RunTest(ASCIIToUTF16("slash"), string16(), true, expected_nonsynth,
    225           arraysize(expected_nonsynth));
    226 
    227   // Test that hosts get synthesized above less popular pages.
    228   const std::string expected_synth[] = {
    229     "http://kerneltrap.org/",
    230     "http://kerneltrap.org/not_very_popular.html",
    231   };
    232   RunTest(ASCIIToUTF16("kernel"), string16(), true, expected_synth,
    233           arraysize(expected_synth));
    234 
    235   // Test that unpopular pages are ignored completely.
    236   RunTest(ASCIIToUTF16("fresh"), string16(), true, NULL, 0);
    237 
    238   // Test that if we have a synthesized host that matches a suggestion, they
    239   // get combined into one.
    240   const std::string expected_combine[] = {
    241     "http://news.google.com/",
    242     "http://news.google.com/?ned=us&topic=n",
    243   };
    244   ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("news"), string16(), true,
    245       expected_combine, arraysize(expected_combine)));
    246   // The title should also have gotten set properly on the host for the
    247   // synthesized one, since it was also in the results.
    248   EXPECT_EQ(ASCIIToUTF16("Google News"), matches_.front().description);
    249 
    250   // Test that short URL matching works correctly as the user types more
    251   // (several tests):
    252   // The entry for foo.com is the best of all five foo.com* entries.
    253   const std::string short_1[] = {
    254     "http://foo.com/",
    255     "http://foo.com/dir/another/again/myfile.html",
    256     "http://foo.com/dir/",
    257   };
    258   RunTest(ASCIIToUTF16("foo"), string16(), true, short_1, arraysize(short_1));
    259 
    260   // When the user types the whole host, make sure we don't get two results for
    261   // it.
    262   const std::string short_2[] = {
    263     "http://foo.com/",
    264     "http://foo.com/dir/another/again/myfile.html",
    265     "http://foo.com/dir/",
    266     "http://foo.com/dir/another/",
    267   };
    268   RunTest(ASCIIToUTF16("foo.com"), string16(), true, short_2,
    269           arraysize(short_2));
    270   RunTest(ASCIIToUTF16("foo.com/"), string16(), true, short_2,
    271           arraysize(short_2));
    272 
    273   // The filename is the second best of the foo.com* entries, but there is a
    274   // shorter URL that's "good enough".  The host doesn't match the user input
    275   // and so should not appear.
    276   const std::string short_3[] = {
    277     "http://foo.com/d",
    278     "http://foo.com/dir/another/",
    279     "http://foo.com/dir/another/again/myfile.html",
    280     "http://foo.com/dir/",
    281   };
    282   RunTest(ASCIIToUTF16("foo.com/d"), string16(), true, short_3,
    283           arraysize(short_3));
    284 
    285   // We shouldn't promote shorter URLs than the best if they're not good
    286   // enough.
    287   const std::string short_4[] = {
    288     "http://foo.com/dir/another/a",
    289     "http://foo.com/dir/another/again/myfile.html",
    290     "http://foo.com/dir/another/again/",
    291   };
    292   RunTest(ASCIIToUTF16("foo.com/dir/another/a"), string16(), true, short_4,
    293           arraysize(short_4));
    294 
    295   // Exact matches should always be best no matter how much more another match
    296   // has been typed.
    297   const std::string short_5a[] = {
    298     "http://gooey/",
    299     "http://www.google.com/",
    300   };
    301   const std::string short_5b[] = {
    302     "http://go/",
    303     "http://gooey/",
    304     "http://www.google.com/",
    305   };
    306   RunTest(ASCIIToUTF16("g"), string16(), false, short_5a, arraysize(short_5a));
    307   RunTest(ASCIIToUTF16("go"), string16(), false, short_5b, arraysize(short_5b));
    308 }
    309 
    310 TEST_F(HistoryURLProviderTest, CullRedirects) {
    311   // URLs we will be using, plus the visit counts they will initially get
    312   // (the redirect set below will also increment the visit counts). We want
    313   // the results to be in A,B,C order. Note also that our visit counts are
    314   // all high enough so that domain synthesizing won't get triggered.
    315   struct RedirectCase {
    316     const char* url;
    317     int count;
    318   };
    319   static const RedirectCase redirect[] = {
    320     {"http://redirects/A", 30},
    321     {"http://redirects/B", 20},
    322     {"http://redirects/C", 10}
    323   };
    324   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(redirect); i++) {
    325     history_service_->AddPageWithDetails(GURL(redirect[i].url),
    326                                          UTF8ToUTF16("Title"),
    327                                          redirect[i].count, redirect[i].count,
    328                                          Time::Now(), false,
    329                                          history::SOURCE_BROWSED);
    330   }
    331 
    332   // Create a B->C->A redirect chain, but set the visit counts such that they
    333   // will appear in A,B,C order in the results. The autocomplete query will
    334   // search for the most recent visit when looking for redirects, so this will
    335   // be found even though the previous visits had no redirects.
    336   history::RedirectList redirects_to_a;
    337   redirects_to_a.push_back(GURL(redirect[1].url));
    338   redirects_to_a.push_back(GURL(redirect[2].url));
    339   redirects_to_a.push_back(GURL(redirect[0].url));
    340   history_service_->AddPage(GURL(redirect[0].url), NULL, 0, GURL(),
    341                             PageTransition::TYPED, redirects_to_a,
    342                             history::SOURCE_BROWSED, true);
    343 
    344   // Because all the results are part of a redirect chain with other results,
    345   // all but the first one (A) should be culled. We should get the default
    346   // "what you typed" result, plus this one.
    347   const string16 typing(ASCIIToUTF16("http://redirects/"));
    348   const std::string expected_results[] = {
    349     UTF16ToUTF8(typing),
    350     redirect[0].url};
    351   RunTest(typing, string16(), true, expected_results,
    352           arraysize(expected_results));
    353 }
    354 
    355 TEST_F(HistoryURLProviderTest, WhatYouTyped) {
    356   // Make sure we suggest a What You Typed match at the right times.
    357   RunTest(ASCIIToUTF16("wytmatch"), string16(), false, NULL, 0);
    358   RunTest(ASCIIToUTF16("wytmatch foo bar"), string16(), false, NULL, 0);
    359   RunTest(ASCIIToUTF16("wytmatch+foo+bar"), string16(), false, NULL, 0);
    360   RunTest(ASCIIToUTF16("wytmatch+foo+bar.com"), string16(), false, NULL, 0);
    361 
    362   const std::string results_1[] = {"http://www.wytmatch.com/"};
    363   RunTest(ASCIIToUTF16("wytmatch"), ASCIIToUTF16("com"), false, results_1,
    364           arraysize(results_1));
    365 
    366   const std::string results_2[] = {"http://wytmatch%20foo%20bar/"};
    367   RunTest(ASCIIToUTF16("http://wytmatch foo bar"), string16(), false, results_2,
    368           arraysize(results_2));
    369 
    370   const std::string results_3[] = {"https://wytmatch%20foo%20bar/"};
    371   RunTest(ASCIIToUTF16("https://wytmatch foo bar"), string16(), false,
    372           results_3, arraysize(results_3));
    373 
    374   // Test the corner case where a user has fully typed a previously visited
    375   // intranet address and is now hitting ctrl-enter, which completes to a
    376   // previously unvisted internet domain.
    377   const std::string binky_results[] = {"http://binky/"};
    378   const std::string binky_com_results[] = {
    379     "http://www.binky.com/",
    380     "http://binky/",
    381   };
    382   RunTest(ASCIIToUTF16("binky"), string16(), false, binky_results,
    383           arraysize(binky_results));
    384   RunTest(ASCIIToUTF16("binky"), ASCIIToUTF16("com"), false, binky_com_results,
    385           arraysize(binky_com_results));
    386 
    387   // Test the related case where a user has fully typed a previously visited
    388   // intranet address and is now hitting ctrl-enter, which completes to a
    389   // previously visted internet domain.
    390   const std::string winky_results[] = {
    391     "http://winky/",
    392     "http://www.winky.com/",
    393   };
    394   const std::string winky_com_results[] = {
    395     "http://www.winky.com/",
    396     "http://winky/",
    397   };
    398   RunTest(ASCIIToUTF16("winky"), string16(), false, winky_results,
    399           arraysize(winky_results));
    400   RunTest(ASCIIToUTF16("winky"), ASCIIToUTF16("com"), false, winky_com_results,
    401           arraysize(winky_com_results));
    402 }
    403 
    404 TEST_F(HistoryURLProviderTest, Fixup) {
    405   // Test for various past crashes we've had.
    406   RunTest(ASCIIToUTF16("\\"), string16(), false, NULL, 0);
    407   RunTest(ASCIIToUTF16("#"), string16(), false, NULL, 0);
    408   RunTest(ASCIIToUTF16("%20"), string16(), false, NULL, 0);
    409   RunTest(WideToUTF16(L"\uff65@s"), string16(), false, NULL, 0);
    410   RunTest(WideToUTF16(L"\u2015\u2015@ \uff7c"), string16(), false, NULL, 0);
    411 
    412   // Fixing up "file:" should result in an inline autocomplete offset of just
    413   // after "file:", not just after "file://".
    414   const string16 input_1(ASCIIToUTF16("file:"));
    415   const std::string fixup_1[] = {"file:///C:/foo.txt"};
    416   ASSERT_NO_FATAL_FAILURE(RunTest(input_1, string16(), false, fixup_1,
    417                                   arraysize(fixup_1)));
    418   EXPECT_EQ(input_1.length(), matches_.front().inline_autocomplete_offset);
    419 
    420   // Fixing up "http:/" should result in an inline autocomplete offset of just
    421   // after "http:/", not just after "http:".
    422   const string16 input_2(ASCIIToUTF16("http:/"));
    423   const std::string fixup_2[] = {
    424     "http://bogussite.com/a",
    425     "http://bogussite.com/b",
    426     "http://bogussite.com/c",
    427   };
    428   ASSERT_NO_FATAL_FAILURE(RunTest(input_2, string16(), false, fixup_2,
    429                                   arraysize(fixup_2)));
    430   EXPECT_EQ(input_2.length(), matches_.front().inline_autocomplete_offset);
    431 
    432   // Adding a TLD to a small number like "56" should result in "www.56.com"
    433   // rather than "0.0.0.56.com".
    434   const std::string fixup_3[] = {"http://www.56.com/"};
    435   RunTest(ASCIIToUTF16("56"), ASCIIToUTF16("com"), true, fixup_3,
    436           arraysize(fixup_3));
    437 
    438   // An input looks like a IP address like "127.0.0.1" should result in
    439   // "http://127.0.0.1/".
    440   const std::string fixup_4[] = {"http://127.0.0.1/"};
    441   RunTest(ASCIIToUTF16("127.0.0.1"), string16(), false, fixup_4,
    442           arraysize(fixup_4));
    443 
    444   // An number "17173" should result in "http://www.17173.com/" in db.
    445   const std::string fixup_5[] = {"http://www.17173.com/"};
    446   RunTest(ASCIIToUTF16("17173"), string16(), false, fixup_5,
    447           arraysize(fixup_5));
    448 }
    449 
    450 TEST_F(HistoryURLProviderTest, AdjustOffset) {
    451   RunAdjustOffsetTest(WideToUTF16(L"http://www.\uAD50\uC721"), 13);
    452   RunAdjustOffsetTest(ASCIIToUTF16("http://spaces.com/path%20with%20spa"), 31);
    453   RunAdjustOffsetTest(ASCIIToUTF16("http://ms/c++ s"), 15);
    454 }
    455 
    456 // Make sure the results for the input 'p' don't change between the first and
    457 // second passes.
    458 TEST_F(HistoryURLProviderTest, EmptyVisits) {
    459   // Wait for history to create the in memory DB.
    460   profile_->BlockUntilHistoryProcessesPendingRequests();
    461 
    462   AutocompleteInput input(ASCIIToUTF16("p"), string16(), false, false, true,
    463                           AutocompleteInput::ALL_MATCHES);
    464   autocomplete_->Start(input, false);
    465   // HistoryURLProvider shouldn't be done (waiting on async results).
    466   EXPECT_FALSE(autocomplete_->done());
    467 
    468   // We should get back an entry for pandora.
    469   matches_ = autocomplete_->matches();
    470   ASSERT_GT(matches_.size(), 0u);
    471   EXPECT_EQ(GURL("http://pandora.com/"), matches_[0].destination_url);
    472   int pandora_relevance = matches_[0].relevance;
    473 
    474   // Run the message loop. When |autocomplete_| finishes the loop is quit.
    475   MessageLoop::current()->Run();
    476   EXPECT_TRUE(autocomplete_->done());
    477   matches_ = autocomplete_->matches();
    478   ASSERT_GT(matches_.size(), 0u);
    479   EXPECT_EQ(GURL("http://pandora.com/"), matches_[0].destination_url);
    480   EXPECT_EQ(pandora_relevance, matches_[0].relevance);
    481 }
    482 
    483 TEST_F(HistoryURLProviderTestNoDB, NavigateWithoutDB) {
    484   // Ensure that we will still produce matches for navigation when there is no
    485   // database.
    486   std::string navigation_1[] = {"http://test.com/"};
    487   RunTest(ASCIIToUTF16("test.com"), string16(), false, navigation_1,
    488           arraysize(navigation_1));
    489 
    490   std::string navigation_2[] = {"http://slash/"};
    491   RunTest(ASCIIToUTF16("slash"), string16(), false, navigation_2,
    492           arraysize(navigation_2));
    493 
    494   RunTest(ASCIIToUTF16("this is a query"), string16(), false, NULL, 0);
    495 }
    496