1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/file_util.h" 6 #include "base/message_loop.h" 7 #include "base/path_service.h" 8 #include "base/string_util.h" 9 #include "base/utf_string_conversions.h" 10 #include "chrome/browser/autocomplete/autocomplete_match.h" 11 #include "chrome/browser/autocomplete/history_url_provider.h" 12 #include "chrome/browser/history/history.h" 13 #include "chrome/test/testing_browser_process.h" 14 #include "chrome/test/testing_browser_process_test.h" 15 #include "chrome/test/testing_profile.h" 16 #include "content/browser/browser_thread.h" 17 #include "testing/gtest/include/gtest/gtest.h" 18 19 using base::Time; 20 using base::TimeDelta; 21 22 struct TestURLInfo { 23 std::string url; 24 std::string title; 25 int visit_count; 26 int typed_count; 27 } test_db[] = { 28 {"http://www.google.com/", "Google", 3, 3}, 29 30 // High-quality pages should get a host synthesized as a lower-quality match. 31 {"http://slashdot.org/favorite_page.html", "Favorite page", 200, 100}, 32 33 // Less popular pages should have hosts synthesized as higher-quality 34 // matches. 35 {"http://kerneltrap.org/not_very_popular.html", "Less popular", 4, 0}, 36 37 // Unpopular pages should not appear in the results at all. 38 {"http://freshmeat.net/unpopular.html", "Unpopular", 1, 1}, 39 40 // If a host has a match, we should pick it up during host synthesis. 41 {"http://news.google.com/?ned=us&topic=n", "Google News - U.S.", 2, 2}, 42 {"http://news.google.com/", "Google News", 1, 1}, 43 44 // Suggested short URLs must be "good enough" and must match user input. 45 {"http://foo.com/", "Dir", 5, 5}, 46 {"http://foo.com/dir/", "Dir", 2, 2}, 47 {"http://foo.com/dir/another/", "Dir", 5, 1}, 48 {"http://foo.com/dir/another/again/", "Dir", 10, 0}, 49 {"http://foo.com/dir/another/again/myfile.html", "File", 10, 2}, 50 51 // We throw in a lot of extra URLs here to make sure we're testing the 52 // history database's query, not just the autocomplete provider. 53 {"http://startest.com/y/a", "A", 2, 2}, 54 {"http://startest.com/y/b", "B", 5, 2}, 55 {"http://startest.com/x/c", "C", 5, 2}, 56 {"http://startest.com/x/d", "D", 5, 5}, 57 {"http://startest.com/y/e", "E", 4, 2}, 58 {"http://startest.com/y/f", "F", 3, 2}, 59 {"http://startest.com/y/g", "G", 3, 2}, 60 {"http://startest.com/y/h", "H", 3, 2}, 61 {"http://startest.com/y/i", "I", 3, 2}, 62 {"http://startest.com/y/j", "J", 3, 2}, 63 {"http://startest.com/y/k", "K", 3, 2}, 64 {"http://startest.com/y/l", "L", 3, 2}, 65 {"http://startest.com/y/m", "M", 3, 2}, 66 67 // A file: URL is useful for testing that fixup does the right thing w.r.t. 68 // the number of trailing slashes on the user's input. 69 {"file:///C:/foo.txt", "", 2, 2}, 70 71 // Results with absurdly high typed_counts so that very generic queries like 72 // "http" will give consistent results even if more data is added above. 73 {"http://bogussite.com/a", "Bogus A", 10002, 10000}, 74 {"http://bogussite.com/b", "Bogus B", 10001, 10000}, 75 {"http://bogussite.com/c", "Bogus C", 10000, 10000}, 76 77 // Domain name with number. 78 {"http://www.17173.com/", "Domain with number", 3, 3}, 79 80 // URLs to test exact-matching behavior. 81 {"http://go/", "Intranet URL", 1, 1}, 82 {"http://gooey/", "Intranet URL 2", 5, 5}, 83 84 // URLs for testing offset adjustment. 85 {"http://www.\xEA\xB5\x90\xEC\x9C\xA1.kr/", "Korean", 2, 2}, 86 {"http://spaces.com/path%20with%20spaces/foo.html", "Spaces", 2, 2}, 87 {"http://ms/c++%20style%20guide", "Style guide", 2, 2}, 88 89 // URLs for testing ctrl-enter behavior. 90 {"http://binky/", "Intranet binky", 2, 2}, 91 {"http://winky/", "Intranet winky", 2, 2}, 92 {"http://www.winky.com/", "Internet winky", 5, 0}, 93 94 // URLs used by EmptyVisits. 95 {"http://pandora.com/", "Pandora", 2, 2}, 96 {"http://p/", "p", 0, 0}, 97 }; 98 99 class HistoryURLProviderTest : public TestingBrowserProcessTest, 100 public ACProviderListener { 101 public: 102 HistoryURLProviderTest() 103 : ui_thread_(BrowserThread::UI, &message_loop_), 104 file_thread_(BrowserThread::FILE, &message_loop_) {} 105 106 // ACProviderListener 107 virtual void OnProviderUpdate(bool updated_matches); 108 109 protected: 110 // testing::Test 111 virtual void SetUp() { 112 SetUpImpl(false); 113 } 114 virtual void TearDown(); 115 116 // Does the real setup. 117 void SetUpImpl(bool no_db); 118 119 // Fills test data into the history system. 120 void FillData(); 121 122 // Runs an autocomplete query on |text| and checks to see that the returned 123 // results' destination URLs match those provided. 124 void RunTest(const string16 text, 125 const string16& desired_tld, 126 bool prevent_inline_autocomplete, 127 const std::string* expected_urls, 128 size_t num_results); 129 130 void RunAdjustOffsetTest(const string16 text, size_t expected_offset); 131 132 MessageLoopForUI message_loop_; 133 BrowserThread ui_thread_; 134 BrowserThread file_thread_; 135 ACMatches matches_; 136 scoped_ptr<TestingProfile> profile_; 137 HistoryService* history_service_; 138 scoped_refptr<HistoryURLProvider> autocomplete_; 139 }; 140 141 class HistoryURLProviderTestNoDB : public HistoryURLProviderTest { 142 protected: 143 virtual void SetUp() { 144 SetUpImpl(true); 145 } 146 }; 147 148 void HistoryURLProviderTest::OnProviderUpdate(bool updated_matches) { 149 if (autocomplete_->done()) 150 MessageLoop::current()->Quit(); 151 } 152 153 void HistoryURLProviderTest::SetUpImpl(bool no_db) { 154 profile_.reset(new TestingProfile()); 155 profile_->CreateHistoryService(true, no_db); 156 history_service_ = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); 157 158 autocomplete_ = new HistoryURLProvider(this, profile_.get(), "en-US,en,ko"); 159 160 FillData(); 161 } 162 163 void HistoryURLProviderTest::TearDown() { 164 autocomplete_ = NULL; 165 } 166 167 void HistoryURLProviderTest::FillData() { 168 // All visits are a long time ago (some tests require this since we do some 169 // special logic for things visited very recently). Note that this time must 170 // be more recent than the "archived history" threshold for the data to go 171 // into the main database. 172 // 173 // TODO(brettw) It would be nice if we could test this behavior, in which 174 // case the time would be specifed in the test_db structure. 175 Time visit_time = Time::Now() - TimeDelta::FromDays(80); 176 177 for (size_t i = 0; i < arraysize(test_db); ++i) { 178 const TestURLInfo& cur = test_db[i]; 179 const GURL current_url(cur.url); 180 history_service_->AddPageWithDetails(current_url, UTF8ToUTF16(cur.title), 181 cur.visit_count, cur.typed_count, 182 visit_time, false, 183 history::SOURCE_BROWSED); 184 } 185 } 186 187 void HistoryURLProviderTest::RunTest(const string16 text, 188 const string16& desired_tld, 189 bool prevent_inline_autocomplete, 190 const std::string* expected_urls, 191 size_t num_results) { 192 AutocompleteInput input(text, desired_tld, prevent_inline_autocomplete, 193 false, true, AutocompleteInput::ALL_MATCHES); 194 autocomplete_->Start(input, false); 195 if (!autocomplete_->done()) 196 MessageLoop::current()->Run(); 197 198 matches_ = autocomplete_->matches(); 199 ASSERT_EQ(num_results, matches_.size()) << "Input text: " << text 200 << "\nTLD: \"" << desired_tld << "\""; 201 for (size_t i = 0; i < num_results; ++i) 202 EXPECT_EQ(expected_urls[i], matches_[i].destination_url.spec()); 203 } 204 205 void HistoryURLProviderTest::RunAdjustOffsetTest(const string16 text, 206 size_t expected_offset) { 207 AutocompleteInput input(text, string16(), false, false, true, 208 AutocompleteInput::ALL_MATCHES); 209 autocomplete_->Start(input, false); 210 if (!autocomplete_->done()) 211 MessageLoop::current()->Run(); 212 213 matches_ = autocomplete_->matches(); 214 ASSERT_GE(matches_.size(), 1U) << "Input text: " << text; 215 EXPECT_EQ(expected_offset, matches_[0].inline_autocomplete_offset); 216 } 217 218 TEST_F(HistoryURLProviderTest, PromoteShorterURLs) { 219 // Test that hosts get synthesized below popular pages. 220 const std::string expected_nonsynth[] = { 221 "http://slashdot.org/favorite_page.html", 222 "http://slashdot.org/", 223 }; 224 RunTest(ASCIIToUTF16("slash"), string16(), true, expected_nonsynth, 225 arraysize(expected_nonsynth)); 226 227 // Test that hosts get synthesized above less popular pages. 228 const std::string expected_synth[] = { 229 "http://kerneltrap.org/", 230 "http://kerneltrap.org/not_very_popular.html", 231 }; 232 RunTest(ASCIIToUTF16("kernel"), string16(), true, expected_synth, 233 arraysize(expected_synth)); 234 235 // Test that unpopular pages are ignored completely. 236 RunTest(ASCIIToUTF16("fresh"), string16(), true, NULL, 0); 237 238 // Test that if we have a synthesized host that matches a suggestion, they 239 // get combined into one. 240 const std::string expected_combine[] = { 241 "http://news.google.com/", 242 "http://news.google.com/?ned=us&topic=n", 243 }; 244 ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("news"), string16(), true, 245 expected_combine, arraysize(expected_combine))); 246 // The title should also have gotten set properly on the host for the 247 // synthesized one, since it was also in the results. 248 EXPECT_EQ(ASCIIToUTF16("Google News"), matches_.front().description); 249 250 // Test that short URL matching works correctly as the user types more 251 // (several tests): 252 // The entry for foo.com is the best of all five foo.com* entries. 253 const std::string short_1[] = { 254 "http://foo.com/", 255 "http://foo.com/dir/another/again/myfile.html", 256 "http://foo.com/dir/", 257 }; 258 RunTest(ASCIIToUTF16("foo"), string16(), true, short_1, arraysize(short_1)); 259 260 // When the user types the whole host, make sure we don't get two results for 261 // it. 262 const std::string short_2[] = { 263 "http://foo.com/", 264 "http://foo.com/dir/another/again/myfile.html", 265 "http://foo.com/dir/", 266 "http://foo.com/dir/another/", 267 }; 268 RunTest(ASCIIToUTF16("foo.com"), string16(), true, short_2, 269 arraysize(short_2)); 270 RunTest(ASCIIToUTF16("foo.com/"), string16(), true, short_2, 271 arraysize(short_2)); 272 273 // The filename is the second best of the foo.com* entries, but there is a 274 // shorter URL that's "good enough". The host doesn't match the user input 275 // and so should not appear. 276 const std::string short_3[] = { 277 "http://foo.com/d", 278 "http://foo.com/dir/another/", 279 "http://foo.com/dir/another/again/myfile.html", 280 "http://foo.com/dir/", 281 }; 282 RunTest(ASCIIToUTF16("foo.com/d"), string16(), true, short_3, 283 arraysize(short_3)); 284 285 // We shouldn't promote shorter URLs than the best if they're not good 286 // enough. 287 const std::string short_4[] = { 288 "http://foo.com/dir/another/a", 289 "http://foo.com/dir/another/again/myfile.html", 290 "http://foo.com/dir/another/again/", 291 }; 292 RunTest(ASCIIToUTF16("foo.com/dir/another/a"), string16(), true, short_4, 293 arraysize(short_4)); 294 295 // Exact matches should always be best no matter how much more another match 296 // has been typed. 297 const std::string short_5a[] = { 298 "http://gooey/", 299 "http://www.google.com/", 300 }; 301 const std::string short_5b[] = { 302 "http://go/", 303 "http://gooey/", 304 "http://www.google.com/", 305 }; 306 RunTest(ASCIIToUTF16("g"), string16(), false, short_5a, arraysize(short_5a)); 307 RunTest(ASCIIToUTF16("go"), string16(), false, short_5b, arraysize(short_5b)); 308 } 309 310 TEST_F(HistoryURLProviderTest, CullRedirects) { 311 // URLs we will be using, plus the visit counts they will initially get 312 // (the redirect set below will also increment the visit counts). We want 313 // the results to be in A,B,C order. Note also that our visit counts are 314 // all high enough so that domain synthesizing won't get triggered. 315 struct RedirectCase { 316 const char* url; 317 int count; 318 }; 319 static const RedirectCase redirect[] = { 320 {"http://redirects/A", 30}, 321 {"http://redirects/B", 20}, 322 {"http://redirects/C", 10} 323 }; 324 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(redirect); i++) { 325 history_service_->AddPageWithDetails(GURL(redirect[i].url), 326 UTF8ToUTF16("Title"), 327 redirect[i].count, redirect[i].count, 328 Time::Now(), false, 329 history::SOURCE_BROWSED); 330 } 331 332 // Create a B->C->A redirect chain, but set the visit counts such that they 333 // will appear in A,B,C order in the results. The autocomplete query will 334 // search for the most recent visit when looking for redirects, so this will 335 // be found even though the previous visits had no redirects. 336 history::RedirectList redirects_to_a; 337 redirects_to_a.push_back(GURL(redirect[1].url)); 338 redirects_to_a.push_back(GURL(redirect[2].url)); 339 redirects_to_a.push_back(GURL(redirect[0].url)); 340 history_service_->AddPage(GURL(redirect[0].url), NULL, 0, GURL(), 341 PageTransition::TYPED, redirects_to_a, 342 history::SOURCE_BROWSED, true); 343 344 // Because all the results are part of a redirect chain with other results, 345 // all but the first one (A) should be culled. We should get the default 346 // "what you typed" result, plus this one. 347 const string16 typing(ASCIIToUTF16("http://redirects/")); 348 const std::string expected_results[] = { 349 UTF16ToUTF8(typing), 350 redirect[0].url}; 351 RunTest(typing, string16(), true, expected_results, 352 arraysize(expected_results)); 353 } 354 355 TEST_F(HistoryURLProviderTest, WhatYouTyped) { 356 // Make sure we suggest a What You Typed match at the right times. 357 RunTest(ASCIIToUTF16("wytmatch"), string16(), false, NULL, 0); 358 RunTest(ASCIIToUTF16("wytmatch foo bar"), string16(), false, NULL, 0); 359 RunTest(ASCIIToUTF16("wytmatch+foo+bar"), string16(), false, NULL, 0); 360 RunTest(ASCIIToUTF16("wytmatch+foo+bar.com"), string16(), false, NULL, 0); 361 362 const std::string results_1[] = {"http://www.wytmatch.com/"}; 363 RunTest(ASCIIToUTF16("wytmatch"), ASCIIToUTF16("com"), false, results_1, 364 arraysize(results_1)); 365 366 const std::string results_2[] = {"http://wytmatch%20foo%20bar/"}; 367 RunTest(ASCIIToUTF16("http://wytmatch foo bar"), string16(), false, results_2, 368 arraysize(results_2)); 369 370 const std::string results_3[] = {"https://wytmatch%20foo%20bar/"}; 371 RunTest(ASCIIToUTF16("https://wytmatch foo bar"), string16(), false, 372 results_3, arraysize(results_3)); 373 374 // Test the corner case where a user has fully typed a previously visited 375 // intranet address and is now hitting ctrl-enter, which completes to a 376 // previously unvisted internet domain. 377 const std::string binky_results[] = {"http://binky/"}; 378 const std::string binky_com_results[] = { 379 "http://www.binky.com/", 380 "http://binky/", 381 }; 382 RunTest(ASCIIToUTF16("binky"), string16(), false, binky_results, 383 arraysize(binky_results)); 384 RunTest(ASCIIToUTF16("binky"), ASCIIToUTF16("com"), false, binky_com_results, 385 arraysize(binky_com_results)); 386 387 // Test the related case where a user has fully typed a previously visited 388 // intranet address and is now hitting ctrl-enter, which completes to a 389 // previously visted internet domain. 390 const std::string winky_results[] = { 391 "http://winky/", 392 "http://www.winky.com/", 393 }; 394 const std::string winky_com_results[] = { 395 "http://www.winky.com/", 396 "http://winky/", 397 }; 398 RunTest(ASCIIToUTF16("winky"), string16(), false, winky_results, 399 arraysize(winky_results)); 400 RunTest(ASCIIToUTF16("winky"), ASCIIToUTF16("com"), false, winky_com_results, 401 arraysize(winky_com_results)); 402 } 403 404 TEST_F(HistoryURLProviderTest, Fixup) { 405 // Test for various past crashes we've had. 406 RunTest(ASCIIToUTF16("\\"), string16(), false, NULL, 0); 407 RunTest(ASCIIToUTF16("#"), string16(), false, NULL, 0); 408 RunTest(ASCIIToUTF16("%20"), string16(), false, NULL, 0); 409 RunTest(WideToUTF16(L"\uff65@s"), string16(), false, NULL, 0); 410 RunTest(WideToUTF16(L"\u2015\u2015@ \uff7c"), string16(), false, NULL, 0); 411 412 // Fixing up "file:" should result in an inline autocomplete offset of just 413 // after "file:", not just after "file://". 414 const string16 input_1(ASCIIToUTF16("file:")); 415 const std::string fixup_1[] = {"file:///C:/foo.txt"}; 416 ASSERT_NO_FATAL_FAILURE(RunTest(input_1, string16(), false, fixup_1, 417 arraysize(fixup_1))); 418 EXPECT_EQ(input_1.length(), matches_.front().inline_autocomplete_offset); 419 420 // Fixing up "http:/" should result in an inline autocomplete offset of just 421 // after "http:/", not just after "http:". 422 const string16 input_2(ASCIIToUTF16("http:/")); 423 const std::string fixup_2[] = { 424 "http://bogussite.com/a", 425 "http://bogussite.com/b", 426 "http://bogussite.com/c", 427 }; 428 ASSERT_NO_FATAL_FAILURE(RunTest(input_2, string16(), false, fixup_2, 429 arraysize(fixup_2))); 430 EXPECT_EQ(input_2.length(), matches_.front().inline_autocomplete_offset); 431 432 // Adding a TLD to a small number like "56" should result in "www.56.com" 433 // rather than "0.0.0.56.com". 434 const std::string fixup_3[] = {"http://www.56.com/"}; 435 RunTest(ASCIIToUTF16("56"), ASCIIToUTF16("com"), true, fixup_3, 436 arraysize(fixup_3)); 437 438 // An input looks like a IP address like "127.0.0.1" should result in 439 // "http://127.0.0.1/". 440 const std::string fixup_4[] = {"http://127.0.0.1/"}; 441 RunTest(ASCIIToUTF16("127.0.0.1"), string16(), false, fixup_4, 442 arraysize(fixup_4)); 443 444 // An number "17173" should result in "http://www.17173.com/" in db. 445 const std::string fixup_5[] = {"http://www.17173.com/"}; 446 RunTest(ASCIIToUTF16("17173"), string16(), false, fixup_5, 447 arraysize(fixup_5)); 448 } 449 450 TEST_F(HistoryURLProviderTest, AdjustOffset) { 451 RunAdjustOffsetTest(WideToUTF16(L"http://www.\uAD50\uC721"), 13); 452 RunAdjustOffsetTest(ASCIIToUTF16("http://spaces.com/path%20with%20spa"), 31); 453 RunAdjustOffsetTest(ASCIIToUTF16("http://ms/c++ s"), 15); 454 } 455 456 // Make sure the results for the input 'p' don't change between the first and 457 // second passes. 458 TEST_F(HistoryURLProviderTest, EmptyVisits) { 459 // Wait for history to create the in memory DB. 460 profile_->BlockUntilHistoryProcessesPendingRequests(); 461 462 AutocompleteInput input(ASCIIToUTF16("p"), string16(), false, false, true, 463 AutocompleteInput::ALL_MATCHES); 464 autocomplete_->Start(input, false); 465 // HistoryURLProvider shouldn't be done (waiting on async results). 466 EXPECT_FALSE(autocomplete_->done()); 467 468 // We should get back an entry for pandora. 469 matches_ = autocomplete_->matches(); 470 ASSERT_GT(matches_.size(), 0u); 471 EXPECT_EQ(GURL("http://pandora.com/"), matches_[0].destination_url); 472 int pandora_relevance = matches_[0].relevance; 473 474 // Run the message loop. When |autocomplete_| finishes the loop is quit. 475 MessageLoop::current()->Run(); 476 EXPECT_TRUE(autocomplete_->done()); 477 matches_ = autocomplete_->matches(); 478 ASSERT_GT(matches_.size(), 0u); 479 EXPECT_EQ(GURL("http://pandora.com/"), matches_[0].destination_url); 480 EXPECT_EQ(pandora_relevance, matches_[0].relevance); 481 } 482 483 TEST_F(HistoryURLProviderTestNoDB, NavigateWithoutDB) { 484 // Ensure that we will still produce matches for navigation when there is no 485 // database. 486 std::string navigation_1[] = {"http://test.com/"}; 487 RunTest(ASCIIToUTF16("test.com"), string16(), false, navigation_1, 488 arraysize(navigation_1)); 489 490 std::string navigation_2[] = {"http://slash/"}; 491 RunTest(ASCIIToUTF16("slash"), string16(), false, navigation_2, 492 arraysize(navigation_2)); 493 494 RunTest(ASCIIToUTF16("this is a query"), string16(), false, NULL, 0); 495 } 496