1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "app/sql/connection.h" 6 #include "base/file_path.h" 7 #include "base/file_util.h" 8 #include "base/message_loop.h" 9 #include "base/utf_string_conversions.h" 10 #include "chrome/browser/history/text_database_manager.h" 11 #include "chrome/browser/history/visit_database.h" 12 #include "testing/gtest/include/gtest/gtest.h" 13 14 using base::Time; 15 using base::TimeDelta; 16 using base::TimeTicks; 17 18 namespace history { 19 20 namespace { 21 22 const char* kURL1 = "http://www.google.com/asdf"; 23 const char* kTitle1 = "Google A"; 24 const char* kBody1 = "FOO page one."; 25 26 const char* kURL2 = "http://www.google.com/qwer"; 27 const char* kTitle2 = "Google B"; 28 const char* kBody2 = "FOO two."; 29 30 const char* kURL3 = "http://www.google.com/zxcv"; 31 const char* kTitle3 = "Google C"; 32 const char* kBody3 = "FOO drei"; 33 34 const char* kURL4 = "http://www.google.com/hjkl"; 35 const char* kTitle4 = "Google D"; 36 const char* kBody4 = "FOO lalala four."; 37 38 const char* kURL5 = "http://www.google.com/uiop"; 39 const char* kTitle5 = "Google cinq"; 40 const char* kBody5 = "FOO page one."; 41 42 // This provides a simple implementation of a URL+VisitDatabase using an 43 // in-memory sqlite connection. The text database manager expects to be able to 44 // update the visit database to keep in sync. 45 class InMemDB : public URLDatabase, public VisitDatabase { 46 public: 47 InMemDB() { 48 EXPECT_TRUE(db_.OpenInMemory()); 49 CreateURLTable(false); 50 InitVisitTable(); 51 } 52 ~InMemDB() { 53 } 54 55 private: 56 virtual sql::Connection& GetDB() { return db_; } 57 58 sql::Connection db_; 59 60 DISALLOW_COPY_AND_ASSIGN(InMemDB); 61 }; 62 63 // Adds all the pages once, and the first page once more in the next month. 64 // The times of all the pages will be filled into |*times|. 65 void AddAllPages(TextDatabaseManager& manager, VisitDatabase* visit_db, 66 std::vector<Time>* times) { 67 Time::Exploded exploded; 68 memset(&exploded, 0, sizeof(Time::Exploded)); 69 70 // Put the visits in two different months so it will query across databases. 71 exploded.year = 2008; 72 exploded.month = 1; 73 exploded.day_of_month = 3; 74 75 VisitRow visit_row; 76 visit_row.url_id = 1; 77 visit_row.visit_time = Time::FromUTCExploded(exploded); 78 visit_row.referring_visit = 0; 79 visit_row.transition = 0; 80 visit_row.segment_id = 0; 81 visit_row.is_indexed = false; 82 VisitID visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED); 83 84 times->push_back(visit_row.visit_time); 85 manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id, 86 visit_row.visit_time, UTF8ToUTF16(kTitle1), 87 UTF8ToUTF16(kBody1)); 88 89 exploded.day_of_month++; 90 visit_row.url_id = 2; 91 visit_row.visit_time = Time::FromUTCExploded(exploded); 92 visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED); 93 times->push_back(visit_row.visit_time); 94 manager.AddPageData(GURL(kURL2), visit_row.url_id, visit_row.visit_id, 95 visit_row.visit_time, UTF8ToUTF16(kTitle2), 96 UTF8ToUTF16(kBody2)); 97 98 exploded.day_of_month++; 99 visit_row.url_id = 2; 100 visit_row.visit_time = Time::FromUTCExploded(exploded); 101 visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED); 102 times->push_back(visit_row.visit_time); 103 manager.AddPageData(GURL(kURL3), visit_row.url_id, visit_row.visit_id, 104 visit_row.visit_time, UTF8ToUTF16(kTitle3), 105 UTF8ToUTF16(kBody3)); 106 107 // Put the next ones in the next month. 108 exploded.month++; 109 visit_row.url_id = 2; 110 visit_row.visit_time = Time::FromUTCExploded(exploded); 111 visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED); 112 times->push_back(visit_row.visit_time); 113 manager.AddPageData(GURL(kURL4), visit_row.url_id, visit_row.visit_id, 114 visit_row.visit_time, UTF8ToUTF16(kTitle4), 115 UTF8ToUTF16(kBody4)); 116 117 exploded.day_of_month++; 118 visit_row.url_id = 2; 119 visit_row.visit_time = Time::FromUTCExploded(exploded); 120 visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED); 121 times->push_back(visit_row.visit_time); 122 manager.AddPageData(GURL(kURL5), visit_row.url_id, visit_row.visit_id, 123 visit_row.visit_time, UTF8ToUTF16(kTitle5), 124 UTF8ToUTF16(kBody5)); 125 126 // Put the first one in again in the second month. 127 exploded.day_of_month++; 128 visit_row.url_id = 2; 129 visit_row.visit_time = Time::FromUTCExploded(exploded); 130 visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED); 131 times->push_back(visit_row.visit_time); 132 manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id, 133 visit_row.visit_time, UTF8ToUTF16(kTitle1), 134 UTF8ToUTF16(kBody1)); 135 } 136 137 bool ResultsHaveURL(const std::vector<TextDatabase::Match>& results, 138 const char* url) { 139 GURL gurl(url); 140 for (size_t i = 0; i < results.size(); i++) { 141 if (results[i].url == gurl) 142 return true; 143 } 144 return false; 145 } 146 147 } // namespace 148 149 class TextDatabaseManagerTest : public testing::Test { 150 public: 151 // Called manually by the test so it can report failure to initialize. 152 bool Init() { 153 return file_util::CreateNewTempDirectory( 154 FILE_PATH_LITERAL("TestSearchTest"), &dir_); 155 } 156 157 protected: 158 void SetUp() { 159 } 160 161 void TearDown() { 162 file_util::Delete(dir_, true); 163 } 164 165 MessageLoop message_loop_; 166 167 // Directory containing the databases. 168 FilePath dir_; 169 }; 170 171 // Tests basic querying. 172 TEST_F(TextDatabaseManagerTest, InsertQuery) { 173 ASSERT_TRUE(Init()); 174 InMemDB visit_db; 175 TextDatabaseManager manager(dir_, &visit_db, &visit_db); 176 ASSERT_TRUE(manager.Init(NULL)); 177 178 std::vector<Time> times; 179 AddAllPages(manager, &visit_db, ×); 180 181 QueryOptions options; 182 options.begin_time = times[0] - TimeDelta::FromDays(100); 183 options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100); 184 std::vector<TextDatabase::Match> results; 185 Time first_time_searched; 186 manager.GetTextMatches(UTF8ToUTF16("FOO"), options, 187 &results, &first_time_searched); 188 189 // We should have matched every page. 190 EXPECT_EQ(6U, results.size()); 191 EXPECT_TRUE(ResultsHaveURL(results, kURL1)); 192 EXPECT_TRUE(ResultsHaveURL(results, kURL2)); 193 EXPECT_TRUE(ResultsHaveURL(results, kURL3)); 194 EXPECT_TRUE(ResultsHaveURL(results, kURL4)); 195 EXPECT_TRUE(ResultsHaveURL(results, kURL5)); 196 197 // The first time searched should have been the first page's time or before 198 // (it could have eliminated some time for us). 199 EXPECT_TRUE(first_time_searched <= times[0]); 200 } 201 202 // Tests that adding page components piecemeal will get them added properly. 203 // This does not supply a visit to update, this mode is used only by the unit 204 // tests right now, but we test it anyway. 205 TEST_F(TextDatabaseManagerTest, InsertCompleteNoVisit) { 206 ASSERT_TRUE(Init()); 207 InMemDB visit_db; 208 TextDatabaseManager manager(dir_, &visit_db, &visit_db); 209 ASSERT_TRUE(manager.Init(NULL)); 210 211 // First add one without a visit. 212 const GURL url(kURL1); 213 manager.AddPageURL(url, 0, 0, Time::Now()); 214 manager.AddPageTitle(url, UTF8ToUTF16(kTitle1)); 215 manager.AddPageContents(url, UTF8ToUTF16(kBody1)); 216 217 // Check that the page got added. 218 QueryOptions options; 219 std::vector<TextDatabase::Match> results; 220 Time first_time_searched; 221 222 manager.GetTextMatches(UTF8ToUTF16("FOO"), options, 223 &results, &first_time_searched); 224 ASSERT_EQ(1U, results.size()); 225 EXPECT_EQ(kTitle1, UTF16ToUTF8(results[0].title)); 226 } 227 228 // Like InsertCompleteNoVisit but specifies a visit to update. We check that the 229 // visit was updated properly. 230 TEST_F(TextDatabaseManagerTest, InsertCompleteVisit) { 231 ASSERT_TRUE(Init()); 232 InMemDB visit_db; 233 TextDatabaseManager manager(dir_, &visit_db, &visit_db); 234 ASSERT_TRUE(manager.Init(NULL)); 235 236 // First add a visit to a page. We can just make up a URL ID since there is 237 // not actually any URL database around. 238 VisitRow visit; 239 visit.url_id = 1; 240 visit.visit_time = Time::Now(); 241 visit.referring_visit = 0; 242 visit.transition = PageTransition::LINK; 243 visit.segment_id = 0; 244 visit.is_indexed = false; 245 visit_db.AddVisit(&visit, SOURCE_BROWSED); 246 247 // Add a full text indexed entry for that visit. 248 const GURL url(kURL2); 249 manager.AddPageURL(url, visit.url_id, visit.visit_id, visit.visit_time); 250 manager.AddPageContents(url, UTF8ToUTF16(kBody2)); 251 manager.AddPageTitle(url, UTF8ToUTF16(kTitle2)); 252 253 // Check that the page got added. 254 QueryOptions options; 255 std::vector<TextDatabase::Match> results; 256 Time first_time_searched; 257 258 manager.GetTextMatches(UTF8ToUTF16("FOO"), options, 259 &results, &first_time_searched); 260 ASSERT_EQ(1U, results.size()); 261 EXPECT_EQ(kTitle2, UTF16ToUTF8(results[0].title)); 262 263 // Check that the visit got updated for its new indexed state. 264 VisitRow out_visit; 265 ASSERT_TRUE(visit_db.GetRowForVisit(visit.visit_id, &out_visit)); 266 EXPECT_TRUE(out_visit.is_indexed); 267 } 268 269 // Tests that partial inserts that expire are added to the database. 270 TEST_F(TextDatabaseManagerTest, InsertPartial) { 271 ASSERT_TRUE(Init()); 272 InMemDB visit_db; 273 TextDatabaseManager manager(dir_, &visit_db, &visit_db); 274 ASSERT_TRUE(manager.Init(NULL)); 275 276 // Add the first one with just a URL. 277 GURL url1(kURL1); 278 manager.AddPageURL(url1, 0, 0, Time::Now()); 279 280 // Now add a second one with a URL and title. 281 GURL url2(kURL2); 282 manager.AddPageURL(url2, 0, 0, Time::Now()); 283 manager.AddPageTitle(url2, UTF8ToUTF16(kTitle2)); 284 285 // The third one has a URL and body. 286 GURL url3(kURL3); 287 manager.AddPageURL(url3, 0, 0, Time::Now()); 288 manager.AddPageContents(url3, UTF8ToUTF16(kBody3)); 289 290 // Expire stuff very fast. This assumes that the time between the first 291 // AddPageURL and this line is less than the expiration time (20 seconds). 292 TimeTicks added_time = TimeTicks::Now(); 293 TimeTicks expire_time = added_time + TimeDelta::FromSeconds(5); 294 manager.FlushOldChangesForTime(expire_time); 295 296 // Do a query, nothing should be added yet. 297 QueryOptions options; 298 std::vector<TextDatabase::Match> results; 299 Time first_time_searched; 300 manager.GetTextMatches(UTF8ToUTF16("google"), options, 301 &results, &first_time_searched); 302 ASSERT_EQ(0U, results.size()); 303 304 // Compute a time threshold that will cause everything to be flushed, and 305 // poke at the manager's internals to cause this to happen. 306 expire_time = added_time + TimeDelta::FromDays(1); 307 manager.FlushOldChangesForTime(expire_time); 308 309 // Now we should have all 3 URLs added. 310 manager.GetTextMatches(UTF8ToUTF16("google"), options, 311 &results, &first_time_searched); 312 ASSERT_EQ(3U, results.size()); 313 EXPECT_TRUE(ResultsHaveURL(results, kURL1)); 314 EXPECT_TRUE(ResultsHaveURL(results, kURL2)); 315 EXPECT_TRUE(ResultsHaveURL(results, kURL3)); 316 } 317 318 // Tests that partial inserts (due to timeouts) will still get updated if the 319 // data comes in later. 320 TEST_F(TextDatabaseManagerTest, PartialComplete) { 321 ASSERT_TRUE(Init()); 322 InMemDB visit_db; 323 TextDatabaseManager manager(dir_, &visit_db, &visit_db); 324 ASSERT_TRUE(manager.Init(NULL)); 325 326 Time added_time = Time::Now(); 327 GURL url(kURL1); 328 329 // We have to have the URL in the URL and visit databases for this test to 330 // work. 331 URLRow url_row(url); 332 url_row.set_title(UTF8ToUTF16("chocolate")); 333 URLID url_id = visit_db.AddURL(url_row); 334 ASSERT_TRUE(url_id); 335 VisitRow visit_row; 336 visit_row.url_id = url_id; 337 visit_row.visit_time = added_time; 338 visit_db.AddVisit(&visit_row, SOURCE_BROWSED); 339 340 // Add a URL with no title or body, and say that it expired. 341 manager.AddPageURL(url, 0, 0, added_time); 342 TimeTicks expire_time = TimeTicks::Now() + TimeDelta::FromDays(1); 343 manager.FlushOldChangesForTime(expire_time); 344 345 // Add the title. We should be able to query based on that. The title in the 346 // URL row we set above should not come into the picture. 347 manager.AddPageTitle(url, UTF8ToUTF16("Some unique title")); 348 Time first_time_searched; 349 QueryOptions options; 350 std::vector<TextDatabase::Match> results; 351 manager.GetTextMatches(UTF8ToUTF16("unique"), options, 352 &results, &first_time_searched); 353 EXPECT_EQ(1U, results.size()); 354 manager.GetTextMatches(UTF8ToUTF16("chocolate"), options, 355 &results, &first_time_searched); 356 EXPECT_EQ(0U, results.size()); 357 358 // Now add the body, which should be queryable. 359 manager.AddPageContents(url, UTF8ToUTF16("Very awesome body")); 360 manager.GetTextMatches(UTF8ToUTF16("awesome"), options, &results, &first_time_searched); 361 EXPECT_EQ(1U, results.size()); 362 363 // Adding the body will actually copy the title from the URL table rather 364 // than the previously indexed row (we made them not match above). This isn't 365 // necessarily what we want, but it's how it's implemented, and we don't want 366 // to regress it. 367 manager.GetTextMatches(UTF8ToUTF16("chocolate"), options, &results, &first_time_searched); 368 EXPECT_EQ(1U, results.size()); 369 } 370 371 // Tests that changes get properly committed to disk. 372 TEST_F(TextDatabaseManagerTest, Writing) { 373 ASSERT_TRUE(Init()); 374 375 QueryOptions options; 376 std::vector<TextDatabase::Match> results; 377 Time first_time_searched; 378 379 InMemDB visit_db; 380 381 // Create the manager and write some stuff to it. 382 { 383 TextDatabaseManager manager(dir_, &visit_db, &visit_db); 384 ASSERT_TRUE(manager.Init(NULL)); 385 386 std::vector<Time> times; 387 AddAllPages(manager, &visit_db, ×); 388 389 // We should have matched every page. 390 manager.GetTextMatches(UTF8ToUTF16("FOO"), options, &results, &first_time_searched); 391 EXPECT_EQ(6U, results.size()); 392 } 393 results.clear(); 394 395 // Recreate the manager and make sure it finds the written stuff. 396 { 397 TextDatabaseManager manager(dir_, &visit_db, &visit_db); 398 ASSERT_TRUE(manager.Init(NULL)); 399 400 // We should have matched every page again. 401 manager.GetTextMatches(UTF8ToUTF16("FOO"), options, 402 &results, &first_time_searched); 403 EXPECT_EQ(6U, results.size()); 404 } 405 } 406 407 // Tests that changes get properly committed to disk, as in the Writing test 408 // above, but when there is a transaction around the adds. 409 TEST_F(TextDatabaseManagerTest, WritingTransaction) { 410 ASSERT_TRUE(Init()); 411 412 QueryOptions options; 413 std::vector<TextDatabase::Match> results; 414 Time first_time_searched; 415 416 InMemDB visit_db; 417 418 // Create the manager and write some stuff to it. 419 { 420 TextDatabaseManager manager(dir_, &visit_db, &visit_db); 421 ASSERT_TRUE(manager.Init(NULL)); 422 423 std::vector<Time> times; 424 manager.BeginTransaction(); 425 AddAllPages(manager, &visit_db, ×); 426 // "Forget" to commit, it should be autocommittedd for us. 427 428 // We should have matched every page. 429 manager.GetTextMatches(UTF8ToUTF16("FOO"), options, 430 &results, &first_time_searched); 431 EXPECT_EQ(6U, results.size()); 432 } 433 results.clear(); 434 435 // Recreate the manager and make sure it finds the written stuff. 436 { 437 TextDatabaseManager manager(dir_, &visit_db, &visit_db); 438 ASSERT_TRUE(manager.Init(NULL)); 439 440 // We should have matched every page again. 441 manager.GetTextMatches(UTF8ToUTF16("FOO"), options, 442 &results, &first_time_searched); 443 EXPECT_EQ(6U, results.size()); 444 } 445 } 446 447 // Tests querying where the maximum number of items is met. 448 TEST_F(TextDatabaseManagerTest, QueryMax) { 449 ASSERT_TRUE(Init()); 450 InMemDB visit_db; 451 TextDatabaseManager manager(dir_, &visit_db, &visit_db); 452 ASSERT_TRUE(manager.Init(NULL)); 453 454 std::vector<Time> times; 455 AddAllPages(manager, &visit_db, ×); 456 457 string16 foo = UTF8ToUTF16("FOO"); 458 459 QueryOptions options; 460 options.begin_time = times[0] - TimeDelta::FromDays(100); 461 options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100); 462 options.max_count = 2; 463 std::vector<TextDatabase::Match> results; 464 Time first_time_searched; 465 manager.GetTextMatches(foo, options, &results, &first_time_searched); 466 467 // We should have gotten the last two pages as results (the first page is 468 // also the last). 469 EXPECT_EQ(2U, results.size()); 470 EXPECT_TRUE(first_time_searched <= times[4]); 471 EXPECT_TRUE(ResultsHaveURL(results, kURL5)); 472 EXPECT_TRUE(ResultsHaveURL(results, kURL1)); 473 474 // Asking for 4 pages, the first one should be in another DB. 475 options.max_count = 4; 476 manager.GetTextMatches(foo, options, &results, &first_time_searched); 477 478 EXPECT_EQ(4U, results.size()); 479 EXPECT_TRUE(first_time_searched <= times[4]); 480 EXPECT_TRUE(ResultsHaveURL(results, kURL3)); 481 EXPECT_TRUE(ResultsHaveURL(results, kURL4)); 482 EXPECT_TRUE(ResultsHaveURL(results, kURL5)); 483 EXPECT_TRUE(ResultsHaveURL(results, kURL1)); 484 } 485 486 // Tests querying backwards in time in chunks. 487 TEST_F(TextDatabaseManagerTest, QueryBackwards) { 488 ASSERT_TRUE(Init()); 489 InMemDB visit_db; 490 TextDatabaseManager manager(dir_, &visit_db, &visit_db); 491 ASSERT_TRUE(manager.Init(NULL)); 492 493 std::vector<Time> times; 494 AddAllPages(manager, &visit_db, ×); 495 496 string16 foo = UTF8ToUTF16("FOO"); 497 498 // First do a query for all time, but with a max of 2. This will give us the 499 // last two results and will tell us where to start searching when we want 500 // to go back in time. 501 QueryOptions options; 502 options.begin_time = times[0] - TimeDelta::FromDays(100); 503 options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100); 504 options.max_count = 2; 505 std::vector<TextDatabase::Match> results; 506 Time first_time_searched; 507 manager.GetTextMatches(foo, options, &results, &first_time_searched); 508 509 // Check that we got the last two results. 510 EXPECT_EQ(2U, results.size()); 511 EXPECT_TRUE(first_time_searched <= times[4]); 512 EXPECT_TRUE(ResultsHaveURL(results, kURL5)); 513 EXPECT_TRUE(ResultsHaveURL(results, kURL1)); 514 515 // Query the previous two URLs and make sure we got the correct ones. 516 options.end_time = first_time_searched; 517 manager.GetTextMatches(foo, options, &results, &first_time_searched); 518 EXPECT_EQ(2U, results.size()); 519 EXPECT_TRUE(first_time_searched <= times[2]); 520 EXPECT_TRUE(ResultsHaveURL(results, kURL3)); 521 EXPECT_TRUE(ResultsHaveURL(results, kURL4)); 522 523 // Query the previous two URLs... 524 options.end_time = first_time_searched; 525 manager.GetTextMatches(foo, options, &results, &first_time_searched); 526 EXPECT_EQ(2U, results.size()); 527 EXPECT_TRUE(first_time_searched <= times[0]); 528 EXPECT_TRUE(ResultsHaveURL(results, kURL2)); 529 EXPECT_TRUE(ResultsHaveURL(results, kURL1)); 530 531 // Try to query some more, there should be no results. 532 options.end_time = first_time_searched; 533 manager.GetTextMatches(foo, options, &results, &first_time_searched); 534 EXPECT_EQ(0U, results.size()); 535 } 536 537 } // namespace history 538