Home | History | Annotate | Download | only in history
      1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "app/sql/connection.h"
      6 #include "base/file_path.h"
      7 #include "base/file_util.h"
      8 #include "base/message_loop.h"
      9 #include "base/utf_string_conversions.h"
     10 #include "chrome/browser/history/text_database_manager.h"
     11 #include "chrome/browser/history/visit_database.h"
     12 #include "testing/gtest/include/gtest/gtest.h"
     13 
     14 using base::Time;
     15 using base::TimeDelta;
     16 using base::TimeTicks;
     17 
     18 namespace history {
     19 
     20 namespace {
     21 
     22 const char* kURL1 = "http://www.google.com/asdf";
     23 const char* kTitle1 = "Google A";
     24 const char* kBody1 = "FOO page one.";
     25 
     26 const char* kURL2 = "http://www.google.com/qwer";
     27 const char* kTitle2 = "Google B";
     28 const char* kBody2 = "FOO two.";
     29 
     30 const char* kURL3 = "http://www.google.com/zxcv";
     31 const char* kTitle3 = "Google C";
     32 const char* kBody3 = "FOO drei";
     33 
     34 const char* kURL4 = "http://www.google.com/hjkl";
     35 const char* kTitle4 = "Google D";
     36 const char* kBody4 = "FOO lalala four.";
     37 
     38 const char* kURL5 = "http://www.google.com/uiop";
     39 const char* kTitle5 = "Google cinq";
     40 const char* kBody5 = "FOO page one.";
     41 
     42 // This provides a simple implementation of a URL+VisitDatabase using an
     43 // in-memory sqlite connection. The text database manager expects to be able to
     44 // update the visit database to keep in sync.
     45 class InMemDB : public URLDatabase, public VisitDatabase {
     46  public:
     47   InMemDB() {
     48     EXPECT_TRUE(db_.OpenInMemory());
     49     CreateURLTable(false);
     50     InitVisitTable();
     51   }
     52   ~InMemDB() {
     53   }
     54 
     55  private:
     56   virtual sql::Connection& GetDB() { return db_; }
     57 
     58   sql::Connection db_;
     59 
     60   DISALLOW_COPY_AND_ASSIGN(InMemDB);
     61 };
     62 
     63 // Adds all the pages once, and the first page once more in the next month.
     64 // The times of all the pages will be filled into |*times|.
     65 void AddAllPages(TextDatabaseManager& manager, VisitDatabase* visit_db,
     66                  std::vector<Time>* times) {
     67   Time::Exploded exploded;
     68   memset(&exploded, 0, sizeof(Time::Exploded));
     69 
     70   // Put the visits in two different months so it will query across databases.
     71   exploded.year = 2008;
     72   exploded.month = 1;
     73   exploded.day_of_month = 3;
     74 
     75   VisitRow visit_row;
     76   visit_row.url_id = 1;
     77   visit_row.visit_time = Time::FromUTCExploded(exploded);
     78   visit_row.referring_visit = 0;
     79   visit_row.transition = 0;
     80   visit_row.segment_id = 0;
     81   visit_row.is_indexed = false;
     82   VisitID visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
     83 
     84   times->push_back(visit_row.visit_time);
     85   manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id,
     86                       visit_row.visit_time, UTF8ToUTF16(kTitle1),
     87                       UTF8ToUTF16(kBody1));
     88 
     89   exploded.day_of_month++;
     90   visit_row.url_id = 2;
     91   visit_row.visit_time = Time::FromUTCExploded(exploded);
     92   visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
     93   times->push_back(visit_row.visit_time);
     94   manager.AddPageData(GURL(kURL2), visit_row.url_id, visit_row.visit_id,
     95                       visit_row.visit_time, UTF8ToUTF16(kTitle2),
     96                       UTF8ToUTF16(kBody2));
     97 
     98   exploded.day_of_month++;
     99   visit_row.url_id = 2;
    100   visit_row.visit_time = Time::FromUTCExploded(exploded);
    101   visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
    102   times->push_back(visit_row.visit_time);
    103   manager.AddPageData(GURL(kURL3), visit_row.url_id, visit_row.visit_id,
    104                       visit_row.visit_time, UTF8ToUTF16(kTitle3),
    105                       UTF8ToUTF16(kBody3));
    106 
    107   // Put the next ones in the next month.
    108   exploded.month++;
    109   visit_row.url_id = 2;
    110   visit_row.visit_time = Time::FromUTCExploded(exploded);
    111   visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
    112   times->push_back(visit_row.visit_time);
    113   manager.AddPageData(GURL(kURL4), visit_row.url_id, visit_row.visit_id,
    114                       visit_row.visit_time, UTF8ToUTF16(kTitle4),
    115                       UTF8ToUTF16(kBody4));
    116 
    117   exploded.day_of_month++;
    118   visit_row.url_id = 2;
    119   visit_row.visit_time = Time::FromUTCExploded(exploded);
    120   visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
    121   times->push_back(visit_row.visit_time);
    122   manager.AddPageData(GURL(kURL5), visit_row.url_id, visit_row.visit_id,
    123                       visit_row.visit_time, UTF8ToUTF16(kTitle5),
    124                       UTF8ToUTF16(kBody5));
    125 
    126   // Put the first one in again in the second month.
    127   exploded.day_of_month++;
    128   visit_row.url_id = 2;
    129   visit_row.visit_time = Time::FromUTCExploded(exploded);
    130   visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED);
    131   times->push_back(visit_row.visit_time);
    132   manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id,
    133                       visit_row.visit_time, UTF8ToUTF16(kTitle1),
    134                       UTF8ToUTF16(kBody1));
    135 }
    136 
    137 bool ResultsHaveURL(const std::vector<TextDatabase::Match>& results,
    138                     const char* url) {
    139   GURL gurl(url);
    140   for (size_t i = 0; i < results.size(); i++) {
    141     if (results[i].url == gurl)
    142       return true;
    143   }
    144   return false;
    145 }
    146 
    147 }  // namespace
    148 
    149 class TextDatabaseManagerTest : public testing::Test {
    150  public:
    151   // Called manually by the test so it can report failure to initialize.
    152   bool Init() {
    153     return file_util::CreateNewTempDirectory(
    154         FILE_PATH_LITERAL("TestSearchTest"), &dir_);
    155   }
    156 
    157  protected:
    158   void SetUp() {
    159   }
    160 
    161   void TearDown() {
    162     file_util::Delete(dir_, true);
    163   }
    164 
    165   MessageLoop message_loop_;
    166 
    167   // Directory containing the databases.
    168   FilePath dir_;
    169 };
    170 
    171 // Tests basic querying.
    172 TEST_F(TextDatabaseManagerTest, InsertQuery) {
    173   ASSERT_TRUE(Init());
    174   InMemDB visit_db;
    175   TextDatabaseManager manager(dir_, &visit_db, &visit_db);
    176   ASSERT_TRUE(manager.Init(NULL));
    177 
    178   std::vector<Time> times;
    179   AddAllPages(manager, &visit_db, &times);
    180 
    181   QueryOptions options;
    182   options.begin_time = times[0] - TimeDelta::FromDays(100);
    183   options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
    184   std::vector<TextDatabase::Match> results;
    185   Time first_time_searched;
    186   manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
    187                          &results, &first_time_searched);
    188 
    189   // We should have matched every page.
    190   EXPECT_EQ(6U, results.size());
    191   EXPECT_TRUE(ResultsHaveURL(results, kURL1));
    192   EXPECT_TRUE(ResultsHaveURL(results, kURL2));
    193   EXPECT_TRUE(ResultsHaveURL(results, kURL3));
    194   EXPECT_TRUE(ResultsHaveURL(results, kURL4));
    195   EXPECT_TRUE(ResultsHaveURL(results, kURL5));
    196 
    197   // The first time searched should have been the first page's time or before
    198   // (it could have eliminated some time for us).
    199   EXPECT_TRUE(first_time_searched <= times[0]);
    200 }
    201 
    202 // Tests that adding page components piecemeal will get them added properly.
    203 // This does not supply a visit to update, this mode is used only by the unit
    204 // tests right now, but we test it anyway.
    205 TEST_F(TextDatabaseManagerTest, InsertCompleteNoVisit) {
    206   ASSERT_TRUE(Init());
    207   InMemDB visit_db;
    208   TextDatabaseManager manager(dir_, &visit_db, &visit_db);
    209   ASSERT_TRUE(manager.Init(NULL));
    210 
    211   // First add one without a visit.
    212   const GURL url(kURL1);
    213   manager.AddPageURL(url, 0, 0, Time::Now());
    214   manager.AddPageTitle(url, UTF8ToUTF16(kTitle1));
    215   manager.AddPageContents(url, UTF8ToUTF16(kBody1));
    216 
    217   // Check that the page got added.
    218   QueryOptions options;
    219   std::vector<TextDatabase::Match> results;
    220   Time first_time_searched;
    221 
    222   manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
    223                          &results, &first_time_searched);
    224   ASSERT_EQ(1U, results.size());
    225   EXPECT_EQ(kTitle1, UTF16ToUTF8(results[0].title));
    226 }
    227 
    228 // Like InsertCompleteNoVisit but specifies a visit to update. We check that the
    229 // visit was updated properly.
    230 TEST_F(TextDatabaseManagerTest, InsertCompleteVisit) {
    231   ASSERT_TRUE(Init());
    232   InMemDB visit_db;
    233   TextDatabaseManager manager(dir_, &visit_db, &visit_db);
    234   ASSERT_TRUE(manager.Init(NULL));
    235 
    236   // First add a visit to a page. We can just make up a URL ID since there is
    237   // not actually any URL database around.
    238   VisitRow visit;
    239   visit.url_id = 1;
    240   visit.visit_time = Time::Now();
    241   visit.referring_visit = 0;
    242   visit.transition = PageTransition::LINK;
    243   visit.segment_id = 0;
    244   visit.is_indexed = false;
    245   visit_db.AddVisit(&visit, SOURCE_BROWSED);
    246 
    247   // Add a full text indexed entry for that visit.
    248   const GURL url(kURL2);
    249   manager.AddPageURL(url, visit.url_id, visit.visit_id, visit.visit_time);
    250   manager.AddPageContents(url, UTF8ToUTF16(kBody2));
    251   manager.AddPageTitle(url, UTF8ToUTF16(kTitle2));
    252 
    253   // Check that the page got added.
    254   QueryOptions options;
    255   std::vector<TextDatabase::Match> results;
    256   Time first_time_searched;
    257 
    258   manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
    259                          &results, &first_time_searched);
    260   ASSERT_EQ(1U, results.size());
    261   EXPECT_EQ(kTitle2, UTF16ToUTF8(results[0].title));
    262 
    263   // Check that the visit got updated for its new indexed state.
    264   VisitRow out_visit;
    265   ASSERT_TRUE(visit_db.GetRowForVisit(visit.visit_id, &out_visit));
    266   EXPECT_TRUE(out_visit.is_indexed);
    267 }
    268 
    269 // Tests that partial inserts that expire are added to the database.
    270 TEST_F(TextDatabaseManagerTest, InsertPartial) {
    271   ASSERT_TRUE(Init());
    272   InMemDB visit_db;
    273   TextDatabaseManager manager(dir_, &visit_db, &visit_db);
    274   ASSERT_TRUE(manager.Init(NULL));
    275 
    276   // Add the first one with just a URL.
    277   GURL url1(kURL1);
    278   manager.AddPageURL(url1, 0, 0, Time::Now());
    279 
    280   // Now add a second one with a URL and title.
    281   GURL url2(kURL2);
    282   manager.AddPageURL(url2, 0, 0, Time::Now());
    283   manager.AddPageTitle(url2, UTF8ToUTF16(kTitle2));
    284 
    285   // The third one has a URL and body.
    286   GURL url3(kURL3);
    287   manager.AddPageURL(url3, 0, 0, Time::Now());
    288   manager.AddPageContents(url3, UTF8ToUTF16(kBody3));
    289 
    290   // Expire stuff very fast. This assumes that the time between the first
    291   // AddPageURL and this line is less than the expiration time (20 seconds).
    292   TimeTicks added_time = TimeTicks::Now();
    293   TimeTicks expire_time = added_time + TimeDelta::FromSeconds(5);
    294   manager.FlushOldChangesForTime(expire_time);
    295 
    296   // Do a query, nothing should be added yet.
    297   QueryOptions options;
    298   std::vector<TextDatabase::Match> results;
    299   Time first_time_searched;
    300   manager.GetTextMatches(UTF8ToUTF16("google"), options,
    301                          &results, &first_time_searched);
    302   ASSERT_EQ(0U, results.size());
    303 
    304   // Compute a time threshold that will cause everything to be flushed, and
    305   // poke at the manager's internals to cause this to happen.
    306   expire_time = added_time + TimeDelta::FromDays(1);
    307   manager.FlushOldChangesForTime(expire_time);
    308 
    309   // Now we should have all 3 URLs added.
    310   manager.GetTextMatches(UTF8ToUTF16("google"), options,
    311                          &results, &first_time_searched);
    312   ASSERT_EQ(3U, results.size());
    313   EXPECT_TRUE(ResultsHaveURL(results, kURL1));
    314   EXPECT_TRUE(ResultsHaveURL(results, kURL2));
    315   EXPECT_TRUE(ResultsHaveURL(results, kURL3));
    316 }
    317 
    318 // Tests that partial inserts (due to timeouts) will still get updated if the
    319 // data comes in later.
    320 TEST_F(TextDatabaseManagerTest, PartialComplete) {
    321   ASSERT_TRUE(Init());
    322   InMemDB visit_db;
    323   TextDatabaseManager manager(dir_, &visit_db, &visit_db);
    324   ASSERT_TRUE(manager.Init(NULL));
    325 
    326   Time added_time = Time::Now();
    327   GURL url(kURL1);
    328 
    329   // We have to have the URL in the URL and visit databases for this test to
    330   // work.
    331   URLRow url_row(url);
    332   url_row.set_title(UTF8ToUTF16("chocolate"));
    333   URLID url_id = visit_db.AddURL(url_row);
    334   ASSERT_TRUE(url_id);
    335   VisitRow visit_row;
    336   visit_row.url_id = url_id;
    337   visit_row.visit_time = added_time;
    338   visit_db.AddVisit(&visit_row, SOURCE_BROWSED);
    339 
    340   // Add a URL with no title or body, and say that it expired.
    341   manager.AddPageURL(url, 0, 0, added_time);
    342   TimeTicks expire_time = TimeTicks::Now() + TimeDelta::FromDays(1);
    343   manager.FlushOldChangesForTime(expire_time);
    344 
    345   // Add the title. We should be able to query based on that. The title in the
    346   // URL row we set above should not come into the picture.
    347   manager.AddPageTitle(url, UTF8ToUTF16("Some unique title"));
    348   Time first_time_searched;
    349   QueryOptions options;
    350   std::vector<TextDatabase::Match> results;
    351   manager.GetTextMatches(UTF8ToUTF16("unique"), options,
    352                          &results, &first_time_searched);
    353   EXPECT_EQ(1U, results.size());
    354   manager.GetTextMatches(UTF8ToUTF16("chocolate"), options,
    355                          &results, &first_time_searched);
    356   EXPECT_EQ(0U, results.size());
    357 
    358   // Now add the body, which should be queryable.
    359   manager.AddPageContents(url, UTF8ToUTF16("Very awesome body"));
    360   manager.GetTextMatches(UTF8ToUTF16("awesome"), options, &results, &first_time_searched);
    361   EXPECT_EQ(1U, results.size());
    362 
    363   // Adding the body will actually copy the title from the URL table rather
    364   // than the previously indexed row (we made them not match above). This isn't
    365   // necessarily what we want, but it's how it's implemented, and we don't want
    366   // to regress it.
    367   manager.GetTextMatches(UTF8ToUTF16("chocolate"), options, &results, &first_time_searched);
    368   EXPECT_EQ(1U, results.size());
    369 }
    370 
    371 // Tests that changes get properly committed to disk.
    372 TEST_F(TextDatabaseManagerTest, Writing) {
    373   ASSERT_TRUE(Init());
    374 
    375   QueryOptions options;
    376   std::vector<TextDatabase::Match> results;
    377   Time first_time_searched;
    378 
    379   InMemDB visit_db;
    380 
    381   // Create the manager and write some stuff to it.
    382   {
    383     TextDatabaseManager manager(dir_, &visit_db, &visit_db);
    384     ASSERT_TRUE(manager.Init(NULL));
    385 
    386     std::vector<Time> times;
    387     AddAllPages(manager, &visit_db, &times);
    388 
    389     // We should have matched every page.
    390     manager.GetTextMatches(UTF8ToUTF16("FOO"), options, &results, &first_time_searched);
    391     EXPECT_EQ(6U, results.size());
    392   }
    393   results.clear();
    394 
    395   // Recreate the manager and make sure it finds the written stuff.
    396   {
    397     TextDatabaseManager manager(dir_, &visit_db, &visit_db);
    398     ASSERT_TRUE(manager.Init(NULL));
    399 
    400     // We should have matched every page again.
    401     manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
    402                            &results, &first_time_searched);
    403     EXPECT_EQ(6U, results.size());
    404   }
    405 }
    406 
    407 // Tests that changes get properly committed to disk, as in the Writing test
    408 // above, but when there is a transaction around the adds.
    409 TEST_F(TextDatabaseManagerTest, WritingTransaction) {
    410   ASSERT_TRUE(Init());
    411 
    412   QueryOptions options;
    413   std::vector<TextDatabase::Match> results;
    414   Time first_time_searched;
    415 
    416   InMemDB visit_db;
    417 
    418   // Create the manager and write some stuff to it.
    419   {
    420     TextDatabaseManager manager(dir_, &visit_db, &visit_db);
    421     ASSERT_TRUE(manager.Init(NULL));
    422 
    423     std::vector<Time> times;
    424     manager.BeginTransaction();
    425     AddAllPages(manager, &visit_db, &times);
    426     // "Forget" to commit, it should be autocommittedd for us.
    427 
    428     // We should have matched every page.
    429     manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
    430                            &results, &first_time_searched);
    431     EXPECT_EQ(6U, results.size());
    432   }
    433   results.clear();
    434 
    435   // Recreate the manager and make sure it finds the written stuff.
    436   {
    437     TextDatabaseManager manager(dir_, &visit_db, &visit_db);
    438     ASSERT_TRUE(manager.Init(NULL));
    439 
    440     // We should have matched every page again.
    441     manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
    442                            &results, &first_time_searched);
    443     EXPECT_EQ(6U, results.size());
    444   }
    445 }
    446 
    447 // Tests querying where the maximum number of items is met.
    448 TEST_F(TextDatabaseManagerTest, QueryMax) {
    449   ASSERT_TRUE(Init());
    450   InMemDB visit_db;
    451   TextDatabaseManager manager(dir_, &visit_db, &visit_db);
    452   ASSERT_TRUE(manager.Init(NULL));
    453 
    454   std::vector<Time> times;
    455   AddAllPages(manager, &visit_db, &times);
    456 
    457   string16 foo = UTF8ToUTF16("FOO");
    458 
    459   QueryOptions options;
    460   options.begin_time = times[0] - TimeDelta::FromDays(100);
    461   options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
    462   options.max_count = 2;
    463   std::vector<TextDatabase::Match> results;
    464   Time first_time_searched;
    465   manager.GetTextMatches(foo, options, &results, &first_time_searched);
    466 
    467   // We should have gotten the last two pages as results (the first page is
    468   // also the last).
    469   EXPECT_EQ(2U, results.size());
    470   EXPECT_TRUE(first_time_searched <= times[4]);
    471   EXPECT_TRUE(ResultsHaveURL(results, kURL5));
    472   EXPECT_TRUE(ResultsHaveURL(results, kURL1));
    473 
    474   // Asking for 4 pages, the first one should be in another DB.
    475   options.max_count = 4;
    476   manager.GetTextMatches(foo, options, &results, &first_time_searched);
    477 
    478   EXPECT_EQ(4U, results.size());
    479   EXPECT_TRUE(first_time_searched <= times[4]);
    480   EXPECT_TRUE(ResultsHaveURL(results, kURL3));
    481   EXPECT_TRUE(ResultsHaveURL(results, kURL4));
    482   EXPECT_TRUE(ResultsHaveURL(results, kURL5));
    483   EXPECT_TRUE(ResultsHaveURL(results, kURL1));
    484 }
    485 
    486 // Tests querying backwards in time in chunks.
    487 TEST_F(TextDatabaseManagerTest, QueryBackwards) {
    488   ASSERT_TRUE(Init());
    489   InMemDB visit_db;
    490   TextDatabaseManager manager(dir_, &visit_db, &visit_db);
    491   ASSERT_TRUE(manager.Init(NULL));
    492 
    493   std::vector<Time> times;
    494   AddAllPages(manager, &visit_db, &times);
    495 
    496   string16 foo = UTF8ToUTF16("FOO");
    497 
    498   // First do a query for all time, but with a max of 2. This will give us the
    499   // last two results and will tell us where to start searching when we want
    500   // to go back in time.
    501   QueryOptions options;
    502   options.begin_time = times[0] - TimeDelta::FromDays(100);
    503   options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
    504   options.max_count = 2;
    505   std::vector<TextDatabase::Match> results;
    506   Time first_time_searched;
    507   manager.GetTextMatches(foo, options, &results, &first_time_searched);
    508 
    509   // Check that we got the last two results.
    510   EXPECT_EQ(2U, results.size());
    511   EXPECT_TRUE(first_time_searched <= times[4]);
    512   EXPECT_TRUE(ResultsHaveURL(results, kURL5));
    513   EXPECT_TRUE(ResultsHaveURL(results, kURL1));
    514 
    515   // Query the previous two URLs and make sure we got the correct ones.
    516   options.end_time = first_time_searched;
    517   manager.GetTextMatches(foo, options, &results, &first_time_searched);
    518   EXPECT_EQ(2U, results.size());
    519   EXPECT_TRUE(first_time_searched <= times[2]);
    520   EXPECT_TRUE(ResultsHaveURL(results, kURL3));
    521   EXPECT_TRUE(ResultsHaveURL(results, kURL4));
    522 
    523   // Query the previous two URLs...
    524   options.end_time = first_time_searched;
    525   manager.GetTextMatches(foo, options, &results, &first_time_searched);
    526   EXPECT_EQ(2U, results.size());
    527   EXPECT_TRUE(first_time_searched <= times[0]);
    528   EXPECT_TRUE(ResultsHaveURL(results, kURL2));
    529   EXPECT_TRUE(ResultsHaveURL(results, kURL1));
    530 
    531   // Try to query some more, there should be no results.
    532   options.end_time = first_time_searched;
    533   manager.GetTextMatches(foo, options, &results, &first_time_searched);
    534   EXPECT_EQ(0U, results.size());
    535 }
    536 
    537 }  // namespace history
    538