Home | History | Annotate | Download | only in predictors
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/predictors/resource_prefetch_predictor_tables.h"
      6 
      7 #include <algorithm>
      8 #include <utility>
      9 #include "base/logging.h"
     10 #include "base/metrics/histogram.h"
     11 #include "base/strings/stringprintf.h"
     12 #include "content/public/browser/browser_thread.h"
     13 #include "sql/statement.h"
     14 
     15 using content::BrowserThread;
     16 using sql::Statement;
     17 
     18 namespace {
     19 
// Names of the four SQLite tables managed by this file. For each key type
// (URL and host) there is a resource table, holding one row per
// (main page, resource) pair, and a metadata table, holding one row per main
// page with its last visit time. These names are substituted into the SQL
// text built with base::StringPrintf throughout this file.
const char kUrlResourceTableName[] = "resource_prefetch_predictor_url";
const char kUrlMetadataTableName[] = "resource_prefetch_predictor_url_metadata";
const char kHostResourceTableName[] = "resource_prefetch_predictor_host";
const char kHostMetadataTableName[] =
    "resource_prefetch_predictor_host_metadata";
     25 
     26 void BindResourceRowToStatement(
     27     const predictors::ResourcePrefetchPredictorTables::ResourceRow& row,
     28     const std::string& primary_key,
     29     Statement* statement) {
     30   statement->BindString(0, primary_key);
     31   statement->BindString(1, row.resource_url.spec());
     32   statement->BindInt(2, static_cast<int>(row.resource_type));
     33   statement->BindInt(3, row.number_of_hits);
     34   statement->BindInt(4, row.number_of_misses);
     35   statement->BindInt(5, row.consecutive_misses);
     36   statement->BindDouble(6, row.average_position);
     37 }
     38 
     39 bool StepAndInitializeResourceRow(
     40     Statement* statement,
     41     predictors::ResourcePrefetchPredictorTables::ResourceRow* row) {
     42   if (!statement->Step())
     43     return false;
     44 
     45   row->primary_key = statement->ColumnString(0);
     46   row->resource_url = GURL(statement->ColumnString(1));
     47   row->resource_type = ResourceType::FromInt(statement->ColumnInt(2));
     48   row->number_of_hits = statement->ColumnInt(3);
     49   row->number_of_misses = statement->ColumnInt(4);
     50   row->consecutive_misses = statement->ColumnInt(5);
     51   row->average_position = statement->ColumnDouble(6);
     52   return true;
     53 }
     54 
     55 }  // namespace
     56 
     57 namespace predictors {
     58 
// static
// Upper bound (inclusive) on the length of a primary key or of a resource URL
// spec that UpdateDataHelper() will write; StringsAreSmallerThanDBLimit()
// rejects data containing anything longer.
const size_t ResourcePrefetchPredictorTables::kMaxStringLength = 1024;
     61 
     62 ResourcePrefetchPredictorTables::ResourceRow::ResourceRow()
     63     : resource_type(ResourceType::LAST_TYPE),
     64       number_of_hits(0),
     65       number_of_misses(0),
     66       consecutive_misses(0),
     67       average_position(0.0),
     68       score(0.0) {
     69 }
     70 
     71 ResourcePrefetchPredictorTables::ResourceRow::ResourceRow(
     72     const ResourceRow& other)
     73         : primary_key(other.primary_key),
     74           resource_url(other.resource_url),
     75           resource_type(other.resource_type),
     76           number_of_hits(other.number_of_hits),
     77           number_of_misses(other.number_of_misses),
     78           consecutive_misses(other.consecutive_misses),
     79           average_position(other.average_position),
     80           score(other.score) {
     81 }
     82 
     83 ResourcePrefetchPredictorTables::ResourceRow::ResourceRow(
     84     const std::string& i_primary_key,
     85     const std::string& i_resource_url,
     86     ResourceType::Type i_resource_type,
     87     int i_number_of_hits,
     88     int i_number_of_misses,
     89     int i_consecutive_misses,
     90     double i_average_position)
     91         : primary_key(i_primary_key),
     92           resource_url(i_resource_url),
     93           resource_type(i_resource_type),
     94           number_of_hits(i_number_of_hits),
     95           number_of_misses(i_number_of_misses),
     96           consecutive_misses(i_consecutive_misses),
     97           average_position(i_average_position) {
     98   UpdateScore();
     99 }
    100 
    101 void ResourcePrefetchPredictorTables::ResourceRow::UpdateScore() {
    102   // The score is calculated so that when the rows are sorted, the stylesheets
    103   // and scripts appear first, sorted by position(ascending) and then the rest
    104   // of the resources sorted by position(ascending).
    105   static const int kMaxResourcesPerType = 100;
    106   switch (resource_type) {
    107     case ResourceType::STYLESHEET:
    108     case ResourceType::SCRIPT:
    109       score = (2 * kMaxResourcesPerType) - average_position;
    110       break;
    111 
    112     case ResourceType::IMAGE:
    113       score = kMaxResourcesPerType - average_position;
    114       break;
    115 
    116     default:
    117       score = kMaxResourcesPerType - average_position;
    118       break;
    119   }
    120 }
    121 
    122 bool ResourcePrefetchPredictorTables::ResourceRow::operator==(
    123     const ResourceRow& rhs) const {
    124   return primary_key == rhs.primary_key &&
    125       resource_url == rhs.resource_url &&
    126       resource_type == rhs.resource_type &&
    127       number_of_hits == rhs.number_of_hits &&
    128       number_of_misses == rhs.number_of_misses &&
    129       consecutive_misses == rhs.consecutive_misses &&
    130       average_position == rhs.average_position &&
    131       score == rhs.score;
    132 }
    133 
    134 bool ResourcePrefetchPredictorTables::ResourceRowSorter::operator()(
    135     const ResourceRow& x, const ResourceRow& y) const {
    136   return x.score > y.score;
    137 }
    138 
    139 ResourcePrefetchPredictorTables::PrefetchData::PrefetchData(
    140     PrefetchKeyType i_key_type,
    141     const std::string& i_primary_key)
    142     : key_type(i_key_type),
    143       primary_key(i_primary_key) {
    144 }
    145 
    146 ResourcePrefetchPredictorTables::PrefetchData::PrefetchData(
    147     const PrefetchData& other)
    148     : key_type(other.key_type),
    149       primary_key(other.primary_key),
    150       last_visit(other.last_visit),
    151       resources(other.resources) {
    152 }
    153 
    154 ResourcePrefetchPredictorTables::PrefetchData::~PrefetchData() {
    155 }
    156 
    157 bool ResourcePrefetchPredictorTables::PrefetchData::operator==(
    158     const PrefetchData& rhs) const {
    159   return key_type == rhs.key_type && primary_key == rhs.primary_key &&
    160       resources == rhs.resources;
    161 }
    162 
    163 void ResourcePrefetchPredictorTables::GetAllData(
    164     PrefetchDataMap* url_data_map,
    165     PrefetchDataMap* host_data_map) {
    166   CHECK(BrowserThread::CurrentlyOn(BrowserThread::DB));
    167   if (CantAccessDatabase())
    168     return;
    169 
    170   DCHECK(url_data_map);
    171   DCHECK(host_data_map);
    172   url_data_map->clear();
    173   host_data_map->clear();
    174 
    175   std::vector<std::string> urls_to_delete, hosts_to_delete;
    176   GetAllDataHelper(PREFETCH_KEY_TYPE_URL, url_data_map, &urls_to_delete);
    177   GetAllDataHelper(PREFETCH_KEY_TYPE_HOST, host_data_map, &hosts_to_delete);
    178 
    179   if (!urls_to_delete.empty() || !hosts_to_delete.empty())
    180     DeleteData(urls_to_delete, hosts_to_delete);
    181 }
    182 
    183 void ResourcePrefetchPredictorTables::UpdateData(
    184     const PrefetchData& url_data,
    185     const PrefetchData& host_data) {
    186   CHECK(BrowserThread::CurrentlyOn(BrowserThread::DB));
    187   if (CantAccessDatabase())
    188     return;
    189 
    190   DCHECK(!url_data.is_host() && host_data.is_host());
    191   DCHECK(!url_data.primary_key.empty() || !host_data.primary_key.empty());
    192 
    193   DB()->BeginTransaction();
    194 
    195   bool success = (url_data.primary_key.empty() || UpdateDataHelper(url_data)) &&
    196       (host_data.primary_key.empty() || UpdateDataHelper(host_data));
    197   if (!success)
    198     DB()->RollbackTransaction();
    199 
    200   DB()->CommitTransaction();
    201 }
    202 
    203 void ResourcePrefetchPredictorTables::DeleteData(
    204     const std::vector<std::string>& urls,
    205     const std::vector<std::string>& hosts) {
    206   CHECK(BrowserThread::CurrentlyOn(BrowserThread::DB));
    207   if (CantAccessDatabase())
    208     return;
    209 
    210   DCHECK(!urls.empty() || !hosts.empty());
    211 
    212   if (!urls.empty())
    213     DeleteDataHelper(PREFETCH_KEY_TYPE_URL, urls);
    214   if (!hosts.empty())
    215     DeleteDataHelper(PREFETCH_KEY_TYPE_HOST, hosts);
    216 }
    217 
    218 void ResourcePrefetchPredictorTables::DeleteSingleDataPoint(
    219     const std::string& key,
    220     PrefetchKeyType key_type) {
    221   CHECK(BrowserThread::CurrentlyOn(BrowserThread::DB));
    222   if (CantAccessDatabase())
    223     return;
    224 
    225   DeleteDataHelper(key_type, std::vector<std::string>(1, key));
    226 }
    227 
    228 void ResourcePrefetchPredictorTables::DeleteAllData() {
    229   if (CantAccessDatabase())
    230     return;
    231 
    232   Statement deleter(DB()->GetUniqueStatement(
    233       base::StringPrintf("DELETE FROM %s", kUrlResourceTableName).c_str()));
    234   deleter.Run();
    235   deleter.Assign(DB()->GetUniqueStatement(
    236       base::StringPrintf("DELETE FROM %s", kUrlMetadataTableName).c_str()));
    237   deleter.Run();
    238   deleter.Assign(DB()->GetUniqueStatement(
    239       base::StringPrintf("DELETE FROM %s", kHostResourceTableName).c_str()));
    240   deleter.Run();
    241   deleter.Assign(DB()->GetUniqueStatement(
    242       base::StringPrintf("DELETE FROM %s", kHostMetadataTableName).c_str()));
    243   deleter.Run();
    244 }
    245 
    246 ResourcePrefetchPredictorTables::ResourcePrefetchPredictorTables()
    247     : PredictorTableBase() {
    248 }
    249 
    250 ResourcePrefetchPredictorTables::~ResourcePrefetchPredictorTables() {
    251 }
    252 
    253 void ResourcePrefetchPredictorTables::GetAllDataHelper(
    254     PrefetchKeyType key_type,
    255     PrefetchDataMap* data_map,
    256     std::vector<std::string>* to_delete) {
    257   bool is_host = key_type == PREFETCH_KEY_TYPE_HOST;
    258 
    259   // Read the resources table and organize it per primary key.
    260   const char* resource_table_name = is_host ? kHostResourceTableName :
    261       kUrlResourceTableName;
    262   Statement resource_reader(DB()->GetUniqueStatement(
    263       base::StringPrintf("SELECT * FROM %s", resource_table_name).c_str()));
    264 
    265   ResourceRow row;
    266   while (StepAndInitializeResourceRow(&resource_reader, &row)) {
    267     row.UpdateScore();
    268     std::string primary_key = row.primary_key;
    269     // Don't need to store primary key since the data is grouped by primary key.
    270     row.primary_key.clear();
    271 
    272     PrefetchDataMap::iterator it = data_map->find(primary_key);
    273     if (it == data_map->end()) {
    274       it = data_map->insert(std::make_pair(
    275           primary_key, PrefetchData(key_type, primary_key))).first;
    276     }
    277     it->second.resources.push_back(row);
    278   }
    279 
    280   // Sort each of the resource row vectors by score.
    281   for (PrefetchDataMap::iterator it = data_map->begin(); it != data_map->end();
    282        ++it) {
    283     std::sort(it->second.resources.begin(),
    284               it->second.resources.end(),
    285               ResourceRowSorter());
    286   }
    287 
    288   // Read the metadata and keep track of entries that have metadata, but no
    289   // resource entries, so they can be deleted.
    290   const char* metadata_table_name = is_host ? kHostMetadataTableName :
    291       kUrlMetadataTableName;
    292   Statement metadata_reader(DB()->GetUniqueStatement(
    293       base::StringPrintf("SELECT * FROM %s", metadata_table_name).c_str()));
    294 
    295   while (metadata_reader.Step()) {
    296     std::string primary_key = metadata_reader.ColumnString(0);
    297 
    298     PrefetchDataMap::iterator it = data_map->find(primary_key);
    299     if (it != data_map->end()) {
    300       int64 last_visit = metadata_reader.ColumnInt64(1);
    301       it->second.last_visit = base::Time::FromInternalValue(last_visit);
    302     } else {
    303       to_delete->push_back(primary_key);
    304     }
    305   }
    306 }
    307 
    308 bool ResourcePrefetchPredictorTables::UpdateDataHelper(
    309     const PrefetchData& data) {
    310   DCHECK(!data.primary_key.empty());
    311 
    312   if (!StringsAreSmallerThanDBLimit(data)) {
    313     UMA_HISTOGRAM_BOOLEAN("ResourcePrefetchPredictor.DbStringTooLong", true);
    314     return false;
    315   }
    316 
    317   // Delete the older data from both the tables.
    318   scoped_ptr<Statement> deleter(data.is_host() ?
    319       GetHostResourceDeleteStatement() : GetUrlResourceDeleteStatement());
    320   deleter->BindString(0, data.primary_key);
    321   if (!deleter->Run())
    322     return false;
    323 
    324   deleter.reset(data.is_host() ? GetHostMetadataDeleteStatement() :
    325       GetUrlMetadataDeleteStatement());
    326   deleter->BindString(0, data.primary_key);
    327   if (!deleter->Run())
    328     return false;
    329 
    330   // Add the new data to the tables.
    331   const ResourceRows& resources = data.resources;
    332   for (ResourceRows::const_iterator it = resources.begin();
    333        it != resources.end(); ++it) {
    334     scoped_ptr<Statement> resource_inserter(data.is_host() ?
    335         GetHostResourceUpdateStatement() : GetUrlResourceUpdateStatement());
    336     BindResourceRowToStatement(*it, data.primary_key, resource_inserter.get());
    337     if (!resource_inserter->Run())
    338       return false;
    339   }
    340 
    341   scoped_ptr<Statement> metadata_inserter(data.is_host() ?
    342       GetHostMetadataUpdateStatement() : GetUrlMetadataUpdateStatement());
    343   metadata_inserter->BindString(0, data.primary_key);
    344   metadata_inserter->BindInt64(1, data.last_visit.ToInternalValue());
    345   if (!metadata_inserter->Run())
    346     return false;
    347 
    348   return true;
    349 }
    350 
    351 void ResourcePrefetchPredictorTables::DeleteDataHelper(
    352     PrefetchKeyType key_type,
    353     const std::vector<std::string>& keys) {
    354   bool is_host = key_type == PREFETCH_KEY_TYPE_HOST;
    355 
    356   for (std::vector<std::string>::const_iterator it = keys.begin();
    357        it != keys.end(); ++it) {
    358     scoped_ptr<Statement> deleter(is_host ? GetHostResourceDeleteStatement() :
    359         GetUrlResourceDeleteStatement());
    360     deleter->BindString(0, *it);
    361     deleter->Run();
    362 
    363     deleter.reset(is_host ? GetHostMetadataDeleteStatement() :
    364         GetUrlMetadataDeleteStatement());
    365     deleter->BindString(0, *it);
    366     deleter->Run();
    367   }
    368 }
    369 
    370 bool ResourcePrefetchPredictorTables::StringsAreSmallerThanDBLimit(
    371     const PrefetchData& data) const {
    372   if (data.primary_key.length() > kMaxStringLength)
    373     return false;
    374 
    375   for (ResourceRows::const_iterator it = data.resources.begin();
    376        it != data.resources.end(); ++it) {
    377     if (it->resource_url.spec().length() > kMaxStringLength)
    378       return false;
    379   }
    380   return true;
    381 }
    382 
    383 void ResourcePrefetchPredictorTables::CreateTableIfNonExistent() {
    384   CHECK(BrowserThread::CurrentlyOn(BrowserThread::DB));
    385   if (CantAccessDatabase())
    386     return;
    387 
    388   const char* resource_table_creator =
    389       "CREATE TABLE %s ( "
    390       "main_page_url TEXT, "
    391       "resource_url TEXT, "
    392       "resource_type INTEGER, "
    393       "number_of_hits INTEGER, "
    394       "number_of_misses INTEGER, "
    395       "consecutive_misses INTEGER, "
    396       "average_position DOUBLE, "
    397       "PRIMARY KEY(main_page_url, resource_url))";
    398   const char* metadata_table_creator =
    399       "CREATE TABLE %s ( "
    400       "main_page_url TEXT, "
    401       "last_visit_time INTEGER, "
    402       "PRIMARY KEY(main_page_url))";
    403 
    404   sql::Connection* db = DB();
    405   bool success =
    406       (db->DoesTableExist(kUrlResourceTableName) ||
    407        db->Execute(base::StringPrintf(resource_table_creator,
    408                                       kUrlResourceTableName).c_str())) &&
    409       (db->DoesTableExist(kUrlMetadataTableName) ||
    410        db->Execute(base::StringPrintf(metadata_table_creator,
    411                                       kUrlMetadataTableName).c_str())) &&
    412       (db->DoesTableExist(kHostResourceTableName) ||
    413        db->Execute(base::StringPrintf(resource_table_creator,
    414                                       kHostResourceTableName).c_str())) &&
    415       (db->DoesTableExist(kHostMetadataTableName) ||
    416        db->Execute(base::StringPrintf(metadata_table_creator,
    417                                       kHostMetadataTableName).c_str()));
    418 
    419   if (!success)
    420     ResetDB();
    421 }
    422 
    423 void ResourcePrefetchPredictorTables::LogDatabaseStats()  {
    424   CHECK(BrowserThread::CurrentlyOn(BrowserThread::DB));
    425   if (CantAccessDatabase())
    426     return;
    427 
    428   Statement statement(DB()->GetUniqueStatement(
    429       base::StringPrintf("SELECT count(*) FROM %s",
    430                          kUrlResourceTableName).c_str()));
    431   if (statement.Step())
    432     UMA_HISTOGRAM_COUNTS("ResourcePrefetchPredictor.UrlTableRowCount",
    433                          statement.ColumnInt(0));
    434 
    435   statement.Assign(DB()->GetUniqueStatement(
    436       base::StringPrintf("SELECT count(*) FROM %s",
    437                          kHostResourceTableName).c_str()));
    438   if (statement.Step())
    439     UMA_HISTOGRAM_COUNTS("ResourcePrefetchPredictor.HostTableRowCount",
    440                          statement.ColumnInt(0));
    441 }
    442 
    443 Statement*
    444     ResourcePrefetchPredictorTables::GetUrlResourceDeleteStatement() {
    445   return new Statement(DB()->GetCachedStatement(
    446       SQL_FROM_HERE,
    447       base::StringPrintf("DELETE FROM %s WHERE main_page_url=?",
    448                          kUrlResourceTableName).c_str()));
    449 }
    450 
    451 Statement*
    452     ResourcePrefetchPredictorTables::GetUrlResourceUpdateStatement() {
    453   return new Statement(DB()->GetCachedStatement(
    454       SQL_FROM_HERE,
    455       base::StringPrintf(
    456           "INSERT INTO %s "
    457           "(main_page_url, resource_url, resource_type, number_of_hits, "
    458           "number_of_misses, consecutive_misses, average_position) "
    459           "VALUES (?,?,?,?,?,?,?)", kUrlResourceTableName).c_str()));
    460 }
    461 
    462 Statement*
    463     ResourcePrefetchPredictorTables::GetUrlMetadataDeleteStatement() {
    464   return new Statement(DB()->GetCachedStatement(
    465       SQL_FROM_HERE,
    466       base::StringPrintf("DELETE FROM %s WHERE main_page_url=?",
    467                          kUrlMetadataTableName).c_str()));
    468 }
    469 
    470 Statement*
    471     ResourcePrefetchPredictorTables::GetUrlMetadataUpdateStatement() {
    472   return new Statement(DB()->GetCachedStatement(
    473       SQL_FROM_HERE,
    474       base::StringPrintf(
    475           "INSERT INTO %s (main_page_url, last_visit_time) VALUES (?,?)",
    476           kUrlMetadataTableName).c_str()));
    477 }
    478 
    479 Statement*
    480     ResourcePrefetchPredictorTables::GetHostResourceDeleteStatement() {
    481   return new Statement(DB()->GetCachedStatement(
    482       SQL_FROM_HERE,
    483       base::StringPrintf("DELETE FROM %s WHERE main_page_url=?",
    484                          kHostResourceTableName).c_str()));
    485 }
    486 
    487 Statement*
    488     ResourcePrefetchPredictorTables::GetHostResourceUpdateStatement() {
    489   return new Statement(DB()->GetCachedStatement(
    490       SQL_FROM_HERE,
    491       base::StringPrintf(
    492           "INSERT INTO %s "
    493           "(main_page_url, resource_url, resource_type, number_of_hits, "
    494           "number_of_misses, consecutive_misses, average_position) "
    495           "VALUES (?,?,?,?,?,?,?)", kHostResourceTableName).c_str()));
    496 }
    497 
    498 Statement*
    499     ResourcePrefetchPredictorTables::GetHostMetadataDeleteStatement() {
    500   return new Statement(DB()->GetCachedStatement(
    501       SQL_FROM_HERE,
    502       base::StringPrintf("DELETE FROM %s WHERE main_page_url=?",
    503                          kHostMetadataTableName).c_str()));
    504 }
    505 
    506 Statement* ResourcePrefetchPredictorTables::GetHostMetadataUpdateStatement() {
    507   return new Statement(DB()->GetCachedStatement(
    508       SQL_FROM_HERE,
    509       base::StringPrintf(
    510           "INSERT INTO %s (main_page_url, last_visit_time) VALUES (?,?)",
    511           kHostMetadataTableName).c_str()));
    512 }
    513 
    514 }  // namespace predictors
    515