Home | History | Annotate | Download | only in predictors
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/predictors/resource_prefetch_predictor_tables.h"
      6 
      7 #include <algorithm>
      8 #include <utility>
      9 
     10 #include "base/logging.h"
     11 #include "base/metrics/histogram.h"
     12 #include "base/strings/stringprintf.h"
     13 #include "content/public/browser/browser_thread.h"
     14 #include "sql/statement.h"
     15 
     16 using content::BrowserThread;
     17 using sql::Statement;
     18 
     19 namespace {
     20 
     21 const char kUrlResourceTableName[] = "resource_prefetch_predictor_url";
     22 const char kUrlMetadataTableName[] = "resource_prefetch_predictor_url_metadata";
     23 const char kHostResourceTableName[] = "resource_prefetch_predictor_host";
     24 const char kHostMetadataTableName[] =
     25     "resource_prefetch_predictor_host_metadata";
     26 
     27 void BindResourceRowToStatement(
     28     const predictors::ResourcePrefetchPredictorTables::ResourceRow& row,
     29     const std::string& primary_key,
     30     Statement* statement) {
     31   statement->BindString(0, primary_key);
     32   statement->BindString(1, row.resource_url.spec());
     33   statement->BindInt(2, static_cast<int>(row.resource_type));
     34   statement->BindInt(3, row.number_of_hits);
     35   statement->BindInt(4, row.number_of_misses);
     36   statement->BindInt(5, row.consecutive_misses);
     37   statement->BindDouble(6, row.average_position);
     38 }
     39 
     40 bool StepAndInitializeResourceRow(
     41     Statement* statement,
     42     predictors::ResourcePrefetchPredictorTables::ResourceRow* row) {
     43   if (!statement->Step())
     44     return false;
     45 
     46   row->primary_key = statement->ColumnString(0);
     47   row->resource_url = GURL(statement->ColumnString(1));
     48   row->resource_type = static_cast<content::ResourceType>(
     49       statement->ColumnInt(2));
     50   row->number_of_hits = statement->ColumnInt(3);
     51   row->number_of_misses = statement->ColumnInt(4);
     52   row->consecutive_misses = statement->ColumnInt(5);
     53   row->average_position = statement->ColumnDouble(6);
     54   return true;
     55 }
     56 
     57 }  // namespace
     58 
     59 namespace predictors {
     60 
     61 // static
     62 const size_t ResourcePrefetchPredictorTables::kMaxStringLength = 1024;
     63 
     64 ResourcePrefetchPredictorTables::ResourceRow::ResourceRow()
     65     : resource_type(content::RESOURCE_TYPE_LAST_TYPE),
     66       number_of_hits(0),
     67       number_of_misses(0),
     68       consecutive_misses(0),
     69       average_position(0.0),
     70       score(0.0) {
     71 }
     72 
     73 ResourcePrefetchPredictorTables::ResourceRow::ResourceRow(
     74     const ResourceRow& other)
     75         : primary_key(other.primary_key),
     76           resource_url(other.resource_url),
     77           resource_type(other.resource_type),
     78           number_of_hits(other.number_of_hits),
     79           number_of_misses(other.number_of_misses),
     80           consecutive_misses(other.consecutive_misses),
     81           average_position(other.average_position),
     82           score(other.score) {
     83 }
     84 
     85 ResourcePrefetchPredictorTables::ResourceRow::ResourceRow(
     86     const std::string& i_primary_key,
     87     const std::string& i_resource_url,
     88     content::ResourceType i_resource_type,
     89     int i_number_of_hits,
     90     int i_number_of_misses,
     91     int i_consecutive_misses,
     92     double i_average_position)
     93         : primary_key(i_primary_key),
     94           resource_url(i_resource_url),
     95           resource_type(i_resource_type),
     96           number_of_hits(i_number_of_hits),
     97           number_of_misses(i_number_of_misses),
     98           consecutive_misses(i_consecutive_misses),
     99           average_position(i_average_position) {
    100   UpdateScore();
    101 }
    102 
    103 void ResourcePrefetchPredictorTables::ResourceRow::UpdateScore() {
    104   // The score is calculated so that when the rows are sorted, the stylesheets
    105   // and scripts appear first, sorted by position(ascending) and then the rest
    106   // of the resources sorted by position(ascending).
    107   static const int kMaxResourcesPerType = 100;
    108   switch (resource_type) {
    109     case content::RESOURCE_TYPE_STYLESHEET:
    110     case content::RESOURCE_TYPE_SCRIPT:
    111       score = (2 * kMaxResourcesPerType) - average_position;
    112       break;
    113 
    114     case content::RESOURCE_TYPE_IMAGE:
    115     default:
    116       score = kMaxResourcesPerType - average_position;
    117       break;
    118   }
    119 }
    120 
    121 bool ResourcePrefetchPredictorTables::ResourceRow::operator==(
    122     const ResourceRow& rhs) const {
    123   return primary_key == rhs.primary_key &&
    124       resource_url == rhs.resource_url &&
    125       resource_type == rhs.resource_type &&
    126       number_of_hits == rhs.number_of_hits &&
    127       number_of_misses == rhs.number_of_misses &&
    128       consecutive_misses == rhs.consecutive_misses &&
    129       average_position == rhs.average_position &&
    130       score == rhs.score;
    131 }
    132 
    133 bool ResourcePrefetchPredictorTables::ResourceRowSorter::operator()(
    134     const ResourceRow& x, const ResourceRow& y) const {
    135   return x.score > y.score;
    136 }
    137 
    138 ResourcePrefetchPredictorTables::PrefetchData::PrefetchData(
    139     PrefetchKeyType i_key_type,
    140     const std::string& i_primary_key)
    141     : key_type(i_key_type),
    142       primary_key(i_primary_key) {
    143 }
    144 
    145 ResourcePrefetchPredictorTables::PrefetchData::PrefetchData(
    146     const PrefetchData& other)
    147     : key_type(other.key_type),
    148       primary_key(other.primary_key),
    149       last_visit(other.last_visit),
    150       resources(other.resources) {
    151 }
    152 
    153 ResourcePrefetchPredictorTables::PrefetchData::~PrefetchData() {
    154 }
    155 
    156 bool ResourcePrefetchPredictorTables::PrefetchData::operator==(
    157     const PrefetchData& rhs) const {
    158   return key_type == rhs.key_type && primary_key == rhs.primary_key &&
    159       resources == rhs.resources;
    160 }
    161 
    162 void ResourcePrefetchPredictorTables::GetAllData(
    163     PrefetchDataMap* url_data_map,
    164     PrefetchDataMap* host_data_map) {
    165   DCHECK_CURRENTLY_ON(BrowserThread::DB);
    166   if (CantAccessDatabase())
    167     return;
    168 
    169   DCHECK(url_data_map);
    170   DCHECK(host_data_map);
    171   url_data_map->clear();
    172   host_data_map->clear();
    173 
    174   std::vector<std::string> urls_to_delete, hosts_to_delete;
    175   GetAllDataHelper(PREFETCH_KEY_TYPE_URL, url_data_map, &urls_to_delete);
    176   GetAllDataHelper(PREFETCH_KEY_TYPE_HOST, host_data_map, &hosts_to_delete);
    177 
    178   if (!urls_to_delete.empty() || !hosts_to_delete.empty())
    179     DeleteData(urls_to_delete, hosts_to_delete);
    180 }
    181 
    182 void ResourcePrefetchPredictorTables::UpdateData(
    183     const PrefetchData& url_data,
    184     const PrefetchData& host_data) {
    185   DCHECK_CURRENTLY_ON(BrowserThread::DB);
    186   if (CantAccessDatabase())
    187     return;
    188 
    189   DCHECK(!url_data.is_host() && host_data.is_host());
    190   DCHECK(!url_data.primary_key.empty() || !host_data.primary_key.empty());
    191 
    192   DB()->BeginTransaction();
    193 
    194   bool success = (url_data.primary_key.empty() || UpdateDataHelper(url_data)) &&
    195       (host_data.primary_key.empty() || UpdateDataHelper(host_data));
    196   if (!success)
    197     DB()->RollbackTransaction();
    198 
    199   DB()->CommitTransaction();
    200 }
    201 
    202 void ResourcePrefetchPredictorTables::DeleteData(
    203     const std::vector<std::string>& urls,
    204     const std::vector<std::string>& hosts) {
    205   DCHECK_CURRENTLY_ON(BrowserThread::DB);
    206   if (CantAccessDatabase())
    207     return;
    208 
    209   DCHECK(!urls.empty() || !hosts.empty());
    210 
    211   if (!urls.empty())
    212     DeleteDataHelper(PREFETCH_KEY_TYPE_URL, urls);
    213   if (!hosts.empty())
    214     DeleteDataHelper(PREFETCH_KEY_TYPE_HOST, hosts);
    215 }
    216 
    217 void ResourcePrefetchPredictorTables::DeleteSingleDataPoint(
    218     const std::string& key,
    219     PrefetchKeyType key_type) {
    220   DCHECK_CURRENTLY_ON(BrowserThread::DB);
    221   if (CantAccessDatabase())
    222     return;
    223 
    224   DeleteDataHelper(key_type, std::vector<std::string>(1, key));
    225 }
    226 
    227 void ResourcePrefetchPredictorTables::DeleteAllData() {
    228   if (CantAccessDatabase())
    229     return;
    230 
    231   Statement deleter(DB()->GetUniqueStatement(
    232       base::StringPrintf("DELETE FROM %s", kUrlResourceTableName).c_str()));
    233   deleter.Run();
    234   deleter.Assign(DB()->GetUniqueStatement(
    235       base::StringPrintf("DELETE FROM %s", kUrlMetadataTableName).c_str()));
    236   deleter.Run();
    237   deleter.Assign(DB()->GetUniqueStatement(
    238       base::StringPrintf("DELETE FROM %s", kHostResourceTableName).c_str()));
    239   deleter.Run();
    240   deleter.Assign(DB()->GetUniqueStatement(
    241       base::StringPrintf("DELETE FROM %s", kHostMetadataTableName).c_str()));
    242   deleter.Run();
    243 }
    244 
    245 ResourcePrefetchPredictorTables::ResourcePrefetchPredictorTables()
    246     : PredictorTableBase() {
    247 }
    248 
    249 ResourcePrefetchPredictorTables::~ResourcePrefetchPredictorTables() {
    250 }
    251 
    252 void ResourcePrefetchPredictorTables::GetAllDataHelper(
    253     PrefetchKeyType key_type,
    254     PrefetchDataMap* data_map,
    255     std::vector<std::string>* to_delete) {
    256   bool is_host = key_type == PREFETCH_KEY_TYPE_HOST;
    257 
    258   // Read the resources table and organize it per primary key.
    259   const char* resource_table_name = is_host ? kHostResourceTableName :
    260       kUrlResourceTableName;
    261   Statement resource_reader(DB()->GetUniqueStatement(
    262       base::StringPrintf("SELECT * FROM %s", resource_table_name).c_str()));
    263 
    264   ResourceRow row;
    265   while (StepAndInitializeResourceRow(&resource_reader, &row)) {
    266     row.UpdateScore();
    267     std::string primary_key = row.primary_key;
    268     // Don't need to store primary key since the data is grouped by primary key.
    269     row.primary_key.clear();
    270 
    271     PrefetchDataMap::iterator it = data_map->find(primary_key);
    272     if (it == data_map->end()) {
    273       it = data_map->insert(std::make_pair(
    274           primary_key, PrefetchData(key_type, primary_key))).first;
    275     }
    276     it->second.resources.push_back(row);
    277   }
    278 
    279   // Sort each of the resource row vectors by score.
    280   for (PrefetchDataMap::iterator it = data_map->begin(); it != data_map->end();
    281        ++it) {
    282     std::sort(it->second.resources.begin(),
    283               it->second.resources.end(),
    284               ResourceRowSorter());
    285   }
    286 
    287   // Read the metadata and keep track of entries that have metadata, but no
    288   // resource entries, so they can be deleted.
    289   const char* metadata_table_name = is_host ? kHostMetadataTableName :
    290       kUrlMetadataTableName;
    291   Statement metadata_reader(DB()->GetUniqueStatement(
    292       base::StringPrintf("SELECT * FROM %s", metadata_table_name).c_str()));
    293 
    294   while (metadata_reader.Step()) {
    295     std::string primary_key = metadata_reader.ColumnString(0);
    296 
    297     PrefetchDataMap::iterator it = data_map->find(primary_key);
    298     if (it != data_map->end()) {
    299       int64 last_visit = metadata_reader.ColumnInt64(1);
    300       it->second.last_visit = base::Time::FromInternalValue(last_visit);
    301     } else {
    302       to_delete->push_back(primary_key);
    303     }
    304   }
    305 }
    306 
    307 bool ResourcePrefetchPredictorTables::UpdateDataHelper(
    308     const PrefetchData& data) {
    309   DCHECK(!data.primary_key.empty());
    310 
    311   if (!StringsAreSmallerThanDBLimit(data)) {
    312     UMA_HISTOGRAM_BOOLEAN("ResourcePrefetchPredictor.DbStringTooLong", true);
    313     return false;
    314   }
    315 
    316   // Delete the older data from both the tables.
    317   scoped_ptr<Statement> deleter(data.is_host() ?
    318       GetHostResourceDeleteStatement() : GetUrlResourceDeleteStatement());
    319   deleter->BindString(0, data.primary_key);
    320   if (!deleter->Run())
    321     return false;
    322 
    323   deleter.reset(data.is_host() ? GetHostMetadataDeleteStatement() :
    324       GetUrlMetadataDeleteStatement());
    325   deleter->BindString(0, data.primary_key);
    326   if (!deleter->Run())
    327     return false;
    328 
    329   // Add the new data to the tables.
    330   const ResourceRows& resources = data.resources;
    331   for (ResourceRows::const_iterator it = resources.begin();
    332        it != resources.end(); ++it) {
    333     scoped_ptr<Statement> resource_inserter(data.is_host() ?
    334         GetHostResourceUpdateStatement() : GetUrlResourceUpdateStatement());
    335     BindResourceRowToStatement(*it, data.primary_key, resource_inserter.get());
    336     if (!resource_inserter->Run())
    337       return false;
    338   }
    339 
    340   scoped_ptr<Statement> metadata_inserter(data.is_host() ?
    341       GetHostMetadataUpdateStatement() : GetUrlMetadataUpdateStatement());
    342   metadata_inserter->BindString(0, data.primary_key);
    343   metadata_inserter->BindInt64(1, data.last_visit.ToInternalValue());
    344   if (!metadata_inserter->Run())
    345     return false;
    346 
    347   return true;
    348 }
    349 
    350 void ResourcePrefetchPredictorTables::DeleteDataHelper(
    351     PrefetchKeyType key_type,
    352     const std::vector<std::string>& keys) {
    353   bool is_host = key_type == PREFETCH_KEY_TYPE_HOST;
    354 
    355   for (std::vector<std::string>::const_iterator it = keys.begin();
    356        it != keys.end(); ++it) {
    357     scoped_ptr<Statement> deleter(is_host ? GetHostResourceDeleteStatement() :
    358         GetUrlResourceDeleteStatement());
    359     deleter->BindString(0, *it);
    360     deleter->Run();
    361 
    362     deleter.reset(is_host ? GetHostMetadataDeleteStatement() :
    363         GetUrlMetadataDeleteStatement());
    364     deleter->BindString(0, *it);
    365     deleter->Run();
    366   }
    367 }
    368 
    369 bool ResourcePrefetchPredictorTables::StringsAreSmallerThanDBLimit(
    370     const PrefetchData& data) const {
    371   if (data.primary_key.length() > kMaxStringLength)
    372     return false;
    373 
    374   for (ResourceRows::const_iterator it = data.resources.begin();
    375        it != data.resources.end(); ++it) {
    376     if (it->resource_url.spec().length() > kMaxStringLength)
    377       return false;
    378   }
    379   return true;
    380 }
    381 
    382 void ResourcePrefetchPredictorTables::CreateTableIfNonExistent() {
    383   DCHECK_CURRENTLY_ON(BrowserThread::DB);
    384   if (CantAccessDatabase())
    385     return;
    386 
    387   const char resource_table_creator[] =
    388       "CREATE TABLE %s ( "
    389       "main_page_url TEXT, "
    390       "resource_url TEXT, "
    391       "resource_type INTEGER, "
    392       "number_of_hits INTEGER, "
    393       "number_of_misses INTEGER, "
    394       "consecutive_misses INTEGER, "
    395       "average_position DOUBLE, "
    396       "PRIMARY KEY(main_page_url, resource_url))";
    397   const char* metadata_table_creator =
    398       "CREATE TABLE %s ( "
    399       "main_page_url TEXT, "
    400       "last_visit_time INTEGER, "
    401       "PRIMARY KEY(main_page_url))";
    402 
    403   sql::Connection* db = DB();
    404   bool success =
    405       (db->DoesTableExist(kUrlResourceTableName) ||
    406        db->Execute(base::StringPrintf(resource_table_creator,
    407                                       kUrlResourceTableName).c_str())) &&
    408       (db->DoesTableExist(kUrlMetadataTableName) ||
    409        db->Execute(base::StringPrintf(metadata_table_creator,
    410                                       kUrlMetadataTableName).c_str())) &&
    411       (db->DoesTableExist(kHostResourceTableName) ||
    412        db->Execute(base::StringPrintf(resource_table_creator,
    413                                       kHostResourceTableName).c_str())) &&
    414       (db->DoesTableExist(kHostMetadataTableName) ||
    415        db->Execute(base::StringPrintf(metadata_table_creator,
    416                                       kHostMetadataTableName).c_str()));
    417 
    418   if (!success)
    419     ResetDB();
    420 }
    421 
    422 void ResourcePrefetchPredictorTables::LogDatabaseStats()  {
    423   DCHECK_CURRENTLY_ON(BrowserThread::DB);
    424   if (CantAccessDatabase())
    425     return;
    426 
    427   Statement statement(DB()->GetUniqueStatement(
    428       base::StringPrintf("SELECT count(*) FROM %s",
    429                          kUrlResourceTableName).c_str()));
    430   if (statement.Step())
    431     UMA_HISTOGRAM_COUNTS("ResourcePrefetchPredictor.UrlTableRowCount",
    432                          statement.ColumnInt(0));
    433 
    434   statement.Assign(DB()->GetUniqueStatement(
    435       base::StringPrintf("SELECT count(*) FROM %s",
    436                          kHostResourceTableName).c_str()));
    437   if (statement.Step())
    438     UMA_HISTOGRAM_COUNTS("ResourcePrefetchPredictor.HostTableRowCount",
    439                          statement.ColumnInt(0));
    440 }
    441 
    442 Statement*
    443     ResourcePrefetchPredictorTables::GetUrlResourceDeleteStatement() {
    444   return new Statement(DB()->GetCachedStatement(
    445       SQL_FROM_HERE,
    446       base::StringPrintf("DELETE FROM %s WHERE main_page_url=?",
    447                          kUrlResourceTableName).c_str()));
    448 }
    449 
    450 Statement*
    451     ResourcePrefetchPredictorTables::GetUrlResourceUpdateStatement() {
    452   return new Statement(DB()->GetCachedStatement(
    453       SQL_FROM_HERE,
    454       base::StringPrintf(
    455           "INSERT INTO %s "
    456           "(main_page_url, resource_url, resource_type, number_of_hits, "
    457           "number_of_misses, consecutive_misses, average_position) "
    458           "VALUES (?,?,?,?,?,?,?)", kUrlResourceTableName).c_str()));
    459 }
    460 
    461 Statement*
    462     ResourcePrefetchPredictorTables::GetUrlMetadataDeleteStatement() {
    463   return new Statement(DB()->GetCachedStatement(
    464       SQL_FROM_HERE,
    465       base::StringPrintf("DELETE FROM %s WHERE main_page_url=?",
    466                          kUrlMetadataTableName).c_str()));
    467 }
    468 
    469 Statement*
    470     ResourcePrefetchPredictorTables::GetUrlMetadataUpdateStatement() {
    471   return new Statement(DB()->GetCachedStatement(
    472       SQL_FROM_HERE,
    473       base::StringPrintf(
    474           "INSERT INTO %s (main_page_url, last_visit_time) VALUES (?,?)",
    475           kUrlMetadataTableName).c_str()));
    476 }
    477 
    478 Statement*
    479     ResourcePrefetchPredictorTables::GetHostResourceDeleteStatement() {
    480   return new Statement(DB()->GetCachedStatement(
    481       SQL_FROM_HERE,
    482       base::StringPrintf("DELETE FROM %s WHERE main_page_url=?",
    483                          kHostResourceTableName).c_str()));
    484 }
    485 
    486 Statement*
    487     ResourcePrefetchPredictorTables::GetHostResourceUpdateStatement() {
    488   return new Statement(DB()->GetCachedStatement(
    489       SQL_FROM_HERE,
    490       base::StringPrintf(
    491           "INSERT INTO %s "
    492           "(main_page_url, resource_url, resource_type, number_of_hits, "
    493           "number_of_misses, consecutive_misses, average_position) "
    494           "VALUES (?,?,?,?,?,?,?)", kHostResourceTableName).c_str()));
    495 }
    496 
    497 Statement*
    498     ResourcePrefetchPredictorTables::GetHostMetadataDeleteStatement() {
    499   return new Statement(DB()->GetCachedStatement(
    500       SQL_FROM_HERE,
    501       base::StringPrintf("DELETE FROM %s WHERE main_page_url=?",
    502                          kHostMetadataTableName).c_str()));
    503 }
    504 
    505 Statement* ResourcePrefetchPredictorTables::GetHostMetadataUpdateStatement() {
    506   return new Statement(DB()->GetCachedStatement(
    507       SQL_FROM_HERE,
    508       base::StringPrintf(
    509           "INSERT INTO %s (main_page_url, last_visit_time) VALUES (?,?)",
    510           kHostMetadataTableName).c_str()));
    511 }
    512 
    513 }  // namespace predictors
    514