Home | History | Annotate | Download | only in history
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/history/expire_history_backend.h"
      6 
      7 #include <algorithm>
      8 #include <functional>
      9 #include <limits>
     10 
     11 #include "base/bind.h"
     12 #include "base/compiler_specific.h"
     13 #include "base/file_util.h"
     14 #include "base/files/file_enumerator.h"
     15 #include "base/logging.h"
     16 #include "base/message_loop/message_loop.h"
     17 #include "chrome/browser/chrome_notification_types.h"
     18 #include "chrome/browser/history/history_database.h"
     19 #include "chrome/browser/history/history_notifications.h"
     20 #include "chrome/browser/history/thumbnail_database.h"
     21 #include "components/history/core/browser/history_client.h"
     22 
     23 namespace history {
     24 
     25 // Helpers --------------------------------------------------------------------
     26 
     27 namespace {
     28 
     29 // The number of days by which the expiration threshold is advanced for items
     30 // that we want to expire early, such as those of AUTO_SUBFRAME transition type.
     31 //
     32 // Early expiration stuff is kept around only for edge cases, as subframes
     33 // don't appear in history and the vast majority of them are ads anyway. The
     34 // main use case for these is if you're on a site with links to different
     35 // frames, you'll be able to see those links as visited, and we'll also be
     36 // able to get redirect information for those URLs.
     37 //
     38 // But since these uses are most valuable when you're actually on the site,
     39 // and because these can take up the bulk of your history, we get a lot of
     40 // space savings by deleting them quickly.
     41 const int kEarlyExpirationAdvanceDays = 3;
     42 
     43 // Reads all types of visits starting from beginning of time to the given end
     44 // time. This is the most general reader.
     45 class AllVisitsReader : public ExpiringVisitsReader {
     46  public:
     47   virtual bool Read(base::Time end_time,
     48                     HistoryDatabase* db,
     49                     VisitVector* visits,
     50                     int max_visits) const OVERRIDE {
     51     DCHECK(db) << "must have a database to operate upon";
     52     DCHECK(visits) << "visit vector has to exist in order to populate it";
     53 
     54     db->GetAllVisitsInRange(base::Time(), end_time, max_visits, visits);
     55     // When we got the maximum number of visits we asked for, we say there could
     56     // be additional things to expire now.
     57     return static_cast<int>(visits->size()) == max_visits;
     58   }
     59 };
     60 
     61 // Reads only AUTO_SUBFRAME visits, within a computed range. The range is
     62 // computed as follows:
     63 // * |begin_time| is read from the meta table. This value is updated whenever
     64 //   there are no more additional visits to expire by this reader.
     65 // * |end_time| is advanced forward by a constant (kEarlyExpirationAdvanceDay),
     66 //   but not past the current time.
     67 class AutoSubframeVisitsReader : public ExpiringVisitsReader {
     68  public:
     69   virtual bool Read(base::Time end_time,
     70                     HistoryDatabase* db,
     71                     VisitVector* visits,
     72                     int max_visits) const OVERRIDE {
     73     DCHECK(db) << "must have a database to operate upon";
     74     DCHECK(visits) << "visit vector has to exist in order to populate it";
     75 
     76     base::Time begin_time = db->GetEarlyExpirationThreshold();
     77     // Advance |end_time| to expire early.
     78     base::Time early_end_time = end_time +
     79         base::TimeDelta::FromDays(kEarlyExpirationAdvanceDays);
     80 
     81     // We don't want to set the early expiration threshold to a time in the
     82     // future.
     83     base::Time now = base::Time::Now();
     84     if (early_end_time > now)
     85       early_end_time = now;
     86 
     87     db->GetVisitsInRangeForTransition(begin_time, early_end_time,
     88                                       max_visits,
     89                                       content::PAGE_TRANSITION_AUTO_SUBFRAME,
     90                                       visits);
     91     bool more = static_cast<int>(visits->size()) == max_visits;
     92     if (!more)
     93       db->UpdateEarlyExpirationThreshold(early_end_time);
     94 
     95     return more;
     96   }
     97 };
     98 
     99 // The number of visits we will expire very time we check for old items. This
    100 // Prevents us from doing too much work any given time.
    101 const int kNumExpirePerIteration = 32;
    102 
    103 // The number of seconds between checking for items that should be expired when
    104 // we think there might be more items to expire. This timeout is used when the
    105 // last expiration found at least kNumExpirePerIteration and we want to check
    106 // again "soon."
    107 const int kExpirationDelaySec = 30;
    108 
    109 // The number of minutes between checking, as with kExpirationDelaySec, but
    110 // when we didn't find enough things to expire last time. If there was no
    111 // history to expire last iteration, it's likely there is nothing next
    112 // iteration, so we want to wait longer before checking to avoid wasting CPU.
    113 const int kExpirationEmptyDelayMin = 5;
    114 
    115 }  // namespace
    116 
    117 
    118 // ExpireHistoryBackend::DeleteEffects ----------------------------------------
    119 
    120 ExpireHistoryBackend::DeleteEffects::DeleteEffects() {
    121 }
    122 
    123 ExpireHistoryBackend::DeleteEffects::~DeleteEffects() {
    124 }
    125 
    126 
    127 // ExpireHistoryBackend -------------------------------------------------------
    128 
    129 ExpireHistoryBackend::ExpireHistoryBackend(
    130     BroadcastNotificationDelegate* delegate,
    131     HistoryClient* history_client)
    132     : delegate_(delegate),
    133       main_db_(NULL),
    134       thumb_db_(NULL),
    135       weak_factory_(this),
    136       history_client_(history_client) {
    137 }
    138 
    139 ExpireHistoryBackend::~ExpireHistoryBackend() {
    140 }
    141 
    142 void ExpireHistoryBackend::SetDatabases(HistoryDatabase* main_db,
    143                                         ThumbnailDatabase* thumb_db) {
    144   main_db_ = main_db;
    145   thumb_db_ = thumb_db;
    146 }
    147 
    148 void ExpireHistoryBackend::DeleteURL(const GURL& url) {
    149   DeleteURLs(std::vector<GURL>(1, url));
    150 }
    151 
    152 void ExpireHistoryBackend::DeleteURLs(const std::vector<GURL>& urls) {
    153   if (!main_db_)
    154     return;
    155 
    156   DeleteEffects effects;
    157   HistoryClient* history_client = GetHistoryClient();
    158   for (std::vector<GURL>::const_iterator url = urls.begin(); url != urls.end();
    159        ++url) {
    160     URLRow url_row;
    161     if (!main_db_->GetRowForURL(*url, &url_row))
    162       continue;  // Nothing to delete.
    163 
    164     // Collect all the visits and delete them. Note that we don't give up if
    165     // there are no visits, since the URL could still have an entry that we
    166     // should delete.
    167     VisitVector visits;
    168     main_db_->GetVisitsForURL(url_row.id(), &visits);
    169 
    170     DeleteVisitRelatedInfo(visits, &effects);
    171 
    172     // We skip ExpireURLsForVisits (since we are deleting from the
    173     // URL, and not starting with visits in a given time range). We
    174     // therefore need to call the deletion and favicon update
    175     // functions manually.
    176     DeleteOneURL(url_row,
    177                  history_client && history_client->IsBookmarked(*url),
    178                  &effects);
    179   }
    180 
    181   DeleteFaviconsIfPossible(&effects);
    182 
    183   BroadcastNotifications(&effects, DELETION_USER_INITIATED);
    184 }
    185 
    186 void ExpireHistoryBackend::ExpireHistoryBetween(
    187     const std::set<GURL>& restrict_urls,
    188     base::Time begin_time,
    189     base::Time end_time) {
    190   if (!main_db_)
    191     return;
    192 
    193   // Find the affected visits and delete them.
    194   VisitVector visits;
    195   main_db_->GetAllVisitsInRange(begin_time, end_time, 0, &visits);
    196   if (!restrict_urls.empty()) {
    197     std::set<URLID> url_ids;
    198     for (std::set<GURL>::const_iterator url = restrict_urls.begin();
    199         url != restrict_urls.end(); ++url)
    200       url_ids.insert(main_db_->GetRowForURL(*url, NULL));
    201     VisitVector all_visits;
    202     all_visits.swap(visits);
    203     for (VisitVector::iterator visit = all_visits.begin();
    204          visit != all_visits.end(); ++visit) {
    205       if (url_ids.find(visit->url_id) != url_ids.end())
    206         visits.push_back(*visit);
    207     }
    208   }
    209   ExpireVisits(visits);
    210 }
    211 
    212 void ExpireHistoryBackend::ExpireHistoryForTimes(
    213     const std::vector<base::Time>& times) {
    214   // |times| must be in reverse chronological order and have no
    215   // duplicates, i.e. each member must be earlier than the one before
    216   // it.
    217   DCHECK(
    218       std::adjacent_find(
    219           times.begin(), times.end(), std::less_equal<base::Time>()) ==
    220       times.end());
    221 
    222   if (!main_db_)
    223     return;
    224 
    225   // Find the affected visits and delete them.
    226   VisitVector visits;
    227   main_db_->GetVisitsForTimes(times, &visits);
    228   ExpireVisits(visits);
    229 }
    230 
    231 void ExpireHistoryBackend::ExpireVisits(const VisitVector& visits) {
    232   if (visits.empty())
    233     return;
    234 
    235   DeleteEffects effects;
    236   DeleteVisitRelatedInfo(visits, &effects);
    237 
    238   // Delete or update the URLs affected. We want to update the visit counts
    239   // since this is called by the user who wants to delete their recent history,
    240   // and we don't want to leave any evidence.
    241   ExpireURLsForVisits(visits, &effects);
    242   DeleteFaviconsIfPossible(&effects);
    243   BroadcastNotifications(&effects, DELETION_USER_INITIATED);
    244 
    245   // Pick up any bits possibly left over.
    246   ParanoidExpireHistory();
    247 }
    248 
    249 void ExpireHistoryBackend::ExpireHistoryBefore(base::Time end_time) {
    250   if (!main_db_)
    251     return;
    252 
    253   // Expire as much history as possible before the given date.
    254   ExpireSomeOldHistory(end_time, GetAllVisitsReader(),
    255                        std::numeric_limits<int>::max());
    256   ParanoidExpireHistory();
    257 }
    258 
    259 void ExpireHistoryBackend::InitWorkQueue() {
    260   DCHECK(work_queue_.empty()) << "queue has to be empty prior to init";
    261 
    262   for (size_t i = 0; i < readers_.size(); i++)
    263     work_queue_.push(readers_[i]);
    264 }
    265 
    266 const ExpiringVisitsReader* ExpireHistoryBackend::GetAllVisitsReader() {
    267   if (!all_visits_reader_)
    268     all_visits_reader_.reset(new AllVisitsReader());
    269   return all_visits_reader_.get();
    270 }
    271 
    272 const ExpiringVisitsReader*
    273     ExpireHistoryBackend::GetAutoSubframeVisitsReader() {
    274   if (!auto_subframe_visits_reader_)
    275     auto_subframe_visits_reader_.reset(new AutoSubframeVisitsReader());
    276   return auto_subframe_visits_reader_.get();
    277 }
    278 
    279 void ExpireHistoryBackend::StartExpiringOldStuff(
    280     base::TimeDelta expiration_threshold) {
    281   expiration_threshold_ = expiration_threshold;
    282 
    283   // Remove all readers, just in case this was method was called before.
    284   readers_.clear();
    285   // For now, we explicitly add all known readers. If we come up with more
    286   // reader types (in case we want to expire different types of visits in
    287   // different ways), we can make it be populated by creator/owner of
    288   // ExpireHistoryBackend.
    289   readers_.push_back(GetAllVisitsReader());
    290   readers_.push_back(GetAutoSubframeVisitsReader());
    291 
    292   // Initialize the queue with all tasks for the first set of iterations.
    293   InitWorkQueue();
    294   ScheduleExpire();
    295 }
    296 
    297 void ExpireHistoryBackend::DeleteFaviconsIfPossible(DeleteEffects* effects) {
    298   if (!thumb_db_)
    299     return;
    300 
    301   for (std::set<favicon_base::FaviconID>::const_iterator i =
    302            effects->affected_favicons.begin();
    303        i != effects->affected_favicons.end(); ++i) {
    304     if (!thumb_db_->HasMappingFor(*i)) {
    305       GURL icon_url;
    306       favicon_base::IconType icon_type;
    307       if (thumb_db_->GetFaviconHeader(*i,
    308                                       &icon_url,
    309                                       &icon_type) &&
    310           thumb_db_->DeleteFavicon(*i)) {
    311         effects->deleted_favicons.insert(icon_url);
    312       }
    313     }
    314   }
    315 }
    316 
    317 void ExpireHistoryBackend::BroadcastNotifications(DeleteEffects* effects,
    318                                                   DeletionType type) {
    319   if (!effects->modified_urls.empty()) {
    320     scoped_ptr<URLsModifiedDetails> details(new URLsModifiedDetails);
    321     details->changed_urls = effects->modified_urls;
    322     delegate_->NotifySyncURLsModified(&details->changed_urls);
    323     delegate_->BroadcastNotifications(
    324         chrome::NOTIFICATION_HISTORY_URLS_MODIFIED,
    325         details.PassAs<HistoryDetails>());
    326   }
    327   if (!effects->deleted_urls.empty()) {
    328     scoped_ptr<URLsDeletedDetails> details(new URLsDeletedDetails);
    329     details->all_history = false;
    330     details->expired = (type == DELETION_EXPIRED);
    331     details->rows = effects->deleted_urls;
    332     details->favicon_urls = effects->deleted_favicons;
    333     delegate_->NotifySyncURLsDeleted(details->all_history, details->expired,
    334                                      &details->rows);
    335     delegate_->BroadcastNotifications(chrome::NOTIFICATION_HISTORY_URLS_DELETED,
    336                                       details.PassAs<HistoryDetails>());
    337   }
    338 }
    339 
    340 void ExpireHistoryBackend::DeleteVisitRelatedInfo(const VisitVector& visits,
    341                                                   DeleteEffects* effects) {
    342   for (size_t i = 0; i < visits.size(); i++) {
    343     // Delete the visit itself.
    344     main_db_->DeleteVisit(visits[i]);
    345 
    346     // Add the URL row to the affected URL list.
    347     if (!effects->affected_urls.count(visits[i].url_id)) {
    348       URLRow row;
    349       if (main_db_->GetURLRow(visits[i].url_id, &row))
    350         effects->affected_urls[visits[i].url_id] = row;
    351     }
    352   }
    353 }
    354 
    355 void ExpireHistoryBackend::DeleteOneURL(const URLRow& url_row,
    356                                         bool is_bookmarked,
    357                                         DeleteEffects* effects) {
    358   main_db_->DeleteSegmentForURL(url_row.id());
    359 
    360   if (!is_bookmarked) {
    361     effects->deleted_urls.push_back(url_row);
    362 
    363     // Delete stuff that references this URL.
    364     if (thumb_db_) {
    365       // Collect shared information.
    366       std::vector<IconMapping> icon_mappings;
    367       if (thumb_db_->GetIconMappingsForPageURL(url_row.url(), &icon_mappings)) {
    368         for (std::vector<IconMapping>::iterator m = icon_mappings.begin();
    369              m != icon_mappings.end(); ++m) {
    370           effects->affected_favicons.insert(m->icon_id);
    371         }
    372         // Delete the mapping entries for the url.
    373         thumb_db_->DeleteIconMappings(url_row.url());
    374       }
    375     }
    376     // Last, delete the URL entry.
    377     main_db_->DeleteURLRow(url_row.id());
    378   }
    379 }
    380 
    381 namespace {
    382 
    383 struct ChangedURL {
    384   ChangedURL() : visit_count(0), typed_count(0) {}
    385   int visit_count;
    386   int typed_count;
    387 };
    388 
    389 }  // namespace
    390 
    391 void ExpireHistoryBackend::ExpireURLsForVisits(const VisitVector& visits,
    392                                                DeleteEffects* effects) {
    393   // First find all unique URLs and the number of visits we're deleting for
    394   // each one.
    395   std::map<URLID, ChangedURL> changed_urls;
    396   for (size_t i = 0; i < visits.size(); i++) {
    397     ChangedURL& cur = changed_urls[visits[i].url_id];
    398     // NOTE: This code must stay in sync with HistoryBackend::AddPageVisit().
    399     // TODO(pkasting): http://b/1148304 We shouldn't be marking so many URLs as
    400     // typed, which would help eliminate the need for this code (we still would
    401     // need to handle RELOAD transitions specially, though).
    402     content::PageTransition transition =
    403         content::PageTransitionStripQualifier(visits[i].transition);
    404     if (transition != content::PAGE_TRANSITION_RELOAD)
    405       cur.visit_count++;
    406     if ((transition == content::PAGE_TRANSITION_TYPED &&
    407         !content::PageTransitionIsRedirect(visits[i].transition)) ||
    408         transition == content::PAGE_TRANSITION_KEYWORD_GENERATED)
    409       cur.typed_count++;
    410   }
    411 
    412   // Check each unique URL with deleted visits.
    413   HistoryClient* history_client = GetHistoryClient();
    414   for (std::map<URLID, ChangedURL>::const_iterator i = changed_urls.begin();
    415        i != changed_urls.end(); ++i) {
    416     // The unique URL rows should already be filled in.
    417     URLRow& url_row = effects->affected_urls[i->first];
    418     if (!url_row.id())
    419       continue;  // URL row doesn't exist in the database.
    420 
    421     // Check if there are any other visits for this URL and update the time
    422     // (the time change may not actually be synced to disk below when we're
    423     // archiving).
    424     VisitRow last_visit;
    425     if (main_db_->GetMostRecentVisitForURL(url_row.id(), &last_visit))
    426       url_row.set_last_visit(last_visit.visit_time);
    427     else
    428       url_row.set_last_visit(base::Time());
    429 
    430     // Don't delete URLs with visits still in the DB, or bookmarked.
    431     bool is_bookmarked =
    432         (history_client && history_client->IsBookmarked(url_row.url()));
    433     if (!is_bookmarked && url_row.last_visit().is_null()) {
    434       // Not bookmarked and no more visits. Nuke the url.
    435       DeleteOneURL(url_row, is_bookmarked, effects);
    436     } else {
    437       // NOTE: The calls to std::max() below are a backstop, but they should
    438       // never actually be needed unless the database is corrupt (I think).
    439       url_row.set_visit_count(
    440           std::max(0, url_row.visit_count() - i->second.visit_count));
    441       url_row.set_typed_count(
    442           std::max(0, url_row.typed_count() - i->second.typed_count));
    443 
    444       // Update the db with the new details.
    445       main_db_->UpdateURLRow(url_row.id(), url_row);
    446 
    447       effects->modified_urls.push_back(url_row);
    448     }
    449   }
    450 }
    451 
    452 void ExpireHistoryBackend::ScheduleExpire() {
    453   base::TimeDelta delay;
    454   if (work_queue_.empty()) {
    455     // If work queue is empty, reset the work queue to contain all tasks and
    456     // schedule next iteration after a longer delay.
    457     InitWorkQueue();
    458     delay = base::TimeDelta::FromMinutes(kExpirationEmptyDelayMin);
    459   } else {
    460     delay = base::TimeDelta::FromSeconds(kExpirationDelaySec);
    461   }
    462 
    463   base::MessageLoop::current()->PostDelayedTask(
    464       FROM_HERE,
    465       base::Bind(&ExpireHistoryBackend::DoExpireIteration,
    466                  weak_factory_.GetWeakPtr()),
    467       delay);
    468 }
    469 
    470 void ExpireHistoryBackend::DoExpireIteration() {
    471   DCHECK(!work_queue_.empty()) << "queue has to be non-empty";
    472 
    473   const ExpiringVisitsReader* reader = work_queue_.front();
    474   bool more_to_expire = ExpireSomeOldHistory(
    475       GetCurrentExpirationTime(), reader, kNumExpirePerIteration);
    476 
    477   work_queue_.pop();
    478   // If there are more items to expire, add the reader back to the queue, thus
    479   // creating a new task for future iterations.
    480   if (more_to_expire)
    481     work_queue_.push(reader);
    482 
    483   ScheduleExpire();
    484 }
    485 
    486 bool ExpireHistoryBackend::ExpireSomeOldHistory(
    487     base::Time end_time,
    488     const ExpiringVisitsReader* reader,
    489     int max_visits) {
    490   if (!main_db_)
    491     return false;
    492 
    493   // Add an extra time unit to given end time, because
    494   // GetAllVisitsInRange, et al. queries' end value is non-inclusive.
    495   base::Time effective_end_time =
    496       base::Time::FromInternalValue(end_time.ToInternalValue() + 1);
    497 
    498   VisitVector deleted_visits;
    499   bool more_to_expire = reader->Read(effective_end_time, main_db_,
    500                                      &deleted_visits, max_visits);
    501 
    502   DeleteEffects deleted_effects;
    503   DeleteVisitRelatedInfo(deleted_visits, &deleted_effects);
    504   ExpireURLsForVisits(deleted_visits, &deleted_effects);
    505   DeleteFaviconsIfPossible(&deleted_effects);
    506 
    507   BroadcastNotifications(&deleted_effects, DELETION_EXPIRED);
    508 
    509   return more_to_expire;
    510 }
    511 
    512 void ExpireHistoryBackend::ParanoidExpireHistory() {
    513   // TODO(brettw): Bug 1067331: write this to clean up any errors.
    514 }
    515 
    516 HistoryClient* ExpireHistoryBackend::GetHistoryClient() {
    517   // We use the history client to determine if a URL is bookmarked. The data is
    518   // loaded on a separate thread and may not be done by the time we get here.
    519   // We therefore block until the bookmarks have finished loading.
    520   if (history_client_)
    521     history_client_->BlockUntilBookmarksLoaded();
    522   return history_client_;
    523 }
    524 
    525 }  // namespace history
    526