1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/history/expire_history_backend.h" 6 7 #include <algorithm> 8 #include <functional> 9 #include <limits> 10 11 #include "base/bind.h" 12 #include "base/compiler_specific.h" 13 #include "base/files/file_enumerator.h" 14 #include "base/files/file_util.h" 15 #include "base/logging.h" 16 #include "base/message_loop/message_loop.h" 17 #include "chrome/browser/chrome_notification_types.h" 18 #include "chrome/browser/history/history_database.h" 19 #include "chrome/browser/history/history_notifications.h" 20 #include "chrome/browser/history/thumbnail_database.h" 21 #include "components/history/core/browser/history_client.h" 22 23 namespace history { 24 25 // Helpers -------------------------------------------------------------------- 26 27 namespace { 28 29 // The number of days by which the expiration threshold is advanced for items 30 // that we want to expire early, such as those of AUTO_SUBFRAME transition type. 31 // 32 // Early expiration stuff is kept around only for edge cases, as subframes 33 // don't appear in history and the vast majority of them are ads anyway. The 34 // main use case for these is if you're on a site with links to different 35 // frames, you'll be able to see those links as visited, and we'll also be 36 // able to get redirect information for those URLs. 37 // 38 // But since these uses are most valuable when you're actually on the site, 39 // and because these can take up the bulk of your history, we get a lot of 40 // space savings by deleting them quickly. 41 const int kEarlyExpirationAdvanceDays = 3; 42 43 // Reads all types of visits starting from beginning of time to the given end 44 // time. This is the most general reader. 45 class AllVisitsReader : public ExpiringVisitsReader { 46 public: 47 virtual bool Read(base::Time end_time, 48 HistoryDatabase* db, 49 VisitVector* visits, 50 int max_visits) const OVERRIDE { 51 DCHECK(db) << "must have a database to operate upon"; 52 DCHECK(visits) << "visit vector has to exist in order to populate it"; 53 54 db->GetAllVisitsInRange(base::Time(), end_time, max_visits, visits); 55 // When we got the maximum number of visits we asked for, we say there could 56 // be additional things to expire now. 57 return static_cast<int>(visits->size()) == max_visits; 58 } 59 }; 60 61 // Reads only AUTO_SUBFRAME visits, within a computed range. The range is 62 // computed as follows: 63 // * |begin_time| is read from the meta table. This value is updated whenever 64 // there are no more additional visits to expire by this reader. 65 // * |end_time| is advanced forward by a constant (kEarlyExpirationAdvanceDay), 66 // but not past the current time. 67 class AutoSubframeVisitsReader : public ExpiringVisitsReader { 68 public: 69 virtual bool Read(base::Time end_time, 70 HistoryDatabase* db, 71 VisitVector* visits, 72 int max_visits) const OVERRIDE { 73 DCHECK(db) << "must have a database to operate upon"; 74 DCHECK(visits) << "visit vector has to exist in order to populate it"; 75 76 base::Time begin_time = db->GetEarlyExpirationThreshold(); 77 // Advance |end_time| to expire early. 78 base::Time early_end_time = end_time + 79 base::TimeDelta::FromDays(kEarlyExpirationAdvanceDays); 80 81 // We don't want to set the early expiration threshold to a time in the 82 // future. 83 base::Time now = base::Time::Now(); 84 if (early_end_time > now) 85 early_end_time = now; 86 87 db->GetVisitsInRangeForTransition(begin_time, early_end_time, 88 max_visits, 89 ui::PAGE_TRANSITION_AUTO_SUBFRAME, 90 visits); 91 bool more = static_cast<int>(visits->size()) == max_visits; 92 if (!more) 93 db->UpdateEarlyExpirationThreshold(early_end_time); 94 95 return more; 96 } 97 }; 98 99 // The number of visits we will expire very time we check for old items. This 100 // Prevents us from doing too much work any given time. 101 const int kNumExpirePerIteration = 32; 102 103 // The number of seconds between checking for items that should be expired when 104 // we think there might be more items to expire. This timeout is used when the 105 // last expiration found at least kNumExpirePerIteration and we want to check 106 // again "soon." 107 const int kExpirationDelaySec = 30; 108 109 // The number of minutes between checking, as with kExpirationDelaySec, but 110 // when we didn't find enough things to expire last time. If there was no 111 // history to expire last iteration, it's likely there is nothing next 112 // iteration, so we want to wait longer before checking to avoid wasting CPU. 113 const int kExpirationEmptyDelayMin = 5; 114 115 } // namespace 116 117 118 // ExpireHistoryBackend::DeleteEffects ---------------------------------------- 119 120 ExpireHistoryBackend::DeleteEffects::DeleteEffects() { 121 } 122 123 ExpireHistoryBackend::DeleteEffects::~DeleteEffects() { 124 } 125 126 127 // ExpireHistoryBackend ------------------------------------------------------- 128 129 ExpireHistoryBackend::ExpireHistoryBackend( 130 BroadcastNotificationDelegate* delegate, 131 HistoryClient* history_client) 132 : delegate_(delegate), 133 main_db_(NULL), 134 thumb_db_(NULL), 135 history_client_(history_client), 136 weak_factory_(this) { 137 } 138 139 ExpireHistoryBackend::~ExpireHistoryBackend() { 140 } 141 142 void ExpireHistoryBackend::SetDatabases(HistoryDatabase* main_db, 143 ThumbnailDatabase* thumb_db) { 144 main_db_ = main_db; 145 thumb_db_ = thumb_db; 146 } 147 148 void ExpireHistoryBackend::DeleteURL(const GURL& url) { 149 DeleteURLs(std::vector<GURL>(1, url)); 150 } 151 152 void ExpireHistoryBackend::DeleteURLs(const std::vector<GURL>& urls) { 153 if (!main_db_) 154 return; 155 156 DeleteEffects effects; 157 HistoryClient* history_client = GetHistoryClient(); 158 for (std::vector<GURL>::const_iterator url = urls.begin(); url != urls.end(); 159 ++url) { 160 URLRow url_row; 161 if (!main_db_->GetRowForURL(*url, &url_row)) 162 continue; // Nothing to delete. 163 164 // Collect all the visits and delete them. Note that we don't give up if 165 // there are no visits, since the URL could still have an entry that we 166 // should delete. 167 VisitVector visits; 168 main_db_->GetVisitsForURL(url_row.id(), &visits); 169 170 DeleteVisitRelatedInfo(visits, &effects); 171 172 // We skip ExpireURLsForVisits (since we are deleting from the 173 // URL, and not starting with visits in a given time range). We 174 // therefore need to call the deletion and favicon update 175 // functions manually. 176 DeleteOneURL(url_row, 177 history_client && history_client->IsBookmarked(*url), 178 &effects); 179 } 180 181 DeleteFaviconsIfPossible(&effects); 182 183 BroadcastNotifications(&effects, DELETION_USER_INITIATED); 184 } 185 186 void ExpireHistoryBackend::ExpireHistoryBetween( 187 const std::set<GURL>& restrict_urls, 188 base::Time begin_time, 189 base::Time end_time) { 190 if (!main_db_) 191 return; 192 193 // Find the affected visits and delete them. 194 VisitVector visits; 195 main_db_->GetAllVisitsInRange(begin_time, end_time, 0, &visits); 196 if (!restrict_urls.empty()) { 197 std::set<URLID> url_ids; 198 for (std::set<GURL>::const_iterator url = restrict_urls.begin(); 199 url != restrict_urls.end(); ++url) 200 url_ids.insert(main_db_->GetRowForURL(*url, NULL)); 201 VisitVector all_visits; 202 all_visits.swap(visits); 203 for (VisitVector::iterator visit = all_visits.begin(); 204 visit != all_visits.end(); ++visit) { 205 if (url_ids.find(visit->url_id) != url_ids.end()) 206 visits.push_back(*visit); 207 } 208 } 209 ExpireVisits(visits); 210 } 211 212 void ExpireHistoryBackend::ExpireHistoryForTimes( 213 const std::vector<base::Time>& times) { 214 // |times| must be in reverse chronological order and have no 215 // duplicates, i.e. each member must be earlier than the one before 216 // it. 217 DCHECK( 218 std::adjacent_find( 219 times.begin(), times.end(), std::less_equal<base::Time>()) == 220 times.end()); 221 222 if (!main_db_) 223 return; 224 225 // Find the affected visits and delete them. 226 VisitVector visits; 227 main_db_->GetVisitsForTimes(times, &visits); 228 ExpireVisits(visits); 229 } 230 231 void ExpireHistoryBackend::ExpireVisits(const VisitVector& visits) { 232 if (visits.empty()) 233 return; 234 235 DeleteEffects effects; 236 DeleteVisitRelatedInfo(visits, &effects); 237 238 // Delete or update the URLs affected. We want to update the visit counts 239 // since this is called by the user who wants to delete their recent history, 240 // and we don't want to leave any evidence. 241 ExpireURLsForVisits(visits, &effects); 242 DeleteFaviconsIfPossible(&effects); 243 BroadcastNotifications(&effects, DELETION_USER_INITIATED); 244 245 // Pick up any bits possibly left over. 246 ParanoidExpireHistory(); 247 } 248 249 void ExpireHistoryBackend::ExpireHistoryBefore(base::Time end_time) { 250 if (!main_db_) 251 return; 252 253 // Expire as much history as possible before the given date. 254 ExpireSomeOldHistory(end_time, GetAllVisitsReader(), 255 std::numeric_limits<int>::max()); 256 ParanoidExpireHistory(); 257 } 258 259 void ExpireHistoryBackend::InitWorkQueue() { 260 DCHECK(work_queue_.empty()) << "queue has to be empty prior to init"; 261 262 for (size_t i = 0; i < readers_.size(); i++) 263 work_queue_.push(readers_[i]); 264 } 265 266 const ExpiringVisitsReader* ExpireHistoryBackend::GetAllVisitsReader() { 267 if (!all_visits_reader_) 268 all_visits_reader_.reset(new AllVisitsReader()); 269 return all_visits_reader_.get(); 270 } 271 272 const ExpiringVisitsReader* 273 ExpireHistoryBackend::GetAutoSubframeVisitsReader() { 274 if (!auto_subframe_visits_reader_) 275 auto_subframe_visits_reader_.reset(new AutoSubframeVisitsReader()); 276 return auto_subframe_visits_reader_.get(); 277 } 278 279 void ExpireHistoryBackend::StartExpiringOldStuff( 280 base::TimeDelta expiration_threshold) { 281 expiration_threshold_ = expiration_threshold; 282 283 // Remove all readers, just in case this was method was called before. 284 readers_.clear(); 285 // For now, we explicitly add all known readers. If we come up with more 286 // reader types (in case we want to expire different types of visits in 287 // different ways), we can make it be populated by creator/owner of 288 // ExpireHistoryBackend. 289 readers_.push_back(GetAllVisitsReader()); 290 readers_.push_back(GetAutoSubframeVisitsReader()); 291 292 // Initialize the queue with all tasks for the first set of iterations. 293 InitWorkQueue(); 294 ScheduleExpire(); 295 } 296 297 void ExpireHistoryBackend::DeleteFaviconsIfPossible(DeleteEffects* effects) { 298 if (!thumb_db_) 299 return; 300 301 for (std::set<favicon_base::FaviconID>::const_iterator i = 302 effects->affected_favicons.begin(); 303 i != effects->affected_favicons.end(); ++i) { 304 if (!thumb_db_->HasMappingFor(*i)) { 305 GURL icon_url; 306 favicon_base::IconType icon_type; 307 if (thumb_db_->GetFaviconHeader(*i, 308 &icon_url, 309 &icon_type) && 310 thumb_db_->DeleteFavicon(*i)) { 311 effects->deleted_favicons.insert(icon_url); 312 } 313 } 314 } 315 } 316 317 void ExpireHistoryBackend::BroadcastNotifications(DeleteEffects* effects, 318 DeletionType type) { 319 if (!effects->modified_urls.empty()) { 320 scoped_ptr<URLsModifiedDetails> details(new URLsModifiedDetails); 321 details->changed_urls = effects->modified_urls; 322 delegate_->NotifySyncURLsModified(&details->changed_urls); 323 delegate_->BroadcastNotifications( 324 chrome::NOTIFICATION_HISTORY_URLS_MODIFIED, 325 details.PassAs<HistoryDetails>()); 326 } 327 if (!effects->deleted_urls.empty()) { 328 scoped_ptr<URLsDeletedDetails> details(new URLsDeletedDetails); 329 details->all_history = false; 330 details->expired = (type == DELETION_EXPIRED); 331 details->rows = effects->deleted_urls; 332 details->favicon_urls = effects->deleted_favicons; 333 delegate_->NotifySyncURLsDeleted(details->all_history, details->expired, 334 &details->rows); 335 delegate_->BroadcastNotifications(chrome::NOTIFICATION_HISTORY_URLS_DELETED, 336 details.PassAs<HistoryDetails>()); 337 } 338 } 339 340 void ExpireHistoryBackend::DeleteVisitRelatedInfo(const VisitVector& visits, 341 DeleteEffects* effects) { 342 for (size_t i = 0; i < visits.size(); i++) { 343 // Delete the visit itself. 344 main_db_->DeleteVisit(visits[i]); 345 346 // Add the URL row to the affected URL list. 347 if (!effects->affected_urls.count(visits[i].url_id)) { 348 URLRow row; 349 if (main_db_->GetURLRow(visits[i].url_id, &row)) 350 effects->affected_urls[visits[i].url_id] = row; 351 } 352 } 353 } 354 355 void ExpireHistoryBackend::DeleteOneURL(const URLRow& url_row, 356 bool is_bookmarked, 357 DeleteEffects* effects) { 358 main_db_->DeleteSegmentForURL(url_row.id()); 359 360 if (!is_bookmarked) { 361 effects->deleted_urls.push_back(url_row); 362 363 // Delete stuff that references this URL. 364 if (thumb_db_) { 365 // Collect shared information. 366 std::vector<IconMapping> icon_mappings; 367 if (thumb_db_->GetIconMappingsForPageURL(url_row.url(), &icon_mappings)) { 368 for (std::vector<IconMapping>::iterator m = icon_mappings.begin(); 369 m != icon_mappings.end(); ++m) { 370 effects->affected_favicons.insert(m->icon_id); 371 } 372 // Delete the mapping entries for the url. 373 thumb_db_->DeleteIconMappings(url_row.url()); 374 } 375 } 376 // Last, delete the URL entry. 377 main_db_->DeleteURLRow(url_row.id()); 378 } 379 } 380 381 namespace { 382 383 struct ChangedURL { 384 ChangedURL() : visit_count(0), typed_count(0) {} 385 int visit_count; 386 int typed_count; 387 }; 388 389 } // namespace 390 391 void ExpireHistoryBackend::ExpireURLsForVisits(const VisitVector& visits, 392 DeleteEffects* effects) { 393 // First find all unique URLs and the number of visits we're deleting for 394 // each one. 395 std::map<URLID, ChangedURL> changed_urls; 396 for (size_t i = 0; i < visits.size(); i++) { 397 ChangedURL& cur = changed_urls[visits[i].url_id]; 398 // NOTE: This code must stay in sync with HistoryBackend::AddPageVisit(). 399 // TODO(pkasting): http://b/1148304 We shouldn't be marking so many URLs as 400 // typed, which would help eliminate the need for this code (we still would 401 // need to handle RELOAD transitions specially, though). 402 ui::PageTransition transition = 403 ui::PageTransitionStripQualifier(visits[i].transition); 404 if (transition != ui::PAGE_TRANSITION_RELOAD) 405 cur.visit_count++; 406 if ((transition == ui::PAGE_TRANSITION_TYPED && 407 !ui::PageTransitionIsRedirect(visits[i].transition)) || 408 transition == ui::PAGE_TRANSITION_KEYWORD_GENERATED) 409 cur.typed_count++; 410 } 411 412 // Check each unique URL with deleted visits. 413 HistoryClient* history_client = GetHistoryClient(); 414 for (std::map<URLID, ChangedURL>::const_iterator i = changed_urls.begin(); 415 i != changed_urls.end(); ++i) { 416 // The unique URL rows should already be filled in. 417 URLRow& url_row = effects->affected_urls[i->first]; 418 if (!url_row.id()) 419 continue; // URL row doesn't exist in the database. 420 421 // Check if there are any other visits for this URL and update the time 422 // (the time change may not actually be synced to disk below when we're 423 // archiving). 424 VisitRow last_visit; 425 if (main_db_->GetMostRecentVisitForURL(url_row.id(), &last_visit)) 426 url_row.set_last_visit(last_visit.visit_time); 427 else 428 url_row.set_last_visit(base::Time()); 429 430 // Don't delete URLs with visits still in the DB, or bookmarked. 431 bool is_bookmarked = 432 (history_client && history_client->IsBookmarked(url_row.url())); 433 if (!is_bookmarked && url_row.last_visit().is_null()) { 434 // Not bookmarked and no more visits. Nuke the url. 435 DeleteOneURL(url_row, is_bookmarked, effects); 436 } else { 437 // NOTE: The calls to std::max() below are a backstop, but they should 438 // never actually be needed unless the database is corrupt (I think). 439 url_row.set_visit_count( 440 std::max(0, url_row.visit_count() - i->second.visit_count)); 441 url_row.set_typed_count( 442 std::max(0, url_row.typed_count() - i->second.typed_count)); 443 444 // Update the db with the new details. 445 main_db_->UpdateURLRow(url_row.id(), url_row); 446 447 effects->modified_urls.push_back(url_row); 448 } 449 } 450 } 451 452 void ExpireHistoryBackend::ScheduleExpire() { 453 base::TimeDelta delay; 454 if (work_queue_.empty()) { 455 // If work queue is empty, reset the work queue to contain all tasks and 456 // schedule next iteration after a longer delay. 457 InitWorkQueue(); 458 delay = base::TimeDelta::FromMinutes(kExpirationEmptyDelayMin); 459 } else { 460 delay = base::TimeDelta::FromSeconds(kExpirationDelaySec); 461 } 462 463 base::MessageLoop::current()->PostDelayedTask( 464 FROM_HERE, 465 base::Bind(&ExpireHistoryBackend::DoExpireIteration, 466 weak_factory_.GetWeakPtr()), 467 delay); 468 } 469 470 void ExpireHistoryBackend::DoExpireIteration() { 471 DCHECK(!work_queue_.empty()) << "queue has to be non-empty"; 472 473 const ExpiringVisitsReader* reader = work_queue_.front(); 474 bool more_to_expire = ExpireSomeOldHistory( 475 GetCurrentExpirationTime(), reader, kNumExpirePerIteration); 476 477 work_queue_.pop(); 478 // If there are more items to expire, add the reader back to the queue, thus 479 // creating a new task for future iterations. 480 if (more_to_expire) 481 work_queue_.push(reader); 482 483 ScheduleExpire(); 484 } 485 486 bool ExpireHistoryBackend::ExpireSomeOldHistory( 487 base::Time end_time, 488 const ExpiringVisitsReader* reader, 489 int max_visits) { 490 if (!main_db_) 491 return false; 492 493 // Add an extra time unit to given end time, because 494 // GetAllVisitsInRange, et al. queries' end value is non-inclusive. 495 base::Time effective_end_time = 496 base::Time::FromInternalValue(end_time.ToInternalValue() + 1); 497 498 VisitVector deleted_visits; 499 bool more_to_expire = reader->Read(effective_end_time, main_db_, 500 &deleted_visits, max_visits); 501 502 DeleteEffects deleted_effects; 503 DeleteVisitRelatedInfo(deleted_visits, &deleted_effects); 504 ExpireURLsForVisits(deleted_visits, &deleted_effects); 505 DeleteFaviconsIfPossible(&deleted_effects); 506 507 BroadcastNotifications(&deleted_effects, DELETION_EXPIRED); 508 509 return more_to_expire; 510 } 511 512 void ExpireHistoryBackend::ParanoidExpireHistory() { 513 // TODO(brettw): Bug 1067331: write this to clean up any errors. 514 } 515 516 HistoryClient* ExpireHistoryBackend::GetHistoryClient() { 517 // We use the history client to determine if a URL is bookmarked. The data is 518 // loaded on a separate thread and may not be done by the time we get here. 519 // We therefore block until the bookmarks have finished loading. 520 if (history_client_) 521 history_client_->BlockUntilBookmarksLoaded(); 522 return history_client_; 523 } 524 525 } // namespace history 526