Home | History | Annotate | Download | only in history
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/history/page_collector.h"
      6 
      7 #include "base/bind.h"
      8 #include "base/message_loop/message_loop.h"
      9 #include "base/strings/utf_string_conversions.h"
     10 #include "chrome/browser/history/history_publisher.h"
     11 #include "url/gurl.h"
     12 
     13 namespace {
     14 
     15 // Page info older than this will be published even if we haven't
     16 // gotten a title and/or body.
     17 const int kExpirationSeconds = 20;
     18 
     19 }  // namespace
     20 
     21 namespace history {
     22 
     23 // PageCollector::PageInfo -----------------------------------------------
     24 
     25 PageCollector::PageInfo::PageInfo(base::Time visit_time)
     26     : visit_time_(visit_time),
     27       added_time_(base::TimeTicks::Now()) {
     28 }
     29 
     30 PageCollector::PageInfo::~PageInfo() {}
     31 
     32 // NOTE(shess): Per the comment on has_title() and has_body(), this
     33 // code maps empty strings to single space to differentiate set title
     34 // and body from empty.  This approach is held over from the original
     35 // TextDatabaseManager version.
     36 void PageCollector::PageInfo::set_title(const string16& ttl) {
     37   if (ttl.empty())
     38     title_ = ASCIIToUTF16(" ");
     39   else
     40     title_ = ttl;
     41 }
     42 
     43 void PageCollector::PageInfo::set_body(const string16& bdy) {
     44   if (bdy.empty())
     45     body_ = ASCIIToUTF16(" ");
     46   else
     47     body_ = bdy;
     48 }
     49 
     50 bool PageCollector::PageInfo::Expired(base::TimeTicks now) const {
     51   return now - added_time_ > base::TimeDelta::FromSeconds(kExpirationSeconds);
     52 }
     53 
     54 PageCollector::PageCollector()
     55     : recent_changes_(RecentChangeList::NO_AUTO_EVICT),
     56       weak_factory_(this) {
     57 }
     58 
     59 PageCollector::~PageCollector() {
     60 }
     61 
     62 void PageCollector::Init(const HistoryPublisher* history_publisher) {
     63   history_publisher_ = history_publisher;
     64 }
     65 
     66 void PageCollector::AddPageURL(const GURL& url, base::Time time) {
     67   // Don't collect data which cannot be published.
     68   if (!history_publisher_)
     69     return;
     70 
     71   // Just save this info for later (evicting any previous data). We
     72   // will delete it when it expires or when all the data is complete.
     73   recent_changes_.Put(url, PageInfo(time));
     74 
     75   // Schedule flush if not already scheduled.
     76   if (!weak_factory_.HasWeakPtrs())
     77     ScheduleFlushCollected();
     78 }
     79 
     80 void PageCollector::AddPageTitle(const GURL& url, const string16& title) {
     81   if (!history_publisher_)
     82     return;
     83 
     84   // If the title comes in after the page has aged out, drop it.
     85   // Older code would manufacture information from the database.
     86   RecentChangeList::iterator found = recent_changes_.Peek(url);
     87   if (found == recent_changes_.end())
     88     return;
     89 
     90   // Publish the info if complete.
     91   if (found->second.has_body()) {
     92     history_publisher_->PublishPageContent(
     93         found->second.visit_time(), url, title, found->second.body());
     94     recent_changes_.Erase(found);
     95   } else {
     96     found->second.set_title(title);
     97   }
     98 }
     99 
    100 void PageCollector::AddPageContents(const GURL& url,
    101                                     const string16& body) {
    102   if (!history_publisher_)
    103     return;
    104 
    105   // If the body comes in after the page has aged out, drop it.
    106   // Older code would manufacture information from the database.
    107   RecentChangeList::iterator found = recent_changes_.Peek(url);
    108   if (found == recent_changes_.end())
    109     return;
    110 
    111   // Publish the info if complete.
    112   if (found->second.has_title()) {
    113     history_publisher_->PublishPageContent(
    114         found->second.visit_time(), url, found->second.title(), body);
    115     recent_changes_.Erase(found);
    116   } else {
    117     found->second.set_body(body);
    118   }
    119 }
    120 
    121 void PageCollector::AddPageData(const GURL& url,
    122                                 base::Time visit_time,
    123                                 const string16& title,
    124                                 const string16& body) {
    125   if (!history_publisher_)
    126     return;
    127 
    128   // Publish the item.
    129   history_publisher_->PublishPageContent(visit_time, url, title, body);
    130 }
    131 
    132 void PageCollector::ScheduleFlushCollected() {
    133   weak_factory_.InvalidateWeakPtrs();
    134   base::MessageLoop::current()->PostDelayedTask(
    135       FROM_HERE,
    136       base::Bind(&PageCollector::FlushCollected,
    137                  weak_factory_.GetWeakPtr()),
    138       base::TimeDelta::FromSeconds(kExpirationSeconds));
    139 }
    140 
    141 void PageCollector::FlushCollected() {
    142   base::TimeTicks now = base::TimeTicks::Now();
    143 
    144   // Iterate from oldest to newest publishing items which expire while
    145   // waiting for title or body.
    146   RecentChangeList::reverse_iterator iter = recent_changes_.rbegin();
    147   while (iter != recent_changes_.rend() && iter->second.Expired(now)) {
    148     AddPageData(iter->first, iter->second.visit_time(),
    149                 iter->second.title(), iter->second.body());
    150     iter = recent_changes_.Erase(iter);
    151   }
    152 
    153   if (!recent_changes_.empty())
    154     ScheduleFlushCollected();
    155 }
    156 
    157 }  // namespace history
    158