1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/history/page_collector.h" 6 7 #include "base/bind.h" 8 #include "base/message_loop/message_loop.h" 9 #include "base/strings/utf_string_conversions.h" 10 #include "chrome/browser/history/history_publisher.h" 11 #include "url/gurl.h" 12 13 namespace { 14 15 // Page info older than this will be published even if we haven't 16 // gotten a title and/or body. 17 const int kExpirationSeconds = 20; 18 19 } // namespace 20 21 namespace history { 22 23 // PageCollector::PageInfo ----------------------------------------------- 24 25 PageCollector::PageInfo::PageInfo(base::Time visit_time) 26 : visit_time_(visit_time), 27 added_time_(base::TimeTicks::Now()) { 28 } 29 30 PageCollector::PageInfo::~PageInfo() {} 31 32 // NOTE(shess): Per the comment on has_title() and has_body(), this 33 // code maps empty strings to single space to differentiate set title 34 // and body from empty. This approach is held over from the original 35 // TextDatabaseManager version. 36 void PageCollector::PageInfo::set_title(const string16& ttl) { 37 if (ttl.empty()) 38 title_ = ASCIIToUTF16(" "); 39 else 40 title_ = ttl; 41 } 42 43 void PageCollector::PageInfo::set_body(const string16& bdy) { 44 if (bdy.empty()) 45 body_ = ASCIIToUTF16(" "); 46 else 47 body_ = bdy; 48 } 49 50 bool PageCollector::PageInfo::Expired(base::TimeTicks now) const { 51 return now - added_time_ > base::TimeDelta::FromSeconds(kExpirationSeconds); 52 } 53 54 PageCollector::PageCollector() 55 : recent_changes_(RecentChangeList::NO_AUTO_EVICT), 56 weak_factory_(this) { 57 } 58 59 PageCollector::~PageCollector() { 60 } 61 62 void PageCollector::Init(const HistoryPublisher* history_publisher) { 63 history_publisher_ = history_publisher; 64 } 65 66 void PageCollector::AddPageURL(const GURL& url, base::Time time) { 67 // Don't collect data which cannot be published. 68 if (!history_publisher_) 69 return; 70 71 // Just save this info for later (evicting any previous data). We 72 // will delete it when it expires or when all the data is complete. 73 recent_changes_.Put(url, PageInfo(time)); 74 75 // Schedule flush if not already scheduled. 76 if (!weak_factory_.HasWeakPtrs()) 77 ScheduleFlushCollected(); 78 } 79 80 void PageCollector::AddPageTitle(const GURL& url, const string16& title) { 81 if (!history_publisher_) 82 return; 83 84 // If the title comes in after the page has aged out, drop it. 85 // Older code would manufacture information from the database. 86 RecentChangeList::iterator found = recent_changes_.Peek(url); 87 if (found == recent_changes_.end()) 88 return; 89 90 // Publish the info if complete. 91 if (found->second.has_body()) { 92 history_publisher_->PublishPageContent( 93 found->second.visit_time(), url, title, found->second.body()); 94 recent_changes_.Erase(found); 95 } else { 96 found->second.set_title(title); 97 } 98 } 99 100 void PageCollector::AddPageContents(const GURL& url, 101 const string16& body) { 102 if (!history_publisher_) 103 return; 104 105 // If the body comes in after the page has aged out, drop it. 106 // Older code would manufacture information from the database. 107 RecentChangeList::iterator found = recent_changes_.Peek(url); 108 if (found == recent_changes_.end()) 109 return; 110 111 // Publish the info if complete. 112 if (found->second.has_title()) { 113 history_publisher_->PublishPageContent( 114 found->second.visit_time(), url, found->second.title(), body); 115 recent_changes_.Erase(found); 116 } else { 117 found->second.set_body(body); 118 } 119 } 120 121 void PageCollector::AddPageData(const GURL& url, 122 base::Time visit_time, 123 const string16& title, 124 const string16& body) { 125 if (!history_publisher_) 126 return; 127 128 // Publish the item. 129 history_publisher_->PublishPageContent(visit_time, url, title, body); 130 } 131 132 void PageCollector::ScheduleFlushCollected() { 133 weak_factory_.InvalidateWeakPtrs(); 134 base::MessageLoop::current()->PostDelayedTask( 135 FROM_HERE, 136 base::Bind(&PageCollector::FlushCollected, 137 weak_factory_.GetWeakPtr()), 138 base::TimeDelta::FromSeconds(kExpirationSeconds)); 139 } 140 141 void PageCollector::FlushCollected() { 142 base::TimeTicks now = base::TimeTicks::Now(); 143 144 // Iterate from oldest to newest publishing items which expire while 145 // waiting for title or body. 146 RecentChangeList::reverse_iterator iter = recent_changes_.rbegin(); 147 while (iter != recent_changes_.rend() && iter->second.Expired(now)) { 148 AddPageData(iter->first, iter->second.visit_time(), 149 iter->second.title(), iter->second.body()); 150 iter = recent_changes_.Erase(iter); 151 } 152 153 if (!recent_changes_.empty()) 154 ScheduleFlushCollected(); 155 } 156 157 } // namespace history 158