1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_ 6 #define CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_ 7 8 #include "base/basictypes.h" 9 #include "base/containers/mru_cache.h" 10 #include "base/memory/weak_ptr.h" 11 #include "base/strings/string16.h" 12 #include "base/time/time.h" 13 14 class GURL; 15 16 namespace history { 17 18 class HistoryPublisher; 19 20 // Collect page data and publish to HistoryPublisher. 21 class PageCollector { 22 public: 23 // You must call Init() to complete initialization. 24 PageCollector(); 25 ~PageCollector(); 26 27 // Must call before using other functions. 28 void Init(const HistoryPublisher* history_publisher); 29 30 // Sets specific information for the given page to be published. 31 // In normal operation, URLs will be added as the user visits them, the titles 32 // and bodies will come in some time after that. These changes will be 33 // automatically coalesced and added to the database some time in the future 34 // using AddPageData(). 35 // 36 // AddPageURL must be called for a given URL before either the title 37 // or body set. The visit time should be the time corresponding to 38 // that visit in the history database. 39 void AddPageURL(const GURL& url, base::Time visit_time); 40 void AddPageTitle(const GURL& url, const string16& title); 41 void AddPageContents(const GURL& url, const string16& body); 42 43 void AddPageData(const GURL& url, 44 base::Time visit_time, 45 const string16& title, 46 const string16& body); 47 48 private: 49 // Stores "recent stuff" that has happened with the page, since the page 50 // visit, title, and body all come in at different times. 51 class PageInfo { 52 public: 53 explicit PageInfo(base::Time visit_time); 54 ~PageInfo(); 55 56 // Getters. 57 base::Time visit_time() const { return visit_time_; } 58 const string16& title() const { return title_; } 59 const string16& body() const { return body_; } 60 61 // Setters, we can only update the title and body. 62 void set_title(const string16& ttl); 63 void set_body(const string16& bdy); 64 65 // Returns true if both the title or body of the entry has been set. Since 66 // both the title and body setters will "fix" empty strings to be a space, 67 // these indicate if the setter was ever called. 68 bool has_title() const { return !title_.empty(); } 69 bool has_body() const { return !body_.empty(); } 70 71 // Returns true if this entry was added too long ago and we should give up 72 // waiting for more data. The current time is passed in as an argument so we 73 // can check many without re-querying the timer. 74 bool Expired(base::TimeTicks now) const; 75 76 private: 77 // Time of the visit of the URL. This will be the value stored in the URL 78 // and visit tables for the entry. 79 base::Time visit_time_; 80 81 // When this page entry was created. We have a cap on the maximum time that 82 // an entry will be in the queue before being flushed to the database. 83 base::TimeTicks added_time_; 84 85 // Will be the string " " when they are set to distinguish set and unset. 86 string16 title_; 87 string16 body_; 88 }; 89 90 // Collected data is published when both the title and body are 91 // present. https data is never passed to AddPageContents(), so 92 // periodically collected data is published without the contents. 93 // Pages which take a long time to load will not have their bodies 94 // published. 95 void ScheduleFlushCollected(); 96 void FlushCollected(); 97 98 // Lists recent additions that we have not yet filled out with the title and 99 // body. Sorted by time, we will flush them when they are complete or have 100 // been in the queue too long without modification. 101 // 102 // We kind of abuse the MRUCache because we never move things around in it 103 // using Get. Instead, we keep them in the order they were inserted, since 104 // this is the metric we use to measure age. The MRUCache gives us an ordered 105 // list with fast lookup by URL. 106 typedef base::MRUCache<GURL, PageInfo> RecentChangeList; 107 RecentChangeList recent_changes_; 108 109 // Generates tasks for our periodic checking of expired "recent changes". 110 base::WeakPtrFactory<PageCollector> weak_factory_; 111 112 // This object is created and managed by the history backend. We maintain an 113 // opaque pointer to the object for our use. 114 // This can be NULL if there are no indexers registered to receive indexing 115 // data from us. 116 const HistoryPublisher* history_publisher_; 117 118 DISALLOW_COPY_AND_ASSIGN(PageCollector); 119 }; 120 121 } // namespace history 122 123 #endif // CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_ 124