Home | History | Annotate | Download | only in history
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
      6 #define CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
      7 
      8 #include "base/basictypes.h"
      9 #include "base/containers/mru_cache.h"
     10 #include "base/memory/weak_ptr.h"
     11 #include "base/strings/string16.h"
     12 #include "base/time/time.h"
     13 
     14 class GURL;
     15 
     16 namespace history {
     17 
     18 class HistoryPublisher;
     19 
     20 // Collect page data and publish to HistoryPublisher.
     21 class PageCollector {
     22  public:
     23   // You must call Init() to complete initialization.
     24   PageCollector();
     25   ~PageCollector();
     26 
     27   // Must call before using other functions.
     28   void Init(const HistoryPublisher* history_publisher);
     29 
     30   // Sets specific information for the given page to be published.
     31   // In normal operation, URLs will be added as the user visits them, the titles
     32   // and bodies will come in some time after that. These changes will be
     33   // automatically coalesced and added to the database some time in the future
     34   // using AddPageData().
     35   //
     36   // AddPageURL must be called for a given URL before either the title
     37   // or body set. The visit time should be the time corresponding to
     38   // that visit in the history database.
     39   void AddPageURL(const GURL& url, base::Time visit_time);
     40   void AddPageTitle(const GURL& url, const string16& title);
     41   void AddPageContents(const GURL& url, const string16& body);
     42 
     43   void AddPageData(const GURL& url,
     44                    base::Time visit_time,
     45                    const string16& title,
     46                    const string16& body);
     47 
     48  private:
     49   // Stores "recent stuff" that has happened with the page, since the page
     50   // visit, title, and body all come in at different times.
     51   class PageInfo {
     52    public:
     53     explicit PageInfo(base::Time visit_time);
     54     ~PageInfo();
     55 
     56     // Getters.
     57     base::Time visit_time() const { return visit_time_; }
     58     const string16& title() const { return title_; }
     59     const string16& body() const { return body_; }
     60 
     61     // Setters, we can only update the title and body.
     62     void set_title(const string16& ttl);
     63     void set_body(const string16& bdy);
     64 
     65     // Returns true if both the title or body of the entry has been set. Since
     66     // both the title and body setters will "fix" empty strings to be a space,
     67     // these indicate if the setter was ever called.
     68     bool has_title() const { return !title_.empty(); }
     69     bool has_body() const { return !body_.empty(); }
     70 
     71     // Returns true if this entry was added too long ago and we should give up
     72     // waiting for more data. The current time is passed in as an argument so we
     73     // can check many without re-querying the timer.
     74     bool Expired(base::TimeTicks now) const;
     75 
     76    private:
     77     // Time of the visit of the URL. This will be the value stored in the URL
     78     // and visit tables for the entry.
     79     base::Time visit_time_;
     80 
     81     // When this page entry was created. We have a cap on the maximum time that
     82     // an entry will be in the queue before being flushed to the database.
     83     base::TimeTicks added_time_;
     84 
     85     // Will be the string " " when they are set to distinguish set and unset.
     86     string16 title_;
     87     string16 body_;
     88   };
     89 
     90   // Collected data is published when both the title and body are
     91   // present.  https data is never passed to AddPageContents(), so
     92   // periodically collected data is published without the contents.
     93   // Pages which take a long time to load will not have their bodies
     94   // published.
     95   void ScheduleFlushCollected();
     96   void FlushCollected();
     97 
     98   // Lists recent additions that we have not yet filled out with the title and
     99   // body. Sorted by time, we will flush them when they are complete or have
    100   // been in the queue too long without modification.
    101   //
    102   // We kind of abuse the MRUCache because we never move things around in it
    103   // using Get. Instead, we keep them in the order they were inserted, since
    104   // this is the metric we use to measure age. The MRUCache gives us an ordered
    105   // list with fast lookup by URL.
    106   typedef base::MRUCache<GURL, PageInfo> RecentChangeList;
    107   RecentChangeList recent_changes_;
    108 
    109   // Generates tasks for our periodic checking of expired "recent changes".
    110   base::WeakPtrFactory<PageCollector> weak_factory_;
    111 
    112   // This object is created and managed by the history backend. We maintain an
    113   // opaque pointer to the object for our use.
    114   // This can be NULL if there are no indexers registered to receive indexing
    115   // data from us.
    116   const HistoryPublisher* history_publisher_;
    117 
    118   DISALLOW_COPY_AND_ASSIGN(PageCollector);
    119 };
    120 
    121 }  // namespace history
    122 
    123 #endif  // CHROME_BROWSER_HISTORY_PAGE_COLLECTOR_H_
    124