Home | History | Annotate | Download | only in history
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_
      6 #define CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_
      7 
      8 #include <map>
      9 #include <utility>
     10 
     11 #include "base/memory/ref_counted.h"
     12 #include "chrome/browser/history/url_utils.h"
     13 #include "components/history/core/browser/history_types.h"
     14 #include "url/gurl.h"
     15 
     16 class GURL;
     17 
     18 namespace history {
     19 
     20 // TopSiteCache caches thumbnails for visited pages. Retrieving thumbnails from
     21 // a given input URL is a two-stage process:
     22 //
     23 //   input URL --(map 1)--> canonical URL --(map 2)--> image.
     24 //
     25 // (map 1) searches for an URL in |canonical_urls_| that "matches" (see below)
     26 // input URL. If found, canonical URL assigned to the result. Otherwise the
     27 // input URL is considered to already be a canonical URL.
     28 //
     29 // (map 2) simply looks up canonical URL in |images_|.
     30 //
     31 // The rule to "match" URL in |canonical_urls_| always favors exact match.
     32 // - In GetCanonicalURL(), exact match is the only case examined.
     33 // - In GetGeneralizedCanonicalURL(), we also perform "generalized" URL matches,
     34 //   i.e., stored URLs in |canonical_urls_| that are prefixes of input URL,
     35 //   ignoring "?query#ref".
     36 // For the latter two "URL prefix matches", we prefer the match that is closest
     37 // to input URL, w.r.t. path hierarchy.
     38 
     39 // TopSitesCache caches the top sites and thumbnails for TopSites.
     40 class TopSitesCache {
     41  public:
     42   TopSitesCache();
     43   ~TopSitesCache();
     44 
     45   // Set the top sites. In |top_sites| all forced URLs must appear before
     46   // non-forced URLs. This is only checked in debug.
     47   void SetTopSites(const MostVisitedURLList& top_sites);
     48   const MostVisitedURLList& top_sites() const { return top_sites_; }
     49 
     50   // The thumbnails.
     51   void SetThumbnails(const URLToImagesMap& images);
     52   const URLToImagesMap& images() const { return images_; }
     53 
     54   // Returns the thumbnail as an Image for the specified url. This adds an entry
     55   // for |url| if one has not yet been added.
     56   Images* GetImage(const GURL& url);
     57 
     58   // Fetches the thumbnail for the specified url. Returns true if there is a
     59   // thumbnail for the specified url. It is possible for a URL to be in TopSites
     60   // but not have an thumbnail.
     61   bool GetPageThumbnail(const GURL& url,
     62                         scoped_refptr<base::RefCountedMemory>* bytes) const;
     63 
     64   // Fetches the thumbnail score for the specified url. Returns true if
     65   // there is a thumbnail score for the specified url.
     66   bool GetPageThumbnailScore(const GURL& url, ThumbnailScore* score) const;
     67 
     68   // Returns the canonical URL for |url|.
     69   const GURL& GetCanonicalURL(const GURL& url) const;
     70 
     71   // Searches for a URL in |canonical_urls_| that is a URL prefix of |url|.
     72   // Prefers an exact match if it exists, or the least generalized match while
     73   // ignoring "?query#ref". Returns the resulting canonical URL if match is
     74   // found, otherwise returns an empty GURL.
     75   GURL GetGeneralizedCanonicalURL(const GURL& url) const;
     76 
     77   // Returns true if |url| is known.
     78   bool IsKnownURL(const GURL& url) const;
     79 
     80   // Returns the index into |top_sites_| for |url|.
     81   size_t GetURLIndex(const GURL& url) const;
     82 
     83   // Returns the number of non-forced URLs in the cache.
     84   size_t GetNumNonForcedURLs() const;
     85 
     86   // Returns the number of forced URLs in the cache.
     87   size_t GetNumForcedURLs() const;
     88 
     89  private:
     90   // The entries in CanonicalURLs, see CanonicalURLs for details. The second
     91   // argument gives the index of the URL into MostVisitedURLs redirects.
     92   typedef std::pair<MostVisitedURL*, size_t> CanonicalURLEntry;
     93 
     94   // Comparator used for CanonicalURLs.
     95   class CanonicalURLComparator {
     96    public:
     97     bool operator()(const CanonicalURLEntry& e1,
     98                     const CanonicalURLEntry& e2) const {
     99       return CanonicalURLStringCompare(e1.first->redirects[e1.second].spec(),
    100                                        e2.first->redirects[e2.second].spec());
    101     }
    102   };
    103 
    104   // Creates the object needed to form std::map queries into |canonical_urls_|,
    105   // wrapping all required temporary data to allow inlining.
    106   class CanonicalURLQuery {
    107    public:
    108     explicit CanonicalURLQuery(const GURL& url);
    109     ~CanonicalURLQuery();
    110     const CanonicalURLEntry& entry() { return entry_; }
    111 
    112    private:
    113     MostVisitedURL most_visited_url_;
    114     CanonicalURLEntry entry_;
    115   };
    116 
    117   // This is used to map from redirect url to the MostVisitedURL the redirect is
    118   // from. Ideally this would be map<GURL, size_t> (second param indexing into
    119   // top_sites_), but this results in duplicating all redirect urls. As some
    120   // sites have a lot of redirects, we instead use the MostVisitedURL* and the
    121   // index of the redirect as the key, and the index into top_sites_ as the
    122   // value. This way we aren't duplicating GURLs. CanonicalURLComparator
    123   // enforces the ordering as if we were using GURLs.
    124   typedef std::map<CanonicalURLEntry, size_t,
    125                    CanonicalURLComparator> CanonicalURLs;
    126 
    127   // Count the number of forced URLs.
    128   void CountForcedURLs();
    129 
    130   // Generates the set of canonical urls from |top_sites_|.
    131   void GenerateCanonicalURLs();
    132 
    133   // Stores a set of redirects. This is used by GenerateCanonicalURLs.
    134   void StoreRedirectChain(const RedirectList& redirects, size_t destination);
    135 
    136   // Returns the iterator into |canonical_urls_| for the |url|.
    137   CanonicalURLs::const_iterator GetCanonicalURLsIterator(const GURL& url) const;
    138 
    139   // Returns the GURL corresponding to an iterator in |canonical_urls_|.
    140   const GURL& GetURLFromIterator(CanonicalURLs::const_iterator it) const;
    141 
    142   // The number of top sites with forced URLs.
    143   size_t num_forced_urls_;
    144 
    145   // The top sites. This list must always contain the forced URLs first followed
    146   // by the non-forced URLs. This is not strictly enforced but is checked in
    147   // debug.
    148   MostVisitedURLList top_sites_;
    149 
    150   // The images. These map from canonical url to image.
    151   URLToImagesMap images_;
    152 
    153   // Generated from the redirects to and from the most visited pages. See
    154   // description above typedef for details.
    155   CanonicalURLs canonical_urls_;
    156 
    157   // Helper to clear "?query#ref" from any GURL. This is set in the constructor
    158   // and never modified after.
    159   GURL::Replacements clear_query_ref_;
    160 
    161   // Helper to clear "/path?query#ref" from any GURL. This is set in the
    162   // constructor and never modified after.
    163   GURL::Replacements clear_path_query_ref_;
    164 
    165   DISALLOW_COPY_AND_ASSIGN(TopSitesCache);
    166 };
    167 
    168 }  // namespace history
    169 
    170 #endif  // CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_
    171