1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_ 6 #define CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_ 7 8 #include <map> 9 #include <utility> 10 11 #include "base/memory/ref_counted.h" 12 #include "chrome/browser/history/history_types.h" 13 #include "chrome/browser/history/url_utils.h" 14 #include "url/gurl.h" 15 16 class GURL; 17 18 namespace history { 19 20 // TopSiteCache caches thumbnails for visited pages. Retrieving thumbnails from 21 // a given input URL is a two-stage process: 22 // 23 // input URL --(map 1)--> canonical URL --(map 2)--> image. 24 // 25 // (map 1) searches for an URL in |canonical_urls_| that "matches" (see below) 26 // input URL. If found, canonical URL assigned to the result. Otherwise the 27 // input URL is considered to already be a canonical URL. 28 // 29 // (map 2) simply looks up canonical URL in |images_|. 30 // 31 // The rule to "match" URL in |canonical_urls_| always favors exact match. 32 // - In GetCanonicalURL(), exact match is the only case examined. 33 // - In GetGeneralizedCanonicalURL(), we also perform "generalized" URL matches, 34 // i.e., stored URLs in |canonical_urls_| that are prefixes of input URL, 35 // ignoring "?query#ref". 36 // For the latter two "URL prefix matches", we prefer the match that is closest 37 // to input URL, w.r.t. path hierarchy. 38 39 // TopSitesCache caches the top sites and thumbnails for TopSites. 40 class TopSitesCache { 41 public: 42 TopSitesCache(); 43 ~TopSitesCache(); 44 45 // Set the top sites. In |top_sites| all forced URLs must appear before 46 // non-forced URLs. This is only checked in debug. 47 void SetTopSites(const MostVisitedURLList& top_sites); 48 const MostVisitedURLList& top_sites() const { return top_sites_; } 49 50 // The thumbnails. 51 void SetThumbnails(const URLToImagesMap& images); 52 const URLToImagesMap& images() const { return images_; } 53 54 // Returns the thumbnail as an Image for the specified url. This adds an entry 55 // for |url| if one has not yet been added. 56 Images* GetImage(const GURL& url); 57 58 // Fetches the thumbnail for the specified url. Returns true if there is a 59 // thumbnail for the specified url. It is possible for a URL to be in TopSites 60 // but not have an thumbnail. 61 bool GetPageThumbnail(const GURL& url, 62 scoped_refptr<base::RefCountedMemory>* bytes) const; 63 64 // Fetches the thumbnail score for the specified url. Returns true if 65 // there is a thumbnail score for the specified url. 66 bool GetPageThumbnailScore(const GURL& url, ThumbnailScore* score) const; 67 68 // Returns the canonical URL for |url|. 69 const GURL& GetCanonicalURL(const GURL& url) const; 70 71 // Searches for a URL in |canonical_urls_| that is a URL prefix of |url|. 72 // Prefers an exact match if it exists, or the least generalized match while 73 // ignoring "?query#ref". Returns the resulting canonical URL if match is 74 // found, otherwise returns an empty GURL. 75 GURL GetGeneralizedCanonicalURL(const GURL& url) const; 76 77 // Returns true if |url| is known. 78 bool IsKnownURL(const GURL& url) const; 79 80 // Returns the index into |top_sites_| for |url|. 81 size_t GetURLIndex(const GURL& url) const; 82 83 // Returns the number of non-forced URLs in the cache. 84 size_t GetNumNonForcedURLs() const; 85 86 // Returns the number of forced URLs in the cache. 87 size_t GetNumForcedURLs() const; 88 89 private: 90 // The entries in CanonicalURLs, see CanonicalURLs for details. The second 91 // argument gives the index of the URL into MostVisitedURLs redirects. 92 typedef std::pair<MostVisitedURL*, size_t> CanonicalURLEntry; 93 94 // Comparator used for CanonicalURLs. 95 class CanonicalURLComparator { 96 public: 97 bool operator()(const CanonicalURLEntry& e1, 98 const CanonicalURLEntry& e2) const { 99 return CanonicalURLStringCompare(e1.first->redirects[e1.second].spec(), 100 e2.first->redirects[e2.second].spec()); 101 } 102 }; 103 104 // Creates the object needed to form std::map queries into |canonical_urls_|, 105 // wrapping all required temporary data to allow inlining. 106 class CanonicalURLQuery { 107 public: 108 explicit CanonicalURLQuery(const GURL& url); 109 ~CanonicalURLQuery(); 110 const CanonicalURLEntry& entry() { return entry_; } 111 112 private: 113 MostVisitedURL most_visited_url_; 114 CanonicalURLEntry entry_; 115 }; 116 117 // This is used to map from redirect url to the MostVisitedURL the redirect is 118 // from. Ideally this would be map<GURL, size_t> (second param indexing into 119 // top_sites_), but this results in duplicating all redirect urls. As some 120 // sites have a lot of redirects, we instead use the MostVisitedURL* and the 121 // index of the redirect as the key, and the index into top_sites_ as the 122 // value. This way we aren't duplicating GURLs. CanonicalURLComparator 123 // enforces the ordering as if we were using GURLs. 124 typedef std::map<CanonicalURLEntry, size_t, 125 CanonicalURLComparator> CanonicalURLs; 126 127 // Count the number of forced URLs. 128 void CountForcedURLs(); 129 130 // Generates the set of canonical urls from |top_sites_|. 131 void GenerateCanonicalURLs(); 132 133 // Stores a set of redirects. This is used by GenerateCanonicalURLs. 134 void StoreRedirectChain(const RedirectList& redirects, size_t destination); 135 136 // Returns the iterator into |canonical_urls_| for the |url|. 137 CanonicalURLs::const_iterator GetCanonicalURLsIterator(const GURL& url) const; 138 139 // Returns the GURL corresponding to an iterator in |canonical_urls_|. 140 const GURL& GetURLFromIterator(CanonicalURLs::const_iterator it) const; 141 142 // The number of top sites with forced URLs. 143 size_t num_forced_urls_; 144 145 // The top sites. This list must always contain the forced URLs first followed 146 // by the non-forced URLs. This is not strictly enforced but is checked in 147 // debug. 148 MostVisitedURLList top_sites_; 149 150 // The images. These map from canonical url to image. 151 URLToImagesMap images_; 152 153 // Generated from the redirects to and from the most visited pages. See 154 // description above typedef for details. 155 CanonicalURLs canonical_urls_; 156 157 // Helper to clear "?query#ref" from any GURL. This is set in the constructor 158 // and never modified after. 159 GURL::Replacements clear_query_ref_; 160 161 // Helper to clear "/path?query#ref" from any GURL. This is set in the 162 // constructor and never modified after. 163 GURL::Replacements clear_path_query_ref_; 164 165 DISALLOW_COPY_AND_ASSIGN(TopSitesCache); 166 }; 167 168 } // namespace history 169 170 #endif // CHROME_BROWSER_HISTORY_TOP_SITES_CACHE_H_ 171