Home | History | Annotate | Download | only in predictors
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_PREDICTORS_RESOURCE_PREFETCH_PREDICTOR_H_
      6 #define CHROME_BROWSER_PREDICTORS_RESOURCE_PREFETCH_PREDICTOR_H_
      7 
      8 #include <map>
      9 #include <string>
     10 #include <vector>
     11 
     12 #include "base/gtest_prod_util.h"
     13 #include "base/memory/linked_ptr.h"
     14 #include "base/memory/scoped_ptr.h"
     15 #include "base/memory/weak_ptr.h"
     16 #include "base/time/time.h"
     17 #include "chrome/browser/common/cancelable_request.h"
     18 #include "chrome/browser/history/history_types.h"
     19 #include "chrome/browser/predictors/resource_prefetch_common.h"
     20 #include "chrome/browser/predictors/resource_prefetch_predictor_tables.h"
     21 #include "chrome/browser/predictors/resource_prefetcher.h"
     22 #include "components/browser_context_keyed_service/browser_context_keyed_service.h"
     23 #include "content/public/browser/notification_observer.h"
     24 #include "content/public/browser/notification_registrar.h"
     25 #include "url/gurl.h"
     26 #include "webkit/common/resource_type.h"
     27 
     28 class PredictorsHandler;
     29 class Profile;
     30 
     31 namespace content {
     32 class WebContents;
     33 }
     34 
     35 namespace net {
     36 class URLRequest;
     37 }
     38 
     39 namespace predictors {
     40 
     41 class ResourcePrefetcherManager;
     42 
     43 // Contains logic for learning what can be prefetched and for kicking off
     44 // speculative prefetching.
     45 // - The class is a profile keyed service owned by the profile.
     46 // - All the non-static methods of this class need to be called on the UI
     47 //   thread.
     48 //
     49 // The overall flow of the resource prefetching algorithm is as follows:
     50 //
     51 // * ResourcePrefetchPredictorObserver - Listens for URL requests, responses and
     52 //   redirects on the IO thread (via RDHostDelegate) and posts tasks to the
     53 //   ResourcePrefetchPredictor on the UI thread. This is owned by the
     54 //   ProfileIOData for the profile.
     55 // * ResourcePrefetchPredictorTables - Persists ResourcePrefetchPredictor data
     56 //   to a sql database. Runs entirely on the DB thread. Owned by the
     57 //   PredictorDatabase.
     58 // * ResourcePrefetchPredictor - Learns about resource requirements per URL in
     59 //   the UI thread through the ResourcePrefetchPredictorObserver and persists
     60 //   it to disk in the DB thread through the ResourcePrefetchPredictorTables. It
     61 //   initiates resource prefetching using the ResourcePrefetcherManager. Owned
     62 //   by profile.
     63 // * ResourcePrefetcherManager - Manages the ResourcePrefetchers that do the
     64 //   prefetching on the IO thread. The manager is owned by the
     65 //   ResourcePrefetchPredictor and interfaces between the predictor on the UI
     66 //   thread and the prefetchers on the IO thread.
     67 // * ResourcePrefetcher - Lives entirely on the IO thread, owned by the
     68 //   ResourcePrefetcherManager, and issues net::URLRequest to fetch resources.
     69 //
     70 // TODO(shishir): Do speculative prefetching for https resources and/or https
     71 // main frame urls.
     72 class ResourcePrefetchPredictor
     73     : public BrowserContextKeyedService,
     74       public content::NotificationObserver,
     75       public base::SupportsWeakPtr<ResourcePrefetchPredictor> {
     76  public:
     77   // Stores the data that we need to get from the URLRequest.
          // Instances are passed by const reference to the Record* and On* methods
          // declared below.
     78   struct URLRequestSummary {
     79     URLRequestSummary();
     80     URLRequestSummary(const URLRequestSummary& other);
     81     ~URLRequestSummary();
     82 
            // Set for every summary, independent of whether it describes a request
            // or a response.
            // NOTE(review): |navigation_id| presumably identifies the main frame
            // navigation the resource belongs to - confirm against NavigationID.
     83     NavigationID navigation_id;
     84     GURL resource_url;
     85     ResourceType::Type resource_type;
     86 
     87     // Only for responses.
     88     std::string mime_type;
     89     bool was_cached;
     90     GURL redirect_url;  // Empty unless request was redirected to a valid url.
     91   };
     92 
          // |config| is kept as a const copy (config_) and |profile| as a const raw
          // pointer (profile_); see the data members at the end of the class.
     93   ResourcePrefetchPredictor(const ResourcePrefetchPredictorConfig& config,
     94                             Profile* profile);
     95   virtual ~ResourcePrefetchPredictor();
     96 
     97   // Thread safe.
          // Predicates that gate the Record* methods below: a Record* method should
          // only be called when the corresponding Should* predicate returned true.
     98   static bool ShouldRecordRequest(net::URLRequest* request,
     99                                   ResourceType::Type resource_type);
    100   static bool ShouldRecordResponse(net::URLRequest* response);
    101   static bool ShouldRecordRedirect(net::URLRequest* response);
    102 
    103   // Determines the ResourceType from the mime type, defaulting to the
    104   // |fallback| if the ResourceType could not be determined.
    105   static ResourceType::Type GetResourceTypeFromMimeType(
    106       const std::string& mime_type,
    107       ResourceType::Type fallback);
    108 
    109   // 'ResourcePrefetchPredictorObserver' calls the below functions to inform the
    110   // predictor of main frame and resource requests. Should only be called if the
    111   // corresponding Should* functions return true.
          // NOTE(review): RecordURLRequest spells "URL" in capitals while
          // RecordUrlResponse and RecordUrlRedirect do not. The names are part of the
          // public interface, so they are left unchanged here.
    112   void RecordURLRequest(const URLRequestSummary& request);
    113   void RecordUrlResponse(const URLRequestSummary& response);
    114   void RecordUrlRedirect(const URLRequestSummary& response);
    115 
    116   // Called by ResourcePrefetcherManager to notify that prefetching has finished
    117   // for a navigation. Should take ownership of |requests|.
          // NOTE(review): presumably virtual so that tests can override it - confirm.
    118   virtual void FinishedPrefetchForNavigation(
    119       const NavigationID& navigation_id,
    120       PrefetchKeyType key_type,
    121       ResourcePrefetcher::RequestVector* requests);
    122 
    123  private:
          // The predictors handler and the unit tests listed below are granted
          // access to the private members of this class.
    124   friend class ::PredictorsHandler;
    125   friend class ResourcePrefetchPredictorTest;
    126 
    127   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, DeleteUrls);
    128   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest,
    129                            LazilyInitializeEmpty);
    130   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest,
    131                            LazilyInitializeWithData);
    132   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest,
    133                            NavigationNotRecorded);
    134   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, NavigationUrlInDB);
    135   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, NavigationUrlNotInDB);
    136   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest,
    137                            NavigationUrlNotInDBAndDBFull);
    138   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, OnMainFrameRequest);
    139   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, OnMainFrameRedirect);
    140   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest,
    141                            OnSubresourceResponse);
    142 
          // Value type of initialization_state_.
          // NOTE(review): the transitions are presumably driven by
          // StartInitialization() and OnHistoryAndCacheLoaded() - confirm in the
          // implementation file.
    143   enum InitializationState {
    144     NOT_INITIALIZED = 0,
    145     INITIALIZING = 1,
    146     INITIALIZED = 2
    147   };
    148 
    149   // Stores prefetching results.
          // Non-copyable; the request vector is held in a scoped_ptr.
    150   struct Result {
    151     // Takes ownership of requests.
    152     Result(PrefetchKeyType key_type,
    153            ResourcePrefetcher::RequestVector* requests);
    154     ~Result();
    155 
    156     PrefetchKeyType key_type;
    157     scoped_ptr<ResourcePrefetcher::RequestVector> requests;
    158 
    159    private:
    160     DISALLOW_COPY_AND_ASSIGN(Result);
    161   };
    162 
          // Short aliases for the row and map types declared by
          // ResourcePrefetchPredictorTables.
    163   typedef ResourcePrefetchPredictorTables::ResourceRow ResourceRow;
    164   typedef ResourcePrefetchPredictorTables::ResourceRows ResourceRows;
    165   typedef ResourcePrefetchPredictorTables::PrefetchData PrefetchData;
    166   typedef ResourcePrefetchPredictorTables::PrefetchDataMap PrefetchDataMap;
          // Maps a navigation to the request summaries collected for it; this is the
          // type of inflight_navigations_.
    167   typedef std::map<NavigationID, linked_ptr<std::vector<URLRequestSummary> > >
    168       NavigationMap;
          // Maps a navigation to its prefetch Result. The mapped values are raw
          // pointers; see results_map_deleter_ for how they are presumably released.
    169   typedef std::map<NavigationID, Result*> ResultsMap;
    170 
    171   // Returns true if the main page request is supported for prediction.
    172   static bool IsHandledMainPage(net::URLRequest* request);
    173 
    174   // Returns true if the subresource request is supported for prediction.
    175   static bool IsHandledSubresource(net::URLRequest* request);
    176 
    177   // Returns true if the request (should have a response in it) is cacheable.
    178   static bool IsCacheable(const net::URLRequest* request);
    179 
    180   // content::NotificationObserver methods OVERRIDE.
    181   virtual void Observe(int type,
    182                        const content::NotificationSource& source,
    183                        const content::NotificationDetails& details) OVERRIDE;
    184 
    185   // BrowserContextKeyedService methods OVERRIDE.
    186   virtual void Shutdown() OVERRIDE;
    187 
    188   // Functions called on different network events pertaining to the loading of
    189   // main frame resource or sub resources.
    190   void OnMainFrameRequest(const URLRequestSummary& request);
    191   void OnMainFrameResponse(const URLRequestSummary& response);
    192   void OnMainFrameRedirect(const URLRequestSummary& response);
    193   void OnSubresourceResponse(const URLRequestSummary& response);
    194   void OnSubresourceLoadedFromMemory(const NavigationID& navigation_id,
    195                                      const GURL& resource_url,
    196                                      const std::string& mime_type,
    197                                      ResourceType::Type resource_type);
    198 
    199   // Called when onload completes for a navigation. We treat this point as the
    200   // "completion" of the navigation. The resources requested by the page up to
    201   // this point are the only ones considered for prefetching.
    202   void OnNavigationComplete(const NavigationID& navigation_id);
    203 
    204   // Returns true if there is PrefetchData that can be used for the
    205   // navigation and fills in |prefetch_requests| with the resources that need
    206   // to be prefetched.
          // NOTE(review): |key_type| is an out-parameter as well; presumably it
          // reports whether URL-level or host-level data was used - confirm.
    207   bool GetPrefetchData(const NavigationID& navigation_id,
    208                        ResourcePrefetcher::RequestVector* prefetch_requests,
    209                        PrefetchKeyType* key_type);
    210 
    211   // Converts a PrefetchData into a ResourcePrefetcher::RequestVector.
    212   void PopulatePrefetcherRequest(const PrefetchData& data,
    213                                  ResourcePrefetcher::RequestVector* requests);
    214 
    215   // Starts prefetching if it is enabled and prefetching data exists for the
    216   // NavigationID either at the URL or at the host level.
    217   void StartPrefetching(const NavigationID& navigation_id);
    218 
    219   // Stops prefetching that may be in progress corresponding to |navigation_id|.
    220   void StopPrefetching(const NavigationID& navigation_id);
    221 
    222   // Starts initialization by posting a task to the DB thread to read the
    223   // predictor database.
    224   void StartInitialization();
    225 
    226   // Callback for task to read predictor database. Takes ownership of
    227   // |url_data_map| and |host_data_map|.
    228   void CreateCaches(scoped_ptr<PrefetchDataMap> url_data_map,
    229                     scoped_ptr<PrefetchDataMap> host_data_map);
    230 
    231   // Called during initialization when history is read and the predictor
    232   // database has been read.
    233   void OnHistoryAndCacheLoaded();
    234 
    235   // Removes data for navigations where the onload never fired. Will cleanup
    236   // inflight_navigations_ and results_map_.
    237   void CleanupAbandonedNavigations(const NavigationID& navigation_id);
    238 
    239   // Deletes all URLs from the predictor database, the caches and removes all
    240   // inflight navigations.
    241   void DeleteAllUrls();
    242 
    243   // Deletes data for the input |urls| and their corresponding hosts from the
    244   // predictor database and caches.
    245   void DeleteUrls(const history::URLRows& urls);
    246 
    247   // Callback for GetUrlVisitCountTask.
    248   void OnVisitCountLookup(int visit_count,
    249                           const NavigationID& navigation_id,
    250                           const std::vector<URLRequestSummary>& requests);
    251 
    252   // Removes the oldest entry in the input |data_map|, also deleting it from the
    253   // predictor database.
    254   void RemoveOldestEntryInPrefetchDataMap(PrefetchKeyType key_type,
    255                                           PrefetchDataMap* data_map);
    256 
    257   // Merges resources in |new_resources| into the |data_map| and correspondingly
    258   // updates the predictor database.
    259   void LearnNavigation(const std::string& key,
    260                        PrefetchKeyType key_type,
    261                        const std::vector<URLRequestSummary>& new_resources,
    262                        int max_data_map_size,
    263                        PrefetchDataMap* data_map);
    264 
    265   // Reports accuracy by comparing prefetched resources with resources that are
    266   // actually used by the page.
    267   void ReportAccuracyStats(PrefetchKeyType key_type,
    268                            const std::vector<URLRequestSummary>& actual,
    269                            ResourcePrefetcher::RequestVector* prefetched) const;
    270 
    271   // Reports predicted accuracy i.e. by comparing resources that are actually
    272   // used by the page with those that may have been prefetched.
    273   void ReportPredictedAccuracyStats(
    274       PrefetchKeyType key_type,
    275       const std::vector<URLRequestSummary>& actual,
    276       const ResourcePrefetcher::RequestVector& predicted) const;
          // NOTE(review): helper for ReportPredictedAccuracyStats(). |actual| is
          // keyed by resource URL; the meaning of the bool value and of
          // |max_assumed_prefetched| is not visible in this header - confirm in the
          // implementation file.
    277   void ReportPredictedAccuracyStatsHelper(
    278       PrefetchKeyType key_type,
    279       const ResourcePrefetcher::RequestVector& predicted,
    280       const std::map<GURL, bool>& actual,
    281       int total_resources_fetched_from_network,
    282       int max_assumed_prefetched) const;
    283 
    284   // Used for testing to inject mock tables.
    285   void set_mock_tables(scoped_refptr<ResourcePrefetchPredictorTables> tables) {
    286     tables_ = tables;
    287   }
    288 
          // Raw pointer to the profile this predictor was constructed with; the
          // pointer itself cannot be reseated.
    289   Profile* const profile_;
          // Immutable copy of the configuration.
    290   ResourcePrefetchPredictorConfig const config_;
    291   InitializationState initialization_state_;
          // Ref-counted handle to the database tables; replaceable through
          // set_mock_tables() in tests.
    292   scoped_refptr<ResourcePrefetchPredictorTables> tables_;
    293   scoped_refptr<ResourcePrefetcherManager> prefetch_manager_;
    294   content::NotificationRegistrar notification_registrar_;
          // NOTE(review): presumably tracks the history visit-count request that
          // ends in OnVisitCountLookup() - confirm in the implementation file.
    295   CancelableRequestConsumer history_lookup_consumer_;
    296 
    297   // Map of all the navigations in flight to their resource requests.
    298   NavigationMap inflight_navigations_;
    299 
    300   // Copy of the data in the predictor tables.
    301   scoped_ptr<PrefetchDataMap> url_table_cache_;
    302   scoped_ptr<PrefetchDataMap> host_table_cache_;
    303 
          // Prefetch results per navigation. Keep results_map_deleter_ declared
          // after results_map_: members are destroyed in reverse declaration order,
          // so the deleter runs while the map is still alive.
          // NOTE(review): the deleter presumably deletes the Result* values still
          // held by the map - confirm against STLValueDeleter. No header included
          // directly by this file is obviously the one declaring STLValueDeleter;
          // it is presumably pulled in transitively - confirm.
    304   ResultsMap results_map_;
    305   STLValueDeleter<ResultsMap> results_map_deleter_;
    306 
    307   DISALLOW_COPY_AND_ASSIGN(ResourcePrefetchPredictor);
    308 };
    309 
    310 }  // namespace predictors
    311 
    312 #endif  // CHROME_BROWSER_PREDICTORS_RESOURCE_PREFETCH_PREDICTOR_H_
    313