Home | History | Annotate | Download | only in predictors
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_PREDICTORS_RESOURCE_PREFETCH_PREDICTOR_H_
      6 #define CHROME_BROWSER_PREDICTORS_RESOURCE_PREFETCH_PREDICTOR_H_
      7 
      8 #include <map>
      9 #include <string>
     10 #include <vector>
     11 
     12 #include "base/gtest_prod_util.h"
     13 #include "base/memory/linked_ptr.h"
     14 #include "base/memory/scoped_ptr.h"
     15 #include "base/memory/weak_ptr.h"
     16 #include "base/time/time.h"
     17 #include "chrome/browser/common/cancelable_request.h"
     18 #include "chrome/browser/history/history_types.h"
     19 #include "chrome/browser/predictors/resource_prefetch_common.h"
     20 #include "chrome/browser/predictors/resource_prefetch_predictor_tables.h"
     21 #include "chrome/browser/predictors/resource_prefetcher.h"
     22 #include "components/browser_context_keyed_service/browser_context_keyed_service.h"
     23 #include "content/public/browser/notification_observer.h"
     24 #include "content/public/browser/notification_registrar.h"
     25 #include "url/gurl.h"
     26 #include "webkit/common/resource_type.h"
     27 
     28 class PredictorsHandler;
     29 class Profile;
     30 
     31 namespace content {
     32 class WebContents;
     33 }
     34 
     35 namespace net {
     36 class URLRequest;
     37 }
     38 
     39 namespace predictors {
     40 
     41 class ResourcePrefetcherManager;
     42 
     43 // Contains logic for learning what can be prefetched and for kicking off
     44 // speculative prefetching.
     45 // - The class is a profile keyed service owned by the profile.
     46 // - All the non-static methods of this class need to be called on the UI
     47 //   thread.
     48 //
     49 // The overall flow of the resource prefetching algorithm is as follows:
     50 //
     51 // * ResourcePrefetchPredictorObserver - Listens for URL requests, responses and
     52 //   redirects on the IO thread (via ResourceDispatcherHostDelegate) and posts
     53 //   tasks to the ResourcePrefetchPredictor on the UI thread. This is owned by
     54 //   the ProfileIOData for the profile.
     55 // * ResourcePrefetchPredictorTables - Persists ResourcePrefetchPredictor data
     56 //   to a SQL database. Runs entirely on the DB thread. Owned by the
     57 //   PredictorDatabase.
     58 // * ResourcePrefetchPredictor - Learns about resource requirements per URL in
     59 //   the UI thread through the ResourcePrefetchPredictorObserver and persists
     60 //   it to disk in the DB thread through the ResourcePrefetchPredictorTables. It
     61 //   initiates resource prefetching using the ResourcePrefetcherManager. Owned
     62 //   by profile.
     63 // * ResourcePrefetcherManager - Manages the ResourcePrefetchers that do the
     64 //   prefetching on the IO thread. The manager is owned by the
     65 //   ResourcePrefetchPredictor and interfaces between the predictor on the UI
     66 //   thread and the prefetchers on the IO thread.
     67 // * ResourcePrefetcher - Lives entirely on the IO thread, owned by the
     68 //   ResourcePrefetcherManager, and issues net::URLRequest to fetch resources.
     69 //
     70 // TODO(shishir): Do speculative prefetching for https resources and/or https
     71 // main frame urls.
     72 class ResourcePrefetchPredictor
     73     : public BrowserContextKeyedService,
     74       public content::NotificationObserver,
     75       public base::SupportsWeakPtr<ResourcePrefetchPredictor> {
     76  public:
     77   // Stores the data that we need to get from the URLRequest.
          // Declares an explicit copy constructor; summaries are stored by value in
          // the per-navigation vectors of NavigationMap.
     78   struct URLRequestSummary {
     79     URLRequestSummary();
     80     URLRequestSummary(const URLRequestSummary& other);
     81     ~URLRequestSummary();
     82 
     83     NavigationID navigation_id;
     84     GURL resource_url;
     85     ResourceType::Type resource_type;
     86 
     87     // Only for responses.
     88     std::string mime_type;
     89     bool was_cached;
     90     GURL redirect_url;  // Empty unless request was redirected to a valid url.
     91   };
     92 
          // NOTE(review): |config| and |profile| presumably initialize the config_
          // and profile_ members declared below; confirm in the .cc file.
     93   ResourcePrefetchPredictor(const ResourcePrefetchPredictorConfig& config,
     94                             Profile* profile);
     95   virtual ~ResourcePrefetchPredictor();
     96 
     97   // Predicates that tell the caller whether a request, response or redirect
          // should be reported through the Record* methods below. Thread safe.
     98   static bool ShouldRecordRequest(net::URLRequest* request,
     99                                   ResourceType::Type resource_type);
    100   static bool ShouldRecordResponse(net::URLRequest* response);
    101   static bool ShouldRecordRedirect(net::URLRequest* response);
    102 
    103   // Determines the ResourceType from the mime type, defaulting to the
    104   // |fallback| if the ResourceType could not be determined.
    105   static ResourceType::Type GetResourceTypeFromMimeType(
    106       const std::string& mime_type,
    107       ResourceType::Type fallback);
    108 
    109   // 'ResourcePrefetchPredictorObserver' calls the below functions to inform the
    110   // predictor of main frame and resource requests. Should only be called if the
    111   // corresponding Should* functions return true.
    112   void RecordURLRequest(const URLRequestSummary& request);
    113   void RecordURLResponse(const URLRequestSummary& response);
    114   void RecordURLRedirect(const URLRequestSummary& response);
    115 
    116   // Called when the main frame of a page completes loading.
    117   void RecordMainFrameLoadComplete(const NavigationID& navigation_id);
    118 
    119   // Called by ResourcePrefetcherManager to notify that prefetching has finished
    120   // for a navigation. Should take ownership of |requests|.
          // NOTE(review): declared virtual, presumably so tests can override it;
          // confirm against the test sources.
    121   virtual void FinishedPrefetchForNavigation(
    122       const NavigationID& navigation_id,
    123       PrefetchKeyType key_type,
    124       ResourcePrefetcher::RequestVector* requests);
    125 
    126  private:
          // ::PredictorsHandler (global namespace) and the unit tests listed below
          // are granted access to the private members of this class.
    127   friend class ::PredictorsHandler;
    128   friend class ResourcePrefetchPredictorTest;
    129 
    130   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, DeleteUrls);
    131   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest,
    132                            LazilyInitializeEmpty);
    133   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest,
    134                            LazilyInitializeWithData);
    135   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest,
    136                            NavigationNotRecorded);
    137   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, NavigationUrlInDB);
    138   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, NavigationUrlNotInDB);
    139   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest,
    140                            NavigationUrlNotInDBAndDBFull);
    141   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, OnMainFrameRequest);
    142   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, OnMainFrameRedirect);
    143   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest,
    144                            OnSubresourceResponse);
    145 
          // Type of initialization_state_. NOTE(review): presumably tracks the
          // progress of the work begun by StartInitialization(); the exact state
          // transitions live in the .cc file.
    146   enum InitializationState {
    147     NOT_INITIALIZED = 0,
    148     INITIALIZING = 1,
    149     INITIALIZED = 2
    150   };
    151 
    152   // Stores prefetching results.
    153   struct Result {
    154     // Takes ownership of requests.
    155     Result(PrefetchKeyType key_type,
    156            ResourcePrefetcher::RequestVector* requests);
    157     ~Result();
    158 
    159     PrefetchKeyType key_type;
    160     scoped_ptr<ResourcePrefetcher::RequestVector> requests;
    161 
    162    private:
    163     DISALLOW_COPY_AND_ASSIGN(Result);
    164   };
    165 
          // Shorthands for the types declared in ResourcePrefetchPredictorTables.
    166   typedef ResourcePrefetchPredictorTables::ResourceRow ResourceRow;
    167   typedef ResourcePrefetchPredictorTables::ResourceRows ResourceRows;
    168   typedef ResourcePrefetchPredictorTables::PrefetchData PrefetchData;
    169   typedef ResourcePrefetchPredictorTables::PrefetchDataMap PrefetchDataMap;
          // Maps a navigation to the request summaries collected for it; type of
          // inflight_navigations_.
    170   typedef std::map<NavigationID, linked_ptr<std::vector<URLRequestSummary> > >
    171       NavigationMap;
          // Maps a navigation to its prefetch Result, held by raw pointer; type of
          // results_map_.
    172   typedef std::map<NavigationID, Result*> ResultsMap;
    173 
    174   // Returns true if the main page request is supported for prediction.
    175   static bool IsHandledMainPage(net::URLRequest* request);
    176 
    177   // Returns true if the subresource request is supported for prediction.
    178   static bool IsHandledSubresource(net::URLRequest* request);
    179 
    180   // Returns true if the request (should have a response in it) is cacheable.
    181   static bool IsCacheable(const net::URLRequest* request);
    182 
    183   // content::NotificationObserver implementation.
    184   virtual void Observe(int type,
    185                        const content::NotificationSource& source,
    186                        const content::NotificationDetails& details) OVERRIDE;
    187 
    188   // BrowserContextKeyedService implementation.
    189   virtual void Shutdown() OVERRIDE;
    190 
    191   // Functions called on different network events pertaining to the loading of
    192   // main frame resource or sub resources.
    193   void OnMainFrameRequest(const URLRequestSummary& request);
    194   void OnMainFrameResponse(const URLRequestSummary& response);
    195   void OnMainFrameRedirect(const URLRequestSummary& response);
    196   void OnSubresourceResponse(const URLRequestSummary& response);
    197 
    198   // Called when onload completes for a navigation. We treat this point as the
    199   // "completion" of the navigation. The resources requested by the page up to
    200   // this point are the only ones considered for prefetching.
    201   void OnNavigationComplete(const NavigationID& navigation_id);
    202 
    203   // Returns true if there is PrefetchData that can be used for the
    204   // navigation and fills in |prefetch_requests| with the resources that need
    205   // to be prefetched.
          // NOTE(review): |key_type| is an out-parameter, presumably set to the kind
          // of key (URL or host) whose data was used; confirm in the .cc file.
    206   bool GetPrefetchData(const NavigationID& navigation_id,
    207                        ResourcePrefetcher::RequestVector* prefetch_requests,
    208                        PrefetchKeyType* key_type);
    209 
    210   // Converts a PrefetchData into a ResourcePrefetcher::RequestVector.
    211   void PopulatePrefetcherRequest(const PrefetchData& data,
    212                                  ResourcePrefetcher::RequestVector* requests);
    213 
    214   // Starts prefetching if it is enabled and prefetching data exists for the
    215   // NavigationID either at the URL or at the host level.
    216   void StartPrefetching(const NavigationID& navigation_id);
    217 
    218   // Stops prefetching that may be in progress corresponding to |navigation_id|.
    219   void StopPrefetching(const NavigationID& navigation_id);
    220 
    221   // Starts initialization by posting a task to the DB thread to read the
    222   // predictor database.
    223   void StartInitialization();
    224 
    225   // Callback for task to read predictor database. Takes ownership of
    226   // |url_data_map| and |host_data_map|.
    227   void CreateCaches(scoped_ptr<PrefetchDataMap> url_data_map,
    228                     scoped_ptr<PrefetchDataMap> host_data_map);
    229 
    230   // Called during initialization when history is read and the predictor
    231   // database has been read.
    232   void OnHistoryAndCacheLoaded();
    233 
    234   // Removes data for navigations where the onload never fired. Will cleanup
    235   // inflight_navigations_ and results_map_.
    236   void CleanupAbandonedNavigations(const NavigationID& navigation_id);
    237 
    238   // Deletes all URLs from the predictor database, the caches and removes all
    239   // inflight navigations.
    240   void DeleteAllUrls();
    241 
    242   // Deletes data for the input |urls| and their corresponding hosts from the
    243   // predictor database and caches.
    244   void DeleteUrls(const history::URLRows& urls);
    245 
    246   // Callback for GetUrlVisitCountTask.
    247   void OnVisitCountLookup(int visit_count,
    248                           const NavigationID& navigation_id,
    249                           const std::vector<URLRequestSummary>& requests);
    250 
    251   // Removes the oldest entry in the input |data_map|, also deleting it from the
    252   // predictor database.
    253   void RemoveOldestEntryInPrefetchDataMap(PrefetchKeyType key_type,
    254                                           PrefetchDataMap* data_map);
    255 
    256   // Merges resources in |new_resources| into the |data_map| and correspondingly
    257   // updates the predictor database.
    258   void LearnNavigation(const std::string& key,
    259                        PrefetchKeyType key_type,
    260                        const std::vector<URLRequestSummary>& new_resources,
    261                        int max_data_map_size,
    262                        PrefetchDataMap* data_map);
    263 
    264   // Reports accuracy by comparing prefetched resources with resources that are
    265   // actually used by the page.
    266   void ReportAccuracyStats(PrefetchKeyType key_type,
    267                            const std::vector<URLRequestSummary>& actual,
    268                            ResourcePrefetcher::RequestVector* prefetched) const;
    269 
    270   // Reports predicted accuracy i.e. by comparing resources that are actually
    271   // used by the page with those that may have been prefetched.
    272   void ReportPredictedAccuracyStats(
    273       PrefetchKeyType key_type,
    274       const std::vector<URLRequestSummary>& actual,
    275       const ResourcePrefetcher::RequestVector& predicted) const;
          // NOTE(review): presumably the helper used by
          // ReportPredictedAccuracyStats(). The meaning of the bool values in
          // |actual| and of the two int parameters is not visible in this header;
          // confirm in the .cc file.
    276   void ReportPredictedAccuracyStatsHelper(
    277       PrefetchKeyType key_type,
    278       const ResourcePrefetcher::RequestVector& predicted,
    279       const std::map<GURL, bool>& actual,
    280       int total_resources_fetched_from_network,
    281       int max_assumed_prefetched) const;
    282 
    283   // Used for testing to inject mock tables. Replaces tables_ with |tables|.
    284   void set_mock_tables(scoped_refptr<ResourcePrefetchPredictorTables> tables) {
    285     tables_ = tables;
    286   }
    287 
          // The pointer itself is const and is never reseated after construction.
    288   Profile* const profile_;
    289   ResourcePrefetchPredictorConfig const config_;
    290   InitializationState initialization_state_;
          // Ref-counted handle to the predictor tables; can be swapped for a mock
          // through set_mock_tables().
    291   scoped_refptr<ResourcePrefetchPredictorTables> tables_;
    292   scoped_refptr<ResourcePrefetcherManager> prefetch_manager_;
    293   content::NotificationRegistrar notification_registrar_;
          // NOTE(review): presumably tracks the history lookup answered by
          // OnVisitCountLookup(); confirm in the .cc file.
    294   CancelableRequestConsumer history_lookup_consumer_;
    295 
    296   // Map of all the navigations in flight to their resource requests.
    297   NavigationMap inflight_navigations_;
    298 
    299   // Copy of the data in the predictor tables.
    300   scoped_ptr<PrefetchDataMap> url_table_cache_;
    301   scoped_ptr<PrefetchDataMap> host_table_cache_;
    302 
          // Prefetch results per navigation, stored as raw Result pointers.
          // NOTE(review): results_map_deleter_ presumably deletes the values still
          // in results_map_ when this object is destroyed (confirm against
          // base/stl_util.h). If so, it has to stay declared after results_map_,
          // because members are destroyed in reverse declaration order.
    303   ResultsMap results_map_;
    304   STLValueDeleter<ResultsMap> results_map_deleter_;
    305 
    306   DISALLOW_COPY_AND_ASSIGN(ResourcePrefetchPredictor);
    307 };
    308 
    309 }  // namespace predictors
    310 
    311 #endif  // CHROME_BROWSER_PREDICTORS_RESOURCE_PREFETCH_PREDICTOR_H_
    312