Home | History | Annotate | Download | only in predictors
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CHROME_BROWSER_PREDICTORS_RESOURCE_PREFETCH_PREDICTOR_H_
      6 #define CHROME_BROWSER_PREDICTORS_RESOURCE_PREFETCH_PREDICTOR_H_
      7 
      8 #include <map>
      9 #include <string>
     10 #include <vector>
     11 
     12 #include "base/gtest_prod_util.h"
     13 #include "base/memory/linked_ptr.h"
     14 #include "base/memory/scoped_ptr.h"
     15 #include "base/memory/weak_ptr.h"
     16 #include "base/task/cancelable_task_tracker.h"
     17 #include "base/time/time.h"
     18 #include "chrome/browser/predictors/resource_prefetch_common.h"
     19 #include "chrome/browser/predictors/resource_prefetch_predictor_tables.h"
     20 #include "chrome/browser/predictors/resource_prefetcher.h"
     21 #include "components/history/core/browser/history_types.h"
     22 #include "components/keyed_service/core/keyed_service.h"
     23 #include "content/public/browser/notification_observer.h"
     24 #include "content/public/browser/notification_registrar.h"
     25 #include "content/public/common/resource_type.h"
     26 #include "url/gurl.h"
     27 
     28 class PredictorsHandler;  // Befriended by ResourcePrefetchPredictor below.
     29 class Profile;  // Only used through a pointer in this header.
     30 
     31 namespace content {
     32 class WebContents;  // NOTE(review): not referenced in the visible header.
     33 }
     34 
     35 namespace net {
     36 class URLRequest;  // Only used through pointers in this header.
     37 }
     38 
     39 namespace predictors {
     40 
     41 class ResourcePrefetcherManager;  // Held by the predictor via scoped_refptr.
     42 
     43 // Contains logic for learning what can be prefetched and for kicking off
     44 // speculative prefetching.
     45 // - The class is a profile keyed service owned by the profile.
     46 // - All the non-static methods of this class need to be called on the UI
     47 //   thread.
     48 //
     49 // The overall flow of the resource prefetching algorithm is as follows:
     50 //
     51 // * ResourcePrefetchPredictorObserver - Listens for URL requests, responses and
     52 //   redirects on the IO thread (via ResourceDispatcherHostDelegate) and posts
     53 //   tasks to the ResourcePrefetchPredictor on the UI thread. This is owned by
     54 //   the ProfileIOData for the profile.
     55 // * ResourcePrefetchPredictorTables - Persists ResourcePrefetchPredictor data
     56 //   to a sql database. Runs entirely on the DB thread. Owned by the
     57 //   PredictorDatabase.
     58 // * ResourcePrefetchPredictor - Learns about resource requirements per URL on
     59 //   the UI thread through the ResourcePrefetchPredictorObserver and persists
     60 //   them to disk on the DB thread through the ResourcePrefetchPredictorTables.
     61 //   It initiates resource prefetching using the ResourcePrefetcherManager.
     62 //   Owned by the profile.
     63 // * ResourcePrefetcherManager - Manages the ResourcePrefetchers that do the
     64 //   prefetching on the IO thread. The manager is owned by the
     65 //   ResourcePrefetchPredictor and interfaces between the predictor on the UI
     66 //   thread and the prefetchers on the IO thread.
     67 // * ResourcePrefetcher - Lives entirely on the IO thread, owned by the
     68 //   ResourcePrefetcherManager, and issues net::URLRequest to fetch resources.
     69 //
     70 // TODO(shishir): Do speculative prefetching for https resources and/or https
     71 // main frame urls.
     72 // TODO(zhenw): Currently only main frame requests/redirects/responses are
     73 // recorded. Consider recording sub-frame responses independently or together
     74 // with main frame.
     75 class ResourcePrefetchPredictor
     76     : public KeyedService,
     77       public content::NotificationObserver,
     78       public base::SupportsWeakPtr<ResourcePrefetchPredictor> {
     79  public:
     80   // Stores the data that we need to get from the URLRequest.
     81   struct URLRequestSummary {
     82     URLRequestSummary();
     83     URLRequestSummary(const URLRequestSummary& other);
     84     ~URLRequestSummary();
     85 
     86     NavigationID navigation_id;
     87     GURL resource_url;
     88     content::ResourceType resource_type;
     89 
     90     // Only for responses.
     91     std::string mime_type;
     92     bool was_cached;
     93     GURL redirect_url;  // Empty unless request was redirected to a valid url.
     94   };
     95 
          // Creates a predictor for |profile| configured by |config|. The definition
          // is not in this header; presumably the arguments initialize |profile_| and
          // |config_| below -- confirm in the .cc.
     96   ResourcePrefetchPredictor(const ResourcePrefetchPredictorConfig& config,
     97                             Profile* profile);
     98   virtual ~ResourcePrefetchPredictor();
     99 
    100   // Thread safe (static; the UI-thread requirement stated in the class
          // comment applies only to non-static methods).
    101   static bool ShouldRecordRequest(net::URLRequest* request,
    102                                   content::ResourceType resource_type);
    103   static bool ShouldRecordResponse(net::URLRequest* response);
    104   static bool ShouldRecordRedirect(net::URLRequest* response);
    105 
    106   // Determines the ResourceType from the mime type, defaulting to the
    107   // |fallback| if the ResourceType could not be determined.
    108   static content::ResourceType GetResourceTypeFromMimeType(
    109       const std::string& mime_type,
    110       content::ResourceType fallback);
    111 
    112   // 'ResourcePrefetchPredictorObserver' calls the below functions to inform the
    113   // predictor of main frame and resource requests. Should only be called if the
    114   // corresponding Should* functions return true.
    115   void RecordURLRequest(const URLRequestSummary& request);
    116   void RecordURLResponse(const URLRequestSummary& response);
    117   void RecordURLRedirect(const URLRequestSummary& response);
    118 
    119   // Called when the main frame of a page completes loading.
    120   void RecordMainFrameLoadComplete(const NavigationID& navigation_id);
    121 
    122   // Called by ResourcePrefetcherManager to notify that prefetching has finished
    123   // for a navigation. Should take ownership of |requests|.
    124   virtual void FinishedPrefetchForNavigation(
    125       const NavigationID& navigation_id,
    126       PrefetchKeyType key_type,
    127       ResourcePrefetcher::RequestVector* requests);
    128 
    129  private:
          // Classes and tests that are granted access to the private interface.
    130   friend class ::PredictorsHandler;
    131   friend class ResourcePrefetchPredictorTest;
    132 
    133   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, DeleteUrls);
    134   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest,
    135                            LazilyInitializeEmpty);
    136   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest,
    137                            LazilyInitializeWithData);
    138   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest,
    139                            NavigationNotRecorded);
    140   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, NavigationUrlInDB);
    141   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, NavigationUrlNotInDB);
    142   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest,
    143                            NavigationUrlNotInDBAndDBFull);
    144   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, OnMainFrameRequest);
    145   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, OnMainFrameRedirect);
    146   FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest,
    147                            OnSubresourceResponse);
    148 
          // Initialization progress, stored in |initialization_state_|.
          // NOTE(review): the transitions are presumably driven by
          // StartInitialization() and OnHistoryAndCacheLoaded() -- confirm in the .cc.
    149   enum InitializationState {
    150     NOT_INITIALIZED = 0,
    151     INITIALIZING = 1,
    152     INITIALIZED = 2
    153   };
    154 
    155   // Stores prefetching results.
    156   struct Result {
    157     // Takes ownership of requests.
    158     Result(PrefetchKeyType key_type,
    159            ResourcePrefetcher::RequestVector* requests);
    160     ~Result();
    161 
    162     PrefetchKeyType key_type;
    163     scoped_ptr<ResourcePrefetcher::RequestVector> requests;
    164 
    165    private:
    166     DISALLOW_COPY_AND_ASSIGN(Result);
    167   };
    168 
          // Short local names for the types declared by the predictor tables.
    169   typedef ResourcePrefetchPredictorTables::ResourceRow ResourceRow;
    170   typedef ResourcePrefetchPredictorTables::ResourceRows ResourceRows;
    171   typedef ResourcePrefetchPredictorTables::PrefetchData PrefetchData;
    172   typedef ResourcePrefetchPredictorTables::PrefetchDataMap PrefetchDataMap;
          // Maps a navigation to the request summaries recorded for it; this is the
          // type of |inflight_navigations_|.
    173   typedef std::map<NavigationID, linked_ptr<std::vector<URLRequestSummary> > >
    174       NavigationMap;
          // Maps a navigation to its prefetch Result. The values are raw pointers;
          // see |results_map_deleter_| at the bottom of the class.
    175   typedef std::map<NavigationID, Result*> ResultsMap;
    176 
    177   // Returns true if the main page request is supported for prediction.
    178   static bool IsHandledMainPage(net::URLRequest* request);
    179 
    180   // Returns true if the subresource request is supported for prediction.
    181   static bool IsHandledSubresource(net::URLRequest* request);
    182 
    183   // Returns true if the request (should have a response in it) is cacheable.
    184   static bool IsCacheable(const net::URLRequest* request);
    185 
    186   // content::NotificationObserver methods OVERRIDE.
    187   virtual void Observe(int type,
    188                        const content::NotificationSource& source,
    189                        const content::NotificationDetails& details) OVERRIDE;
    190 
    191   // KeyedService methods OVERRIDE.
    192   virtual void Shutdown() OVERRIDE;
    193 
    194   // Functions called on different network events pertaining to the loading of
    195   // main frame resource or sub resources.
    196   void OnMainFrameRequest(const URLRequestSummary& request);
    197   void OnMainFrameResponse(const URLRequestSummary& response);
    198   void OnMainFrameRedirect(const URLRequestSummary& response);
    199   void OnSubresourceResponse(const URLRequestSummary& response);
    200 
    201   // Called when onload completes for a navigation. We treat this point as the
    202   // "completion" of the navigation. The resources requested by the page up
    203   // to this point are the only ones considered for prefetching.
    204   void OnNavigationComplete(const NavigationID& navigation_id);
    205 
    206   // Returns true if there is PrefetchData that can be used for the
    207   // navigation and fills in |prefetch_requests| with the resources that need
    208   // to be prefetched.
          // NOTE(review): |key_type| is an out-param too; presumably it reports
          // whether URL- or host-level data matched -- confirm in the .cc.
    209   bool GetPrefetchData(const NavigationID& navigation_id,
    210                        ResourcePrefetcher::RequestVector* prefetch_requests,
    211                        PrefetchKeyType* key_type);
    212 
    213   // Converts a PrefetchData into a ResourcePrefetcher::RequestVector.
    214   void PopulatePrefetcherRequest(const PrefetchData& data,
    215                                  ResourcePrefetcher::RequestVector* requests);
    216 
    217   // Starts prefetching if it is enabled and prefetching data exists for the
    218   // NavigationID either at the URL or at the host level.
    219   void StartPrefetching(const NavigationID& navigation_id);
    220 
    221   // Stops prefetching that may be in progress corresponding to |navigation_id|.
    222   void StopPrefetching(const NavigationID& navigation_id);
    223 
    224   // Starts initialization by posting a task to the DB thread to read the
    225   // predictor database.
    226   void StartInitialization();
    227 
    228   // Callback for task to read predictor database. Takes ownership of
    229   // |url_data_map| and |host_data_map|.
    230   void CreateCaches(scoped_ptr<PrefetchDataMap> url_data_map,
    231                     scoped_ptr<PrefetchDataMap> host_data_map);
    232 
    233   // Called during initialization when history is read and the predictor
    234   // database has been read.
    235   void OnHistoryAndCacheLoaded();
    236 
    237   // Removes data for navigations where the onload never fired. Will clean up
    238   // inflight_navigations_ and results_map_.
    239   void CleanupAbandonedNavigations(const NavigationID& navigation_id);
    240 
    241   // Deletes all URLs from the predictor database, the caches and removes all
    242   // inflight navigations.
    243   void DeleteAllUrls();
    244 
    245   // Deletes data for the input |urls| and their corresponding hosts from the
    246   // predictor database and caches.
    247   void DeleteUrls(const history::URLRows& urls);
    248 
    249   // Callback for GetUrlVisitCountTask.
    250   void OnVisitCountLookup(size_t visit_count,
    251                           const NavigationID& navigation_id,
    252                           const std::vector<URLRequestSummary>& requests);
    253 
    254   // Removes the oldest entry in the input |data_map|, also deleting it from the
    255   // predictor database.
    256   void RemoveOldestEntryInPrefetchDataMap(PrefetchKeyType key_type,
    257                                           PrefetchDataMap* data_map);
    258 
    259   // Merges resources in |new_resources| into the |data_map| and correspondingly
    260   // updates the predictor database.
    261   void LearnNavigation(const std::string& key,
    262                        PrefetchKeyType key_type,
    263                        const std::vector<URLRequestSummary>& new_resources,
    264                        size_t max_data_map_size,
    265                        PrefetchDataMap* data_map);
    266 
    267   // Reports accuracy by comparing prefetched resources with resources that are
    268   // actually used by the page.
    269   void ReportAccuracyStats(PrefetchKeyType key_type,
    270                            const std::vector<URLRequestSummary>& actual,
    271                            ResourcePrefetcher::RequestVector* prefetched) const;
    272 
    273   // Reports predicted accuracy i.e. by comparing resources that are actually
    274   // used by the page with those that may have been prefetched.
    275   void ReportPredictedAccuracyStats(
    276       PrefetchKeyType key_type,
    277       const std::vector<URLRequestSummary>& actual,
    278       const ResourcePrefetcher::RequestVector& predicted) const;
          // Helper for ReportPredictedAccuracyStats() (by name). NOTE(review): the
          // meaning of |actual|'s bool values and of |max_assumed_prefetched| is
          // defined in the .cc, not here.
    279   void ReportPredictedAccuracyStatsHelper(
    280       PrefetchKeyType key_type,
    281       const ResourcePrefetcher::RequestVector& predicted,
    282       const std::map<GURL, bool>& actual,
    283       size_t total_resources_fetched_from_network,
    284       size_t max_assumed_prefetched) const;
    285 
    286   // Used for testing to inject mock tables.
    287   void set_mock_tables(scoped_refptr<ResourcePrefetchPredictorTables> tables) {
    288     tables_ = tables;
    289   }
    290 
          // Raw pointer: per the class comment the profile owns this service.
    291   Profile* const profile_;
    292   ResourcePrefetchPredictorConfig const config_;
    293   InitializationState initialization_state_;
          // Predictor database tables; replaceable via set_mock_tables() in tests.
    294   scoped_refptr<ResourcePrefetchPredictorTables> tables_;
          // Per the class comment, manages the ResourcePrefetchers on the IO thread.
    295   scoped_refptr<ResourcePrefetcherManager> prefetch_manager_;
    296   content::NotificationRegistrar notification_registrar_;
          // NOTE(review): presumably tracks the history lookup that reports back
          // through OnVisitCountLookup() -- confirm in the .cc.
    297   base::CancelableTaskTracker history_lookup_consumer_;
    298 
    299   // Map of all the navigations in flight to their resource requests.
    300   NavigationMap inflight_navigations_;
    301 
    302   // Copy of the data in the predictor tables.
    303   scoped_ptr<PrefetchDataMap> url_table_cache_;
    304   scoped_ptr<PrefetchDataMap> host_table_cache_;
    305 
          // Prefetch results keyed by navigation. NOTE(review): STLValueDeleter
          // presumably deletes the map's Result* values when it is destroyed; it is
          // declared after |results_map_| and so is destroyed before it (members
          // are destroyed in reverse declaration order) -- keep this ordering.
    306   ResultsMap results_map_;
    307   STLValueDeleter<ResultsMap> results_map_deleter_;
    308 
    309   DISALLOW_COPY_AND_ASSIGN(ResourcePrefetchPredictor);
    310 };
    311 
    312 }  // namespace predictors
    313 
    314 #endif  // CHROME_BROWSER_PREDICTORS_RESOURCE_PREFETCH_PREDICTOR_H_
    315