Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // This class is used by the RenderView to interact with a PhishingClassifier.
      6 
      7 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_DELEGATE_H_
      8 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_DELEGATE_H_
      9 
     10 #include "base/memory/scoped_ptr.h"
     11 #include "base/strings/string16.h"
     12 #include "content/public/renderer/render_process_observer.h"
     13 #include "content/public/renderer/render_view_observer.h"
     14 #include "ui/base/page_transition_types.h"
     15 #include "url/gurl.h"
     16 
     17 namespace safe_browsing {
     18 class ClientPhishingRequest;  // Used below only as a const reference parameter.
     19 class PhishingClassifier;  // Used below as a raw pointer and inside scoped_ptr<>.
     20 class Scorer;  // Used below only as a pointer-to-const parameter.
     21 
     22 class PhishingClassifierFilter : public content::RenderProcessObserver {
          // A content::RenderProcessObserver that overrides the control-message hook
          // and declares a handler which receives a phishing model as a string.
     23  public:
          // Factory function.  The constructor is private, so code outside this
          // class obtains instances through Create().
          // NOTE(review): who owns the returned pointer is not visible in this
          // header -- confirm against the implementation.
     24   static PhishingClassifierFilter* Create();
     25   virtual ~PhishingClassifierFilter();
     26 
          // content::RenderProcessObserver override.
          // NOTE(review): presumably returns true when |message| was handled here
          // and forwards the model message to OnSetPhishingModel() -- confirm
          // against the implementation.
     27   virtual bool OnControlMessageReceived(const IPC::Message& message) OVERRIDE;
     28 
     29  private:
          // Private so that construction goes through Create().
     30   PhishingClassifierFilter();

          // Receives the phishing model in |model|.
          // NOTE(review): the encoding of |model| and what is built from it are not
          // visible in this header -- confirm against the implementation.
     31   void OnSetPhishingModel(const std::string& model);
     32 
          // Instances are neither copyable nor assignable.
     33   DISALLOW_COPY_AND_ASSIGN(PhishingClassifierFilter);
     34 };
     35 
     36 class PhishingClassifierDelegate : public content::RenderViewObserver {
          // Per-RenderView observer that decides when to hand a page to the
          // PhishingClassifier.  As described on the members below, classification
          // for a page begins only once the page text has been captured, a Scorer
          // has been set, and the browser has asked for the URL to be classified;
          // until then the captured state is cached.
     37  public:
     38   // The RenderView owns us.  This object takes ownership of the classifier.
     39   // Note that if classifier is null, a default instance of PhishingClassifier
     40   // will be used.
     41   static PhishingClassifierDelegate* Create(content::RenderView* render_view,
     42                                             PhishingClassifier* classifier);
     43   virtual ~PhishingClassifierDelegate();
     44 
     45   // Called by the RenderView once there is a phishing scorer available.
     46   // The scorer is passed on to the classifier.
          // NOTE(review): |scorer| is a raw pointer-to-const; whether ownership is
          // transferred is not stated in this header -- confirm before changing
          // the lifetime of the Scorer.
     47   void SetPhishingScorer(const safe_browsing::Scorer* scorer);
     48 
     49   // Called by the RenderView once a page has finished loading.  Updates the
     50   // last-loaded URL and page text, then starts classification if all other
     51   // conditions are met (see MaybeStartClassification for details).
     52   // We ignore preliminary captures, since these happen before the page has
     53   // finished loading.
          // NOTE(review): |page_text| is a non-const pointer, so the callee may
          // modify or consume the caller's string -- confirm what the caller can
          // rely on after this returns.
     54   void PageCaptured(base::string16* page_text, bool preliminary_capture);
     55 
     56   // RenderViewObserver implementation, public for testing.
     57 
     58   // Called by the RenderView when a page has started loading in the given
     59   // WebFrame.  Typically, this will cause any pending classification to be
     60   // cancelled.  However, if the navigation is within the same page, we
     61   // continue running the current classification.
     62   virtual void DidCommitProvisionalLoad(blink::WebLocalFrame* frame,
     63                                         bool is_new_navigation) OVERRIDE;
     64 
     65  private:
          // Gives the test fixture access to the private members below.
     66   friend class PhishingClassifierDelegateTest;
     67 
          // Private; instances are obtained through Create(), which takes the same
          // arguments.
     68   PhishingClassifierDelegate(content::RenderView* render_view,
     69                              PhishingClassifier* classifier);
     70 
          // Why a pending classification is being cancelled; passed to
          // CancelPendingClassification().
          // NOTE(review): the *_MAX sentinel suggests these values are reported as
          // an enumerated metric -- confirm before reordering or removing entries.
     71   enum CancelClassificationReason {
     72     NAVIGATE_AWAY,
     73     NAVIGATE_WITHIN_PAGE,
     74     PAGE_RECAPTURED,
     75     SHUTDOWN,
     76     NEW_PHISHING_SCORER,
     77     CANCEL_CLASSIFICATION_MAX  // Always add new values before this one.
     78   };
     79 
     80   // Cancels any pending classification and frees the page text.
     81   void CancelPendingClassification(CancelClassificationReason reason);
     82 
     83   // RenderViewObserver implementation.
     84   virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE;
     85 
     86   // Called by the RenderView when it receives a StartPhishingDetection IPC
     87   // from the browser.  This signals that it is ok to begin classification
     88   // for the given toplevel URL.  If the URL has been fully loaded into the
     89   // RenderView and a Scorer has been set, this will begin classification,
     90   // otherwise classification will be deferred until these conditions are met.
     91   void OnStartPhishingDetection(const GURL& url);
     92 
     93   // Called when classification for the current page finishes.
     94   void ClassificationDone(const ClientPhishingRequest& verdict);
     95 
     96   // Returns the RenderView's toplevel URL.
     97   GURL GetToplevelUrl();
     98 
     99   // Shared code to begin classification if all conditions are met.
    100   void MaybeStartClassification();
    101 
    102   // The PhishingClassifier to use for the RenderView.  This is created once
    103   // a scorer is made available via SetPhishingScorer().
          // NOTE(review): the comment on Create() says the classifier is passed in
          // (or a default instance is used) and that this object owns it, which
          // does not match "created once a scorer is made available" above --
          // confirm which description is current.
    104   scoped_ptr<PhishingClassifier> classifier_;
    105 
    106   // The last URL that the browser instructed us to classify,
    107   // with the ref stripped.
    108   GURL last_url_received_from_browser_;
    109 
    110   // The last top-level URL that has finished loading in the RenderView.
    111   // This corresponds to the text in classifier_page_text_.
    112   GURL last_finished_load_url_;
    113 
    114   // The transition type for the last load in the main frame.  We use this
    115   // to exclude back/forward loads from classification.  Note that this is
    116   // set in DidCommitProvisionalLoad(); the transition is reset after this
    117   // call in the RenderView, so we need to save off the value.
    118   ui::PageTransition last_main_frame_transition_;
    119 
    120   // The URL of the last load that we actually started classification on.
    121   // This is used to suppress phishing classification on subframe navigation
    122   // and back and forward navigations in history.
    123   GURL last_url_sent_to_classifier_;
    124 
    125   // The page text that will be analyzed by the phishing classifier.  This is
    126   // set by PageCaptured() and cleared when the classifier finishes.  Note that
    127   // if there is no Scorer yet when PageCaptured() is called, or the browser has
    128   // not instructed us to classify the page, the page text will be cached until
    129   // these conditions are met.
    130   base::string16 classifier_page_text_;
    131 
    132   // Tracks whether we have stored anything in classifier_page_text_ for the
    133   // most recent load.  We use this to distinguish empty text from cases where
    134   // PageCaptured has not been called.
    135   bool have_page_text_;
    136 
    137   // Set to true if the classifier is currently running.
    138   bool is_classifying_;
    139 
          // Instances are neither copyable nor assignable.
    140   DISALLOW_COPY_AND_ASSIGN(PhishingClassifierDelegate);
    141 };
    142 
    143 }  // namespace safe_browsing
    144 
    145 #endif  // CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_DELEGATE_H_
    146