// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
      4 
      5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_PAGE_H_
      6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_PAGE_H_
      7 
#include <string>
#include <vector>

#include "base/callback.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/weak_ptr.h"
#include "base/values.h"
#include "third_party/dom_distiller_js/dom_distiller.pb.h"
#include "url/gurl.h"
     16 
     17 namespace dom_distiller {
     18 
     19 struct DistilledPageInfo {
     20   std::string title;
     21   std::string html;
     22   std::string next_page_url;
     23   std::string prev_page_url;
     24   std::vector<std::string> image_urls;
     25   DistilledPageInfo();
     26   ~DistilledPageInfo();
     27 
     28  private:
     29   DISALLOW_COPY_AND_ASSIGN(DistilledPageInfo);
     30 };
     31 
     32 class SourcePageHandle {
     33  public:
     34   virtual ~SourcePageHandle() {}
     35 };
     36 
     37 // Injects JavaScript into a page, and uses it to extract and return long-form
     38 // content. The class can be reused to load and distill multiple pages,
     39 // following the state transitions described along with the class's states.
     40 // Constructing a DistillerPage should be cheap, as some of the instances can be
     41 // thrown away without ever being used.
     42 class DistillerPage {
     43  public:
     44   typedef base::Callback<void(scoped_ptr<DistilledPageInfo> distilled_page,
     45                               bool distillation_successful)>
     46       DistillerPageCallback;
     47 
     48   DistillerPage();
     49   virtual ~DistillerPage();
     50 
     51   // Loads a URL. |OnDistillationDone| is called when the load completes or
     52   // fails. May be called when the distiller is idle. Callers can assume that,
     53   // for a given |url| and |options|, any DistillerPage implementation will
     54   // extract the same content.
     55   void DistillPage(const GURL& url,
     56                    const dom_distiller::proto::DomDistillerOptions options,
     57                    const DistillerPageCallback& callback);
     58 
     59   // Called when the JavaScript execution completes. |page_url| is the url of
     60   // the distilled page. |value| contains data returned by the script.
     61   virtual void OnDistillationDone(const GURL& page_url,
     62                                   const base::Value* value);
     63 
     64  protected:
     65   // Called by |DistillPage| to carry out platform-specific instructions to load
     66   // and distill the |url| using the provided |script|. The extracted content
     67   // should be the same regardless of the DistillerPage implementation.
     68   virtual void DistillPageImpl(const GURL& url, const std::string& script) = 0;
     69 
     70  private:
     71   bool ready_;
     72   DistillerPageCallback distiller_page_callback_;
     73   DISALLOW_COPY_AND_ASSIGN(DistillerPage);
     74 };
     75 
     76 // Factory for generating a |DistillerPage|.
     77 class DistillerPageFactory {
     78  public:
     79   virtual ~DistillerPageFactory();
     80 
     81   // Constructs and returns a new DistillerPage. The implementation of this
     82   // should be very cheap, since the pages can be thrown away without being
     83   // used.
     84   virtual scoped_ptr<DistillerPage> CreateDistillerPage() const = 0;
     85   virtual scoped_ptr<DistillerPage> CreateDistillerPageWithHandle(
     86       scoped_ptr<SourcePageHandle> handle) const = 0;
     87 };
     88 
     89 }  // namespace dom_distiller
     90 
     91 #endif  // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_PAGE_H_
     92