1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_PAGE_H_ 6 #define COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_PAGE_H_ 7 8 #include <string> 9 10 #include "base/callback.h" 11 #include "base/memory/scoped_ptr.h" 12 #include "base/memory/weak_ptr.h" 13 #include "base/values.h" 14 #include "third_party/dom_distiller_js/dom_distiller.pb.h" 15 #include "url/gurl.h" 16 17 namespace dom_distiller { 18 19 struct DistilledPageInfo { 20 std::string title; 21 std::string html; 22 std::string next_page_url; 23 std::string prev_page_url; 24 std::vector<std::string> image_urls; 25 DistilledPageInfo(); 26 ~DistilledPageInfo(); 27 28 private: 29 DISALLOW_COPY_AND_ASSIGN(DistilledPageInfo); 30 }; 31 32 class SourcePageHandle { 33 public: 34 virtual ~SourcePageHandle() {} 35 }; 36 37 // Injects JavaScript into a page, and uses it to extract and return long-form 38 // content. The class can be reused to load and distill multiple pages, 39 // following the state transitions described along with the class's states. 40 // Constructing a DistillerPage should be cheap, as some of the instances can be 41 // thrown away without ever being used. 42 class DistillerPage { 43 public: 44 typedef base::Callback<void(scoped_ptr<DistilledPageInfo> distilled_page, 45 bool distillation_successful)> 46 DistillerPageCallback; 47 48 DistillerPage(); 49 virtual ~DistillerPage(); 50 51 // Loads a URL. |OnDistillationDone| is called when the load completes or 52 // fails. May be called when the distiller is idle. Callers can assume that, 53 // for a given |url| and |options|, any DistillerPage implementation will 54 // extract the same content. 55 void DistillPage(const GURL& url, 56 const dom_distiller::proto::DomDistillerOptions options, 57 const DistillerPageCallback& callback); 58 59 // Called when the JavaScript execution completes. |page_url| is the url of 60 // the distilled page. |value| contains data returned by the script. 61 virtual void OnDistillationDone(const GURL& page_url, 62 const base::Value* value); 63 64 protected: 65 // Called by |DistillPage| to carry out platform-specific instructions to load 66 // and distill the |url| using the provided |script|. The extracted content 67 // should be the same regardless of the DistillerPage implementation. 68 virtual void DistillPageImpl(const GURL& url, const std::string& script) = 0; 69 70 private: 71 bool ready_; 72 DistillerPageCallback distiller_page_callback_; 73 DISALLOW_COPY_AND_ASSIGN(DistillerPage); 74 }; 75 76 // Factory for generating a |DistillerPage|. 77 class DistillerPageFactory { 78 public: 79 virtual ~DistillerPageFactory(); 80 81 // Constructs and returns a new DistillerPage. The implementation of this 82 // should be very cheap, since the pages can be thrown away without being 83 // used. 84 virtual scoped_ptr<DistillerPage> CreateDistillerPage() const = 0; 85 virtual scoped_ptr<DistillerPage> CreateDistillerPageWithHandle( 86 scoped_ptr<SourcePageHandle> handle) const = 0; 87 }; 88 89 } // namespace dom_distiller 90 91 #endif // COMPONENTS_DOM_DISTILLER_CORE_DISTILLER_PAGE_H_ 92