Home | History | Annotate | Download | only in web
      1 /*
      2  * Copyright (C) 2009 Google Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Redistributions in binary form must reproduce the above
     11  * copyright notice, this list of conditions and the following disclaimer
     12  * in the documentation and/or other materials provided with the
     13  * distribution.
     14  *     * Neither the name of Google Inc. nor the names of its
     15  * contributors may be used to endorse or promote products derived from
     16  * this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 #ifndef WebPageSerializer_h
     32 #define WebPageSerializer_h
     33 
     34 #include "../platform/WebCString.h"
     35 #include "../platform/WebCommon.h"
     36 #include "../platform/WebURL.h"
     37 #include "WebFrame.h"
     38 
     39 namespace blink {
     40 class WebPageSerializerClient;
     41 class WebString;
     42 class WebView;
     43 template <typename T> class WebVector;
     44 
     45 // Static entry points for serializing the frames of a page: into a vector of
     46 // resources, into MHTML, or as HTML data handed to a WebPageSerializerClient along with lists of links that have local copies.
     47 class WebPageSerializer {
     48 public:
     49     struct Resource { // One entry of the vector filled in by serialize(WebView*, WebVector<Resource>*).
     50         WebURL url; // NOTE(review): presumably the URL of the resource; confirm against the implementation.
     51         WebCString mimeType; // NOTE(review): presumably the MIME type of the resource; confirm.
     52         WebCString data; // NOTE(review): presumably the serialized content of the resource; confirm.
     53     };
     54 
     55     // Serializes all the frames from the WebView, retrieves the page's
     56     // resources (such as images and CSS) and adds them to the passed vector.
     57     // The first resource in that vector is the top frame contents.
     58     // Note that this also strips out any script tag or link to JavaScript.
     59     BLINK_EXPORT static void serialize(WebView*, WebVector<Resource>*);
     60 
     61     // Serializes the WebView contents to an MHTML representation, returned as a WebCString.
     62     BLINK_EXPORT static WebCString serializeToMHTML(WebView*);
     63 
     64     // Similar to serializeToMHTML but uses binary encoding for the MHTML parts.
     65     // This results in a smaller MHTML file but it might not be supported by other browsers.
     66     BLINK_EXPORT static WebCString serializeToMHTMLUsingBinaryEncoding(WebView*);
     67 
     68     // IMPORTANT:
     69     // The API below is an older implementation of page serialization that
     70     // will be removed soon.
     71 
     72 
     73     // This function finds all frames and serializes them to HTML data.
     74     // A data buffer temporarily holds the generated HTML data; the
     75     // WebPageSerializerClient is called sequentially each time the data buffer is full.
     76     //
     77     // Returns false if no available frame has been serialized, otherwise
     78     // returns true.
     79     //
     80     // The parameter frame specifies which frame needs to be serialized.
     81     // The parameter recursive specifies whether we need to
     82     // serialize all sub frames of the specified frame or not.
     83     // The parameter client specifies the pointer to the interface
     84     // WebPageSerializerClient, providing a sink interface to receive the
     85     // individual chunks of data to be saved.
     86     // The parameter links contains the original URLs of all saved links.
     87     // The parameter localPaths contains the corresponding local file paths of all
     88     // saved links, matching the links vector element by element.
     89     // The parameter localDirectoryName is the relative path of the directory which
     90     // contains all saved auxiliary files, including all sub frames and resources.
     91     BLINK_EXPORT static bool serialize(
     92         WebLocalFrame*,
     93         bool recursive,
     94         WebPageSerializerClient*,
     95         const WebVector<WebURL>& links,
     96         const WebVector<WebString>& localPaths,
     97         const WebString& localDirectoryName);
     98 
     99     // Retrieves all the resources for the passed view, including the main frame
    100     // and sub-frames. Returns true if all resources were retrieved successfully.
    101     // NOTE(review): resources and frames look like out-parameters and supportedSchemes presumably filters URLs by scheme; confirm against the implementation.
    102     BLINK_EXPORT static bool retrieveAllResources(WebView*,
    103                                                    const WebVector<WebCString>& supportedSchemes,
    104                                                    WebVector<WebURL>* resources,
    105                                                    WebVector<WebURL>* frames);
    106 
    107     // FIXME: The following are here for unit testing purposes. Consider
    108     // changing the unit tests instead.
    109 
    110     // Generates the META charset declaration for the given charset.
    111     BLINK_EXPORT static WebString generateMetaCharsetDeclaration(const WebString& charset);
    112     // Generates the Mark of the Web (MOTW) declaration for the given URL.
    113     BLINK_EXPORT static WebString generateMarkOfTheWebDeclaration(const WebURL&);
    114     // Generates the default base tag declaration for the given base target.
    115     BLINK_EXPORT static WebString generateBaseTagDeclaration(const WebString& baseTarget);
    116 };
    117 
    118 } // namespace blink
    119 
    120 #endif
    121