/*
 * Copyright (C) 2009 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */ 30 31 #ifndef WebPageSerializerImpl_h 32 #define WebPageSerializerImpl_h 33 34 #include "wtf/Forward.h" 35 #include "wtf/HashMap.h" 36 #include "wtf/Vector.h" 37 #include "wtf/text/StringBuilder.h" 38 #include "wtf/text/StringHash.h" 39 #include "wtf/text/WTFString.h" 40 41 #include "WebEntities.h" 42 #include "WebPageSerializer.h" 43 #include "WebPageSerializerClient.h" 44 #include "public/platform/WebString.h" 45 #include "public/platform/WebURL.h" 46 47 namespace WTF{ 48 class TextEncoding; 49 } 50 51 namespace WebCore { 52 class Document; 53 class Element; 54 class Node; 55 } 56 57 namespace WebKit { 58 class WebFrameImpl; 59 60 // Get html data by serializing all frames of current page with lists 61 // which contain all resource links that have local copy. 62 // contain all saved auxiliary files included all sub frames and resources. 63 // This function will find out all frames and serialize them to HTML data. 64 // We have a data buffer to temporary saving generated html data. We will 65 // sequentially call WebViewDelegate::SendSerializedHtmlData once the data 66 // buffer is full. See comments of WebViewDelegate::SendSerializedHtmlData 67 // for getting more information. 68 class WebPageSerializerImpl { 69 public: 70 // Do serialization action. Return false means no available frame has been 71 // serialized, otherwise return true. 72 bool serialize(); 73 74 // The parameter specifies which frame need to be serialized. 75 // The parameter recursive_serialization specifies whether we need to 76 // serialize all sub frames of the specified frame or not. 77 // The parameter delegate specifies the pointer of interface 78 // DomSerializerDelegate provide sink interface which can receive the 79 // individual chunks of data to be saved. 80 // The parameter links contain original URLs of all saved links. 81 // The parameter local_paths contain corresponding local file paths of all 82 // saved links, which matched with vector:links one by one. 
83 // The parameter local_directory_name is relative path of directory which 84 // contain all saved auxiliary files included all sub frames and resources. 85 WebPageSerializerImpl(WebFrame* frame, 86 bool recursive, 87 WebPageSerializerClient* client, 88 const WebVector<WebURL>& links, 89 const WebVector<WebString>& localPaths, 90 const WebString& localDirectoryName); 91 92 private: 93 // Specified frame which need to be serialized; 94 WebFrameImpl* m_specifiedWebFrameImpl; 95 // Pointer of WebPageSerializerClient 96 WebPageSerializerClient* m_client; 97 // This hash map is used to map resource URL of original link to its local 98 // file path. 99 typedef HashMap<WTF::String, WTF::String> LinkLocalPathMap; 100 // local_links_ include all pair of local resource path and corresponding 101 // original link. 102 LinkLocalPathMap m_localLinks; 103 // Data buffer for saving result of serialized DOM data. 104 StringBuilder m_dataBuffer; 105 // Passing true to recursive_serialization_ indicates we will serialize not 106 // only the specified frame but also all sub-frames in the specific frame. 107 // Otherwise we only serialize the specified frame excluded all sub-frames. 108 bool m_recursiveSerialization; 109 // Flag indicates whether we have collected all frames which need to be 110 // serialized or not; 111 bool m_framesCollected; 112 // Local directory name of all local resource files. 113 WTF::String m_localDirectoryName; 114 // Vector for saving all frames which need to be serialized. 115 Vector<WebFrameImpl*> m_frames; 116 117 // Web entities conversion maps. 
118 WebEntities m_htmlEntities; 119 WebEntities m_xmlEntities; 120 121 struct SerializeDomParam { 122 const WebCore::KURL& url; 123 const WTF::TextEncoding& textEncoding; 124 WebCore::Document* document; 125 const WTF::String& directoryName; 126 bool isHTMLDocument; // document.isHTMLDocument() 127 bool haveSeenDocType; 128 bool haveAddedCharsetDeclaration; 129 // This meta element need to be skipped when serializing DOM. 130 const WebCore::Element* skipMetaElement; 131 // Flag indicates we are in script or style tag. 132 bool isInScriptOrStyleTag; 133 bool haveAddedXMLProcessingDirective; 134 // Flag indicates whether we have added additional contents before end tag. 135 // This flag will be re-assigned in each call of function 136 // PostActionAfterSerializeOpenTag and it could be changed in function 137 // PreActionBeforeSerializeEndTag if the function adds new contents into 138 // serialization stream. 139 bool haveAddedContentsBeforeEnd; 140 141 SerializeDomParam(const WebCore::KURL&, const WTF::TextEncoding&, WebCore::Document*, const WTF::String& directoryName); 142 }; 143 144 // Collect all target frames which need to be serialized. 145 void collectTargetFrames(); 146 // Before we begin serializing open tag of a element, we give the target 147 // element a chance to do some work prior to add some additional data. 148 WTF::String preActionBeforeSerializeOpenTag(const WebCore::Element* element, 149 SerializeDomParam* param, 150 bool* needSkip); 151 // After we finish serializing open tag of a element, we give the target 152 // element a chance to do some post work to add some additional data. 153 WTF::String postActionAfterSerializeOpenTag(const WebCore::Element* element, 154 SerializeDomParam* param); 155 // Before we begin serializing end tag of a element, we give the target 156 // element a chance to do some work prior to add some additional data. 
157 WTF::String preActionBeforeSerializeEndTag(const WebCore::Element* element, 158 SerializeDomParam* param, 159 bool* needSkip); 160 // After we finish serializing end tag of a element, we give the target 161 // element a chance to do some post work to add some additional data. 162 WTF::String postActionAfterSerializeEndTag(const WebCore::Element* element, 163 SerializeDomParam* param); 164 // Save generated html content to data buffer. 165 void saveHTMLContentToBuffer(const WTF::String& content, 166 SerializeDomParam* param); 167 168 enum FlushOption { 169 ForceFlush, 170 DoNotForceFlush, 171 }; 172 173 // Flushes the content buffer by encoding and sending the content to the 174 // WebPageSerializerClient. Content is not flushed if the buffer is not full 175 // unless force is 1. 176 void encodeAndFlushBuffer(WebPageSerializerClient::PageSerializationStatus status, 177 SerializeDomParam* param, 178 FlushOption); 179 // Serialize open tag of an specified element. 180 void openTagToString(WebCore::Element*, 181 SerializeDomParam* param); 182 // Serialize end tag of an specified element. 183 void endTagToString(WebCore::Element*, 184 SerializeDomParam* param); 185 // Build content for a specified node 186 void buildContentForNode(WebCore::Node*, 187 SerializeDomParam* param); 188 }; 189 190 } // namespace WebKit 191 192 #endif 193