1 /* 2 * Copyright (C) 2009 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #ifndef WebPageSerializerImpl_h 32 #define WebPageSerializerImpl_h 33 34 #include <wtf/Forward.h> 35 #include <wtf/HashMap.h> 36 #include <wtf/Vector.h> 37 #include <wtf/text/StringBuilder.h> 38 #include <wtf/text/StringHash.h> 39 #include <wtf/text/WTFString.h> 40 41 #include "WebEntities.h" 42 #include "WebPageSerializer.h" 43 #include "WebPageSerializerClient.h" 44 #include "WebString.h" 45 #include "WebURL.h" 46 47 namespace WebCore { 48 class Document; 49 class Element; 50 class Node; 51 class TextEncoding; 52 } 53 54 namespace WebKit { 55 class WebFrameImpl; 56 57 // Get html data by serializing all frames of current page with lists 58 // which contain all resource links that have local copy. 59 // contain all saved auxiliary files included all sub frames and resources. 60 // This function will find out all frames and serialize them to HTML data. 61 // We have a data buffer to temporary saving generated html data. We will 62 // sequentially call WebViewDelegate::SendSerializedHtmlData once the data 63 // buffer is full. See comments of WebViewDelegate::SendSerializedHtmlData 64 // for getting more information. 65 class WebPageSerializerImpl { 66 public: 67 // Do serialization action. Return false means no available frame has been 68 // serialized, otherwise return true. 69 bool serialize(); 70 71 // The parameter specifies which frame need to be serialized. 72 // The parameter recursive_serialization specifies whether we need to 73 // serialize all sub frames of the specified frame or not. 74 // The parameter delegate specifies the pointer of interface 75 // DomSerializerDelegate provide sink interface which can receive the 76 // individual chunks of data to be saved. 77 // The parameter links contain original URLs of all saved links. 78 // The parameter local_paths contain corresponding local file paths of all 79 // saved links, which matched with vector:links one by one. 80 // The parameter local_directory_name is relative path of directory which 81 // contain all saved auxiliary files included all sub frames and resources. 82 WebPageSerializerImpl(WebFrame* frame, 83 bool recursive, 84 WebPageSerializerClient* client, 85 const WebVector<WebURL>& links, 86 const WebVector<WebString>& localPaths, 87 const WebString& localDirectoryName); 88 89 private: 90 // Specified frame which need to be serialized; 91 WebFrameImpl* m_specifiedWebFrameImpl; 92 // Pointer of WebPageSerializerClient 93 WebPageSerializerClient* m_client; 94 // This hash map is used to map resource URL of original link to its local 95 // file path. 96 typedef HashMap<WTF::String, WTF::String> LinkLocalPathMap; 97 // local_links_ include all pair of local resource path and corresponding 98 // original link. 99 LinkLocalPathMap m_localLinks; 100 // Data buffer for saving result of serialized DOM data. 101 StringBuilder m_dataBuffer; 102 // Passing true to recursive_serialization_ indicates we will serialize not 103 // only the specified frame but also all sub-frames in the specific frame. 104 // Otherwise we only serialize the specified frame excluded all sub-frames. 105 bool m_recursiveSerialization; 106 // Flag indicates whether we have collected all frames which need to be 107 // serialized or not; 108 bool m_framesCollected; 109 // Local directory name of all local resource files. 110 WTF::String m_localDirectoryName; 111 // Vector for saving all frames which need to be serialized. 112 Vector<WebFrameImpl*> m_frames; 113 114 // Web entities conversion maps. 115 WebEntities m_htmlEntities; 116 WebEntities m_xmlEntities; 117 118 struct SerializeDomParam { 119 const WebCore::KURL& url; 120 const WebCore::TextEncoding& textEncoding; 121 WebCore::Document* document; 122 const WTF::String& directoryName; 123 bool isHTMLDocument; // document.isHTMLDocument() 124 bool haveSeenDocType; 125 bool haveAddedCharsetDeclaration; 126 // This meta element need to be skipped when serializing DOM. 127 const WebCore::Element* skipMetaElement; 128 // Flag indicates we are in script or style tag. 129 bool isInScriptOrStyleTag; 130 bool haveAddedXMLProcessingDirective; 131 // Flag indicates whether we have added additional contents before end tag. 132 // This flag will be re-assigned in each call of function 133 // PostActionAfterSerializeOpenTag and it could be changed in function 134 // PreActionBeforeSerializeEndTag if the function adds new contents into 135 // serialization stream. 136 bool haveAddedContentsBeforeEnd; 137 138 SerializeDomParam(const WebCore::KURL&, const WebCore::TextEncoding&, WebCore::Document*, const WTF::String& directoryName); 139 }; 140 141 // Collect all target frames which need to be serialized. 142 void collectTargetFrames(); 143 // Before we begin serializing open tag of a element, we give the target 144 // element a chance to do some work prior to add some additional data. 145 WTF::String preActionBeforeSerializeOpenTag(const WebCore::Element* element, 146 SerializeDomParam* param, 147 bool* needSkip); 148 // After we finish serializing open tag of a element, we give the target 149 // element a chance to do some post work to add some additional data. 150 WTF::String postActionAfterSerializeOpenTag(const WebCore::Element* element, 151 SerializeDomParam* param); 152 // Before we begin serializing end tag of a element, we give the target 153 // element a chance to do some work prior to add some additional data. 154 WTF::String preActionBeforeSerializeEndTag(const WebCore::Element* element, 155 SerializeDomParam* param, 156 bool* needSkip); 157 // After we finish serializing end tag of a element, we give the target 158 // element a chance to do some post work to add some additional data. 159 WTF::String postActionAfterSerializeEndTag(const WebCore::Element* element, 160 SerializeDomParam* param); 161 // Save generated html content to data buffer. 162 void saveHTMLContentToBuffer(const WTF::String& content, 163 SerializeDomParam* param); 164 165 enum FlushOption { 166 ForceFlush, 167 DoNotForceFlush, 168 }; 169 170 // Flushes the content buffer by encoding and sending the content to the 171 // WebPageSerializerClient. Content is not flushed if the buffer is not full 172 // unless force is 1. 173 void encodeAndFlushBuffer(WebPageSerializerClient::PageSerializationStatus status, 174 SerializeDomParam* param, 175 FlushOption); 176 // Serialize open tag of an specified element. 177 void openTagToString(WebCore::Element*, 178 SerializeDomParam* param); 179 // Serialize end tag of an specified element. 180 void endTagToString(WebCore::Element*, 181 SerializeDomParam* param); 182 // Build content for a specified node 183 void buildContentForNode(WebCore::Node*, 184 SerializeDomParam* param); 185 }; 186 187 } // namespace WebKit 188 189 #endif 190