1 /* 2 * Copyright (C) 2011 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include "config.h" 32 #include "platform/mhtml/MHTMLArchive.h" 33 34 #include "platform/DateComponents.h" 35 #include "platform/MIMETypeRegistry.h" 36 #include "platform/SerializedResource.h" 37 #include "platform/SharedBuffer.h" 38 #include "platform/mhtml/ArchiveResource.h" 39 #include "platform/mhtml/MHTMLParser.h" 40 #include "platform/text/QuotedPrintable.h" 41 #include "platform/weborigin/SchemeRegistry.h" 42 #include "wtf/CryptographicallyRandomNumber.h" 43 #include "wtf/DateMath.h" 44 #include "wtf/text/Base64.h" 45 #include "wtf/text/StringBuilder.h" 46 47 namespace blink { 48 49 const char* const quotedPrintable = "quoted-printable"; 50 const char* const base64 = "base64"; 51 const char* const binary = "binary"; 52 53 static String generateRandomBoundary() 54 { 55 // Trying to generate random boundaries similar to IE/UnMHT (ex: ----=_NextPart_000_001B_01CC157B.96F808A0). 56 const size_t randomValuesLength = 10; 57 char randomValues[randomValuesLength]; 58 cryptographicallyRandomValues(&randomValues, randomValuesLength); 59 StringBuilder stringBuilder; 60 stringBuilder.appendLiteral("----=_NextPart_000_"); 61 for (size_t i = 0; i < randomValuesLength; ++i) { 62 if (i == 2) 63 stringBuilder.append('_'); 64 else if (i == 6) 65 stringBuilder.append('.'); 66 stringBuilder.append(lowerNibbleToASCIIHexDigit(randomValues[i])); 67 stringBuilder.append(upperNibbleToASCIIHexDigit(randomValues[i])); 68 } 69 return stringBuilder.toString(); 70 } 71 72 static String replaceNonPrintableCharacters(const String& text) 73 { 74 StringBuilder stringBuilder; 75 for (size_t i = 0; i < text.length(); ++i) { 76 if (isASCIIPrintable(text[i])) 77 stringBuilder.append(text[i]); 78 else 79 stringBuilder.append('?'); 80 } 81 return stringBuilder.toString(); 82 } 83 84 MHTMLArchive::MHTMLArchive() 85 { 86 } 87 88 MHTMLArchive::~MHTMLArchive() 89 { 90 #if !ENABLE(OILPAN) 91 // Because all frames know about each other we need to perform a deep clearing of the archives graph. 92 clearAllSubframeArchives(); 93 #endif 94 } 95 96 PassRefPtrWillBeRawPtr<MHTMLArchive> MHTMLArchive::create() 97 { 98 return adoptRefWillBeNoop(new MHTMLArchive); 99 } 100 101 PassRefPtrWillBeRawPtr<MHTMLArchive> MHTMLArchive::create(const KURL& url, SharedBuffer* data) 102 { 103 // For security reasons we only load MHTML pages from local URLs. 104 if (!SchemeRegistry::shouldTreatURLSchemeAsLocal(url.protocol())) 105 return nullptr; 106 107 MHTMLParser parser(data); 108 RefPtrWillBeRawPtr<MHTMLArchive> mainArchive = parser.parseArchive(); 109 if (!mainArchive) 110 return nullptr; // Invalid MHTML file. 111 112 // Since MHTML is a flat format, we need to make all frames aware of all resources. 113 for (size_t i = 0; i < parser.frameCount(); ++i) { 114 RefPtrWillBeRawPtr<MHTMLArchive> archive = parser.frameAt(i); 115 for (size_t j = 1; j < parser.frameCount(); ++j) { 116 if (i != j) 117 archive->addSubframeArchive(parser.frameAt(j)); 118 } 119 for (size_t j = 0; j < parser.subResourceCount(); ++j) 120 archive->addSubresource(parser.subResourceAt(j)); 121 } 122 return mainArchive.release(); 123 } 124 125 PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(const Vector<SerializedResource>& resources, EncodingPolicy encodingPolicy, const String& title, const String& mimeType) 126 { 127 String boundary = generateRandomBoundary(); 128 String endOfResourceBoundary = "--" + boundary + "\r\n"; 129 130 DateComponents now; 131 now.setMillisecondsSinceEpochForDateTime(currentTimeMS()); 132 String dateString = makeRFC2822DateString(now.weekDay(), now.monthDay(), now.month(), now.fullYear(), now.hour(), now.minute(), now.second(), 0); 133 134 StringBuilder stringBuilder; 135 stringBuilder.appendLiteral("From: <Saved by WebKit>\r\n"); 136 stringBuilder.appendLiteral("Subject: "); 137 // We replace non ASCII characters with '?' characters to match IE's behavior. 138 stringBuilder.append(replaceNonPrintableCharacters(title)); 139 stringBuilder.appendLiteral("\r\nDate: "); 140 stringBuilder.append(dateString); 141 stringBuilder.appendLiteral("\r\nMIME-Version: 1.0\r\n"); 142 stringBuilder.appendLiteral("Content-Type: multipart/related;\r\n"); 143 stringBuilder.appendLiteral("\ttype=\""); 144 stringBuilder.append(mimeType); 145 stringBuilder.appendLiteral("\";\r\n"); 146 stringBuilder.appendLiteral("\tboundary=\""); 147 stringBuilder.append(boundary); 148 stringBuilder.appendLiteral("\"\r\n\r\n"); 149 150 // We use utf8() below instead of ascii() as ascii() replaces CRLFs with ?? (we still only have put ASCII characters in it). 151 ASSERT(stringBuilder.toString().containsOnlyASCII()); 152 CString asciiString = stringBuilder.toString().utf8(); 153 RefPtr<SharedBuffer> mhtmlData = SharedBuffer::create(); 154 mhtmlData->append(asciiString.data(), asciiString.length()); 155 156 for (size_t i = 0; i < resources.size(); ++i) { 157 const SerializedResource& resource = resources[i]; 158 159 stringBuilder.clear(); 160 stringBuilder.append(endOfResourceBoundary); 161 stringBuilder.appendLiteral("Content-Type: "); 162 stringBuilder.append(resource.mimeType); 163 164 const char* contentEncoding = 0; 165 if (encodingPolicy == UseBinaryEncoding) 166 contentEncoding = binary; 167 else if (MIMETypeRegistry::isSupportedJavaScriptMIMEType(resource.mimeType) || MIMETypeRegistry::isSupportedNonImageMIMEType(resource.mimeType)) 168 contentEncoding = quotedPrintable; 169 else 170 contentEncoding = base64; 171 172 stringBuilder.appendLiteral("\r\nContent-Transfer-Encoding: "); 173 stringBuilder.append(contentEncoding); 174 stringBuilder.appendLiteral("\r\nContent-Location: "); 175 stringBuilder.append(resource.url); 176 stringBuilder.appendLiteral("\r\n\r\n"); 177 178 asciiString = stringBuilder.toString().utf8(); 179 mhtmlData->append(asciiString.data(), asciiString.length()); 180 181 if (!strcmp(contentEncoding, binary)) { 182 const char* data; 183 size_t position = 0; 184 while (size_t length = resource.data->getSomeData(data, position)) { 185 mhtmlData->append(data, length); 186 position += length; 187 } 188 } else { 189 // FIXME: ideally we would encode the content as a stream without having to fetch it all. 190 const char* data = resource.data->data(); 191 size_t dataLength = resource.data->size(); 192 Vector<char> encodedData; 193 if (!strcmp(contentEncoding, quotedPrintable)) { 194 quotedPrintableEncode(data, dataLength, encodedData); 195 mhtmlData->append(encodedData.data(), encodedData.size()); 196 mhtmlData->append("\r\n", 2); 197 } else { 198 ASSERT(!strcmp(contentEncoding, base64)); 199 // We are not specifying insertLFs = true below as it would cut the lines with LFs and MHTML requires CRLFs. 200 base64Encode(data, dataLength, encodedData); 201 const size_t maximumLineLength = 76; 202 size_t index = 0; 203 size_t encodedDataLength = encodedData.size(); 204 do { 205 size_t lineLength = std::min(encodedDataLength - index, maximumLineLength); 206 mhtmlData->append(encodedData.data() + index, lineLength); 207 mhtmlData->append("\r\n", 2); 208 index += maximumLineLength; 209 } while (index < encodedDataLength); 210 } 211 } 212 } 213 214 asciiString = String("--" + boundary + "--\r\n").utf8(); 215 mhtmlData->append(asciiString.data(), asciiString.length()); 216 217 return mhtmlData.release(); 218 } 219 220 #if !ENABLE(OILPAN) 221 void MHTMLArchive::clearAllSubframeArchives() 222 { 223 SubFrameArchives clearedArchives; 224 clearAllSubframeArchivesImpl(&clearedArchives); 225 } 226 227 void MHTMLArchive::clearAllSubframeArchivesImpl(SubFrameArchives* clearedArchives) 228 { 229 for (SubFrameArchives::iterator it = m_subframeArchives.begin(); it != m_subframeArchives.end(); ++it) { 230 if (!clearedArchives->contains(*it)) { 231 clearedArchives->append(*it); 232 (*it)->clearAllSubframeArchivesImpl(clearedArchives); 233 } 234 } 235 m_subframeArchives.clear(); 236 } 237 #endif 238 239 void MHTMLArchive::setMainResource(PassRefPtrWillBeRawPtr<ArchiveResource> mainResource) 240 { 241 m_mainResource = mainResource; 242 } 243 244 void MHTMLArchive::addSubresource(PassRefPtrWillBeRawPtr<ArchiveResource> subResource) 245 { 246 m_subresources.append(subResource); 247 } 248 249 void MHTMLArchive::addSubframeArchive(PassRefPtrWillBeRawPtr<MHTMLArchive> subframeArchive) 250 { 251 m_subframeArchives.append(subframeArchive); 252 } 253 254 void MHTMLArchive::trace(Visitor* visitor) 255 { 256 visitor->trace(m_mainResource); 257 visitor->trace(m_subresources); 258 visitor->trace(m_subframeArchives); 259 } 260 261 } 262