1 /* 2 * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org) 3 * (C) 1999 Antti Koivisto (koivisto (at) kde.org) 4 * (C) 2001 Dirk Mueller (mueller (at) kde.org) 5 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. 6 * (C) 2006 Alexey Proskuryakov (ap (at) nypop.com) 7 * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) 8 * 9 * This library is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Library General Public 11 * License as published by the Free Software Foundation; either 12 * version 2 of the License, or (at your option) any later version. 13 * 14 * This library is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Library General Public License for more details. 18 * 19 * You should have received a copy of the GNU Library General Public License 20 * along with this library; see the file COPYING.LIB. If not, write to 21 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 22 * Boston, MA 02110-1301, USA. 23 */ 24 25 #include "config.h" 26 #include "FormDataBuilder.h" 27 28 #include "Document.h" 29 #include "Frame.h" 30 #include "FrameLoader.h" 31 #include "TextEncoding.h" 32 33 #include <limits> 34 #include <wtf/Assertions.h> 35 #include <wtf/HexNumber.h> 36 #include <wtf/text/CString.h> 37 #include <wtf/RandomNumber.h> 38 39 namespace WebCore { 40 41 // Helper functions 42 static inline void append(Vector<char>& buffer, char string) 43 { 44 buffer.append(string); 45 } 46 47 static inline void append(Vector<char>& buffer, const char* string) 48 { 49 buffer.append(string, strlen(string)); 50 } 51 52 static inline void append(Vector<char>& buffer, const CString& string) 53 { 54 buffer.append(string.data(), string.length()); 55 } 56 57 static void appendQuotedString(Vector<char>& buffer, const CString& string) 58 { 59 // Append a string as a quoted value, escaping quotes and line breaks. 60 // FIXME: Is it correct to use percent escaping here? Other browsers do not encode these characters yet, 61 // so we should test popular servers to find out if there is an encoding form they can handle. 62 unsigned length = string.length(); 63 for (unsigned i = 0; i < length; ++i) { 64 unsigned char c = string.data()[i]; 65 66 switch (c) { 67 case 0x0a: 68 append(buffer, "%0A"); 69 break; 70 case 0x0d: 71 append(buffer, "%0D"); 72 break; 73 case '"': 74 append(buffer, "%22"); 75 break; 76 default: 77 append(buffer, c); 78 } 79 } 80 } 81 82 TextEncoding FormDataBuilder::encodingFromAcceptCharset(const String& acceptCharset, Document* document) 83 { 84 String normalizedAcceptCharset = acceptCharset; 85 normalizedAcceptCharset.replace(',', ' '); 86 87 Vector<String> charsets; 88 normalizedAcceptCharset.split(' ', charsets); 89 90 TextEncoding encoding; 91 92 Vector<String>::const_iterator end = charsets.end(); 93 for (Vector<String>::const_iterator it = charsets.begin(); it != end; ++it) { 94 if ((encoding = TextEncoding(*it)).isValid()) 95 return encoding; 96 } 97 98 return document->inputEncoding(); 99 } 100 101 Vector<char> FormDataBuilder::generateUniqueBoundaryString() 102 { 103 Vector<char> boundary; 104 105 // The RFC 2046 spec says the alphanumeric characters plus the 106 // following characters are legal for boundaries: '()+_,-./:=? 107 // However the following characters, though legal, cause some sites 108 // to fail: (),./:=+ 109 // Note that our algorithm makes it twice as much likely for 'A' or 'B' 110 // to appear in the boundary string, because 0x41 and 0x42 are present in 111 // the below array twice. 112 static const char alphaNumericEncodingMap[64] = { 113 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 114 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 115 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 116 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 117 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 118 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 119 0x77, 0x78, 0x79, 0x7A, 0x30, 0x31, 0x32, 0x33, 120 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x41, 0x42 121 }; 122 123 // Start with an informative prefix. 124 append(boundary, "----WebKitFormBoundary"); 125 126 // Append 16 random 7bit ascii AlphaNumeric characters. 127 Vector<char> randomBytes; 128 129 for (unsigned i = 0; i < 4; ++i) { 130 unsigned randomness = static_cast<unsigned>(randomNumber() * (std::numeric_limits<unsigned>::max() + 1.0)); 131 randomBytes.append(alphaNumericEncodingMap[(randomness >> 24) & 0x3F]); 132 randomBytes.append(alphaNumericEncodingMap[(randomness >> 16) & 0x3F]); 133 randomBytes.append(alphaNumericEncodingMap[(randomness >> 8) & 0x3F]); 134 randomBytes.append(alphaNumericEncodingMap[randomness & 0x3F]); 135 } 136 137 boundary.append(randomBytes); 138 boundary.append(0); // Add a 0 at the end so we can use this as a C-style string. 139 return boundary; 140 } 141 142 void FormDataBuilder::beginMultiPartHeader(Vector<char>& buffer, const CString& boundary, const CString& name) 143 { 144 addBoundaryToMultiPartHeader(buffer, boundary); 145 146 // FIXME: This loses data irreversibly if the input name includes characters you can't encode 147 // in the website's character set. 148 append(buffer, "Content-Disposition: form-data; name=\""); 149 appendQuotedString(buffer, name); 150 append(buffer, '"'); 151 } 152 153 void FormDataBuilder::addBoundaryToMultiPartHeader(Vector<char>& buffer, const CString& boundary, bool isLastBoundary) 154 { 155 append(buffer, "--"); 156 append(buffer, boundary); 157 158 if (isLastBoundary) 159 append(buffer, "--"); 160 161 append(buffer, "\r\n"); 162 } 163 164 void FormDataBuilder::addFilenameToMultiPartHeader(Vector<char>& buffer, const TextEncoding& encoding, const String& filename) 165 { 166 // FIXME: This loses data irreversibly if the filename includes characters you can't encode 167 // in the website's character set. 168 append(buffer, "; filename=\""); 169 appendQuotedString(buffer, encoding.encode(filename.characters(), filename.length(), QuestionMarksForUnencodables)); 170 append(buffer, '"'); 171 } 172 173 void FormDataBuilder::addContentTypeToMultiPartHeader(Vector<char>& buffer, const CString& mimeType) 174 { 175 append(buffer, "\r\nContent-Type: "); 176 append(buffer, mimeType); 177 } 178 179 void FormDataBuilder::finishMultiPartHeader(Vector<char>& buffer) 180 { 181 append(buffer, "\r\n\r\n"); 182 } 183 184 void FormDataBuilder::addKeyValuePairAsFormData(Vector<char>& buffer, const CString& key, const CString& value) 185 { 186 if (!buffer.isEmpty()) 187 append(buffer, '&'); 188 189 encodeStringAsFormData(buffer, key); 190 append(buffer, '='); 191 encodeStringAsFormData(buffer, value); 192 } 193 194 void FormDataBuilder::encodeStringAsFormData(Vector<char>& buffer, const CString& string) 195 { 196 // Same safe characters as Netscape for compatibility. 197 static const char safeCharacters[] = "-._*"; 198 199 // http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1 200 unsigned length = string.length(); 201 for (unsigned i = 0; i < length; ++i) { 202 unsigned char c = string.data()[i]; 203 204 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || strchr(safeCharacters, c)) 205 append(buffer, c); 206 else if (c == ' ') 207 append(buffer, '+'); 208 else if (c == '\n' || (c == '\r' && (i + 1 >= length || string.data()[i + 1] != '\n'))) 209 append(buffer, "%0D%0A"); 210 else if (c != '\r') { 211 append(buffer, '%'); 212 appendByteAsHex(c, buffer); 213 } 214 } 215 } 216 217 } 218