1 /* 2 * Copyright (C) 2006 Alexey Proskuryakov (ap (at) webkit.org) 3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 4 * Copyright (C) 2009 Torch Mobile Inc. http://www.torchmobile.com/ 5 * Copyright (C) 2009 Google Inc. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of 17 * its contributors may be used to endorse or promote products derived 18 * from this software without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY 21 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY 24 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 25 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 27 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include "config.h" 33 #include "HTTPParsers.h" 34 #include "ResourceResponseBase.h" 35 36 #include "PlatformString.h" 37 #include <wtf/text/CString.h> 38 #include <wtf/DateMath.h> 39 40 using namespace WTF; 41 42 namespace WebCore { 43 44 // true if there is more to parse 45 static inline bool skipWhiteSpace(const String& str, unsigned& pos, bool fromHttpEquivMeta) 46 { 47 unsigned len = str.length(); 48 49 if (fromHttpEquivMeta) { 50 while (pos != len && str[pos] <= ' ') 51 ++pos; 52 } else { 53 while (pos != len && (str[pos] == '\t' || str[pos] == ' ')) 54 ++pos; 55 } 56 57 return pos != len; 58 } 59 60 // Returns true if the function can match the whole token (case insensitive). 61 // Note: Might return pos == str.length() 62 static inline bool skipToken(const String& str, unsigned& pos, const char* token) 63 { 64 unsigned len = str.length(); 65 66 while (pos != len && *token) { 67 if (toASCIILower(str[pos]) != *token++) 68 return false; 69 ++pos; 70 } 71 72 return true; 73 } 74 75 ContentDispositionType contentDispositionType(const String& contentDisposition) 76 { 77 if (contentDisposition.isEmpty()) 78 return ContentDispositionNone; 79 80 // Some broken sites just send 81 // Content-Disposition: ; filename="file" 82 // screen those out here. 83 if (contentDisposition.startsWith(";")) 84 return ContentDispositionNone; 85 86 if (contentDisposition.startsWith("inline", false)) 87 return ContentDispositionInline; 88 89 // Some broken sites just send 90 // Content-Disposition: filename="file" 91 // without a disposition token... screen those out. 92 if (contentDisposition.startsWith("filename", false)) 93 return ContentDispositionNone; 94 95 // Also in use is Content-Disposition: name="file" 96 if (contentDisposition.startsWith("name", false)) 97 return ContentDispositionNone; 98 99 // We have a content-disposition of "attachment" or unknown. 100 // RFC 2183, section 2.8 says that an unknown disposition 101 // value should be treated as "attachment" 102 return ContentDispositionAttachment; 103 } 104 105 bool parseHTTPRefresh(const String& refresh, bool fromHttpEquivMeta, double& delay, String& url) 106 { 107 unsigned len = refresh.length(); 108 unsigned pos = 0; 109 110 if (!skipWhiteSpace(refresh, pos, fromHttpEquivMeta)) 111 return false; 112 113 while (pos != len && refresh[pos] != ',' && refresh[pos] != ';') 114 ++pos; 115 116 if (pos == len) { // no URL 117 url = String(); 118 bool ok; 119 delay = refresh.stripWhiteSpace().toDouble(&ok); 120 return ok; 121 } else { 122 bool ok; 123 delay = refresh.left(pos).stripWhiteSpace().toDouble(&ok); 124 if (!ok) 125 return false; 126 127 ++pos; 128 skipWhiteSpace(refresh, pos, fromHttpEquivMeta); 129 unsigned urlStartPos = pos; 130 if (refresh.find("url", urlStartPos, false) == urlStartPos) { 131 urlStartPos += 3; 132 skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta); 133 if (refresh[urlStartPos] == '=') { 134 ++urlStartPos; 135 skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta); 136 } else 137 urlStartPos = pos; // e.g. "Refresh: 0; url.html" 138 } 139 140 unsigned urlEndPos = len; 141 142 if (refresh[urlStartPos] == '"' || refresh[urlStartPos] == '\'') { 143 UChar quotationMark = refresh[urlStartPos]; 144 urlStartPos++; 145 while (urlEndPos > urlStartPos) { 146 urlEndPos--; 147 if (refresh[urlEndPos] == quotationMark) 148 break; 149 } 150 151 // https://bugs.webkit.org/show_bug.cgi?id=27868 152 // Sometimes there is no closing quote for the end of the URL even though there was an opening quote. 153 // If we looped over the entire alleged URL string back to the opening quote, just go ahead and use everything 154 // after the opening quote instead. 155 if (urlEndPos == urlStartPos) 156 urlEndPos = len; 157 } 158 159 url = refresh.substring(urlStartPos, urlEndPos - urlStartPos).stripWhiteSpace(); 160 return true; 161 } 162 } 163 164 double parseDate(const String& value) 165 { 166 return parseDateFromNullTerminatedCharacters(value.utf8().data()); 167 } 168 169 String filenameFromHTTPContentDisposition(const String& value) 170 { 171 Vector<String> keyValuePairs; 172 value.split(';', keyValuePairs); 173 174 unsigned length = keyValuePairs.size(); 175 for (unsigned i = 0; i < length; i++) { 176 size_t valueStartPos = keyValuePairs[i].find('='); 177 if (valueStartPos == notFound) 178 continue; 179 180 String key = keyValuePairs[i].left(valueStartPos).stripWhiteSpace(); 181 182 if (key.isEmpty() || key != "filename") 183 continue; 184 185 String value = keyValuePairs[i].substring(valueStartPos + 1).stripWhiteSpace(); 186 187 // Remove quotes if there are any 188 if (value[0] == '\"') 189 value = value.substring(1, value.length() - 2); 190 191 return value; 192 } 193 194 return String(); 195 } 196 197 String extractMIMETypeFromMediaType(const String& mediaType) 198 { 199 Vector<UChar, 64> mimeType; 200 unsigned length = mediaType.length(); 201 mimeType.reserveCapacity(length); 202 for (unsigned i = 0; i < length; i++) { 203 UChar c = mediaType[i]; 204 205 if (c == ';') 206 break; 207 208 // While RFC 2616 does not allow it, other browsers allow multiple values in the HTTP media 209 // type header field, Content-Type. In such cases, the media type string passed here may contain 210 // the multiple values separated by commas. For now, this code ignores text after the first comma, 211 // which prevents it from simply failing to parse such types altogether. Later for better 212 // compatibility we could consider using the first or last valid MIME type instead. 213 // See https://bugs.webkit.org/show_bug.cgi?id=25352 for more discussion. 214 if (c == ',') 215 break; 216 217 // FIXME: The following is not correct. RFC 2616 allows linear white space before and 218 // after the MIME type, but not within the MIME type itself. And linear white space 219 // includes only a few specific ASCII characters; a small subset of isSpaceOrNewline. 220 // See https://bugs.webkit.org/show_bug.cgi?id=8644 for a bug tracking part of this. 221 if (isSpaceOrNewline(c)) 222 continue; 223 224 mimeType.append(c); 225 } 226 227 if (mimeType.size() == length) 228 return mediaType; 229 return String(mimeType.data(), mimeType.size()); 230 } 231 232 String extractCharsetFromMediaType(const String& mediaType) 233 { 234 unsigned int pos, len; 235 findCharsetInMediaType(mediaType, pos, len); 236 return mediaType.substring(pos, len); 237 } 238 239 void findCharsetInMediaType(const String& mediaType, unsigned int& charsetPos, unsigned int& charsetLen, unsigned int start) 240 { 241 charsetPos = start; 242 charsetLen = 0; 243 244 size_t pos = start; 245 unsigned length = mediaType.length(); 246 247 while (pos < length) { 248 pos = mediaType.find("charset", pos, false); 249 if (pos == notFound || pos == 0) { 250 charsetLen = 0; 251 return; 252 } 253 254 // is what we found a beginning of a word? 255 if (mediaType[pos-1] > ' ' && mediaType[pos-1] != ';') { 256 pos += 7; 257 continue; 258 } 259 260 pos += 7; 261 262 // skip whitespace 263 while (pos != length && mediaType[pos] <= ' ') 264 ++pos; 265 266 if (mediaType[pos++] != '=') // this "charset" substring wasn't a parameter name, but there may be others 267 continue; 268 269 while (pos != length && (mediaType[pos] <= ' ' || mediaType[pos] == '"' || mediaType[pos] == '\'')) 270 ++pos; 271 272 // we don't handle spaces within quoted parameter values, because charset names cannot have any 273 unsigned endpos = pos; 274 while (pos != length && mediaType[endpos] > ' ' && mediaType[endpos] != '"' && mediaType[endpos] != '\'' && mediaType[endpos] != ';') 275 ++endpos; 276 277 charsetPos = pos; 278 charsetLen = endpos - pos; 279 return; 280 } 281 } 282 283 XSSProtectionDisposition parseXSSProtectionHeader(const String& header) 284 { 285 String stippedHeader = header.stripWhiteSpace(); 286 287 if (stippedHeader.isEmpty()) 288 return XSSProtectionEnabled; 289 290 if (stippedHeader[0] == '0') 291 return XSSProtectionDisabled; 292 293 unsigned length = header.length(); 294 unsigned pos = 0; 295 if (stippedHeader[pos++] == '1' 296 && skipWhiteSpace(stippedHeader, pos, false) 297 && stippedHeader[pos++] == ';' 298 && skipWhiteSpace(stippedHeader, pos, false) 299 && skipToken(stippedHeader, pos, "mode") 300 && skipWhiteSpace(stippedHeader, pos, false) 301 && stippedHeader[pos++] == '=' 302 && skipWhiteSpace(stippedHeader, pos, false) 303 && skipToken(stippedHeader, pos, "block") 304 && pos == length) 305 return XSSProtectionBlockEnabled; 306 307 return XSSProtectionEnabled; 308 } 309 310 String extractReasonPhraseFromHTTPStatusLine(const String& statusLine) 311 { 312 size_t spacePos = statusLine.find(' '); 313 // Remove status code from the status line. 314 spacePos = statusLine.find(' ', spacePos + 1); 315 return statusLine.substring(spacePos + 1); 316 } 317 318 bool parseRange(const String& range, long long& rangeOffset, long long& rangeEnd, long long& rangeSuffixLength) 319 { 320 // The format of "Range" header is defined in RFC 2616 Section 14.35.1. 321 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35.1 322 // We don't support multiple range requests. 323 324 rangeOffset = rangeEnd = rangeSuffixLength = -1; 325 326 // The "bytes" unit identifier should be present. 327 static const char bytesStart[] = "bytes="; 328 if (!range.startsWith(bytesStart, false)) 329 return false; 330 String byteRange = range.substring(sizeof(bytesStart) - 1); 331 332 // The '-' character needs to be present. 333 int index = byteRange.find('-'); 334 if (index == -1) 335 return false; 336 337 // If the '-' character is at the beginning, the suffix length, which specifies the last N bytes, is provided. 338 // Example: 339 // -500 340 if (!index) { 341 String suffixLengthString = byteRange.substring(index + 1).stripWhiteSpace(); 342 bool ok; 343 long long value = suffixLengthString.toInt64Strict(&ok); 344 if (ok) 345 rangeSuffixLength = value; 346 return true; 347 } 348 349 // Otherwise, the first-byte-position and the last-byte-position are provied. 350 // Examples: 351 // 0-499 352 // 500- 353 String firstBytePosStr = byteRange.left(index).stripWhiteSpace(); 354 bool ok; 355 long long firstBytePos = firstBytePosStr.toInt64Strict(&ok); 356 if (!ok) 357 return false; 358 359 String lastBytePosStr = byteRange.substring(index + 1).stripWhiteSpace(); 360 long long lastBytePos = -1; 361 if (!lastBytePosStr.isEmpty()) { 362 lastBytePos = lastBytePosStr.toInt64Strict(&ok); 363 if (!ok) 364 return false; 365 } 366 367 if (firstBytePos < 0 || !(lastBytePos == -1 || lastBytePos >= firstBytePos)) 368 return false; 369 370 rangeOffset = firstBytePos; 371 rangeEnd = lastBytePos; 372 return true; 373 } 374 375 } 376