Home | History | Annotate | Download | only in network
      1 /*
      2  * Copyright (C) 2006 Alexey Proskuryakov (ap (at) webkit.org)
      3  * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
      4  * Copyright (C) 2009 Torch Mobile Inc. http://www.torchmobile.com/
      5  * Copyright (C) 2009 Google Inc. All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  *
     11  * 1.  Redistributions of source code must retain the above copyright
     12  *     notice, this list of conditions and the following disclaimer.
     13  * 2.  Redistributions in binary form must reproduce the above copyright
     14  *     notice, this list of conditions and the following disclaimer in the
     15  *     documentation and/or other materials provided with the distribution.
     16  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
     17  *     its contributors may be used to endorse or promote products derived
     18  *     from this software without specific prior written permission.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
     21  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     22  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     23  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
     24  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     25  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     27  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include "config.h"
     33 #include "HTTPParsers.h"
     34 #include "ResourceResponseBase.h"
     35 
     36 #include "PlatformString.h"
     37 #include <wtf/text/CString.h>
     38 #include <wtf/DateMath.h>
     39 
     40 using namespace WTF;
     41 
     42 namespace WebCore {
     43 
     44 // true if there is more to parse
     45 static inline bool skipWhiteSpace(const String& str, unsigned& pos, bool fromHttpEquivMeta)
     46 {
     47     unsigned len = str.length();
     48 
     49     if (fromHttpEquivMeta) {
     50         while (pos != len && str[pos] <= ' ')
     51             ++pos;
     52     } else {
     53         while (pos != len && (str[pos] == '\t' || str[pos] == ' '))
     54             ++pos;
     55     }
     56 
     57     return pos != len;
     58 }
     59 
     60 // Returns true if the function can match the whole token (case insensitive).
     61 // Note: Might return pos == str.length()
     62 static inline bool skipToken(const String& str, unsigned& pos, const char* token)
     63 {
     64     unsigned len = str.length();
     65 
     66     while (pos != len && *token) {
     67         if (toASCIILower(str[pos]) != *token++)
     68             return false;
     69         ++pos;
     70     }
     71 
     72     return true;
     73 }
     74 
     75 ContentDispositionType contentDispositionType(const String& contentDisposition)
     76 {
     77     if (contentDisposition.isEmpty())
     78         return ContentDispositionNone;
     79 
     80     // Some broken sites just send
     81     // Content-Disposition: ; filename="file"
     82     // screen those out here.
     83     if (contentDisposition.startsWith(";"))
     84         return ContentDispositionNone;
     85 
     86     if (contentDisposition.startsWith("inline", false))
     87         return ContentDispositionInline;
     88 
     89     // Some broken sites just send
     90     // Content-Disposition: filename="file"
     91     // without a disposition token... screen those out.
     92     if (contentDisposition.startsWith("filename", false))
     93         return ContentDispositionNone;
     94 
     95     // Also in use is Content-Disposition: name="file"
     96     if (contentDisposition.startsWith("name", false))
     97         return ContentDispositionNone;
     98 
     99     // We have a content-disposition of "attachment" or unknown.
    100     // RFC 2183, section 2.8 says that an unknown disposition
    101     // value should be treated as "attachment"
    102     return ContentDispositionAttachment;
    103 }
    104 
    105 bool parseHTTPRefresh(const String& refresh, bool fromHttpEquivMeta, double& delay, String& url)
    106 {
    107     unsigned len = refresh.length();
    108     unsigned pos = 0;
    109 
    110     if (!skipWhiteSpace(refresh, pos, fromHttpEquivMeta))
    111         return false;
    112 
    113     while (pos != len && refresh[pos] != ',' && refresh[pos] != ';')
    114         ++pos;
    115 
    116     if (pos == len) { // no URL
    117         url = String();
    118         bool ok;
    119         delay = refresh.stripWhiteSpace().toDouble(&ok);
    120         return ok;
    121     } else {
    122         bool ok;
    123         delay = refresh.left(pos).stripWhiteSpace().toDouble(&ok);
    124         if (!ok)
    125             return false;
    126 
    127         ++pos;
    128         skipWhiteSpace(refresh, pos, fromHttpEquivMeta);
    129         unsigned urlStartPos = pos;
    130         if (refresh.find("url", urlStartPos, false) == urlStartPos) {
    131             urlStartPos += 3;
    132             skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
    133             if (refresh[urlStartPos] == '=') {
    134                 ++urlStartPos;
    135                 skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
    136             } else
    137                 urlStartPos = pos;  // e.g. "Refresh: 0; url.html"
    138         }
    139 
    140         unsigned urlEndPos = len;
    141 
    142         if (refresh[urlStartPos] == '"' || refresh[urlStartPos] == '\'') {
    143             UChar quotationMark = refresh[urlStartPos];
    144             urlStartPos++;
    145             while (urlEndPos > urlStartPos) {
    146                 urlEndPos--;
    147                 if (refresh[urlEndPos] == quotationMark)
    148                     break;
    149             }
    150 
    151             // https://bugs.webkit.org/show_bug.cgi?id=27868
    152             // Sometimes there is no closing quote for the end of the URL even though there was an opening quote.
    153             // If we looped over the entire alleged URL string back to the opening quote, just go ahead and use everything
    154             // after the opening quote instead.
    155             if (urlEndPos == urlStartPos)
    156                 urlEndPos = len;
    157         }
    158 
    159         url = refresh.substring(urlStartPos, urlEndPos - urlStartPos).stripWhiteSpace();
    160         return true;
    161     }
    162 }
    163 
    164 double parseDate(const String& value)
    165 {
    166     return parseDateFromNullTerminatedCharacters(value.utf8().data());
    167 }
    168 
    169 String filenameFromHTTPContentDisposition(const String& value)
    170 {
    171     Vector<String> keyValuePairs;
    172     value.split(';', keyValuePairs);
    173 
    174     unsigned length = keyValuePairs.size();
    175     for (unsigned i = 0; i < length; i++) {
    176         size_t valueStartPos = keyValuePairs[i].find('=');
    177         if (valueStartPos == notFound)
    178             continue;
    179 
    180         String key = keyValuePairs[i].left(valueStartPos).stripWhiteSpace();
    181 
    182         if (key.isEmpty() || key != "filename")
    183             continue;
    184 
    185         String value = keyValuePairs[i].substring(valueStartPos + 1).stripWhiteSpace();
    186 
    187         // Remove quotes if there are any
    188         if (value[0] == '\"')
    189             value = value.substring(1, value.length() - 2);
    190 
    191         return value;
    192     }
    193 
    194     return String();
    195 }
    196 
    197 String extractMIMETypeFromMediaType(const String& mediaType)
    198 {
    199     Vector<UChar, 64> mimeType;
    200     unsigned length = mediaType.length();
    201     mimeType.reserveCapacity(length);
    202     for (unsigned i = 0; i < length; i++) {
    203         UChar c = mediaType[i];
    204 
    205         if (c == ';')
    206             break;
    207 
    208         // While RFC 2616 does not allow it, other browsers allow multiple values in the HTTP media
    209         // type header field, Content-Type. In such cases, the media type string passed here may contain
    210         // the multiple values separated by commas. For now, this code ignores text after the first comma,
    211         // which prevents it from simply failing to parse such types altogether. Later for better
    212         // compatibility we could consider using the first or last valid MIME type instead.
    213         // See https://bugs.webkit.org/show_bug.cgi?id=25352 for more discussion.
    214         if (c == ',')
    215             break;
    216 
    217         // FIXME: The following is not correct. RFC 2616 allows linear white space before and
    218         // after the MIME type, but not within the MIME type itself. And linear white space
    219         // includes only a few specific ASCII characters; a small subset of isSpaceOrNewline.
    220         // See https://bugs.webkit.org/show_bug.cgi?id=8644 for a bug tracking part of this.
    221         if (isSpaceOrNewline(c))
    222             continue;
    223 
    224         mimeType.append(c);
    225     }
    226 
    227     if (mimeType.size() == length)
    228         return mediaType;
    229     return String(mimeType.data(), mimeType.size());
    230 }
    231 
    232 String extractCharsetFromMediaType(const String& mediaType)
    233 {
    234     unsigned int pos, len;
    235     findCharsetInMediaType(mediaType, pos, len);
    236     return mediaType.substring(pos, len);
    237 }
    238 
    239 void findCharsetInMediaType(const String& mediaType, unsigned int& charsetPos, unsigned int& charsetLen, unsigned int start)
    240 {
    241     charsetPos = start;
    242     charsetLen = 0;
    243 
    244     size_t pos = start;
    245     unsigned length = mediaType.length();
    246 
    247     while (pos < length) {
    248         pos = mediaType.find("charset", pos, false);
    249         if (pos == notFound || pos == 0) {
    250             charsetLen = 0;
    251             return;
    252         }
    253 
    254         // is what we found a beginning of a word?
    255         if (mediaType[pos-1] > ' ' && mediaType[pos-1] != ';') {
    256             pos += 7;
    257             continue;
    258         }
    259 
    260         pos += 7;
    261 
    262         // skip whitespace
    263         while (pos != length && mediaType[pos] <= ' ')
    264             ++pos;
    265 
    266         if (mediaType[pos++] != '=') // this "charset" substring wasn't a parameter name, but there may be others
    267             continue;
    268 
    269         while (pos != length && (mediaType[pos] <= ' ' || mediaType[pos] == '"' || mediaType[pos] == '\''))
    270             ++pos;
    271 
    272         // we don't handle spaces within quoted parameter values, because charset names cannot have any
    273         unsigned endpos = pos;
    274         while (pos != length && mediaType[endpos] > ' ' && mediaType[endpos] != '"' && mediaType[endpos] != '\'' && mediaType[endpos] != ';')
    275             ++endpos;
    276 
    277         charsetPos = pos;
    278         charsetLen = endpos - pos;
    279         return;
    280     }
    281 }
    282 
    283 XSSProtectionDisposition parseXSSProtectionHeader(const String& header)
    284 {
    285     String stippedHeader = header.stripWhiteSpace();
    286 
    287     if (stippedHeader.isEmpty())
    288         return XSSProtectionEnabled;
    289 
    290     if (stippedHeader[0] == '0')
    291         return XSSProtectionDisabled;
    292 
    293     unsigned length = header.length();
    294     unsigned pos = 0;
    295     if (stippedHeader[pos++] == '1'
    296         && skipWhiteSpace(stippedHeader, pos, false)
    297         && stippedHeader[pos++] == ';'
    298         && skipWhiteSpace(stippedHeader, pos, false)
    299         && skipToken(stippedHeader, pos, "mode")
    300         && skipWhiteSpace(stippedHeader, pos, false)
    301         && stippedHeader[pos++] == '='
    302         && skipWhiteSpace(stippedHeader, pos, false)
    303         && skipToken(stippedHeader, pos, "block")
    304         && pos == length)
    305         return XSSProtectionBlockEnabled;
    306 
    307     return XSSProtectionEnabled;
    308 }
    309 
    310 String extractReasonPhraseFromHTTPStatusLine(const String& statusLine)
    311 {
    312     size_t spacePos = statusLine.find(' ');
    313     // Remove status code from the status line.
    314     spacePos = statusLine.find(' ', spacePos + 1);
    315     return statusLine.substring(spacePos + 1);
    316 }
    317 
    318 bool parseRange(const String& range, long long& rangeOffset, long long& rangeEnd, long long& rangeSuffixLength)
    319 {
    320     // The format of "Range" header is defined in RFC 2616 Section 14.35.1.
    321     // http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35.1
    322     // We don't support multiple range requests.
    323 
    324     rangeOffset = rangeEnd = rangeSuffixLength = -1;
    325 
    326     // The "bytes" unit identifier should be present.
    327     static const char bytesStart[] = "bytes=";
    328     if (!range.startsWith(bytesStart, false))
    329         return false;
    330     String byteRange = range.substring(sizeof(bytesStart) - 1);
    331 
    332     // The '-' character needs to be present.
    333     int index = byteRange.find('-');
    334     if (index == -1)
    335         return false;
    336 
    337     // If the '-' character is at the beginning, the suffix length, which specifies the last N bytes, is provided.
    338     // Example:
    339     //     -500
    340     if (!index) {
    341         String suffixLengthString = byteRange.substring(index + 1).stripWhiteSpace();
    342         bool ok;
    343         long long value = suffixLengthString.toInt64Strict(&ok);
    344         if (ok)
    345             rangeSuffixLength = value;
    346         return true;
    347     }
    348 
    349     // Otherwise, the first-byte-position and the last-byte-position are provied.
    350     // Examples:
    351     //     0-499
    352     //     500-
    353     String firstBytePosStr = byteRange.left(index).stripWhiteSpace();
    354     bool ok;
    355     long long firstBytePos = firstBytePosStr.toInt64Strict(&ok);
    356     if (!ok)
    357         return false;
    358 
    359     String lastBytePosStr = byteRange.substring(index + 1).stripWhiteSpace();
    360     long long lastBytePos = -1;
    361     if (!lastBytePosStr.isEmpty()) {
    362         lastBytePos = lastBytePosStr.toInt64Strict(&ok);
    363         if (!ok)
    364             return false;
    365     }
    366 
    367     if (firstBytePos < 0 || !(lastBytePos == -1 || lastBytePos >= firstBytePos))
    368         return false;
    369 
    370     rangeOffset = firstBytePos;
    371     rangeEnd = lastBytePos;
    372     return true;
    373 }
    374 
    375 }
    376