1 /* 2 * Copyright (C) 2011 Google Inc. All rights reserved. 3 * Copyright (C) 2012 Intel Corporation. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: 8 * 9 * * Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * * Redistributions in binary form must reproduce the above 12 * copyright notice, this list of conditions and the following disclaimer 13 * in the documentation and/or other materials provided with the 14 * distribution. 15 * * Neither the name of Google Inc. nor the names of its 16 * contributors may be used to endorse or promote products derived from 17 * this software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include "config.h" 33 #include "platform/network/ParsedContentType.h" 34 35 #include "wtf/text/CString.h" 36 #include "wtf/text/StringBuilder.h" 37 38 namespace WebCore { 39 40 class DummyParsedContentType { 41 public: 42 void setContentType(const SubstringRange&) const { } 43 void setContentTypeParameter(const SubstringRange&, const SubstringRange&) const { } 44 }; 45 46 static void skipSpaces(const String& input, unsigned& startIndex) 47 { 48 while (startIndex < input.length() && input[startIndex] == ' ') 49 ++startIndex; 50 } 51 52 static SubstringRange parseParameterPart(const String& input, unsigned& startIndex) 53 { 54 unsigned inputLength = input.length(); 55 unsigned tokenStart = startIndex; 56 unsigned& tokenEnd = startIndex; 57 58 if (tokenEnd >= inputLength) 59 return SubstringRange(); 60 61 bool quoted = input[tokenStart] == '\"'; 62 bool escape = false; 63 64 while (tokenEnd < inputLength) { 65 UChar c = input[tokenEnd]; 66 if (quoted && tokenStart != tokenEnd && c == '\"' && !escape) 67 return SubstringRange(tokenStart + 1, tokenEnd++ - tokenStart - 1); 68 if (!quoted && (c == ';' || c == '=')) 69 return SubstringRange(tokenStart, tokenEnd - tokenStart); 70 escape = !escape && c == '\\'; 71 ++tokenEnd; 72 } 73 74 if (quoted) 75 return SubstringRange(); 76 return SubstringRange(tokenStart, tokenEnd - tokenStart); 77 } 78 79 static String substringForRange(const String& string, const SubstringRange& range) 80 { 81 return string.substring(range.first, range.second); 82 } 83 84 // From http://tools.ietf.org/html/rfc2045#section-5.1: 85 // 86 // content := "Content-Type" ":" type "/" subtype 87 // *(";" parameter) 88 // ; Matching of media type and subtype 89 // ; is ALWAYS case-insensitive. 90 // 91 // type := discrete-type / composite-type 92 // 93 // discrete-type := "text" / "image" / "audio" / "video" / 94 // "application" / extension-token 95 // 96 // composite-type := "message" / "multipart" / extension-token 97 // 98 // extension-token := ietf-token / x-token 99 // 100 // ietf-token := <An extension token defined by a 101 // standards-track RFC and registered 102 // with IANA.> 103 // 104 // x-token := <The two characters "X-" or "x-" followed, with 105 // no intervening white space, by any token> 106 // 107 // subtype := extension-token / iana-token 108 // 109 // iana-token := <A publicly-defined extension token. Tokens 110 // of this form must be registered with IANA 111 // as specified in RFC 2048.> 112 // 113 // parameter := attribute "=" value 114 // 115 // attribute := token 116 // ; Matching of attributes 117 // ; is ALWAYS case-insensitive. 118 // 119 // value := token / quoted-string 120 // 121 // token := 1*<any (US-ASCII) CHAR except SPACE, CTLs, 122 // or tspecials> 123 // 124 // tspecials := "(" / ")" / "<" / ">" / "@" / 125 // "," / ";" / ":" / "\" / <"> 126 // "/" / "[" / "]" / "?" / "=" 127 // ; Must be in quoted-string, 128 // ; to use within parameter values 129 130 template <class ReceiverType> 131 bool parseContentType(const String& contentType, ReceiverType& receiver) 132 { 133 unsigned index = 0; 134 unsigned contentTypeLength = contentType.length(); 135 skipSpaces(contentType, index); 136 if (index >= contentTypeLength) { 137 WTF_LOG_ERROR("Invalid Content-Type string '%s'", contentType.ascii().data()); 138 return false; 139 } 140 141 // There should not be any quoted strings until we reach the parameters. 142 size_t semiColonIndex = contentType.find(';', index); 143 if (semiColonIndex == kNotFound) { 144 receiver.setContentType(SubstringRange(index, contentTypeLength - index)); 145 return true; 146 } 147 148 receiver.setContentType(SubstringRange(index, semiColonIndex - index)); 149 index = semiColonIndex + 1; 150 while (true) { 151 skipSpaces(contentType, index); 152 SubstringRange keyRange = parseParameterPart(contentType, index); 153 if (!keyRange.second || index >= contentTypeLength) { 154 WTF_LOG_ERROR("Invalid Content-Type parameter name. (at %i)", index); 155 return false; 156 } 157 158 // Should we tolerate spaces here? 159 if (contentType[index++] != '=' || index >= contentTypeLength) { 160 WTF_LOG_ERROR("Invalid Content-Type malformed parameter (at %i).", index); 161 return false; 162 } 163 164 // Should we tolerate spaces here? 165 SubstringRange valueRange = parseParameterPart(contentType, index); 166 167 if (!valueRange.second) { 168 WTF_LOG_ERROR("Invalid Content-Type, invalid parameter value (at %i, for '%s').", index, substringForRange(contentType, keyRange).stripWhiteSpace().ascii().data()); 169 return false; 170 } 171 172 // Should we tolerate spaces here? 173 if (index < contentTypeLength && contentType[index++] != ';') { 174 WTF_LOG_ERROR("Invalid Content-Type, invalid character at the end of key/value parameter (at %i).", index); 175 return false; 176 } 177 178 receiver.setContentTypeParameter(keyRange, valueRange); 179 180 if (index >= contentTypeLength) 181 return true; 182 } 183 184 return true; 185 } 186 187 bool isValidContentType(const String& contentType) 188 { 189 if (contentType.contains('\r') || contentType.contains('\n')) 190 return false; 191 192 DummyParsedContentType parsedContentType = DummyParsedContentType(); 193 return parseContentType<DummyParsedContentType>(contentType, parsedContentType); 194 } 195 196 ParsedContentType::ParsedContentType(const String& contentType) 197 : m_contentType(contentType.stripWhiteSpace()) 198 { 199 parseContentType<ParsedContentType>(m_contentType, *this); 200 } 201 202 String ParsedContentType::charset() const 203 { 204 return parameterValueForName("charset"); 205 } 206 207 String ParsedContentType::parameterValueForName(const String& name) const 208 { 209 return m_parameters.get(name); 210 } 211 212 size_t ParsedContentType::parameterCount() const 213 { 214 return m_parameters.size(); 215 } 216 217 void ParsedContentType::setContentType(const SubstringRange& contentRange) 218 { 219 m_mimeType = substringForRange(m_contentType, contentRange).stripWhiteSpace(); 220 } 221 222 void ParsedContentType::setContentTypeParameter(const SubstringRange& key, const SubstringRange& value) 223 { 224 m_parameters.set(substringForRange(m_contentType, key), substringForRange(m_contentType, value)); 225 } 226 227 } 228