1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef NET_HTTP_HTTP_UTIL_H_ 6 #define NET_HTTP_HTTP_UTIL_H_ 7 8 #include <vector> 9 10 #include "base/string_tokenizer.h" 11 #include "googleurl/src/gurl.h" 12 #include "net/http/http_byte_range.h" 13 14 // This is a macro to support extending this string literal at compile time. 15 // Please excuse me polluting your global namespace! 16 #define HTTP_LWS " \t" 17 18 namespace net { 19 20 class HttpUtil { 21 public: 22 // Returns the absolute path of the URL, to be used for the http request. 23 // The absolute path starts with a '/' and may contain a query. 24 static std::string PathForRequest(const GURL& url); 25 26 // Returns the absolute URL, to be used for the http request. This url is 27 // made up of the protocol, host, [port], path, [query]. Everything else 28 // is stripped (username, password, reference). 29 static std::string SpecForRequest(const GURL& url); 30 31 // Locates the next occurance of delimiter in line, skipping over quoted 32 // strings (e.g., commas will not be treated as delimiters if they appear 33 // within a quoted string). Returns the offset of the found delimiter or 34 // line.size() if no delimiter was found. 35 static size_t FindDelimiter(const std::string& line, 36 size_t search_start, 37 char delimiter); 38 39 // Parses the value of a Content-Type header. The resulting mime_type and 40 // charset values are normalized to lowercase. The mime_type and charset 41 // output values are only modified if the content_type_str contains a mime 42 // type and charset value, respectively. 43 static void ParseContentType(const std::string& content_type_str, 44 std::string* mime_type, 45 std::string* charset, 46 bool *had_charset); 47 48 // Scans the headers and look for the first "Range" header in |headers|, 49 // if "Range" exists and the first one of it is well formatted then returns 50 // true, |ranges| will contain a list of valid ranges. If return 51 // value is false then values in |ranges| should not be used. The format of 52 // "Range" header is defined in RFC 2616 Section 14.35.1. 53 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35.1 54 static bool ParseRanges(const std::string& headers, 55 std::vector<HttpByteRange>* ranges); 56 57 // Scans the '\r\n'-delimited headers for the given header name. Returns 58 // true if a match is found. Input is assumed to be well-formed. 59 // TODO(darin): kill this 60 static bool HasHeader(const std::string& headers, const char* name); 61 62 // Strips all header lines from |headers| whose name matches 63 // |headers_to_remove|. |headers_to_remove| is a list of null-terminated 64 // lower-case header names, with array length |headers_to_remove_len|. 65 // Returns the stripped header lines list, separated by "\r\n". 66 static std::string StripHeaders(const std::string& headers, 67 const char* const headers_to_remove[], 68 size_t headers_to_remove_len); 69 70 // Multiple occurances of some headers cannot be coalesced into a comma- 71 // separated list since their values are (or contain) unquoted HTTP-date 72 // values, which may contain a comma (see RFC 2616 section 3.3.1). 73 static bool IsNonCoalescingHeader(std::string::const_iterator name_begin, 74 std::string::const_iterator name_end); 75 static bool IsNonCoalescingHeader(const std::string& name) { 76 return IsNonCoalescingHeader(name.begin(), name.end()); 77 } 78 79 // Return true if the character is HTTP "linear white space" (SP | HT). 80 // This definition corresponds with the HTTP_LWS macro, and does not match 81 // newlines. 82 static bool IsLWS(char c); 83 84 // Trim HTTP_LWS chars from the beginning and end of the string. 85 static void TrimLWS(std::string::const_iterator* begin, 86 std::string::const_iterator* end); 87 88 // Whether the character is the start of a quotation mark. 89 static bool IsQuote(char c); 90 91 // RFC 2616 Sec 2.2: 92 // quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) 93 // Unquote() strips the surrounding quotemarks off a string, and unescapes 94 // any quoted-pair to obtain the value contained by the quoted-string. 95 // If the input is not quoted, then it works like the identity function. 96 static std::string Unquote(std::string::const_iterator begin, 97 std::string::const_iterator end); 98 99 // Same as above. 100 static std::string Unquote(const std::string& str); 101 102 // The reverse of Unquote() -- escapes and surrounds with " 103 static std::string Quote(const std::string& str); 104 105 // Returns the start of the status line, or -1 if no status line was found. 106 // This allows for 4 bytes of junk to precede the status line (which is what 107 // mozilla does too). 108 static int LocateStartOfStatusLine(const char* buf, int buf_len); 109 110 // Returns index beyond the end-of-headers marker or -1 if not found. RFC 111 // 2616 defines the end-of-headers marker as a double CRLF; however, some 112 // servers only send back LFs (e.g., Unix-based CGI scripts written using the 113 // ASIS Apache module). This function therefore accepts the pattern LF[CR]LF 114 // as end-of-headers (just like Mozilla). 115 // The parameter |i| is the offset within |buf| to begin searching from. 116 static int LocateEndOfHeaders(const char* buf, int buf_len, int i = 0); 117 118 // Assemble "raw headers" in the format required by HttpResponseHeaders. 119 // This involves normalizing line terminators, converting [CR]LF to \0 and 120 // handling HTTP line continuations (i.e., lines starting with LWS are 121 // continuations of the previous line). |buf_len| indicates the position of 122 // the end-of-headers marker as defined by LocateEndOfHeaders. 123 static std::string AssembleRawHeaders(const char* buf, int buf_len); 124 125 // Given a comma separated ordered list of language codes, return 126 // the list with a qvalue appended to each language. 127 // The way qvalues are assigned is rather simple. The qvalue 128 // starts with 1.0 and is decremented by 0.2 for each successive entry 129 // in the list until it reaches 0.2. All the entries after that are 130 // assigned the same qvalue of 0.2. Also, note that the 1st language 131 // will not have a qvalue added because the absence of a qvalue implicitly 132 // means q=1.0. 133 // 134 // When making a http request, this should be used to determine what 135 // to put in Accept-Language header. If a comma separated list of language 136 // codes *without* qvalue is sent, web servers regard all 137 // of them as having q=1.0 and pick one of them even though it may not 138 // be at the beginning of the list (see http://crbug.com/5899). 139 static std::string GenerateAcceptLanguageHeader( 140 const std::string& raw_language_list); 141 142 // Given a charset, return the list with a qvalue. If charset is utf-8, 143 // it will return 'utf-8,*;q=0.5'. Otherwise (e.g. 'euc-jp'), it'll return 144 // 'euc-jp,utf-8;q=0.7,*;q=0.3'. 145 static std::string GenerateAcceptCharsetHeader(const std::string& charset); 146 147 // Helper. If |*headers| already contains |header_name| do nothing, 148 // otherwise add <header_name> ": " <header_value> to the end of the list. 149 static void AppendHeaderIfMissing(const char* header_name, 150 const std::string& header_value, 151 std::string* headers); 152 153 // Used to iterate over the name/value pairs of HTTP headers. To iterate 154 // over the values in a multi-value header, use ValuesIterator. 155 // See AssembleRawHeaders for joining line continuations (this iterator 156 // does not expect any). 157 class HeadersIterator { 158 public: 159 HeadersIterator(std::string::const_iterator headers_begin, 160 std::string::const_iterator headers_end, 161 const std::string& line_delimiter); 162 163 // Advances the iterator to the next header, if any. Returns true if there 164 // is a next header. Use name* and values* methods to access the resultant 165 // header name and values. 166 bool GetNext(); 167 168 // Iterates through the list of headers, starting with the current position 169 // and looks for the specified header. Note that the name _must_ be 170 // lower cased. 171 // If the header was found, the return value will be true and the current 172 // position points to the header. If the return value is false, the 173 // current position will be at the end of the headers. 174 bool AdvanceTo(const char* lowercase_name); 175 176 void Reset() { 177 lines_.Reset(); 178 } 179 180 std::string::const_iterator name_begin() const { 181 return name_begin_; 182 } 183 std::string::const_iterator name_end() const { 184 return name_end_; 185 } 186 std::string name() const { 187 return std::string(name_begin_, name_end_); 188 } 189 190 std::string::const_iterator values_begin() const { 191 return values_begin_; 192 } 193 std::string::const_iterator values_end() const { 194 return values_end_; 195 } 196 std::string values() const { 197 return std::string(values_begin_, values_end_); 198 } 199 200 private: 201 StringTokenizer lines_; 202 std::string::const_iterator name_begin_; 203 std::string::const_iterator name_end_; 204 std::string::const_iterator values_begin_; 205 std::string::const_iterator values_end_; 206 }; 207 208 // Used to iterate over deliminated values in a HTTP header. HTTP LWS is 209 // automatically trimmed from the resulting values. 210 // 211 // When using this class to iterate over response header values, beware that 212 // for some headers (e.g., Last-Modified), commas are not used as delimiters. 213 // This iterator should be avoided for headers like that which are considered 214 // non-coalescing (see IsNonCoalescingHeader). 215 // 216 // This iterator is careful to skip over delimiters found inside an HTTP 217 // quoted string. 218 // 219 class ValuesIterator { 220 public: 221 ValuesIterator(std::string::const_iterator values_begin, 222 std::string::const_iterator values_end, 223 char delimiter); 224 225 // Advances the iterator to the next value, if any. Returns true if there 226 // is a next value. Use value* methods to access the resultant value. 227 bool GetNext(); 228 229 std::string::const_iterator value_begin() const { 230 return value_begin_; 231 } 232 std::string::const_iterator value_end() const { 233 return value_end_; 234 } 235 std::string value() const { 236 return std::string(value_begin_, value_end_); 237 } 238 239 private: 240 StringTokenizer values_; 241 std::string::const_iterator value_begin_; 242 std::string::const_iterator value_end_; 243 }; 244 }; 245 246 } // namespace net 247 248 #endif // NET_HTTP_HTTP_UTIL_H_ 249