Home | History | Annotate | Download | only in http
      1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef NET_HTTP_HTTP_UTIL_H_
      6 #define NET_HTTP_HTTP_UTIL_H_
      7 
      8 #include <vector>
      9 
     10 #include "base/string_tokenizer.h"
     11 #include "googleurl/src/gurl.h"
     12 #include "net/http/http_byte_range.h"
     13 
     14 // This is a macro to support extending this string literal at compile time.
     15 // Please excuse me polluting your global namespace!
     16 #define HTTP_LWS " \t"
     17 
     18 namespace net {
     19 
     20 class HttpUtil {
     21  public:
     22    // Returns the absolute path of the URL, to be used for the http request.
     23    // The absolute path starts with a '/' and may contain a query.
     24    static std::string PathForRequest(const GURL& url);
     25 
     26    // Returns the absolute URL, to be used for the http request. This url is
     27    // made up of the protocol, host, [port], path, [query]. Everything else
     28    // is stripped (username, password, reference).
     29    static std::string SpecForRequest(const GURL& url);
     30 
     31   // Locates the next occurance of delimiter in line, skipping over quoted
     32   // strings (e.g., commas will not be treated as delimiters if they appear
     33   // within a quoted string).  Returns the offset of the found delimiter or
     34   // line.size() if no delimiter was found.
     35   static size_t FindDelimiter(const std::string& line,
     36                               size_t search_start,
     37                               char delimiter);
     38 
     39   // Parses the value of a Content-Type header.  The resulting mime_type and
     40   // charset values are normalized to lowercase.  The mime_type and charset
     41   // output values are only modified if the content_type_str contains a mime
     42   // type and charset value, respectively.
     43   static void ParseContentType(const std::string& content_type_str,
     44                                std::string* mime_type,
     45                                std::string* charset,
     46                                bool *had_charset);
     47 
     48   // Scans the headers and look for the first "Range" header in |headers|,
     49   // if "Range" exists and the first one of it is well formatted then returns
     50   // true, |ranges| will contain a list of valid ranges. If return
     51   // value is false then values in |ranges| should not be used. The format of
     52   // "Range" header is defined in RFC 2616 Section 14.35.1.
     53   // http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35.1
     54   static bool ParseRanges(const std::string& headers,
     55                           std::vector<HttpByteRange>* ranges);
     56 
     57   // Scans the '\r\n'-delimited headers for the given header name.  Returns
     58   // true if a match is found.  Input is assumed to be well-formed.
     59   // TODO(darin): kill this
     60   static bool HasHeader(const std::string& headers, const char* name);
     61 
     62   // Strips all header lines from |headers| whose name matches
     63   // |headers_to_remove|. |headers_to_remove| is a list of null-terminated
     64   // lower-case header names, with array length |headers_to_remove_len|.
     65   // Returns the stripped header lines list, separated by "\r\n".
     66   static std::string StripHeaders(const std::string& headers,
     67                                   const char* const headers_to_remove[],
     68                                   size_t headers_to_remove_len);
     69 
     70   // Multiple occurances of some headers cannot be coalesced into a comma-
     71   // separated list since their values are (or contain) unquoted HTTP-date
     72   // values, which may contain a comma (see RFC 2616 section 3.3.1).
     73   static bool IsNonCoalescingHeader(std::string::const_iterator name_begin,
     74                                     std::string::const_iterator name_end);
     75   static bool IsNonCoalescingHeader(const std::string& name) {
     76     return IsNonCoalescingHeader(name.begin(), name.end());
     77   }
     78 
     79   // Return true if the character is HTTP "linear white space" (SP | HT).
     80   // This definition corresponds with the HTTP_LWS macro, and does not match
     81   // newlines.
     82   static bool IsLWS(char c);
     83 
     84   // Trim HTTP_LWS chars from the beginning and end of the string.
     85   static void TrimLWS(std::string::const_iterator* begin,
     86                       std::string::const_iterator* end);
     87 
     88   // Whether the character is the start of a quotation mark.
     89   static bool IsQuote(char c);
     90 
     91   // RFC 2616 Sec 2.2:
     92   // quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
     93   // Unquote() strips the surrounding quotemarks off a string, and unescapes
     94   // any quoted-pair to obtain the value contained by the quoted-string.
     95   // If the input is not quoted, then it works like the identity function.
     96   static std::string Unquote(std::string::const_iterator begin,
     97                              std::string::const_iterator end);
     98 
     99   // Same as above.
    100   static std::string Unquote(const std::string& str);
    101 
    102   // The reverse of Unquote() -- escapes and surrounds with "
    103   static std::string Quote(const std::string& str);
    104 
    105   // Returns the start of the status line, or -1 if no status line was found.
    106   // This allows for 4 bytes of junk to precede the status line (which is what
    107   // mozilla does too).
    108   static int LocateStartOfStatusLine(const char* buf, int buf_len);
    109 
    110   // Returns index beyond the end-of-headers marker or -1 if not found.  RFC
    111   // 2616 defines the end-of-headers marker as a double CRLF; however, some
    112   // servers only send back LFs (e.g., Unix-based CGI scripts written using the
    113   // ASIS Apache module).  This function therefore accepts the pattern LF[CR]LF
    114   // as end-of-headers (just like Mozilla).
    115   // The parameter |i| is the offset within |buf| to begin searching from.
    116   static int LocateEndOfHeaders(const char* buf, int buf_len, int i = 0);
    117 
    118   // Assemble "raw headers" in the format required by HttpResponseHeaders.
    119   // This involves normalizing line terminators, converting [CR]LF to \0 and
    120   // handling HTTP line continuations (i.e., lines starting with LWS are
    121   // continuations of the previous line).  |buf_len| indicates the position of
    122   // the end-of-headers marker as defined by LocateEndOfHeaders.
    123   static std::string AssembleRawHeaders(const char* buf, int buf_len);
    124 
    125   // Given a comma separated ordered list of language codes, return
    126   // the list with a qvalue appended to each language.
    127   // The way qvalues are assigned is rather simple. The qvalue
    128   // starts with 1.0 and is decremented by 0.2 for each successive entry
    129   // in the list until it reaches 0.2. All the entries after that are
    130   // assigned the same qvalue of 0.2. Also, note that the 1st language
    131   // will not have a qvalue added because the absence of a qvalue implicitly
    132   // means q=1.0.
    133   //
    134   // When making a http request, this should be used to determine what
    135   // to put in Accept-Language header. If a comma separated list of language
    136   // codes *without* qvalue is sent, web servers regard all
    137   // of them as having q=1.0 and pick one of them even though it may not
    138   // be at the beginning of the list (see http://crbug.com/5899).
    139   static std::string GenerateAcceptLanguageHeader(
    140       const std::string& raw_language_list);
    141 
    142   // Given a charset, return the list with a qvalue. If charset is utf-8,
    143   // it will return 'utf-8,*;q=0.5'. Otherwise (e.g. 'euc-jp'), it'll return
    144   // 'euc-jp,utf-8;q=0.7,*;q=0.3'.
    145   static std::string GenerateAcceptCharsetHeader(const std::string& charset);
    146 
    147   // Helper. If |*headers| already contains |header_name| do nothing,
    148   // otherwise add <header_name> ": " <header_value> to the end of the list.
    149   static void AppendHeaderIfMissing(const char* header_name,
    150                                     const std::string& header_value,
    151                                     std::string* headers);
    152 
    153   // Used to iterate over the name/value pairs of HTTP headers.  To iterate
    154   // over the values in a multi-value header, use ValuesIterator.
    155   // See AssembleRawHeaders for joining line continuations (this iterator
    156   // does not expect any).
    157   class HeadersIterator {
    158    public:
    159     HeadersIterator(std::string::const_iterator headers_begin,
    160                     std::string::const_iterator headers_end,
    161                     const std::string& line_delimiter);
    162 
    163     // Advances the iterator to the next header, if any.  Returns true if there
    164     // is a next header.  Use name* and values* methods to access the resultant
    165     // header name and values.
    166     bool GetNext();
    167 
    168     // Iterates through the list of headers, starting with the current position
    169     // and looks for the specified header.  Note that the name _must_ be
    170     // lower cased.
    171     // If the header was found, the return value will be true and the current
    172     // position points to the header.  If the return value is false, the
    173     // current position will be at the end of the headers.
    174     bool AdvanceTo(const char* lowercase_name);
    175 
    176     void Reset() {
    177       lines_.Reset();
    178     }
    179 
    180     std::string::const_iterator name_begin() const {
    181       return name_begin_;
    182     }
    183     std::string::const_iterator name_end() const {
    184       return name_end_;
    185     }
    186     std::string name() const {
    187       return std::string(name_begin_, name_end_);
    188     }
    189 
    190     std::string::const_iterator values_begin() const {
    191       return values_begin_;
    192     }
    193     std::string::const_iterator values_end() const {
    194       return values_end_;
    195     }
    196     std::string values() const {
    197       return std::string(values_begin_, values_end_);
    198     }
    199 
    200    private:
    201     StringTokenizer lines_;
    202     std::string::const_iterator name_begin_;
    203     std::string::const_iterator name_end_;
    204     std::string::const_iterator values_begin_;
    205     std::string::const_iterator values_end_;
    206   };
    207 
    208   // Used to iterate over deliminated values in a HTTP header.  HTTP LWS is
    209   // automatically trimmed from the resulting values.
    210   //
    211   // When using this class to iterate over response header values, beware that
    212   // for some headers (e.g., Last-Modified), commas are not used as delimiters.
    213   // This iterator should be avoided for headers like that which are considered
    214   // non-coalescing (see IsNonCoalescingHeader).
    215   //
    216   // This iterator is careful to skip over delimiters found inside an HTTP
    217   // quoted string.
    218   //
    219   class ValuesIterator {
    220    public:
    221     ValuesIterator(std::string::const_iterator values_begin,
    222                    std::string::const_iterator values_end,
    223                    char delimiter);
    224 
    225     // Advances the iterator to the next value, if any.  Returns true if there
    226     // is a next value.  Use value* methods to access the resultant value.
    227     bool GetNext();
    228 
    229     std::string::const_iterator value_begin() const {
    230       return value_begin_;
    231     }
    232     std::string::const_iterator value_end() const {
    233       return value_end_;
    234     }
    235     std::string value() const {
    236       return std::string(value_begin_, value_end_);
    237     }
    238 
    239    private:
    240     StringTokenizer values_;
    241     std::string::const_iterator value_begin_;
    242     std::string::const_iterator value_end_;
    243   };
    244 };
    245 
    246 }  // namespace net
    247 
    248 #endif  // NET_HTTP_HTTP_UTIL_H_
    249