Home | History | Annotate | Download | only in http
      1 /* ***** BEGIN LICENSE BLOCK *****
      2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
      3  *
      4  * The contents of this file are subject to the Mozilla Public License Version
      5  * 1.1 (the "License"); you may not use this file except in compliance with
      6  * the License. You may obtain a copy of the License at
      7  * http://www.mozilla.org/MPL/
      8  *
      9  * Software distributed under the License is distributed on an "AS IS" basis,
     10  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
     11  * for the specific language governing rights and limitations under the
     12  * License.
     13  *
     14  * The Original Code is Mozilla.
     15  *
     16  * The Initial Developer of the Original Code is
     17  * Netscape Communications.
     18  * Portions created by the Initial Developer are Copyright (C) 2001
     19  * the Initial Developer. All Rights Reserved.
     20  *
     21  * Contributor(s):
     22  *   Darin Fisher <darin (at) netscape.com> (original author)
     23  *
     24  * Alternatively, the contents of this file may be used under the terms of
     25  * either the GNU General Public License Version 2 or later (the "GPL"), or
     26  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
     27  * in which case the provisions of the GPL or the LGPL are applicable instead
     28  * of those above. If you wish to allow use of your version of this file only
     29  * under the terms of either the GPL or the LGPL, and not to allow others to
     30  * use your version of this file under the terms of the MPL, indicate your
     31  * decision by deleting the provisions above and replace them with the notice
     32  * and other provisions required by the GPL or the LGPL. If you do not delete
     33  * the provisions above, a recipient may use your version of this file under
     34  * the terms of any one of the MPL, the GPL or the LGPL.
     35  *
     36  * ***** END LICENSE BLOCK ***** */
     37 
     38 // Derived from:
     39 // mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp
     40 
     41 #include "net/http/http_chunked_decoder.h"
     42 
     43 #include "base/logging.h"
     44 #include "base/string_piece.h"
     45 #include "base/string_util.h"
     46 #include "net/base/net_errors.h"
     47 
     48 namespace net {
     49 
     50 HttpChunkedDecoder::HttpChunkedDecoder()
     51     : chunk_remaining_(0),
     52       chunk_terminator_remaining_(false),
     53       reached_last_chunk_(false),
     54       reached_eof_(false),
     55       bytes_after_eof_(0) {
     56 }
     57 
     58 int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) {
     59   int result = 0;
     60 
     61   while (buf_len) {
     62     if (chunk_remaining_) {
     63       int num = std::min(chunk_remaining_, buf_len);
     64 
     65       buf_len -= num;
     66       chunk_remaining_ -= num;
     67 
     68       result += num;
     69       buf += num;
     70 
     71       // After each chunk's data there should be a CRLF
     72       if (!chunk_remaining_)
     73         chunk_terminator_remaining_ = true;
     74       continue;
     75     } else if (reached_eof_) {
     76       bytes_after_eof_ += buf_len;
     77       break;  // Done!
     78     }
     79 
     80     int bytes_consumed = ScanForChunkRemaining(buf, buf_len);
     81     if (bytes_consumed < 0)
     82       return bytes_consumed; // Error
     83 
     84     buf_len -= bytes_consumed;
     85     if (buf_len)
     86       memmove(buf, buf + bytes_consumed, buf_len);
     87   }
     88 
     89   return result;
     90 }
     91 
     92 int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) {
     93   DCHECK(chunk_remaining_ == 0);
     94   DCHECK(buf_len > 0);
     95 
     96   int bytes_consumed = 0;
     97 
     98   size_t index_of_lf = base::StringPiece(buf, buf_len).find('\n');
     99   if (index_of_lf != base::StringPiece::npos) {
    100     buf_len = static_cast<int>(index_of_lf);
    101     if (buf_len && buf[buf_len - 1] == '\r')  // Eliminate a preceding CR.
    102       buf_len--;
    103     bytes_consumed = static_cast<int>(index_of_lf) + 1;
    104 
    105     // Make buf point to the full line buffer to parse.
    106     if (!line_buf_.empty()) {
    107       line_buf_.append(buf, buf_len);
    108       buf = line_buf_.data();
    109       buf_len = static_cast<int>(line_buf_.size());
    110     }
    111 
    112     if (reached_last_chunk_) {
    113       if (buf_len) {
    114         DLOG(INFO) << "ignoring http trailer";
    115       } else {
    116         reached_eof_ = true;
    117       }
    118     } else if (chunk_terminator_remaining_) {
    119        if (buf_len) {
    120          DLOG(ERROR) << "chunk data not terminated properly";
    121          return ERR_INVALID_CHUNKED_ENCODING;
    122        }
    123        chunk_terminator_remaining_ = false;
    124     } else if (buf_len) {
    125       // Ignore any chunk-extensions.
    126       size_t index_of_semicolon = base::StringPiece(buf, buf_len).find(';');
    127       if (index_of_semicolon != base::StringPiece::npos)
    128         buf_len = static_cast<int>(index_of_semicolon);
    129 
    130       if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) {
    131         DLOG(ERROR) << "Failed parsing HEX from: " <<
    132             std::string(buf, buf_len);
    133         return ERR_INVALID_CHUNKED_ENCODING;
    134       }
    135 
    136       if (chunk_remaining_ == 0)
    137         reached_last_chunk_ = true;
    138     } else {
    139       DLOG(ERROR) << "missing chunk-size";
    140       return ERR_INVALID_CHUNKED_ENCODING;
    141     }
    142     line_buf_.clear();
    143   } else {
    144     // Save the partial line; wait for more data.
    145     bytes_consumed = buf_len;
    146 
    147     // Ignore a trailing CR
    148     if (buf[buf_len - 1] == '\r')
    149       buf_len--;
    150 
    151     line_buf_.append(buf, buf_len);
    152   }
    153   return bytes_consumed;
    154 }
    155 
    156 
    157 // While the HTTP 1.1 specification defines chunk-size as 1*HEX
    158 // some sites rely on more lenient parsing.
    159 // http://www.yahoo.com/, for example, pads chunk-size with trailing spaces
    160 // (0x20) to be 7 characters long, such as "819b   ".
    161 //
    162 // A comparison of browsers running on WindowsXP shows that
    163 // they will parse the following inputs (egrep syntax):
    164 //
    165 // Let \X be the character class for a hex digit: [0-9a-fA-F]
    166 //
    167 //   RFC 2616: ^\X+$
    168 //        IE7: ^\X+[^\X]*$
    169 // Safari 3.1: ^[\t\r ]*\X+[\t ]*$
    170 //  Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$
    171 // Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$
    172 //
    173 // Our strategy is to be as strict as possible, while not breaking
    174 // known sites.
    175 //
    176 //         Us: ^\X+[ ]*$
    177 bool HttpChunkedDecoder::ParseChunkSize(const char* start, int len, int* out) {
    178   DCHECK(len >= 0);
    179 
    180   // Strip trailing spaces
    181   while (len && start[len - 1] == ' ')
    182     len--;
    183 
    184   // Be more restrictive than HexStringToInt;
    185   // don't allow inputs with leading "-", "+", "0x", "0X"
    186   if (base::StringPiece(start, len).find_first_not_of("0123456789abcdefABCDEF")
    187       != base::StringPiece::npos)
    188     return false;
    189 
    190   int parsed_number;
    191   bool ok = HexStringToInt(std::string(start, len), &parsed_number);
    192   if (ok && parsed_number >= 0) {
    193     *out = parsed_number;
    194     return true;
    195   }
    196   return false;
    197 }
    198 
    199 }  // namespace net
    200