Home | History | Annotate | Download | only in http
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Derived from:
      6 //   mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp
      7 // The license block is:
      8 /* ***** BEGIN LICENSE BLOCK *****
      9  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
     10  *
     11  * The contents of this file are subject to the Mozilla Public License Version
     12  * 1.1 (the "License"); you may not use this file except in compliance with
     13  * the License. You may obtain a copy of the License at
     14  * http://www.mozilla.org/MPL/
     15  *
     16  * Software distributed under the License is distributed on an "AS IS" basis,
     17  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
     18  * for the specific language governing rights and limitations under the
     19  * License.
     20  *
     21  * The Original Code is Mozilla.
     22  *
     23  * The Initial Developer of the Original Code is
     24  * Netscape Communications.
     25  * Portions created by the Initial Developer are Copyright (C) 2001
     26  * the Initial Developer. All Rights Reserved.
     27  *
     28  * Contributor(s):
     29  *   Darin Fisher <darin (at) netscape.com> (original author)
     30  *
     31  * Alternatively, the contents of this file may be used under the terms of
     32  * either the GNU General Public License Version 2 or later (the "GPL"), or
     33  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
     34  * in which case the provisions of the GPL or the LGPL are applicable instead
     35  * of those above. If you wish to allow use of your version of this file only
     36  * under the terms of either the GPL or the LGPL, and not to allow others to
     37  * use your version of this file under the terms of the MPL, indicate your
     38  * decision by deleting the provisions above and replace them with the notice
     39  * and other provisions required by the GPL or the LGPL. If you do not delete
     40  * the provisions above, a recipient may use your version of this file under
     41  * the terms of any one of the MPL, the GPL or the LGPL.
     42  *
     43  * ***** END LICENSE BLOCK ***** */
     44 
     45 #include "net/http/http_chunked_decoder.h"
     46 
     47 #include <algorithm>
     48 
     49 #include "base/logging.h"
     50 #include "base/strings/string_number_conversions.h"
     51 #include "base/strings/string_piece.h"
     52 #include "base/strings/string_util.h"
     53 #include "net/base/net_errors.h"
     54 
     55 namespace net {
     56 
     57 // Absurdly long size to avoid imposing a constraint on chunked encoding
     58 // extensions.
     59 const size_t HttpChunkedDecoder::kMaxLineBufLen = 16384;
     60 
     61 HttpChunkedDecoder::HttpChunkedDecoder()
     62     : chunk_remaining_(0),
     63       chunk_terminator_remaining_(false),
     64       reached_last_chunk_(false),
     65       reached_eof_(false),
     66       bytes_after_eof_(0) {
     67 }
     68 
     69 int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) {
     70   int result = 0;
     71 
     72   while (buf_len) {
     73     if (chunk_remaining_) {
     74       int num = std::min(chunk_remaining_, buf_len);
     75 
     76       buf_len -= num;
     77       chunk_remaining_ -= num;
     78 
     79       result += num;
     80       buf += num;
     81 
     82       // After each chunk's data there should be a CRLF
     83       if (!chunk_remaining_)
     84         chunk_terminator_remaining_ = true;
     85       continue;
     86     } else if (reached_eof_) {
     87       bytes_after_eof_ += buf_len;
     88       break;  // Done!
     89     }
     90 
     91     int bytes_consumed = ScanForChunkRemaining(buf, buf_len);
     92     if (bytes_consumed < 0)
     93       return bytes_consumed; // Error
     94 
     95     buf_len -= bytes_consumed;
     96     if (buf_len)
     97       memmove(buf, buf + bytes_consumed, buf_len);
     98   }
     99 
    100   return result;
    101 }
    102 
    103 int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) {
    104   DCHECK_EQ(0, chunk_remaining_);
    105   DCHECK_GT(buf_len, 0);
    106 
    107   int bytes_consumed = 0;
    108 
    109   size_t index_of_lf = base::StringPiece(buf, buf_len).find('\n');
    110   if (index_of_lf != base::StringPiece::npos) {
    111     buf_len = static_cast<int>(index_of_lf);
    112     if (buf_len && buf[buf_len - 1] == '\r')  // Eliminate a preceding CR.
    113       buf_len--;
    114     bytes_consumed = static_cast<int>(index_of_lf) + 1;
    115 
    116     // Make buf point to the full line buffer to parse.
    117     if (!line_buf_.empty()) {
    118       line_buf_.append(buf, buf_len);
    119       buf = line_buf_.data();
    120       buf_len = static_cast<int>(line_buf_.size());
    121     }
    122 
    123     if (reached_last_chunk_) {
    124       if (buf_len)
    125         DVLOG(1) << "ignoring http trailer";
    126       else
    127         reached_eof_ = true;
    128     } else if (chunk_terminator_remaining_) {
    129       if (buf_len) {
    130         DLOG(ERROR) << "chunk data not terminated properly";
    131         return ERR_INVALID_CHUNKED_ENCODING;
    132       }
    133       chunk_terminator_remaining_ = false;
    134     } else if (buf_len) {
    135       // Ignore any chunk-extensions.
    136       size_t index_of_semicolon = base::StringPiece(buf, buf_len).find(';');
    137       if (index_of_semicolon != base::StringPiece::npos)
    138         buf_len = static_cast<int>(index_of_semicolon);
    139 
    140       if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) {
    141         DLOG(ERROR) << "Failed parsing HEX from: " <<
    142             std::string(buf, buf_len);
    143         return ERR_INVALID_CHUNKED_ENCODING;
    144       }
    145 
    146       if (chunk_remaining_ == 0)
    147         reached_last_chunk_ = true;
    148     } else {
    149       DLOG(ERROR) << "missing chunk-size";
    150       return ERR_INVALID_CHUNKED_ENCODING;
    151     }
    152     line_buf_.clear();
    153   } else {
    154     // Save the partial line; wait for more data.
    155     bytes_consumed = buf_len;
    156 
    157     // Ignore a trailing CR
    158     if (buf[buf_len - 1] == '\r')
    159       buf_len--;
    160 
    161     if (line_buf_.length() + buf_len > kMaxLineBufLen) {
    162       DLOG(ERROR) << "Chunked line length too long";
    163       return ERR_INVALID_CHUNKED_ENCODING;
    164     }
    165 
    166     line_buf_.append(buf, buf_len);
    167   }
    168   return bytes_consumed;
    169 }
    170 
    171 
    172 // While the HTTP 1.1 specification defines chunk-size as 1*HEX
    173 // some sites rely on more lenient parsing.
    174 // http://www.yahoo.com/, for example, pads chunk-size with trailing spaces
    175 // (0x20) to be 7 characters long, such as "819b   ".
    176 //
    177 // A comparison of browsers running on WindowsXP shows that
    178 // they will parse the following inputs (egrep syntax):
    179 //
    180 // Let \X be the character class for a hex digit: [0-9a-fA-F]
    181 //
    182 //   RFC 2616: ^\X+$
    183 //        IE7: ^\X+[^\X]*$
    184 // Safari 3.1: ^[\t\r ]*\X+[\t ]*$
    185 //  Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$
    186 // Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$
    187 //
    188 // Our strategy is to be as strict as possible, while not breaking
    189 // known sites.
    190 //
    191 //         Us: ^\X+[ ]*$
    192 bool HttpChunkedDecoder::ParseChunkSize(const char* start, int len, int* out) {
    193   DCHECK_GE(len, 0);
    194 
    195   // Strip trailing spaces
    196   while (len && start[len - 1] == ' ')
    197     len--;
    198 
    199   // Be more restrictive than HexStringToInt;
    200   // don't allow inputs with leading "-", "+", "0x", "0X"
    201   base::StringPiece chunk_size(start, len);
    202   if (chunk_size.find_first_not_of("0123456789abcdefABCDEF")
    203       != base::StringPiece::npos) {
    204     return false;
    205   }
    206 
    207   int parsed_number;
    208   bool ok = base::HexStringToInt(chunk_size, &parsed_number);
    209   if (ok && parsed_number >= 0) {
    210     *out = parsed_number;
    211     return true;
    212   }
    213   return false;
    214 }
    215 
    216 }  // namespace net
    217