1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Derived from: 6 // mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp 7 // The license block is: 8 /* ***** BEGIN LICENSE BLOCK ***** 9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 10 * 11 * The contents of this file are subject to the Mozilla Public License Version 12 * 1.1 (the "License"); you may not use this file except in compliance with 13 * the License. You may obtain a copy of the License at 14 * http://www.mozilla.org/MPL/ 15 * 16 * Software distributed under the License is distributed on an "AS IS" basis, 17 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 18 * for the specific language governing rights and limitations under the 19 * License. 20 * 21 * The Original Code is Mozilla. 22 * 23 * The Initial Developer of the Original Code is 24 * Netscape Communications. 25 * Portions created by the Initial Developer are Copyright (C) 2001 26 * the Initial Developer. All Rights Reserved. 27 * 28 * Contributor(s): 29 * Darin Fisher <darin (at) netscape.com> (original author) 30 * 31 * Alternatively, the contents of this file may be used under the terms of 32 * either the GNU General Public License Version 2 or later (the "GPL"), or 33 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 34 * in which case the provisions of the GPL or the LGPL are applicable instead 35 * of those above. If you wish to allow use of your version of this file only 36 * under the terms of either the GPL or the LGPL, and not to allow others to 37 * use your version of this file under the terms of the MPL, indicate your 38 * decision by deleting the provisions above and replace them with the notice 39 * and other provisions required by the GPL or the LGPL. If you do not delete 40 * the provisions above, a recipient may use your version of this file under 41 * the terms of any one of the MPL, the GPL or the LGPL. 42 * 43 * ***** END LICENSE BLOCK ***** */ 44 45 #include "net/http/http_chunked_decoder.h" 46 47 #include <algorithm> 48 49 #include "base/logging.h" 50 #include "base/strings/string_number_conversions.h" 51 #include "base/strings/string_piece.h" 52 #include "base/strings/string_util.h" 53 #include "net/base/net_errors.h" 54 55 namespace net { 56 57 // Absurdly long size to avoid imposing a constraint on chunked encoding 58 // extensions. 59 const size_t HttpChunkedDecoder::kMaxLineBufLen = 16384; 60 61 HttpChunkedDecoder::HttpChunkedDecoder() 62 : chunk_remaining_(0), 63 chunk_terminator_remaining_(false), 64 reached_last_chunk_(false), 65 reached_eof_(false), 66 bytes_after_eof_(0) { 67 } 68 69 int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) { 70 int result = 0; 71 72 while (buf_len) { 73 if (chunk_remaining_) { 74 int num = std::min(chunk_remaining_, buf_len); 75 76 buf_len -= num; 77 chunk_remaining_ -= num; 78 79 result += num; 80 buf += num; 81 82 // After each chunk's data there should be a CRLF 83 if (!chunk_remaining_) 84 chunk_terminator_remaining_ = true; 85 continue; 86 } else if (reached_eof_) { 87 bytes_after_eof_ += buf_len; 88 break; // Done! 89 } 90 91 int bytes_consumed = ScanForChunkRemaining(buf, buf_len); 92 if (bytes_consumed < 0) 93 return bytes_consumed; // Error 94 95 buf_len -= bytes_consumed; 96 if (buf_len) 97 memmove(buf, buf + bytes_consumed, buf_len); 98 } 99 100 return result; 101 } 102 103 int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) { 104 DCHECK_EQ(0, chunk_remaining_); 105 DCHECK_GT(buf_len, 0); 106 107 int bytes_consumed = 0; 108 109 size_t index_of_lf = base::StringPiece(buf, buf_len).find('\n'); 110 if (index_of_lf != base::StringPiece::npos) { 111 buf_len = static_cast<int>(index_of_lf); 112 if (buf_len && buf[buf_len - 1] == '\r') // Eliminate a preceding CR. 113 buf_len--; 114 bytes_consumed = static_cast<int>(index_of_lf) + 1; 115 116 // Make buf point to the full line buffer to parse. 117 if (!line_buf_.empty()) { 118 line_buf_.append(buf, buf_len); 119 buf = line_buf_.data(); 120 buf_len = static_cast<int>(line_buf_.size()); 121 } 122 123 if (reached_last_chunk_) { 124 if (buf_len) 125 DVLOG(1) << "ignoring http trailer"; 126 else 127 reached_eof_ = true; 128 } else if (chunk_terminator_remaining_) { 129 if (buf_len) { 130 DLOG(ERROR) << "chunk data not terminated properly"; 131 return ERR_INVALID_CHUNKED_ENCODING; 132 } 133 chunk_terminator_remaining_ = false; 134 } else if (buf_len) { 135 // Ignore any chunk-extensions. 136 size_t index_of_semicolon = base::StringPiece(buf, buf_len).find(';'); 137 if (index_of_semicolon != base::StringPiece::npos) 138 buf_len = static_cast<int>(index_of_semicolon); 139 140 if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) { 141 DLOG(ERROR) << "Failed parsing HEX from: " << 142 std::string(buf, buf_len); 143 return ERR_INVALID_CHUNKED_ENCODING; 144 } 145 146 if (chunk_remaining_ == 0) 147 reached_last_chunk_ = true; 148 } else { 149 DLOG(ERROR) << "missing chunk-size"; 150 return ERR_INVALID_CHUNKED_ENCODING; 151 } 152 line_buf_.clear(); 153 } else { 154 // Save the partial line; wait for more data. 155 bytes_consumed = buf_len; 156 157 // Ignore a trailing CR 158 if (buf[buf_len - 1] == '\r') 159 buf_len--; 160 161 if (line_buf_.length() + buf_len > kMaxLineBufLen) { 162 DLOG(ERROR) << "Chunked line length too long"; 163 return ERR_INVALID_CHUNKED_ENCODING; 164 } 165 166 line_buf_.append(buf, buf_len); 167 } 168 return bytes_consumed; 169 } 170 171 172 // While the HTTP 1.1 specification defines chunk-size as 1*HEX 173 // some sites rely on more lenient parsing. 174 // http://www.yahoo.com/, for example, pads chunk-size with trailing spaces 175 // (0x20) to be 7 characters long, such as "819b ". 176 // 177 // A comparison of browsers running on WindowsXP shows that 178 // they will parse the following inputs (egrep syntax): 179 // 180 // Let \X be the character class for a hex digit: [0-9a-fA-F] 181 // 182 // RFC 2616: ^\X+$ 183 // IE7: ^\X+[^\X]*$ 184 // Safari 3.1: ^[\t\r ]*\X+[\t ]*$ 185 // Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$ 186 // Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$ 187 // 188 // Our strategy is to be as strict as possible, while not breaking 189 // known sites. 190 // 191 // Us: ^\X+[ ]*$ 192 bool HttpChunkedDecoder::ParseChunkSize(const char* start, int len, int* out) { 193 DCHECK_GE(len, 0); 194 195 // Strip trailing spaces 196 while (len && start[len - 1] == ' ') 197 len--; 198 199 // Be more restrictive than HexStringToInt; 200 // don't allow inputs with leading "-", "+", "0x", "0X" 201 base::StringPiece chunk_size(start, len); 202 if (chunk_size.find_first_not_of("0123456789abcdefABCDEF") 203 != base::StringPiece::npos) { 204 return false; 205 } 206 207 int parsed_number; 208 bool ok = base::HexStringToInt(chunk_size, &parsed_number); 209 if (ok && parsed_number >= 0) { 210 *out = parsed_number; 211 return true; 212 } 213 return false; 214 } 215 216 } // namespace net 217