1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Derived from: 6 // mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp 7 // The license block is: 8 /* ***** BEGIN LICENSE BLOCK ***** 9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 10 * 11 * The contents of this file are subject to the Mozilla Public License Version 12 * 1.1 (the "License"); you may not use this file except in compliance with 13 * the License. You may obtain a copy of the License at 14 * http://www.mozilla.org/MPL/ 15 * 16 * Software distributed under the License is distributed on an "AS IS" basis, 17 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 18 * for the specific language governing rights and limitations under the 19 * License. 20 * 21 * The Original Code is Mozilla. 22 * 23 * The Initial Developer of the Original Code is 24 * Netscape Communications. 25 * Portions created by the Initial Developer are Copyright (C) 2001 26 * the Initial Developer. All Rights Reserved. 27 * 28 * Contributor(s): 29 * Darin Fisher <darin (at) netscape.com> (original author) 30 * 31 * Alternatively, the contents of this file may be used under the terms of 32 * either the GNU General Public License Version 2 or later (the "GPL"), or 33 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 34 * in which case the provisions of the GPL or the LGPL are applicable instead 35 * of those above. If you wish to allow use of your version of this file only 36 * under the terms of either the GPL or the LGPL, and not to allow others to 37 * use your version of this file under the terms of the MPL, indicate your 38 * decision by deleting the provisions above and replace them with the notice 39 * and other provisions required by the GPL or the LGPL. If you do not delete 40 * the provisions above, a recipient may use your version of this file under 41 * the terms of any one of the MPL, the GPL or the LGPL. 42 * 43 * ***** END LICENSE BLOCK ***** */ 44 45 #include "net/http/http_chunked_decoder.h" 46 47 #include "base/logging.h" 48 #include "base/strings/string_number_conversions.h" 49 #include "base/strings/string_piece.h" 50 #include "base/strings/string_util.h" 51 #include "net/base/net_errors.h" 52 53 namespace net { 54 55 // Absurdly long size to avoid imposing a constraint on chunked encoding 56 // extensions. 57 const size_t HttpChunkedDecoder::kMaxLineBufLen = 16384; 58 59 HttpChunkedDecoder::HttpChunkedDecoder() 60 : chunk_remaining_(0), 61 chunk_terminator_remaining_(false), 62 reached_last_chunk_(false), 63 reached_eof_(false), 64 bytes_after_eof_(0) { 65 } 66 67 int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) { 68 int result = 0; 69 70 while (buf_len) { 71 if (chunk_remaining_) { 72 int num = std::min(chunk_remaining_, buf_len); 73 74 buf_len -= num; 75 chunk_remaining_ -= num; 76 77 result += num; 78 buf += num; 79 80 // After each chunk's data there should be a CRLF 81 if (!chunk_remaining_) 82 chunk_terminator_remaining_ = true; 83 continue; 84 } else if (reached_eof_) { 85 bytes_after_eof_ += buf_len; 86 break; // Done! 87 } 88 89 int bytes_consumed = ScanForChunkRemaining(buf, buf_len); 90 if (bytes_consumed < 0) 91 return bytes_consumed; // Error 92 93 buf_len -= bytes_consumed; 94 if (buf_len) 95 memmove(buf, buf + bytes_consumed, buf_len); 96 } 97 98 return result; 99 } 100 101 int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) { 102 DCHECK_EQ(0, chunk_remaining_); 103 DCHECK_GT(buf_len, 0); 104 105 int bytes_consumed = 0; 106 107 size_t index_of_lf = base::StringPiece(buf, buf_len).find('\n'); 108 if (index_of_lf != base::StringPiece::npos) { 109 buf_len = static_cast<int>(index_of_lf); 110 if (buf_len && buf[buf_len - 1] == '\r') // Eliminate a preceding CR. 111 buf_len--; 112 bytes_consumed = static_cast<int>(index_of_lf) + 1; 113 114 // Make buf point to the full line buffer to parse. 115 if (!line_buf_.empty()) { 116 line_buf_.append(buf, buf_len); 117 buf = line_buf_.data(); 118 buf_len = static_cast<int>(line_buf_.size()); 119 } 120 121 if (reached_last_chunk_) { 122 if (buf_len) 123 DVLOG(1) << "ignoring http trailer"; 124 else 125 reached_eof_ = true; 126 } else if (chunk_terminator_remaining_) { 127 if (buf_len) { 128 DLOG(ERROR) << "chunk data not terminated properly"; 129 return ERR_INVALID_CHUNKED_ENCODING; 130 } 131 chunk_terminator_remaining_ = false; 132 } else if (buf_len) { 133 // Ignore any chunk-extensions. 134 size_t index_of_semicolon = base::StringPiece(buf, buf_len).find(';'); 135 if (index_of_semicolon != base::StringPiece::npos) 136 buf_len = static_cast<int>(index_of_semicolon); 137 138 if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) { 139 DLOG(ERROR) << "Failed parsing HEX from: " << 140 std::string(buf, buf_len); 141 return ERR_INVALID_CHUNKED_ENCODING; 142 } 143 144 if (chunk_remaining_ == 0) 145 reached_last_chunk_ = true; 146 } else { 147 DLOG(ERROR) << "missing chunk-size"; 148 return ERR_INVALID_CHUNKED_ENCODING; 149 } 150 line_buf_.clear(); 151 } else { 152 // Save the partial line; wait for more data. 153 bytes_consumed = buf_len; 154 155 // Ignore a trailing CR 156 if (buf[buf_len - 1] == '\r') 157 buf_len--; 158 159 if (line_buf_.length() + buf_len > kMaxLineBufLen) { 160 DLOG(ERROR) << "Chunked line length too long"; 161 return ERR_INVALID_CHUNKED_ENCODING; 162 } 163 164 line_buf_.append(buf, buf_len); 165 } 166 return bytes_consumed; 167 } 168 169 170 // While the HTTP 1.1 specification defines chunk-size as 1*HEX 171 // some sites rely on more lenient parsing. 172 // http://www.yahoo.com/, for example, pads chunk-size with trailing spaces 173 // (0x20) to be 7 characters long, such as "819b ". 174 // 175 // A comparison of browsers running on WindowsXP shows that 176 // they will parse the following inputs (egrep syntax): 177 // 178 // Let \X be the character class for a hex digit: [0-9a-fA-F] 179 // 180 // RFC 2616: ^\X+$ 181 // IE7: ^\X+[^\X]*$ 182 // Safari 3.1: ^[\t\r ]*\X+[\t ]*$ 183 // Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$ 184 // Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$ 185 // 186 // Our strategy is to be as strict as possible, while not breaking 187 // known sites. 188 // 189 // Us: ^\X+[ ]*$ 190 bool HttpChunkedDecoder::ParseChunkSize(const char* start, int len, int* out) { 191 DCHECK_GE(len, 0); 192 193 // Strip trailing spaces 194 while (len && start[len - 1] == ' ') 195 len--; 196 197 // Be more restrictive than HexStringToInt; 198 // don't allow inputs with leading "-", "+", "0x", "0X" 199 base::StringPiece chunk_size(start, len); 200 if (chunk_size.find_first_not_of("0123456789abcdefABCDEF") 201 != base::StringPiece::npos) { 202 return false; 203 } 204 205 int parsed_number; 206 bool ok = base::HexStringToInt(chunk_size, &parsed_number); 207 if (ok && parsed_number >= 0) { 208 *out = parsed_number; 209 return true; 210 } 211 return false; 212 } 213 214 } // namespace net 215