1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/filter/sdch_filter.h" 6 7 #include <ctype.h> 8 #include <limits.h> 9 10 #include <algorithm> 11 12 #include "base/logging.h" 13 #include "base/metrics/histogram.h" 14 #include "net/base/sdch_manager.h" 15 #include "net/url_request/url_request_context.h" 16 17 #include "sdch/open-vcdiff/src/google/vcdecoder.h" 18 19 namespace net { 20 21 namespace { 22 23 // Disambiguate various types of responses that trigger a meta-refresh, 24 // failure, or fallback to pass-through. 25 enum ResponseCorruptionDetectionCause { 26 RESPONSE_NONE, 27 28 // 404 Http Response Code 29 RESPONSE_404 = 1, 30 31 // Not a 200 Http Response Code 32 RESPONSE_NOT_200 = 2, 33 34 // Cached before dictionary retrieved. 35 RESPONSE_OLD_UNENCODED = 3, 36 37 // Speculative but incorrect SDCH filtering was added added. 38 RESPONSE_TENTATIVE_SDCH = 4, 39 40 // Missing correct dict for decoding. 41 RESPONSE_NO_DICTIONARY = 5, 42 43 // Not an SDCH response but should be. 44 RESPONSE_CORRUPT_SDCH = 6, 45 46 // No dictionary was advertised with the request, the server claims 47 // to have encoded with SDCH anyway, but it isn't an SDCH response. 48 RESPONSE_ENCODING_LIE = 7, 49 50 RESPONSE_MAX, 51 }; 52 53 } // namespace 54 55 SdchFilter::SdchFilter(const FilterContext& filter_context) 56 : filter_context_(filter_context), 57 decoding_status_(DECODING_UNINITIALIZED), 58 dictionary_hash_(), 59 dictionary_hash_is_plausible_(false), 60 dictionary_(NULL), 61 url_request_context_(filter_context.GetURLRequestContext()), 62 dest_buffer_excess_(), 63 dest_buffer_excess_index_(0), 64 source_bytes_(0), 65 output_bytes_(0), 66 possible_pass_through_(false) { 67 bool success = filter_context.GetMimeType(&mime_type_); 68 DCHECK(success); 69 success = filter_context.GetURL(&url_); 70 DCHECK(success); 71 DCHECK(url_request_context_->sdch_manager()); 72 } 73 74 SdchFilter::~SdchFilter() { 75 // All code here is for gathering stats, and can be removed when SDCH is 76 // considered stable. 77 78 static int filter_use_count = 0; 79 ++filter_use_count; 80 if (META_REFRESH_RECOVERY == decoding_status_) { 81 UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count); 82 } 83 84 if (vcdiff_streaming_decoder_.get()) { 85 if (!vcdiff_streaming_decoder_->FinishDecoding()) { 86 decoding_status_ = DECODING_ERROR; 87 SdchManager::SdchErrorRecovery(SdchManager::INCOMPLETE_SDCH_CONTENT); 88 // Make it possible for the user to hit reload, and get non-sdch content. 89 // Note this will "wear off" quickly enough, and is just meant to assure 90 // in some rare case that the user is not stuck. 91 url_request_context_->sdch_manager()->BlacklistDomain( 92 url_, SdchManager::INCOMPLETE_SDCH_CONTENT); 93 UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn", 94 static_cast<int>(filter_context_.GetByteReadCount())); 95 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_); 96 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_); 97 } 98 } 99 100 if (!dest_buffer_excess_.empty()) { 101 // Filter chaining error, or premature teardown. 102 SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT); 103 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn", 104 static_cast<int>(filter_context_.GetByteReadCount())); 105 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize", 106 dest_buffer_excess_.size()); 107 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_); 108 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_); 109 } 110 111 if (filter_context_.IsCachedContent()) { 112 // Not a real error, but it is useful to have this tally. 113 // TODO(jar): Remove this stat after SDCH stability is validated. 114 SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED); 115 return; // We don't need timing stats, and we aready got ratios. 116 } 117 118 switch (decoding_status_) { 119 case DECODING_IN_PROGRESS: { 120 if (output_bytes_) 121 UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a", 122 static_cast<int>( 123 (filter_context_.GetByteReadCount() * 100) / output_bytes_)); 124 UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a", 125 output_bytes_); 126 filter_context_.RecordPacketStats(FilterContext::SDCH_DECODE); 127 128 // Allow latency experiments to proceed. 129 url_request_context_->sdch_manager()->SetAllowLatencyExperiment( 130 url_, true); 131 return; 132 } 133 case PASS_THROUGH: { 134 filter_context_.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH); 135 return; 136 } 137 case DECODING_UNINITIALIZED: { 138 SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED); 139 return; 140 } 141 case WAITING_FOR_DICTIONARY_SELECTION: { 142 SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY); 143 return; 144 } 145 case DECODING_ERROR: { 146 SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR); 147 return; 148 } 149 case META_REFRESH_RECOVERY: { 150 // Already accounted for when set. 151 return; 152 } 153 } // end of switch. 154 } 155 156 bool SdchFilter::InitDecoding(Filter::FilterType filter_type) { 157 if (decoding_status_ != DECODING_UNINITIALIZED) 158 return false; 159 160 // Handle case where sdch filter is guessed, but not required. 161 if (FILTER_TYPE_SDCH_POSSIBLE == filter_type) 162 possible_pass_through_ = true; 163 164 // Initialize decoder only after we have a dictionary in hand. 165 decoding_status_ = WAITING_FOR_DICTIONARY_SELECTION; 166 return true; 167 } 168 169 #ifndef NDEBUG 170 static const char* kDecompressionErrorHtml = 171 "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>" 172 "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;" 173 "border-color:black;border-style:solid;text-align:left;font-family:arial;" 174 "font-size:10pt;foreground-color:black;background-color:white\">" 175 "An error occurred. This page will be reloaded shortly. " 176 "Or press the \"reload\" button now to reload it immediately." 177 "</div>"; 178 #else 179 static const char* kDecompressionErrorHtml = 180 "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"; 181 #endif 182 183 Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer, 184 int* dest_len) { 185 int available_space = *dest_len; 186 *dest_len = 0; // Nothing output yet. 187 188 if (!dest_buffer || available_space <= 0) 189 return FILTER_ERROR; 190 191 if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) { 192 FilterStatus status = InitializeDictionary(); 193 if (FILTER_NEED_MORE_DATA == status) 194 return FILTER_NEED_MORE_DATA; 195 if (FILTER_ERROR == status) { 196 DCHECK_EQ(DECODING_ERROR, decoding_status_); 197 DCHECK_EQ(0u, dest_buffer_excess_index_); 198 DCHECK(dest_buffer_excess_.empty()); 199 // This is where we try very hard to do error recovery, and make this 200 // protocol robust in the face of proxies that do many different things. 201 // If we decide that things are looking very bad (too hard to recover), 202 // we may even issue a "meta-refresh" to reload the page without an SDCH 203 // advertisement (so that we are sure we're not hurting anything). 204 // 205 // Watch out for an error page inserted by the proxy as part of a 40x 206 // error response. When we see such content molestation, we certainly 207 // need to fall into the meta-refresh case. 208 ResponseCorruptionDetectionCause cause = RESPONSE_NONE; 209 if (filter_context_.GetResponseCode() == 404) { 210 // We could be more generous, but for now, only a "NOT FOUND" code will 211 // cause a pass through. All other bad codes will fall into a 212 // meta-refresh. 213 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE); 214 cause = RESPONSE_404; 215 decoding_status_ = PASS_THROUGH; 216 } else if (filter_context_.GetResponseCode() != 200) { 217 // We need to meta-refresh, with SDCH disabled. 218 cause = RESPONSE_NOT_200; 219 } else if (filter_context_.IsCachedContent() 220 && !dictionary_hash_is_plausible_) { 221 // We must have hit the back button, and gotten content that was fetched 222 // before we *really* advertised SDCH and a dictionary. 223 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED); 224 decoding_status_ = PASS_THROUGH; 225 cause = RESPONSE_OLD_UNENCODED; 226 } else if (possible_pass_through_) { 227 // This is the potentially most graceful response. There really was no 228 // error. We were just overly cautious when we added a TENTATIVE_SDCH. 229 // We added the sdch coding tag, and it should not have been added. 230 // This can happen in server experiments, where the server decides 231 // not to use sdch, even though there is a dictionary. To be 232 // conservative, we locally added the tentative sdch (fearing that a 233 // proxy stripped it!) and we must now recant (pass through). 234 // 235 // However.... just to be sure we don't get burned by proxies that 236 // re-compress with gzip or other system, we can sniff to see if this 237 // is compressed data etc. For now, we do nothing, which gets us into 238 // the meta-refresh result. 239 // TODO(jar): Improve robustness by sniffing for valid text that we can 240 // actual use re: decoding_status_ = PASS_THROUGH; 241 cause = RESPONSE_TENTATIVE_SDCH; 242 } else if (dictionary_hash_is_plausible_) { 243 // We need a meta-refresh since we don't have the dictionary. 244 // The common cause is a restart of the browser, where we try to render 245 // cached content that was saved when we had a dictionary. 246 cause = RESPONSE_NO_DICTIONARY; 247 } else if (filter_context_.SdchResponseExpected()) { 248 // This is a very corrupt SDCH request response. We can't decode it. 249 // We'll use a meta-refresh, and get content without asking for SDCH. 250 // This will also progressively disable SDCH for this domain. 251 cause = RESPONSE_CORRUPT_SDCH; 252 } else { 253 // One of the first 9 bytes precluded consideration as a hash. 254 // This can't be an SDCH payload, even though the server said it was. 255 // This is a major error, as the server or proxy tagged this SDCH even 256 // though it is not! 257 // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!! 258 // Worse yet, meta-refresh could lead to an infinite refresh loop. 259 SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH); 260 decoding_status_ = PASS_THROUGH; 261 // ... but further back-off on advertising SDCH support. 262 url_request_context_->sdch_manager()->BlacklistDomain( 263 url_, SdchManager::PASSING_THROUGH_NON_SDCH); 264 cause = RESPONSE_ENCODING_LIE; 265 } 266 DCHECK_NE(RESPONSE_NONE, cause); 267 268 // Use if statement rather than ?: because UMA_HISTOGRAM_ENUMERATION 269 // caches the histogram name based on the call site. 270 if (filter_context_.IsCachedContent()) { 271 UMA_HISTOGRAM_ENUMERATION( 272 "Sdch3.ResponseCorruptionDetection.Cached", cause, RESPONSE_MAX); 273 } else { 274 UMA_HISTOGRAM_ENUMERATION( 275 "Sdch3.ResponseCorruptionDetection.Uncached", cause, RESPONSE_MAX); 276 } 277 278 if (decoding_status_ == PASS_THROUGH) { 279 dest_buffer_excess_ = dictionary_hash_; // Send what we scanned. 280 } else { 281 // This is where we try to do the expensive meta-refresh. 282 if (std::string::npos == mime_type_.find("text/html")) { 283 // Since we can't do a meta-refresh (along with an exponential 284 // backoff), we'll just make sure this NEVER happens again. 285 SdchManager::ProblemCodes problem = 286 (filter_context_.IsCachedContent() ? 287 SdchManager::CACHED_META_REFRESH_UNSUPPORTED : 288 SdchManager::META_REFRESH_UNSUPPORTED); 289 url_request_context_->sdch_manager()->BlacklistDomainForever( 290 url_, problem); 291 SdchManager::SdchErrorRecovery(problem); 292 return FILTER_ERROR; 293 } 294 // HTML content means we can issue a meta-refresh, and get the content 295 // again, perhaps without SDCH (to be safe). 296 if (filter_context_.IsCachedContent()) { 297 // Cached content is probably a startup tab, so we'll just get fresh 298 // content and try again, without disabling sdch. 299 SdchManager::SdchErrorRecovery( 300 SdchManager::META_REFRESH_CACHED_RECOVERY); 301 } else { 302 // Since it wasn't in the cache, we definately need at least some 303 // period of blacklisting to get the correct content. 304 url_request_context_->sdch_manager()->BlacklistDomain( 305 url_, SdchManager::META_REFRESH_RECOVERY); 306 SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY); 307 } 308 decoding_status_ = META_REFRESH_RECOVERY; 309 // Issue a meta redirect with SDCH disabled. 310 dest_buffer_excess_ = kDecompressionErrorHtml; 311 } 312 } else { 313 DCHECK_EQ(DECODING_IN_PROGRESS, decoding_status_); 314 } 315 } 316 317 int amount = OutputBufferExcess(dest_buffer, available_space); 318 *dest_len += amount; 319 dest_buffer += amount; 320 available_space -= amount; 321 DCHECK_GE(available_space, 0); 322 323 if (available_space <= 0) 324 return FILTER_OK; 325 DCHECK(dest_buffer_excess_.empty()); 326 DCHECK_EQ(0u, dest_buffer_excess_index_); 327 328 if (decoding_status_ != DECODING_IN_PROGRESS) { 329 if (META_REFRESH_RECOVERY == decoding_status_) { 330 // Absorb all input data. We've already output page reload HTML. 331 next_stream_data_ = NULL; 332 stream_data_len_ = 0; 333 return FILTER_NEED_MORE_DATA; 334 } 335 if (PASS_THROUGH == decoding_status_) { 336 // We must pass in available_space, but it will be changed to bytes_used. 337 FilterStatus result = CopyOut(dest_buffer, &available_space); 338 // Accumulate the returned count of bytes_used (a.k.a., available_space). 339 *dest_len += available_space; 340 return result; 341 } 342 DCHECK(false); 343 decoding_status_ = DECODING_ERROR; 344 return FILTER_ERROR; 345 } 346 347 if (!next_stream_data_ || stream_data_len_ <= 0) 348 return FILTER_NEED_MORE_DATA; 349 350 bool ret = vcdiff_streaming_decoder_->DecodeChunk( 351 next_stream_data_, stream_data_len_, &dest_buffer_excess_); 352 // Assume all data was used in decoding. 353 next_stream_data_ = NULL; 354 source_bytes_ += stream_data_len_; 355 stream_data_len_ = 0; 356 output_bytes_ += dest_buffer_excess_.size(); 357 if (!ret) { 358 vcdiff_streaming_decoder_.reset(NULL); // Don't call it again. 359 decoding_status_ = DECODING_ERROR; 360 SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR); 361 return FILTER_ERROR; 362 } 363 364 amount = OutputBufferExcess(dest_buffer, available_space); 365 *dest_len += amount; 366 dest_buffer += amount; 367 available_space -= amount; 368 if (0 == available_space && !dest_buffer_excess_.empty()) 369 return FILTER_OK; 370 return FILTER_NEED_MORE_DATA; 371 } 372 373 Filter::FilterStatus SdchFilter::InitializeDictionary() { 374 const size_t kServerIdLength = 9; // Dictionary hash plus null from server. 375 size_t bytes_needed = kServerIdLength - dictionary_hash_.size(); 376 DCHECK_GT(bytes_needed, 0u); 377 if (!next_stream_data_) 378 return FILTER_NEED_MORE_DATA; 379 if (static_cast<size_t>(stream_data_len_) < bytes_needed) { 380 dictionary_hash_.append(next_stream_data_, stream_data_len_); 381 next_stream_data_ = NULL; 382 stream_data_len_ = 0; 383 return FILTER_NEED_MORE_DATA; 384 } 385 dictionary_hash_.append(next_stream_data_, bytes_needed); 386 DCHECK(kServerIdLength == dictionary_hash_.size()); 387 stream_data_len_ -= bytes_needed; 388 DCHECK_LE(0, stream_data_len_); 389 if (stream_data_len_ > 0) 390 next_stream_data_ += bytes_needed; 391 else 392 next_stream_data_ = NULL; 393 394 DCHECK(!dictionary_.get()); 395 dictionary_hash_is_plausible_ = true; // Assume plausible, but check. 396 397 if ('\0' == dictionary_hash_[kServerIdLength - 1]) { 398 SdchManager* manager(url_request_context_->sdch_manager()); 399 manager->GetVcdiffDictionary( 400 std::string(dictionary_hash_, 0, kServerIdLength - 1), 401 url_, &dictionary_); 402 } else { 403 dictionary_hash_is_plausible_ = false; 404 } 405 406 if (!dictionary_.get()) { 407 DCHECK(dictionary_hash_.size() == kServerIdLength); 408 // Since dictionary was not found, check to see if hash was even plausible. 409 for (size_t i = 0; i < kServerIdLength - 1; ++i) { 410 char base64_char = dictionary_hash_[i]; 411 if (!isalnum(base64_char) && '-' != base64_char && '_' != base64_char) { 412 dictionary_hash_is_plausible_ = false; 413 break; 414 } 415 } 416 if (dictionary_hash_is_plausible_) 417 SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND); 418 else 419 SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED); 420 decoding_status_ = DECODING_ERROR; 421 return FILTER_ERROR; 422 } 423 vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder); 424 vcdiff_streaming_decoder_->SetAllowVcdTarget(false); 425 vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(), 426 dictionary_->text().size()); 427 decoding_status_ = DECODING_IN_PROGRESS; 428 return FILTER_OK; 429 } 430 431 int SdchFilter::OutputBufferExcess(char* const dest_buffer, 432 size_t available_space) { 433 if (dest_buffer_excess_.empty()) 434 return 0; 435 DCHECK(dest_buffer_excess_.size() > dest_buffer_excess_index_); 436 size_t amount = std::min(available_space, 437 dest_buffer_excess_.size() - dest_buffer_excess_index_); 438 memcpy(dest_buffer, dest_buffer_excess_.data() + dest_buffer_excess_index_, 439 amount); 440 dest_buffer_excess_index_ += amount; 441 if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) { 442 DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_); 443 dest_buffer_excess_.clear(); 444 dest_buffer_excess_index_ = 0; 445 } 446 return amount; 447 } 448 449 } // namespace net 450