1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/filter/sdch_filter.h" 6 7 #include <ctype.h> 8 #include <limits.h> 9 10 #include <algorithm> 11 12 #include "base/logging.h" 13 #include "base/metrics/histogram.h" 14 #include "net/base/sdch_manager.h" 15 #include "net/url_request/url_request_context.h" 16 17 #include "sdch/open-vcdiff/src/google/vcdecoder.h" 18 19 namespace net { 20 21 SdchFilter::SdchFilter(const FilterContext& filter_context) 22 : filter_context_(filter_context), 23 decoding_status_(DECODING_UNINITIALIZED), 24 dictionary_hash_(), 25 dictionary_hash_is_plausible_(false), 26 dictionary_(NULL), 27 url_request_context_(filter_context.GetURLRequestContext()), 28 dest_buffer_excess_(), 29 dest_buffer_excess_index_(0), 30 source_bytes_(0), 31 output_bytes_(0), 32 possible_pass_through_(false) { 33 bool success = filter_context.GetMimeType(&mime_type_); 34 DCHECK(success); 35 success = filter_context.GetURL(&url_); 36 DCHECK(success); 37 DCHECK(url_request_context_->sdch_manager()); 38 } 39 40 SdchFilter::~SdchFilter() { 41 // All code here is for gathering stats, and can be removed when SDCH is 42 // considered stable. 43 44 static int filter_use_count = 0; 45 ++filter_use_count; 46 if (META_REFRESH_RECOVERY == decoding_status_) { 47 UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count); 48 } 49 50 if (vcdiff_streaming_decoder_.get()) { 51 if (!vcdiff_streaming_decoder_->FinishDecoding()) { 52 decoding_status_ = DECODING_ERROR; 53 SdchManager::SdchErrorRecovery(SdchManager::INCOMPLETE_SDCH_CONTENT); 54 // Make it possible for the user to hit reload, and get non-sdch content. 55 // Note this will "wear off" quickly enough, and is just meant to assure 56 // in some rare case that the user is not stuck. 57 url_request_context_->sdch_manager()->BlacklistDomain( 58 url_); 59 UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn", 60 static_cast<int>(filter_context_.GetByteReadCount())); 61 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_); 62 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_); 63 } 64 } 65 66 if (!dest_buffer_excess_.empty()) { 67 // Filter chaining error, or premature teardown. 68 SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT); 69 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn", 70 static_cast<int>(filter_context_.GetByteReadCount())); 71 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize", 72 dest_buffer_excess_.size()); 73 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_); 74 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_); 75 } 76 77 if (filter_context_.IsCachedContent()) { 78 // Not a real error, but it is useful to have this tally. 79 // TODO(jar): Remove this stat after SDCH stability is validated. 80 SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED); 81 return; // We don't need timing stats, and we aready got ratios. 82 } 83 84 switch (decoding_status_) { 85 case DECODING_IN_PROGRESS: { 86 if (output_bytes_) 87 UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a", 88 static_cast<int>( 89 (filter_context_.GetByteReadCount() * 100) / output_bytes_)); 90 UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a", 91 output_bytes_); 92 filter_context_.RecordPacketStats(FilterContext::SDCH_DECODE); 93 94 // Allow latency experiments to proceed. 95 url_request_context_->sdch_manager()->SetAllowLatencyExperiment( 96 url_, true); 97 return; 98 } 99 case PASS_THROUGH: { 100 filter_context_.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH); 101 return; 102 } 103 case DECODING_UNINITIALIZED: { 104 SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED); 105 return; 106 } 107 case WAITING_FOR_DICTIONARY_SELECTION: { 108 SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY); 109 return; 110 } 111 case DECODING_ERROR: { 112 SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR); 113 return; 114 } 115 case META_REFRESH_RECOVERY: { 116 // Already accounted for when set. 117 return; 118 } 119 } // end of switch. 120 } 121 122 bool SdchFilter::InitDecoding(Filter::FilterType filter_type) { 123 if (decoding_status_ != DECODING_UNINITIALIZED) 124 return false; 125 126 // Handle case where sdch filter is guessed, but not required. 127 if (FILTER_TYPE_SDCH_POSSIBLE == filter_type) 128 possible_pass_through_ = true; 129 130 // Initialize decoder only after we have a dictionary in hand. 131 decoding_status_ = WAITING_FOR_DICTIONARY_SELECTION; 132 return true; 133 } 134 135 #ifndef NDEBUG 136 static const char* kDecompressionErrorHtml = 137 "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>" 138 "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;" 139 "border-color:black;border-style:solid;text-align:left;font-family:arial;" 140 "font-size:10pt;foreground-color:black;background-color:white\">" 141 "An error occurred. This page will be reloaded shortly. " 142 "Or press the \"reload\" button now to reload it immediately." 143 "</div>"; 144 #else 145 static const char* kDecompressionErrorHtml = 146 "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"; 147 #endif 148 149 Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer, 150 int* dest_len) { 151 int available_space = *dest_len; 152 *dest_len = 0; // Nothing output yet. 153 154 if (!dest_buffer || available_space <= 0) 155 return FILTER_ERROR; 156 157 if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) { 158 FilterStatus status = InitializeDictionary(); 159 if (FILTER_NEED_MORE_DATA == status) 160 return FILTER_NEED_MORE_DATA; 161 if (FILTER_ERROR == status) { 162 DCHECK_EQ(DECODING_ERROR, decoding_status_); 163 DCHECK_EQ(0u, dest_buffer_excess_index_); 164 DCHECK(dest_buffer_excess_.empty()); 165 // This is where we try very hard to do error recovery, and make this 166 // protocol robust in the face of proxies that do many different things. 167 // If we decide that things are looking very bad (too hard to recover), 168 // we may even issue a "meta-refresh" to reload the page without an SDCH 169 // advertisement (so that we are sure we're not hurting anything). 170 // 171 // Watch out for an error page inserted by the proxy as part of a 40x 172 // error response. When we see such content molestation, we certainly 173 // need to fall into the meta-refresh case. 174 if (filter_context_.GetResponseCode() == 404) { 175 // We could be more generous, but for now, only a "NOT FOUND" code will 176 // cause a pass through. All other bad codes will fall into a 177 // meta-refresh. 178 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE); 179 decoding_status_ = PASS_THROUGH; 180 } else if (filter_context_.GetResponseCode() != 200) { 181 // We need to meta-refresh, with SDCH disabled. 182 } else if (filter_context_.IsCachedContent() 183 && !dictionary_hash_is_plausible_) { 184 // We must have hit the back button, and gotten content that was fetched 185 // before we *really* advertised SDCH and a dictionary. 186 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED); 187 decoding_status_ = PASS_THROUGH; 188 } else if (possible_pass_through_) { 189 // This is the potentially most graceful response. There really was no 190 // error. We were just overly cautious when we added a TENTATIVE_SDCH. 191 // We added the sdch coding tag, and it should not have been added. 192 // This can happen in server experiments, where the server decides 193 // not to use sdch, even though there is a dictionary. To be 194 // conservative, we locally added the tentative sdch (fearing that a 195 // proxy stripped it!) and we must now recant (pass through). 196 SdchManager::SdchErrorRecovery(SdchManager::DISCARD_TENTATIVE_SDCH); 197 // However.... just to be sure we don't get burned by proxies that 198 // re-compress with gzip or other system, we can sniff to see if this 199 // is compressed data etc. For now, we do nothing, which gets us into 200 // the meta-refresh result. 201 // TODO(jar): Improve robustness by sniffing for valid text that we can 202 // actual use re: decoding_status_ = PASS_THROUGH; 203 } else if (dictionary_hash_is_plausible_) { 204 // We need a meta-refresh since we don't have the dictionary. 205 // The common cause is a restart of the browser, where we try to render 206 // cached content that was saved when we had a dictionary. 207 } else if (filter_context_.IsSdchResponse()) { 208 // This is a very corrupt SDCH request response. We can't decode it. 209 // We'll use a meta-refresh, and get content without asking for SDCH. 210 // This will also progressively disable SDCH for this domain. 211 } else { 212 // One of the first 9 bytes precluded consideration as a hash. 213 // This can't be an SDCH payload, even though the server said it was. 214 // This is a major error, as the server or proxy tagged this SDCH even 215 // though it is not! 216 // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!! 217 // Worse yet, meta-refresh could lead to an infinite refresh loop. 218 SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH); 219 decoding_status_ = PASS_THROUGH; 220 // ... but further back-off on advertising SDCH support. 221 url_request_context_->sdch_manager()->BlacklistDomain(url_); 222 } 223 224 if (decoding_status_ == PASS_THROUGH) { 225 dest_buffer_excess_ = dictionary_hash_; // Send what we scanned. 226 } else { 227 // This is where we try to do the expensive meta-refresh. 228 if (std::string::npos == mime_type_.find("text/html")) { 229 // Since we can't do a meta-refresh (along with an exponential 230 // backoff), we'll just make sure this NEVER happens again. 231 url_request_context_->sdch_manager()->BlacklistDomainForever(url_); 232 if (filter_context_.IsCachedContent()) 233 SdchManager::SdchErrorRecovery( 234 SdchManager::CACHED_META_REFRESH_UNSUPPORTED); 235 else 236 SdchManager::SdchErrorRecovery( 237 SdchManager::META_REFRESH_UNSUPPORTED); 238 return FILTER_ERROR; 239 } 240 // HTML content means we can issue a meta-refresh, and get the content 241 // again, perhaps without SDCH (to be safe). 242 if (filter_context_.IsCachedContent()) { 243 // Cached content is probably a startup tab, so we'll just get fresh 244 // content and try again, without disabling sdch. 245 SdchManager::SdchErrorRecovery( 246 SdchManager::META_REFRESH_CACHED_RECOVERY); 247 } else { 248 // Since it wasn't in the cache, we definately need at least some 249 // period of blacklisting to get the correct content. 250 url_request_context_->sdch_manager()->BlacklistDomain(url_); 251 SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY); 252 } 253 decoding_status_ = META_REFRESH_RECOVERY; 254 // Issue a meta redirect with SDCH disabled. 255 dest_buffer_excess_ = kDecompressionErrorHtml; 256 } 257 } else { 258 DCHECK_EQ(DECODING_IN_PROGRESS, decoding_status_); 259 } 260 } 261 262 int amount = OutputBufferExcess(dest_buffer, available_space); 263 *dest_len += amount; 264 dest_buffer += amount; 265 available_space -= amount; 266 DCHECK_GE(available_space, 0); 267 268 if (available_space <= 0) 269 return FILTER_OK; 270 DCHECK(dest_buffer_excess_.empty()); 271 DCHECK_EQ(0u, dest_buffer_excess_index_); 272 273 if (decoding_status_ != DECODING_IN_PROGRESS) { 274 if (META_REFRESH_RECOVERY == decoding_status_) { 275 // Absorb all input data. We've already output page reload HTML. 276 next_stream_data_ = NULL; 277 stream_data_len_ = 0; 278 return FILTER_NEED_MORE_DATA; 279 } 280 if (PASS_THROUGH == decoding_status_) { 281 // We must pass in available_space, but it will be changed to bytes_used. 282 FilterStatus result = CopyOut(dest_buffer, &available_space); 283 // Accumulate the returned count of bytes_used (a.k.a., available_space). 284 *dest_len += available_space; 285 return result; 286 } 287 DCHECK(false); 288 decoding_status_ = DECODING_ERROR; 289 return FILTER_ERROR; 290 } 291 292 if (!next_stream_data_ || stream_data_len_ <= 0) 293 return FILTER_NEED_MORE_DATA; 294 295 bool ret = vcdiff_streaming_decoder_->DecodeChunk( 296 next_stream_data_, stream_data_len_, &dest_buffer_excess_); 297 // Assume all data was used in decoding. 298 next_stream_data_ = NULL; 299 source_bytes_ += stream_data_len_; 300 stream_data_len_ = 0; 301 output_bytes_ += dest_buffer_excess_.size(); 302 if (!ret) { 303 vcdiff_streaming_decoder_.reset(NULL); // Don't call it again. 304 decoding_status_ = DECODING_ERROR; 305 SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR); 306 return FILTER_ERROR; 307 } 308 309 amount = OutputBufferExcess(dest_buffer, available_space); 310 *dest_len += amount; 311 dest_buffer += amount; 312 available_space -= amount; 313 if (0 == available_space && !dest_buffer_excess_.empty()) 314 return FILTER_OK; 315 return FILTER_NEED_MORE_DATA; 316 } 317 318 Filter::FilterStatus SdchFilter::InitializeDictionary() { 319 const size_t kServerIdLength = 9; // Dictionary hash plus null from server. 320 size_t bytes_needed = kServerIdLength - dictionary_hash_.size(); 321 DCHECK_GT(bytes_needed, 0u); 322 if (!next_stream_data_) 323 return FILTER_NEED_MORE_DATA; 324 if (static_cast<size_t>(stream_data_len_) < bytes_needed) { 325 dictionary_hash_.append(next_stream_data_, stream_data_len_); 326 next_stream_data_ = NULL; 327 stream_data_len_ = 0; 328 return FILTER_NEED_MORE_DATA; 329 } 330 dictionary_hash_.append(next_stream_data_, bytes_needed); 331 DCHECK(kServerIdLength == dictionary_hash_.size()); 332 stream_data_len_ -= bytes_needed; 333 DCHECK_LE(0, stream_data_len_); 334 if (stream_data_len_ > 0) 335 next_stream_data_ += bytes_needed; 336 else 337 next_stream_data_ = NULL; 338 339 DCHECK(!dictionary_); 340 dictionary_hash_is_plausible_ = true; // Assume plausible, but check. 341 342 if ('\0' == dictionary_hash_[kServerIdLength - 1]) { 343 SdchManager* manager(url_request_context_->sdch_manager()); 344 manager->GetVcdiffDictionary( 345 std::string(dictionary_hash_, 0, kServerIdLength - 1), 346 url_, &dictionary_); 347 } else { 348 dictionary_hash_is_plausible_ = false; 349 } 350 351 if (!dictionary_) { 352 DCHECK(dictionary_hash_.size() == kServerIdLength); 353 // Since dictionary was not found, check to see if hash was even plausible. 354 for (size_t i = 0; i < kServerIdLength - 1; ++i) { 355 char base64_char = dictionary_hash_[i]; 356 if (!isalnum(base64_char) && '-' != base64_char && '_' != base64_char) { 357 dictionary_hash_is_plausible_ = false; 358 break; 359 } 360 } 361 if (dictionary_hash_is_plausible_) 362 SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND); 363 else 364 SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED); 365 decoding_status_ = DECODING_ERROR; 366 return FILTER_ERROR; 367 } 368 vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder); 369 vcdiff_streaming_decoder_->SetAllowVcdTarget(false); 370 vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(), 371 dictionary_->text().size()); 372 decoding_status_ = DECODING_IN_PROGRESS; 373 return FILTER_OK; 374 } 375 376 int SdchFilter::OutputBufferExcess(char* const dest_buffer, 377 size_t available_space) { 378 if (dest_buffer_excess_.empty()) 379 return 0; 380 DCHECK(dest_buffer_excess_.size() > dest_buffer_excess_index_); 381 size_t amount = std::min(available_space, 382 dest_buffer_excess_.size() - dest_buffer_excess_index_); 383 memcpy(dest_buffer, dest_buffer_excess_.data() + dest_buffer_excess_index_, 384 amount); 385 dest_buffer_excess_index_ += amount; 386 if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) { 387 DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_); 388 dest_buffer_excess_.clear(); 389 dest_buffer_excess_index_ = 0; 390 } 391 return amount; 392 } 393 394 } // namespace net 395