1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/base/sdch_filter.h" 6 7 #include <limits.h> 8 #include <ctype.h> 9 #include <algorithm> 10 11 #include "base/file_util.h" 12 #include "base/logging.h" 13 #include "base/metrics/histogram.h" 14 #include "net/base/sdch_manager.h" 15 16 #include "sdch/open-vcdiff/src/google/vcdecoder.h" 17 18 namespace net { 19 20 SdchFilter::SdchFilter(const FilterContext& filter_context) 21 : filter_context_(filter_context), 22 decoding_status_(DECODING_UNINITIALIZED), 23 vcdiff_streaming_decoder_(NULL), 24 dictionary_hash_(), 25 dictionary_hash_is_plausible_(false), 26 dictionary_(NULL), 27 dest_buffer_excess_(), 28 dest_buffer_excess_index_(0), 29 source_bytes_(0), 30 output_bytes_(0), 31 possible_pass_through_(false) { 32 bool success = filter_context.GetMimeType(&mime_type_); 33 DCHECK(success); 34 success = filter_context.GetURL(&url_); 35 DCHECK(success); 36 } 37 38 SdchFilter::~SdchFilter() { 39 // All code here is for gathering stats, and can be removed when SDCH is 40 // considered stable. 41 42 static int filter_use_count = 0; 43 ++filter_use_count; 44 if (META_REFRESH_RECOVERY == decoding_status_) { 45 UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count); 46 } 47 48 if (vcdiff_streaming_decoder_.get()) { 49 if (!vcdiff_streaming_decoder_->FinishDecoding()) { 50 decoding_status_ = DECODING_ERROR; 51 SdchManager::SdchErrorRecovery(SdchManager::INCOMPLETE_SDCH_CONTENT); 52 // Make it possible for the user to hit reload, and get non-sdch content. 53 // Note this will "wear off" quickly enough, and is just meant to assure 54 // in some rare case that the user is not stuck. 55 SdchManager::BlacklistDomain(url_); 56 UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn", 57 static_cast<int>(filter_context_.GetByteReadCount())); 58 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_); 59 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_); 60 } 61 } 62 63 if (!dest_buffer_excess_.empty()) { 64 // Filter chaining error, or premature teardown. 65 SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT); 66 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn", 67 static_cast<int>(filter_context_.GetByteReadCount())); 68 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize", 69 dest_buffer_excess_.size()); 70 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_); 71 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_); 72 } 73 74 if (filter_context_.IsCachedContent()) { 75 // Not a real error, but it is useful to have this tally. 76 // TODO(jar): Remove this stat after SDCH stability is validated. 77 SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED); 78 return; // We don't need timing stats, and we aready got ratios. 79 } 80 81 switch (decoding_status_) { 82 case DECODING_IN_PROGRESS: { 83 if (output_bytes_) 84 UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a", 85 static_cast<int>( 86 (filter_context_.GetByteReadCount() * 100) / output_bytes_)); 87 UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a", 88 output_bytes_); 89 filter_context_.RecordPacketStats(FilterContext::SDCH_DECODE); 90 91 // Allow latency experiments to proceed. 92 SdchManager::Global()->SetAllowLatencyExperiment(url_, true); 93 return; 94 } 95 case PASS_THROUGH: { 96 filter_context_.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH); 97 return; 98 } 99 case DECODING_UNINITIALIZED: { 100 SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED); 101 return; 102 } 103 case WAITING_FOR_DICTIONARY_SELECTION: { 104 SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY); 105 return; 106 } 107 case DECODING_ERROR: { 108 SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR); 109 return; 110 } 111 case META_REFRESH_RECOVERY: { 112 // Already accounted for when set. 113 return; 114 } 115 } // end of switch. 116 } 117 118 bool SdchFilter::InitDecoding(Filter::FilterType filter_type) { 119 if (decoding_status_ != DECODING_UNINITIALIZED) 120 return false; 121 122 // Handle case where sdch filter is guessed, but not required. 123 if (FILTER_TYPE_SDCH_POSSIBLE == filter_type) 124 possible_pass_through_ = true; 125 126 // Initialize decoder only after we have a dictionary in hand. 127 decoding_status_ = WAITING_FOR_DICTIONARY_SELECTION; 128 return true; 129 } 130 131 #ifndef NDEBUG 132 static const char* kDecompressionErrorHtml = 133 "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>" 134 "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;" 135 "border-color:black;border-style:solid;text-align:left;font-family:arial;" 136 "font-size:10pt;foreground-color:black;background-color:white\">" 137 "An error occurred. This page will be reloaded shortly. " 138 "Or press the \"reload\" button now to reload it immediately." 139 "</div>"; 140 #else 141 static const char* kDecompressionErrorHtml = 142 "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"; 143 #endif 144 145 Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer, 146 int* dest_len) { 147 int available_space = *dest_len; 148 *dest_len = 0; // Nothing output yet. 149 150 if (!dest_buffer || available_space <= 0) 151 return FILTER_ERROR; 152 153 if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) { 154 FilterStatus status = InitializeDictionary(); 155 if (FILTER_NEED_MORE_DATA == status) 156 return FILTER_NEED_MORE_DATA; 157 if (FILTER_ERROR == status) { 158 DCHECK(DECODING_ERROR == decoding_status_); 159 DCHECK_EQ(0u, dest_buffer_excess_index_); 160 DCHECK(dest_buffer_excess_.empty()); 161 // This is where we try very hard to do error recovery, and make this 162 // protocol robust in the face of proxies that do many different things. 163 // If we decide that things are looking very bad (too hard to recover), 164 // we may even issue a "meta-refresh" to reload the page without an SDCH 165 // advertisement (so that we are sure we're not hurting anything). 166 // 167 // Watch out for an error page inserted by the proxy as part of a 40x 168 // error response. When we see such content molestation, we certainly 169 // need to fall into the meta-refresh case. 170 if (filter_context_.GetResponseCode() == 404) { 171 // We could be more generous, but for now, only a "NOT FOUND" code will 172 // cause a pass through. All other bad codes will fall into a 173 // meta-refresh. 174 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE); 175 decoding_status_ = PASS_THROUGH; 176 } else if (filter_context_.GetResponseCode() != 200) { 177 // We need to meta-refresh, with SDCH disabled. 178 } else if (filter_context_.IsCachedContent() 179 && !dictionary_hash_is_plausible_) { 180 // We must have hit the back button, and gotten content that was fetched 181 // before we *really* advertised SDCH and a dictionary. 182 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED); 183 decoding_status_ = PASS_THROUGH; 184 } else if (possible_pass_through_) { 185 // This is the potentially most graceful response. There really was no 186 // error. We were just overly cautious when we added a TENTATIVE_SDCH. 187 // We added the sdch coding tag, and it should not have been added. 188 // This can happen in server experiments, where the server decides 189 // not to use sdch, even though there is a dictionary. To be 190 // conservative, we locally added the tentative sdch (fearing that a 191 // proxy stripped it!) and we must now recant (pass through). 192 SdchManager::SdchErrorRecovery(SdchManager::DISCARD_TENTATIVE_SDCH); 193 // However.... just to be sure we don't get burned by proxies that 194 // re-compress with gzip or other system, we can sniff to see if this 195 // is compressed data etc. For now, we do nothing, which gets us into 196 // the meta-refresh result. 197 // TODO(jar): Improve robustness by sniffing for valid text that we can 198 // actual use re: decoding_status_ = PASS_THROUGH; 199 } else if (dictionary_hash_is_plausible_) { 200 // We need a meta-refresh since we don't have the dictionary. 201 // The common cause is a restart of the browser, where we try to render 202 // cached content that was saved when we had a dictionary. 203 } else if (filter_context_.IsSdchResponse()) { 204 // This is a very corrupt SDCH request response. We can't decode it. 205 // We'll use a meta-refresh, and get content without asking for SDCH. 206 // This will also progressively disable SDCH for this domain. 207 } else { 208 // One of the first 9 bytes precluded consideration as a hash. 209 // This can't be an SDCH payload, even though the server said it was. 210 // This is a major error, as the server or proxy tagged this SDCH even 211 // though it is not! 212 // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!! 213 // Worse yet, meta-refresh could lead to an infinite refresh loop. 214 SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH); 215 decoding_status_ = PASS_THROUGH; 216 // ... but further back-off on advertising SDCH support. 217 SdchManager::BlacklistDomain(url_); 218 } 219 220 if (decoding_status_ == PASS_THROUGH) { 221 dest_buffer_excess_ = dictionary_hash_; // Send what we scanned. 222 } else { 223 // This is where we try to do the expensive meta-refresh. 224 if (std::string::npos == mime_type_.find("text/html")) { 225 // Since we can't do a meta-refresh (along with an exponential 226 // backoff), we'll just make sure this NEVER happens again. 227 SdchManager::BlacklistDomainForever(url_); 228 if (filter_context_.IsCachedContent()) 229 SdchManager::SdchErrorRecovery( 230 SdchManager::CACHED_META_REFRESH_UNSUPPORTED); 231 else 232 SdchManager::SdchErrorRecovery( 233 SdchManager::META_REFRESH_UNSUPPORTED); 234 return FILTER_ERROR; 235 } 236 // HTML content means we can issue a meta-refresh, and get the content 237 // again, perhaps without SDCH (to be safe). 238 if (filter_context_.IsCachedContent()) { 239 // Cached content is probably a startup tab, so we'll just get fresh 240 // content and try again, without disabling sdch. 241 SdchManager::SdchErrorRecovery( 242 SdchManager::META_REFRESH_CACHED_RECOVERY); 243 } else { 244 // Since it wasn't in the cache, we definately need at least some 245 // period of blacklisting to get the correct content. 246 SdchManager::BlacklistDomain(url_); 247 SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY); 248 } 249 decoding_status_ = META_REFRESH_RECOVERY; 250 // Issue a meta redirect with SDCH disabled. 251 dest_buffer_excess_ = kDecompressionErrorHtml; 252 } 253 } else { 254 DCHECK(DECODING_IN_PROGRESS == decoding_status_); 255 } 256 } 257 258 int amount = OutputBufferExcess(dest_buffer, available_space); 259 *dest_len += amount; 260 dest_buffer += amount; 261 available_space -= amount; 262 DCHECK_GE(available_space, 0); 263 264 if (available_space <= 0) 265 return FILTER_OK; 266 DCHECK(dest_buffer_excess_.empty()); 267 DCHECK_EQ(0u, dest_buffer_excess_index_); 268 269 if (decoding_status_ != DECODING_IN_PROGRESS) { 270 if (META_REFRESH_RECOVERY == decoding_status_) { 271 // Absorb all input data. We've already output page reload HTML. 272 next_stream_data_ = NULL; 273 stream_data_len_ = 0; 274 return FILTER_NEED_MORE_DATA; 275 } 276 if (PASS_THROUGH == decoding_status_) { 277 // We must pass in available_space, but it will be changed to bytes_used. 278 FilterStatus result = CopyOut(dest_buffer, &available_space); 279 // Accumulate the returned count of bytes_used (a.k.a., available_space). 280 *dest_len += available_space; 281 return result; 282 } 283 DCHECK(false); 284 decoding_status_ = DECODING_ERROR; 285 return FILTER_ERROR; 286 } 287 288 if (!next_stream_data_ || stream_data_len_ <= 0) 289 return FILTER_NEED_MORE_DATA; 290 291 bool ret = vcdiff_streaming_decoder_->DecodeChunk( 292 next_stream_data_, stream_data_len_, &dest_buffer_excess_); 293 // Assume all data was used in decoding. 294 next_stream_data_ = NULL; 295 source_bytes_ += stream_data_len_; 296 stream_data_len_ = 0; 297 output_bytes_ += dest_buffer_excess_.size(); 298 if (!ret) { 299 vcdiff_streaming_decoder_.reset(NULL); // Don't call it again. 300 decoding_status_ = DECODING_ERROR; 301 SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR); 302 return FILTER_ERROR; 303 } 304 305 amount = OutputBufferExcess(dest_buffer, available_space); 306 *dest_len += amount; 307 dest_buffer += amount; 308 available_space -= amount; 309 if (0 == available_space && !dest_buffer_excess_.empty()) 310 return FILTER_OK; 311 return FILTER_NEED_MORE_DATA; 312 } 313 314 Filter::FilterStatus SdchFilter::InitializeDictionary() { 315 const size_t kServerIdLength = 9; // Dictionary hash plus null from server. 316 size_t bytes_needed = kServerIdLength - dictionary_hash_.size(); 317 DCHECK_GT(bytes_needed, 0u); 318 if (!next_stream_data_) 319 return FILTER_NEED_MORE_DATA; 320 if (static_cast<size_t>(stream_data_len_) < bytes_needed) { 321 dictionary_hash_.append(next_stream_data_, stream_data_len_); 322 next_stream_data_ = NULL; 323 stream_data_len_ = 0; 324 return FILTER_NEED_MORE_DATA; 325 } 326 dictionary_hash_.append(next_stream_data_, bytes_needed); 327 DCHECK(kServerIdLength == dictionary_hash_.size()); 328 stream_data_len_ -= bytes_needed; 329 DCHECK_LE(0, stream_data_len_); 330 if (stream_data_len_ > 0) 331 next_stream_data_ += bytes_needed; 332 else 333 next_stream_data_ = NULL; 334 335 DCHECK(!dictionary_.get()); 336 dictionary_hash_is_plausible_ = true; // Assume plausible, but check. 337 338 SdchManager::Dictionary* dictionary = NULL; 339 if ('\0' == dictionary_hash_[kServerIdLength - 1]) 340 SdchManager::Global()->GetVcdiffDictionary(std::string(dictionary_hash_, 0, 341 kServerIdLength - 1), 342 url_, &dictionary); 343 else 344 dictionary_hash_is_plausible_ = false; 345 346 if (!dictionary) { 347 DCHECK(dictionary_hash_.size() == kServerIdLength); 348 // Since dictionary was not found, check to see if hash was even plausible. 349 for (size_t i = 0; i < kServerIdLength - 1; ++i) { 350 char base64_char = dictionary_hash_[i]; 351 if (!isalnum(base64_char) && '-' != base64_char && '_' != base64_char) { 352 dictionary_hash_is_plausible_ = false; 353 break; 354 } 355 } 356 if (dictionary_hash_is_plausible_) 357 SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND); 358 else 359 SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED); 360 decoding_status_ = DECODING_ERROR; 361 return FILTER_ERROR; 362 } 363 dictionary_ = dictionary; 364 vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder); 365 vcdiff_streaming_decoder_->SetAllowVcdTarget(false); 366 vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(), 367 dictionary_->text().size()); 368 decoding_status_ = DECODING_IN_PROGRESS; 369 return FILTER_OK; 370 } 371 372 int SdchFilter::OutputBufferExcess(char* const dest_buffer, 373 size_t available_space) { 374 if (dest_buffer_excess_.empty()) 375 return 0; 376 DCHECK(dest_buffer_excess_.size() > dest_buffer_excess_index_); 377 size_t amount = std::min(available_space, 378 dest_buffer_excess_.size() - dest_buffer_excess_index_); 379 memcpy(dest_buffer, dest_buffer_excess_.data() + dest_buffer_excess_index_, 380 amount); 381 dest_buffer_excess_index_ += amount; 382 if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) { 383 DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_); 384 dest_buffer_excess_.clear(); 385 dest_buffer_excess_index_ = 0; 386 } 387 return amount; 388 } 389 390 } // namespace net 391