1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <limits.h> 6 #include <ctype.h> 7 #include <algorithm> 8 9 #include "base/file_util.h" 10 #include "base/histogram.h" 11 #include "base/logging.h" 12 #include "net/base/sdch_filter.h" 13 #include "net/base/sdch_manager.h" 14 15 #include "sdch/open-vcdiff/src/google/vcdecoder.h" 16 17 SdchFilter::SdchFilter(const FilterContext& filter_context) 18 : Filter(filter_context), 19 decoding_status_(DECODING_UNINITIALIZED), 20 vcdiff_streaming_decoder_(NULL), 21 dictionary_hash_(), 22 dictionary_hash_is_plausible_(false), 23 dictionary_(NULL), 24 dest_buffer_excess_(), 25 dest_buffer_excess_index_(0), 26 source_bytes_(0), 27 output_bytes_(0), 28 possible_pass_through_(false) { 29 bool success = filter_context.GetMimeType(&mime_type_); 30 DCHECK(success); 31 success = filter_context.GetURL(&url_); 32 DCHECK(success); 33 } 34 35 SdchFilter::~SdchFilter() { 36 // All code here is for gathering stats, and can be removed when SDCH is 37 // considered stable. 38 39 static int filter_use_count = 0; 40 ++filter_use_count; 41 if (META_REFRESH_RECOVERY == decoding_status_) { 42 UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count); 43 } 44 45 if (vcdiff_streaming_decoder_.get()) { 46 if (!vcdiff_streaming_decoder_->FinishDecoding()) { 47 decoding_status_ = DECODING_ERROR; 48 SdchManager::SdchErrorRecovery(SdchManager::INCOMPLETE_SDCH_CONTENT); 49 // Make it possible for the user to hit reload, and get non-sdch content. 50 // Note this will "wear off" quickly enough, and is just meant to assure 51 // in some rare case that the user is not stuck. 52 SdchManager::BlacklistDomain(url_); 53 UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn", 54 static_cast<int>(filter_context().GetByteReadCount())); 55 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_); 56 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_); 57 } 58 } 59 60 if (!dest_buffer_excess_.empty()) { 61 // Filter chaining error, or premature teardown. 62 SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT); 63 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn", 64 static_cast<int>(filter_context().GetByteReadCount())); 65 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize", 66 dest_buffer_excess_.size()); 67 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_); 68 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_); 69 } 70 71 if (filter_context().IsCachedContent()) { 72 // Not a real error, but it is useful to have this tally. 73 // TODO(jar): Remove this stat after SDCH stability is validated. 74 SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED); 75 return; // We don't need timing stats, and we aready got ratios. 76 } 77 78 switch (decoding_status_) { 79 case DECODING_IN_PROGRESS: { 80 if (output_bytes_) 81 UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a", 82 static_cast<int>( 83 (filter_context().GetByteReadCount() * 100) / output_bytes_)); 84 UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a", 85 output_bytes_); 86 filter_context().RecordPacketStats(FilterContext::SDCH_DECODE); 87 88 // Allow latency experiments to proceed. 89 SdchManager::Global()->SetAllowLatencyExperiment(url_, true); 90 return; 91 } 92 case PASS_THROUGH: { 93 filter_context().RecordPacketStats(FilterContext::SDCH_PASSTHROUGH); 94 return; 95 } 96 case DECODING_UNINITIALIZED: { 97 SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED); 98 return; 99 } 100 case WAITING_FOR_DICTIONARY_SELECTION: { 101 SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY); 102 return; 103 } 104 case DECODING_ERROR: { 105 SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR); 106 return; 107 } 108 case META_REFRESH_RECOVERY: { 109 // Already accounted for when set. 110 return; 111 } 112 } // end of switch. 113 } 114 115 bool SdchFilter::InitDecoding(Filter::FilterType filter_type) { 116 if (decoding_status_ != DECODING_UNINITIALIZED) 117 return false; 118 119 // Handle case where sdch filter is guessed, but not required. 120 if (FILTER_TYPE_SDCH_POSSIBLE == filter_type) 121 possible_pass_through_ = true; 122 123 // Initialize decoder only after we have a dictionary in hand. 124 decoding_status_ = WAITING_FOR_DICTIONARY_SELECTION; 125 return true; 126 } 127 128 static const char* kDecompressionErrorHtml = 129 "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>" 130 "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;" 131 "border-color:black;border-style:solid;text-align:left;font-family:arial;" 132 "font-size:10pt;foreground-color:black;background-color:white\">" 133 "An error occurred. This page will be reloaded shortly. " 134 "Or press the \"reload\" button now to reload it immediately." 135 "</div>"; 136 137 138 Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer, 139 int* dest_len) { 140 int available_space = *dest_len; 141 *dest_len = 0; // Nothing output yet. 142 143 if (!dest_buffer || available_space <= 0) 144 return FILTER_ERROR; 145 146 if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) { 147 FilterStatus status = InitializeDictionary(); 148 if (FILTER_NEED_MORE_DATA == status) 149 return FILTER_NEED_MORE_DATA; 150 if (FILTER_ERROR == status) { 151 DCHECK(DECODING_ERROR == decoding_status_); 152 DCHECK_EQ(0u, dest_buffer_excess_index_); 153 DCHECK(dest_buffer_excess_.empty()); 154 // This is where we try very hard to do error recovery, and make this 155 // protocol robust in the face of proxies that do many different things. 156 // If we decide that things are looking very bad (too hard to recover), 157 // we may even issue a "meta-refresh" to reload the page without an SDCH 158 // advertisement (so that we are sure we're not hurting anything). 159 // 160 // Watch out for an error page inserted by the proxy as part of a 40x 161 // error response. When we see such content molestation, we certainly 162 // need to fall into the meta-refresh case. 163 if (filter_context().GetResponseCode() == 404) { 164 // We could be more generous, but for now, only a "NOT FOUND" code will 165 // cause a pass through. All other bad codes will fall into a 166 // meta-refresh. 167 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE); 168 decoding_status_ = PASS_THROUGH; 169 } else if (filter_context().GetResponseCode() != 200) { 170 // We need to meta-refresh, with SDCH disabled. 171 } else if (filter_context().IsCachedContent() 172 && !dictionary_hash_is_plausible_) { 173 // We must have hit the back button, and gotten content that was fetched 174 // before we *really* advertised SDCH and a dictionary. 175 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED); 176 decoding_status_ = PASS_THROUGH; 177 } else if (possible_pass_through_) { 178 // This is the potentially most graceful response. There really was no 179 // error. We were just overly cautious when we added a TENTATIVE_SDCH. 180 // We added the sdch coding tag, and it should not have been added. 181 // This can happen in server experiments, where the server decides 182 // not to use sdch, even though there is a dictionary. To be 183 // conservative, we locally added the tentative sdch (fearing that a 184 // proxy stripped it!) and we must now recant (pass through). 185 SdchManager::SdchErrorRecovery(SdchManager::DISCARD_TENTATIVE_SDCH); 186 // However.... just to be sure we don't get burned by proxies that 187 // re-compress with gzip or other system, we can sniff to see if this 188 // is compressed data etc. For now, we do nothing, which gets us into 189 // the meta-refresh result. 190 // TODO(jar): Improve robustness by sniffing for valid text that we can 191 // actual use re: decoding_status_ = PASS_THROUGH; 192 } else if (dictionary_hash_is_plausible_) { 193 // We need a meta-refresh since we don't have the dictionary. 194 // The common cause is a restart of the browser, where we try to render 195 // cached content that was saved when we had a dictionary. 196 } else if (filter_context().IsSdchResponse()) { 197 // This is a very corrupt SDCH request response. We can't decode it. 198 // We'll use a meta-refresh, and get content without asking for SDCH. 199 // This will also progressively disable SDCH for this domain. 200 } else { 201 // One of the first 9 bytes precluded consideration as a hash. 202 // This can't be an SDCH payload, even though the server said it was. 203 // This is a major error, as the server or proxy tagged this SDCH even 204 // though it is not! 205 // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!! 206 // Worse yet, meta-refresh could lead to an infinite refresh loop. 207 SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH); 208 decoding_status_ = PASS_THROUGH; 209 // ... but further back-off on advertising SDCH support. 210 SdchManager::BlacklistDomain(url_); 211 } 212 213 if (decoding_status_ == PASS_THROUGH) { 214 dest_buffer_excess_ = dictionary_hash_; // Send what we scanned. 215 } else { 216 // This is where we try to do the expensive meta-refresh. 217 if (std::string::npos == mime_type_.find("text/html")) { 218 // Since we can't do a meta-refresh (along with an exponential 219 // backoff), we'll just make sure this NEVER happens again. 220 SdchManager::BlacklistDomainForever(url_); 221 if (filter_context().IsCachedContent()) 222 SdchManager::SdchErrorRecovery( 223 SdchManager::CACHED_META_REFRESH_UNSUPPORTED); 224 else 225 SdchManager::SdchErrorRecovery( 226 SdchManager::META_REFRESH_UNSUPPORTED); 227 return FILTER_ERROR; 228 } 229 // HTML content means we can issue a meta-refresh, and get the content 230 // again, perhaps without SDCH (to be safe). 231 if (filter_context().IsCachedContent()) { 232 // Cached content is probably a startup tab, so we'll just get fresh 233 // content and try again, without disabling sdch. 234 SdchManager::SdchErrorRecovery( 235 SdchManager::META_REFRESH_CACHED_RECOVERY); 236 } else { 237 // Since it wasn't in the cache, we definately need at least some 238 // period of blacklisting to get the correct content. 239 SdchManager::BlacklistDomain(url_); 240 SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY); 241 } 242 decoding_status_ = META_REFRESH_RECOVERY; 243 // Issue a meta redirect with SDCH disabled. 244 dest_buffer_excess_ = kDecompressionErrorHtml; 245 } 246 } else { 247 DCHECK(DECODING_IN_PROGRESS == decoding_status_); 248 } 249 } 250 251 int amount = OutputBufferExcess(dest_buffer, available_space); 252 *dest_len += amount; 253 dest_buffer += amount; 254 available_space -= amount; 255 DCHECK_GE(available_space, 0); 256 257 if (available_space <= 0) 258 return FILTER_OK; 259 DCHECK(dest_buffer_excess_.empty()); 260 DCHECK_EQ(0u, dest_buffer_excess_index_); 261 262 if (decoding_status_ != DECODING_IN_PROGRESS) { 263 if (META_REFRESH_RECOVERY == decoding_status_) { 264 // Absorb all input data. We've already output page reload HTML. 265 next_stream_data_ = NULL; 266 stream_data_len_ = 0; 267 return FILTER_NEED_MORE_DATA; 268 } 269 if (PASS_THROUGH == decoding_status_) { 270 // We must pass in available_space, but it will be changed to bytes_used. 271 FilterStatus result = CopyOut(dest_buffer, &available_space); 272 // Accumulate the returned count of bytes_used (a.k.a., available_space). 273 *dest_len += available_space; 274 return result; 275 } 276 DCHECK(false); 277 decoding_status_ = DECODING_ERROR; 278 return FILTER_ERROR; 279 } 280 281 if (!next_stream_data_ || stream_data_len_ <= 0) 282 return FILTER_NEED_MORE_DATA; 283 284 bool ret = vcdiff_streaming_decoder_->DecodeChunk( 285 next_stream_data_, stream_data_len_, &dest_buffer_excess_); 286 // Assume all data was used in decoding. 287 next_stream_data_ = NULL; 288 source_bytes_ += stream_data_len_; 289 stream_data_len_ = 0; 290 output_bytes_ += dest_buffer_excess_.size(); 291 if (!ret) { 292 vcdiff_streaming_decoder_.reset(NULL); // Don't call it again. 293 decoding_status_ = DECODING_ERROR; 294 SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR); 295 return FILTER_ERROR; 296 } 297 298 amount = OutputBufferExcess(dest_buffer, available_space); 299 *dest_len += amount; 300 dest_buffer += amount; 301 available_space -= amount; 302 if (0 == available_space && !dest_buffer_excess_.empty()) 303 return FILTER_OK; 304 return FILTER_NEED_MORE_DATA; 305 } 306 307 Filter::FilterStatus SdchFilter::InitializeDictionary() { 308 const size_t kServerIdLength = 9; // Dictionary hash plus null from server. 309 size_t bytes_needed = kServerIdLength - dictionary_hash_.size(); 310 DCHECK_GT(bytes_needed, 0u); 311 if (!next_stream_data_) 312 return FILTER_NEED_MORE_DATA; 313 if (static_cast<size_t>(stream_data_len_) < bytes_needed) { 314 dictionary_hash_.append(next_stream_data_, stream_data_len_); 315 next_stream_data_ = NULL; 316 stream_data_len_ = 0; 317 return FILTER_NEED_MORE_DATA; 318 } 319 dictionary_hash_.append(next_stream_data_, bytes_needed); 320 DCHECK(kServerIdLength == dictionary_hash_.size()); 321 stream_data_len_ -= bytes_needed; 322 DCHECK_LE(0, stream_data_len_); 323 if (stream_data_len_ > 0) 324 next_stream_data_ += bytes_needed; 325 else 326 next_stream_data_ = NULL; 327 328 DCHECK(!dictionary_.get()); 329 dictionary_hash_is_plausible_ = true; // Assume plausible, but check. 330 331 SdchManager::Dictionary* dictionary = NULL; 332 if ('\0' == dictionary_hash_[kServerIdLength - 1]) 333 SdchManager::Global()->GetVcdiffDictionary(std::string(dictionary_hash_, 0, 334 kServerIdLength - 1), 335 url_, &dictionary); 336 else 337 dictionary_hash_is_plausible_ = false; 338 339 if (!dictionary) { 340 DCHECK(dictionary_hash_.size() == kServerIdLength); 341 // Since dictionary was not found, check to see if hash was even plausible. 342 for (size_t i = 0; i < kServerIdLength - 1; ++i) { 343 char base64_char = dictionary_hash_[i]; 344 if (!isalnum(base64_char) && '-' != base64_char && '_' != base64_char) { 345 dictionary_hash_is_plausible_ = false; 346 break; 347 } 348 } 349 if (dictionary_hash_is_plausible_) 350 SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND); 351 else 352 SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED); 353 decoding_status_ = DECODING_ERROR; 354 return FILTER_ERROR; 355 } 356 dictionary_ = dictionary; 357 vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder); 358 vcdiff_streaming_decoder_->SetAllowVcdTarget(false); 359 vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(), 360 dictionary_->text().size()); 361 decoding_status_ = DECODING_IN_PROGRESS; 362 return FILTER_OK; 363 } 364 365 int SdchFilter::OutputBufferExcess(char* const dest_buffer, 366 size_t available_space) { 367 if (dest_buffer_excess_.empty()) 368 return 0; 369 DCHECK(dest_buffer_excess_.size() > dest_buffer_excess_index_); 370 size_t amount = std::min(available_space, 371 dest_buffer_excess_.size() - dest_buffer_excess_index_); 372 memcpy(dest_buffer, dest_buffer_excess_.data() + dest_buffer_excess_index_, 373 amount); 374 dest_buffer_excess_index_ += amount; 375 if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) { 376 DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_); 377 dest_buffer_excess_.clear(); 378 dest_buffer_excess_index_ = 0; 379 } 380 return amount; 381 } 382