Home | History | Annotate | Download | only in filter
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/filter/sdch_filter.h"
      6 
      7 #include <ctype.h>
      8 #include <limits.h>
      9 
     10 #include <algorithm>
     11 
     12 #include "base/logging.h"
     13 #include "base/metrics/histogram.h"
     14 #include "net/base/sdch_manager.h"
     15 #include "net/url_request/url_request_context.h"
     16 
     17 #include "sdch/open-vcdiff/src/google/vcdecoder.h"
     18 
     19 namespace net {
     20 
     21 namespace {
     22 
     23 // Disambiguate various types of responses that trigger a meta-refresh,
     24 // failure, or fallback to pass-through.
     25 enum ResponseCorruptionDetectionCause {
     26   RESPONSE_NONE,
     27 
     28   // 404 Http Response Code
     29   RESPONSE_404 = 1,
     30 
     31   // Not a 200 Http Response Code
     32   RESPONSE_NOT_200 = 2,
     33 
     34   // Cached before dictionary retrieved.
     35   RESPONSE_OLD_UNENCODED = 3,
     36 
     37   // Speculative but incorrect SDCH filtering was added added.
     38   RESPONSE_TENTATIVE_SDCH = 4,
     39 
     40   // Missing correct dict for decoding.
     41   RESPONSE_NO_DICTIONARY = 5,
     42 
     43   // Not an SDCH response but should be.
     44   RESPONSE_CORRUPT_SDCH = 6,
     45 
     46   // No dictionary was advertised with the request, the server claims
     47   // to have encoded with SDCH anyway, but it isn't an SDCH response.
     48   RESPONSE_ENCODING_LIE = 7,
     49 
     50   RESPONSE_MAX,
     51 };
     52 
     53 }  // namespace
     54 
     55 SdchFilter::SdchFilter(const FilterContext& filter_context)
     56     : filter_context_(filter_context),
     57       decoding_status_(DECODING_UNINITIALIZED),
     58       dictionary_hash_(),
     59       dictionary_hash_is_plausible_(false),
     60       dictionary_(NULL),
     61       url_request_context_(filter_context.GetURLRequestContext()),
     62       dest_buffer_excess_(),
     63       dest_buffer_excess_index_(0),
     64       source_bytes_(0),
     65       output_bytes_(0),
     66       possible_pass_through_(false) {
     67   bool success = filter_context.GetMimeType(&mime_type_);
     68   DCHECK(success);
     69   success = filter_context.GetURL(&url_);
     70   DCHECK(success);
     71   DCHECK(url_request_context_->sdch_manager());
     72 }
     73 
     74 SdchFilter::~SdchFilter() {
     75   // All code here is for gathering stats, and can be removed when SDCH is
     76   // considered stable.
     77 
     78   static int filter_use_count = 0;
     79   ++filter_use_count;
     80   if (META_REFRESH_RECOVERY == decoding_status_) {
     81     UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count);
     82   }
     83 
     84   if (vcdiff_streaming_decoder_.get()) {
     85     if (!vcdiff_streaming_decoder_->FinishDecoding()) {
     86       decoding_status_ = DECODING_ERROR;
     87       SdchManager::SdchErrorRecovery(SdchManager::INCOMPLETE_SDCH_CONTENT);
     88       // Make it possible for the user to hit reload, and get non-sdch content.
     89       // Note this will "wear off" quickly enough, and is just meant to assure
     90       // in some rare case that the user is not stuck.
     91       url_request_context_->sdch_manager()->BlacklistDomain(
     92           url_, SdchManager::INCOMPLETE_SDCH_CONTENT);
     93       UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn",
     94            static_cast<int>(filter_context_.GetByteReadCount()));
     95       UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_);
     96       UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_);
     97     }
     98   }
     99 
    100   if (!dest_buffer_excess_.empty()) {
    101     // Filter chaining error, or premature teardown.
    102     SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT);
    103     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn",
    104          static_cast<int>(filter_context_.GetByteReadCount()));
    105     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize",
    106                          dest_buffer_excess_.size());
    107     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_);
    108     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_);
    109   }
    110 
    111   if (filter_context_.IsCachedContent()) {
    112     // Not a real error, but it is useful to have this tally.
    113     // TODO(jar): Remove this stat after SDCH stability is validated.
    114     SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED);
    115     return;  // We don't need timing stats, and we aready got ratios.
    116   }
    117 
    118   switch (decoding_status_) {
    119     case DECODING_IN_PROGRESS: {
    120       if (output_bytes_)
    121         UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a",
    122             static_cast<int>(
    123                 (filter_context_.GetByteReadCount() * 100) / output_bytes_));
    124       UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a",
    125                            output_bytes_);
    126       filter_context_.RecordPacketStats(FilterContext::SDCH_DECODE);
    127 
    128       // Allow latency experiments to proceed.
    129       url_request_context_->sdch_manager()->SetAllowLatencyExperiment(
    130           url_, true);
    131       return;
    132     }
    133     case PASS_THROUGH: {
    134       filter_context_.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH);
    135       return;
    136     }
    137     case DECODING_UNINITIALIZED: {
    138       SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED);
    139       return;
    140     }
    141     case WAITING_FOR_DICTIONARY_SELECTION: {
    142       SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY);
    143       return;
    144     }
    145     case DECODING_ERROR: {
    146       SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR);
    147       return;
    148     }
    149     case META_REFRESH_RECOVERY: {
    150       // Already accounted for when set.
    151       return;
    152     }
    153   }  // end of switch.
    154 }
    155 
    156 bool SdchFilter::InitDecoding(Filter::FilterType filter_type) {
    157   if (decoding_status_ != DECODING_UNINITIALIZED)
    158     return false;
    159 
    160   // Handle case  where sdch filter is guessed, but not required.
    161   if (FILTER_TYPE_SDCH_POSSIBLE == filter_type)
    162     possible_pass_through_ = true;
    163 
    164   // Initialize decoder only after we have a dictionary in hand.
    165   decoding_status_ = WAITING_FOR_DICTIONARY_SELECTION;
    166   return true;
    167 }
    168 
    169 #ifndef NDEBUG
    170 static const char* kDecompressionErrorHtml =
    171   "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"
    172   "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;"
    173   "border-color:black;border-style:solid;text-align:left;font-family:arial;"
    174   "font-size:10pt;foreground-color:black;background-color:white\">"
    175   "An error occurred. This page will be reloaded shortly. "
    176   "Or press the \"reload\" button now to reload it immediately."
    177   "</div>";
    178 #else
    179 static const char* kDecompressionErrorHtml =
    180   "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>";
    181 #endif
    182 
    183 Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer,
    184                                                   int* dest_len) {
    185   int available_space = *dest_len;
    186   *dest_len = 0;  // Nothing output yet.
    187 
    188   if (!dest_buffer || available_space <= 0)
    189     return FILTER_ERROR;
    190 
    191   if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) {
    192     FilterStatus status = InitializeDictionary();
    193     if (FILTER_NEED_MORE_DATA == status)
    194       return FILTER_NEED_MORE_DATA;
    195     if (FILTER_ERROR == status) {
    196       DCHECK_EQ(DECODING_ERROR, decoding_status_);
    197       DCHECK_EQ(0u, dest_buffer_excess_index_);
    198       DCHECK(dest_buffer_excess_.empty());
    199       // This is where we try very hard to do error recovery, and make this
    200       // protocol robust in the face of proxies that do many different things.
    201       // If we decide that things are looking very bad (too hard to recover),
    202       // we may even issue a "meta-refresh" to reload the page without an SDCH
    203       // advertisement (so that we are sure we're not hurting anything).
    204       //
    205       // Watch out for an error page inserted by the proxy as part of a 40x
    206       // error response.  When we see such content molestation, we certainly
    207       // need to fall into the meta-refresh case.
    208       ResponseCorruptionDetectionCause cause = RESPONSE_NONE;
    209       if (filter_context_.GetResponseCode() == 404) {
    210         // We could be more generous, but for now, only a "NOT FOUND" code will
    211         // cause a pass through.  All other bad codes will fall into a
    212         // meta-refresh.
    213         SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE);
    214         cause = RESPONSE_404;
    215         decoding_status_ = PASS_THROUGH;
    216       } else if (filter_context_.GetResponseCode() != 200) {
    217         // We need to meta-refresh, with SDCH disabled.
    218         cause = RESPONSE_NOT_200;
    219       } else if (filter_context_.IsCachedContent()
    220                  && !dictionary_hash_is_plausible_) {
    221         // We must have hit the back button, and gotten content that was fetched
    222         // before we *really* advertised SDCH and a dictionary.
    223         SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED);
    224         decoding_status_ = PASS_THROUGH;
    225         cause = RESPONSE_OLD_UNENCODED;
    226       } else if (possible_pass_through_) {
    227         // This is the potentially most graceful response. There really was no
    228         // error. We were just overly cautious when we added a TENTATIVE_SDCH.
    229         // We added the sdch coding tag, and it should not have been added.
    230         // This can happen in server experiments, where the server decides
    231         // not to use sdch, even though there is a dictionary.  To be
    232         // conservative, we locally added the tentative sdch (fearing that a
    233         // proxy stripped it!) and we must now recant (pass through).
    234         //
    235         // However.... just to be sure we don't get burned by proxies that
    236         // re-compress with gzip or other system, we can sniff to see if this
    237         // is compressed data etc.  For now, we do nothing, which gets us into
    238         // the meta-refresh result.
    239         // TODO(jar): Improve robustness by sniffing for valid text that we can
    240         // actual use re: decoding_status_ = PASS_THROUGH;
    241         cause = RESPONSE_TENTATIVE_SDCH;
    242       } else if (dictionary_hash_is_plausible_) {
    243         // We need a meta-refresh since we don't have the dictionary.
    244         // The common cause is a restart of the browser, where we try to render
    245         // cached content that was saved when we had a dictionary.
    246         cause = RESPONSE_NO_DICTIONARY;
    247       } else if (filter_context_.SdchResponseExpected()) {
    248         // This is a very corrupt SDCH request response.  We can't decode it.
    249         // We'll use a meta-refresh, and get content without asking for SDCH.
    250         // This will also progressively disable SDCH for this domain.
    251         cause = RESPONSE_CORRUPT_SDCH;
    252       } else {
    253         // One of the first 9 bytes precluded consideration as a hash.
    254         // This can't be an SDCH payload, even though the server said it was.
    255         // This is a major error, as the server or proxy tagged this SDCH even
    256         // though it is not!
    257         // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!!
    258         // Worse yet, meta-refresh could lead to an infinite refresh loop.
    259         SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH);
    260         decoding_status_ = PASS_THROUGH;
    261         // ... but further back-off on advertising SDCH support.
    262         url_request_context_->sdch_manager()->BlacklistDomain(
    263             url_, SdchManager::PASSING_THROUGH_NON_SDCH);
    264         cause = RESPONSE_ENCODING_LIE;
    265       }
    266       DCHECK_NE(RESPONSE_NONE, cause);
    267 
    268       // Use if statement rather than ?: because UMA_HISTOGRAM_ENUMERATION
    269       // caches the histogram name based on the call site.
    270       if (filter_context_.IsCachedContent()) {
    271         UMA_HISTOGRAM_ENUMERATION(
    272             "Sdch3.ResponseCorruptionDetection.Cached", cause, RESPONSE_MAX);
    273       } else {
    274         UMA_HISTOGRAM_ENUMERATION(
    275             "Sdch3.ResponseCorruptionDetection.Uncached", cause, RESPONSE_MAX);
    276       }
    277 
    278       if (decoding_status_ == PASS_THROUGH) {
    279         dest_buffer_excess_ = dictionary_hash_;  // Send what we scanned.
    280       } else {
    281         // This is where we try to do the expensive meta-refresh.
    282         if (std::string::npos == mime_type_.find("text/html")) {
    283           // Since we can't do a meta-refresh (along with an exponential
    284           // backoff), we'll just make sure this NEVER happens again.
    285           SdchManager::ProblemCodes problem =
    286               (filter_context_.IsCachedContent() ?
    287                SdchManager::CACHED_META_REFRESH_UNSUPPORTED :
    288                SdchManager::META_REFRESH_UNSUPPORTED);
    289           url_request_context_->sdch_manager()->BlacklistDomainForever(
    290               url_, problem);
    291           SdchManager::SdchErrorRecovery(problem);
    292           return FILTER_ERROR;
    293         }
    294         // HTML content means we can issue a meta-refresh, and get the content
    295         // again, perhaps without SDCH (to be safe).
    296         if (filter_context_.IsCachedContent()) {
    297           // Cached content is probably a startup tab, so we'll just get fresh
    298           // content and try again, without disabling sdch.
    299           SdchManager::SdchErrorRecovery(
    300               SdchManager::META_REFRESH_CACHED_RECOVERY);
    301         } else {
    302           // Since it wasn't in the cache, we definately need at least some
    303           // period of blacklisting to get the correct content.
    304           url_request_context_->sdch_manager()->BlacklistDomain(
    305               url_, SdchManager::META_REFRESH_RECOVERY);
    306           SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY);
    307         }
    308         decoding_status_ = META_REFRESH_RECOVERY;
    309         // Issue a meta redirect with SDCH disabled.
    310         dest_buffer_excess_ = kDecompressionErrorHtml;
    311       }
    312     } else {
    313       DCHECK_EQ(DECODING_IN_PROGRESS, decoding_status_);
    314     }
    315   }
    316 
    317   int amount = OutputBufferExcess(dest_buffer, available_space);
    318   *dest_len += amount;
    319   dest_buffer += amount;
    320   available_space -= amount;
    321   DCHECK_GE(available_space, 0);
    322 
    323   if (available_space <= 0)
    324     return FILTER_OK;
    325   DCHECK(dest_buffer_excess_.empty());
    326   DCHECK_EQ(0u, dest_buffer_excess_index_);
    327 
    328   if (decoding_status_ != DECODING_IN_PROGRESS) {
    329     if (META_REFRESH_RECOVERY == decoding_status_) {
    330       // Absorb all input data.  We've already output page reload HTML.
    331       next_stream_data_ = NULL;
    332       stream_data_len_ = 0;
    333       return FILTER_NEED_MORE_DATA;
    334     }
    335     if (PASS_THROUGH == decoding_status_) {
    336       // We must pass in available_space, but it will be changed to bytes_used.
    337       FilterStatus result = CopyOut(dest_buffer, &available_space);
    338       // Accumulate the returned count of bytes_used (a.k.a., available_space).
    339       *dest_len += available_space;
    340       return result;
    341     }
    342     DCHECK(false);
    343     decoding_status_ = DECODING_ERROR;
    344     return FILTER_ERROR;
    345   }
    346 
    347   if (!next_stream_data_ || stream_data_len_ <= 0)
    348     return FILTER_NEED_MORE_DATA;
    349 
    350   bool ret = vcdiff_streaming_decoder_->DecodeChunk(
    351     next_stream_data_, stream_data_len_, &dest_buffer_excess_);
    352   // Assume all data was used in decoding.
    353   next_stream_data_ = NULL;
    354   source_bytes_ += stream_data_len_;
    355   stream_data_len_ = 0;
    356   output_bytes_ += dest_buffer_excess_.size();
    357   if (!ret) {
    358     vcdiff_streaming_decoder_.reset(NULL);  // Don't call it again.
    359     decoding_status_ = DECODING_ERROR;
    360     SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR);
    361     return FILTER_ERROR;
    362   }
    363 
    364   amount = OutputBufferExcess(dest_buffer, available_space);
    365   *dest_len += amount;
    366   dest_buffer += amount;
    367   available_space -= amount;
    368   if (0 == available_space && !dest_buffer_excess_.empty())
    369       return FILTER_OK;
    370   return FILTER_NEED_MORE_DATA;
    371 }
    372 
    373 Filter::FilterStatus SdchFilter::InitializeDictionary() {
    374   const size_t kServerIdLength = 9;  // Dictionary hash plus null from server.
    375   size_t bytes_needed = kServerIdLength - dictionary_hash_.size();
    376   DCHECK_GT(bytes_needed, 0u);
    377   if (!next_stream_data_)
    378     return FILTER_NEED_MORE_DATA;
    379   if (static_cast<size_t>(stream_data_len_) < bytes_needed) {
    380     dictionary_hash_.append(next_stream_data_, stream_data_len_);
    381     next_stream_data_ = NULL;
    382     stream_data_len_ = 0;
    383     return FILTER_NEED_MORE_DATA;
    384   }
    385   dictionary_hash_.append(next_stream_data_, bytes_needed);
    386   DCHECK(kServerIdLength == dictionary_hash_.size());
    387   stream_data_len_ -= bytes_needed;
    388   DCHECK_LE(0, stream_data_len_);
    389   if (stream_data_len_ > 0)
    390     next_stream_data_ += bytes_needed;
    391   else
    392     next_stream_data_ = NULL;
    393 
    394   DCHECK(!dictionary_.get());
    395   dictionary_hash_is_plausible_ = true;  // Assume plausible, but check.
    396 
    397   if ('\0' == dictionary_hash_[kServerIdLength - 1]) {
    398     SdchManager* manager(url_request_context_->sdch_manager());
    399     manager->GetVcdiffDictionary(
    400         std::string(dictionary_hash_, 0, kServerIdLength - 1),
    401         url_, &dictionary_);
    402   } else {
    403     dictionary_hash_is_plausible_ = false;
    404   }
    405 
    406   if (!dictionary_.get()) {
    407     DCHECK(dictionary_hash_.size() == kServerIdLength);
    408     // Since dictionary was not found, check to see if hash was even plausible.
    409     for (size_t i = 0; i < kServerIdLength - 1; ++i) {
    410       char base64_char = dictionary_hash_[i];
    411       if (!isalnum(base64_char) && '-' != base64_char && '_' != base64_char) {
    412         dictionary_hash_is_plausible_ = false;
    413         break;
    414       }
    415     }
    416     if (dictionary_hash_is_plausible_)
    417       SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND);
    418     else
    419       SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED);
    420     decoding_status_ = DECODING_ERROR;
    421     return FILTER_ERROR;
    422   }
    423   vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder);
    424   vcdiff_streaming_decoder_->SetAllowVcdTarget(false);
    425   vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(),
    426                                            dictionary_->text().size());
    427   decoding_status_ = DECODING_IN_PROGRESS;
    428   return FILTER_OK;
    429 }
    430 
    431 int SdchFilter::OutputBufferExcess(char* const dest_buffer,
    432                                    size_t available_space) {
    433   if (dest_buffer_excess_.empty())
    434     return 0;
    435   DCHECK(dest_buffer_excess_.size() > dest_buffer_excess_index_);
    436   size_t amount = std::min(available_space,
    437       dest_buffer_excess_.size() - dest_buffer_excess_index_);
    438   memcpy(dest_buffer, dest_buffer_excess_.data() + dest_buffer_excess_index_,
    439          amount);
    440   dest_buffer_excess_index_ += amount;
    441   if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) {
    442     DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_);
    443     dest_buffer_excess_.clear();
    444     dest_buffer_excess_index_ = 0;
    445   }
    446   return amount;
    447 }
    448 
    449 }  // namespace net
    450