Home | History | Annotate | Download | only in filter
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/filter/sdch_filter.h"
      6 
      7 #include <ctype.h>
      8 #include <limits.h>
      9 
     10 #include <algorithm>
     11 
     12 #include "base/logging.h"
     13 #include "base/metrics/histogram.h"
     14 #include "net/base/sdch_manager.h"
     15 #include "net/url_request/url_request_context.h"
     16 
     17 #include "sdch/open-vcdiff/src/google/vcdecoder.h"
     18 
     19 namespace net {
     20 
     21 SdchFilter::SdchFilter(const FilterContext& filter_context)
     22     : filter_context_(filter_context),
     23       decoding_status_(DECODING_UNINITIALIZED),
     24       dictionary_hash_(),
     25       dictionary_hash_is_plausible_(false),
     26       dictionary_(NULL),
     27       url_request_context_(filter_context.GetURLRequestContext()),
     28       dest_buffer_excess_(),
     29       dest_buffer_excess_index_(0),
     30       source_bytes_(0),
     31       output_bytes_(0),
     32       possible_pass_through_(false) {
     33   bool success = filter_context.GetMimeType(&mime_type_);
     34   DCHECK(success);
     35   success = filter_context.GetURL(&url_);
     36   DCHECK(success);
     37   DCHECK(url_request_context_->sdch_manager());
     38 }
     39 
     40 SdchFilter::~SdchFilter() {
     41   // All code here is for gathering stats, and can be removed when SDCH is
     42   // considered stable.
     43 
     44   static int filter_use_count = 0;
     45   ++filter_use_count;
     46   if (META_REFRESH_RECOVERY == decoding_status_) {
     47     UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count);
     48   }
     49 
     50   if (vcdiff_streaming_decoder_.get()) {
     51     if (!vcdiff_streaming_decoder_->FinishDecoding()) {
     52       decoding_status_ = DECODING_ERROR;
     53       SdchManager::SdchErrorRecovery(SdchManager::INCOMPLETE_SDCH_CONTENT);
     54       // Make it possible for the user to hit reload, and get non-sdch content.
     55       // Note this will "wear off" quickly enough, and is just meant to assure
     56       // in some rare case that the user is not stuck.
     57       url_request_context_->sdch_manager()->BlacklistDomain(
     58           url_);
     59       UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn",
     60            static_cast<int>(filter_context_.GetByteReadCount()));
     61       UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_);
     62       UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_);
     63     }
     64   }
     65 
     66   if (!dest_buffer_excess_.empty()) {
     67     // Filter chaining error, or premature teardown.
     68     SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT);
     69     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn",
     70          static_cast<int>(filter_context_.GetByteReadCount()));
     71     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize",
     72                          dest_buffer_excess_.size());
     73     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_);
     74     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_);
     75   }
     76 
     77   if (filter_context_.IsCachedContent()) {
     78     // Not a real error, but it is useful to have this tally.
     79     // TODO(jar): Remove this stat after SDCH stability is validated.
     80     SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED);
     81     return;  // We don't need timing stats, and we aready got ratios.
     82   }
     83 
     84   switch (decoding_status_) {
     85     case DECODING_IN_PROGRESS: {
     86       if (output_bytes_)
     87         UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a",
     88             static_cast<int>(
     89                 (filter_context_.GetByteReadCount() * 100) / output_bytes_));
     90       UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a",
     91                            output_bytes_);
     92       filter_context_.RecordPacketStats(FilterContext::SDCH_DECODE);
     93 
     94       // Allow latency experiments to proceed.
     95       url_request_context_->sdch_manager()->SetAllowLatencyExperiment(
     96           url_, true);
     97       return;
     98     }
     99     case PASS_THROUGH: {
    100       filter_context_.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH);
    101       return;
    102     }
    103     case DECODING_UNINITIALIZED: {
    104       SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED);
    105       return;
    106     }
    107     case WAITING_FOR_DICTIONARY_SELECTION: {
    108       SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY);
    109       return;
    110     }
    111     case DECODING_ERROR: {
    112       SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR);
    113       return;
    114     }
    115     case META_REFRESH_RECOVERY: {
    116       // Already accounted for when set.
    117       return;
    118     }
    119   }  // end of switch.
    120 }
    121 
    122 bool SdchFilter::InitDecoding(Filter::FilterType filter_type) {
    123   if (decoding_status_ != DECODING_UNINITIALIZED)
    124     return false;
    125 
    126   // Handle case  where sdch filter is guessed, but not required.
    127   if (FILTER_TYPE_SDCH_POSSIBLE == filter_type)
    128     possible_pass_through_ = true;
    129 
    130   // Initialize decoder only after we have a dictionary in hand.
    131   decoding_status_ = WAITING_FOR_DICTIONARY_SELECTION;
    132   return true;
    133 }
    134 
    135 #ifndef NDEBUG
    136 static const char* kDecompressionErrorHtml =
    137   "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"
    138   "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;"
    139   "border-color:black;border-style:solid;text-align:left;font-family:arial;"
    140   "font-size:10pt;foreground-color:black;background-color:white\">"
    141   "An error occurred. This page will be reloaded shortly. "
    142   "Or press the \"reload\" button now to reload it immediately."
    143   "</div>";
    144 #else
    145 static const char* kDecompressionErrorHtml =
    146   "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>";
    147 #endif
    148 
    149 Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer,
    150                                                   int* dest_len) {
    151   int available_space = *dest_len;
    152   *dest_len = 0;  // Nothing output yet.
    153 
    154   if (!dest_buffer || available_space <= 0)
    155     return FILTER_ERROR;
    156 
    157   if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) {
    158     FilterStatus status = InitializeDictionary();
    159     if (FILTER_NEED_MORE_DATA == status)
    160       return FILTER_NEED_MORE_DATA;
    161     if (FILTER_ERROR == status) {
    162       DCHECK_EQ(DECODING_ERROR, decoding_status_);
    163       DCHECK_EQ(0u, dest_buffer_excess_index_);
    164       DCHECK(dest_buffer_excess_.empty());
    165       // This is where we try very hard to do error recovery, and make this
    166       // protocol robust in the face of proxies that do many different things.
    167       // If we decide that things are looking very bad (too hard to recover),
    168       // we may even issue a "meta-refresh" to reload the page without an SDCH
    169       // advertisement (so that we are sure we're not hurting anything).
    170       //
    171       // Watch out for an error page inserted by the proxy as part of a 40x
    172       // error response.  When we see such content molestation, we certainly
    173       // need to fall into the meta-refresh case.
    174       if (filter_context_.GetResponseCode() == 404) {
    175         // We could be more generous, but for now, only a "NOT FOUND" code will
    176         // cause a pass through.  All other bad codes will fall into a
    177         // meta-refresh.
    178         SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE);
    179         decoding_status_ = PASS_THROUGH;
    180       } else if (filter_context_.GetResponseCode() != 200) {
    181         // We need to meta-refresh, with SDCH disabled.
    182       } else if (filter_context_.IsCachedContent()
    183                  && !dictionary_hash_is_plausible_) {
    184         // We must have hit the back button, and gotten content that was fetched
    185         // before we *really* advertised SDCH and a dictionary.
    186         SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED);
    187         decoding_status_ = PASS_THROUGH;
    188       } else if (possible_pass_through_) {
    189         // This is the potentially most graceful response. There really was no
    190         // error. We were just overly cautious when we added a TENTATIVE_SDCH.
    191         // We added the sdch coding tag, and it should not have been added.
    192         // This can happen in server experiments, where the server decides
    193         // not to use sdch, even though there is a dictionary.  To be
    194         // conservative, we locally added the tentative sdch (fearing that a
    195         // proxy stripped it!) and we must now recant (pass through).
    196         SdchManager::SdchErrorRecovery(SdchManager::DISCARD_TENTATIVE_SDCH);
    197         // However.... just to be sure we don't get burned by proxies that
    198         // re-compress with gzip or other system, we can sniff to see if this
    199         // is compressed data etc.  For now, we do nothing, which gets us into
    200         // the meta-refresh result.
    201         // TODO(jar): Improve robustness by sniffing for valid text that we can
    202         // actual use re: decoding_status_ = PASS_THROUGH;
    203       } else if (dictionary_hash_is_plausible_) {
    204         // We need a meta-refresh since we don't have the dictionary.
    205         // The common cause is a restart of the browser, where we try to render
    206         // cached content that was saved when we had a dictionary.
    207       } else if (filter_context_.IsSdchResponse()) {
    208         // This is a very corrupt SDCH request response.  We can't decode it.
    209         // We'll use a meta-refresh, and get content without asking for SDCH.
    210         // This will also progressively disable SDCH for this domain.
    211       } else {
    212         // One of the first 9 bytes precluded consideration as a hash.
    213         // This can't be an SDCH payload, even though the server said it was.
    214         // This is a major error, as the server or proxy tagged this SDCH even
    215         // though it is not!
    216         // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!!
    217         // Worse yet, meta-refresh could lead to an infinite refresh loop.
    218         SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH);
    219         decoding_status_ = PASS_THROUGH;
    220         // ... but further back-off on advertising SDCH support.
    221         url_request_context_->sdch_manager()->BlacklistDomain(url_);
    222       }
    223 
    224       if (decoding_status_ == PASS_THROUGH) {
    225         dest_buffer_excess_ = dictionary_hash_;  // Send what we scanned.
    226       } else {
    227         // This is where we try to do the expensive meta-refresh.
    228         if (std::string::npos == mime_type_.find("text/html")) {
    229           // Since we can't do a meta-refresh (along with an exponential
    230           // backoff), we'll just make sure this NEVER happens again.
    231           url_request_context_->sdch_manager()->BlacklistDomainForever(url_);
    232           if (filter_context_.IsCachedContent())
    233             SdchManager::SdchErrorRecovery(
    234                 SdchManager::CACHED_META_REFRESH_UNSUPPORTED);
    235           else
    236             SdchManager::SdchErrorRecovery(
    237                 SdchManager::META_REFRESH_UNSUPPORTED);
    238           return FILTER_ERROR;
    239         }
    240         // HTML content means we can issue a meta-refresh, and get the content
    241         // again, perhaps without SDCH (to be safe).
    242         if (filter_context_.IsCachedContent()) {
    243           // Cached content is probably a startup tab, so we'll just get fresh
    244           // content and try again, without disabling sdch.
    245           SdchManager::SdchErrorRecovery(
    246               SdchManager::META_REFRESH_CACHED_RECOVERY);
    247         } else {
    248           // Since it wasn't in the cache, we definately need at least some
    249           // period of blacklisting to get the correct content.
    250           url_request_context_->sdch_manager()->BlacklistDomain(url_);
    251           SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY);
    252         }
    253         decoding_status_ = META_REFRESH_RECOVERY;
    254         // Issue a meta redirect with SDCH disabled.
    255         dest_buffer_excess_ = kDecompressionErrorHtml;
    256       }
    257     } else {
    258       DCHECK_EQ(DECODING_IN_PROGRESS, decoding_status_);
    259     }
    260   }
    261 
    262   int amount = OutputBufferExcess(dest_buffer, available_space);
    263   *dest_len += amount;
    264   dest_buffer += amount;
    265   available_space -= amount;
    266   DCHECK_GE(available_space, 0);
    267 
    268   if (available_space <= 0)
    269     return FILTER_OK;
    270   DCHECK(dest_buffer_excess_.empty());
    271   DCHECK_EQ(0u, dest_buffer_excess_index_);
    272 
    273   if (decoding_status_ != DECODING_IN_PROGRESS) {
    274     if (META_REFRESH_RECOVERY == decoding_status_) {
    275       // Absorb all input data.  We've already output page reload HTML.
    276       next_stream_data_ = NULL;
    277       stream_data_len_ = 0;
    278       return FILTER_NEED_MORE_DATA;
    279     }
    280     if (PASS_THROUGH == decoding_status_) {
    281       // We must pass in available_space, but it will be changed to bytes_used.
    282       FilterStatus result = CopyOut(dest_buffer, &available_space);
    283       // Accumulate the returned count of bytes_used (a.k.a., available_space).
    284       *dest_len += available_space;
    285       return result;
    286     }
    287     DCHECK(false);
    288     decoding_status_ = DECODING_ERROR;
    289     return FILTER_ERROR;
    290   }
    291 
    292   if (!next_stream_data_ || stream_data_len_ <= 0)
    293     return FILTER_NEED_MORE_DATA;
    294 
    295   bool ret = vcdiff_streaming_decoder_->DecodeChunk(
    296     next_stream_data_, stream_data_len_, &dest_buffer_excess_);
    297   // Assume all data was used in decoding.
    298   next_stream_data_ = NULL;
    299   source_bytes_ += stream_data_len_;
    300   stream_data_len_ = 0;
    301   output_bytes_ += dest_buffer_excess_.size();
    302   if (!ret) {
    303     vcdiff_streaming_decoder_.reset(NULL);  // Don't call it again.
    304     decoding_status_ = DECODING_ERROR;
    305     SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR);
    306     return FILTER_ERROR;
    307   }
    308 
    309   amount = OutputBufferExcess(dest_buffer, available_space);
    310   *dest_len += amount;
    311   dest_buffer += amount;
    312   available_space -= amount;
    313   if (0 == available_space && !dest_buffer_excess_.empty())
    314       return FILTER_OK;
    315   return FILTER_NEED_MORE_DATA;
    316 }
    317 
    318 Filter::FilterStatus SdchFilter::InitializeDictionary() {
    319   const size_t kServerIdLength = 9;  // Dictionary hash plus null from server.
    320   size_t bytes_needed = kServerIdLength - dictionary_hash_.size();
    321   DCHECK_GT(bytes_needed, 0u);
    322   if (!next_stream_data_)
    323     return FILTER_NEED_MORE_DATA;
    324   if (static_cast<size_t>(stream_data_len_) < bytes_needed) {
    325     dictionary_hash_.append(next_stream_data_, stream_data_len_);
    326     next_stream_data_ = NULL;
    327     stream_data_len_ = 0;
    328     return FILTER_NEED_MORE_DATA;
    329   }
    330   dictionary_hash_.append(next_stream_data_, bytes_needed);
    331   DCHECK(kServerIdLength == dictionary_hash_.size());
    332   stream_data_len_ -= bytes_needed;
    333   DCHECK_LE(0, stream_data_len_);
    334   if (stream_data_len_ > 0)
    335     next_stream_data_ += bytes_needed;
    336   else
    337     next_stream_data_ = NULL;
    338 
    339   DCHECK(!dictionary_);
    340   dictionary_hash_is_plausible_ = true;  // Assume plausible, but check.
    341 
    342   if ('\0' == dictionary_hash_[kServerIdLength - 1]) {
    343     SdchManager* manager(url_request_context_->sdch_manager());
    344     manager->GetVcdiffDictionary(
    345         std::string(dictionary_hash_, 0, kServerIdLength - 1),
    346         url_, &dictionary_);
    347   } else {
    348     dictionary_hash_is_plausible_ = false;
    349   }
    350 
    351   if (!dictionary_) {
    352     DCHECK(dictionary_hash_.size() == kServerIdLength);
    353     // Since dictionary was not found, check to see if hash was even plausible.
    354     for (size_t i = 0; i < kServerIdLength - 1; ++i) {
    355       char base64_char = dictionary_hash_[i];
    356       if (!isalnum(base64_char) && '-' != base64_char && '_' != base64_char) {
    357         dictionary_hash_is_plausible_ = false;
    358         break;
    359       }
    360     }
    361     if (dictionary_hash_is_plausible_)
    362       SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND);
    363     else
    364       SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED);
    365     decoding_status_ = DECODING_ERROR;
    366     return FILTER_ERROR;
    367   }
    368   vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder);
    369   vcdiff_streaming_decoder_->SetAllowVcdTarget(false);
    370   vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(),
    371                                            dictionary_->text().size());
    372   decoding_status_ = DECODING_IN_PROGRESS;
    373   return FILTER_OK;
    374 }
    375 
    376 int SdchFilter::OutputBufferExcess(char* const dest_buffer,
    377                                    size_t available_space) {
    378   if (dest_buffer_excess_.empty())
    379     return 0;
    380   DCHECK(dest_buffer_excess_.size() > dest_buffer_excess_index_);
    381   size_t amount = std::min(available_space,
    382       dest_buffer_excess_.size() - dest_buffer_excess_index_);
    383   memcpy(dest_buffer, dest_buffer_excess_.data() + dest_buffer_excess_index_,
    384          amount);
    385   dest_buffer_excess_index_ += amount;
    386   if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) {
    387     DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_);
    388     dest_buffer_excess_.clear();
    389     dest_buffer_excess_index_ = 0;
    390   }
    391   return amount;
    392 }
    393 
    394 }  // namespace net
    395