Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <limits.h>
      6 #include <ctype.h>
      7 #include <algorithm>
      8 
      9 #include "base/file_util.h"
     10 #include "base/histogram.h"
     11 #include "base/logging.h"
     12 #include "net/base/sdch_filter.h"
     13 #include "net/base/sdch_manager.h"
     14 
     15 #include "sdch/open-vcdiff/src/google/vcdecoder.h"
     16 
     17 SdchFilter::SdchFilter(const FilterContext& filter_context)
     18     : Filter(filter_context),
     19       decoding_status_(DECODING_UNINITIALIZED),
     20       vcdiff_streaming_decoder_(NULL),
     21       dictionary_hash_(),
     22       dictionary_hash_is_plausible_(false),
     23       dictionary_(NULL),
     24       dest_buffer_excess_(),
     25       dest_buffer_excess_index_(0),
     26       source_bytes_(0),
     27       output_bytes_(0),
     28       possible_pass_through_(false) {
     29   bool success = filter_context.GetMimeType(&mime_type_);
     30   DCHECK(success);
     31   success = filter_context.GetURL(&url_);
     32   DCHECK(success);
     33 }
     34 
     35 SdchFilter::~SdchFilter() {
     36   // All code here is for gathering stats, and can be removed when SDCH is
     37   // considered stable.
     38 
     39   static int filter_use_count = 0;
     40   ++filter_use_count;
     41   if (META_REFRESH_RECOVERY == decoding_status_) {
     42     UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count);
     43   }
     44 
     45   if (vcdiff_streaming_decoder_.get()) {
     46     if (!vcdiff_streaming_decoder_->FinishDecoding()) {
     47       decoding_status_ = DECODING_ERROR;
     48       SdchManager::SdchErrorRecovery(SdchManager::INCOMPLETE_SDCH_CONTENT);
     49       // Make it possible for the user to hit reload, and get non-sdch content.
     50       // Note this will "wear off" quickly enough, and is just meant to assure
     51       // in some rare case that the user is not stuck.
     52       SdchManager::BlacklistDomain(url_);
     53       UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn",
     54            static_cast<int>(filter_context().GetByteReadCount()));
     55       UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_);
     56       UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_);
     57     }
     58   }
     59 
     60   if (!dest_buffer_excess_.empty()) {
     61     // Filter chaining error, or premature teardown.
     62     SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT);
     63     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn",
     64          static_cast<int>(filter_context().GetByteReadCount()));
     65     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize",
     66                          dest_buffer_excess_.size());
     67     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_);
     68     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_);
     69   }
     70 
     71   if (filter_context().IsCachedContent()) {
     72     // Not a real error, but it is useful to have this tally.
     73     // TODO(jar): Remove this stat after SDCH stability is validated.
     74     SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED);
     75     return;  // We don't need timing stats, and we aready got ratios.
     76   }
     77 
     78   switch (decoding_status_) {
     79     case DECODING_IN_PROGRESS: {
     80       if (output_bytes_)
     81         UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a",
     82             static_cast<int>(
     83                 (filter_context().GetByteReadCount() * 100) / output_bytes_));
     84       UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a",
     85                            output_bytes_);
     86       filter_context().RecordPacketStats(FilterContext::SDCH_DECODE);
     87 
     88       // Allow latency experiments to proceed.
     89       SdchManager::Global()->SetAllowLatencyExperiment(url_, true);
     90       return;
     91     }
     92     case PASS_THROUGH: {
     93       filter_context().RecordPacketStats(FilterContext::SDCH_PASSTHROUGH);
     94       return;
     95     }
     96     case DECODING_UNINITIALIZED: {
     97       SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED);
     98       return;
     99     }
    100     case WAITING_FOR_DICTIONARY_SELECTION: {
    101       SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY);
    102       return;
    103     }
    104     case DECODING_ERROR: {
    105       SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR);
    106       return;
    107     }
    108     case META_REFRESH_RECOVERY: {
    109       // Already accounted for when set.
    110       return;
    111     }
    112   }  // end of switch.
    113 }
    114 
    115 bool SdchFilter::InitDecoding(Filter::FilterType filter_type) {
    116   if (decoding_status_ != DECODING_UNINITIALIZED)
    117     return false;
    118 
    119   // Handle case  where sdch filter is guessed, but not required.
    120   if (FILTER_TYPE_SDCH_POSSIBLE == filter_type)
    121     possible_pass_through_ = true;
    122 
    123   // Initialize decoder only after we have a dictionary in hand.
    124   decoding_status_ = WAITING_FOR_DICTIONARY_SELECTION;
    125   return true;
    126 }
    127 
    128 static const char* kDecompressionErrorHtml =
    129   "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"
    130   "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;"
    131   "border-color:black;border-style:solid;text-align:left;font-family:arial;"
    132   "font-size:10pt;foreground-color:black;background-color:white\">"
    133   "An error occurred. This page will be reloaded shortly. "
    134   "Or press the \"reload\" button now to reload it immediately."
    135   "</div>";
    136 
    137 
    138 Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer,
    139                                                   int* dest_len) {
    140   int available_space = *dest_len;
    141   *dest_len = 0;  // Nothing output yet.
    142 
    143   if (!dest_buffer || available_space <= 0)
    144     return FILTER_ERROR;
    145 
    146   if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) {
    147     FilterStatus status = InitializeDictionary();
    148     if (FILTER_NEED_MORE_DATA == status)
    149       return FILTER_NEED_MORE_DATA;
    150     if (FILTER_ERROR == status) {
    151       DCHECK(DECODING_ERROR == decoding_status_);
    152       DCHECK_EQ(0u, dest_buffer_excess_index_);
    153       DCHECK(dest_buffer_excess_.empty());
    154       // This is where we try very hard to do error recovery, and make this
    155       // protocol robust in the face of proxies that do many different things.
    156       // If we decide that things are looking very bad (too hard to recover),
    157       // we may even issue a "meta-refresh" to reload the page without an SDCH
    158       // advertisement (so that we are sure we're not hurting anything).
    159       //
    160       // Watch out for an error page inserted by the proxy as part of a 40x
    161       // error response.  When we see such content molestation, we certainly
    162       // need to fall into the meta-refresh case.
    163       if (filter_context().GetResponseCode() == 404) {
    164         // We could be more generous, but for now, only a "NOT FOUND" code will
    165         // cause a pass through.  All other bad codes will fall into a
    166         // meta-refresh.
    167         SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE);
    168         decoding_status_ = PASS_THROUGH;
    169       } else if (filter_context().GetResponseCode() != 200) {
    170         // We need to meta-refresh, with SDCH disabled.
    171       } else if (filter_context().IsCachedContent()
    172                  && !dictionary_hash_is_plausible_) {
    173         // We must have hit the back button, and gotten content that was fetched
    174         // before we *really* advertised SDCH and a dictionary.
    175         SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED);
    176         decoding_status_ = PASS_THROUGH;
    177       } else if (possible_pass_through_) {
    178         // This is the potentially most graceful response. There really was no
    179         // error. We were just overly cautious when we added a TENTATIVE_SDCH.
    180         // We added the sdch coding tag, and it should not have been added.
    181         // This can happen in server experiments, where the server decides
    182         // not to use sdch, even though there is a dictionary.  To be
    183         // conservative, we locally added the tentative sdch (fearing that a
    184         // proxy stripped it!) and we must now recant (pass through).
    185         SdchManager::SdchErrorRecovery(SdchManager::DISCARD_TENTATIVE_SDCH);
    186         // However.... just to be sure we don't get burned by proxies that
    187         // re-compress with gzip or other system, we can sniff to see if this
    188         // is compressed data etc.  For now, we do nothing, which gets us into
    189         // the meta-refresh result.
    190         // TODO(jar): Improve robustness by sniffing for valid text that we can
    191         // actual use re: decoding_status_ = PASS_THROUGH;
    192       } else if (dictionary_hash_is_plausible_) {
    193         // We need a meta-refresh since we don't have the dictionary.
    194         // The common cause is a restart of the browser, where we try to render
    195         // cached content that was saved when we had a dictionary.
    196       } else if (filter_context().IsSdchResponse()) {
    197         // This is a very corrupt SDCH request response.  We can't decode it.
    198         // We'll use a meta-refresh, and get content without asking for SDCH.
    199         // This will also progressively disable SDCH for this domain.
    200       } else {
    201         // One of the first 9 bytes precluded consideration as a hash.
    202         // This can't be an SDCH payload, even though the server said it was.
    203         // This is a major error, as the server or proxy tagged this SDCH even
    204         // though it is not!
    205         // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!!
    206         // Worse yet, meta-refresh could lead to an infinite refresh loop.
    207         SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH);
    208         decoding_status_ = PASS_THROUGH;
    209         // ... but further back-off on advertising SDCH support.
    210         SdchManager::BlacklistDomain(url_);
    211       }
    212 
    213       if (decoding_status_ == PASS_THROUGH) {
    214         dest_buffer_excess_ = dictionary_hash_;  // Send what we scanned.
    215       } else {
    216         // This is where we try to do the expensive meta-refresh.
    217         if (std::string::npos == mime_type_.find("text/html")) {
    218           // Since we can't do a meta-refresh (along with an exponential
    219           // backoff), we'll just make sure this NEVER happens again.
    220           SdchManager::BlacklistDomainForever(url_);
    221           if (filter_context().IsCachedContent())
    222             SdchManager::SdchErrorRecovery(
    223                 SdchManager::CACHED_META_REFRESH_UNSUPPORTED);
    224           else
    225             SdchManager::SdchErrorRecovery(
    226                 SdchManager::META_REFRESH_UNSUPPORTED);
    227           return FILTER_ERROR;
    228         }
    229         // HTML content means we can issue a meta-refresh, and get the content
    230         // again, perhaps without SDCH (to be safe).
    231         if (filter_context().IsCachedContent()) {
    232           // Cached content is probably a startup tab, so we'll just get fresh
    233           // content and try again, without disabling sdch.
    234           SdchManager::SdchErrorRecovery(
    235               SdchManager::META_REFRESH_CACHED_RECOVERY);
    236         } else {
    237           // Since it wasn't in the cache, we definately need at least some
    238           // period of blacklisting to get the correct content.
    239           SdchManager::BlacklistDomain(url_);
    240           SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY);
    241         }
    242         decoding_status_ = META_REFRESH_RECOVERY;
    243         // Issue a meta redirect with SDCH disabled.
    244         dest_buffer_excess_ = kDecompressionErrorHtml;
    245       }
    246     } else {
    247       DCHECK(DECODING_IN_PROGRESS == decoding_status_);
    248     }
    249   }
    250 
    251   int amount = OutputBufferExcess(dest_buffer, available_space);
    252   *dest_len += amount;
    253   dest_buffer += amount;
    254   available_space -= amount;
    255   DCHECK_GE(available_space, 0);
    256 
    257   if (available_space <= 0)
    258     return FILTER_OK;
    259   DCHECK(dest_buffer_excess_.empty());
    260   DCHECK_EQ(0u, dest_buffer_excess_index_);
    261 
    262   if (decoding_status_ != DECODING_IN_PROGRESS) {
    263     if (META_REFRESH_RECOVERY == decoding_status_) {
    264       // Absorb all input data.  We've already output page reload HTML.
    265       next_stream_data_ = NULL;
    266       stream_data_len_ = 0;
    267       return FILTER_NEED_MORE_DATA;
    268     }
    269     if (PASS_THROUGH == decoding_status_) {
    270       // We must pass in available_space, but it will be changed to bytes_used.
    271       FilterStatus result = CopyOut(dest_buffer, &available_space);
    272       // Accumulate the returned count of bytes_used (a.k.a., available_space).
    273       *dest_len += available_space;
    274       return result;
    275     }
    276     DCHECK(false);
    277     decoding_status_ = DECODING_ERROR;
    278     return FILTER_ERROR;
    279   }
    280 
    281   if (!next_stream_data_ || stream_data_len_ <= 0)
    282     return FILTER_NEED_MORE_DATA;
    283 
    284   bool ret = vcdiff_streaming_decoder_->DecodeChunk(
    285     next_stream_data_, stream_data_len_, &dest_buffer_excess_);
    286   // Assume all data was used in decoding.
    287   next_stream_data_ = NULL;
    288   source_bytes_ += stream_data_len_;
    289   stream_data_len_ = 0;
    290   output_bytes_ += dest_buffer_excess_.size();
    291   if (!ret) {
    292     vcdiff_streaming_decoder_.reset(NULL);  // Don't call it again.
    293     decoding_status_ = DECODING_ERROR;
    294     SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR);
    295     return FILTER_ERROR;
    296   }
    297 
    298   amount = OutputBufferExcess(dest_buffer, available_space);
    299   *dest_len += amount;
    300   dest_buffer += amount;
    301   available_space -= amount;
    302   if (0 == available_space && !dest_buffer_excess_.empty())
    303       return FILTER_OK;
    304   return FILTER_NEED_MORE_DATA;
    305 }
    306 
    307 Filter::FilterStatus SdchFilter::InitializeDictionary() {
    308   const size_t kServerIdLength = 9;  // Dictionary hash plus null from server.
    309   size_t bytes_needed = kServerIdLength - dictionary_hash_.size();
    310   DCHECK_GT(bytes_needed, 0u);
    311   if (!next_stream_data_)
    312     return FILTER_NEED_MORE_DATA;
    313   if (static_cast<size_t>(stream_data_len_) < bytes_needed) {
    314     dictionary_hash_.append(next_stream_data_, stream_data_len_);
    315     next_stream_data_ = NULL;
    316     stream_data_len_ = 0;
    317     return FILTER_NEED_MORE_DATA;
    318   }
    319   dictionary_hash_.append(next_stream_data_, bytes_needed);
    320   DCHECK(kServerIdLength == dictionary_hash_.size());
    321   stream_data_len_ -= bytes_needed;
    322   DCHECK_LE(0, stream_data_len_);
    323   if (stream_data_len_ > 0)
    324     next_stream_data_ += bytes_needed;
    325   else
    326     next_stream_data_ = NULL;
    327 
    328   DCHECK(!dictionary_.get());
    329   dictionary_hash_is_plausible_ = true;  // Assume plausible, but check.
    330 
    331   SdchManager::Dictionary* dictionary = NULL;
    332   if ('\0' == dictionary_hash_[kServerIdLength - 1])
    333     SdchManager::Global()->GetVcdiffDictionary(std::string(dictionary_hash_, 0,
    334                                                            kServerIdLength - 1),
    335                                                url_, &dictionary);
    336   else
    337     dictionary_hash_is_plausible_ = false;
    338 
    339   if (!dictionary) {
    340     DCHECK(dictionary_hash_.size() == kServerIdLength);
    341     // Since dictionary was not found, check to see if hash was even plausible.
    342     for (size_t i = 0; i < kServerIdLength - 1; ++i) {
    343       char base64_char = dictionary_hash_[i];
    344       if (!isalnum(base64_char) && '-' != base64_char && '_' != base64_char) {
    345         dictionary_hash_is_plausible_ = false;
    346         break;
    347       }
    348     }
    349     if (dictionary_hash_is_plausible_)
    350       SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND);
    351     else
    352       SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED);
    353     decoding_status_ = DECODING_ERROR;
    354     return FILTER_ERROR;
    355   }
    356   dictionary_ = dictionary;
    357   vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder);
    358   vcdiff_streaming_decoder_->SetAllowVcdTarget(false);
    359   vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(),
    360                                            dictionary_->text().size());
    361   decoding_status_ = DECODING_IN_PROGRESS;
    362   return FILTER_OK;
    363 }
    364 
    365 int SdchFilter::OutputBufferExcess(char* const dest_buffer,
    366                                    size_t available_space) {
    367   if (dest_buffer_excess_.empty())
    368     return 0;
    369   DCHECK(dest_buffer_excess_.size() > dest_buffer_excess_index_);
    370   size_t amount = std::min(available_space,
    371       dest_buffer_excess_.size() - dest_buffer_excess_index_);
    372   memcpy(dest_buffer, dest_buffer_excess_.data() + dest_buffer_excess_index_,
    373          amount);
    374   dest_buffer_excess_index_ += amount;
    375   if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) {
    376     DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_);
    377     dest_buffer_excess_.clear();
    378     dest_buffer_excess_index_ = 0;
    379   }
    380   return amount;
    381 }
    382