Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/base/gzip_filter.h"
      6 
      7 #if defined(USE_SYSTEM_ZLIB)
      8 #include <zlib.h>
      9 #else
     10 #include "third_party/zlib/zlib.h"
     11 #endif
     12 
     13 #include "base/logging.h"
     14 #include "net/base/gzip_header.h"
     15 
     16 namespace net {
     17 
     18 GZipFilter::GZipFilter()
     19     : decoding_status_(DECODING_UNINITIALIZED),
     20       decoding_mode_(DECODE_MODE_UNKNOWN),
     21       gzip_header_status_(GZIP_CHECK_HEADER_IN_PROGRESS),
     22       zlib_header_added_(false),
     23       gzip_footer_bytes_(0),
     24       possible_sdch_pass_through_(false) {
     25 }
     26 
     27 GZipFilter::~GZipFilter() {
     28   if (decoding_status_ != DECODING_UNINITIALIZED) {
     29     inflateEnd(zlib_stream_.get());
     30   }
     31 }
     32 
     33 bool GZipFilter::InitDecoding(Filter::FilterType filter_type) {
     34   if (decoding_status_ != DECODING_UNINITIALIZED)
     35     return false;
     36 
     37   // Initialize zlib control block
     38   zlib_stream_.reset(new z_stream);
     39   if (!zlib_stream_.get())
     40     return false;
     41   memset(zlib_stream_.get(), 0, sizeof(z_stream));
     42 
     43   // Set decoding mode
     44   switch (filter_type) {
     45     case Filter::FILTER_TYPE_DEFLATE: {
     46       if (inflateInit(zlib_stream_.get()) != Z_OK)
     47         return false;
     48       decoding_mode_ = DECODE_MODE_DEFLATE;
     49       break;
     50     }
     51     case Filter::FILTER_TYPE_GZIP_HELPING_SDCH:
     52       possible_sdch_pass_through_ =  true;  // Needed to optionally help sdch.
     53       // Fall through to GZIP case.
     54     case Filter::FILTER_TYPE_GZIP: {
     55       gzip_header_.reset(new GZipHeader());
     56       if (!gzip_header_.get())
     57         return false;
     58       if (inflateInit2(zlib_stream_.get(), -MAX_WBITS) != Z_OK)
     59         return false;
     60       decoding_mode_ = DECODE_MODE_GZIP;
     61       break;
     62     }
     63     default: {
     64       return false;
     65     }
     66   }
     67 
     68   decoding_status_ = DECODING_IN_PROGRESS;
     69   return true;
     70 }
     71 
     72 Filter::FilterStatus GZipFilter::ReadFilteredData(char* dest_buffer,
     73                                                   int* dest_len) {
     74   if (!dest_buffer || !dest_len || *dest_len <= 0)
     75     return Filter::FILTER_ERROR;
     76 
     77   if (decoding_status_ == DECODING_DONE) {
     78     if (GZIP_GET_INVALID_HEADER != gzip_header_status_)
     79       SkipGZipFooter();
     80     // Some server might send extra data after the gzip footer. We just copy
     81     // them out. Mozilla does this too.
     82     return CopyOut(dest_buffer, dest_len);
     83   }
     84 
     85   if (decoding_status_ != DECODING_IN_PROGRESS)
     86     return Filter::FILTER_ERROR;
     87 
     88   Filter::FilterStatus status;
     89 
     90   if (decoding_mode_ == DECODE_MODE_GZIP &&
     91       gzip_header_status_ == GZIP_CHECK_HEADER_IN_PROGRESS) {
     92     // With gzip encoding the content is wrapped with a gzip header.
     93     // We need to parse and verify the header first.
     94     status = CheckGZipHeader();
     95     switch (status) {
     96       case Filter::FILTER_NEED_MORE_DATA: {
     97         // We have consumed all input data, either getting a complete header or
     98         // a partial header. Return now to get more data.
     99         *dest_len = 0;
    100         // Partial header means it can't be an SDCH header.
    101         // Reason: SDCH *always* starts with 8 printable characters [a-zA-Z/_].
    102         // Gzip always starts with two non-printable characters.  Hence even a
    103         // single character (partial header) means that this can't be an SDCH
    104         // encoded body masquerading as a GZIP body.
    105         possible_sdch_pass_through_ = false;
    106         return status;
    107       }
    108       case Filter::FILTER_OK: {
    109         // The header checking succeeds, and there are more data in the input.
    110         // We must have got a complete header here.
    111         DCHECK_EQ(gzip_header_status_, GZIP_GET_COMPLETE_HEADER);
    112         break;
    113       }
    114       case Filter::FILTER_ERROR: {
    115         if (possible_sdch_pass_through_ &&
    116             GZIP_GET_INVALID_HEADER == gzip_header_status_) {
    117           decoding_status_ = DECODING_DONE;  // Become a pass through filter.
    118           return CopyOut(dest_buffer, dest_len);
    119         }
    120         decoding_status_ = DECODING_ERROR;
    121         return status;
    122       }
    123       default: {
    124         status = Filter::FILTER_ERROR;    // Unexpected.
    125         decoding_status_ = DECODING_ERROR;
    126         return status;
    127       }
    128     }
    129   }
    130 
    131   int dest_orig_size = *dest_len;
    132   status = DoInflate(dest_buffer, dest_len);
    133 
    134   if (decoding_mode_ == DECODE_MODE_DEFLATE && status == Filter::FILTER_ERROR) {
    135     // As noted in Mozilla implementation, some servers such as Apache with
    136     // mod_deflate don't generate zlib headers.
    137     // See 677409 for instances where this work around is needed.
    138     // Insert a dummy zlib header and try again.
    139     if (InsertZlibHeader()) {
    140       *dest_len = dest_orig_size;
    141       status = DoInflate(dest_buffer, dest_len);
    142     }
    143   }
    144 
    145   if (status == Filter::FILTER_DONE) {
    146     decoding_status_ = DECODING_DONE;
    147   } else if (status == Filter::FILTER_ERROR) {
    148     decoding_status_ = DECODING_ERROR;
    149   }
    150 
    151   return status;
    152 }
    153 
    154 Filter::FilterStatus GZipFilter::CheckGZipHeader() {
    155   DCHECK_EQ(gzip_header_status_, GZIP_CHECK_HEADER_IN_PROGRESS);
    156 
    157   // Check input data in pre-filter buffer.
    158   if (!next_stream_data_ || stream_data_len_ <= 0)
    159     return Filter::FILTER_ERROR;
    160 
    161   const char* header_end = NULL;
    162   GZipHeader::Status header_status;
    163   header_status = gzip_header_->ReadMore(next_stream_data_, stream_data_len_,
    164                                          &header_end);
    165 
    166   switch (header_status) {
    167     case GZipHeader::INCOMPLETE_HEADER: {
    168       // We read all the data but only got a partial header.
    169       next_stream_data_ = NULL;
    170       stream_data_len_ = 0;
    171       return Filter::FILTER_NEED_MORE_DATA;
    172     }
    173     case GZipHeader::COMPLETE_HEADER: {
    174       // We have a complete header. Check whether there are more data.
    175       int num_chars_left = static_cast<int>(stream_data_len_ -
    176                                             (header_end - next_stream_data_));
    177       gzip_header_status_ = GZIP_GET_COMPLETE_HEADER;
    178 
    179       if (num_chars_left > 0) {
    180         next_stream_data_ = const_cast<char*>(header_end);
    181         stream_data_len_ = num_chars_left;
    182         return Filter::FILTER_OK;
    183       } else {
    184         next_stream_data_ = NULL;
    185         stream_data_len_ = 0;
    186         return Filter::FILTER_NEED_MORE_DATA;
    187       }
    188     }
    189     case GZipHeader::INVALID_HEADER: {
    190       gzip_header_status_ = GZIP_GET_INVALID_HEADER;
    191       return Filter::FILTER_ERROR;
    192     }
    193     default: {
    194       break;
    195     }
    196   }
    197 
    198   return Filter::FILTER_ERROR;
    199 }
    200 
    201 Filter::FilterStatus GZipFilter::DoInflate(char* dest_buffer, int* dest_len) {
    202   // Make sure we have both valid input data and output buffer.
    203   if (!dest_buffer || !dest_len || *dest_len <= 0)  // output
    204     return Filter::FILTER_ERROR;
    205 
    206   if (!next_stream_data_ || stream_data_len_ <= 0) {  // input
    207     *dest_len = 0;
    208     return Filter::FILTER_NEED_MORE_DATA;
    209   }
    210 
    211   // Fill in zlib control block
    212   zlib_stream_.get()->next_in = bit_cast<Bytef*>(next_stream_data_);
    213   zlib_stream_.get()->avail_in = stream_data_len_;
    214   zlib_stream_.get()->next_out = bit_cast<Bytef*>(dest_buffer);
    215   zlib_stream_.get()->avail_out = *dest_len;
    216 
    217   int inflate_code = inflate(zlib_stream_.get(), Z_NO_FLUSH);
    218   int bytesWritten = *dest_len - zlib_stream_.get()->avail_out;
    219 
    220   Filter::FilterStatus status;
    221 
    222   switch (inflate_code) {
    223     case Z_STREAM_END: {
    224       *dest_len = bytesWritten;
    225 
    226       stream_data_len_ = zlib_stream_.get()->avail_in;
    227       next_stream_data_ = bit_cast<char*>(zlib_stream_.get()->next_in);
    228 
    229       SkipGZipFooter();
    230 
    231       status = Filter::FILTER_DONE;
    232       break;
    233     }
    234     case Z_BUF_ERROR: {
    235       // According to zlib documentation, when calling inflate with Z_NO_FLUSH,
    236       // getting Z_BUF_ERROR means no progress is possible. Neither processing
    237       // more input nor producing more output can be done.
    238       // Since we have checked both input data and output buffer before calling
    239       // inflate, this result is unexpected.
    240       status = Filter::FILTER_ERROR;
    241       break;
    242     }
    243     case Z_OK: {
    244       // Some progress has been made (more input processed or more output
    245       // produced).
    246       *dest_len = bytesWritten;
    247 
    248       // Check whether we have consumed all input data.
    249       stream_data_len_ = zlib_stream_.get()->avail_in;
    250       if (stream_data_len_ == 0) {
    251         next_stream_data_ = NULL;
    252         status = Filter::FILTER_NEED_MORE_DATA;
    253       } else {
    254         next_stream_data_ = bit_cast<char*>(zlib_stream_.get()->next_in);
    255         status = Filter::FILTER_OK;
    256       }
    257       break;
    258     }
    259     default: {
    260       status = Filter::FILTER_ERROR;
    261       break;
    262     }
    263   }
    264 
    265   return status;
    266 }
    267 
    268 bool GZipFilter::InsertZlibHeader() {
    269   static char dummy_head[2] = { 0x78, 0x1 };
    270 
    271   char dummy_output[4];
    272 
    273   // We only try add additional header once.
    274   if (zlib_header_added_)
    275     return false;
    276 
    277   inflateReset(zlib_stream_.get());
    278   zlib_stream_.get()->next_in = bit_cast<Bytef*>(&dummy_head[0]);
    279   zlib_stream_.get()->avail_in = sizeof(dummy_head);
    280   zlib_stream_.get()->next_out = bit_cast<Bytef*>(&dummy_output[0]);
    281   zlib_stream_.get()->avail_out = sizeof(dummy_output);
    282 
    283   int code = inflate(zlib_stream_.get(), Z_NO_FLUSH);
    284   zlib_header_added_ = true;
    285 
    286   return (code == Z_OK);
    287 }
    288 
    289 
    290 void GZipFilter::SkipGZipFooter() {
    291   int footer_bytes_expected = kGZipFooterSize - gzip_footer_bytes_;
    292   if (footer_bytes_expected > 0) {
    293     int footer_byte_avail = std::min(footer_bytes_expected, stream_data_len_);
    294     stream_data_len_ -= footer_byte_avail;
    295     next_stream_data_ += footer_byte_avail;
    296     gzip_footer_bytes_ += footer_byte_avail;
    297 
    298     if (stream_data_len_ == 0)
    299       next_stream_data_ = NULL;
    300   }
    301 }
    302 
    303 }  // namespace net
    304