Home | History | Annotate | Download | only in src
      1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "puffin/src/puffin_stream.h"
      6 
      7 #include <algorithm>
      8 #include <memory>
      9 #include <string>
     10 #include <utility>
     11 #include <vector>
     12 
     13 #include "puffin/src/bit_reader.h"
     14 #include "puffin/src/bit_writer.h"
     15 #include "puffin/src/include/puffin/common.h"
     16 #include "puffin/src/include/puffin/huffer.h"
     17 #include "puffin/src/include/puffin/puffer.h"
     18 #include "puffin/src/include/puffin/stream.h"
     19 #include "puffin/src/puff_reader.h"
     20 #include "puffin/src/puff_writer.h"
     21 #include "puffin/src/set_errors.h"
     22 
     23 namespace puffin {
     24 
     25 using std::vector;
     26 using std::unique_ptr;
     27 using std::shared_ptr;
     28 
     29 namespace {
     30 
     31 bool CheckArgsIntegrity(uint64_t puff_size,
     32                         const std::vector<BitExtent>& deflates,
     33                         const std::vector<ByteExtent>& puffs) {
     34   TEST_AND_RETURN_FALSE(puffs.size() == deflates.size());
     35   // Check if the |puff_size| is actually greater than the last byte of the last
     36   // puff in |puffs|.
     37   if (!puffs.empty()) {
     38     TEST_AND_RETURN_FALSE(puff_size >=
     39                           puffs.back().offset + puffs.back().length);
     40   }
     41 
     42   // Check to make sure |puffs| and |deflates| are sorted and non-overlapping.
     43   auto is_overlap = [](const auto& a, const auto& b) {
     44     return (a.offset + a.length) > b.offset;
     45   };
     46   TEST_AND_RETURN_FALSE(deflates.end() == std::adjacent_find(deflates.begin(),
     47                                                              deflates.end(),
     48                                                              is_overlap));
     49   TEST_AND_RETURN_FALSE(puffs.end() == std::adjacent_find(puffs.begin(),
     50                                                           puffs.end(),
     51                                                           is_overlap));
     52   return true;
     53 }
     54 
     55 }  // namespace
     56 
     57 UniqueStreamPtr PuffinStream::CreateForPuff(
     58     UniqueStreamPtr stream,
     59     std::shared_ptr<Puffer> puffer,
     60     uint64_t puff_size,
     61     const std::vector<BitExtent>& deflates,
     62     const std::vector<ByteExtent>& puffs,
     63     size_t max_cache_size) {
     64   TEST_AND_RETURN_VALUE(CheckArgsIntegrity(puff_size, deflates, puffs),
     65                         nullptr);
     66   TEST_AND_RETURN_VALUE(stream->Seek(0), nullptr);
     67 
     68   UniqueStreamPtr puffin_stream(new PuffinStream(std::move(stream), puffer,
     69                                                  nullptr, puff_size, deflates,
     70                                                  puffs, max_cache_size));
     71   TEST_AND_RETURN_VALUE(puffin_stream->Seek(0), nullptr);
     72   return puffin_stream;
     73 }
     74 
     75 UniqueStreamPtr PuffinStream::CreateForHuff(
     76     UniqueStreamPtr stream,
     77     std::shared_ptr<Huffer> huffer,
     78     uint64_t puff_size,
     79     const std::vector<BitExtent>& deflates,
     80     const std::vector<ByteExtent>& puffs) {
     81   TEST_AND_RETURN_VALUE(CheckArgsIntegrity(puff_size, deflates, puffs),
     82                         nullptr);
     83   TEST_AND_RETURN_VALUE(stream->Seek(0), nullptr);
     84 
     85   UniqueStreamPtr puffin_stream(new PuffinStream(
     86       std::move(stream), nullptr, huffer, puff_size, deflates, puffs, 0));
     87   TEST_AND_RETURN_VALUE(puffin_stream->Seek(0), nullptr);
     88   return puffin_stream;
     89 }
     90 
     91 PuffinStream::PuffinStream(UniqueStreamPtr stream,
     92                            shared_ptr<Puffer> puffer,
     93                            shared_ptr<Huffer> huffer,
     94                            uint64_t puff_size,
     95                            const vector<BitExtent>& deflates,
     96                            const vector<ByteExtent>& puffs,
     97                            size_t max_cache_size)
     98     : stream_(std::move(stream)),
     99       puffer_(puffer),
    100       huffer_(huffer),
    101       puff_stream_size_(puff_size),
    102       deflates_(deflates),
    103       puffs_(puffs),
    104       puff_pos_(0),
    105       skip_bytes_(0),
    106       deflate_bit_pos_(0),
    107       last_byte_(0),
    108       extra_byte_(0),
    109       is_for_puff_(puffer_ ? true : false),
    110       closed_(false),
    111       max_cache_size_(max_cache_size),
    112       cur_cache_size_(0) {
    113   // Building upper bounds for faster seek.
    114   upper_bounds_.reserve(puffs.size());
    115   for (const auto& puff : puffs) {
    116     upper_bounds_.emplace_back(puff.offset + puff.length);
    117   }
    118   upper_bounds_.emplace_back(puff_stream_size_ + 1);
    119 
    120   // We can pass the size of the deflate stream too, but it is not necessary
    121   // yet. We cannot get the size of stream from itself, because we might be
    122   // writing into it and its size is not defined yet.
    123   uint64_t deflate_stream_size = puff_stream_size_;
    124   if (!puffs.empty()) {
    125     deflate_stream_size =
    126         ((deflates.back().offset + deflates.back().length) / 8) +
    127         puff_stream_size_ - (puffs.back().offset + puffs.back().length);
    128   }
    129 
    130   deflates_.emplace_back(deflate_stream_size * 8, 0);
    131   puffs_.emplace_back(puff_stream_size_, 0);
    132 
    133   // Look for the largest puff and deflate extents and get proper size buffers.
    134   uint64_t max_puff_length = 0;
    135   for (const auto& puff : puffs) {
    136     max_puff_length = std::max(max_puff_length, puff.length);
    137   }
    138   puff_buffer_.reset(new Buffer(max_puff_length + 1));
    139   if (max_cache_size_ < max_puff_length) {
    140     max_cache_size_ = 0;  // It means we are not caching puffs.
    141   }
    142 
    143   uint64_t max_deflate_length = 0;
    144   for (const auto& deflate : deflates) {
    145     max_deflate_length = std::max(max_deflate_length, deflate.length * 8);
    146   }
    147   deflate_buffer_.reset(new Buffer(max_deflate_length + 2));
    148 }
    149 
    150 bool PuffinStream::GetSize(uint64_t* size) const {
    151   *size = puff_stream_size_;
    152   return true;
    153 }
    154 
    155 bool PuffinStream::GetOffset(uint64_t* offset) const {
    156   *offset = puff_pos_ + skip_bytes_;
    157   return true;
    158 }
    159 
    160 bool PuffinStream::Seek(uint64_t offset) {
    161   TEST_AND_RETURN_FALSE(!closed_);
    162   if (!is_for_puff_) {
    163     // For huffing we should not seek, only seek to zero is accepted.
    164     TEST_AND_RETURN_FALSE(offset == 0);
    165   }
    166 
    167   TEST_AND_RETURN_FALSE(offset <= puff_stream_size_);
    168 
    169   // We are searching first available puff which either includes the |offset| or
    170   // it is the next available puff after |offset|.
    171   auto next_puff_iter =
    172       std::upper_bound(upper_bounds_.begin(), upper_bounds_.end(), offset);
    173   TEST_AND_RETURN_FALSE(next_puff_iter != upper_bounds_.end());
    174   auto next_puff_idx = std::distance(upper_bounds_.begin(), next_puff_iter);
    175   cur_puff_ = std::next(puffs_.begin(), next_puff_idx);
    176   cur_deflate_ = std::next(deflates_.begin(), next_puff_idx);
    177 
    178   if (offset < cur_puff_->offset) {
    179     // between two puffs.
    180     puff_pos_ = offset;
    181     auto back_track_bytes = cur_puff_->offset - puff_pos_;
    182     deflate_bit_pos_ = ((cur_deflate_->offset + 7) / 8 - back_track_bytes) * 8;
    183     if (cur_puff_ != puffs_.begin()) {
    184       auto prev_deflate = std::prev(cur_deflate_);
    185       if (deflate_bit_pos_ < prev_deflate->offset + prev_deflate->length) {
    186         deflate_bit_pos_ = prev_deflate->offset + prev_deflate->length;
    187       }
    188     }
    189   } else {
    190     // Inside a puff.
    191     puff_pos_ = cur_puff_->offset;
    192     deflate_bit_pos_ = cur_deflate_->offset;
    193   }
    194   skip_bytes_ = offset - puff_pos_;
    195   if (!is_for_puff_ && offset == 0) {
    196     TEST_AND_RETURN_FALSE(stream_->Seek(0));
    197     TEST_AND_RETURN_FALSE(SetExtraByte());
    198   }
    199   return true;
    200 }
    201 
    202 bool PuffinStream::Close() {
    203   closed_ = true;
    204   return stream_->Close();
    205 }
    206 
    207 bool PuffinStream::Read(void* buffer, size_t count) {
    208   TEST_AND_RETURN_FALSE(!closed_);
    209   TEST_AND_RETURN_FALSE(is_for_puff_);
    210   if (cur_puff_ == puffs_.end()) {
    211     TEST_AND_RETURN_FALSE(count == 0);
    212   }
    213   auto bytes = static_cast<uint8_t*>(buffer);
    214   uint64_t length = count;
    215   uint64_t bytes_read = 0;
    216   while (bytes_read < length) {
    217     if (puff_pos_ < cur_puff_->offset) {
    218       // Reading between two deflates. We also read bytes that have at least one
    219       // bit of a deflate bit stream. The byte which has both deflate and raw
    220       // data will be shifted or masked off the deflate bits and the remaining
    221       // value will be saved in the puff stream as an byte integer.
    222       uint64_t start_byte = (deflate_bit_pos_ / 8);
    223       uint64_t end_byte = (cur_deflate_->offset + 7) / 8;
    224       auto bytes_to_read = std::min(length - bytes_read, end_byte - start_byte);
    225       TEST_AND_RETURN_FALSE(bytes_to_read >= 1);
    226 
    227       TEST_AND_RETURN_FALSE(stream_->Seek(start_byte));
    228       TEST_AND_RETURN_FALSE(stream_->Read(bytes + bytes_read, bytes_to_read));
    229 
    230       // If true, we read the first byte of the curret deflate. So we have to
    231       // mask out the deflate bits (which are most significant bits.)
    232       if ((start_byte + bytes_to_read) * 8 > cur_deflate_->offset) {
    233         bytes[bytes_read + bytes_to_read - 1] &=
    234             (1 << (cur_deflate_->offset & 7)) - 1;
    235       }
    236 
    237       // If true, we read the last byte of the previous deflate and we have to
    238       // shift it out. The least significat bits belongs to the deflate
    239       // stream. The order of these last two conditions are important because a
    240       // byte can contain a finishing deflate and a starting deflate with some
    241       // bits between them so we have to modify correctly. Keep in mind that in
    242       // this situation both are modifying the same byte.
    243       if (start_byte * 8 < deflate_bit_pos_) {
    244         bytes[bytes_read] >>= deflate_bit_pos_ & 7;
    245       }
    246 
    247       // Pass |deflate_bit_pos_| for all the read bytes.
    248       deflate_bit_pos_ -= deflate_bit_pos_ & 7;
    249       deflate_bit_pos_ += bytes_to_read * 8;
    250       if (deflate_bit_pos_ > cur_deflate_->offset) {
    251         // In case it reads into the first byte of the current deflate.
    252         deflate_bit_pos_ = cur_deflate_->offset;
    253       }
    254 
    255       bytes_read += bytes_to_read;
    256       puff_pos_ += bytes_to_read;
    257       TEST_AND_RETURN_FALSE(puff_pos_ <= cur_puff_->offset);
    258     } else {
    259       // Reading the deflate itself. We read all bytes including the first and
    260       // last byte (which may partially include a deflate bit). Here we keep the
    261       // |puff_pos_| point to the first byte of the puffed stream and
    262       // |skip_bytes_| shows how many bytes in the puff we have copied till now.
    263       auto start_byte = (cur_deflate_->offset / 8);
    264       auto end_byte = (cur_deflate_->offset + cur_deflate_->length + 7) / 8;
    265       auto bytes_to_read = end_byte - start_byte;
    266       // Puff directly to buffer if it has space.
    267       bool puff_directly_into_buffer =
    268           max_cache_size_ == 0 && (skip_bytes_ == 0) &&
    269           (length - bytes_read >= cur_puff_->length);
    270 
    271       auto cur_puff_idx = std::distance(puffs_.begin(), cur_puff_);
    272       if (max_cache_size_ == 0 ||
    273           !GetPuffCache(cur_puff_idx, cur_puff_->length, &puff_buffer_)) {
    274         // Did not find the puff buffer in cache. We have to build it.
    275         deflate_buffer_->resize(bytes_to_read);
    276         TEST_AND_RETURN_FALSE(stream_->Seek(start_byte));
    277         TEST_AND_RETURN_FALSE(
    278             stream_->Read(deflate_buffer_->data(), bytes_to_read));
    279         BufferBitReader bit_reader(deflate_buffer_->data(), bytes_to_read);
    280 
    281         BufferPuffWriter puff_writer(puff_directly_into_buffer
    282                                          ? bytes + bytes_read
    283                                          : puff_buffer_->data(),
    284                                      cur_puff_->length);
    285 
    286         // Drop the first unused bits.
    287         size_t extra_bits_len = cur_deflate_->offset & 7;
    288         TEST_AND_RETURN_FALSE(bit_reader.CacheBits(extra_bits_len));
    289         bit_reader.DropBits(extra_bits_len);
    290 
    291         Error error;
    292         TEST_AND_RETURN_FALSE(
    293             puffer_->PuffDeflate(&bit_reader, &puff_writer, nullptr, &error));
    294         TEST_AND_RETURN_FALSE(bytes_to_read == bit_reader.Offset());
    295         TEST_AND_RETURN_FALSE(cur_puff_->length == puff_writer.Size());
    296       } else {
    297         // Just seek to proper location.
    298         TEST_AND_RETURN_FALSE(stream_->Seek(start_byte + bytes_to_read));
    299       }
    300       // Copy from puff buffer to output if needed.
    301       auto bytes_to_copy =
    302           std::min(length - bytes_read, cur_puff_->length - skip_bytes_);
    303       if (!puff_directly_into_buffer) {
    304         memcpy(bytes + bytes_read, puff_buffer_->data() + skip_bytes_,
    305                bytes_to_copy);
    306       }
    307 
    308       skip_bytes_ += bytes_to_copy;
    309       bytes_read += bytes_to_copy;
    310 
    311       // Move to next puff.
    312       if (puff_pos_ + skip_bytes_ == cur_puff_->offset + cur_puff_->length) {
    313         puff_pos_ += skip_bytes_;
    314         skip_bytes_ = 0;
    315         deflate_bit_pos_ = cur_deflate_->offset + cur_deflate_->length;
    316         cur_puff_++;
    317         cur_deflate_++;
    318         if (cur_puff_ == puffs_.end()) {
    319           break;
    320         }
    321       }
    322     }
    323   }
    324 
    325   TEST_AND_RETURN_FALSE(bytes_read == length);
    326   return true;
    327 }
    328 
    329 bool PuffinStream::Write(const void* buffer, size_t count) {
    330   TEST_AND_RETURN_FALSE(!closed_);
    331   TEST_AND_RETURN_FALSE(!is_for_puff_);
    332   auto bytes = static_cast<const uint8_t*>(buffer);
    333   uint64_t length = count;
    334   uint64_t bytes_wrote = 0;
    335   while (bytes_wrote < length) {
    336     if (deflate_bit_pos_ < (cur_deflate_->offset & ~7ull)) {
    337       // Between two puffs or before the first puff. We know that we are
    338       // starting from the byte boundary because we have already processed the
    339       // non-deflate bits of the last byte of the last deflate. Here we don't
    340       // process any byte that has deflate bit.
    341       TEST_AND_RETURN_FALSE((deflate_bit_pos_ & 7) == 0);
    342       auto copy_len =
    343           std::min((cur_deflate_->offset / 8) - (deflate_bit_pos_ / 8),
    344                    length - bytes_wrote);
    345       TEST_AND_RETURN_FALSE(stream_->Write(bytes + bytes_wrote, copy_len));
    346       bytes_wrote += copy_len;
    347       puff_pos_ += copy_len;
    348       deflate_bit_pos_ += copy_len * 8;
    349     } else {
    350       // We are in a puff. We have to buffer incoming bytes until we reach the
    351       // size of the current puff so we can huff :). If the last bit of the
    352       // current deflate does not end in a byte boundary, then we have to read
    353       // one more byte to fill up the last byte of the deflate stream before
    354       // doing anything else.
    355 
    356       // |deflate_bit_pos_| now should be in the same byte as
    357       // |cur_deflate->offset|.
    358       if (deflate_bit_pos_ < cur_deflate_->offset) {
    359         last_byte_ |= bytes[bytes_wrote++] << (deflate_bit_pos_ & 7);
    360         skip_bytes_ = 0;
    361         deflate_bit_pos_ = cur_deflate_->offset;
    362         puff_pos_++;
    363         TEST_AND_RETURN_FALSE(puff_pos_ == cur_puff_->offset);
    364       }
    365 
    366       auto copy_len = std::min(length - bytes_wrote,
    367                                cur_puff_->length + extra_byte_ - skip_bytes_);
    368       TEST_AND_RETURN_FALSE(puff_buffer_->size() >= skip_bytes_ + copy_len);
    369       memcpy(puff_buffer_->data() + skip_bytes_, bytes + bytes_wrote, copy_len);
    370       skip_bytes_ += copy_len;
    371       bytes_wrote += copy_len;
    372 
    373       if (skip_bytes_ == cur_puff_->length + extra_byte_) {
    374         // |puff_buffer_| is full, now huff into the |deflate_buffer_|.
    375         auto start_byte = cur_deflate_->offset / 8;
    376         auto end_byte = (cur_deflate_->offset + cur_deflate_->length + 7) / 8;
    377         auto bytes_to_write = end_byte - start_byte;
    378 
    379         deflate_buffer_->resize(bytes_to_write);
    380         BufferBitWriter bit_writer(deflate_buffer_->data(), bytes_to_write);
    381         BufferPuffReader puff_reader(puff_buffer_->data(), cur_puff_->length);
    382 
    383         // Write last byte if it has any.
    384         TEST_AND_RETURN_FALSE(
    385             bit_writer.WriteBits(cur_deflate_->offset & 7, last_byte_));
    386         last_byte_ = 0;
    387 
    388         Error error;
    389         TEST_AND_RETURN_FALSE(
    390             huffer_->HuffDeflate(&puff_reader, &bit_writer, &error));
    391         TEST_AND_RETURN_FALSE(bit_writer.Size() == bytes_to_write);
    392         TEST_AND_RETURN_FALSE(puff_reader.BytesLeft() == 0);
    393 
    394         deflate_bit_pos_ = cur_deflate_->offset + cur_deflate_->length;
    395         if (extra_byte_ == 1) {
    396           deflate_buffer_->data()[bytes_to_write - 1] |=
    397               puff_buffer_->data()[cur_puff_->length] << (deflate_bit_pos_ & 7);
    398           deflate_bit_pos_ = (deflate_bit_pos_ + 7) & ~7ull;
    399         } else if ((deflate_bit_pos_ & 7) != 0) {
    400           // This happens if current and next deflate finish and end on the same
    401           // byte, then we cannot write into output until we have huffed the
    402           // next puff buffer, so untill then we cache it into |last_byte_| and
    403           // we won't write it out.
    404           last_byte_ = deflate_buffer_->data()[bytes_to_write - 1];
    405           bytes_to_write--;
    406         }
    407 
    408         // Write |deflate_buffer_| into output.
    409         TEST_AND_RETURN_FALSE(
    410             stream_->Write(deflate_buffer_->data(), bytes_to_write));
    411 
    412         // Move to the next deflate/puff.
    413         puff_pos_ += skip_bytes_;
    414         skip_bytes_ = 0;
    415         cur_puff_++;
    416         cur_deflate_++;
    417         if (cur_puff_ == puffs_.end()) {
    418           break;
    419         }
    420         // Find if need an extra byte to cache at the end.
    421         TEST_AND_RETURN_FALSE(SetExtraByte());
    422       }
    423     }
    424   }
    425 
    426   TEST_AND_RETURN_FALSE(bytes_wrote == length);
    427   return true;
    428 }
    429 
    430 bool PuffinStream::SetExtraByte() {
    431   TEST_AND_RETURN_FALSE(cur_deflate_ != deflates_.end());
    432   if ((cur_deflate_ + 1) == deflates_.end()) {
    433     extra_byte_ = 0;
    434     return true;
    435   }
    436   uint64_t end_bit = cur_deflate_->offset + cur_deflate_->length;
    437   if ((end_bit & 7) && ((end_bit + 7) & ~7ull) <= (cur_deflate_ + 1)->offset) {
    438     extra_byte_ = 1;
    439   } else {
    440     extra_byte_ = 0;
    441   }
    442   return true;
    443 }
    444 
    445 bool PuffinStream::GetPuffCache(int puff_id,
    446                                 uint64_t puff_size,
    447                                 SharedBufferPtr* buffer) {
    448   bool found = false;
    449   // Search for it.
    450   std::pair<int, SharedBufferPtr> cache;
    451   // TODO(*): Find a faster way of doing this? Maybe change the data structure
    452   // that supports faster search.
    453   for (auto iter = caches_.begin(); iter != caches_.end(); ++iter) {
    454     if (iter->first == puff_id) {
    455       cache = std::move(*iter);
    456       found = true;
    457       // Remove it so later we can add it to the begining of the list.
    458       caches_.erase(iter);
    459       break;
    460     }
    461   }
    462 
    463   // If not found, either create one or get one from the list.
    464   if (!found) {
    465     // If |caches_| were full, remove last ones in the list (least used), until
    466     // we have enough space for the new cache.
    467     while (!caches_.empty() && cur_cache_size_ + puff_size > max_cache_size_) {
    468       cache = std::move(caches_.back());
    469       caches_.pop_back();  // Remove it from the list.
    470       cur_cache_size_ -= cache.second->capacity();
    471     }
    472     // If we have not populated the cache yet, create one.
    473     if (!cache.second) {
    474       cache.second.reset(new Buffer(puff_size));
    475     }
    476     cache.second->resize(puff_size);
    477 
    478     constexpr uint64_t kMaxSizeDifference = 20 * 1024;
    479     if (puff_size + kMaxSizeDifference < cache.second->capacity()) {
    480       cache.second->shrink_to_fit();
    481     }
    482 
    483     cur_cache_size_ += cache.second->capacity();
    484     cache.first = puff_id;
    485   }
    486 
    487   *buffer = cache.second;
    488   // By now we have either removed a cache or created new one. Now we have to
    489   // insert it in the front of the list so it becomes the most recently used
    490   // one.
    491   caches_.push_front(std::move(cache));
    492   return found;
    493 }
    494 
    495 }  // namespace puffin
    496