1 // Copyright 2017 The Chromium OS Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "puffin/src/puffin_stream.h" 6 7 #include <algorithm> 8 #include <memory> 9 #include <string> 10 #include <utility> 11 #include <vector> 12 13 #include "puffin/src/bit_reader.h" 14 #include "puffin/src/bit_writer.h" 15 #include "puffin/src/include/puffin/common.h" 16 #include "puffin/src/include/puffin/huffer.h" 17 #include "puffin/src/include/puffin/puffer.h" 18 #include "puffin/src/include/puffin/stream.h" 19 #include "puffin/src/puff_reader.h" 20 #include "puffin/src/puff_writer.h" 21 #include "puffin/src/set_errors.h" 22 23 namespace puffin { 24 25 using std::vector; 26 using std::unique_ptr; 27 using std::shared_ptr; 28 29 namespace { 30 31 bool CheckArgsIntegrity(uint64_t puff_size, 32 const std::vector<BitExtent>& deflates, 33 const std::vector<ByteExtent>& puffs) { 34 TEST_AND_RETURN_FALSE(puffs.size() == deflates.size()); 35 // Check if the |puff_size| is actually greater than the last byte of the last 36 // puff in |puffs|. 37 if (!puffs.empty()) { 38 TEST_AND_RETURN_FALSE(puff_size >= 39 puffs.back().offset + puffs.back().length); 40 } 41 42 // Check to make sure |puffs| and |deflates| are sorted and non-overlapping. 43 auto is_overlap = [](const auto& a, const auto& b) { 44 return (a.offset + a.length) > b.offset; 45 }; 46 TEST_AND_RETURN_FALSE(deflates.end() == std::adjacent_find(deflates.begin(), 47 deflates.end(), 48 is_overlap)); 49 TEST_AND_RETURN_FALSE(puffs.end() == std::adjacent_find(puffs.begin(), 50 puffs.end(), 51 is_overlap)); 52 return true; 53 } 54 55 } // namespace 56 57 UniqueStreamPtr PuffinStream::CreateForPuff( 58 UniqueStreamPtr stream, 59 std::shared_ptr<Puffer> puffer, 60 uint64_t puff_size, 61 const std::vector<BitExtent>& deflates, 62 const std::vector<ByteExtent>& puffs, 63 size_t max_cache_size) { 64 TEST_AND_RETURN_VALUE(CheckArgsIntegrity(puff_size, deflates, puffs), 65 nullptr); 66 TEST_AND_RETURN_VALUE(stream->Seek(0), nullptr); 67 68 UniqueStreamPtr puffin_stream(new PuffinStream(std::move(stream), puffer, 69 nullptr, puff_size, deflates, 70 puffs, max_cache_size)); 71 TEST_AND_RETURN_VALUE(puffin_stream->Seek(0), nullptr); 72 return puffin_stream; 73 } 74 75 UniqueStreamPtr PuffinStream::CreateForHuff( 76 UniqueStreamPtr stream, 77 std::shared_ptr<Huffer> huffer, 78 uint64_t puff_size, 79 const std::vector<BitExtent>& deflates, 80 const std::vector<ByteExtent>& puffs) { 81 TEST_AND_RETURN_VALUE(CheckArgsIntegrity(puff_size, deflates, puffs), 82 nullptr); 83 TEST_AND_RETURN_VALUE(stream->Seek(0), nullptr); 84 85 UniqueStreamPtr puffin_stream(new PuffinStream( 86 std::move(stream), nullptr, huffer, puff_size, deflates, puffs, 0)); 87 TEST_AND_RETURN_VALUE(puffin_stream->Seek(0), nullptr); 88 return puffin_stream; 89 } 90 91 PuffinStream::PuffinStream(UniqueStreamPtr stream, 92 shared_ptr<Puffer> puffer, 93 shared_ptr<Huffer> huffer, 94 uint64_t puff_size, 95 const vector<BitExtent>& deflates, 96 const vector<ByteExtent>& puffs, 97 size_t max_cache_size) 98 : stream_(std::move(stream)), 99 puffer_(puffer), 100 huffer_(huffer), 101 puff_stream_size_(puff_size), 102 deflates_(deflates), 103 puffs_(puffs), 104 puff_pos_(0), 105 skip_bytes_(0), 106 deflate_bit_pos_(0), 107 last_byte_(0), 108 extra_byte_(0), 109 is_for_puff_(puffer_ ? true : false), 110 closed_(false), 111 max_cache_size_(max_cache_size), 112 cur_cache_size_(0) { 113 // Building upper bounds for faster seek. 114 upper_bounds_.reserve(puffs.size()); 115 for (const auto& puff : puffs) { 116 upper_bounds_.emplace_back(puff.offset + puff.length); 117 } 118 upper_bounds_.emplace_back(puff_stream_size_ + 1); 119 120 // We can pass the size of the deflate stream too, but it is not necessary 121 // yet. We cannot get the size of stream from itself, because we might be 122 // writing into it and its size is not defined yet. 123 uint64_t deflate_stream_size = puff_stream_size_; 124 if (!puffs.empty()) { 125 deflate_stream_size = 126 ((deflates.back().offset + deflates.back().length) / 8) + 127 puff_stream_size_ - (puffs.back().offset + puffs.back().length); 128 } 129 130 deflates_.emplace_back(deflate_stream_size * 8, 0); 131 puffs_.emplace_back(puff_stream_size_, 0); 132 133 // Look for the largest puff and deflate extents and get proper size buffers. 134 uint64_t max_puff_length = 0; 135 for (const auto& puff : puffs) { 136 max_puff_length = std::max(max_puff_length, puff.length); 137 } 138 puff_buffer_.reset(new Buffer(max_puff_length + 1)); 139 if (max_cache_size_ < max_puff_length) { 140 max_cache_size_ = 0; // It means we are not caching puffs. 141 } 142 143 uint64_t max_deflate_length = 0; 144 for (const auto& deflate : deflates) { 145 max_deflate_length = std::max(max_deflate_length, deflate.length * 8); 146 } 147 deflate_buffer_.reset(new Buffer(max_deflate_length + 2)); 148 } 149 150 bool PuffinStream::GetSize(uint64_t* size) const { 151 *size = puff_stream_size_; 152 return true; 153 } 154 155 bool PuffinStream::GetOffset(uint64_t* offset) const { 156 *offset = puff_pos_ + skip_bytes_; 157 return true; 158 } 159 160 bool PuffinStream::Seek(uint64_t offset) { 161 TEST_AND_RETURN_FALSE(!closed_); 162 if (!is_for_puff_) { 163 // For huffing we should not seek, only seek to zero is accepted. 164 TEST_AND_RETURN_FALSE(offset == 0); 165 } 166 167 TEST_AND_RETURN_FALSE(offset <= puff_stream_size_); 168 169 // We are searching first available puff which either includes the |offset| or 170 // it is the next available puff after |offset|. 171 auto next_puff_iter = 172 std::upper_bound(upper_bounds_.begin(), upper_bounds_.end(), offset); 173 TEST_AND_RETURN_FALSE(next_puff_iter != upper_bounds_.end()); 174 auto next_puff_idx = std::distance(upper_bounds_.begin(), next_puff_iter); 175 cur_puff_ = std::next(puffs_.begin(), next_puff_idx); 176 cur_deflate_ = std::next(deflates_.begin(), next_puff_idx); 177 178 if (offset < cur_puff_->offset) { 179 // between two puffs. 180 puff_pos_ = offset; 181 auto back_track_bytes = cur_puff_->offset - puff_pos_; 182 deflate_bit_pos_ = ((cur_deflate_->offset + 7) / 8 - back_track_bytes) * 8; 183 if (cur_puff_ != puffs_.begin()) { 184 auto prev_deflate = std::prev(cur_deflate_); 185 if (deflate_bit_pos_ < prev_deflate->offset + prev_deflate->length) { 186 deflate_bit_pos_ = prev_deflate->offset + prev_deflate->length; 187 } 188 } 189 } else { 190 // Inside a puff. 191 puff_pos_ = cur_puff_->offset; 192 deflate_bit_pos_ = cur_deflate_->offset; 193 } 194 skip_bytes_ = offset - puff_pos_; 195 if (!is_for_puff_ && offset == 0) { 196 TEST_AND_RETURN_FALSE(stream_->Seek(0)); 197 TEST_AND_RETURN_FALSE(SetExtraByte()); 198 } 199 return true; 200 } 201 202 bool PuffinStream::Close() { 203 closed_ = true; 204 return stream_->Close(); 205 } 206 207 bool PuffinStream::Read(void* buffer, size_t count) { 208 TEST_AND_RETURN_FALSE(!closed_); 209 TEST_AND_RETURN_FALSE(is_for_puff_); 210 if (cur_puff_ == puffs_.end()) { 211 TEST_AND_RETURN_FALSE(count == 0); 212 } 213 auto bytes = static_cast<uint8_t*>(buffer); 214 uint64_t length = count; 215 uint64_t bytes_read = 0; 216 while (bytes_read < length) { 217 if (puff_pos_ < cur_puff_->offset) { 218 // Reading between two deflates. We also read bytes that have at least one 219 // bit of a deflate bit stream. The byte which has both deflate and raw 220 // data will be shifted or masked off the deflate bits and the remaining 221 // value will be saved in the puff stream as an byte integer. 222 uint64_t start_byte = (deflate_bit_pos_ / 8); 223 uint64_t end_byte = (cur_deflate_->offset + 7) / 8; 224 auto bytes_to_read = std::min(length - bytes_read, end_byte - start_byte); 225 TEST_AND_RETURN_FALSE(bytes_to_read >= 1); 226 227 TEST_AND_RETURN_FALSE(stream_->Seek(start_byte)); 228 TEST_AND_RETURN_FALSE(stream_->Read(bytes + bytes_read, bytes_to_read)); 229 230 // If true, we read the first byte of the curret deflate. So we have to 231 // mask out the deflate bits (which are most significant bits.) 232 if ((start_byte + bytes_to_read) * 8 > cur_deflate_->offset) { 233 bytes[bytes_read + bytes_to_read - 1] &= 234 (1 << (cur_deflate_->offset & 7)) - 1; 235 } 236 237 // If true, we read the last byte of the previous deflate and we have to 238 // shift it out. The least significat bits belongs to the deflate 239 // stream. The order of these last two conditions are important because a 240 // byte can contain a finishing deflate and a starting deflate with some 241 // bits between them so we have to modify correctly. Keep in mind that in 242 // this situation both are modifying the same byte. 243 if (start_byte * 8 < deflate_bit_pos_) { 244 bytes[bytes_read] >>= deflate_bit_pos_ & 7; 245 } 246 247 // Pass |deflate_bit_pos_| for all the read bytes. 248 deflate_bit_pos_ -= deflate_bit_pos_ & 7; 249 deflate_bit_pos_ += bytes_to_read * 8; 250 if (deflate_bit_pos_ > cur_deflate_->offset) { 251 // In case it reads into the first byte of the current deflate. 252 deflate_bit_pos_ = cur_deflate_->offset; 253 } 254 255 bytes_read += bytes_to_read; 256 puff_pos_ += bytes_to_read; 257 TEST_AND_RETURN_FALSE(puff_pos_ <= cur_puff_->offset); 258 } else { 259 // Reading the deflate itself. We read all bytes including the first and 260 // last byte (which may partially include a deflate bit). Here we keep the 261 // |puff_pos_| point to the first byte of the puffed stream and 262 // |skip_bytes_| shows how many bytes in the puff we have copied till now. 263 auto start_byte = (cur_deflate_->offset / 8); 264 auto end_byte = (cur_deflate_->offset + cur_deflate_->length + 7) / 8; 265 auto bytes_to_read = end_byte - start_byte; 266 // Puff directly to buffer if it has space. 267 bool puff_directly_into_buffer = 268 max_cache_size_ == 0 && (skip_bytes_ == 0) && 269 (length - bytes_read >= cur_puff_->length); 270 271 auto cur_puff_idx = std::distance(puffs_.begin(), cur_puff_); 272 if (max_cache_size_ == 0 || 273 !GetPuffCache(cur_puff_idx, cur_puff_->length, &puff_buffer_)) { 274 // Did not find the puff buffer in cache. We have to build it. 275 deflate_buffer_->resize(bytes_to_read); 276 TEST_AND_RETURN_FALSE(stream_->Seek(start_byte)); 277 TEST_AND_RETURN_FALSE( 278 stream_->Read(deflate_buffer_->data(), bytes_to_read)); 279 BufferBitReader bit_reader(deflate_buffer_->data(), bytes_to_read); 280 281 BufferPuffWriter puff_writer(puff_directly_into_buffer 282 ? bytes + bytes_read 283 : puff_buffer_->data(), 284 cur_puff_->length); 285 286 // Drop the first unused bits. 287 size_t extra_bits_len = cur_deflate_->offset & 7; 288 TEST_AND_RETURN_FALSE(bit_reader.CacheBits(extra_bits_len)); 289 bit_reader.DropBits(extra_bits_len); 290 291 Error error; 292 TEST_AND_RETURN_FALSE( 293 puffer_->PuffDeflate(&bit_reader, &puff_writer, nullptr, &error)); 294 TEST_AND_RETURN_FALSE(bytes_to_read == bit_reader.Offset()); 295 TEST_AND_RETURN_FALSE(cur_puff_->length == puff_writer.Size()); 296 } else { 297 // Just seek to proper location. 298 TEST_AND_RETURN_FALSE(stream_->Seek(start_byte + bytes_to_read)); 299 } 300 // Copy from puff buffer to output if needed. 301 auto bytes_to_copy = 302 std::min(length - bytes_read, cur_puff_->length - skip_bytes_); 303 if (!puff_directly_into_buffer) { 304 memcpy(bytes + bytes_read, puff_buffer_->data() + skip_bytes_, 305 bytes_to_copy); 306 } 307 308 skip_bytes_ += bytes_to_copy; 309 bytes_read += bytes_to_copy; 310 311 // Move to next puff. 312 if (puff_pos_ + skip_bytes_ == cur_puff_->offset + cur_puff_->length) { 313 puff_pos_ += skip_bytes_; 314 skip_bytes_ = 0; 315 deflate_bit_pos_ = cur_deflate_->offset + cur_deflate_->length; 316 cur_puff_++; 317 cur_deflate_++; 318 if (cur_puff_ == puffs_.end()) { 319 break; 320 } 321 } 322 } 323 } 324 325 TEST_AND_RETURN_FALSE(bytes_read == length); 326 return true; 327 } 328 329 bool PuffinStream::Write(const void* buffer, size_t count) { 330 TEST_AND_RETURN_FALSE(!closed_); 331 TEST_AND_RETURN_FALSE(!is_for_puff_); 332 auto bytes = static_cast<const uint8_t*>(buffer); 333 uint64_t length = count; 334 uint64_t bytes_wrote = 0; 335 while (bytes_wrote < length) { 336 if (deflate_bit_pos_ < (cur_deflate_->offset & ~7ull)) { 337 // Between two puffs or before the first puff. We know that we are 338 // starting from the byte boundary because we have already processed the 339 // non-deflate bits of the last byte of the last deflate. Here we don't 340 // process any byte that has deflate bit. 341 TEST_AND_RETURN_FALSE((deflate_bit_pos_ & 7) == 0); 342 auto copy_len = 343 std::min((cur_deflate_->offset / 8) - (deflate_bit_pos_ / 8), 344 length - bytes_wrote); 345 TEST_AND_RETURN_FALSE(stream_->Write(bytes + bytes_wrote, copy_len)); 346 bytes_wrote += copy_len; 347 puff_pos_ += copy_len; 348 deflate_bit_pos_ += copy_len * 8; 349 } else { 350 // We are in a puff. We have to buffer incoming bytes until we reach the 351 // size of the current puff so we can huff :). If the last bit of the 352 // current deflate does not end in a byte boundary, then we have to read 353 // one more byte to fill up the last byte of the deflate stream before 354 // doing anything else. 355 356 // |deflate_bit_pos_| now should be in the same byte as 357 // |cur_deflate->offset|. 358 if (deflate_bit_pos_ < cur_deflate_->offset) { 359 last_byte_ |= bytes[bytes_wrote++] << (deflate_bit_pos_ & 7); 360 skip_bytes_ = 0; 361 deflate_bit_pos_ = cur_deflate_->offset; 362 puff_pos_++; 363 TEST_AND_RETURN_FALSE(puff_pos_ == cur_puff_->offset); 364 } 365 366 auto copy_len = std::min(length - bytes_wrote, 367 cur_puff_->length + extra_byte_ - skip_bytes_); 368 TEST_AND_RETURN_FALSE(puff_buffer_->size() >= skip_bytes_ + copy_len); 369 memcpy(puff_buffer_->data() + skip_bytes_, bytes + bytes_wrote, copy_len); 370 skip_bytes_ += copy_len; 371 bytes_wrote += copy_len; 372 373 if (skip_bytes_ == cur_puff_->length + extra_byte_) { 374 // |puff_buffer_| is full, now huff into the |deflate_buffer_|. 375 auto start_byte = cur_deflate_->offset / 8; 376 auto end_byte = (cur_deflate_->offset + cur_deflate_->length + 7) / 8; 377 auto bytes_to_write = end_byte - start_byte; 378 379 deflate_buffer_->resize(bytes_to_write); 380 BufferBitWriter bit_writer(deflate_buffer_->data(), bytes_to_write); 381 BufferPuffReader puff_reader(puff_buffer_->data(), cur_puff_->length); 382 383 // Write last byte if it has any. 384 TEST_AND_RETURN_FALSE( 385 bit_writer.WriteBits(cur_deflate_->offset & 7, last_byte_)); 386 last_byte_ = 0; 387 388 Error error; 389 TEST_AND_RETURN_FALSE( 390 huffer_->HuffDeflate(&puff_reader, &bit_writer, &error)); 391 TEST_AND_RETURN_FALSE(bit_writer.Size() == bytes_to_write); 392 TEST_AND_RETURN_FALSE(puff_reader.BytesLeft() == 0); 393 394 deflate_bit_pos_ = cur_deflate_->offset + cur_deflate_->length; 395 if (extra_byte_ == 1) { 396 deflate_buffer_->data()[bytes_to_write - 1] |= 397 puff_buffer_->data()[cur_puff_->length] << (deflate_bit_pos_ & 7); 398 deflate_bit_pos_ = (deflate_bit_pos_ + 7) & ~7ull; 399 } else if ((deflate_bit_pos_ & 7) != 0) { 400 // This happens if current and next deflate finish and end on the same 401 // byte, then we cannot write into output until we have huffed the 402 // next puff buffer, so untill then we cache it into |last_byte_| and 403 // we won't write it out. 404 last_byte_ = deflate_buffer_->data()[bytes_to_write - 1]; 405 bytes_to_write--; 406 } 407 408 // Write |deflate_buffer_| into output. 409 TEST_AND_RETURN_FALSE( 410 stream_->Write(deflate_buffer_->data(), bytes_to_write)); 411 412 // Move to the next deflate/puff. 413 puff_pos_ += skip_bytes_; 414 skip_bytes_ = 0; 415 cur_puff_++; 416 cur_deflate_++; 417 if (cur_puff_ == puffs_.end()) { 418 break; 419 } 420 // Find if need an extra byte to cache at the end. 421 TEST_AND_RETURN_FALSE(SetExtraByte()); 422 } 423 } 424 } 425 426 TEST_AND_RETURN_FALSE(bytes_wrote == length); 427 return true; 428 } 429 430 bool PuffinStream::SetExtraByte() { 431 TEST_AND_RETURN_FALSE(cur_deflate_ != deflates_.end()); 432 if ((cur_deflate_ + 1) == deflates_.end()) { 433 extra_byte_ = 0; 434 return true; 435 } 436 uint64_t end_bit = cur_deflate_->offset + cur_deflate_->length; 437 if ((end_bit & 7) && ((end_bit + 7) & ~7ull) <= (cur_deflate_ + 1)->offset) { 438 extra_byte_ = 1; 439 } else { 440 extra_byte_ = 0; 441 } 442 return true; 443 } 444 445 bool PuffinStream::GetPuffCache(int puff_id, 446 uint64_t puff_size, 447 SharedBufferPtr* buffer) { 448 bool found = false; 449 // Search for it. 450 std::pair<int, SharedBufferPtr> cache; 451 // TODO(*): Find a faster way of doing this? Maybe change the data structure 452 // that supports faster search. 453 for (auto iter = caches_.begin(); iter != caches_.end(); ++iter) { 454 if (iter->first == puff_id) { 455 cache = std::move(*iter); 456 found = true; 457 // Remove it so later we can add it to the begining of the list. 458 caches_.erase(iter); 459 break; 460 } 461 } 462 463 // If not found, either create one or get one from the list. 464 if (!found) { 465 // If |caches_| were full, remove last ones in the list (least used), until 466 // we have enough space for the new cache. 467 while (!caches_.empty() && cur_cache_size_ + puff_size > max_cache_size_) { 468 cache = std::move(caches_.back()); 469 caches_.pop_back(); // Remove it from the list. 470 cur_cache_size_ -= cache.second->capacity(); 471 } 472 // If we have not populated the cache yet, create one. 473 if (!cache.second) { 474 cache.second.reset(new Buffer(puff_size)); 475 } 476 cache.second->resize(puff_size); 477 478 constexpr uint64_t kMaxSizeDifference = 20 * 1024; 479 if (puff_size + kMaxSizeDifference < cache.second->capacity()) { 480 cache.second->shrink_to_fit(); 481 } 482 483 cur_cache_size_ += cache.second->capacity(); 484 cache.first = puff_id; 485 } 486 487 *buffer = cache.second; 488 // By now we have either removed a cache or created new one. Now we have to 489 // insert it in the front of the list so it becomes the most recently used 490 // one. 491 caches_.push_front(std::move(cache)); 492 return found; 493 } 494 495 } // namespace puffin 496