1 // 2 // Copyright (C) 2015 The Android Open Source Project 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 17 #include "update_engine/payload_generator/delta_diff_utils.h" 18 19 #include <endian.h> 20 // TODO: Remove these pragmas when b/35721782 is fixed. 21 #pragma clang diagnostic push 22 #pragma clang diagnostic ignored "-Wmacro-redefined" 23 #include <ext2fs/ext2fs.h> 24 #pragma clang diagnostic pop 25 #include <unistd.h> 26 27 #include <algorithm> 28 #include <map> 29 30 #include <base/files/file_util.h> 31 #include <base/format_macros.h> 32 #include <base/strings/stringprintf.h> 33 #include <base/threading/simple_thread.h> 34 #include <brillo/data_encoding.h> 35 36 #include "update_engine/common/hash_calculator.h" 37 #include "update_engine/common/subprocess.h" 38 #include "update_engine/common/utils.h" 39 #include "update_engine/payload_generator/block_mapping.h" 40 #include "update_engine/payload_generator/bzip.h" 41 #include "update_engine/payload_generator/delta_diff_generator.h" 42 #include "update_engine/payload_generator/extent_ranges.h" 43 #include "update_engine/payload_generator/extent_utils.h" 44 #include "update_engine/payload_generator/xz.h" 45 46 using std::map; 47 using std::string; 48 using std::vector; 49 50 namespace chromeos_update_engine { 51 namespace { 52 53 const char* const kBsdiffPath = "bsdiff"; 54 const char* const kImgdiffPath = "imgdiff"; 55 56 // The maximum destination size allowed for bsdiff. In general, bsdiff should 57 // work for arbitrary big files, but the payload generation and payload 58 // application requires a significant amount of RAM. We put a hard-limit of 59 // 200 MiB that should not affect any released board, but will limit the 60 // Chrome binary in ASan builders. 61 const uint64_t kMaxBsdiffDestinationSize = 200 * 1024 * 1024; // bytes 62 63 // The maximum destination size allowed for imgdiff. In general, imgdiff should 64 // work for arbitrary big files, but the payload application is quite memory 65 // intensive, so we limit these operations to 50 MiB. 66 const uint64_t kMaxImgdiffDestinationSize = 50 * 1024 * 1024; // bytes 67 68 // Process a range of blocks from |range_start| to |range_end| in the extent at 69 // position |*idx_p| of |extents|. If |do_remove| is true, this range will be 70 // removed, which may cause the extent to be trimmed, split or removed entirely. 71 // The value of |*idx_p| is updated to point to the next extent to be processed. 72 // Returns true iff the next extent to process is a new or updated one. 73 bool ProcessExtentBlockRange(vector<Extent>* extents, size_t* idx_p, 74 const bool do_remove, uint64_t range_start, 75 uint64_t range_end) { 76 size_t idx = *idx_p; 77 uint64_t start_block = (*extents)[idx].start_block(); 78 uint64_t num_blocks = (*extents)[idx].num_blocks(); 79 uint64_t range_size = range_end - range_start; 80 81 if (do_remove) { 82 if (range_size == num_blocks) { 83 // Remove the entire extent. 84 extents->erase(extents->begin() + idx); 85 } else if (range_end == num_blocks) { 86 // Trim the end of the extent. 87 (*extents)[idx].set_num_blocks(num_blocks - range_size); 88 idx++; 89 } else if (range_start == 0) { 90 // Trim the head of the extent. 91 (*extents)[idx].set_start_block(start_block + range_size); 92 (*extents)[idx].set_num_blocks(num_blocks - range_size); 93 } else { 94 // Trim the middle, splitting the remainder into two parts. 95 (*extents)[idx].set_num_blocks(range_start); 96 Extent e; 97 e.set_start_block(start_block + range_end); 98 e.set_num_blocks(num_blocks - range_end); 99 idx++; 100 extents->insert(extents->begin() + idx, e); 101 } 102 } else if (range_end == num_blocks) { 103 // Done with this extent. 104 idx++; 105 } else { 106 return false; 107 } 108 109 *idx_p = idx; 110 return true; 111 } 112 113 // Remove identical corresponding block ranges in |src_extents| and 114 // |dst_extents|. Used for preventing moving of blocks onto themselves during 115 // MOVE operations. The value of |total_bytes| indicates the actual length of 116 // content; this may be slightly less than the total size of blocks, in which 117 // case the last block is only partly occupied with data. Returns the total 118 // number of bytes removed. 119 size_t RemoveIdenticalBlockRanges(vector<Extent>* src_extents, 120 vector<Extent>* dst_extents, 121 const size_t total_bytes) { 122 size_t src_idx = 0; 123 size_t dst_idx = 0; 124 uint64_t src_offset = 0, dst_offset = 0; 125 size_t removed_bytes = 0, nonfull_block_bytes; 126 bool do_remove = false; 127 while (src_idx < src_extents->size() && dst_idx < dst_extents->size()) { 128 do_remove = ((*src_extents)[src_idx].start_block() + src_offset == 129 (*dst_extents)[dst_idx].start_block() + dst_offset); 130 131 uint64_t src_num_blocks = (*src_extents)[src_idx].num_blocks(); 132 uint64_t dst_num_blocks = (*dst_extents)[dst_idx].num_blocks(); 133 uint64_t min_num_blocks = std::min(src_num_blocks - src_offset, 134 dst_num_blocks - dst_offset); 135 uint64_t prev_src_offset = src_offset; 136 uint64_t prev_dst_offset = dst_offset; 137 src_offset += min_num_blocks; 138 dst_offset += min_num_blocks; 139 140 bool new_src = ProcessExtentBlockRange(src_extents, &src_idx, do_remove, 141 prev_src_offset, src_offset); 142 bool new_dst = ProcessExtentBlockRange(dst_extents, &dst_idx, do_remove, 143 prev_dst_offset, dst_offset); 144 if (new_src) { 145 src_offset = 0; 146 } 147 if (new_dst) { 148 dst_offset = 0; 149 } 150 151 if (do_remove) 152 removed_bytes += min_num_blocks * kBlockSize; 153 } 154 155 // If we removed the last block and this block is only partly used by file 156 // content, deduct the unused portion from the total removed byte count. 157 if (do_remove && (nonfull_block_bytes = total_bytes % kBlockSize)) 158 removed_bytes -= kBlockSize - nonfull_block_bytes; 159 160 return removed_bytes; 161 } 162 163 // Returns true if the given blob |data| contains gzip header magic. 164 bool ContainsGZip(const brillo::Blob& data) { 165 const uint8_t kGZipMagic[] = {0x1f, 0x8b, 0x08, 0x00}; 166 return std::search(data.begin(), 167 data.end(), 168 std::begin(kGZipMagic), 169 std::end(kGZipMagic)) != data.end(); 170 } 171 172 } // namespace 173 174 namespace diff_utils { 175 176 // This class encapsulates a file delta processing thread work. The 177 // processor computes the delta between the source and target files; 178 // and write the compressed delta to the blob. 179 class FileDeltaProcessor : public base::DelegateSimpleThread::Delegate { 180 public: 181 FileDeltaProcessor(const string& old_part, 182 const string& new_part, 183 const PayloadVersion& version, 184 const vector<Extent>& old_extents, 185 const vector<Extent>& new_extents, 186 const string& name, 187 ssize_t chunk_blocks, 188 BlobFileWriter* blob_file) 189 : old_part_(old_part), 190 new_part_(new_part), 191 version_(version), 192 old_extents_(old_extents), 193 new_extents_(new_extents), 194 name_(name), 195 chunk_blocks_(chunk_blocks), 196 blob_file_(blob_file) {} 197 198 FileDeltaProcessor(FileDeltaProcessor&& processor) = default; 199 200 ~FileDeltaProcessor() override = default; 201 202 // Overrides DelegateSimpleThread::Delegate. 203 // Calculate the list of operations and write their corresponding deltas to 204 // the blob_file. 205 void Run() override; 206 207 // Merge each file processor's ops list to aops. 208 void MergeOperation(vector<AnnotatedOperation>* aops); 209 210 private: 211 const string& old_part_; 212 const string& new_part_; 213 const PayloadVersion& version_; 214 215 // The block ranges of the old/new file within the src/tgt image 216 const vector<Extent> old_extents_; 217 const vector<Extent> new_extents_; 218 const string name_; 219 // Block limit of one aop. 220 ssize_t chunk_blocks_; 221 BlobFileWriter* blob_file_; 222 223 // The list of ops to reach the new file from the old file. 224 vector<AnnotatedOperation> file_aops_; 225 226 DISALLOW_COPY_AND_ASSIGN(FileDeltaProcessor); 227 }; 228 229 void FileDeltaProcessor::Run() { 230 TEST_AND_RETURN(blob_file_ != nullptr); 231 232 if (!DeltaReadFile(&file_aops_, 233 old_part_, 234 new_part_, 235 old_extents_, 236 new_extents_, 237 name_, 238 chunk_blocks_, 239 version_, 240 blob_file_)) { 241 LOG(ERROR) << "Failed to generate delta for " << name_ << " (" 242 << BlocksInExtents(new_extents_) << " blocks)"; 243 } 244 } 245 246 void FileDeltaProcessor::MergeOperation(vector<AnnotatedOperation>* aops) { 247 aops->reserve(aops->size() + file_aops_.size()); 248 std::move(file_aops_.begin(), file_aops_.end(), std::back_inserter(*aops)); 249 } 250 251 bool DeltaReadPartition(vector<AnnotatedOperation>* aops, 252 const PartitionConfig& old_part, 253 const PartitionConfig& new_part, 254 ssize_t hard_chunk_blocks, 255 size_t soft_chunk_blocks, 256 const PayloadVersion& version, 257 BlobFileWriter* blob_file) { 258 ExtentRanges old_visited_blocks; 259 ExtentRanges new_visited_blocks; 260 261 TEST_AND_RETURN_FALSE(DeltaMovedAndZeroBlocks( 262 aops, 263 old_part.path, 264 new_part.path, 265 old_part.size / kBlockSize, 266 new_part.size / kBlockSize, 267 soft_chunk_blocks, 268 version, 269 blob_file, 270 &old_visited_blocks, 271 &new_visited_blocks)); 272 273 map<string, vector<Extent>> old_files_map; 274 if (old_part.fs_interface) { 275 vector<FilesystemInterface::File> old_files; 276 old_part.fs_interface->GetFiles(&old_files); 277 for (const FilesystemInterface::File& file : old_files) 278 old_files_map[file.name] = file.extents; 279 } 280 281 TEST_AND_RETURN_FALSE(new_part.fs_interface); 282 vector<FilesystemInterface::File> new_files; 283 new_part.fs_interface->GetFiles(&new_files); 284 285 vector<FileDeltaProcessor> file_delta_processors; 286 287 // The processing is very straightforward here, we generate operations for 288 // every file (and pseudo-file such as the metadata) in the new filesystem 289 // based on the file with the same name in the old filesystem, if any. 290 // Files with overlapping data blocks (like hardlinks or filesystems with tail 291 // packing or compression where the blocks store more than one file) are only 292 // generated once in the new image, but are also used only once from the old 293 // image due to some simplifications (see below). 294 for (const FilesystemInterface::File& new_file : new_files) { 295 // Ignore the files in the new filesystem without blocks. Symlinks with 296 // data blocks (for example, symlinks bigger than 60 bytes in ext2) are 297 // handled as normal files. We also ignore blocks that were already 298 // processed by a previous file. 299 vector<Extent> new_file_extents = FilterExtentRanges( 300 new_file.extents, new_visited_blocks); 301 new_visited_blocks.AddExtents(new_file_extents); 302 303 if (new_file_extents.empty()) 304 continue; 305 306 LOG(INFO) << "Encoding file " << new_file.name << " (" 307 << BlocksInExtents(new_file_extents) << " blocks)"; 308 309 // We can't visit each dst image inode more than once, as that would 310 // duplicate work. Here, we avoid visiting each source image inode 311 // more than once. Technically, we could have multiple operations 312 // that read the same blocks from the source image for diffing, but 313 // we choose not to avoid complexity. Eventually we will move away 314 // from using a graph/cycle detection/etc to generate diffs, and at that 315 // time, it will be easy (non-complex) to have many operations read 316 // from the same source blocks. At that time, this code can die. -adlr 317 vector<Extent> old_file_extents = FilterExtentRanges( 318 old_files_map[new_file.name], old_visited_blocks); 319 old_visited_blocks.AddExtents(old_file_extents); 320 321 file_delta_processors.emplace_back(old_part.path, 322 new_part.path, 323 version, 324 std::move(old_file_extents), 325 std::move(new_file_extents), 326 new_file.name, // operation name 327 hard_chunk_blocks, 328 blob_file); 329 } 330 331 size_t max_threads = GetMaxThreads(); 332 base::DelegateSimpleThreadPool thread_pool("incremental-update-generator", 333 max_threads); 334 thread_pool.Start(); 335 for (auto& processor : file_delta_processors) { 336 thread_pool.AddWork(&processor); 337 } 338 thread_pool.JoinAll(); 339 340 for (auto& processor : file_delta_processors) { 341 processor.MergeOperation(aops); 342 } 343 344 // Process all the blocks not included in any file. We provided all the unused 345 // blocks in the old partition as available data. 346 vector<Extent> new_unvisited = { 347 ExtentForRange(0, new_part.size / kBlockSize)}; 348 new_unvisited = FilterExtentRanges(new_unvisited, new_visited_blocks); 349 if (new_unvisited.empty()) 350 return true; 351 352 vector<Extent> old_unvisited; 353 if (old_part.fs_interface) { 354 old_unvisited.push_back(ExtentForRange(0, old_part.size / kBlockSize)); 355 old_unvisited = FilterExtentRanges(old_unvisited, old_visited_blocks); 356 } 357 358 LOG(INFO) << "Scanning " << BlocksInExtents(new_unvisited) 359 << " unwritten blocks using chunk size of " 360 << soft_chunk_blocks << " blocks."; 361 // We use the soft_chunk_blocks limit for the <non-file-data> as we don't 362 // really know the structure of this data and we should not expect it to have 363 // redundancy between partitions. 364 TEST_AND_RETURN_FALSE(DeltaReadFile(aops, 365 old_part.path, 366 new_part.path, 367 old_unvisited, 368 new_unvisited, 369 "<non-file-data>", // operation name 370 soft_chunk_blocks, 371 version, 372 blob_file)); 373 374 return true; 375 } 376 377 bool DeltaMovedAndZeroBlocks(vector<AnnotatedOperation>* aops, 378 const string& old_part, 379 const string& new_part, 380 size_t old_num_blocks, 381 size_t new_num_blocks, 382 ssize_t chunk_blocks, 383 const PayloadVersion& version, 384 BlobFileWriter* blob_file, 385 ExtentRanges* old_visited_blocks, 386 ExtentRanges* new_visited_blocks) { 387 vector<BlockMapping::BlockId> old_block_ids; 388 vector<BlockMapping::BlockId> new_block_ids; 389 TEST_AND_RETURN_FALSE(MapPartitionBlocks(old_part, 390 new_part, 391 old_num_blocks * kBlockSize, 392 new_num_blocks * kBlockSize, 393 kBlockSize, 394 &old_block_ids, 395 &new_block_ids)); 396 397 // If the update is inplace, we map all the blocks that didn't move, 398 // regardless of the contents since they are already copied and no operation 399 // is required. 400 if (version.InplaceUpdate()) { 401 uint64_t num_blocks = std::min(old_num_blocks, new_num_blocks); 402 for (uint64_t block = 0; block < num_blocks; block++) { 403 if (old_block_ids[block] == new_block_ids[block] && 404 !old_visited_blocks->ContainsBlock(block) && 405 !new_visited_blocks->ContainsBlock(block)) { 406 old_visited_blocks->AddBlock(block); 407 new_visited_blocks->AddBlock(block); 408 } 409 } 410 } 411 412 // A mapping from the block_id to the list of block numbers with that block id 413 // in the old partition. This is used to lookup where in the old partition 414 // is a block from the new partition. 415 map<BlockMapping::BlockId, vector<uint64_t>> old_blocks_map; 416 417 for (uint64_t block = old_num_blocks; block-- > 0; ) { 418 if (old_block_ids[block] != 0 && !old_visited_blocks->ContainsBlock(block)) 419 old_blocks_map[old_block_ids[block]].push_back(block); 420 421 // Mark all zeroed blocks in the old image as "used" since it doesn't make 422 // any sense to spend I/O to read zeros from the source partition and more 423 // importantly, these could sometimes be blocks discarded in the SSD which 424 // would read non-zero values. 425 if (old_block_ids[block] == 0) 426 old_visited_blocks->AddBlock(block); 427 } 428 429 // The collection of blocks in the new partition with just zeros. This is a 430 // common case for free-space that's also problematic for bsdiff, so we want 431 // to optimize it using REPLACE_BZ operations. The blob for a REPLACE_BZ of 432 // just zeros is so small that it doesn't make sense to spend the I/O reading 433 // zeros from the old partition. 434 vector<Extent> new_zeros; 435 436 vector<Extent> old_identical_blocks; 437 vector<Extent> new_identical_blocks; 438 439 for (uint64_t block = 0; block < new_num_blocks; block++) { 440 // Only produce operations for blocks that were not yet visited. 441 if (new_visited_blocks->ContainsBlock(block)) 442 continue; 443 if (new_block_ids[block] == 0) { 444 AppendBlockToExtents(&new_zeros, block); 445 continue; 446 } 447 448 auto old_blocks_map_it = old_blocks_map.find(new_block_ids[block]); 449 // Check if the block exists in the old partition at all. 450 if (old_blocks_map_it == old_blocks_map.end() || 451 old_blocks_map_it->second.empty()) 452 continue; 453 454 AppendBlockToExtents(&old_identical_blocks, 455 old_blocks_map_it->second.back()); 456 AppendBlockToExtents(&new_identical_blocks, block); 457 // We can't reuse source blocks in minor version 1 because the cycle 458 // breaking algorithm used in the in-place update doesn't support that. 459 if (version.InplaceUpdate()) 460 old_blocks_map_it->second.pop_back(); 461 } 462 463 // Produce operations for the zero blocks split per output extent. 464 // TODO(deymo): Produce ZERO operations instead of calling DeltaReadFile(). 465 size_t num_ops = aops->size(); 466 new_visited_blocks->AddExtents(new_zeros); 467 for (const Extent& extent : new_zeros) { 468 TEST_AND_RETURN_FALSE(DeltaReadFile(aops, 469 "", 470 new_part, 471 vector<Extent>(), // old_extents 472 vector<Extent>{extent}, // new_extents 473 "<zeros>", 474 chunk_blocks, 475 version, 476 blob_file)); 477 } 478 LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for " 479 << BlocksInExtents(new_zeros) << " zeroed blocks"; 480 481 // Produce MOVE/SOURCE_COPY operations for the moved blocks. 482 num_ops = aops->size(); 483 if (chunk_blocks == -1) 484 chunk_blocks = new_num_blocks; 485 uint64_t used_blocks = 0; 486 old_visited_blocks->AddExtents(old_identical_blocks); 487 new_visited_blocks->AddExtents(new_identical_blocks); 488 for (const Extent& extent : new_identical_blocks) { 489 // We split the operation at the extent boundary or when bigger than 490 // chunk_blocks. 491 for (uint64_t op_block_offset = 0; op_block_offset < extent.num_blocks(); 492 op_block_offset += chunk_blocks) { 493 aops->emplace_back(); 494 AnnotatedOperation* aop = &aops->back(); 495 aop->name = "<identical-blocks>"; 496 aop->op.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY) 497 ? InstallOperation::SOURCE_COPY 498 : InstallOperation::MOVE); 499 500 uint64_t chunk_num_blocks = 501 std::min(static_cast<uint64_t>(extent.num_blocks()) - op_block_offset, 502 static_cast<uint64_t>(chunk_blocks)); 503 504 // The current operation represents the move/copy operation for the 505 // sublist starting at |used_blocks| of length |chunk_num_blocks| where 506 // the src and dst are from |old_identical_blocks| and 507 // |new_identical_blocks| respectively. 508 StoreExtents( 509 ExtentsSublist(old_identical_blocks, used_blocks, chunk_num_blocks), 510 aop->op.mutable_src_extents()); 511 512 Extent* op_dst_extent = aop->op.add_dst_extents(); 513 op_dst_extent->set_start_block(extent.start_block() + op_block_offset); 514 op_dst_extent->set_num_blocks(chunk_num_blocks); 515 CHECK( 516 vector<Extent>{*op_dst_extent} == // NOLINT(whitespace/braces) 517 ExtentsSublist(new_identical_blocks, used_blocks, chunk_num_blocks)); 518 519 used_blocks += chunk_num_blocks; 520 } 521 } 522 LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for " 523 << used_blocks << " identical blocks moved"; 524 525 return true; 526 } 527 528 bool DeltaReadFile(vector<AnnotatedOperation>* aops, 529 const string& old_part, 530 const string& new_part, 531 const vector<Extent>& old_extents, 532 const vector<Extent>& new_extents, 533 const string& name, 534 ssize_t chunk_blocks, 535 const PayloadVersion& version, 536 BlobFileWriter* blob_file) { 537 brillo::Blob data; 538 InstallOperation operation; 539 540 uint64_t total_blocks = BlocksInExtents(new_extents); 541 if (chunk_blocks == -1) 542 chunk_blocks = total_blocks; 543 544 for (uint64_t block_offset = 0; block_offset < total_blocks; 545 block_offset += chunk_blocks) { 546 // Split the old/new file in the same chunks. Note that this could drop 547 // some information from the old file used for the new chunk. If the old 548 // file is smaller (or even empty when there's no old file) the chunk will 549 // also be empty. 550 vector<Extent> old_extents_chunk = ExtentsSublist( 551 old_extents, block_offset, chunk_blocks); 552 vector<Extent> new_extents_chunk = ExtentsSublist( 553 new_extents, block_offset, chunk_blocks); 554 NormalizeExtents(&old_extents_chunk); 555 NormalizeExtents(&new_extents_chunk); 556 557 TEST_AND_RETURN_FALSE(ReadExtentsToDiff(old_part, 558 new_part, 559 old_extents_chunk, 560 new_extents_chunk, 561 version, 562 &data, 563 &operation)); 564 565 // Check if the operation writes nothing. 566 if (operation.dst_extents_size() == 0) { 567 if (operation.type() == InstallOperation::MOVE) { 568 LOG(INFO) << "Empty MOVE operation (" 569 << name << "), skipping"; 570 continue; 571 } else { 572 LOG(ERROR) << "Empty non-MOVE operation"; 573 return false; 574 } 575 } 576 577 // Now, insert into the list of operations. 578 AnnotatedOperation aop; 579 aop.name = name; 580 if (static_cast<uint64_t>(chunk_blocks) < total_blocks) { 581 aop.name = base::StringPrintf("%s:%" PRIu64, 582 name.c_str(), block_offset / chunk_blocks); 583 } 584 aop.op = operation; 585 586 // Write the data 587 TEST_AND_RETURN_FALSE(aop.SetOperationBlob(data, blob_file)); 588 aops->emplace_back(aop); 589 } 590 return true; 591 } 592 593 bool GenerateBestFullOperation(const brillo::Blob& new_data, 594 const PayloadVersion& version, 595 brillo::Blob* out_blob, 596 InstallOperation_Type* out_type) { 597 if (new_data.empty()) 598 return false; 599 600 if (version.OperationAllowed(InstallOperation::ZERO) && 601 std::all_of( 602 new_data.begin(), new_data.end(), [](uint8_t x) { return x == 0; })) { 603 // The read buffer is all zeros, so produce a ZERO operation. No need to 604 // check other types of operations in this case. 605 *out_blob = brillo::Blob(); 606 *out_type = InstallOperation::ZERO; 607 return true; 608 } 609 610 bool out_blob_set = false; 611 612 // Try compressing |new_data| with xz first. 613 if (version.OperationAllowed(InstallOperation::REPLACE_XZ)) { 614 brillo::Blob new_data_xz; 615 if (XzCompress(new_data, &new_data_xz) && !new_data_xz.empty()) { 616 *out_type = InstallOperation::REPLACE_XZ; 617 *out_blob = std::move(new_data_xz); 618 out_blob_set = true; 619 } 620 } 621 622 // Try compressing it with bzip2. 623 if (version.OperationAllowed(InstallOperation::REPLACE_BZ)) { 624 brillo::Blob new_data_bz; 625 // TODO(deymo): Implement some heuristic to determine if it is worth trying 626 // to compress the blob with bzip2 if we already have a good REPLACE_XZ. 627 if (BzipCompress(new_data, &new_data_bz) && !new_data_bz.empty() && 628 (!out_blob_set || out_blob->size() > new_data_bz.size())) { 629 // A REPLACE_BZ is better or nothing else was set. 630 *out_type = InstallOperation::REPLACE_BZ; 631 *out_blob = std::move(new_data_bz); 632 out_blob_set = true; 633 } 634 } 635 636 // If nothing else worked or it was badly compressed we try a REPLACE. 637 if (!out_blob_set || out_blob->size() >= new_data.size()) { 638 *out_type = InstallOperation::REPLACE; 639 // This needs to make a copy of the data in the case bzip or xz didn't 640 // compress well, which is not the common case so the performance hit is 641 // low. 642 *out_blob = new_data; 643 } 644 return true; 645 } 646 647 bool ReadExtentsToDiff(const string& old_part, 648 const string& new_part, 649 const vector<Extent>& old_extents, 650 const vector<Extent>& new_extents, 651 const PayloadVersion& version, 652 brillo::Blob* out_data, 653 InstallOperation* out_op) { 654 InstallOperation operation; 655 656 // We read blocks from old_extents and write blocks to new_extents. 657 uint64_t blocks_to_read = BlocksInExtents(old_extents); 658 uint64_t blocks_to_write = BlocksInExtents(new_extents); 659 660 // Disable bsdiff and imgdiff when the data is too big. 661 bool bsdiff_allowed = 662 version.OperationAllowed(InstallOperation::SOURCE_BSDIFF) || 663 version.OperationAllowed(InstallOperation::BSDIFF); 664 if (bsdiff_allowed && 665 blocks_to_read * kBlockSize > kMaxBsdiffDestinationSize) { 666 LOG(INFO) << "bsdiff blacklisted, data too big: " 667 << blocks_to_read * kBlockSize << " bytes"; 668 bsdiff_allowed = false; 669 } 670 671 bool imgdiff_allowed = version.OperationAllowed(InstallOperation::IMGDIFF); 672 if (imgdiff_allowed && 673 blocks_to_read * kBlockSize > kMaxImgdiffDestinationSize) { 674 LOG(INFO) << "imgdiff blacklisted, data too big: " 675 << blocks_to_read * kBlockSize << " bytes"; 676 imgdiff_allowed = false; 677 } 678 679 // Make copies of the extents so we can modify them. 680 vector<Extent> src_extents = old_extents; 681 vector<Extent> dst_extents = new_extents; 682 683 // Read in bytes from new data. 684 brillo::Blob new_data; 685 TEST_AND_RETURN_FALSE(utils::ReadExtents(new_part, 686 new_extents, 687 &new_data, 688 kBlockSize * blocks_to_write, 689 kBlockSize)); 690 TEST_AND_RETURN_FALSE(!new_data.empty()); 691 692 // Data blob that will be written to delta file. 693 brillo::Blob data_blob; 694 695 // Try generating a full operation for the given new data, regardless of the 696 // old_data. 697 InstallOperation_Type op_type; 698 TEST_AND_RETURN_FALSE( 699 GenerateBestFullOperation(new_data, version, &data_blob, &op_type)); 700 operation.set_type(op_type); 701 702 brillo::Blob old_data; 703 if (blocks_to_read > 0) { 704 // Read old data. 705 TEST_AND_RETURN_FALSE( 706 utils::ReadExtents(old_part, src_extents, &old_data, 707 kBlockSize * blocks_to_read, kBlockSize)); 708 if (old_data == new_data) { 709 // No change in data. 710 operation.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY) 711 ? InstallOperation::SOURCE_COPY 712 : InstallOperation::MOVE); 713 data_blob = brillo::Blob(); 714 } else if (bsdiff_allowed || imgdiff_allowed) { 715 // If the source file is considered bsdiff safe (no bsdiff bugs 716 // triggered), see if BSDIFF encoding is smaller. 717 base::FilePath old_chunk; 718 TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&old_chunk)); 719 ScopedPathUnlinker old_unlinker(old_chunk.value()); 720 TEST_AND_RETURN_FALSE(utils::WriteFile( 721 old_chunk.value().c_str(), old_data.data(), old_data.size())); 722 base::FilePath new_chunk; 723 TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&new_chunk)); 724 ScopedPathUnlinker new_unlinker(new_chunk.value()); 725 TEST_AND_RETURN_FALSE(utils::WriteFile( 726 new_chunk.value().c_str(), new_data.data(), new_data.size())); 727 728 if (bsdiff_allowed) { 729 brillo::Blob bsdiff_delta; 730 TEST_AND_RETURN_FALSE(DiffFiles( 731 kBsdiffPath, old_chunk.value(), new_chunk.value(), &bsdiff_delta)); 732 CHECK_GT(bsdiff_delta.size(), static_cast<brillo::Blob::size_type>(0)); 733 if (bsdiff_delta.size() < data_blob.size()) { 734 operation.set_type( 735 version.OperationAllowed(InstallOperation::SOURCE_BSDIFF) 736 ? InstallOperation::SOURCE_BSDIFF 737 : InstallOperation::BSDIFF); 738 data_blob = std::move(bsdiff_delta); 739 } 740 } 741 if (imgdiff_allowed && ContainsGZip(old_data) && ContainsGZip(new_data)) { 742 brillo::Blob imgdiff_delta; 743 // Imgdiff might fail in some cases, only use the result if it succeed, 744 // otherwise print the extents to analyze. 745 if (DiffFiles(kImgdiffPath, 746 old_chunk.value(), 747 new_chunk.value(), 748 &imgdiff_delta) && 749 imgdiff_delta.size() > 0) { 750 if (imgdiff_delta.size() < data_blob.size()) { 751 operation.set_type(InstallOperation::IMGDIFF); 752 data_blob = std::move(imgdiff_delta); 753 } 754 } else { 755 LOG(ERROR) << "Imgdiff failed with source extents: " 756 << ExtentsToString(src_extents) 757 << ", destination extents: " 758 << ExtentsToString(dst_extents); 759 } 760 } 761 } 762 } 763 764 size_t removed_bytes = 0; 765 // Remove identical src/dst block ranges in MOVE operations. 766 if (operation.type() == InstallOperation::MOVE) { 767 removed_bytes = RemoveIdenticalBlockRanges( 768 &src_extents, &dst_extents, new_data.size()); 769 } 770 // Set legacy src_length and dst_length fields. 771 operation.set_src_length(old_data.size() - removed_bytes); 772 operation.set_dst_length(new_data.size() - removed_bytes); 773 774 // Embed extents in the operation. 775 StoreExtents(src_extents, operation.mutable_src_extents()); 776 StoreExtents(dst_extents, operation.mutable_dst_extents()); 777 778 // Replace operations should not have source extents. 779 if (IsAReplaceOperation(operation.type())) { 780 operation.clear_src_extents(); 781 operation.clear_src_length(); 782 } 783 784 *out_data = std::move(data_blob); 785 *out_op = operation; 786 787 return true; 788 } 789 790 // Runs the bsdiff or imgdiff tool in |diff_path| on two files and returns the 791 // resulting delta in |out|. Returns true on success. 792 bool DiffFiles(const string& diff_path, 793 const string& old_file, 794 const string& new_file, 795 brillo::Blob* out) { 796 const string kPatchFile = "delta.patchXXXXXX"; 797 string patch_file_path; 798 799 TEST_AND_RETURN_FALSE( 800 utils::MakeTempFile(kPatchFile, &patch_file_path, nullptr)); 801 802 vector<string> cmd; 803 cmd.push_back(diff_path); 804 cmd.push_back(old_file); 805 cmd.push_back(new_file); 806 cmd.push_back(patch_file_path); 807 808 int rc = 1; 809 string stdout; 810 TEST_AND_RETURN_FALSE(Subprocess::SynchronousExec(cmd, &rc, &stdout)); 811 if (rc != 0) { 812 LOG(ERROR) << diff_path << " returned " << rc << std::endl << stdout; 813 return false; 814 } 815 TEST_AND_RETURN_FALSE(utils::ReadFile(patch_file_path, out)); 816 unlink(patch_file_path.c_str()); 817 return true; 818 } 819 820 bool IsAReplaceOperation(InstallOperation_Type op_type) { 821 return (op_type == InstallOperation::REPLACE || 822 op_type == InstallOperation::REPLACE_BZ || 823 op_type == InstallOperation::REPLACE_XZ); 824 } 825 826 // Returns true if |op| is a no-op operation that doesn't do any useful work 827 // (e.g., a move operation that copies blocks onto themselves). 828 bool IsNoopOperation(const InstallOperation& op) { 829 return (op.type() == InstallOperation::MOVE && 830 ExpandExtents(op.src_extents()) == ExpandExtents(op.dst_extents())); 831 } 832 833 void FilterNoopOperations(vector<AnnotatedOperation>* ops) { 834 ops->erase( 835 std::remove_if( 836 ops->begin(), ops->end(), 837 [](const AnnotatedOperation& aop){return IsNoopOperation(aop.op);}), 838 ops->end()); 839 } 840 841 bool InitializePartitionInfo(const PartitionConfig& part, PartitionInfo* info) { 842 info->set_size(part.size); 843 HashCalculator hasher; 844 TEST_AND_RETURN_FALSE(hasher.UpdateFile(part.path, part.size) == 845 static_cast<off_t>(part.size)); 846 TEST_AND_RETURN_FALSE(hasher.Finalize()); 847 const brillo::Blob& hash = hasher.raw_hash(); 848 info->set_hash(hash.data(), hash.size()); 849 LOG(INFO) << part.path << ": size=" << part.size 850 << " hash=" << brillo::data_encoding::Base64Encode(hash); 851 return true; 852 } 853 854 bool CompareAopsByDestination(AnnotatedOperation first_aop, 855 AnnotatedOperation second_aop) { 856 // We want empty operations to be at the end of the payload. 857 if (!first_aop.op.dst_extents().size() || !second_aop.op.dst_extents().size()) 858 return ((!first_aop.op.dst_extents().size()) < 859 (!second_aop.op.dst_extents().size())); 860 uint32_t first_dst_start = first_aop.op.dst_extents(0).start_block(); 861 uint32_t second_dst_start = second_aop.op.dst_extents(0).start_block(); 862 return first_dst_start < second_dst_start; 863 } 864 865 bool IsExtFilesystem(const string& device) { 866 brillo::Blob header; 867 // See include/linux/ext2_fs.h for more details on the structure. We obtain 868 // ext2 constants from ext2fs/ext2fs.h header but we don't link with the 869 // library. 870 if (!utils::ReadFileChunk( 871 device, 0, SUPERBLOCK_OFFSET + SUPERBLOCK_SIZE, &header) || 872 header.size() < SUPERBLOCK_OFFSET + SUPERBLOCK_SIZE) 873 return false; 874 875 const uint8_t* superblock = header.data() + SUPERBLOCK_OFFSET; 876 877 // ext3_fs.h: ext3_super_block.s_blocks_count 878 uint32_t block_count = 879 *reinterpret_cast<const uint32_t*>(superblock + 1 * sizeof(int32_t)); 880 881 // ext3_fs.h: ext3_super_block.s_log_block_size 882 uint32_t log_block_size = 883 *reinterpret_cast<const uint32_t*>(superblock + 6 * sizeof(int32_t)); 884 885 // ext3_fs.h: ext3_super_block.s_magic 886 uint16_t magic = 887 *reinterpret_cast<const uint16_t*>(superblock + 14 * sizeof(int32_t)); 888 889 block_count = le32toh(block_count); 890 log_block_size = le32toh(log_block_size) + EXT2_MIN_BLOCK_LOG_SIZE; 891 magic = le16toh(magic); 892 893 if (magic != EXT2_SUPER_MAGIC) 894 return false; 895 896 // Sanity check the parameters. 897 TEST_AND_RETURN_FALSE(log_block_size >= EXT2_MIN_BLOCK_LOG_SIZE && 898 log_block_size <= EXT2_MAX_BLOCK_LOG_SIZE); 899 TEST_AND_RETURN_FALSE(block_count > 0); 900 return true; 901 } 902 903 // Return the number of CPUs on the machine, and 4 threads in minimum. 904 size_t GetMaxThreads() { 905 return std::max(sysconf(_SC_NPROCESSORS_ONLN), 4L); 906 } 907 908 } // namespace diff_utils 909 910 } // namespace chromeos_update_engine 911