//
// Copyright (C) 2015 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#include "update_engine/payload_generator/delta_diff_utils.h"

#include <algorithm>
#include <map>

#include <base/files/file_util.h>
#include <base/format_macros.h>
#include <base/strings/stringprintf.h>

#include "update_engine/common/hash_calculator.h"
#include "update_engine/common/subprocess.h"
#include "update_engine/common/utils.h"
#include "update_engine/payload_generator/block_mapping.h"
#include "update_engine/payload_generator/bzip.h"
#include "update_engine/payload_generator/delta_diff_generator.h"
#include "update_engine/payload_generator/extent_ranges.h"
#include "update_engine/payload_generator/extent_utils.h"
#include "update_engine/payload_generator/xz.h"

using std::map;
using std::string;
using std::vector;

namespace chromeos_update_engine {
namespace {

const char* const kBsdiffPath = "bsdiff";
const char* const kImgdiffPath = "imgdiff";

// The maximum destination size allowed for bsdiff. In general, bsdiff should
// work for arbitrarily big files, but the payload generation and payload
// application require a significant amount of RAM. We put a hard limit of
// 200 MiB that should not affect any released board, but will limit the
// Chrome binary in ASan builders.
const uint64_t kMaxBsdiffDestinationSize = 200 * 1024 * 1024;  // bytes

// The maximum destination size allowed for imgdiff. In general, imgdiff should
// work for arbitrarily big files, but the payload application is quite memory
// intensive, so we limit these operations to 50 MiB.
const uint64_t kMaxImgdiffDestinationSize = 50 * 1024 * 1024;  // bytes

// Process a range of blocks from |range_start| to |range_end| in the extent at
// position |*idx_p| of |extents|. If |do_remove| is true, this range will be
// removed, which may cause the extent to be trimmed, split or removed entirely.
// The value of |*idx_p| is updated to point to the next extent to be processed.
// Returns true iff the next extent to process is a new or updated one.
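// For example (illustrative, not from the original source): removing the
// relative range [2, 5) from an extent with start_block=100 and num_blocks=10
// trims it to {start_block=100, num_blocks=2} and inserts a new extent
// {start_block=105, num_blocks=5} right after it.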
bool ProcessExtentBlockRange(vector<Extent>* extents, size_t* idx_p,
                             const bool do_remove, uint64_t range_start,
                             uint64_t range_end) {
  size_t idx = *idx_p;
  uint64_t start_block = (*extents)[idx].start_block();
  uint64_t num_blocks = (*extents)[idx].num_blocks();
  uint64_t range_size = range_end - range_start;

  if (do_remove) {
    if (range_size == num_blocks) {
      // Remove the entire extent.
      extents->erase(extents->begin() + idx);
    } else if (range_end == num_blocks) {
      // Trim the end of the extent.
      (*extents)[idx].set_num_blocks(num_blocks - range_size);
      idx++;
    } else if (range_start == 0) {
      // Trim the head of the extent.
      (*extents)[idx].set_start_block(start_block + range_size);
      (*extents)[idx].set_num_blocks(num_blocks - range_size);
    } else {
      // Trim the middle, splitting the remainder into two parts.
      (*extents)[idx].set_num_blocks(range_start);
      Extent e;
      e.set_start_block(start_block + range_end);
      e.set_num_blocks(num_blocks - range_end);
      idx++;
      extents->insert(extents->begin() + idx, e);
    }
  } else if (range_end == num_blocks) {
    // Done with this extent.
    idx++;
  } else {
    return false;
  }

  *idx_p = idx;
  return true;
}

// Remove identical corresponding block ranges in |src_extents| and
// |dst_extents|. Used to prevent moving blocks onto themselves during MOVE
// operations. The value of |total_bytes| indicates the actual length of
// content; this may be slightly less than the total size of blocks, in which
// case the last block is only partly occupied with data. Returns the total
// number of bytes removed.
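// For example, if a source extent and a destination extent both cover block
// 20 at the same position in the file, that block is dropped from both lists
// and kBlockSize bytes are counted as removed (minus the unused tail if it
// was the last, partly filled block).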
size_t RemoveIdenticalBlockRanges(vector<Extent>* src_extents,
                                  vector<Extent>* dst_extents,
                                  const size_t total_bytes) {
  size_t src_idx = 0;
  size_t dst_idx = 0;
  uint64_t src_offset = 0, dst_offset = 0;
  bool new_src = true, new_dst = true;
  size_t removed_bytes = 0, nonfull_block_bytes;
  bool do_remove = false;
  while (src_idx < src_extents->size() && dst_idx < dst_extents->size()) {
    if (new_src) {
      src_offset = 0;
      new_src = false;
    }
    if (new_dst) {
      dst_offset = 0;
      new_dst = false;
    }

    do_remove = ((*src_extents)[src_idx].start_block() + src_offset ==
                 (*dst_extents)[dst_idx].start_block() + dst_offset);

    uint64_t src_num_blocks = (*src_extents)[src_idx].num_blocks();
    uint64_t dst_num_blocks = (*dst_extents)[dst_idx].num_blocks();
    uint64_t min_num_blocks = std::min(src_num_blocks - src_offset,
                                       dst_num_blocks - dst_offset);
    uint64_t prev_src_offset = src_offset;
    uint64_t prev_dst_offset = dst_offset;
    src_offset += min_num_blocks;
    dst_offset += min_num_blocks;

    new_src = ProcessExtentBlockRange(src_extents, &src_idx, do_remove,
                                      prev_src_offset, src_offset);
    new_dst = ProcessExtentBlockRange(dst_extents, &dst_idx, do_remove,
                                      prev_dst_offset, dst_offset);
    if (do_remove)
      removed_bytes += min_num_blocks * kBlockSize;
  }

  // If we removed the last block and this block is only partly used by file
  // content, deduct the unused portion from the total removed byte count.
  if (do_remove && (nonfull_block_bytes = total_bytes % kBlockSize))
    removed_bytes -= kBlockSize - nonfull_block_bytes;

  return removed_bytes;
}

// Returns true if the given blob |data| contains gzip header magic.
bool ContainsGZip(const brillo::Blob& data) {
  const uint8_t kGZipMagic[] = {0x1f, 0x8b, 0x08, 0x00};
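  // Note: 0x1f 0x8b is the gzip signature, 0x08 selects the deflate
  // compression method and 0x00 matches an empty flags byte; std::search
  // below finds this sequence anywhere in the blob, not only at offset 0.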
  return std::search(data.begin(),
                     data.end(),
                     std::begin(kGZipMagic),
                     std::end(kGZipMagic)) != data.end();
}

}  // namespace

namespace diff_utils {

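// DeltaReadPartition first emits operations for moved and zeroed blocks, then
// one set of operations per file present in the new filesystem, and finally a
// catch-all set of operations for the remaining <non-file-data> blocks.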
bool DeltaReadPartition(vector<AnnotatedOperation>* aops,
                        const PartitionConfig& old_part,
                        const PartitionConfig& new_part,
                        ssize_t hard_chunk_blocks,
                        size_t soft_chunk_blocks,
                        const PayloadVersion& version,
                        BlobFileWriter* blob_file) {
  ExtentRanges old_visited_blocks;
  ExtentRanges new_visited_blocks;

  TEST_AND_RETURN_FALSE(DeltaMovedAndZeroBlocks(
      aops,
      old_part.path,
      new_part.path,
      old_part.fs_interface ? old_part.fs_interface->GetBlockCount() : 0,
      new_part.fs_interface->GetBlockCount(),
      soft_chunk_blocks,
      version,
      blob_file,
      &old_visited_blocks,
      &new_visited_blocks));

  map<string, vector<Extent>> old_files_map;
  if (old_part.fs_interface) {
    vector<FilesystemInterface::File> old_files;
    old_part.fs_interface->GetFiles(&old_files);
    for (const FilesystemInterface::File& file : old_files)
      old_files_map[file.name] = file.extents;
  }

  TEST_AND_RETURN_FALSE(new_part.fs_interface);
  vector<FilesystemInterface::File> new_files;
  new_part.fs_interface->GetFiles(&new_files);

  // The processing is very straightforward here: we generate operations for
  // every file (and pseudo-file such as the metadata) in the new filesystem
  // based on the file with the same name in the old filesystem, if any.
  // Files with overlapping data blocks (like hardlinks or filesystems with tail
  // packing or compression where the blocks store more than one file) are only
  // generated once in the new image, but are also used only once from the old
  // image due to some simplifications (see below).
  for (const FilesystemInterface::File& new_file : new_files) {
    // Ignore the files in the new filesystem without blocks. Symlinks with
    // data blocks (for example, symlinks bigger than 60 bytes in ext2) are
    // handled as normal files. We also ignore blocks that were already
    // processed by a previous file.
    vector<Extent> new_file_extents = FilterExtentRanges(
        new_file.extents, new_visited_blocks);
    new_visited_blocks.AddExtents(new_file_extents);

    if (new_file_extents.empty())
      continue;

    LOG(INFO) << "Encoding file " << new_file.name << " ("
              << BlocksInExtents(new_file_extents) << " blocks)";

    // We can't visit each dst image inode more than once, as that would
    // duplicate work. Here, we avoid visiting each source image inode
    // more than once. Technically, we could have multiple operations
    // that read the same blocks from the source image for diffing, but
    // we choose not to, to avoid complexity. Eventually we will move away
    // from using a graph/cycle detection/etc to generate diffs, and at that
    // time, it will be easy (non-complex) to have many operations read
    // from the same source blocks. At that time, this code can die. -adlr
    vector<Extent> old_file_extents = FilterExtentRanges(
        old_files_map[new_file.name], old_visited_blocks);
    old_visited_blocks.AddExtents(old_file_extents);

    TEST_AND_RETURN_FALSE(DeltaReadFile(aops,
                                        old_part.path,
                                        new_part.path,
                                        old_file_extents,
                                        new_file_extents,
                                        new_file.name,  // operation name
                                        hard_chunk_blocks,
                                        version,
                                        blob_file));
  }
  // Process all the blocks not included in any file. We provide all the
  // unused blocks in the old partition as available data.
  vector<Extent> new_unvisited = {
      ExtentForRange(0, new_part.size / kBlockSize)};
  new_unvisited = FilterExtentRanges(new_unvisited, new_visited_blocks);
  if (new_unvisited.empty())
    return true;

  vector<Extent> old_unvisited;
  if (old_part.fs_interface) {
    old_unvisited.push_back(ExtentForRange(0, old_part.size / kBlockSize));
    old_unvisited = FilterExtentRanges(old_unvisited, old_visited_blocks);
  }

  LOG(INFO) << "Scanning " << BlocksInExtents(new_unvisited)
            << " unwritten blocks using chunk size of "
            << soft_chunk_blocks << " blocks.";
  // We use the soft_chunk_blocks limit for the <non-file-data> as we don't
  // really know the structure of this data and we should not expect it to have
  // redundancy between partitions.
  TEST_AND_RETURN_FALSE(DeltaReadFile(aops,
                                      old_part.path,
                                      new_part.path,
                                      old_unvisited,
                                      new_unvisited,
                                      "<non-file-data>",  // operation name
                                      soft_chunk_blocks,
                                      version,
                                      blob_file));

  return true;
}

bool DeltaMovedAndZeroBlocks(vector<AnnotatedOperation>* aops,
                             const string& old_part,
                             const string& new_part,
                             size_t old_num_blocks,
                             size_t new_num_blocks,
                             ssize_t chunk_blocks,
                             const PayloadVersion& version,
                             BlobFileWriter* blob_file,
                             ExtentRanges* old_visited_blocks,
                             ExtentRanges* new_visited_blocks) {
  vector<BlockMapping::BlockId> old_block_ids;
  vector<BlockMapping::BlockId> new_block_ids;
  TEST_AND_RETURN_FALSE(MapPartitionBlocks(old_part,
                                           new_part,
                                           old_num_blocks * kBlockSize,
                                           new_num_blocks * kBlockSize,
                                           kBlockSize,
                                           &old_block_ids,
                                           &new_block_ids));

  // If the update is inplace, we map all the blocks that didn't move,
  // regardless of the contents since they are already copied and no operation
  // is required.
  if (version.InplaceUpdate()) {
    uint64_t num_blocks = std::min(old_num_blocks, new_num_blocks);
    for (uint64_t block = 0; block < num_blocks; block++) {
      if (old_block_ids[block] == new_block_ids[block] &&
          !old_visited_blocks->ContainsBlock(block) &&
          !new_visited_blocks->ContainsBlock(block)) {
        old_visited_blocks->AddBlock(block);
        new_visited_blocks->AddBlock(block);
      }
    }
  }

  // A mapping from the block_id to the list of block numbers with that block
  // id in the old partition. This is used to look up where in the old
  // partition a block from the new partition is located.
  map<BlockMapping::BlockId, vector<uint64_t>> old_blocks_map;

  for (uint64_t block = old_num_blocks; block-- > 0; ) {
    if (old_block_ids[block] != 0 && !old_visited_blocks->ContainsBlock(block))
      old_blocks_map[old_block_ids[block]].push_back(block);
  }
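  // Iterating backwards above leaves the lowest block number at the back() of
  // each per-id list, so the lookups below always reuse the lowest-numbered
  // matching block from the old partition.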

  // The collection of blocks in the new partition with just zeros. This is a
  // common case for free-space that's also problematic for bsdiff, so we want
  // to optimize it using REPLACE_BZ operations. The blob for a REPLACE_BZ of
  // just zeros is so small that it doesn't make sense to spend the I/O reading
  // zeros from the old partition.
  vector<Extent> new_zeros;

  vector<Extent> old_identical_blocks;
  vector<Extent> new_identical_blocks;

  for (uint64_t block = 0; block < new_num_blocks; block++) {
    // Only produce operations for blocks that were not yet visited.
    if (new_visited_blocks->ContainsBlock(block))
      continue;
    if (new_block_ids[block] == 0) {
      AppendBlockToExtents(&new_zeros, block);
      continue;
    }

    auto old_blocks_map_it = old_blocks_map.find(new_block_ids[block]);
    // Check if the block exists in the old partition at all.
    if (old_blocks_map_it == old_blocks_map.end() ||
        old_blocks_map_it->second.empty())
      continue;

    AppendBlockToExtents(&old_identical_blocks,
                         old_blocks_map_it->second.back());
    AppendBlockToExtents(&new_identical_blocks, block);
    // We can't reuse source blocks in minor version 1 because the cycle
    // breaking algorithm used in the in-place update doesn't support that.
    if (version.InplaceUpdate())
      old_blocks_map_it->second.pop_back();
  }

  // Produce operations for the zero blocks split per output extent.
  // TODO(deymo): Produce ZERO operations instead of calling DeltaReadFile().
  size_t num_ops = aops->size();
  new_visited_blocks->AddExtents(new_zeros);
  for (Extent extent : new_zeros) {
    TEST_AND_RETURN_FALSE(DeltaReadFile(aops,
                                        "",
                                        new_part,
                                        vector<Extent>(),        // old_extents
                                        vector<Extent>{extent},  // new_extents
                                        "<zeros>",
                                        chunk_blocks,
                                        version,
                                        blob_file));
  }
  LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for "
            << BlocksInExtents(new_zeros) << " zeroed blocks";

  // Produce MOVE/SOURCE_COPY operations for the moved blocks.
  num_ops = aops->size();
  if (chunk_blocks == -1)
    chunk_blocks = new_num_blocks;
  uint64_t used_blocks = 0;
  old_visited_blocks->AddExtents(old_identical_blocks);
  new_visited_blocks->AddExtents(new_identical_blocks);
  for (Extent extent : new_identical_blocks) {
    // We split the operation at the extent boundary or when bigger than
    // chunk_blocks.
    for (uint64_t op_block_offset = 0; op_block_offset < extent.num_blocks();
         op_block_offset += chunk_blocks) {
      aops->emplace_back();
      AnnotatedOperation* aop = &aops->back();
      aop->name = "<identical-blocks>";
      aop->op.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY)
                           ? InstallOperation::SOURCE_COPY
                           : InstallOperation::MOVE);

      uint64_t chunk_num_blocks =
        std::min(extent.num_blocks() - op_block_offset,
                 static_cast<uint64_t>(chunk_blocks));

      // The current operation represents the move/copy operation for the
      // sublist starting at |used_blocks| of length |chunk_num_blocks| where
      // the src and dst are from |old_identical_blocks| and
      // |new_identical_blocks| respectively.
      StoreExtents(
          ExtentsSublist(old_identical_blocks, used_blocks, chunk_num_blocks),
          aop->op.mutable_src_extents());

      Extent* op_dst_extent = aop->op.add_dst_extents();
      op_dst_extent->set_start_block(extent.start_block() + op_block_offset);
      op_dst_extent->set_num_blocks(chunk_num_blocks);
      CHECK(
          vector<Extent>{*op_dst_extent} ==  // NOLINT(whitespace/braces)
          ExtentsSublist(new_identical_blocks, used_blocks, chunk_num_blocks));

      used_blocks += chunk_num_blocks;
    }
  }
  LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for "
            << used_blocks << " identical blocks moved";

  return true;
}

bool DeltaReadFile(vector<AnnotatedOperation>* aops,
                   const string& old_part,
                   const string& new_part,
                   const vector<Extent>& old_extents,
                   const vector<Extent>& new_extents,
                   const string& name,
                   ssize_t chunk_blocks,
                   const PayloadVersion& version,
                   BlobFileWriter* blob_file) {
  brillo::Blob data;
  InstallOperation operation;

  uint64_t total_blocks = BlocksInExtents(new_extents);
  if (chunk_blocks == -1)
    chunk_blocks = total_blocks;

  for (uint64_t block_offset = 0; block_offset < total_blocks;
      block_offset += chunk_blocks) {
    // Split the old/new file into the same chunks. Note that this could drop
    // some information from the old file used for the new chunk. If the old
    // file is smaller (or even empty when there's no old file) the chunk will
    // also be empty.
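    // For example, with chunk_blocks = 1024 and a new file spanning 2500
    // blocks, this loop produces chunks covering block offsets [0, 1024),
    // [1024, 2048) and [2048, 2500), named "<name>:0", "<name>:1" and
    // "<name>:2" (unless a chunk turns into an empty MOVE and is skipped).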
    vector<Extent> old_extents_chunk = ExtentsSublist(
        old_extents, block_offset, chunk_blocks);
    vector<Extent> new_extents_chunk = ExtentsSublist(
        new_extents, block_offset, chunk_blocks);
    NormalizeExtents(&old_extents_chunk);
    NormalizeExtents(&new_extents_chunk);

    TEST_AND_RETURN_FALSE(ReadExtentsToDiff(old_part,
                                            new_part,
                                            old_extents_chunk,
                                            new_extents_chunk,
                                            version,
                                            &data,
                                            &operation));

    // Check if the operation writes nothing.
    if (operation.dst_extents_size() == 0) {
      if (operation.type() == InstallOperation::MOVE) {
        LOG(INFO) << "Empty MOVE operation ("
                  << name << "), skipping";
        continue;
      } else {
        LOG(ERROR) << "Empty non-MOVE operation";
        return false;
      }
    }

    // Now, insert into the list of operations.
    AnnotatedOperation aop;
    aop.name = name;
    if (static_cast<uint64_t>(chunk_blocks) < total_blocks) {
      aop.name = base::StringPrintf("%s:%" PRIu64,
                                    name.c_str(), block_offset / chunk_blocks);
    }
    aop.op = operation;

    // Write the data
    TEST_AND_RETURN_FALSE(aop.SetOperationBlob(data, blob_file));
    aops->emplace_back(aop);
  }
  return true;
}

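// Chooses the smallest full-replacement encoding for |new_data| among the
// operation types allowed by |version|: ZERO when the data is all zeros,
// otherwise the smallest of REPLACE_XZ, REPLACE_BZ and plain REPLACE.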
bool GenerateBestFullOperation(const brillo::Blob& new_data,
                               const PayloadVersion& version,
                               brillo::Blob* out_blob,
                               InstallOperation_Type* out_type) {
  if (new_data.empty())
    return false;

  if (version.OperationAllowed(InstallOperation::ZERO) &&
      std::all_of(
          new_data.begin(), new_data.end(), [](uint8_t x) { return x == 0; })) {
    // The read buffer is all zeros, so produce a ZERO operation. No need to
    // check other types of operations in this case.
    *out_blob = brillo::Blob();
    *out_type = InstallOperation::ZERO;
    return true;
  }

  bool out_blob_set = false;

  // Try compressing |new_data| with xz first.
  if (version.OperationAllowed(InstallOperation::REPLACE_XZ)) {
    brillo::Blob new_data_xz;
    if (XzCompress(new_data, &new_data_xz) && !new_data_xz.empty()) {
      *out_type = InstallOperation::REPLACE_XZ;
      *out_blob = std::move(new_data_xz);
      out_blob_set = true;
    }
  }

  // Try compressing it with bzip2.
  if (version.OperationAllowed(InstallOperation::REPLACE_BZ)) {
    brillo::Blob new_data_bz;
    // TODO(deymo): Implement some heuristic to determine if it is worth trying
    // to compress the blob with bzip2 if we already have a good REPLACE_XZ.
    if (BzipCompress(new_data, &new_data_bz) && !new_data_bz.empty() &&
        (!out_blob_set || out_blob->size() > new_data_bz.size())) {
      // A REPLACE_BZ is better or nothing else was set.
      *out_type = InstallOperation::REPLACE_BZ;
      *out_blob = std::move(new_data_bz);
      out_blob_set = true;
    }
  }

  // If nothing else worked, or the compressed result is not smaller than the
  // raw data, fall back to a plain REPLACE.
  if (!out_blob_set || out_blob->size() >= new_data.size()) {
    *out_type = InstallOperation::REPLACE;
    // This needs to make a copy of the data in case bzip2 or xz didn't
    // compress well, which is not the common case, so the performance hit is
    // low.
    *out_blob = new_data;
  }
  return true;
}

bool ReadExtentsToDiff(const string& old_part,
                       const string& new_part,
                       const vector<Extent>& old_extents,
                       const vector<Extent>& new_extents,
                       const PayloadVersion& version,
                       brillo::Blob* out_data,
                       InstallOperation* out_op) {
  InstallOperation operation;

  // We read blocks from old_extents and write blocks to new_extents.
  uint64_t blocks_to_read = BlocksInExtents(old_extents);
  uint64_t blocks_to_write = BlocksInExtents(new_extents);

  // Disable bsdiff and imgdiff when the data is too big.
  bool bsdiff_allowed =
      version.OperationAllowed(InstallOperation::SOURCE_BSDIFF) ||
      version.OperationAllowed(InstallOperation::BSDIFF);
  if (bsdiff_allowed &&
      blocks_to_read * kBlockSize > kMaxBsdiffDestinationSize) {
    LOG(INFO) << "bsdiff blacklisted, data too big: "
              << blocks_to_read * kBlockSize << " bytes";
    bsdiff_allowed = false;
  }

  bool imgdiff_allowed = version.OperationAllowed(InstallOperation::IMGDIFF);
  if (imgdiff_allowed &&
      blocks_to_read * kBlockSize > kMaxImgdiffDestinationSize) {
    LOG(INFO) << "imgdiff blacklisted, data too big: "
              << blocks_to_read * kBlockSize << " bytes";
    imgdiff_allowed = false;
  }

  // Make copies of the extents so we can modify them.
  vector<Extent> src_extents = old_extents;
  vector<Extent> dst_extents = new_extents;

  // Read in bytes from new data.
  brillo::Blob new_data;
  TEST_AND_RETURN_FALSE(utils::ReadExtents(new_part,
                                           new_extents,
                                           &new_data,
                                           kBlockSize * blocks_to_write,
                                           kBlockSize));
  TEST_AND_RETURN_FALSE(!new_data.empty());

  // Data blob that will be written to delta file.
  brillo::Blob data_blob;

  // Try generating a full operation for the given new data, regardless of the
  // old_data.
  InstallOperation_Type op_type;
  TEST_AND_RETURN_FALSE(
      GenerateBestFullOperation(new_data, version, &data_blob, &op_type));
  operation.set_type(op_type);

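  // If there is old data to diff against, prefer a cheaper representation:
  // identical data maps to SOURCE_COPY/MOVE with no blob, and a bsdiff (or,
  // for gzip-containing data, imgdiff) delta replaces the full-operation blob
  // chosen above whenever it is smaller.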
  brillo::Blob old_data;
  if (blocks_to_read > 0) {
    // Read old data.
    TEST_AND_RETURN_FALSE(
        utils::ReadExtents(old_part, src_extents, &old_data,
                           kBlockSize * blocks_to_read, kBlockSize));
    if (old_data == new_data) {
      // No change in data.
      operation.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY)
                             ? InstallOperation::SOURCE_COPY
                             : InstallOperation::MOVE);
      data_blob = brillo::Blob();
    } else if (bsdiff_allowed || imgdiff_allowed) {
      // If the source file is considered bsdiff safe (no bsdiff bugs
      // triggered), see if BSDIFF encoding is smaller.
      base::FilePath old_chunk;
      TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&old_chunk));
      ScopedPathUnlinker old_unlinker(old_chunk.value());
      TEST_AND_RETURN_FALSE(utils::WriteFile(
          old_chunk.value().c_str(), old_data.data(), old_data.size()));
      base::FilePath new_chunk;
      TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&new_chunk));
      ScopedPathUnlinker new_unlinker(new_chunk.value());
      TEST_AND_RETURN_FALSE(utils::WriteFile(
          new_chunk.value().c_str(), new_data.data(), new_data.size()));

      if (bsdiff_allowed) {
        brillo::Blob bsdiff_delta;
        TEST_AND_RETURN_FALSE(DiffFiles(
            kBsdiffPath, old_chunk.value(), new_chunk.value(), &bsdiff_delta));
        CHECK_GT(bsdiff_delta.size(), static_cast<brillo::Blob::size_type>(0));
        if (bsdiff_delta.size() < data_blob.size()) {
          operation.set_type(
              version.OperationAllowed(InstallOperation::SOURCE_BSDIFF)
                  ? InstallOperation::SOURCE_BSDIFF
                  : InstallOperation::BSDIFF);
          data_blob = std::move(bsdiff_delta);
        }
      }
      if (imgdiff_allowed && ContainsGZip(old_data) && ContainsGZip(new_data)) {
        brillo::Blob imgdiff_delta;
        // Imgdiff might fail in some cases; only use the result if it
        // succeeds, otherwise log the extents for analysis.
        if (DiffFiles(kImgdiffPath,
                      old_chunk.value(),
                      new_chunk.value(),
                      &imgdiff_delta) &&
            imgdiff_delta.size() > 0) {
          if (imgdiff_delta.size() < data_blob.size()) {
            operation.set_type(InstallOperation::IMGDIFF);
            data_blob = std::move(imgdiff_delta);
          }
        } else {
          LOG(ERROR) << "Imgdiff failed with source extents: "
                     << ExtentsToString(src_extents)
                     << ", destination extents: "
                     << ExtentsToString(dst_extents);
        }
      }
    }
  }

  size_t removed_bytes = 0;
  // Remove identical src/dst block ranges in MOVE operations.
  if (operation.type() == InstallOperation::MOVE) {
    removed_bytes = RemoveIdenticalBlockRanges(
        &src_extents, &dst_extents, new_data.size());
  }
  // Set legacy src_length and dst_length fields.
  operation.set_src_length(old_data.size() - removed_bytes);
  operation.set_dst_length(new_data.size() - removed_bytes);

  // Embed extents in the operation.
  StoreExtents(src_extents, operation.mutable_src_extents());
  StoreExtents(dst_extents, operation.mutable_dst_extents());

  // Replace operations should not have source extents.
  if (IsAReplaceOperation(operation.type())) {
    operation.clear_src_extents();
    operation.clear_src_length();
  }

  *out_data = std::move(data_blob);
  *out_op = operation;

  return true;
}

// Runs the bsdiff or imgdiff tool in |diff_path| on two files and returns the
// resulting delta in |out|. Returns true on success.
bool DiffFiles(const string& diff_path,
               const string& old_file,
               const string& new_file,
               brillo::Blob* out) {
  const string kPatchFile = "delta.patchXXXXXX";
  string patch_file_path;

  TEST_AND_RETURN_FALSE(
      utils::MakeTempFile(kPatchFile, &patch_file_path, nullptr));

  vector<string> cmd;
  cmd.push_back(diff_path);
  cmd.push_back(old_file);
  cmd.push_back(new_file);
  cmd.push_back(patch_file_path);

  int rc = 1;
  brillo::Blob patch_file;
  string stdout;
  TEST_AND_RETURN_FALSE(Subprocess::SynchronousExec(cmd, &rc, &stdout));
  if (rc != 0) {
    LOG(ERROR) << diff_path << " returned " << rc << std::endl << stdout;
    return false;
  }
  TEST_AND_RETURN_FALSE(utils::ReadFile(patch_file_path, out));
  unlink(patch_file_path.c_str());
  return true;
}

bool IsAReplaceOperation(InstallOperation_Type op_type) {
  return (op_type == InstallOperation::REPLACE ||
          op_type == InstallOperation::REPLACE_BZ ||
          op_type == InstallOperation::REPLACE_XZ);
}

// Returns true if |op| is a no-op operation that doesn't do any useful work
// (e.g., a move operation that copies blocks onto themselves).
bool IsNoopOperation(const InstallOperation& op) {
  return (op.type() == InstallOperation::MOVE &&
          ExpandExtents(op.src_extents()) == ExpandExtents(op.dst_extents()));
}

void FilterNoopOperations(vector<AnnotatedOperation>* ops) {
  ops->erase(
      std::remove_if(
          ops->begin(), ops->end(),
          [](const AnnotatedOperation& aop){return IsNoopOperation(aop.op);}),
      ops->end());
}

bool InitializePartitionInfo(const PartitionConfig& part, PartitionInfo* info) {
  info->set_size(part.size);
  HashCalculator hasher;
  TEST_AND_RETURN_FALSE(hasher.UpdateFile(part.path, part.size) ==
                        static_cast<off_t>(part.size));
  TEST_AND_RETURN_FALSE(hasher.Finalize());
  const brillo::Blob& hash = hasher.raw_hash();
  info->set_hash(hash.data(), hash.size());
  LOG(INFO) << part.path << ": size=" << part.size << " hash=" << hasher.hash();
  return true;
}

bool CompareAopsByDestination(AnnotatedOperation first_aop,
                              AnnotatedOperation second_aop) {
  // We want empty operations to be at the end of the payload.
  if (!first_aop.op.dst_extents().size() || !second_aop.op.dst_extents().size())
    return ((!first_aop.op.dst_extents().size()) <
            (!second_aop.op.dst_extents().size()));
  uint32_t first_dst_start = first_aop.op.dst_extents(0).start_block();
  uint32_t second_dst_start = second_aop.op.dst_extents(0).start_block();
  return first_dst_start < second_dst_start;
}

}  // namespace diff_utils

}  // namespace chromeos_update_engine