Home | History | Annotate | Download | only in payload_generator
      1 //
      2 // Copyright (C) 2015 The Android Open Source Project
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 //
     16 
     17 #include "update_engine/payload_generator/delta_diff_utils.h"
     18 
     19 #include <algorithm>
     20 #include <map>
     21 
     22 #include <base/files/file_util.h>
     23 #include <base/format_macros.h>
     24 #include <base/strings/stringprintf.h>
     25 
     26 #include "update_engine/common/hash_calculator.h"
     27 #include "update_engine/common/subprocess.h"
     28 #include "update_engine/common/utils.h"
     29 #include "update_engine/payload_generator/block_mapping.h"
     30 #include "update_engine/payload_generator/bzip.h"
     31 #include "update_engine/payload_generator/delta_diff_generator.h"
     32 #include "update_engine/payload_generator/extent_ranges.h"
     33 #include "update_engine/payload_generator/extent_utils.h"
     34 #include "update_engine/payload_generator/xz.h"
     35 
     36 using std::map;
     37 using std::string;
     38 using std::vector;
     39 
     40 namespace chromeos_update_engine {
     41 namespace {
     42 
     43 const char* const kBsdiffPath = "bsdiff";
     44 const char* const kImgdiffPath = "imgdiff";
     45 
     46 // The maximum destination size allowed for bsdiff. In general, bsdiff should
     47 // work for arbitrary big files, but the payload generation and payload
     48 // application requires a significant amount of RAM. We put a hard-limit of
     49 // 200 MiB that should not affect any released board, but will limit the
     50 // Chrome binary in ASan builders.
     51 const uint64_t kMaxBsdiffDestinationSize = 200 * 1024 * 1024;  // bytes
     52 
     53 // The maximum destination size allowed for imgdiff. In general, imgdiff should
     54 // work for arbitrary big files, but the payload application is quite memory
     55 // intensive, so we limit these operations to 50 MiB.
     56 const uint64_t kMaxImgdiffDestinationSize = 50 * 1024 * 1024;  // bytes
     57 
     58 // Process a range of blocks from |range_start| to |range_end| in the extent at
     59 // position |*idx_p| of |extents|. If |do_remove| is true, this range will be
     60 // removed, which may cause the extent to be trimmed, split or removed entirely.
     61 // The value of |*idx_p| is updated to point to the next extent to be processed.
     62 // Returns true iff the next extent to process is a new or updated one.
     63 bool ProcessExtentBlockRange(vector<Extent>* extents, size_t* idx_p,
     64                              const bool do_remove, uint64_t range_start,
     65                              uint64_t range_end) {
     66   size_t idx = *idx_p;
     67   uint64_t start_block = (*extents)[idx].start_block();
     68   uint64_t num_blocks = (*extents)[idx].num_blocks();
     69   uint64_t range_size = range_end - range_start;
     70 
     71   if (do_remove) {
     72     if (range_size == num_blocks) {
     73       // Remove the entire extent.
     74       extents->erase(extents->begin() + idx);
     75     } else if (range_end == num_blocks) {
     76       // Trim the end of the extent.
     77       (*extents)[idx].set_num_blocks(num_blocks - range_size);
     78       idx++;
     79     } else if (range_start == 0) {
     80       // Trim the head of the extent.
     81       (*extents)[idx].set_start_block(start_block + range_size);
     82       (*extents)[idx].set_num_blocks(num_blocks - range_size);
     83     } else {
     84       // Trim the middle, splitting the remainder into two parts.
     85       (*extents)[idx].set_num_blocks(range_start);
     86       Extent e;
     87       e.set_start_block(start_block + range_end);
     88       e.set_num_blocks(num_blocks - range_end);
     89       idx++;
     90       extents->insert(extents->begin() + idx, e);
     91     }
     92   } else if (range_end == num_blocks) {
     93     // Done with this extent.
     94     idx++;
     95   } else {
     96     return false;
     97   }
     98 
     99   *idx_p = idx;
    100   return true;
    101 }
    102 
    103 // Remove identical corresponding block ranges in |src_extents| and
    104 // |dst_extents|. Used for preventing moving of blocks onto themselves during
    105 // MOVE operations. The value of |total_bytes| indicates the actual length of
    106 // content; this may be slightly less than the total size of blocks, in which
    107 // case the last block is only partly occupied with data. Returns the total
    108 // number of bytes removed.
    109 size_t RemoveIdenticalBlockRanges(vector<Extent>* src_extents,
    110                                   vector<Extent>* dst_extents,
    111                                   const size_t total_bytes) {
    112   size_t src_idx = 0;
    113   size_t dst_idx = 0;
    114   uint64_t src_offset = 0, dst_offset = 0;
    115   bool new_src = true, new_dst = true;
    116   size_t removed_bytes = 0, nonfull_block_bytes;
    117   bool do_remove = false;
    118   while (src_idx < src_extents->size() && dst_idx < dst_extents->size()) {
    119     if (new_src) {
    120       src_offset = 0;
    121       new_src = false;
    122     }
    123     if (new_dst) {
    124       dst_offset = 0;
    125       new_dst = false;
    126     }
    127 
    128     do_remove = ((*src_extents)[src_idx].start_block() + src_offset ==
    129                  (*dst_extents)[dst_idx].start_block() + dst_offset);
    130 
    131     uint64_t src_num_blocks = (*src_extents)[src_idx].num_blocks();
    132     uint64_t dst_num_blocks = (*dst_extents)[dst_idx].num_blocks();
    133     uint64_t min_num_blocks = std::min(src_num_blocks - src_offset,
    134                                        dst_num_blocks - dst_offset);
    135     uint64_t prev_src_offset = src_offset;
    136     uint64_t prev_dst_offset = dst_offset;
    137     src_offset += min_num_blocks;
    138     dst_offset += min_num_blocks;
    139 
    140     new_src = ProcessExtentBlockRange(src_extents, &src_idx, do_remove,
    141                                       prev_src_offset, src_offset);
    142     new_dst = ProcessExtentBlockRange(dst_extents, &dst_idx, do_remove,
    143                                       prev_dst_offset, dst_offset);
    144     if (do_remove)
    145       removed_bytes += min_num_blocks * kBlockSize;
    146   }
    147 
    148   // If we removed the last block and this block is only partly used by file
    149   // content, deduct the unused portion from the total removed byte count.
    150   if (do_remove && (nonfull_block_bytes = total_bytes % kBlockSize))
    151     removed_bytes -= kBlockSize - nonfull_block_bytes;
    152 
    153   return removed_bytes;
    154 }
    155 
    156 // Returns true if the given blob |data| contains gzip header magic.
    157 bool ContainsGZip(const brillo::Blob& data) {
    158   const uint8_t kGZipMagic[] = {0x1f, 0x8b, 0x08, 0x00};
    159   return std::search(data.begin(),
    160                      data.end(),
    161                      std::begin(kGZipMagic),
    162                      std::end(kGZipMagic)) != data.end();
    163 }
    164 
    165 }  // namespace
    166 
    167 namespace diff_utils {
    168 
    169 bool DeltaReadPartition(vector<AnnotatedOperation>* aops,
    170                         const PartitionConfig& old_part,
    171                         const PartitionConfig& new_part,
    172                         ssize_t hard_chunk_blocks,
    173                         size_t soft_chunk_blocks,
    174                         const PayloadVersion& version,
    175                         BlobFileWriter* blob_file) {
    176   ExtentRanges old_visited_blocks;
    177   ExtentRanges new_visited_blocks;
    178 
    179   TEST_AND_RETURN_FALSE(DeltaMovedAndZeroBlocks(
    180       aops,
    181       old_part.path,
    182       new_part.path,
    183       old_part.size / kBlockSize,
    184       new_part.size / kBlockSize,
    185       soft_chunk_blocks,
    186       version,
    187       blob_file,
    188       &old_visited_blocks,
    189       &new_visited_blocks));
    190 
    191   map<string, vector<Extent>> old_files_map;
    192   if (old_part.fs_interface) {
    193     vector<FilesystemInterface::File> old_files;
    194     old_part.fs_interface->GetFiles(&old_files);
    195     for (const FilesystemInterface::File& file : old_files)
    196       old_files_map[file.name] = file.extents;
    197   }
    198 
    199   TEST_AND_RETURN_FALSE(new_part.fs_interface);
    200   vector<FilesystemInterface::File> new_files;
    201   new_part.fs_interface->GetFiles(&new_files);
    202 
    203   // The processing is very straightforward here, we generate operations for
    204   // every file (and pseudo-file such as the metadata) in the new filesystem
    205   // based on the file with the same name in the old filesystem, if any.
    206   // Files with overlapping data blocks (like hardlinks or filesystems with tail
    207   // packing or compression where the blocks store more than one file) are only
    208   // generated once in the new image, but are also used only once from the old
    209   // image due to some simplifications (see below).
    210   for (const FilesystemInterface::File& new_file : new_files) {
    211     // Ignore the files in the new filesystem without blocks. Symlinks with
    212     // data blocks (for example, symlinks bigger than 60 bytes in ext2) are
    213     // handled as normal files. We also ignore blocks that were already
    214     // processed by a previous file.
    215     vector<Extent> new_file_extents = FilterExtentRanges(
    216         new_file.extents, new_visited_blocks);
    217     new_visited_blocks.AddExtents(new_file_extents);
    218 
    219     if (new_file_extents.empty())
    220       continue;
    221 
    222     LOG(INFO) << "Encoding file " << new_file.name << " ("
    223               << BlocksInExtents(new_file_extents) << " blocks)";
    224 
    225     // We can't visit each dst image inode more than once, as that would
    226     // duplicate work. Here, we avoid visiting each source image inode
    227     // more than once. Technically, we could have multiple operations
    228     // that read the same blocks from the source image for diffing, but
    229     // we choose not to avoid complexity. Eventually we will move away
    230     // from using a graph/cycle detection/etc to generate diffs, and at that
    231     // time, it will be easy (non-complex) to have many operations read
    232     // from the same source blocks. At that time, this code can die. -adlr
    233     vector<Extent> old_file_extents = FilterExtentRanges(
    234         old_files_map[new_file.name], old_visited_blocks);
    235     old_visited_blocks.AddExtents(old_file_extents);
    236 
    237     TEST_AND_RETURN_FALSE(DeltaReadFile(aops,
    238                                         old_part.path,
    239                                         new_part.path,
    240                                         old_file_extents,
    241                                         new_file_extents,
    242                                         new_file.name,  // operation name
    243                                         hard_chunk_blocks,
    244                                         version,
    245                                         blob_file));
    246   }
    247   // Process all the blocks not included in any file. We provided all the unused
    248   // blocks in the old partition as available data.
    249   vector<Extent> new_unvisited = {
    250       ExtentForRange(0, new_part.size / kBlockSize)};
    251   new_unvisited = FilterExtentRanges(new_unvisited, new_visited_blocks);
    252   if (new_unvisited.empty())
    253     return true;
    254 
    255   vector<Extent> old_unvisited;
    256   if (old_part.fs_interface) {
    257     old_unvisited.push_back(ExtentForRange(0, old_part.size / kBlockSize));
    258     old_unvisited = FilterExtentRanges(old_unvisited, old_visited_blocks);
    259   }
    260 
    261   LOG(INFO) << "Scanning " << BlocksInExtents(new_unvisited)
    262             << " unwritten blocks using chunk size of "
    263             << soft_chunk_blocks << " blocks.";
    264   // We use the soft_chunk_blocks limit for the <non-file-data> as we don't
    265   // really know the structure of this data and we should not expect it to have
    266   // redundancy between partitions.
    267   TEST_AND_RETURN_FALSE(DeltaReadFile(aops,
    268                                       old_part.path,
    269                                       new_part.path,
    270                                       old_unvisited,
    271                                       new_unvisited,
    272                                       "<non-file-data>",  // operation name
    273                                       soft_chunk_blocks,
    274                                       version,
    275                                       blob_file));
    276 
    277   return true;
    278 }
    279 
    280 bool DeltaMovedAndZeroBlocks(vector<AnnotatedOperation>* aops,
    281                              const string& old_part,
    282                              const string& new_part,
    283                              size_t old_num_blocks,
    284                              size_t new_num_blocks,
    285                              ssize_t chunk_blocks,
    286                              const PayloadVersion& version,
    287                              BlobFileWriter* blob_file,
    288                              ExtentRanges* old_visited_blocks,
    289                              ExtentRanges* new_visited_blocks) {
    290   vector<BlockMapping::BlockId> old_block_ids;
    291   vector<BlockMapping::BlockId> new_block_ids;
    292   TEST_AND_RETURN_FALSE(MapPartitionBlocks(old_part,
    293                                            new_part,
    294                                            old_num_blocks * kBlockSize,
    295                                            new_num_blocks * kBlockSize,
    296                                            kBlockSize,
    297                                            &old_block_ids,
    298                                            &new_block_ids));
    299 
    300   // If the update is inplace, we map all the blocks that didn't move,
    301   // regardless of the contents since they are already copied and no operation
    302   // is required.
    303   if (version.InplaceUpdate()) {
    304     uint64_t num_blocks = std::min(old_num_blocks, new_num_blocks);
    305     for (uint64_t block = 0; block < num_blocks; block++) {
    306       if (old_block_ids[block] == new_block_ids[block] &&
    307           !old_visited_blocks->ContainsBlock(block) &&
    308           !new_visited_blocks->ContainsBlock(block)) {
    309         old_visited_blocks->AddBlock(block);
    310         new_visited_blocks->AddBlock(block);
    311       }
    312     }
    313   }
    314 
    315   // A mapping from the block_id to the list of block numbers with that block id
    316   // in the old partition. This is used to lookup where in the old partition
    317   // is a block from the new partition.
    318   map<BlockMapping::BlockId, vector<uint64_t>> old_blocks_map;
    319 
    320   for (uint64_t block = old_num_blocks; block-- > 0; ) {
    321     if (old_block_ids[block] != 0 && !old_visited_blocks->ContainsBlock(block))
    322       old_blocks_map[old_block_ids[block]].push_back(block);
    323 
    324     // Mark all zeroed blocks in the old image as "used" since it doesn't make
    325     // any sense to spend I/O to read zeros from the source partition and more
    326     // importantly, these could sometimes be blocks discarded in the SSD which
    327     // would read non-zero values.
    328     if (old_block_ids[block] == 0)
    329       old_visited_blocks->AddBlock(block);
    330   }
    331 
    332   // The collection of blocks in the new partition with just zeros. This is a
    333   // common case for free-space that's also problematic for bsdiff, so we want
    334   // to optimize it using REPLACE_BZ operations. The blob for a REPLACE_BZ of
    335   // just zeros is so small that it doesn't make sense to spend the I/O reading
    336   // zeros from the old partition.
    337   vector<Extent> new_zeros;
    338 
    339   vector<Extent> old_identical_blocks;
    340   vector<Extent> new_identical_blocks;
    341 
    342   for (uint64_t block = 0; block < new_num_blocks; block++) {
    343     // Only produce operations for blocks that were not yet visited.
    344     if (new_visited_blocks->ContainsBlock(block))
    345       continue;
    346     if (new_block_ids[block] == 0) {
    347       AppendBlockToExtents(&new_zeros, block);
    348       continue;
    349     }
    350 
    351     auto old_blocks_map_it = old_blocks_map.find(new_block_ids[block]);
    352     // Check if the block exists in the old partition at all.
    353     if (old_blocks_map_it == old_blocks_map.end() ||
    354         old_blocks_map_it->second.empty())
    355       continue;
    356 
    357     AppendBlockToExtents(&old_identical_blocks,
    358                          old_blocks_map_it->second.back());
    359     AppendBlockToExtents(&new_identical_blocks, block);
    360     // We can't reuse source blocks in minor version 1 because the cycle
    361     // breaking algorithm used in the in-place update doesn't support that.
    362     if (version.InplaceUpdate())
    363       old_blocks_map_it->second.pop_back();
    364   }
    365 
    366   // Produce operations for the zero blocks split per output extent.
    367   // TODO(deymo): Produce ZERO operations instead of calling DeltaReadFile().
    368   size_t num_ops = aops->size();
    369   new_visited_blocks->AddExtents(new_zeros);
    370   for (Extent extent : new_zeros) {
    371     TEST_AND_RETURN_FALSE(DeltaReadFile(aops,
    372                                         "",
    373                                         new_part,
    374                                         vector<Extent>(),        // old_extents
    375                                         vector<Extent>{extent},  // new_extents
    376                                         "<zeros>",
    377                                         chunk_blocks,
    378                                         version,
    379                                         blob_file));
    380   }
    381   LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for "
    382             << BlocksInExtents(new_zeros) << " zeroed blocks";
    383 
    384   // Produce MOVE/SOURCE_COPY operations for the moved blocks.
    385   num_ops = aops->size();
    386   if (chunk_blocks == -1)
    387     chunk_blocks = new_num_blocks;
    388   uint64_t used_blocks = 0;
    389   old_visited_blocks->AddExtents(old_identical_blocks);
    390   new_visited_blocks->AddExtents(new_identical_blocks);
    391   for (Extent extent : new_identical_blocks) {
    392     // We split the operation at the extent boundary or when bigger than
    393     // chunk_blocks.
    394     for (uint64_t op_block_offset = 0; op_block_offset < extent.num_blocks();
    395          op_block_offset += chunk_blocks) {
    396       aops->emplace_back();
    397       AnnotatedOperation* aop = &aops->back();
    398       aop->name = "<identical-blocks>";
    399       aop->op.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY)
    400                            ? InstallOperation::SOURCE_COPY
    401                            : InstallOperation::MOVE);
    402 
    403       uint64_t chunk_num_blocks =
    404         std::min(extent.num_blocks() - op_block_offset,
    405                  static_cast<uint64_t>(chunk_blocks));
    406 
    407       // The current operation represents the move/copy operation for the
    408       // sublist starting at |used_blocks| of length |chunk_num_blocks| where
    409       // the src and dst are from |old_identical_blocks| and
    410       // |new_identical_blocks| respectively.
    411       StoreExtents(
    412           ExtentsSublist(old_identical_blocks, used_blocks, chunk_num_blocks),
    413           aop->op.mutable_src_extents());
    414 
    415       Extent* op_dst_extent = aop->op.add_dst_extents();
    416       op_dst_extent->set_start_block(extent.start_block() + op_block_offset);
    417       op_dst_extent->set_num_blocks(chunk_num_blocks);
    418       CHECK(
    419           vector<Extent>{*op_dst_extent} ==  // NOLINT(whitespace/braces)
    420           ExtentsSublist(new_identical_blocks, used_blocks, chunk_num_blocks));
    421 
    422       used_blocks += chunk_num_blocks;
    423     }
    424   }
    425   LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for "
    426             << used_blocks << " identical blocks moved";
    427 
    428   return true;
    429 }
    430 
    431 bool DeltaReadFile(vector<AnnotatedOperation>* aops,
    432                    const string& old_part,
    433                    const string& new_part,
    434                    const vector<Extent>& old_extents,
    435                    const vector<Extent>& new_extents,
    436                    const string& name,
    437                    ssize_t chunk_blocks,
    438                    const PayloadVersion& version,
    439                    BlobFileWriter* blob_file) {
    440   brillo::Blob data;
    441   InstallOperation operation;
    442 
    443   uint64_t total_blocks = BlocksInExtents(new_extents);
    444   if (chunk_blocks == -1)
    445     chunk_blocks = total_blocks;
    446 
    447   for (uint64_t block_offset = 0; block_offset < total_blocks;
    448       block_offset += chunk_blocks) {
    449     // Split the old/new file in the same chunks. Note that this could drop
    450     // some information from the old file used for the new chunk. If the old
    451     // file is smaller (or even empty when there's no old file) the chunk will
    452     // also be empty.
    453     vector<Extent> old_extents_chunk = ExtentsSublist(
    454         old_extents, block_offset, chunk_blocks);
    455     vector<Extent> new_extents_chunk = ExtentsSublist(
    456         new_extents, block_offset, chunk_blocks);
    457     NormalizeExtents(&old_extents_chunk);
    458     NormalizeExtents(&new_extents_chunk);
    459 
    460     TEST_AND_RETURN_FALSE(ReadExtentsToDiff(old_part,
    461                                             new_part,
    462                                             old_extents_chunk,
    463                                             new_extents_chunk,
    464                                             version,
    465                                             &data,
    466                                             &operation));
    467 
    468     // Check if the operation writes nothing.
    469     if (operation.dst_extents_size() == 0) {
    470       if (operation.type() == InstallOperation::MOVE) {
    471         LOG(INFO) << "Empty MOVE operation ("
    472                   << name << "), skipping";
    473         continue;
    474       } else {
    475         LOG(ERROR) << "Empty non-MOVE operation";
    476         return false;
    477       }
    478     }
    479 
    480     // Now, insert into the list of operations.
    481     AnnotatedOperation aop;
    482     aop.name = name;
    483     if (static_cast<uint64_t>(chunk_blocks) < total_blocks) {
    484       aop.name = base::StringPrintf("%s:%" PRIu64,
    485                                     name.c_str(), block_offset / chunk_blocks);
    486     }
    487     aop.op = operation;
    488 
    489     // Write the data
    490     TEST_AND_RETURN_FALSE(aop.SetOperationBlob(data, blob_file));
    491     aops->emplace_back(aop);
    492   }
    493   return true;
    494 }
    495 
    496 bool GenerateBestFullOperation(const brillo::Blob& new_data,
    497                                const PayloadVersion& version,
    498                                brillo::Blob* out_blob,
    499                                InstallOperation_Type* out_type) {
    500   if (new_data.empty())
    501     return false;
    502 
    503   if (version.OperationAllowed(InstallOperation::ZERO) &&
    504       std::all_of(
    505           new_data.begin(), new_data.end(), [](uint8_t x) { return x == 0; })) {
    506     // The read buffer is all zeros, so produce a ZERO operation. No need to
    507     // check other types of operations in this case.
    508     *out_blob = brillo::Blob();
    509     *out_type = InstallOperation::ZERO;
    510     return true;
    511   }
    512 
    513   bool out_blob_set = false;
    514 
    515   // Try compressing |new_data| with xz first.
    516   if (version.OperationAllowed(InstallOperation::REPLACE_XZ)) {
    517     brillo::Blob new_data_xz;
    518     if (XzCompress(new_data, &new_data_xz) && !new_data_xz.empty()) {
    519       *out_type = InstallOperation::REPLACE_XZ;
    520       *out_blob = std::move(new_data_xz);
    521       out_blob_set = true;
    522     }
    523   }
    524 
    525   // Try compressing it with bzip2.
    526   if (version.OperationAllowed(InstallOperation::REPLACE_BZ)) {
    527     brillo::Blob new_data_bz;
    528     // TODO(deymo): Implement some heuristic to determine if it is worth trying
    529     // to compress the blob with bzip2 if we already have a good REPLACE_XZ.
    530     if (BzipCompress(new_data, &new_data_bz) && !new_data_bz.empty() &&
    531         (!out_blob_set || out_blob->size() > new_data_bz.size())) {
    532       // A REPLACE_BZ is better or nothing else was set.
    533       *out_type = InstallOperation::REPLACE_BZ;
    534       *out_blob = std::move(new_data_bz);
    535       out_blob_set = true;
    536     }
    537   }
    538 
    539   // If nothing else worked or it was badly compressed we try a REPLACE.
    540   if (!out_blob_set || out_blob->size() >= new_data.size()) {
    541     *out_type = InstallOperation::REPLACE;
    542     // This needs to make a copy of the data in the case bzip or xz didn't
    543     // compress well, which is not the common case so the performance hit is
    544     // low.
    545     *out_blob = new_data;
    546   }
    547   return true;
    548 }
    549 
    550 bool ReadExtentsToDiff(const string& old_part,
    551                        const string& new_part,
    552                        const vector<Extent>& old_extents,
    553                        const vector<Extent>& new_extents,
    554                        const PayloadVersion& version,
    555                        brillo::Blob* out_data,
    556                        InstallOperation* out_op) {
    557   InstallOperation operation;
    558 
    559   // We read blocks from old_extents and write blocks to new_extents.
    560   uint64_t blocks_to_read = BlocksInExtents(old_extents);
    561   uint64_t blocks_to_write = BlocksInExtents(new_extents);
    562 
    563   // Disable bsdiff and imgdiff when the data is too big.
    564   bool bsdiff_allowed =
    565       version.OperationAllowed(InstallOperation::SOURCE_BSDIFF) ||
    566       version.OperationAllowed(InstallOperation::BSDIFF);
    567   if (bsdiff_allowed &&
    568       blocks_to_read * kBlockSize > kMaxBsdiffDestinationSize) {
    569     LOG(INFO) << "bsdiff blacklisted, data too big: "
    570               << blocks_to_read * kBlockSize << " bytes";
    571     bsdiff_allowed = false;
    572   }
    573 
    574   bool imgdiff_allowed = version.OperationAllowed(InstallOperation::IMGDIFF);
    575   if (imgdiff_allowed &&
    576       blocks_to_read * kBlockSize > kMaxImgdiffDestinationSize) {
    577     LOG(INFO) << "imgdiff blacklisted, data too big: "
    578               << blocks_to_read * kBlockSize << " bytes";
    579     imgdiff_allowed = false;
    580   }
    581 
    582   // Make copies of the extents so we can modify them.
    583   vector<Extent> src_extents = old_extents;
    584   vector<Extent> dst_extents = new_extents;
    585 
    586   // Read in bytes from new data.
    587   brillo::Blob new_data;
    588   TEST_AND_RETURN_FALSE(utils::ReadExtents(new_part,
    589                                            new_extents,
    590                                            &new_data,
    591                                            kBlockSize * blocks_to_write,
    592                                            kBlockSize));
    593   TEST_AND_RETURN_FALSE(!new_data.empty());
    594 
    595   // Data blob that will be written to delta file.
    596   brillo::Blob data_blob;
    597 
    598   // Try generating a full operation for the given new data, regardless of the
    599   // old_data.
    600   InstallOperation_Type op_type;
    601   TEST_AND_RETURN_FALSE(
    602       GenerateBestFullOperation(new_data, version, &data_blob, &op_type));
    603   operation.set_type(op_type);
    604 
    605   brillo::Blob old_data;
    606   if (blocks_to_read > 0) {
    607     // Read old data.
    608     TEST_AND_RETURN_FALSE(
    609         utils::ReadExtents(old_part, src_extents, &old_data,
    610                            kBlockSize * blocks_to_read, kBlockSize));
    611     if (old_data == new_data) {
    612       // No change in data.
    613       operation.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY)
    614                              ? InstallOperation::SOURCE_COPY
    615                              : InstallOperation::MOVE);
    616       data_blob = brillo::Blob();
    617     } else if (bsdiff_allowed || imgdiff_allowed) {
    618       // If the source file is considered bsdiff safe (no bsdiff bugs
    619       // triggered), see if BSDIFF encoding is smaller.
    620       base::FilePath old_chunk;
    621       TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&old_chunk));
    622       ScopedPathUnlinker old_unlinker(old_chunk.value());
    623       TEST_AND_RETURN_FALSE(utils::WriteFile(
    624           old_chunk.value().c_str(), old_data.data(), old_data.size()));
    625       base::FilePath new_chunk;
    626       TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&new_chunk));
    627       ScopedPathUnlinker new_unlinker(new_chunk.value());
    628       TEST_AND_RETURN_FALSE(utils::WriteFile(
    629           new_chunk.value().c_str(), new_data.data(), new_data.size()));
    630 
    631       if (bsdiff_allowed) {
    632         brillo::Blob bsdiff_delta;
    633         TEST_AND_RETURN_FALSE(DiffFiles(
    634             kBsdiffPath, old_chunk.value(), new_chunk.value(), &bsdiff_delta));
    635         CHECK_GT(bsdiff_delta.size(), static_cast<brillo::Blob::size_type>(0));
    636         if (bsdiff_delta.size() < data_blob.size()) {
    637           operation.set_type(
    638               version.OperationAllowed(InstallOperation::SOURCE_BSDIFF)
    639                   ? InstallOperation::SOURCE_BSDIFF
    640                   : InstallOperation::BSDIFF);
    641           data_blob = std::move(bsdiff_delta);
    642         }
    643       }
    644       if (imgdiff_allowed && ContainsGZip(old_data) && ContainsGZip(new_data)) {
    645         brillo::Blob imgdiff_delta;
    646         // Imgdiff might fail in some cases, only use the result if it succeed,
    647         // otherwise print the extents to analyze.
    648         if (DiffFiles(kImgdiffPath,
    649                       old_chunk.value(),
    650                       new_chunk.value(),
    651                       &imgdiff_delta) &&
    652             imgdiff_delta.size() > 0) {
    653           if (imgdiff_delta.size() < data_blob.size()) {
    654             operation.set_type(InstallOperation::IMGDIFF);
    655             data_blob = std::move(imgdiff_delta);
    656           }
    657         } else {
    658           LOG(ERROR) << "Imgdiff failed with source extents: "
    659                      << ExtentsToString(src_extents)
    660                      << ", destination extents: "
    661                      << ExtentsToString(dst_extents);
    662         }
    663       }
    664     }
    665   }
    666 
    667   size_t removed_bytes = 0;
    668   // Remove identical src/dst block ranges in MOVE operations.
    669   if (operation.type() == InstallOperation::MOVE) {
    670     removed_bytes = RemoveIdenticalBlockRanges(
    671         &src_extents, &dst_extents, new_data.size());
    672   }
    673   // Set legacy src_length and dst_length fields.
    674   operation.set_src_length(old_data.size() - removed_bytes);
    675   operation.set_dst_length(new_data.size() - removed_bytes);
    676 
    677   // Embed extents in the operation.
    678   StoreExtents(src_extents, operation.mutable_src_extents());
    679   StoreExtents(dst_extents, operation.mutable_dst_extents());
    680 
    681   // Replace operations should not have source extents.
    682   if (IsAReplaceOperation(operation.type())) {
    683     operation.clear_src_extents();
    684     operation.clear_src_length();
    685   }
    686 
    687   *out_data = std::move(data_blob);
    688   *out_op = operation;
    689 
    690   return true;
    691 }
    692 
    693 // Runs the bsdiff or imgdiff tool in |diff_path| on two files and returns the
    694 // resulting delta in |out|. Returns true on success.
    695 bool DiffFiles(const string& diff_path,
    696                const string& old_file,
    697                const string& new_file,
    698                brillo::Blob* out) {
    699   const string kPatchFile = "delta.patchXXXXXX";
    700   string patch_file_path;
    701 
    702   TEST_AND_RETURN_FALSE(
    703       utils::MakeTempFile(kPatchFile, &patch_file_path, nullptr));
    704 
    705   vector<string> cmd;
    706   cmd.push_back(diff_path);
    707   cmd.push_back(old_file);
    708   cmd.push_back(new_file);
    709   cmd.push_back(patch_file_path);
    710 
    711   int rc = 1;
    712   brillo::Blob patch_file;
    713   string stdout;
    714   TEST_AND_RETURN_FALSE(Subprocess::SynchronousExec(cmd, &rc, &stdout));
    715   if (rc != 0) {
    716     LOG(ERROR) << diff_path << " returned " << rc << std::endl << stdout;
    717     return false;
    718   }
    719   TEST_AND_RETURN_FALSE(utils::ReadFile(patch_file_path, out));
    720   unlink(patch_file_path.c_str());
    721   return true;
    722 }
    723 
    724 bool IsAReplaceOperation(InstallOperation_Type op_type) {
    725   return (op_type == InstallOperation::REPLACE ||
    726           op_type == InstallOperation::REPLACE_BZ ||
    727           op_type == InstallOperation::REPLACE_XZ);
    728 }
    729 
    730 // Returns true if |op| is a no-op operation that doesn't do any useful work
    731 // (e.g., a move operation that copies blocks onto themselves).
    732 bool IsNoopOperation(const InstallOperation& op) {
    733   return (op.type() == InstallOperation::MOVE &&
    734           ExpandExtents(op.src_extents()) == ExpandExtents(op.dst_extents()));
    735 }
    736 
    737 void FilterNoopOperations(vector<AnnotatedOperation>* ops) {
    738   ops->erase(
    739       std::remove_if(
    740           ops->begin(), ops->end(),
    741           [](const AnnotatedOperation& aop){return IsNoopOperation(aop.op);}),
    742       ops->end());
    743 }
    744 
    745 bool InitializePartitionInfo(const PartitionConfig& part, PartitionInfo* info) {
    746   info->set_size(part.size);
    747   HashCalculator hasher;
    748   TEST_AND_RETURN_FALSE(hasher.UpdateFile(part.path, part.size) ==
    749                         static_cast<off_t>(part.size));
    750   TEST_AND_RETURN_FALSE(hasher.Finalize());
    751   const brillo::Blob& hash = hasher.raw_hash();
    752   info->set_hash(hash.data(), hash.size());
    753   LOG(INFO) << part.path << ": size=" << part.size << " hash=" << hasher.hash();
    754   return true;
    755 }
    756 
    757 bool CompareAopsByDestination(AnnotatedOperation first_aop,
    758                               AnnotatedOperation second_aop) {
    759   // We want empty operations to be at the end of the payload.
    760   if (!first_aop.op.dst_extents().size() || !second_aop.op.dst_extents().size())
    761     return ((!first_aop.op.dst_extents().size()) <
    762             (!second_aop.op.dst_extents().size()));
    763   uint32_t first_dst_start = first_aop.op.dst_extents(0).start_block();
    764   uint32_t second_dst_start = second_aop.op.dst_extents(0).start_block();
    765   return first_dst_start < second_dst_start;
    766 }
    767 
    768 }  // namespace diff_utils
    769 
    770 }  // namespace chromeos_update_engine
    771