Home | History | Annotate | Download | only in releasetools
      1 # Copyright (C) 2014 The Android Open Source Project
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #      http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 
     15 from __future__ import print_function
     16 
     17 import array
     18 import common
     19 import functools
     20 import heapq
     21 import itertools
     22 import multiprocessing
     23 import os
     24 import os.path
     25 import re
     26 import subprocess
     27 import sys
     28 import threading
     29 
     30 from collections import deque, OrderedDict
     31 from hashlib import sha1
     32 from rangelib import RangeSet
     33 
     34 
     35 __all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
     36 
     37 
     38 def compute_patch(srcfile, tgtfile, imgdiff=False):
     39   patchfile = common.MakeTempFile(prefix='patch-')
     40 
     41   cmd = ['imgdiff', '-z'] if imgdiff else ['bsdiff']
     42   cmd.extend([srcfile, tgtfile, patchfile])
     43 
     44   # Not using common.Run(), which would otherwise dump all the bsdiff/imgdiff
     45   # commands when OPTIONS.verbose is True - not useful for the case here, since
     46   # they contain temp filenames only.
     47   p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     48   output, _ = p.communicate()
     49 
     50   if p.returncode != 0:
     51     raise ValueError(output)
     52 
     53   with open(patchfile, 'rb') as f:
     54     return f.read()
     55 
     56 
     57 class Image(object):
     58   def RangeSha1(self, ranges):
     59     raise NotImplementedError
     60 
     61   def ReadRangeSet(self, ranges):
     62     raise NotImplementedError
     63 
     64   def TotalSha1(self, include_clobbered_blocks=False):
     65     raise NotImplementedError
     66 
     67   def WriteRangeDataToFd(self, ranges, fd):
     68     raise NotImplementedError
     69 
     70 
     71 class EmptyImage(Image):
     72   """A zero-length image."""
     73 
     74   def __init__(self):
     75     self.blocksize = 4096
     76     self.care_map = RangeSet()
     77     self.clobbered_blocks = RangeSet()
     78     self.extended = RangeSet()
     79     self.total_blocks = 0
     80     self.file_map = {}
     81 
     82   def RangeSha1(self, ranges):
     83     return sha1().hexdigest()
     84 
     85   def ReadRangeSet(self, ranges):
     86     return ()
     87 
     88   def TotalSha1(self, include_clobbered_blocks=False):
     89     # EmptyImage always carries empty clobbered_blocks, so
     90     # include_clobbered_blocks can be ignored.
     91     assert self.clobbered_blocks.size() == 0
     92     return sha1().hexdigest()
     93 
     94   def WriteRangeDataToFd(self, ranges, fd):
     95     raise ValueError("Can't write data from EmptyImage to file")
     96 
     97 
     98 class DataImage(Image):
     99   """An image wrapped around a single string of data."""
    100 
    101   def __init__(self, data, trim=False, pad=False):
    102     self.data = data
    103     self.blocksize = 4096
    104 
    105     assert not (trim and pad)
    106 
    107     partial = len(self.data) % self.blocksize
    108     padded = False
    109     if partial > 0:
    110       if trim:
    111         self.data = self.data[:-partial]
    112       elif pad:
    113         self.data += '\0' * (self.blocksize - partial)
    114         padded = True
    115       else:
    116         raise ValueError(("data for DataImage must be multiple of %d bytes "
    117                           "unless trim or pad is specified") %
    118                          (self.blocksize,))
    119 
    120     assert len(self.data) % self.blocksize == 0
    121 
    122     self.total_blocks = len(self.data) / self.blocksize
    123     self.care_map = RangeSet(data=(0, self.total_blocks))
    124     # When the last block is padded, we always write the whole block even for
    125     # incremental OTAs. Because otherwise the last block may get skipped if
    126     # unchanged for an incremental, but would fail the post-install
    127     # verification if it has non-zero contents in the padding bytes.
    128     # Bug: 23828506
    129     if padded:
    130       clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    131     else:
    132       clobbered_blocks = []
    133     self.clobbered_blocks = clobbered_blocks
    134     self.extended = RangeSet()
    135 
    136     zero_blocks = []
    137     nonzero_blocks = []
    138     reference = '\0' * self.blocksize
    139 
    140     for i in range(self.total_blocks-1 if padded else self.total_blocks):
    141       d = self.data[i*self.blocksize : (i+1)*self.blocksize]
    142       if d == reference:
    143         zero_blocks.append(i)
    144         zero_blocks.append(i+1)
    145       else:
    146         nonzero_blocks.append(i)
    147         nonzero_blocks.append(i+1)
    148 
    149     assert zero_blocks or nonzero_blocks or clobbered_blocks
    150 
    151     self.file_map = dict()
    152     if zero_blocks:
    153       self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    154     if nonzero_blocks:
    155       self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    156     if clobbered_blocks:
    157       self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)
    158 
    159   def _GetRangeData(self, ranges):
    160     for s, e in ranges:
    161       yield self.data[s*self.blocksize:e*self.blocksize]
    162 
    163   def RangeSha1(self, ranges):
    164     h = sha1()
    165     for data in self._GetRangeData(ranges):
    166       h.update(data)
    167     return h.hexdigest()
    168 
    169   def ReadRangeSet(self, ranges):
    170     return [self._GetRangeData(ranges)]
    171 
    172   def TotalSha1(self, include_clobbered_blocks=False):
    173     if not include_clobbered_blocks:
    174       return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    175     else:
    176       return sha1(self.data).hexdigest()
    177 
    178   def WriteRangeDataToFd(self, ranges, fd):
    179     for data in self._GetRangeData(ranges):
    180       fd.write(data)
    181 
    182 
    183 class Transfer(object):
    184   def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
    185                src_sha1, style, by_id):
    186     self.tgt_name = tgt_name
    187     self.src_name = src_name
    188     self.tgt_ranges = tgt_ranges
    189     self.src_ranges = src_ranges
    190     self.tgt_sha1 = tgt_sha1
    191     self.src_sha1 = src_sha1
    192     self.style = style
    193     self.intact = (getattr(tgt_ranges, "monotonic", False) and
    194                    getattr(src_ranges, "monotonic", False))
    195 
    196     # We use OrderedDict rather than dict so that the output is repeatable;
    197     # otherwise it would depend on the hash values of the Transfer objects.
    198     self.goes_before = OrderedDict()
    199     self.goes_after = OrderedDict()
    200 
    201     self.stash_before = []
    202     self.use_stash = []
    203 
    204     self.id = len(by_id)
    205     by_id.append(self)
    206 
    207   def NetStashChange(self):
    208     return (sum(sr.size() for (_, sr) in self.stash_before) -
    209             sum(sr.size() for (_, sr) in self.use_stash))
    210 
    211   def ConvertToNew(self):
    212     assert self.style != "new"
    213     self.use_stash = []
    214     self.style = "new"
    215     self.src_ranges = RangeSet()
    216 
    217   def __str__(self):
    218     return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
    219             " to " + str(self.tgt_ranges) + ">")
    220 
    221 
    222 @functools.total_ordering
    223 class HeapItem(object):
    224   def __init__(self, item):
    225     self.item = item
    226     # Negate the score since python's heap is a min-heap and we want
    227     # the maximum score.
    228     self.score = -item.score
    229   def clear(self):
    230     self.item = None
    231   def __bool__(self):
    232     return self.item is None
    233   def __eq__(self, other):
    234     return self.score == other.score
    235   def __le__(self, other):
    236     return self.score <= other.score
    237 
    238 
    239 # BlockImageDiff works on two image objects.  An image object is
    240 # anything that provides the following attributes:
    241 #
    242 #    blocksize: the size in bytes of a block, currently must be 4096.
    243 #
    244 #    total_blocks: the total size of the partition/image, in blocks.
    245 #
    246 #    care_map: a RangeSet containing which blocks (in the range [0,
    247 #      total_blocks) we actually care about; i.e. which blocks contain
    248 #      data.
    249 #
    250 #    file_map: a dict that partitions the blocks contained in care_map
    251 #      into smaller domains that are useful for doing diffs on.
    252 #      (Typically a domain is a file, and the key in file_map is the
    253 #      pathname.)
    254 #
    255 #    clobbered_blocks: a RangeSet containing which blocks contain data
    256 #      but may be altered by the FS. They need to be excluded when
    257 #      verifying the partition integrity.
    258 #
    259 #    ReadRangeSet(): a function that takes a RangeSet and returns the
    260 #      data contained in the image blocks of that RangeSet.  The data
    261 #      is returned as a list or tuple of strings; concatenating the
    262 #      elements together should produce the requested data.
    263 #      Implementations are free to break up the data into list/tuple
    264 #      elements in any way that is convenient.
    265 #
    266 #    RangeSha1(): a function that returns (as a hex string) the SHA-1
    267 #      hash of all the data in the specified range.
    268 #
    269 #    TotalSha1(): a function that returns (as a hex string) the SHA-1
    270 #      hash of all the data in the image (ie, all the blocks in the
    271 #      care_map minus clobbered_blocks, or including the clobbered
    272 #      blocks if include_clobbered_blocks is True).
    273 #
    274 # When creating a BlockImageDiff, the src image may be None, in which
    275 # case the list of transfers produced will never read from the
    276 # original image.
    277 
    278 class BlockImageDiff(object):
    279   def __init__(self, tgt, src=None, threads=None, version=4,
    280                disable_imgdiff=False):
    281     if threads is None:
    282       threads = multiprocessing.cpu_count() // 2
    283       if threads == 0:
    284         threads = 1
    285     self.threads = threads
    286     self.version = version
    287     self.transfers = []
    288     self.src_basenames = {}
    289     self.src_numpatterns = {}
    290     self._max_stashed_size = 0
    291     self.touched_src_ranges = RangeSet()
    292     self.touched_src_sha1 = None
    293     self.disable_imgdiff = disable_imgdiff
    294 
    295     assert version in (3, 4)
    296 
    297     self.tgt = tgt
    298     if src is None:
    299       src = EmptyImage()
    300     self.src = src
    301 
    302     # The updater code that installs the patch always uses 4k blocks.
    303     assert tgt.blocksize == 4096
    304     assert src.blocksize == 4096
    305 
    306     # The range sets in each filemap should comprise a partition of
    307     # the care map.
    308     self.AssertPartition(src.care_map, src.file_map.values())
    309     self.AssertPartition(tgt.care_map, tgt.file_map.values())
    310 
    311   @property
    312   def max_stashed_size(self):
    313     return self._max_stashed_size
    314 
    315   def Compute(self, prefix):
    316     # When looking for a source file to use as the diff input for a
    317     # target file, we try:
    318     #   1) an exact path match if available, otherwise
    319     #   2) a exact basename match if available, otherwise
    320     #   3) a basename match after all runs of digits are replaced by
    321     #      "#" if available, otherwise
    322     #   4) we have no source for this target.
    323     self.AbbreviateSourceNames()
    324     self.FindTransfers()
    325 
    326     # Find the ordering dependencies among transfers (this is O(n^2)
    327     # in the number of transfers).
    328     self.GenerateDigraph()
    329     # Find a sequence of transfers that satisfies as many ordering
    330     # dependencies as possible (heuristically).
    331     self.FindVertexSequence()
    332     # Fix up the ordering dependencies that the sequence didn't
    333     # satisfy.
    334     self.ReverseBackwardEdges()
    335     self.ImproveVertexSequence()
    336 
    337     # Ensure the runtime stash size is under the limit.
    338     if common.OPTIONS.cache_size is not None:
    339       self.ReviseStashSize()
    340 
    341     # Double-check our work.
    342     self.AssertSequenceGood()
    343 
    344     self.ComputePatches(prefix)
    345     self.WriteTransfers(prefix)
    346 
    347   def WriteTransfers(self, prefix):
    348     def WriteSplitTransfers(out, style, target_blocks):
    349       """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.
    350 
    351       This prevents the target size of one command from being too large; and
    352       might help to avoid fsync errors on some devices."""
    353 
    354       assert style == "new" or style == "zero"
    355       blocks_limit = 1024
    356       total = 0
    357       while target_blocks:
    358         blocks_to_write = target_blocks.first(blocks_limit)
    359         out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
    360         total += blocks_to_write.size()
    361         target_blocks = target_blocks.subtract(blocks_to_write)
    362       return total
    363 
    364     out = []
    365     total = 0
    366 
    367     # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    368     # id. 'stashes' records the map from 'hash' to the ref count. The stash
    369     # will be freed only if the count decrements to zero.
    370     stashes = {}
    371     stashed_blocks = 0
    372     max_stashed_blocks = 0
    373 
    374     for xf in self.transfers:
    375 
    376       for _, sr in xf.stash_before:
    377         sh = self.src.RangeSha1(sr)
    378         if sh in stashes:
    379           stashes[sh] += 1
    380         else:
    381           stashes[sh] = 1
    382           stashed_blocks += sr.size()
    383           self.touched_src_ranges = self.touched_src_ranges.union(sr)
    384           out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
    385 
    386       if stashed_blocks > max_stashed_blocks:
    387         max_stashed_blocks = stashed_blocks
    388 
    389       free_string = []
    390       free_size = 0
    391 
    392       #   <# blocks> <src ranges>
    393       #     OR
    394       #   <# blocks> <src ranges> <src locs> <stash refs...>
    395       #     OR
    396       #   <# blocks> - <stash refs...>
    397 
    398       size = xf.src_ranges.size()
    399       src_str = [str(size)]
    400 
    401       unstashed_src_ranges = xf.src_ranges
    402       mapped_stashes = []
    403       for _, sr in xf.use_stash:
    404         unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
    405         sh = self.src.RangeSha1(sr)
    406         sr = xf.src_ranges.map_within(sr)
    407         mapped_stashes.append(sr)
    408         assert sh in stashes
    409         src_str.append("%s:%s" % (sh, sr.to_string_raw()))
    410         stashes[sh] -= 1
    411         if stashes[sh] == 0:
    412           free_string.append("free %s\n" % (sh,))
    413           free_size += sr.size()
    414           stashes.pop(sh)
    415 
    416       if unstashed_src_ranges:
    417         src_str.insert(1, unstashed_src_ranges.to_string_raw())
    418         if xf.use_stash:
    419           mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
    420           src_str.insert(2, mapped_unstashed.to_string_raw())
    421           mapped_stashes.append(mapped_unstashed)
    422           self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
    423       else:
    424         src_str.insert(1, "-")
    425         self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
    426 
    427       src_str = " ".join(src_str)
    428 
    429       # version 3+:
    430       #   zero <rangeset>
    431       #   new <rangeset>
    432       #   erase <rangeset>
    433       #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
    434       #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
    435       #   move hash <tgt rangeset> <src_str>
    436 
    437       tgt_size = xf.tgt_ranges.size()
    438 
    439       if xf.style == "new":
    440         assert xf.tgt_ranges
    441         assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
    442         total += tgt_size
    443       elif xf.style == "move":
    444         assert xf.tgt_ranges
    445         assert xf.src_ranges.size() == tgt_size
    446         if xf.src_ranges != xf.tgt_ranges:
    447           # take into account automatic stashing of overlapping blocks
    448           if xf.src_ranges.overlaps(xf.tgt_ranges):
    449             temp_stash_usage = stashed_blocks + xf.src_ranges.size()
    450             if temp_stash_usage > max_stashed_blocks:
    451               max_stashed_blocks = temp_stash_usage
    452 
    453           self.touched_src_ranges = self.touched_src_ranges.union(
    454               xf.src_ranges)
    455 
    456           out.append("%s %s %s %s\n" % (
    457               xf.style,
    458               xf.tgt_sha1,
    459               xf.tgt_ranges.to_string_raw(), src_str))
    460           total += tgt_size
    461       elif xf.style in ("bsdiff", "imgdiff"):
    462         assert xf.tgt_ranges
    463         assert xf.src_ranges
    464         # take into account automatic stashing of overlapping blocks
    465         if xf.src_ranges.overlaps(xf.tgt_ranges):
    466           temp_stash_usage = stashed_blocks + xf.src_ranges.size()
    467           if temp_stash_usage > max_stashed_blocks:
    468             max_stashed_blocks = temp_stash_usage
    469 
    470         self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)
    471 
    472         out.append("%s %d %d %s %s %s %s\n" % (
    473             xf.style,
    474             xf.patch_start, xf.patch_len,
    475             xf.src_sha1,
    476             xf.tgt_sha1,
    477             xf.tgt_ranges.to_string_raw(), src_str))
    478         total += tgt_size
    479       elif xf.style == "zero":
    480         assert xf.tgt_ranges
    481         to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
    482         assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
    483         total += to_zero.size()
    484       else:
    485         raise ValueError("unknown transfer style '%s'\n" % xf.style)
    486 
    487       if free_string:
    488         out.append("".join(free_string))
    489         stashed_blocks -= free_size
    490 
    491       if common.OPTIONS.cache_size is not None:
    492         # Sanity check: abort if we're going to need more stash space than
    493         # the allowed size (cache_size * threshold). There are two purposes
    494         # of having a threshold here. a) Part of the cache may have been
    495         # occupied by some recovery logs. b) It will buy us some time to deal
    496         # with the oversize issue.
    497         cache_size = common.OPTIONS.cache_size
    498         stash_threshold = common.OPTIONS.stash_threshold
    499         max_allowed = cache_size * stash_threshold
    500         assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
    501                'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
    502                    max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
    503                    self.tgt.blocksize, max_allowed, cache_size,
    504                    stash_threshold)
    505 
    506     self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)
    507 
    508     # Zero out extended blocks as a workaround for bug 20881595.
    509     if self.tgt.extended:
    510       assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
    511               self.tgt.extended.size())
    512       total += self.tgt.extended.size()
    513 
    514     # We erase all the blocks on the partition that a) don't contain useful
    515     # data in the new image; b) will not be touched by dm-verity. Out of those
    516     # blocks, we erase the ones that won't be used in this update at the
    517     # beginning of an update. The rest would be erased at the end. This is to
    518     # work around the eMMC issue observed on some devices, which may otherwise
    519     # get starving for clean blocks and thus fail the update. (b/28347095)
    520     all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    521     all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    522     new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
    523 
    524     erase_first = new_dontcare.subtract(self.touched_src_ranges)
    525     if erase_first:
    526       out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))
    527 
    528     erase_last = new_dontcare.subtract(erase_first)
    529     if erase_last:
    530       out.append("erase %s\n" % (erase_last.to_string_raw(),))
    531 
    532     out.insert(0, "%d\n" % (self.version,))   # format version number
    533     out.insert(1, "%d\n" % (total,))
    534     # v3+: the number of stash slots is unused.
    535     out.insert(2, "0\n")
    536     out.insert(3, str(max_stashed_blocks) + "\n")
    537 
    538     with open(prefix + ".transfer.list", "wb") as f:
    539       for i in out:
    540         f.write(i)
    541 
    542     self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    543     OPTIONS = common.OPTIONS
    544     if OPTIONS.cache_size is not None:
    545       max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
    546       print("max stashed blocks: %d  (%d bytes), "
    547             "limit: %d bytes (%.2f%%)\n" % (
    548             max_stashed_blocks, self._max_stashed_size, max_allowed,
    549             self._max_stashed_size * 100.0 / max_allowed))
    550     else:
    551       print("max stashed blocks: %d  (%d bytes), limit: <unknown>\n" % (
    552             max_stashed_blocks, self._max_stashed_size))
    553 
    554   def ReviseStashSize(self):
    555     print("Revising stash size...")
    556     stash_map = {}
    557 
    558     # Create the map between a stash and its def/use points. For example, for a
    559     # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    560     for xf in self.transfers:
    561       # Command xf defines (stores) all the stashes in stash_before.
    562       for stash_raw_id, sr in xf.stash_before:
    563         stash_map[stash_raw_id] = (sr, xf)
    564 
    565       # Record all the stashes command xf uses.
    566       for stash_raw_id, _ in xf.use_stash:
    567         stash_map[stash_raw_id] += (xf,)
    568 
    569     # Compute the maximum blocks available for stash based on /cache size and
    570     # the threshold.
    571     cache_size = common.OPTIONS.cache_size
    572     stash_threshold = common.OPTIONS.stash_threshold
    573     max_allowed = cache_size * stash_threshold / self.tgt.blocksize
    574 
    575     # See the comments for 'stashes' in WriteTransfers().
    576     stashes = {}
    577     stashed_blocks = 0
    578     new_blocks = 0
    579 
    580     # Now go through all the commands. Compute the required stash size on the
    581     # fly. If a command requires excess stash than available, it deletes the
    582     # stash by replacing the command that uses the stash with a "new" command
    583     # instead.
    584     for xf in self.transfers:
    585       replaced_cmds = []
    586 
    587       # xf.stash_before generates explicit stash commands.
    588       for stash_raw_id, sr in xf.stash_before:
    589         # Check the post-command stashed_blocks.
    590         stashed_blocks_after = stashed_blocks
    591         sh = self.src.RangeSha1(sr)
    592         if sh not in stashes:
    593           stashed_blocks_after += sr.size()
    594 
    595         if stashed_blocks_after > max_allowed:
    596           # We cannot stash this one for a later command. Find out the command
    597           # that will use this stash and replace the command with "new".
    598           use_cmd = stash_map[stash_raw_id][2]
    599           replaced_cmds.append(use_cmd)
    600           print("%10d  %9s  %s" % (sr.size(), "explicit", use_cmd))
    601         else:
    602           # Update the stashes map.
    603           if sh in stashes:
    604             stashes[sh] += 1
    605           else:
    606             stashes[sh] = 1
    607           stashed_blocks = stashed_blocks_after
    608 
    609       # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
    610       # ComputePatches(), they both have the style of "diff".
    611       if xf.style == "diff":
    612         assert xf.tgt_ranges and xf.src_ranges
    613         if xf.src_ranges.overlaps(xf.tgt_ranges):
    614           if stashed_blocks + xf.src_ranges.size() > max_allowed:
    615             replaced_cmds.append(xf)
    616             print("%10d  %9s  %s" % (xf.src_ranges.size(), "implicit", xf))
    617 
    618       # Replace the commands in replaced_cmds with "new"s.
    619       for cmd in replaced_cmds:
    620         # It no longer uses any commands in "use_stash". Remove the def points
    621         # for all those stashes.
    622         for stash_raw_id, sr in cmd.use_stash:
    623           def_cmd = stash_map[stash_raw_id][1]
    624           assert (stash_raw_id, sr) in def_cmd.stash_before
    625           def_cmd.stash_before.remove((stash_raw_id, sr))
    626 
    627         # Add up blocks that violates space limit and print total number to
    628         # screen later.
    629         new_blocks += cmd.tgt_ranges.size()
    630         cmd.ConvertToNew()
    631 
    632       # xf.use_stash may generate free commands.
    633       for _, sr in xf.use_stash:
    634         sh = self.src.RangeSha1(sr)
    635         assert sh in stashes
    636         stashes[sh] -= 1
    637         if stashes[sh] == 0:
    638           stashed_blocks -= sr.size()
    639           stashes.pop(sh)
    640 
    641     num_of_bytes = new_blocks * self.tgt.blocksize
    642     print("  Total %d blocks (%d bytes) are packed as new blocks due to "
    643           "insufficient cache size." % (new_blocks, num_of_bytes))
    644     return new_blocks
    645 
    646   def ComputePatches(self, prefix):
    647     print("Reticulating splines...")
    648     diff_queue = []
    649     patch_num = 0
    650     with open(prefix + ".new.dat", "wb") as new_f:
    651       for index, xf in enumerate(self.transfers):
    652         if xf.style == "zero":
    653           tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
    654           print("%10d %10d (%6.2f%%) %7s %s %s" % (
    655               tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
    656               str(xf.tgt_ranges)))
    657 
    658         elif xf.style == "new":
    659           self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
    660           tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
    661           print("%10d %10d (%6.2f%%) %7s %s %s" % (
    662               tgt_size, tgt_size, 100.0, xf.style,
    663               xf.tgt_name, str(xf.tgt_ranges)))
    664 
    665         elif xf.style == "diff":
    666           # We can't compare src and tgt directly because they may have
    667           # the same content but be broken up into blocks differently, eg:
    668           #
    669           #    ["he", "llo"]  vs  ["h", "ello"]
    670           #
    671           # We want those to compare equal, ideally without having to
    672           # actually concatenate the strings (these may be tens of
    673           # megabytes).
    674           if xf.src_sha1 == xf.tgt_sha1:
    675             # These are identical; we don't need to generate a patch,
    676             # just issue copy commands on the device.
    677             xf.style = "move"
    678             tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
    679             if xf.src_ranges != xf.tgt_ranges:
    680               print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
    681                   tgt_size, tgt_size, 100.0, xf.style,
    682                   xf.tgt_name if xf.tgt_name == xf.src_name else (
    683                       xf.tgt_name + " (from " + xf.src_name + ")"),
    684                   str(xf.tgt_ranges), str(xf.src_ranges)))
    685           else:
    686             # For files in zip format (eg, APKs, JARs, etc.) we would
    687             # like to use imgdiff -z if possible (because it usually
    688             # produces significantly smaller patches than bsdiff).
    689             # This is permissible if:
    690             #
    691             #  - imgdiff is not disabled, and
    692             #  - the source and target files are monotonic (ie, the
    693             #    data is stored with blocks in increasing order), and
    694             #  - we haven't removed any blocks from the source set.
    695             #
    696             # If these conditions are satisfied then appending all the
    697             # blocks in the set together in order will produce a valid
    698             # zip file (plus possibly extra zeros in the last block),
    699             # which is what imgdiff needs to operate.  (imgdiff is
    700             # fine with extra zeros at the end of the file.)
    701             imgdiff = (not self.disable_imgdiff and xf.intact and
    702                        xf.tgt_name.split(".")[-1].lower()
    703                        in ("apk", "jar", "zip"))
    704             xf.style = "imgdiff" if imgdiff else "bsdiff"
    705             diff_queue.append((index, imgdiff, patch_num))
    706             patch_num += 1
    707 
    708         else:
    709           assert False, "unknown style " + xf.style
    710 
    711     if diff_queue:
    712       if self.threads > 1:
    713         print("Computing patches (using %d threads)..." % (self.threads,))
    714       else:
    715         print("Computing patches...")
    716 
    717       diff_total = len(diff_queue)
    718       patches = [None] * diff_total
    719       error_messages = []
    720       if sys.stdout.isatty():
    721         global diff_done
    722         diff_done = 0
    723 
    724       # Using multiprocessing doesn't give additional benefits, due to the
    725       # pattern of the code. The diffing work is done by subprocess.call, which
    726       # already runs in a separate process (not affected much by the GIL -
    727       # Global Interpreter Lock). Using multiprocess also requires either a)
    728       # writing the diff input files in the main process before forking, or b)
    729       # reopening the image file (SparseImage) in the worker processes. Doing
    730       # neither of them further improves the performance.
    731       lock = threading.Lock()
    732       def diff_worker():
    733         while True:
    734           with lock:
    735             if not diff_queue:
    736               return
    737             xf_index, imgdiff, patch_index = diff_queue.pop()
    738 
    739           xf = self.transfers[xf_index]
    740           src_ranges = xf.src_ranges
    741           tgt_ranges = xf.tgt_ranges
    742 
    743           # Needs lock since WriteRangeDataToFd() is stateful (calling seek).
    744           with lock:
    745             src_file = common.MakeTempFile(prefix="src-")
    746             with open(src_file, "wb") as fd:
    747               self.src.WriteRangeDataToFd(src_ranges, fd)
    748 
    749             tgt_file = common.MakeTempFile(prefix="tgt-")
    750             with open(tgt_file, "wb") as fd:
    751               self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
    752 
    753           try:
    754             patch = compute_patch(src_file, tgt_file, imgdiff)
    755           except ValueError as e:
    756             with lock:
    757               error_messages.append(
    758                   "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
    759                       "imgdiff" if imgdiff else "bsdiff",
    760                       xf.tgt_name if xf.tgt_name == xf.src_name else
    761                           xf.tgt_name + " (from " + xf.src_name + ")",
    762                       xf.tgt_ranges, xf.src_ranges, e.message))
    763 
    764           with lock:
    765             patches[patch_index] = (xf_index, patch)
    766             if sys.stdout.isatty():
    767               global diff_done
    768               diff_done += 1
    769               progress = diff_done * 100 / diff_total
    770               # '\033[K' is to clear to EOL.
    771               print(' [%d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
    772               sys.stdout.flush()
    773 
    774       threads = [threading.Thread(target=diff_worker)
    775                  for _ in range(self.threads)]
    776       for th in threads:
    777         th.start()
    778       while threads:
    779         threads.pop().join()
    780 
    781       if sys.stdout.isatty():
    782         print('\n')
    783 
    784       if error_messages:
    785         print('\n'.join(error_messages))
    786         sys.exit(1)
    787     else:
    788       patches = []
    789 
    790     offset = 0
    791     with open(prefix + ".patch.dat", "wb") as patch_fd:
    792       for index, patch in patches:
    793         xf = self.transfers[index]
    794         xf.patch_len = len(patch)
    795         xf.patch_start = offset
    796         offset += xf.patch_len
    797         patch_fd.write(patch)
    798 
    799         if common.OPTIONS.verbose:
    800           tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
    801           print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
    802                 xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
    803                 xf.style,
    804                 xf.tgt_name if xf.tgt_name == xf.src_name else (
    805                     xf.tgt_name + " (from " + xf.src_name + ")"),
    806                 xf.tgt_ranges, xf.src_ranges))
    807 
    808   def AssertSequenceGood(self):
    809     # Simulate the sequences of transfers we will output, and check that:
    810     # - we never read a block after writing it, and
    811     # - we write every block we care about exactly once.
    812 
    813     # Start with no blocks having been touched yet.
    814     touched = array.array("B", "\0" * self.tgt.total_blocks)
    815 
    816     # Imagine processing the transfers in order.
    817     for xf in self.transfers:
    818       # Check that the input blocks for this transfer haven't yet been touched.
    819 
    820       x = xf.src_ranges
    821       for _, sr in xf.use_stash:
    822         x = x.subtract(sr)
    823 
    824       for s, e in x:
    825         # Source image could be larger. Don't check the blocks that are in the
    826         # source image only. Since they are not in 'touched', and won't ever
    827         # be touched.
    828         for i in range(s, min(e, self.tgt.total_blocks)):
    829           assert touched[i] == 0
    830 
    831       # Check that the output blocks for this transfer haven't yet
    832       # been touched, and touch all the blocks written by this
    833       # transfer.
    834       for s, e in xf.tgt_ranges:
    835         for i in range(s, e):
    836           assert touched[i] == 0
    837           touched[i] = 1
    838 
    839     # Check that we've written every target block.
    840     for s, e in self.tgt.care_map:
    841       for i in range(s, e):
    842         assert touched[i] == 1
    843 
    844   def ImproveVertexSequence(self):
    845     print("Improving vertex order...")
    846 
    847     # At this point our digraph is acyclic; we reversed any edges that
    848     # were backwards in the heuristically-generated sequence.  The
    849     # previously-generated order is still acceptable, but we hope to
    850     # find a better order that needs less memory for stashed data.
    851     # Now we do a topological sort to generate a new vertex order,
    852     # using a greedy algorithm to choose which vertex goes next
    853     # whenever we have a choice.
    854 
    855     # Make a copy of the edge set; this copy will get destroyed by the
    856     # algorithm.
    857     for xf in self.transfers:
    858       xf.incoming = xf.goes_after.copy()
    859       xf.outgoing = xf.goes_before.copy()
    860 
    861     L = []   # the new vertex order
    862 
    863     # S is the set of sources in the remaining graph; we always choose
    864     # the one that leaves the least amount of stashed data after it's
    865     # executed.
    866     S = [(u.NetStashChange(), u.order, u) for u in self.transfers
    867          if not u.incoming]
    868     heapq.heapify(S)
    869 
    870     while S:
    871       _, _, xf = heapq.heappop(S)
    872       L.append(xf)
    873       for u in xf.outgoing:
    874         del u.incoming[xf]
    875         if not u.incoming:
    876           heapq.heappush(S, (u.NetStashChange(), u.order, u))
    877 
    878     # if this fails then our graph had a cycle.
    879     assert len(L) == len(self.transfers)
    880 
    881     self.transfers = L
    882     for i, xf in enumerate(L):
    883       xf.order = i
    884 
    885   def RemoveBackwardEdges(self):
    886     print("Removing backward edges...")
    887     in_order = 0
    888     out_of_order = 0
    889     lost_source = 0
    890 
    891     for xf in self.transfers:
    892       lost = 0
    893       size = xf.src_ranges.size()
    894       for u in xf.goes_before:
    895         # xf should go before u
    896         if xf.order < u.order:
    897           # it does, hurray!
    898           in_order += 1
    899         else:
    900           # it doesn't, boo.  trim the blocks that u writes from xf's
    901           # source, so that xf can go after u.
    902           out_of_order += 1
    903           assert xf.src_ranges.overlaps(u.tgt_ranges)
    904           xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
    905           xf.intact = False
    906 
    907       if xf.style == "diff" and not xf.src_ranges:
    908         # nothing left to diff from; treat as new data
    909         xf.style = "new"
    910 
    911       lost = size - xf.src_ranges.size()
    912       lost_source += lost
    913 
    914     print(("  %d/%d dependencies (%.2f%%) were violated; "
    915            "%d source blocks removed.") %
    916           (out_of_order, in_order + out_of_order,
    917            (out_of_order * 100.0 / (in_order + out_of_order))
    918            if (in_order + out_of_order) else 0.0,
    919            lost_source))
    920 
    921   def ReverseBackwardEdges(self):
    922     """Reverse unsatisfying edges and compute pairs of stashed blocks.
    923 
    924     For each transfer, make sure it properly stashes the blocks it touches and
    925     will be used by later transfers. It uses pairs of (stash_raw_id, range) to
    926     record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
    927     identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
    928     it is possible to have multiple pairs with different 'stash_raw_id's. Each
    929     'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
    930     blocks will be written to the same stash slot in WriteTransfers().
    931     """
    932 
    933     print("Reversing backward edges...")
    934     in_order = 0
    935     out_of_order = 0
    936     stash_raw_id = 0
    937     stash_size = 0
    938 
    939     for xf in self.transfers:
    940       for u in xf.goes_before.copy():
    941         # xf should go before u
    942         if xf.order < u.order:
    943           # it does, hurray!
    944           in_order += 1
    945         else:
    946           # it doesn't, boo.  modify u to stash the blocks that it
    947           # writes that xf wants to read, and then require u to go
    948           # before xf.
    949           out_of_order += 1
    950 
    951           overlap = xf.src_ranges.intersect(u.tgt_ranges)
    952           assert overlap
    953 
    954           u.stash_before.append((stash_raw_id, overlap))
    955           xf.use_stash.append((stash_raw_id, overlap))
    956           stash_raw_id += 1
    957           stash_size += overlap.size()
    958 
    959           # reverse the edge direction; now xf must go after u
    960           del xf.goes_before[u]
    961           del u.goes_after[xf]
    962           xf.goes_after[u] = None    # value doesn't matter
    963           u.goes_before[xf] = None
    964 
    965     print(("  %d/%d dependencies (%.2f%%) were violated; "
    966            "%d source blocks stashed.") %
    967           (out_of_order, in_order + out_of_order,
    968            (out_of_order * 100.0 / (in_order + out_of_order))
    969            if (in_order + out_of_order) else 0.0,
    970            stash_size))
    971 
    972   def FindVertexSequence(self):
    973     print("Finding vertex sequence...")
    974 
    975     # This is based on "A Fast & Effective Heuristic for the Feedback
    976     # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth.  Think of
    977     # it as starting with the digraph G and moving all the vertices to
    978     # be on a horizontal line in some order, trying to minimize the
    979     # number of edges that end up pointing to the left.  Left-pointing
    980     # edges will get removed to turn the digraph into a DAG.  In this
    981     # case each edge has a weight which is the number of source blocks
    982     # we'll lose if that edge is removed; we try to minimize the total
    983     # weight rather than just the number of edges.
    984 
    985     # Make a copy of the edge set; this copy will get destroyed by the
    986     # algorithm.
    987     for xf in self.transfers:
    988       xf.incoming = xf.goes_after.copy()
    989       xf.outgoing = xf.goes_before.copy()
    990       xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
    991 
    992     # We use an OrderedDict instead of just a set so that the output
    993     # is repeatable; otherwise it would depend on the hash values of
    994     # the transfer objects.
    995     G = OrderedDict()
    996     for xf in self.transfers:
    997       G[xf] = None
    998     s1 = deque()  # the left side of the sequence, built from left to right
    999     s2 = deque()  # the right side of the sequence, built from right to left
   1000 
   1001     heap = []
   1002     for xf in self.transfers:
   1003       xf.heap_item = HeapItem(xf)
   1004       heap.append(xf.heap_item)
   1005     heapq.heapify(heap)
   1006 
   1007     # Use OrderedDict() instead of set() to preserve the insertion order. Need
   1008     # to use 'sinks[key] = None' to add key into the set. sinks will look like
   1009     # { key1: None, key2: None, ... }.
   1010     sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
   1011     sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
   1012 
   1013     def adjust_score(iu, delta):
   1014       iu.score += delta
   1015       iu.heap_item.clear()
   1016       iu.heap_item = HeapItem(iu)
   1017       heapq.heappush(heap, iu.heap_item)
   1018 
   1019     while G:
   1020       # Put all sinks at the end of the sequence.
   1021       while sinks:
   1022         new_sinks = OrderedDict()
   1023         for u in sinks:
   1024           if u not in G: continue
   1025           s2.appendleft(u)
   1026           del G[u]
   1027           for iu in u.incoming:
   1028             adjust_score(iu, -iu.outgoing.pop(u))
   1029             if not iu.outgoing:
   1030               new_sinks[iu] = None
   1031         sinks = new_sinks
   1032 
   1033       # Put all the sources at the beginning of the sequence.
   1034       while sources:
   1035         new_sources = OrderedDict()
   1036         for u in sources:
   1037           if u not in G: continue
   1038           s1.append(u)
   1039           del G[u]
   1040           for iu in u.outgoing:
   1041             adjust_score(iu, +iu.incoming.pop(u))
   1042             if not iu.incoming:
   1043               new_sources[iu] = None
   1044         sources = new_sources
   1045 
   1046       if not G: break
   1047 
   1048       # Find the "best" vertex to put next.  "Best" is the one that
   1049       # maximizes the net difference in source blocks saved we get by
   1050       # pretending it's a source rather than a sink.
   1051 
   1052       while True:
   1053         u = heapq.heappop(heap)
   1054         if u and u.item in G:
   1055           u = u.item
   1056           break
   1057 
   1058       s1.append(u)
   1059       del G[u]
   1060       for iu in u.outgoing:
   1061         adjust_score(iu, +iu.incoming.pop(u))
   1062         if not iu.incoming:
   1063           sources[iu] = None
   1064 
   1065       for iu in u.incoming:
   1066         adjust_score(iu, -iu.outgoing.pop(u))
   1067         if not iu.outgoing:
   1068           sinks[iu] = None
   1069 
   1070     # Now record the sequence in the 'order' field of each transfer,
   1071     # and by rearranging self.transfers to be in the chosen sequence.
   1072 
   1073     new_transfers = []
   1074     for x in itertools.chain(s1, s2):
   1075       x.order = len(new_transfers)
   1076       new_transfers.append(x)
   1077       del x.incoming
   1078       del x.outgoing
   1079 
   1080     self.transfers = new_transfers
   1081 
   1082   def GenerateDigraph(self):
   1083     print("Generating digraph...")
   1084 
   1085     # Each item of source_ranges will be:
   1086     #   - None, if that block is not used as a source,
   1087     #   - an ordered set of transfers.
   1088     source_ranges = []
   1089     for b in self.transfers:
   1090       for s, e in b.src_ranges:
   1091         if e > len(source_ranges):
   1092           source_ranges.extend([None] * (e-len(source_ranges)))
   1093         for i in range(s, e):
   1094           if source_ranges[i] is None:
   1095             source_ranges[i] = OrderedDict.fromkeys([b])
   1096           else:
   1097             source_ranges[i][b] = None
   1098 
   1099     for a in self.transfers:
   1100       intersections = OrderedDict()
   1101       for s, e in a.tgt_ranges:
   1102         for i in range(s, e):
   1103           if i >= len(source_ranges): break
   1104           # Add all the Transfers in source_ranges[i] to the (ordered) set.
   1105           if source_ranges[i] is not None:
   1106             for j in source_ranges[i]:
   1107               intersections[j] = None
   1108 
   1109       for b in intersections:
   1110         if a is b: continue
   1111 
   1112         # If the blocks written by A are read by B, then B needs to go before A.
   1113         i = a.tgt_ranges.intersect(b.src_ranges)
   1114         if i:
   1115           if b.src_name == "__ZERO":
   1116             # the cost of removing source blocks for the __ZERO domain
   1117             # is (nearly) zero.
   1118             size = 0
   1119           else:
   1120             size = i.size()
   1121           b.goes_before[a] = size
   1122           a.goes_after[b] = size
   1123 
   1124   def FindTransfers(self):
   1125     """Parse the file_map to generate all the transfers."""
   1126 
   1127     def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges,
   1128                           style, by_id):
   1129       """Add one or multiple Transfer()s by splitting large files.
   1130 
   1131       For BBOTA v3, we need to stash source blocks for resumable feature.
   1132       However, with the growth of file size and the shrink of the cache
   1133       partition source blocks are too large to be stashed. If a file occupies
   1134       too many blocks, we split it into smaller pieces by getting multiple
   1135       Transfer()s.
   1136 
   1137       The downside is that after splitting, we may increase the package size
   1138       since the split pieces don't align well. According to our experiments,
   1139       1/8 of the cache size as the per-piece limit appears to be optimal.
   1140       Compared to the fixed 1024-block limit, it reduces the overall package
   1141       size by 30% for volantis, and 20% for angler and bullhead."""
   1142 
   1143       # Possibly split large files into smaller chunks.
   1144       pieces = 0
   1145       cache_size = common.OPTIONS.cache_size
   1146       split_threshold = 0.125
   1147       max_blocks_per_transfer = int(cache_size * split_threshold /
   1148                                     self.tgt.blocksize)
   1149 
   1150       # Change nothing for small files.
   1151       if (tgt_ranges.size() <= max_blocks_per_transfer and
   1152           src_ranges.size() <= max_blocks_per_transfer):
   1153         Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
   1154                  self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
   1155                  style, by_id)
   1156         return
   1157 
   1158       while (tgt_ranges.size() > max_blocks_per_transfer and
   1159              src_ranges.size() > max_blocks_per_transfer):
   1160         tgt_split_name = "%s-%d" % (tgt_name, pieces)
   1161         src_split_name = "%s-%d" % (src_name, pieces)
   1162         tgt_first = tgt_ranges.first(max_blocks_per_transfer)
   1163         src_first = src_ranges.first(max_blocks_per_transfer)
   1164 
   1165         Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
   1166                  self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
   1167                  style, by_id)
   1168 
   1169         tgt_ranges = tgt_ranges.subtract(tgt_first)
   1170         src_ranges = src_ranges.subtract(src_first)
   1171         pieces += 1
   1172 
   1173       # Handle remaining blocks.
   1174       if tgt_ranges.size() or src_ranges.size():
   1175         # Must be both non-empty.
   1176         assert tgt_ranges.size() and src_ranges.size()
   1177         tgt_split_name = "%s-%d" % (tgt_name, pieces)
   1178         src_split_name = "%s-%d" % (src_name, pieces)
   1179         Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
   1180                  self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
   1181                  style, by_id)
   1182 
   1183     def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
   1184                     split=False):
   1185       """Wrapper function for adding a Transfer()."""
   1186 
   1187       # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
   1188       # otherwise add the Transfer() as is.
   1189       if style != "diff" or not split:
   1190         Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
   1191                  self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
   1192                  style, by_id)
   1193         return
   1194 
   1195       # Handle .odex files specially to analyze the block-wise difference. If
   1196       # most of the blocks are identical with only few changes (e.g. header),
   1197       # we will patch the changed blocks only. This avoids stashing unchanged
   1198       # blocks while patching. We limit the analysis to files without size
   1199       # changes only. This is to avoid sacrificing the OTA generation cost too
   1200       # much.
   1201       if (tgt_name.split(".")[-1].lower() == 'odex' and
   1202           tgt_ranges.size() == src_ranges.size()):
   1203 
   1204         # 0.5 threshold can be further tuned. The tradeoff is: if only very
   1205         # few blocks remain identical, we lose the opportunity to use imgdiff
   1206         # that may have better compression ratio than bsdiff.
   1207         crop_threshold = 0.5
   1208 
   1209         tgt_skipped = RangeSet()
   1210         src_skipped = RangeSet()
   1211         tgt_size = tgt_ranges.size()
   1212         tgt_changed = 0
   1213         for src_block, tgt_block in zip(src_ranges.next_item(),
   1214                                         tgt_ranges.next_item()):
   1215           src_rs = RangeSet(str(src_block))
   1216           tgt_rs = RangeSet(str(tgt_block))
   1217           if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
   1218             tgt_skipped = tgt_skipped.union(tgt_rs)
   1219             src_skipped = src_skipped.union(src_rs)
   1220           else:
   1221             tgt_changed += tgt_rs.size()
   1222 
   1223           # Terminate early if no clear sign of benefits.
   1224           if tgt_changed > tgt_size * crop_threshold:
   1225             break
   1226 
   1227         if tgt_changed < tgt_size * crop_threshold:
   1228           assert tgt_changed + tgt_skipped.size() == tgt_size
   1229           print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
   1230                 tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
   1231           AddSplitTransfers(
   1232               "%s-skipped" % (tgt_name,),
   1233               "%s-skipped" % (src_name,),
   1234               tgt_skipped, src_skipped, style, by_id)
   1235 
   1236           # Intentionally change the file extension to avoid being imgdiff'd as
   1237           # the files are no longer in their original format.
   1238           tgt_name = "%s-cropped" % (tgt_name,)
   1239           src_name = "%s-cropped" % (src_name,)
   1240           tgt_ranges = tgt_ranges.subtract(tgt_skipped)
   1241           src_ranges = src_ranges.subtract(src_skipped)
   1242 
   1243           # Possibly having no changed blocks.
   1244           if not tgt_ranges:
   1245             return
   1246 
   1247       # Add the transfer(s).
   1248       AddSplitTransfers(
   1249           tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
   1250 
   1251     print("Finding transfers...")
   1252 
   1253     empty = RangeSet()
   1254     for tgt_fn, tgt_ranges in self.tgt.file_map.items():
   1255       if tgt_fn == "__ZERO":
   1256         # the special "__ZERO" domain is all the blocks not contained
   1257         # in any file and that are filled with zeros.  We have a
   1258         # special transfer style for zero blocks.
   1259         src_ranges = self.src.file_map.get("__ZERO", empty)
   1260         AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
   1261                     "zero", self.transfers)
   1262         continue
   1263 
   1264       elif tgt_fn == "__COPY":
   1265         # "__COPY" domain includes all the blocks not contained in any
   1266         # file and that need to be copied unconditionally to the target.
   1267         AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
   1268         continue
   1269 
   1270       elif tgt_fn in self.src.file_map:
   1271         # Look for an exact pathname match in the source.
   1272         AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
   1273                     "diff", self.transfers, True)
   1274         continue
   1275 
   1276       b = os.path.basename(tgt_fn)
   1277       if b in self.src_basenames:
   1278         # Look for an exact basename match in the source.
   1279         src_fn = self.src_basenames[b]
   1280         AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
   1281                     "diff", self.transfers, True)
   1282         continue
   1283 
   1284       b = re.sub("[0-9]+", "#", b)
   1285       if b in self.src_numpatterns:
   1286         # Look for a 'number pattern' match (a basename match after
   1287         # all runs of digits are replaced by "#").  (This is useful
   1288         # for .so files that contain version numbers in the filename
   1289         # that get bumped.)
   1290         src_fn = self.src_numpatterns[b]
   1291         AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
   1292                     "diff", self.transfers, True)
   1293         continue
   1294 
   1295       AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
   1296 
   1297   def AbbreviateSourceNames(self):
   1298     for k in self.src.file_map.keys():
   1299       b = os.path.basename(k)
   1300       self.src_basenames[b] = k
   1301       b = re.sub("[0-9]+", "#", b)
   1302       self.src_numpatterns[b] = k
   1303 
   1304   @staticmethod
   1305   def AssertPartition(total, seq):
   1306     """Assert that all the RangeSets in 'seq' form a partition of the
   1307     'total' RangeSet (ie, they are nonintersecting and their union
   1308     equals 'total')."""
   1309 
   1310     so_far = RangeSet()
   1311     for i in seq:
   1312       assert not so_far.overlaps(i)
   1313       so_far = so_far.union(i)
   1314     assert so_far == total
   1315