      1 # Copyright (C) 2014 The Android Open Source Project
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #      http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 
     15 from __future__ import print_function
     16 
     17 from collections import deque, OrderedDict
     18 from hashlib import sha1
     19 import array
     20 import common
     21 import functools
     22 import heapq
     23 import itertools
     24 import multiprocessing
     25 import os
     26 import re
     27 import subprocess
     28 import threading
     29 import time
     30 import tempfile
     31 
     32 from rangelib import RangeSet
     33 
     34 
     35 __all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
     36 
     37 
     38 def compute_patch(src, tgt, imgdiff=False):
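           """Returns a patch (as a string of bytes) that turns the 'src' data into 'tgt'.

           'src' and 'tgt' are sequences of block-data strings. They are written to
           temporary files and handed to the external imgdiff or bsdiff tool.
           """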
     39   srcfd, srcfile = tempfile.mkstemp(prefix="src-")
     40   tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
     41   patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
     42   os.close(patchfd)
     43 
     44   try:
     45     with os.fdopen(srcfd, "wb") as f_src:
     46       for p in src:
     47         f_src.write(p)
     48 
     49     with os.fdopen(tgtfd, "wb") as f_tgt:
     50       for p in tgt:
     51         f_tgt.write(p)
     52     try:
     53       os.unlink(patchfile)
     54     except OSError:
     55       pass
     56     if imgdiff:
     57       p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
     58                           stdout=open("/dev/null", "a"),
     59                           stderr=subprocess.STDOUT)
     60     else:
     61       p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])
     62 
     63     if p:
     64       raise ValueError("diff failed: " + str(p))
     65 
     66     with open(patchfile, "rb") as f:
     67       return f.read()
     68   finally:
     69     try:
     70       os.unlink(srcfile)
     71       os.unlink(tgtfile)
     72       os.unlink(patchfile)
     73     except OSError:
     74       pass
     75 
     76 
     77 class Image(object):
     78   def ReadRangeSet(self, ranges):
     79     raise NotImplementedError
     80 
     81   def TotalSha1(self, include_clobbered_blocks=False):
     82     raise NotImplementedError
     83 
     84 
     85 class EmptyImage(Image):
     86   """A zero-length image."""
     87   blocksize = 4096
     88   care_map = RangeSet()
     89   clobbered_blocks = RangeSet()
     90   extended = RangeSet()
     91   total_blocks = 0
     92   file_map = {}
     93   def ReadRangeSet(self, ranges):
     94     return ()
     95   def TotalSha1(self, include_clobbered_blocks=False):
     96     # EmptyImage always carries empty clobbered_blocks, so
     97     # include_clobbered_blocks can be ignored.
     98     assert self.clobbered_blocks.size() == 0
     99     return sha1().hexdigest()
    100 
    101 
    102 class DataImage(Image):
    103   """An image wrapped around a single string of data."""
    104 
    105   def __init__(self, data, trim=False, pad=False):
    106     self.data = data
    107     self.blocksize = 4096
    108 
    109     assert not (trim and pad)
    110 
    111     partial = len(self.data) % self.blocksize
    112     padded = False
    113     if partial > 0:
    114       if trim:
    115         self.data = self.data[:-partial]
    116       elif pad:
    117         self.data += '\0' * (self.blocksize - partial)
    118         padded = True
    119       else:
    120         raise ValueError(("data for DataImage must be multiple of %d bytes "
    121                           "unless trim or pad is specified") %
    122                          (self.blocksize,))
    123 
    124     assert len(self.data) % self.blocksize == 0
    125 
     126     self.total_blocks = len(self.data) // self.blocksize
    127     self.care_map = RangeSet(data=(0, self.total_blocks))
     128     # When the last block is padded, we always write the whole block even
     129     # for incremental OTAs. Otherwise the last block may be skipped if it is
     130     # unchanged in an incremental update, yet fail the post-install
     131     # verification if its padding bytes contain non-zero data.
    132     # Bug: 23828506
    133     if padded:
    134       clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    135     else:
    136       clobbered_blocks = []
     137     # Keep clobbered_blocks as a RangeSet (as EmptyImage does), so that
             # TotalSha1() below can subtract it from care_map.
             self.clobbered_blocks = RangeSet(
                 data=clobbered_blocks) if clobbered_blocks else RangeSet()
    138     self.extended = RangeSet()
    139 
    140     zero_blocks = []
    141     nonzero_blocks = []
    142     reference = '\0' * self.blocksize
    143 
    144     for i in range(self.total_blocks-1 if padded else self.total_blocks):
    145       d = self.data[i*self.blocksize : (i+1)*self.blocksize]
    146       if d == reference:
    147         zero_blocks.append(i)
    148         zero_blocks.append(i+1)
    149       else:
    150         nonzero_blocks.append(i)
    151         nonzero_blocks.append(i+1)
    152 
    153     assert zero_blocks or nonzero_blocks or clobbered_blocks
    154 
    155     self.file_map = dict()
    156     if zero_blocks:
    157       self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    158     if nonzero_blocks:
    159       self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    160     if clobbered_blocks:
    161       self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)
    162 
    163   def ReadRangeSet(self, ranges):
    164     return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]
    165 
    166   def TotalSha1(self, include_clobbered_blocks=False):
    167     if not include_clobbered_blocks:
    168       ranges = self.care_map.subtract(self.clobbered_blocks)
     169       # sha1() needs a single string/buffer, while ReadRangeSet() returns a
               # list of strings; join the pieces before hashing.
               return sha1("".join(self.ReadRangeSet(ranges))).hexdigest()
    170     else:
    171       return sha1(self.data).hexdigest()
    172 
    173 
    174 class Transfer(object):
    175   def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
    176     self.tgt_name = tgt_name
    177     self.src_name = src_name
    178     self.tgt_ranges = tgt_ranges
    179     self.src_ranges = src_ranges
    180     self.style = style
    181     self.intact = (getattr(tgt_ranges, "monotonic", False) and
    182                    getattr(src_ranges, "monotonic", False))
    183 
    184     # We use OrderedDict rather than dict so that the output is repeatable;
    185     # otherwise it would depend on the hash values of the Transfer objects.
    186     self.goes_before = OrderedDict()
    187     self.goes_after = OrderedDict()
    188 
    189     self.stash_before = []
    190     self.use_stash = []
    191 
    192     self.id = len(by_id)
    193     by_id.append(self)
    194 
    195   def NetStashChange(self):
    196     return (sum(sr.size() for (_, sr) in self.stash_before) -
    197             sum(sr.size() for (_, sr) in self.use_stash))
    198 
    199   def ConvertToNew(self):
    200     assert self.style != "new"
    201     self.use_stash = []
    202     self.style = "new"
    203     self.src_ranges = RangeSet()
    204 
    205   def __str__(self):
    206     return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
    207             " to " + str(self.tgt_ranges) + ">")
    208 
    209 
    210 @functools.total_ordering
    211 class HeapItem(object):
    212   def __init__(self, item):
    213     self.item = item
    214     # Negate the score since python's heap is a min-heap and we want
    215     # the maximum score.
    216     self.score = -item.score
    217   def clear(self):
    218     self.item = None
     219   def __bool__(self):
     220     return self.item is not None
           # Python 2 uses __nonzero__ for truth testing; Python 3 uses __bool__.
           __nonzero__ = __bool__
    221   def __eq__(self, other):
    222     return self.score == other.score
    223   def __le__(self, other):
    224     return self.score <= other.score
    225 
    226 
    227 # BlockImageDiff works on two image objects.  An image object is
    228 # anything that provides the following attributes:
    229 #
    230 #    blocksize: the size in bytes of a block, currently must be 4096.
    231 #
    232 #    total_blocks: the total size of the partition/image, in blocks.
    233 #
    234 #    care_map: a RangeSet containing which blocks (in the range [0,
    235 #      total_blocks) we actually care about; i.e. which blocks contain
    236 #      data.
    237 #
    238 #    file_map: a dict that partitions the blocks contained in care_map
    239 #      into smaller domains that are useful for doing diffs on.
    240 #      (Typically a domain is a file, and the key in file_map is the
    241 #      pathname.)
    242 #
    243 #    clobbered_blocks: a RangeSet containing which blocks contain data
    244 #      but may be altered by the FS. They need to be excluded when
    245 #      verifying the partition integrity.
    246 #
    247 #    ReadRangeSet(): a function that takes a RangeSet and returns the
    248 #      data contained in the image blocks of that RangeSet.  The data
    249 #      is returned as a list or tuple of strings; concatenating the
    250 #      elements together should produce the requested data.
    251 #      Implementations are free to break up the data into list/tuple
    252 #      elements in any way that is convenient.
    253 #
    254 #    TotalSha1(): a function that returns (as a hex string) the SHA-1
    255 #      hash of all the data in the image (ie, all the blocks in the
    256 #      care_map minus clobbered_blocks, or including the clobbered
    257 #      blocks if include_clobbered_blocks is True).
    258 #
    259 # When creating a BlockImageDiff, the src image may be None, in which
    260 # case the list of transfers produced will never read from the
    261 # original image.
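         #
         # A minimal usage sketch (illustrative only; 'tgt_img' and 'src_img' stand
         # for any objects satisfying the interface above, e.g. sparse_img.SparseImage
         # instances, and "out/system" is just an example prefix):
         #
         #   b = BlockImageDiff(tgt_img, src_img, version=4)
         #   b.Compute("out/system")   # writes out/system.transfer.list,
         #                             # out/system.new.dat, out/system.patch.dat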
    262 
    263 class BlockImageDiff(object):
    264   def __init__(self, tgt, src=None, threads=None, version=4,
    265                disable_imgdiff=False):
    266     if threads is None:
    267       threads = multiprocessing.cpu_count() // 2
    268       if threads == 0:
    269         threads = 1
    270     self.threads = threads
    271     self.version = version
    272     self.transfers = []
    273     self.src_basenames = {}
    274     self.src_numpatterns = {}
    275     self._max_stashed_size = 0
    276     self.touched_src_ranges = RangeSet()
    277     self.touched_src_sha1 = None
    278     self.disable_imgdiff = disable_imgdiff
    279 
    280     assert version in (1, 2, 3, 4)
    281 
    282     self.tgt = tgt
    283     if src is None:
    284       src = EmptyImage()
    285     self.src = src
    286 
    287     # The updater code that installs the patch always uses 4k blocks.
    288     assert tgt.blocksize == 4096
    289     assert src.blocksize == 4096
    290 
    291     # The range sets in each filemap should comprise a partition of
    292     # the care map.
    293     self.AssertPartition(src.care_map, src.file_map.values())
    294     self.AssertPartition(tgt.care_map, tgt.file_map.values())
    295 
    296   @property
    297   def max_stashed_size(self):
    298     return self._max_stashed_size
    299 
    300   def Compute(self, prefix):
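             """Computes the list of transfers for going from self.src to self.tgt.

             The results are written out to <prefix>.transfer.list, <prefix>.new.dat
             and <prefix>.patch.dat (see WriteTransfers() and ComputePatches()).
             """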
    301     # When looking for a source file to use as the diff input for a
    302     # target file, we try:
    303     #   1) an exact path match if available, otherwise
     304     #   2) an exact basename match if available, otherwise
    305     #   3) a basename match after all runs of digits are replaced by
    306     #      "#" if available, otherwise
    307     #   4) we have no source for this target.
    308     self.AbbreviateSourceNames()
    309     self.FindTransfers()
    310 
    311     # Find the ordering dependencies among transfers (this is O(n^2)
    312     # in the number of transfers).
    313     self.GenerateDigraph()
    314     # Find a sequence of transfers that satisfies as many ordering
    315     # dependencies as possible (heuristically).
    316     self.FindVertexSequence()
    317     # Fix up the ordering dependencies that the sequence didn't
    318     # satisfy.
    319     if self.version == 1:
    320       self.RemoveBackwardEdges()
    321     else:
    322       self.ReverseBackwardEdges()
    323       self.ImproveVertexSequence()
    324 
    325     # Ensure the runtime stash size is under the limit.
    326     if self.version >= 2 and common.OPTIONS.cache_size is not None:
    327       self.ReviseStashSize()
    328 
    329     # Double-check our work.
    330     self.AssertSequenceGood()
    331 
    332     self.ComputePatches(prefix)
    333     self.WriteTransfers(prefix)
    334 
    335   def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
    336     data = source.ReadRangeSet(ranges)
    337     ctx = sha1()
    338 
    339     for p in data:
    340       ctx.update(p)
    341 
    342     return ctx.hexdigest()
    343 
    344   def WriteTransfers(self, prefix):
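             """Writes the transfer list for this diff to <prefix>.transfer.list.

             Stash usage is tracked along the way so that the peak stash size can be
             checked against the cache limit and reported at the end.
             """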
    345     def WriteSplitTransfers(out, style, target_blocks):
     346       """Limits the operand size of 'new' and 'zero' commands to 1024 blocks.
     347 
     348       This prevents the target size of a single command from being too large,
     349       and might help to avoid fsync errors on some devices."""
    350 
    351       assert (style == "new" or style == "zero")
    352       blocks_limit = 1024
    353       total = 0
    354       while target_blocks:
    355         blocks_to_write = target_blocks.first(blocks_limit)
    356         out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
    357         total += blocks_to_write.size()
    358         target_blocks = target_blocks.subtract(blocks_to_write)
    359       return total
    360 
    361     out = []
    362 
    363     total = 0
    364 
    365     stashes = {}
    366     stashed_blocks = 0
    367     max_stashed_blocks = 0
    368 
    369     free_stash_ids = []
    370     next_stash_id = 0
    371 
    372     for xf in self.transfers:
    373 
    374       if self.version < 2:
    375         assert not xf.stash_before
    376         assert not xf.use_stash
    377 
    378       for s, sr in xf.stash_before:
    379         assert s not in stashes
    380         if free_stash_ids:
    381           sid = heapq.heappop(free_stash_ids)
    382         else:
    383           sid = next_stash_id
    384           next_stash_id += 1
    385         stashes[s] = sid
    386         if self.version == 2:
    387           stashed_blocks += sr.size()
    388           out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
    389         else:
    390           sh = self.HashBlocks(self.src, sr)
    391           if sh in stashes:
    392             stashes[sh] += 1
    393           else:
    394             stashes[sh] = 1
    395             stashed_blocks += sr.size()
    396             self.touched_src_ranges = self.touched_src_ranges.union(sr)
    397             out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
    398 
    399       if stashed_blocks > max_stashed_blocks:
    400         max_stashed_blocks = stashed_blocks
    401 
    402       free_string = []
    403       free_size = 0
    404 
    405       if self.version == 1:
    406         src_str = xf.src_ranges.to_string_raw() if xf.src_ranges else ""
    407       elif self.version >= 2:
    408 
    409         #   <# blocks> <src ranges>
    410         #     OR
    411         #   <# blocks> <src ranges> <src locs> <stash refs...>
    412         #     OR
    413         #   <# blocks> - <stash refs...>
    414 
    415         size = xf.src_ranges.size()
    416         src_str = [str(size)]
    417 
    418         unstashed_src_ranges = xf.src_ranges
    419         mapped_stashes = []
    420         for s, sr in xf.use_stash:
    421           sid = stashes.pop(s)
    422           unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
    423           sh = self.HashBlocks(self.src, sr)
    424           sr = xf.src_ranges.map_within(sr)
    425           mapped_stashes.append(sr)
    426           if self.version == 2:
    427             src_str.append("%d:%s" % (sid, sr.to_string_raw()))
     428             # A stash is used only once. Free it immediately after the
     429             # use, instead of waiting for the automatic clean-up at the
     430             # end; otherwise it may take up extra space and lead to OTA
     431             # failures.
    432             # Bug: 23119955
    433             free_string.append("free %d\n" % (sid,))
    434             free_size += sr.size()
    435           else:
    436             assert sh in stashes
    437             src_str.append("%s:%s" % (sh, sr.to_string_raw()))
    438             stashes[sh] -= 1
    439             if stashes[sh] == 0:
    440               free_size += sr.size()
    441               free_string.append("free %s\n" % (sh))
    442               stashes.pop(sh)
    443           heapq.heappush(free_stash_ids, sid)
    444 
    445         if unstashed_src_ranges:
    446           src_str.insert(1, unstashed_src_ranges.to_string_raw())
    447           if xf.use_stash:
    448             mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
    449             src_str.insert(2, mapped_unstashed.to_string_raw())
    450             mapped_stashes.append(mapped_unstashed)
    451             self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
    452         else:
    453           src_str.insert(1, "-")
    454           self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
    455 
    456         src_str = " ".join(src_str)
    457 
    458       # all versions:
    459       #   zero <rangeset>
    460       #   new <rangeset>
    461       #   erase <rangeset>
    462       #
    463       # version 1:
    464       #   bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
    465       #   imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
    466       #   move <src rangeset> <tgt rangeset>
    467       #
    468       # version 2:
    469       #   bsdiff patchstart patchlen <tgt rangeset> <src_str>
    470       #   imgdiff patchstart patchlen <tgt rangeset> <src_str>
    471       #   move <tgt rangeset> <src_str>
    472       #
    473       # version 3:
    474       #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
    475       #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
    476       #   move hash <tgt rangeset> <src_str>
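               #
               # For illustration only (not taken from a real transfer list), a
               # version 3 bsdiff command might look like:
               #   bsdiff 0 8192 <srchash> <tgthash> 2,10,14 4 2,20,24
               # i.e. the patch at offset 0 with length 8192 applies to source
               # blocks [20,24) and produces target blocks [10,14).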
    477 
    478       tgt_size = xf.tgt_ranges.size()
    479 
    480       if xf.style == "new":
    481         assert xf.tgt_ranges
    482         assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
    483         total += tgt_size
    484       elif xf.style == "move":
    485         assert xf.tgt_ranges
    486         assert xf.src_ranges.size() == tgt_size
    487         if xf.src_ranges != xf.tgt_ranges:
    488           if self.version == 1:
    489             out.append("%s %s %s\n" % (
    490                 xf.style,
    491                 xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
    492           elif self.version == 2:
    493             out.append("%s %s %s\n" % (
    494                 xf.style,
    495                 xf.tgt_ranges.to_string_raw(), src_str))
    496           elif self.version >= 3:
    497             # take into account automatic stashing of overlapping blocks
    498             if xf.src_ranges.overlaps(xf.tgt_ranges):
    499               temp_stash_usage = stashed_blocks + xf.src_ranges.size()
    500               if temp_stash_usage > max_stashed_blocks:
    501                 max_stashed_blocks = temp_stash_usage
    502 
    503             self.touched_src_ranges = self.touched_src_ranges.union(
    504                 xf.src_ranges)
    505 
    506             out.append("%s %s %s %s\n" % (
    507                 xf.style,
    508                 self.HashBlocks(self.tgt, xf.tgt_ranges),
    509                 xf.tgt_ranges.to_string_raw(), src_str))
    510           total += tgt_size
    511       elif xf.style in ("bsdiff", "imgdiff"):
    512         assert xf.tgt_ranges
    513         assert xf.src_ranges
    514         if self.version == 1:
    515           out.append("%s %d %d %s %s\n" % (
    516               xf.style, xf.patch_start, xf.patch_len,
    517               xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
    518         elif self.version == 2:
    519           out.append("%s %d %d %s %s\n" % (
    520               xf.style, xf.patch_start, xf.patch_len,
    521               xf.tgt_ranges.to_string_raw(), src_str))
    522         elif self.version >= 3:
    523           # take into account automatic stashing of overlapping blocks
    524           if xf.src_ranges.overlaps(xf.tgt_ranges):
    525             temp_stash_usage = stashed_blocks + xf.src_ranges.size()
    526             if temp_stash_usage > max_stashed_blocks:
    527               max_stashed_blocks = temp_stash_usage
    528 
    529           self.touched_src_ranges = self.touched_src_ranges.union(
    530               xf.src_ranges)
    531 
    532           out.append("%s %d %d %s %s %s %s\n" % (
    533               xf.style,
    534               xf.patch_start, xf.patch_len,
    535               self.HashBlocks(self.src, xf.src_ranges),
    536               self.HashBlocks(self.tgt, xf.tgt_ranges),
    537               xf.tgt_ranges.to_string_raw(), src_str))
    538         total += tgt_size
    539       elif xf.style == "zero":
    540         assert xf.tgt_ranges
    541         to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
    542         assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
    543         total += to_zero.size()
    544       else:
    545         raise ValueError("unknown transfer style '%s'\n" % xf.style)
    546 
    547       if free_string:
    548         out.append("".join(free_string))
    549         stashed_blocks -= free_size
    550 
    551       if self.version >= 2 and common.OPTIONS.cache_size is not None:
     552         # Sanity check: abort if we're going to need more stash space than
     553         # the allowed size (cache_size * threshold). The threshold serves two
     554         # purposes: a) part of the cache may already be occupied by recovery
     555         # logs; b) it leaves some headroom for dealing with an oversized
     556         # stash.
    557         cache_size = common.OPTIONS.cache_size
    558         stash_threshold = common.OPTIONS.stash_threshold
    559         max_allowed = cache_size * stash_threshold
    560         assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
    561                'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
    562                    max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
    563                    self.tgt.blocksize, max_allowed, cache_size,
    564                    stash_threshold)
    565 
    566     if self.version >= 3:
    567       self.touched_src_sha1 = self.HashBlocks(
    568           self.src, self.touched_src_ranges)
    569 
    570     # Zero out extended blocks as a workaround for bug 20881595.
    571     if self.tgt.extended:
    572       assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
    573               self.tgt.extended.size())
    574       total += self.tgt.extended.size()
    575 
     576     # We erase all the blocks on the partition that a) don't contain useful
     577     # data in the new image and b) will not be touched by dm-verity. Of those
     578     # blocks, the ones that won't be read by this update are erased at the
     579     # beginning of the update; the rest are erased at the end. This works
     580     # around an eMMC issue observed on some devices, which may otherwise
     581     # starve for clean blocks and thus fail the update. (b/28347095)
    582     all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    583     all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    584     new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
    585 
    586     erase_first = new_dontcare.subtract(self.touched_src_ranges)
    587     if erase_first:
    588       out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))
    589 
    590     erase_last = new_dontcare.subtract(erase_first)
    591     if erase_last:
    592       out.append("erase %s\n" % (erase_last.to_string_raw(),))
    593 
    594     out.insert(0, "%d\n" % (self.version,))   # format version number
    595     out.insert(1, "%d\n" % (total,))
    596     if self.version >= 2:
     597       # versions >= 2: after the total block count, we give the number
     598       # of stash slots needed, and the maximum size needed (in blocks)
    599       out.insert(2, str(next_stash_id) + "\n")
    600       out.insert(3, str(max_stashed_blocks) + "\n")
    601 
    602     with open(prefix + ".transfer.list", "wb") as f:
    603       for i in out:
    604         f.write(i)
    605 
    606     if self.version >= 2:
    607       self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    608       OPTIONS = common.OPTIONS
    609       if OPTIONS.cache_size is not None:
    610         max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
    611         print("max stashed blocks: %d  (%d bytes), "
    612               "limit: %d bytes (%.2f%%)\n" % (
    613               max_stashed_blocks, self._max_stashed_size, max_allowed,
    614               self._max_stashed_size * 100.0 / max_allowed))
    615       else:
    616         print("max stashed blocks: %d  (%d bytes), limit: <unknown>\n" % (
    617               max_stashed_blocks, self._max_stashed_size))
    618 
    619   def ReviseStashSize(self):
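             """Revises the transfers so that the stash usage stays within the limit.

             Commands whose stash requirement would exceed the /cache budget
             (cache_size * stash_threshold) are converted to "new" commands, trading
             package size for stash space.
             """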
    620     print("Revising stash size...")
    621     stashes = {}
    622 
    623     # Create the map between a stash and its def/use points. For example, for a
    624     # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
    625     for xf in self.transfers:
    626       # Command xf defines (stores) all the stashes in stash_before.
    627       for idx, sr in xf.stash_before:
    628         stashes[idx] = (sr, xf)
    629 
    630       # Record all the stashes command xf uses.
    631       for idx, _ in xf.use_stash:
    632         stashes[idx] += (xf,)
    633 
    634     # Compute the maximum blocks available for stash based on /cache size and
    635     # the threshold.
    636     cache_size = common.OPTIONS.cache_size
    637     stash_threshold = common.OPTIONS.stash_threshold
    638     max_allowed = cache_size * stash_threshold / self.tgt.blocksize
    639 
    640     stashed_blocks = 0
    641     new_blocks = 0
    642 
     643     # Now go through all the commands, computing the required stash size on
     644     # the fly. If a command needs more stash space than is available, we drop
     645     # that stash by replacing the command that uses it with a "new" command
     646     # instead.
    647     for xf in self.transfers:
    648       replaced_cmds = []
    649 
    650       # xf.stash_before generates explicit stash commands.
    651       for idx, sr in xf.stash_before:
    652         if stashed_blocks + sr.size() > max_allowed:
    653           # We cannot stash this one for a later command. Find out the command
    654           # that will use this stash and replace the command with "new".
    655           use_cmd = stashes[idx][2]
    656           replaced_cmds.append(use_cmd)
    657           print("%10d  %9s  %s" % (sr.size(), "explicit", use_cmd))
    658         else:
    659           stashed_blocks += sr.size()
    660 
    661       # xf.use_stash generates free commands.
    662       for _, sr in xf.use_stash:
    663         stashed_blocks -= sr.size()
    664 
    665       # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
    666       # ComputePatches(), they both have the style of "diff".
    667       if xf.style == "diff" and self.version >= 3:
    668         assert xf.tgt_ranges and xf.src_ranges
    669         if xf.src_ranges.overlaps(xf.tgt_ranges):
    670           if stashed_blocks + xf.src_ranges.size() > max_allowed:
    671             replaced_cmds.append(xf)
    672             print("%10d  %9s  %s" % (xf.src_ranges.size(), "implicit", xf))
    673 
    674       # Replace the commands in replaced_cmds with "new"s.
    675       for cmd in replaced_cmds:
     676         # The replaced command no longer uses any stashes in "use_stash".
     677         # Remove the def points for all those stashes.
    678         for idx, sr in cmd.use_stash:
    679           def_cmd = stashes[idx][1]
    680           assert (idx, sr) in def_cmd.stash_before
    681           def_cmd.stash_before.remove((idx, sr))
    682 
     683         # Add up the blocks that violate the space limit; the total number
     684         # will be printed to screen later.
    685         new_blocks += cmd.tgt_ranges.size()
    686         cmd.ConvertToNew()
    687 
    688     num_of_bytes = new_blocks * self.tgt.blocksize
    689     print("  Total %d blocks (%d bytes) are packed as new blocks due to "
    690           "insufficient cache size." % (new_blocks, num_of_bytes))
    691 
    692   def ComputePatches(self, prefix):
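             """Computes the patches needed by the 'diff' transfers.

             Identical src/tgt blocks become "move" commands; the rest are diffed
             with bsdiff or imgdiff. The raw data for "new" transfers goes to
             <prefix>.new.dat and the concatenated patches to <prefix>.patch.dat.
             """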
    693     print("Reticulating splines...")
    694     diff_q = []
    695     patch_num = 0
    696     with open(prefix + ".new.dat", "wb") as new_f:
    697       for xf in self.transfers:
    698         if xf.style == "zero":
    699           tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
    700           print("%10d %10d (%6.2f%%) %7s %s %s" % (
    701               tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
    702               str(xf.tgt_ranges)))
    703 
    704         elif xf.style == "new":
    705           for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
    706             new_f.write(piece)
    707           tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
    708           print("%10d %10d (%6.2f%%) %7s %s %s" % (
    709               tgt_size, tgt_size, 100.0, xf.style,
    710               xf.tgt_name, str(xf.tgt_ranges)))
    711 
    712         elif xf.style == "diff":
    713           src = self.src.ReadRangeSet(xf.src_ranges)
    714           tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)
    715 
    716           # We can't compare src and tgt directly because they may have
    717           # the same content but be broken up into blocks differently, eg:
    718           #
    719           #    ["he", "llo"]  vs  ["h", "ello"]
    720           #
    721           # We want those to compare equal, ideally without having to
    722           # actually concatenate the strings (these may be tens of
    723           # megabytes).
    724 
    725           src_sha1 = sha1()
    726           for p in src:
    727             src_sha1.update(p)
    728           tgt_sha1 = sha1()
    729           tgt_size = 0
    730           for p in tgt:
    731             tgt_sha1.update(p)
    732             tgt_size += len(p)
    733 
    734           if src_sha1.digest() == tgt_sha1.digest():
    735             # These are identical; we don't need to generate a patch,
    736             # just issue copy commands on the device.
    737             xf.style = "move"
    738             if xf.src_ranges != xf.tgt_ranges:
    739               print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
    740                   tgt_size, tgt_size, 100.0, xf.style,
    741                   xf.tgt_name if xf.tgt_name == xf.src_name else (
    742                       xf.tgt_name + " (from " + xf.src_name + ")"),
    743                   str(xf.tgt_ranges), str(xf.src_ranges)))
    744           else:
    745             # For files in zip format (eg, APKs, JARs, etc.) we would
    746             # like to use imgdiff -z if possible (because it usually
    747             # produces significantly smaller patches than bsdiff).
    748             # This is permissible if:
    749             #
    750             #  - imgdiff is not disabled, and
    751             #  - the source and target files are monotonic (ie, the
    752             #    data is stored with blocks in increasing order), and
    753             #  - we haven't removed any blocks from the source set.
    754             #
    755             # If these conditions are satisfied then appending all the
    756             # blocks in the set together in order will produce a valid
    757             # zip file (plus possibly extra zeros in the last block),
    758             # which is what imgdiff needs to operate.  (imgdiff is
    759             # fine with extra zeros at the end of the file.)
    760             imgdiff = (not self.disable_imgdiff and xf.intact and
    761                        xf.tgt_name.split(".")[-1].lower()
    762                        in ("apk", "jar", "zip"))
    763             xf.style = "imgdiff" if imgdiff else "bsdiff"
    764             diff_q.append((tgt_size, src, tgt, xf, patch_num))
    765             patch_num += 1
    766 
    767         else:
    768           assert False, "unknown style " + xf.style
    769 
    770     if diff_q:
    771       if self.threads > 1:
    772         print("Computing patches (using %d threads)..." % (self.threads,))
    773       else:
    774         print("Computing patches...")
    775       diff_q.sort()
    776 
    777       patches = [None] * patch_num
    778 
    779       # TODO: Rewrite with multiprocessing.ThreadPool?
    780       lock = threading.Lock()
    781       def diff_worker():
    782         while True:
    783           with lock:
    784             if not diff_q:
    785               return
    786             tgt_size, src, tgt, xf, patchnum = diff_q.pop()
    787           patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
    788           size = len(patch)
    789           with lock:
    790             patches[patchnum] = (patch, xf)
    791             print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
    792                 size, tgt_size, size * 100.0 / tgt_size, xf.style,
    793                 xf.tgt_name if xf.tgt_name == xf.src_name else (
    794                     xf.tgt_name + " (from " + xf.src_name + ")"),
    795                 str(xf.tgt_ranges), str(xf.src_ranges)))
    796 
    797       threads = [threading.Thread(target=diff_worker)
    798                  for _ in range(self.threads)]
    799       for th in threads:
    800         th.start()
    801       while threads:
    802         threads.pop().join()
    803     else:
    804       patches = []
    805 
    806     p = 0
    807     with open(prefix + ".patch.dat", "wb") as patch_f:
    808       for patch, xf in patches:
    809         xf.patch_start = p
    810         xf.patch_len = len(patch)
    811         patch_f.write(patch)
    812         p += len(patch)
    813 
    814   def AssertSequenceGood(self):
    815     # Simulate the sequences of transfers we will output, and check that:
    816     # - we never read a block after writing it, and
    817     # - we write every block we care about exactly once.
    818 
    819     # Start with no blocks having been touched yet.
    820     touched = array.array("B", "\0" * self.tgt.total_blocks)
    821 
    822     # Imagine processing the transfers in order.
    823     for xf in self.transfers:
    824       # Check that the input blocks for this transfer haven't yet been touched.
    825 
    826       x = xf.src_ranges
    827       if self.version >= 2:
    828         for _, sr in xf.use_stash:
    829           x = x.subtract(sr)
    830 
    831       for s, e in x:
     832         # The source image could be larger. Skip the blocks that exist only
     833         # in the source image, since they are not in 'touched' and will never
     834         # be written.
    835         for i in range(s, min(e, self.tgt.total_blocks)):
    836           assert touched[i] == 0
    837 
    838       # Check that the output blocks for this transfer haven't yet
    839       # been touched, and touch all the blocks written by this
    840       # transfer.
    841       for s, e in xf.tgt_ranges:
    842         for i in range(s, e):
    843           assert touched[i] == 0
    844           touched[i] = 1
    845 
    846     # Check that we've written every target block.
    847     for s, e in self.tgt.care_map:
    848       for i in range(s, e):
    849         assert touched[i] == 1
    850 
    851   def ImproveVertexSequence(self):
    852     print("Improving vertex order...")
    853 
    854     # At this point our digraph is acyclic; we reversed any edges that
    855     # were backwards in the heuristically-generated sequence.  The
    856     # previously-generated order is still acceptable, but we hope to
    857     # find a better order that needs less memory for stashed data.
    858     # Now we do a topological sort to generate a new vertex order,
    859     # using a greedy algorithm to choose which vertex goes next
    860     # whenever we have a choice.
    861 
    862     # Make a copy of the edge set; this copy will get destroyed by the
    863     # algorithm.
    864     for xf in self.transfers:
    865       xf.incoming = xf.goes_after.copy()
    866       xf.outgoing = xf.goes_before.copy()
    867 
    868     L = []   # the new vertex order
    869 
    870     # S is the set of sources in the remaining graph; we always choose
    871     # the one that leaves the least amount of stashed data after it's
    872     # executed.
    873     S = [(u.NetStashChange(), u.order, u) for u in self.transfers
    874          if not u.incoming]
    875     heapq.heapify(S)
    876 
    877     while S:
    878       _, _, xf = heapq.heappop(S)
    879       L.append(xf)
    880       for u in xf.outgoing:
    881         del u.incoming[xf]
    882         if not u.incoming:
    883           heapq.heappush(S, (u.NetStashChange(), u.order, u))
    884 
    885     # if this fails then our graph had a cycle.
    886     assert len(L) == len(self.transfers)
    887 
    888     self.transfers = L
    889     for i, xf in enumerate(L):
    890       xf.order = i
    891 
    892   def RemoveBackwardEdges(self):
    893     print("Removing backward edges...")
    894     in_order = 0
    895     out_of_order = 0
    896     lost_source = 0
    897 
    898     for xf in self.transfers:
    899       lost = 0
    900       size = xf.src_ranges.size()
    901       for u in xf.goes_before:
    902         # xf should go before u
    903         if xf.order < u.order:
    904           # it does, hurray!
    905           in_order += 1
    906         else:
    907           # it doesn't, boo.  trim the blocks that u writes from xf's
    908           # source, so that xf can go after u.
    909           out_of_order += 1
    910           assert xf.src_ranges.overlaps(u.tgt_ranges)
    911           xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
    912           xf.intact = False
    913 
    914       if xf.style == "diff" and not xf.src_ranges:
    915         # nothing left to diff from; treat as new data
    916         xf.style = "new"
    917 
    918       lost = size - xf.src_ranges.size()
    919       lost_source += lost
    920 
    921     print(("  %d/%d dependencies (%.2f%%) were violated; "
    922            "%d source blocks removed.") %
    923           (out_of_order, in_order + out_of_order,
    924            (out_of_order * 100.0 / (in_order + out_of_order))
    925            if (in_order + out_of_order) else 0.0,
    926            lost_source))
    927 
    928   def ReverseBackwardEdges(self):
    929     print("Reversing backward edges...")
    930     in_order = 0
    931     out_of_order = 0
    932     stashes = 0
    933     stash_size = 0
    934 
    935     for xf in self.transfers:
    936       for u in xf.goes_before.copy():
    937         # xf should go before u
    938         if xf.order < u.order:
    939           # it does, hurray!
    940           in_order += 1
    941         else:
    942           # it doesn't, boo.  modify u to stash the blocks that it
    943           # writes that xf wants to read, and then require u to go
    944           # before xf.
    945           out_of_order += 1
    946 
    947           overlap = xf.src_ranges.intersect(u.tgt_ranges)
    948           assert overlap
    949 
    950           u.stash_before.append((stashes, overlap))
    951           xf.use_stash.append((stashes, overlap))
    952           stashes += 1
    953           stash_size += overlap.size()
    954 
    955           # reverse the edge direction; now xf must go after u
    956           del xf.goes_before[u]
    957           del u.goes_after[xf]
    958           xf.goes_after[u] = None    # value doesn't matter
    959           u.goes_before[xf] = None
    960 
    961     print(("  %d/%d dependencies (%.2f%%) were violated; "
    962            "%d source blocks stashed.") %
    963           (out_of_order, in_order + out_of_order,
    964            (out_of_order * 100.0 / (in_order + out_of_order))
    965            if (in_order + out_of_order) else 0.0,
    966            stash_size))
    967 
    968   def FindVertexSequence(self):
    969     print("Finding vertex sequence...")
    970 
    971     # This is based on "A Fast & Effective Heuristic for the Feedback
    972     # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth.  Think of
    973     # it as starting with the digraph G and moving all the vertices to
    974     # be on a horizontal line in some order, trying to minimize the
    975     # number of edges that end up pointing to the left.  Left-pointing
    976     # edges will get removed to turn the digraph into a DAG.  In this
    977     # case each edge has a weight which is the number of source blocks
    978     # we'll lose if that edge is removed; we try to minimize the total
    979     # weight rather than just the number of edges.
    980 
    981     # Make a copy of the edge set; this copy will get destroyed by the
    982     # algorithm.
    983     for xf in self.transfers:
    984       xf.incoming = xf.goes_after.copy()
    985       xf.outgoing = xf.goes_before.copy()
    986       xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
    987 
    988     # We use an OrderedDict instead of just a set so that the output
    989     # is repeatable; otherwise it would depend on the hash values of
    990     # the transfer objects.
    991     G = OrderedDict()
    992     for xf in self.transfers:
    993       G[xf] = None
    994     s1 = deque()  # the left side of the sequence, built from left to right
    995     s2 = deque()  # the right side of the sequence, built from right to left
    996 
    997     heap = []
    998     for xf in self.transfers:
    999       xf.heap_item = HeapItem(xf)
   1000       heap.append(xf.heap_item)
   1001     heapq.heapify(heap)
   1002 
   1003     sinks = set(u for u in G if not u.outgoing)
   1004     sources = set(u for u in G if not u.incoming)
   1005 
   1006     def adjust_score(iu, delta):
   1007       iu.score += delta
   1008       iu.heap_item.clear()
   1009       iu.heap_item = HeapItem(iu)
   1010       heapq.heappush(heap, iu.heap_item)
   1011 
   1012     while G:
   1013       # Put all sinks at the end of the sequence.
   1014       while sinks:
   1015         new_sinks = set()
   1016         for u in sinks:
   1017           if u not in G: continue
   1018           s2.appendleft(u)
   1019           del G[u]
   1020           for iu in u.incoming:
   1021             adjust_score(iu, -iu.outgoing.pop(u))
   1022             if not iu.outgoing: new_sinks.add(iu)
   1023         sinks = new_sinks
   1024 
   1025       # Put all the sources at the beginning of the sequence.
   1026       while sources:
   1027         new_sources = set()
   1028         for u in sources:
   1029           if u not in G: continue
   1030           s1.append(u)
   1031           del G[u]
   1032           for iu in u.outgoing:
   1033             adjust_score(iu, +iu.incoming.pop(u))
   1034             if not iu.incoming: new_sources.add(iu)
   1035         sources = new_sources
   1036 
   1037       if not G: break
   1038 
   1039       # Find the "best" vertex to put next.  "Best" is the one that
   1040       # maximizes the net difference in source blocks saved we get by
   1041       # pretending it's a source rather than a sink.
   1042 
   1043       while True:
   1044         u = heapq.heappop(heap)
   1045         if u and u.item in G:
   1046           u = u.item
   1047           break
   1048 
   1049       s1.append(u)
   1050       del G[u]
   1051       for iu in u.outgoing:
   1052         adjust_score(iu, +iu.incoming.pop(u))
   1053         if not iu.incoming: sources.add(iu)
   1054 
   1055       for iu in u.incoming:
   1056         adjust_score(iu, -iu.outgoing.pop(u))
   1057         if not iu.outgoing: sinks.add(iu)
   1058 
   1059     # Now record the sequence in the 'order' field of each transfer,
   1060     # and by rearranging self.transfers to be in the chosen sequence.
   1061 
   1062     new_transfers = []
   1063     for x in itertools.chain(s1, s2):
   1064       x.order = len(new_transfers)
   1065       new_transfers.append(x)
   1066       del x.incoming
   1067       del x.outgoing
   1068 
   1069     self.transfers = new_transfers
   1070 
   1071   def GenerateDigraph(self):
   1072     print("Generating digraph...")
   1073 
   1074     # Each item of source_ranges will be:
   1075     #   - None, if that block is not used as a source,
   1076     #   - a transfer, if one transfer uses it as a source, or
   1077     #   - a set of transfers.
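             # (In effect, source_ranges is an inverted index from block number to
             # the transfer(s) reading that block; it is used below to find which
             # transfers' source blocks each transfer's target blocks intersect.)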
   1078     source_ranges = []
   1079     for b in self.transfers:
   1080       for s, e in b.src_ranges:
   1081         if e > len(source_ranges):
   1082           source_ranges.extend([None] * (e-len(source_ranges)))
   1083         for i in range(s, e):
   1084           if source_ranges[i] is None:
   1085             source_ranges[i] = b
   1086           else:
   1087             if not isinstance(source_ranges[i], set):
   1088               source_ranges[i] = set([source_ranges[i]])
   1089             source_ranges[i].add(b)
   1090 
   1091     for a in self.transfers:
   1092       intersections = set()
   1093       for s, e in a.tgt_ranges:
   1094         for i in range(s, e):
   1095           if i >= len(source_ranges): break
   1096           b = source_ranges[i]
   1097           if b is not None:
   1098             if isinstance(b, set):
   1099               intersections.update(b)
   1100             else:
   1101               intersections.add(b)
   1102 
   1103       for b in intersections:
   1104         if a is b: continue
   1105 
   1106         # If the blocks written by A are read by B, then B needs to go before A.
   1107         i = a.tgt_ranges.intersect(b.src_ranges)
   1108         if i:
   1109           if b.src_name == "__ZERO":
   1110             # the cost of removing source blocks for the __ZERO domain
   1111             # is (nearly) zero.
   1112             size = 0
   1113           else:
   1114             size = i.size()
   1115           b.goes_before[a] = size
   1116           a.goes_after[b] = size
   1117 
   1118   def FindTransfers(self):
   1119     """Parse the file_map to generate all the transfers."""
   1120 
   1121     def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges,
   1122                           style, by_id):
    1123       """Adds one or multiple Transfer()s by splitting large files.
    1124 
    1125       For BBOTA v3, we need to stash source blocks to support resumable
    1126       updates. However, as files grow and the cache partition shrinks, the
    1127       source blocks of a single file may be too large to stash. If a file
    1128       occupies too many blocks, we split it into smaller pieces by emitting
    1129       multiple Transfer()s.
    1130 
    1131       The downside is that splitting may increase the package size, since the
    1132       split pieces don't align well. According to our experiments, 1/8 of the
    1133       cache size as the per-piece limit appears to be optimal. Compared to the
    1134       fixed 1024-block limit, it reduces the overall package size by 30% for
    1135       volantis, and 20% for angler and bullhead."""
   1136 
   1137       # Possibly split large files into smaller chunks.
   1138       pieces = 0
   1139       cache_size = common.OPTIONS.cache_size
   1140       split_threshold = 0.125
   1141       max_blocks_per_transfer = int(cache_size * split_threshold /
   1142                                     self.tgt.blocksize)
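               # For illustration only (not a statement about any real device):
               # with a 256 MiB cache partition and 4K blocks, the limit is
               # 256 * 1024 * 1024 * 0.125 / 4096 = 8192 blocks (32 MiB) per piece.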
   1143 
   1144       # Change nothing for small files.
   1145       if (tgt_ranges.size() <= max_blocks_per_transfer and
   1146           src_ranges.size() <= max_blocks_per_transfer):
   1147         Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
   1148         return
   1149 
   1150       while (tgt_ranges.size() > max_blocks_per_transfer and
   1151              src_ranges.size() > max_blocks_per_transfer):
   1152         tgt_split_name = "%s-%d" % (tgt_name, pieces)
   1153         src_split_name = "%s-%d" % (src_name, pieces)
   1154         tgt_first = tgt_ranges.first(max_blocks_per_transfer)
   1155         src_first = src_ranges.first(max_blocks_per_transfer)
   1156 
   1157         Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
   1158                  by_id)
   1159 
   1160         tgt_ranges = tgt_ranges.subtract(tgt_first)
   1161         src_ranges = src_ranges.subtract(src_first)
   1162         pieces += 1
   1163 
   1164       # Handle remaining blocks.
   1165       if tgt_ranges.size() or src_ranges.size():
   1166         # Must be both non-empty.
   1167         assert tgt_ranges.size() and src_ranges.size()
   1168         tgt_split_name = "%s-%d" % (tgt_name, pieces)
   1169         src_split_name = "%s-%d" % (src_name, pieces)
   1170         Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
   1171                  by_id)
   1172 
   1173     def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
   1174                     split=False):
   1175       """Wrapper function for adding a Transfer()."""
   1176 
   1177       # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
   1178       # otherwise add the Transfer() as is.
   1179       if style != "diff" or not split:
   1180         Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
   1181         return
   1182 
    1183       # Handle .odex files specially by analyzing the block-wise difference.
    1184       # If most blocks are identical with only a few changes (e.g. the
    1185       # header), we patch the changed blocks only, which avoids stashing
    1186       # unchanged blocks while patching. We limit the analysis to files whose
    1187       # size is unchanged, so as not to increase the OTA generation cost too
    1188       # much.
   1189       if (tgt_name.split(".")[-1].lower() == 'odex' and
   1190           tgt_ranges.size() == src_ranges.size()):
   1191 
    1192         # The 0.5 threshold can be tuned further. The tradeoff: if only very
    1193         # few blocks remain identical, we lose the opportunity to use imgdiff,
    1194         # which may achieve a better compression ratio than bsdiff.
   1195         crop_threshold = 0.5
   1196 
   1197         tgt_skipped = RangeSet()
   1198         src_skipped = RangeSet()
   1199         tgt_size = tgt_ranges.size()
   1200         tgt_changed = 0
   1201         for src_block, tgt_block in zip(src_ranges.next_item(),
   1202                                         tgt_ranges.next_item()):
   1203           src_rs = RangeSet(str(src_block))
   1204           tgt_rs = RangeSet(str(tgt_block))
   1205           if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
   1206             tgt_skipped = tgt_skipped.union(tgt_rs)
   1207             src_skipped = src_skipped.union(src_rs)
   1208           else:
   1209             tgt_changed += tgt_rs.size()
   1210 
   1211           # Terminate early if no clear sign of benefits.
   1212           if tgt_changed > tgt_size * crop_threshold:
   1213             break
   1214 
   1215         if tgt_changed < tgt_size * crop_threshold:
   1216           assert tgt_changed + tgt_skipped.size() == tgt_size
   1217           print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
   1218                 tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
   1219           AddSplitTransfers(
   1220               "%s-skipped" % (tgt_name,),
   1221               "%s-skipped" % (src_name,),
   1222               tgt_skipped, src_skipped, style, by_id)
   1223 
   1224           # Intentionally change the file extension to avoid being imgdiff'd as
   1225           # the files are no longer in their original format.
   1226           tgt_name = "%s-cropped" % (tgt_name,)
   1227           src_name = "%s-cropped" % (src_name,)
   1228           tgt_ranges = tgt_ranges.subtract(tgt_skipped)
   1229           src_ranges = src_ranges.subtract(src_skipped)
   1230 
    1231           # It's possible that no changed blocks remain.
   1232           if not tgt_ranges:
   1233             return
   1234 
   1235       # Add the transfer(s).
   1236       AddSplitTransfers(
   1237           tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
   1238 
   1239     print("Finding transfers...")
   1240 
   1241     empty = RangeSet()
   1242     for tgt_fn, tgt_ranges in self.tgt.file_map.items():
   1243       if tgt_fn == "__ZERO":
   1244         # the special "__ZERO" domain is all the blocks not contained
   1245         # in any file and that are filled with zeros.  We have a
   1246         # special transfer style for zero blocks.
   1247         src_ranges = self.src.file_map.get("__ZERO", empty)
   1248         AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
   1249                     "zero", self.transfers)
   1250         continue
   1251 
   1252       elif tgt_fn == "__COPY":
   1253         # "__COPY" domain includes all the blocks not contained in any
   1254         # file and that need to be copied unconditionally to the target.
   1255         AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
   1256         continue
   1257 
   1258       elif tgt_fn in self.src.file_map:
   1259         # Look for an exact pathname match in the source.
   1260         AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
   1261                     "diff", self.transfers, self.version >= 3)
   1262         continue
   1263 
   1264       b = os.path.basename(tgt_fn)
   1265       if b in self.src_basenames:
   1266         # Look for an exact basename match in the source.
   1267         src_fn = self.src_basenames[b]
   1268         AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
   1269                     "diff", self.transfers, self.version >= 3)
   1270         continue
   1271 
   1272       b = re.sub("[0-9]+", "#", b)
   1273       if b in self.src_numpatterns:
   1274         # Look for a 'number pattern' match (a basename match after
   1275         # all runs of digits are replaced by "#").  (This is useful
   1276         # for .so files that contain version numbers in the filename
   1277         # that get bumped.)
   1278         src_fn = self.src_numpatterns[b]
   1279         AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
   1280                     "diff", self.transfers, self.version >= 3)
   1281         continue
   1282 
   1283       AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
   1284 
   1285   def AbbreviateSourceNames(self):
   1286     for k in self.src.file_map.keys():
   1287       b = os.path.basename(k)
   1288       self.src_basenames[b] = k
   1289       b = re.sub("[0-9]+", "#", b)
   1290       self.src_numpatterns[b] = k
   1291 
   1292   @staticmethod
   1293   def AssertPartition(total, seq):
   1294     """Assert that all the RangeSets in 'seq' form a partition of the
   1295     'total' RangeSet (ie, they are nonintersecting and their union
   1296     equals 'total')."""
   1297 
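             # For example, RangeSet("0-9") is partitioned by
             # [RangeSet("0-4"), RangeSet("5-9")], but not by
             # [RangeSet("0-4"), RangeSet("4-9")] (overlapping) or
             # [RangeSet("0-4"), RangeSet("6-9")] (block 5 missing).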
   1298     so_far = RangeSet()
   1299     for i in seq:
   1300       assert not so_far.overlaps(i)
   1301       so_far = so_far.union(i)
   1302     assert so_far == total
   1303