1 # Copyright (C) 2014 The Android Open Source Project 2 # 3 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # you may not use this file except in compliance with the License. 5 # You may obtain a copy of the License at 6 # 7 # http://www.apache.org/licenses/LICENSE-2.0 8 # 9 # Unless required by applicable law or agreed to in writing, software 10 # distributed under the License is distributed on an "AS IS" BASIS, 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 # See the License for the specific language governing permissions and 13 # limitations under the License. 14 15 from __future__ import print_function 16 17 import array 18 import common 19 import functools 20 import heapq 21 import itertools 22 import multiprocessing 23 import os 24 import os.path 25 import re 26 import subprocess 27 import sys 28 import threading 29 30 from collections import deque, OrderedDict 31 from hashlib import sha1 32 from rangelib import RangeSet 33 34 35 __all__ = ["EmptyImage", "DataImage", "BlockImageDiff"] 36 37 38 def compute_patch(srcfile, tgtfile, imgdiff=False): 39 patchfile = common.MakeTempFile(prefix='patch-') 40 41 cmd = ['imgdiff', '-z'] if imgdiff else ['bsdiff'] 42 cmd.extend([srcfile, tgtfile, patchfile]) 43 44 # Don't dump the bsdiff/imgdiff commands, which are not useful for the case 45 # here, since they contain temp filenames only. 
46 p = common.Run(cmd, verbose=False, stdout=subprocess.PIPE, 47 stderr=subprocess.STDOUT) 48 output, _ = p.communicate() 49 50 if p.returncode != 0: 51 raise ValueError(output) 52 53 with open(patchfile, 'rb') as f: 54 return f.read() 55 56 57 class Image(object): 58 def RangeSha1(self, ranges): 59 raise NotImplementedError 60 61 def ReadRangeSet(self, ranges): 62 raise NotImplementedError 63 64 def TotalSha1(self, include_clobbered_blocks=False): 65 raise NotImplementedError 66 67 def WriteRangeDataToFd(self, ranges, fd): 68 raise NotImplementedError 69 70 71 class EmptyImage(Image): 72 """A zero-length image.""" 73 74 def __init__(self): 75 self.blocksize = 4096 76 self.care_map = RangeSet() 77 self.clobbered_blocks = RangeSet() 78 self.extended = RangeSet() 79 self.total_blocks = 0 80 self.file_map = {} 81 82 def RangeSha1(self, ranges): 83 return sha1().hexdigest() 84 85 def ReadRangeSet(self, ranges): 86 return () 87 88 def TotalSha1(self, include_clobbered_blocks=False): 89 # EmptyImage always carries empty clobbered_blocks, so 90 # include_clobbered_blocks can be ignored. 
91 assert self.clobbered_blocks.size() == 0 92 return sha1().hexdigest() 93 94 def WriteRangeDataToFd(self, ranges, fd): 95 raise ValueError("Can't write data from EmptyImage to file") 96 97 98 class DataImage(Image): 99 """An image wrapped around a single string of data.""" 100 101 def __init__(self, data, trim=False, pad=False): 102 self.data = data 103 self.blocksize = 4096 104 105 assert not (trim and pad) 106 107 partial = len(self.data) % self.blocksize 108 padded = False 109 if partial > 0: 110 if trim: 111 self.data = self.data[:-partial] 112 elif pad: 113 self.data += '\0' * (self.blocksize - partial) 114 padded = True 115 else: 116 raise ValueError(("data for DataImage must be multiple of %d bytes " 117 "unless trim or pad is specified") % 118 (self.blocksize,)) 119 120 assert len(self.data) % self.blocksize == 0 121 122 self.total_blocks = len(self.data) / self.blocksize 123 self.care_map = RangeSet(data=(0, self.total_blocks)) 124 # When the last block is padded, we always write the whole block even for 125 # incremental OTAs. Because otherwise the last block may get skipped if 126 # unchanged for an incremental, but would fail the post-install 127 # verification if it has non-zero contents in the padding bytes. 
128 # Bug: 23828506 129 if padded: 130 clobbered_blocks = [self.total_blocks-1, self.total_blocks] 131 else: 132 clobbered_blocks = [] 133 self.clobbered_blocks = clobbered_blocks 134 self.extended = RangeSet() 135 136 zero_blocks = [] 137 nonzero_blocks = [] 138 reference = '\0' * self.blocksize 139 140 for i in range(self.total_blocks-1 if padded else self.total_blocks): 141 d = self.data[i*self.blocksize : (i+1)*self.blocksize] 142 if d == reference: 143 zero_blocks.append(i) 144 zero_blocks.append(i+1) 145 else: 146 nonzero_blocks.append(i) 147 nonzero_blocks.append(i+1) 148 149 assert zero_blocks or nonzero_blocks or clobbered_blocks 150 151 self.file_map = dict() 152 if zero_blocks: 153 self.file_map["__ZERO"] = RangeSet(data=zero_blocks) 154 if nonzero_blocks: 155 self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks) 156 if clobbered_blocks: 157 self.file_map["__COPY"] = RangeSet(data=clobbered_blocks) 158 159 def _GetRangeData(self, ranges): 160 for s, e in ranges: 161 yield self.data[s*self.blocksize:e*self.blocksize] 162 163 def RangeSha1(self, ranges): 164 h = sha1() 165 for data in self._GetRangeData(ranges): 166 h.update(data) 167 return h.hexdigest() 168 169 def ReadRangeSet(self, ranges): 170 return [self._GetRangeData(ranges)] 171 172 def TotalSha1(self, include_clobbered_blocks=False): 173 if not include_clobbered_blocks: 174 return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks)) 175 else: 176 return sha1(self.data).hexdigest() 177 178 def WriteRangeDataToFd(self, ranges, fd): 179 for data in self._GetRangeData(ranges): 180 fd.write(data) 181 182 183 class Transfer(object): 184 def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1, 185 src_sha1, style, by_id): 186 self.tgt_name = tgt_name 187 self.src_name = src_name 188 self.tgt_ranges = tgt_ranges 189 self.src_ranges = src_ranges 190 self.tgt_sha1 = tgt_sha1 191 self.src_sha1 = src_sha1 192 self.style = style 193 self.intact = (getattr(tgt_ranges, "monotonic", 
False) and 194 getattr(src_ranges, "monotonic", False)) 195 196 # We use OrderedDict rather than dict so that the output is repeatable; 197 # otherwise it would depend on the hash values of the Transfer objects. 198 self.goes_before = OrderedDict() 199 self.goes_after = OrderedDict() 200 201 self.stash_before = [] 202 self.use_stash = [] 203 204 self.id = len(by_id) 205 by_id.append(self) 206 207 def NetStashChange(self): 208 return (sum(sr.size() for (_, sr) in self.stash_before) - 209 sum(sr.size() for (_, sr) in self.use_stash)) 210 211 def ConvertToNew(self): 212 assert self.style != "new" 213 self.use_stash = [] 214 self.style = "new" 215 self.src_ranges = RangeSet() 216 217 def __str__(self): 218 return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style + 219 " to " + str(self.tgt_ranges) + ">") 220 221 222 @functools.total_ordering 223 class HeapItem(object): 224 def __init__(self, item): 225 self.item = item 226 # Negate the score since python's heap is a min-heap and we want 227 # the maximum score. 228 self.score = -item.score 229 def clear(self): 230 self.item = None 231 def __bool__(self): 232 return self.item is None 233 def __eq__(self, other): 234 return self.score == other.score 235 def __le__(self, other): 236 return self.score <= other.score 237 238 239 # BlockImageDiff works on two image objects. An image object is 240 # anything that provides the following attributes: 241 # 242 # blocksize: the size in bytes of a block, currently must be 4096. 243 # 244 # total_blocks: the total size of the partition/image, in blocks. 245 # 246 # care_map: a RangeSet containing which blocks (in the range [0, 247 # total_blocks) we actually care about; i.e. which blocks contain 248 # data. 249 # 250 # file_map: a dict that partitions the blocks contained in care_map 251 # into smaller domains that are useful for doing diffs on. 252 # (Typically a domain is a file, and the key in file_map is the 253 # pathname.) 
#
#    clobbered_blocks: a RangeSet containing which blocks contain data
#      but may be altered by the FS. They need to be excluded when
#      verifying the partition integrity.
#
#    ReadRangeSet(): a function that takes a RangeSet and returns the
#      data contained in the image blocks of that RangeSet. The data
#      is returned as a list or tuple of strings; concatenating the
#      elements together should produce the requested data.
#      Implementations are free to break up the data into list/tuple
#      elements in any way that is convenient.
#
#    RangeSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the specified range.
#
#    TotalSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the image (ie, all the blocks in the
#      care_map minus clobbered_blocks, or including the clobbered
#      blocks if include_clobbered_blocks is True).
#
# When creating a BlockImageDiff, the src image may be None, in which
# case the list of transfers produced will never read from the
# original image.

class BlockImageDiff(object):
  """Computes the diff between two block images and emits a BBOTA
  (version 3 or 4) transfer list plus patch/new-data files."""

  def __init__(self, tgt, src=None, threads=None, version=4,
               disable_imgdiff=False):
    if threads is None:
      # Default to half the CPUs, but always at least one worker thread.
      threads = multiprocessing.cpu_count() // 2
      if threads == 0:
        threads = 1
    self.threads = threads
    self.version = version
    self.transfers = []
    self.src_basenames = {}
    self.src_numpatterns = {}
    self._max_stashed_size = 0
    # Source blocks read (directly or via stash) by the generated script;
    # used for the pre-install verification and the erase computation.
    self.touched_src_ranges = RangeSet()
    self.touched_src_sha1 = None
    self.disable_imgdiff = disable_imgdiff

    assert version in (3, 4)

    self.tgt = tgt
    if src is None:
      src = EmptyImage()
    self.src = src

    # The updater code that installs the patch always uses 4k blocks.
    assert tgt.blocksize == 4096
    assert src.blocksize == 4096

    # The range sets in each filemap should comprise a partition of
    # the care map.
    self.AssertPartition(src.care_map, src.file_map.values())
    self.AssertPartition(tgt.care_map, tgt.file_map.values())

  @property
  def max_stashed_size(self):
    # Peak stash usage in bytes, computed by WriteTransfers().
    return self._max_stashed_size

  def Compute(self, prefix):
    """Run the full pipeline and write <prefix>.transfer.list,
    <prefix>.new.dat and <prefix>.patch.dat."""
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) a exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

  def WriteTransfers(self, prefix):
    """Serialize the ordered transfers into <prefix>.transfer.list."""
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      # The src_str field has the form:
      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        # Map the stashed range into the coordinate space of this command's
        # source (positions within xf.src_ranges).
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
               'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                   max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                   self.tgt.blocksize, max_allowed, cache_size,
                   stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      print("max stashed blocks: %d (%d bytes), "
            "limit: %d bytes (%.2f%%)\n" % (
                max_stashed_blocks, self._max_stashed_size, max_allowed,
                self._max_stashed_size * 100.0 / max_allowed))
    else:
      print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
          max_stashed_blocks, self._max_stashed_size))

  def ReviseStashSize(self):
    """Convert transfers to "new" until peak stash fits in the cache limit.

    Returns the number of target blocks that had to be repacked as new
    data."""
    print("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        if sh not in stashes:
          stashed_blocks_after += sr.size()

        if stashed_blocks_after > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
    return new_blocks

  def ComputePatches(self, prefix):
    """Compute patches for all "diff" transfers (writing <prefix>.patch.dat)
    and stream "new" data into <prefix>.new.dat."""
    print("Reticulating splines...")
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
              str(xf.tgt_ranges)))

        elif xf.style == "new":
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style,
              xf.tgt_name, str(xf.tgt_ranges)))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #    ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
                  tgt_size, tgt_size, 100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges)))
          else:
            # For files in zip format (eg, APKs, JARs, etc.) we would
            # like to use imgdiff -z if possible (because it usually
            # produces significantly smaller patches than bsdiff).
            # This is permissible if:
            #
            #  - imgdiff is not disabled, and
            #  - the source and target files are monotonic (ie, the
            #    data is stored with blocks in increasing order), and
            #  - we haven't removed any blocks from the source set.
            #
            # If these conditions are satisfied then appending all the
            # blocks in the set together in order will produce a valid
            # zip file (plus possibly extra zeros in the last block),
            # which is what imgdiff needs to operate. (imgdiff is
            # fine with extra zeros at the end of the file.)
            imgdiff = (not self.disable_imgdiff and xf.intact and
                       xf.tgt_name.split(".")[-1].lower()
                       in ("apk", "jar", "zip"))
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    if diff_queue:
      if self.threads > 1:
        print("Computing patches (using %d threads)..." % (self.threads,))
      else:
        print("Computing patches...")

      diff_total = len(diff_queue)
      patches = [None] * diff_total
      error_messages = []
      if sys.stdout.isatty():
        global diff_done
        diff_done = 0

      # Using multiprocessing doesn't give additional benefits, due to the
      # pattern of the code. The diffing work is done by subprocess.call, which
      # already runs in a separate process (not affected much by the GIL -
      # Global Interpreter Lock). Using multiprocess also requires either a)
      # writing the diff input files in the main process before forking, or b)
      # reopening the image file (SparseImage) in the worker processes. Doing
      # neither of them further improves the performance.
      lock = threading.Lock()

      def diff_worker():
        # Each worker pops one queued diff at a time until the queue drains.
        while True:
          with lock:
            if not diff_queue:
              return
            xf_index, imgdiff, patch_index = diff_queue.pop()

          xf = self.transfers[xf_index]
          src_ranges = xf.src_ranges
          tgt_ranges = xf.tgt_ranges

          # Needs lock since WriteRangeDataToFd() is stateful (calling seek).
          with lock:
            src_file = common.MakeTempFile(prefix="src-")
            with open(src_file, "wb") as fd:
              self.src.WriteRangeDataToFd(src_ranges, fd)

            tgt_file = common.MakeTempFile(prefix="tgt-")
            with open(tgt_file, "wb") as fd:
              self.tgt.WriteRangeDataToFd(tgt_ranges, fd)

          try:
            patch = compute_patch(src_file, tgt_file, imgdiff)
          except ValueError as e:
            with lock:
              error_messages.append(
                  "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
                      "imgdiff" if imgdiff else "bsdiff",
                      xf.tgt_name if xf.tgt_name == xf.src_name else
                          xf.tgt_name + " (from " + xf.src_name + ")",
                      xf.tgt_ranges, xf.src_ranges, e.message))

          with lock:
            patches[patch_index] = (xf_index, patch)
            if sys.stdout.isatty():
              global diff_done
              diff_done += 1
              progress = diff_done * 100 / diff_total
              # '\033[K' is to clear to EOL.
              print(' [%d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
              sys.stdout.flush()

      threads = [threading.Thread(target=diff_worker)
                 for _ in range(self.threads)]
      for th in threads:
        th.start()
      while threads:
        threads.pop().join()

      if sys.stdout.isatty():
        print('\n')

      if error_messages:
        print('\n'.join(error_messages))
        sys.exit(1)
    else:
      patches = []

    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch)

        if common.OPTIONS.verbose:
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
              xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
              xf.style,
              xf.tgt_name if xf.tgt_name == xf.src_name else (
                  xf.tgt_name + " (from " + xf.src_name + ")"),
              xf.tgt_ranges, xf.src_ranges))

  def AssertSequenceGood(self):
    """Verify the transfer order never reads a block after writing it and
    writes every cared-about target block exactly once."""
    # Simulate the sequences of transfers we will output, and check that:
    # - we never read a block after writing it, and
    # - we write every block we care about exactly once.

    # Start with no blocks having been touched yet.
    touched = array.array("B", "\0" * self.tgt.total_blocks)

    # Imagine processing the transfers in order.
    for xf in self.transfers:
      # Check that the input blocks for this transfer haven't yet been touched.

      x = xf.src_ranges
      for _, sr in xf.use_stash:
        x = x.subtract(sr)

      for s, e in x:
        # Source image could be larger. Don't check the blocks that are in the
        # source image only. Since they are not in 'touched', and won't ever
        # be touched.
        for i in range(s, min(e, self.tgt.total_blocks)):
          assert touched[i] == 0

      # Check that the output blocks for this transfer haven't yet
      # been touched, and touch all the blocks written by this
      # transfer.
      for s, e in xf.tgt_ranges:
        for i in range(s, e):
          assert touched[i] == 0
          touched[i] = 1

    # Check that we've written every target block.
    for s, e in self.tgt.care_map:
      for i in range(s, e):
        assert touched[i] == 1

  def ImproveVertexSequence(self):
    """Topologically re-sort the (now acyclic) transfer graph, greedily
    preferring vertices that minimize stashed data."""
    print("Improving vertex order...")

    # At this point our digraph is acyclic; we reversed any edges that
    # were backwards in the heuristically-generated sequence. The
    # previously-generated order is still acceptable, but we hope to
    # find a better order that needs less memory for stashed data.
    # Now we do a topological sort to generate a new vertex order,
    # using a greedy algorithm to choose which vertex goes next
    # whenever we have a choice.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()

    L = []   # the new vertex order

    # S is the set of sources in the remaining graph; we always choose
    # the one that leaves the least amount of stashed data after it's
    # executed.
    S = [(u.NetStashChange(), u.order, u) for u in self.transfers
         if not u.incoming]
    heapq.heapify(S)

    while S:
      _, _, xf = heapq.heappop(S)
      L.append(xf)
      for u in xf.outgoing:
        del u.incoming[xf]
        if not u.incoming:
          heapq.heappush(S, (u.NetStashChange(), u.order, u))

    # if this fails then our graph had a cycle.
    assert len(L) == len(self.transfers)

    self.transfers = L
    for i, xf in enumerate(L):
      xf.order = i

  def RemoveBackwardEdges(self):
    """Drop ordering violations by trimming the overlapping source blocks
    from out-of-order transfers (non-stash fallback)."""
    print("Removing backward edges...")
    in_order = 0
    out_of_order = 0
    lost_source = 0

    for xf in self.transfers:
      lost = 0
      size = xf.src_ranges.size()
      for u in xf.goes_before:
        # xf should go before u
        if xf.order < u.order:
          # it does, hurray!
          in_order += 1
        else:
          # it doesn't, boo. trim the blocks that u writes from xf's
          # source, so that xf can go after u.
          out_of_order += 1
          assert xf.src_ranges.overlaps(u.tgt_ranges)
          xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
          xf.intact = False

      if xf.style == "diff" and not xf.src_ranges:
        # nothing left to diff from; treat as new data
        xf.style = "new"

      lost = size - xf.src_ranges.size()
      lost_source += lost

    print((" %d/%d dependencies (%.2f%%) were violated; "
           "%d source blocks removed.") %
          (out_of_order, in_order + out_of_order,
           (out_of_order * 100.0 / (in_order + out_of_order))
           if (in_order + out_of_order) else 0.0,
           lost_source))

  def ReverseBackwardEdges(self):
    """Reverse unsatisfying edges and compute pairs of stashed blocks.

    For each transfer, make sure it properly stashes the blocks it touches and
    will be used by later transfers. It uses pairs of (stash_raw_id, range) to
    record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
    identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
    it is possible to have multiple pairs with different 'stash_raw_id's. Each
    'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
    blocks will be written to the same stash slot in WriteTransfers().
    """

    print("Reversing backward edges...")
    in_order = 0
    out_of_order = 0
    stash_raw_id = 0
    stash_size = 0

    for xf in self.transfers:
      for u in xf.goes_before.copy():
        # xf should go before u
        if xf.order < u.order:
          # it does, hurray!
          in_order += 1
        else:
          # it doesn't, boo. modify u to stash the blocks that it
          # writes that xf wants to read, and then require u to go
          # before xf.
          out_of_order += 1

          overlap = xf.src_ranges.intersect(u.tgt_ranges)
          assert overlap

          u.stash_before.append((stash_raw_id, overlap))
          xf.use_stash.append((stash_raw_id, overlap))
          stash_raw_id += 1
          stash_size += overlap.size()

          # reverse the edge direction; now xf must go after u
          del xf.goes_before[u]
          del u.goes_after[xf]
          xf.goes_after[u] = None    # value doesn't matter
          u.goes_before[xf] = None

    print((" %d/%d dependencies (%.2f%%) were violated; "
           "%d source blocks stashed.") %
          (out_of_order, in_order + out_of_order,
           (out_of_order * 100.0 / (in_order + out_of_order))
           if (in_order + out_of_order) else 0.0,
           stash_size))

  def FindVertexSequence(self):
    """Order the transfers to heuristically minimize the weight of
    left-pointing (violated) dependency edges."""
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Heap entries are immutable; invalidate the old one and push a
      # replacement with the updated score.
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G: continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G: continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G: break

      # Find the "best" vertex to put next.
"Best" is the one that 1049 # maximizes the net difference in source blocks saved we get by 1050 # pretending it's a source rather than a sink. 1051 1052 while True: 1053 u = heapq.heappop(heap) 1054 if u and u.item in G: 1055 u = u.item 1056 break 1057 1058 s1.append(u) 1059 del G[u] 1060 for iu in u.outgoing: 1061 adjust_score(iu, +iu.incoming.pop(u)) 1062 if not iu.incoming: 1063 sources[iu] = None 1064 1065 for iu in u.incoming: 1066 adjust_score(iu, -iu.outgoing.pop(u)) 1067 if not iu.outgoing: 1068 sinks[iu] = None 1069 1070 # Now record the sequence in the 'order' field of each transfer, 1071 # and by rearranging self.transfers to be in the chosen sequence. 1072 1073 new_transfers = [] 1074 for x in itertools.chain(s1, s2): 1075 x.order = len(new_transfers) 1076 new_transfers.append(x) 1077 del x.incoming 1078 del x.outgoing 1079 1080 self.transfers = new_transfers 1081 1082 def GenerateDigraph(self): 1083 print("Generating digraph...") 1084 1085 # Each item of source_ranges will be: 1086 # - None, if that block is not used as a source, 1087 # - an ordered set of transfers. 1088 source_ranges = [] 1089 for b in self.transfers: 1090 for s, e in b.src_ranges: 1091 if e > len(source_ranges): 1092 source_ranges.extend([None] * (e-len(source_ranges))) 1093 for i in range(s, e): 1094 if source_ranges[i] is None: 1095 source_ranges[i] = OrderedDict.fromkeys([b]) 1096 else: 1097 source_ranges[i][b] = None 1098 1099 for a in self.transfers: 1100 intersections = OrderedDict() 1101 for s, e in a.tgt_ranges: 1102 for i in range(s, e): 1103 if i >= len(source_ranges): break 1104 # Add all the Transfers in source_ranges[i] to the (ordered) set. 1105 if source_ranges[i] is not None: 1106 for j in source_ranges[i]: 1107 intersections[j] = None 1108 1109 for b in intersections: 1110 if a is b: continue 1111 1112 # If the blocks written by A are read by B, then B needs to go before A. 
1113 i = a.tgt_ranges.intersect(b.src_ranges) 1114 if i: 1115 if b.src_name == "__ZERO": 1116 # the cost of removing source blocks for the __ZERO domain 1117 # is (nearly) zero. 1118 size = 0 1119 else: 1120 size = i.size() 1121 b.goes_before[a] = size 1122 a.goes_after[b] = size 1123 1124 def FindTransfers(self): 1125 """Parse the file_map to generate all the transfers.""" 1126 1127 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, 1128 style, by_id): 1129 """Add one or multiple Transfer()s by splitting large files. 1130 1131 For BBOTA v3, we need to stash source blocks for resumable feature. 1132 However, with the growth of file size and the shrink of the cache 1133 partition source blocks are too large to be stashed. If a file occupies 1134 too many blocks, we split it into smaller pieces by getting multiple 1135 Transfer()s. 1136 1137 The downside is that after splitting, we may increase the package size 1138 since the split pieces don't align well. According to our experiments, 1139 1/8 of the cache size as the per-piece limit appears to be optimal. 1140 Compared to the fixed 1024-block limit, it reduces the overall package 1141 size by 30% for volantis, and 20% for angler and bullhead.""" 1142 1143 # Possibly split large files into smaller chunks. 1144 pieces = 0 1145 cache_size = common.OPTIONS.cache_size 1146 split_threshold = 0.125 1147 max_blocks_per_transfer = int(cache_size * split_threshold / 1148 self.tgt.blocksize) 1149 1150 # Change nothing for small files. 
1151 if (tgt_ranges.size() <= max_blocks_per_transfer and 1152 src_ranges.size() <= max_blocks_per_transfer): 1153 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, 1154 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges), 1155 style, by_id) 1156 return 1157 1158 while (tgt_ranges.size() > max_blocks_per_transfer and 1159 src_ranges.size() > max_blocks_per_transfer): 1160 tgt_split_name = "%s-%d" % (tgt_name, pieces) 1161 src_split_name = "%s-%d" % (src_name, pieces) 1162 tgt_first = tgt_ranges.first(max_blocks_per_transfer) 1163 src_first = src_ranges.first(max_blocks_per_transfer) 1164 1165 Transfer(tgt_split_name, src_split_name, tgt_first, src_first, 1166 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first), 1167 style, by_id) 1168 1169 tgt_ranges = tgt_ranges.subtract(tgt_first) 1170 src_ranges = src_ranges.subtract(src_first) 1171 pieces += 1 1172 1173 # Handle remaining blocks. 1174 if tgt_ranges.size() or src_ranges.size(): 1175 # Must be both non-empty. 1176 assert tgt_ranges.size() and src_ranges.size() 1177 tgt_split_name = "%s-%d" % (tgt_name, pieces) 1178 src_split_name = "%s-%d" % (src_name, pieces) 1179 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, 1180 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges), 1181 style, by_id) 1182 1183 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id, 1184 split=False): 1185 """Wrapper function for adding a Transfer().""" 1186 1187 # We specialize diff transfers only (which covers bsdiff/imgdiff/move); 1188 # otherwise add the Transfer() as is. 1189 if style != "diff" or not split: 1190 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, 1191 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges), 1192 style, by_id) 1193 return 1194 1195 # Handle .odex files specially to analyze the block-wise difference. If 1196 # most of the blocks are identical with only few changes (e.g. header), 1197 # we will patch the changed blocks only. 
This avoids stashing unchanged 1198 # blocks while patching. We limit the analysis to files without size 1199 # changes only. This is to avoid sacrificing the OTA generation cost too 1200 # much. 1201 if (tgt_name.split(".")[-1].lower() == 'odex' and 1202 tgt_ranges.size() == src_ranges.size()): 1203 1204 # 0.5 threshold can be further tuned. The tradeoff is: if only very 1205 # few blocks remain identical, we lose the opportunity to use imgdiff 1206 # that may have better compression ratio than bsdiff. 1207 crop_threshold = 0.5 1208 1209 tgt_skipped = RangeSet() 1210 src_skipped = RangeSet() 1211 tgt_size = tgt_ranges.size() 1212 tgt_changed = 0 1213 for src_block, tgt_block in zip(src_ranges.next_item(), 1214 tgt_ranges.next_item()): 1215 src_rs = RangeSet(str(src_block)) 1216 tgt_rs = RangeSet(str(tgt_block)) 1217 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs): 1218 tgt_skipped = tgt_skipped.union(tgt_rs) 1219 src_skipped = src_skipped.union(src_rs) 1220 else: 1221 tgt_changed += tgt_rs.size() 1222 1223 # Terminate early if no clear sign of benefits. 1224 if tgt_changed > tgt_size * crop_threshold: 1225 break 1226 1227 if tgt_changed < tgt_size * crop_threshold: 1228 assert tgt_changed + tgt_skipped.size() == tgt_size 1229 print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size, 1230 tgt_skipped.size() * 100.0 / tgt_size, tgt_name)) 1231 AddSplitTransfers( 1232 "%s-skipped" % (tgt_name,), 1233 "%s-skipped" % (src_name,), 1234 tgt_skipped, src_skipped, style, by_id) 1235 1236 # Intentionally change the file extension to avoid being imgdiff'd as 1237 # the files are no longer in their original format. 1238 tgt_name = "%s-cropped" % (tgt_name,) 1239 src_name = "%s-cropped" % (src_name,) 1240 tgt_ranges = tgt_ranges.subtract(tgt_skipped) 1241 src_ranges = src_ranges.subtract(src_skipped) 1242 1243 # Possibly having no changed blocks. 1244 if not tgt_ranges: 1245 return 1246 1247 # Add the transfer(s). 
1248 AddSplitTransfers( 1249 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id) 1250 1251 print("Finding transfers...") 1252 1253 empty = RangeSet() 1254 for tgt_fn, tgt_ranges in self.tgt.file_map.items(): 1255 if tgt_fn == "__ZERO": 1256 # the special "__ZERO" domain is all the blocks not contained 1257 # in any file and that are filled with zeros. We have a 1258 # special transfer style for zero blocks. 1259 src_ranges = self.src.file_map.get("__ZERO", empty) 1260 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges, 1261 "zero", self.transfers) 1262 continue 1263 1264 elif tgt_fn == "__COPY": 1265 # "__COPY" domain includes all the blocks not contained in any 1266 # file and that need to be copied unconditionally to the target. 1267 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers) 1268 continue 1269 1270 elif tgt_fn in self.src.file_map: 1271 # Look for an exact pathname match in the source. 1272 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn], 1273 "diff", self.transfers, True) 1274 continue 1275 1276 b = os.path.basename(tgt_fn) 1277 if b in self.src_basenames: 1278 # Look for an exact basename match in the source. 1279 src_fn = self.src_basenames[b] 1280 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn], 1281 "diff", self.transfers, True) 1282 continue 1283 1284 b = re.sub("[0-9]+", "#", b) 1285 if b in self.src_numpatterns: 1286 # Look for a 'number pattern' match (a basename match after 1287 # all runs of digits are replaced by "#"). (This is useful 1288 # for .so files that contain version numbers in the filename 1289 # that get bumped.) 
1290 src_fn = self.src_numpatterns[b] 1291 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn], 1292 "diff", self.transfers, True) 1293 continue 1294 1295 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers) 1296 1297 def AbbreviateSourceNames(self): 1298 for k in self.src.file_map.keys(): 1299 b = os.path.basename(k) 1300 self.src_basenames[b] = k 1301 b = re.sub("[0-9]+", "#", b) 1302 self.src_numpatterns[b] = k 1303 1304 @staticmethod 1305 def AssertPartition(total, seq): 1306 """Assert that all the RangeSets in 'seq' form a partition of the 1307 'total' RangeSet (ie, they are nonintersecting and their union 1308 equals 'total').""" 1309 1310 so_far = RangeSet() 1311 for i in seq: 1312 assert not so_far.overlaps(i) 1313 so_far = so_far.union(i) 1314 assert so_far == total 1315