# sparse_img.py -- parser for Android sparse image files (AOSP releasetools).
# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

     15 import bisect
     16 import os
     17 import sys
     18 import struct
     19 import pprint
     20 from hashlib import sha1
     21 
     22 from rangelib import *
     23 
     24 class SparseImage(object):
     25   """Wraps a sparse image file (and optional file map) into an image
     26   object suitable for passing to BlockImageDiff."""
     27 
     28   def __init__(self, simg_fn, file_map_fn=None):
     29     self.simg_f = f = open(simg_fn, "rb")
     30 
     31     header_bin = f.read(28)
     32     header = struct.unpack("<I4H4I", header_bin)
     33 
     34     magic = header[0]
     35     major_version = header[1]
     36     minor_version = header[2]
     37     file_hdr_sz = header[3]
     38     chunk_hdr_sz = header[4]
     39     self.blocksize = blk_sz = header[5]
     40     self.total_blocks = total_blks = header[6]
     41     total_chunks = header[7]
     42     image_checksum = header[8]
     43 
     44     if magic != 0xED26FF3A:
     45       raise ValueError("Magic should be 0xED26FF3A but is 0x%08X" % (magic,))
     46     if major_version != 1 or minor_version != 0:
     47       raise ValueError("I know about version 1.0, but this is version %u.%u" %
     48                        (major_version, minor_version))
     49     if file_hdr_sz != 28:
     50       raise ValueError("File header size was expected to be 28, but is %u." %
     51                        (file_hdr_sz,))
     52     if chunk_hdr_sz != 12:
     53       raise ValueError("Chunk header size was expected to be 12, but is %u." %
     54                        (chunk_hdr_sz,))
     55 
     56     print("Total of %u %u-byte output blocks in %u input chunks."
     57           % (total_blks, blk_sz, total_chunks))
     58 
     59     pos = 0   # in blocks
     60     care_data = []
     61     self.offset_map = offset_map = []
     62 
     63     for i in range(total_chunks):
     64       header_bin = f.read(12)
     65       header = struct.unpack("<2H2I", header_bin)
     66       chunk_type = header[0]
     67       reserved1 = header[1]
     68       chunk_sz = header[2]
     69       total_sz = header[3]
     70       data_sz = total_sz - 12
     71 
     72       if chunk_type == 0xCAC1:
     73         if data_sz != (chunk_sz * blk_sz):
     74           raise ValueError(
     75               "Raw chunk input size (%u) does not match output size (%u)" %
     76               (data_sz, chunk_sz * blk_sz))
     77         else:
     78           care_data.append(pos)
     79           care_data.append(pos + chunk_sz)
     80           offset_map.append((pos, chunk_sz, f.tell(), None))
     81           pos += chunk_sz
     82           f.seek(data_sz, os.SEEK_CUR)
     83 
     84       elif chunk_type == 0xCAC2:
     85         fill_data = f.read(4)
     86         care_data.append(pos)
     87         care_data.append(pos + chunk_sz)
     88         offset_map.append((pos, chunk_sz, None, fill_data))
     89         pos += chunk_sz
     90 
     91       elif chunk_type == 0xCAC3:
     92         if data_sz != 0:
     93           raise ValueError("Don't care chunk input size is non-zero (%u)" %
     94                            (data_sz))
     95         else:
     96           pos += chunk_sz
     97 
     98       elif chunk_type == 0xCAC4:
     99         raise ValueError("CRC32 chunks are not supported")
    100 
    101       else:
    102         raise ValueError("Unknown chunk type 0x%04X not supported" %
    103                          (chunk_type,))
    104 
    105     self.care_map = RangeSet(care_data)
    106     self.offset_index = [i[0] for i in offset_map]
    107 
    108     if file_map_fn:
    109       self.LoadFileBlockMap(file_map_fn)
    110     else:
    111       self.file_map = {"__DATA": self.care_map}
    112 
    113   def ReadRangeSet(self, ranges):
    114     return [d for d in self._GetRangeData(ranges)]
    115 
    116   def TotalSha1(self):
    117     """Return the SHA-1 hash of all data in the 'care' regions of this image."""
    118     h = sha1()
    119     for d in self._GetRangeData(self.care_map):
    120       h.update(d)
    121     return h.hexdigest()
    122 
    123   def _GetRangeData(self, ranges):
    124     """Generator that produces all the image data in 'ranges'.  The
    125     number of individual pieces returned is arbitrary (and in
    126     particular is not necessarily equal to the number of ranges in
    127     'ranges'.
    128 
    129     This generator is stateful -- it depends on the open file object
    130     contained in this SparseImage, so you should not try to run two
    131     instances of this generator on the same object simultaneously."""
    132 
    133     f = self.simg_f
    134     for s, e in ranges:
    135       to_read = e-s
    136       idx = bisect.bisect_right(self.offset_index, s) - 1
    137       chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]
    138 
    139       # for the first chunk we may be starting partway through it.
    140       remain = chunk_len - (s - chunk_start)
    141       this_read = min(remain, to_read)
    142       if filepos is not None:
    143         p = filepos + ((s - chunk_start) * self.blocksize)
    144         f.seek(p, os.SEEK_SET)
    145         yield f.read(this_read * self.blocksize)
    146       else:
    147         yield fill_data * (this_read * (self.blocksize >> 2))
    148       to_read -= this_read
    149 
    150       while to_read > 0:
    151         # continue with following chunks if this range spans multiple chunks.
    152         idx += 1
    153         chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]
    154         this_read = min(chunk_len, to_read)
    155         if filepos is not None:
    156           f.seek(filepos, os.SEEK_SET)
    157           yield f.read(this_read * self.blocksize)
    158         else:
    159           yield fill_data * (this_read * (self.blocksize >> 2))
    160         to_read -= this_read
    161 
    162   def LoadFileBlockMap(self, fn):
    163     remaining = self.care_map
    164     self.file_map = out = {}
    165 
    166     with open(fn) as f:
    167       for line in f:
    168         fn, ranges = line.split(None, 1)
    169         ranges = RangeSet.parse(ranges)
    170         out[fn] = ranges
    171         assert ranges.size() == ranges.intersect(remaining).size()
    172         remaining = remaining.subtract(ranges)
    173 
    174     # For all the remaining blocks in the care_map (ie, those that
    175     # aren't part of the data for any file), divide them into blocks
    176     # that are all zero and blocks that aren't.  (Zero blocks are
    177     # handled specially because (1) there are usually a lot of them
    178     # and (2) bsdiff handles files with long sequences of repeated
    179     # bytes especially poorly.)
    180 
    181     zero_blocks = []
    182     nonzero_blocks = []
    183     reference = '\0' * self.blocksize
    184 
    185     f = self.simg_f
    186     for s, e in remaining:
    187       for b in range(s, e):
    188         idx = bisect.bisect_right(self.offset_index, b) - 1
    189         chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]
    190         if filepos is not None:
    191           filepos += (b-chunk_start) * self.blocksize
    192           f.seek(filepos, os.SEEK_SET)
    193           data = f.read(self.blocksize)
    194         else:
    195           if fill_data == reference[:4]:   # fill with all zeros
    196             data = reference
    197           else:
    198             data = None
    199 
    200         if data == reference:
    201           zero_blocks.append(b)
    202           zero_blocks.append(b+1)
    203         else:
    204           nonzero_blocks.append(b)
    205           nonzero_blocks.append(b+1)
    206 
    207     out["__ZERO"] = RangeSet(data=zero_blocks)
    208     out["__NONZERO"] = RangeSet(data=nonzero_blocks)
    209 
    210   def ResetFileMap(self):
    211     """Throw away the file map and treat the entire image as
    212     undifferentiated data."""
    213     self.file_map = {"__DATA": self.care_map}
    214