# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import bisect
import os
import sys
import struct
import pprint
from hashlib import sha1

from rangelib import *


class SparseImage(object):
  """Wraps a sparse image file (and optional file map) into an image
  object suitable for passing to BlockImageDiff.

  Attributes set by the constructor:
    simg_f: the sparse image file, opened in binary mode and kept open
        because the data-reading methods seek and read from it.
    blocksize: size in bytes of one output block (from the file header).
    total_blocks: number of output blocks (from the file header).
    offset_map: list of (start_block, num_blocks, file_offset, fill_data)
        tuples, one per raw or fill chunk, in file order.  For raw chunks
        file_offset is the position of the chunk data in simg_f and
        fill_data is None; for fill chunks file_offset is None and
        fill_data is the 4-byte fill pattern.
    offset_index: the start_block of each offset_map entry, used for
        bisect lookups.
    care_map: RangeSet built from the block ranges of all raw and fill
        chunks.
    file_map: dict mapping a name to a RangeSet of blocks.
  """

  def __init__(self, simg_fn, file_map_fn=None):
    """Parse the headers of the sparse image 'simg_fn'.

    Args:
      simg_fn: path of the sparse image file.
      file_map_fn: optional path of a block map file; if given it is
          loaded with LoadFileBlockMap, otherwise the whole care map is
          exposed under the single name "__DATA".

    Raises:
      ValueError: if the file header or a chunk header is not one this
          class understands.
    """
    self.simg_f = f = open(simg_fn, "rb")

    header_bin = f.read(28)
    (magic, major_version, minor_version, file_hdr_sz, chunk_hdr_sz,
     blk_sz, total_blks, total_chunks, _image_checksum) = struct.unpack(
         "<I4H4I", header_bin)

    self.blocksize = blk_sz
    self.total_blocks = total_blks

    if magic != 0xED26FF3A:
      raise ValueError("Magic should be 0xED26FF3A but is 0x%08X" % (magic,))
    if major_version != 1 or minor_version != 0:
      raise ValueError("I know about version 1.0, but this is version %u.%u" %
                       (major_version, minor_version))
    if file_hdr_sz != 28:
      raise ValueError("File header size was expected to be 28, but is %u." %
                       (file_hdr_sz,))
    if chunk_hdr_sz != 12:
      raise ValueError("Chunk header size was expected to be 12, but is %u." %
                       (chunk_hdr_sz,))

    print("Total of %u %u-byte output blocks in %u input chunks."
          % (total_blks, blk_sz, total_chunks))

    pos = 0   # in blocks
    care_data = []
    self.offset_map = offset_map = []

    for _ in range(total_chunks):
      header_bin = f.read(12)
      chunk_type, _reserved, chunk_sz, total_sz = struct.unpack(
          "<2H2I", header_bin)
      # total_sz includes the 12-byte chunk header itself.
      data_sz = total_sz - 12

      if chunk_type == 0xCAC1:
        # Raw chunk: chunk_sz blocks of literal data follow the header.
        if data_sz != (chunk_sz * blk_sz):
          raise ValueError(
              "Raw chunk input size (%u) does not match output size (%u)" %
              (data_sz, chunk_sz * blk_sz))
        care_data.append(pos)
        care_data.append(pos + chunk_sz)
        offset_map.append((pos, chunk_sz, f.tell(), None))
        pos += chunk_sz
        f.seek(data_sz, os.SEEK_CUR)

      elif chunk_type == 0xCAC2:
        # Fill chunk: a single 4-byte pattern repeated over chunk_sz
        # blocks.  Exactly 4 bytes are consumed below, so any other
        # payload size would leave the parser misaligned for the next
        # chunk header.
        if data_sz != 4:
          raise ValueError("Fill chunk input size (%u) should be 4" %
                           (data_sz,))
        fill_data = f.read(4)
        care_data.append(pos)
        care_data.append(pos + chunk_sz)
        offset_map.append((pos, chunk_sz, None, fill_data))
        pos += chunk_sz

      elif chunk_type == 0xCAC3:
        # Don't-care chunk: advances the output position without data.
        if data_sz != 0:
          raise ValueError("Don't care chunk input size is non-zero (%u)" %
                           (data_sz))
        pos += chunk_sz

      elif chunk_type == 0xCAC4:
        raise ValueError("CRC32 chunks are not supported")

      else:
        raise ValueError("Unknown chunk type 0x%04X not supported" %
                         (chunk_type,))

    self.care_map = RangeSet(care_data)
    self.offset_index = [entry[0] for entry in offset_map]

    if file_map_fn:
      self.LoadFileBlockMap(file_map_fn)
    else:
      self.file_map = {"__DATA": self.care_map}

  def ReadRangeSet(self, ranges):
    """Return a list of the data pieces that make up 'ranges'."""
    return list(self._GetRangeData(ranges))

  def TotalSha1(self):
    """Return the SHA-1 hash of all data in the 'care' regions of this image."""
    h = sha1()
    for d in self._GetRangeData(self.care_map):
      h.update(d)
    return h.hexdigest()

  def _GetRangeData(self, ranges):
    """Generator that produces all the image data in 'ranges'.  The
    number of individual pieces returned is arbitrary (and in
    particular is not necessarily equal to the number of ranges in
    'ranges').

    This generator is stateful -- it depends on the open file object
    contained in this SparseImage, so you should not try to run two
    instances of this generator on the same object simultaneously."""

    f = self.simg_f
    for s, e in ranges:
      to_read = e - s
      # Find the last chunk whose first block is <= s.
      idx = bisect.bisect_right(self.offset_index, s) - 1
      chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]

      # for the first chunk we may be starting partway through it.
      remain = chunk_len - (s - chunk_start)
      this_read = min(remain, to_read)
      if filepos is not None:
        p = filepos + ((s - chunk_start) * self.blocksize)
        f.seek(p, os.SEEK_SET)
        yield f.read(this_read * self.blocksize)
      else:
        # fill_data is 4 bytes, so blocksize >> 2 copies make one block.
        yield fill_data * (this_read * (self.blocksize >> 2))
      to_read -= this_read

      while to_read > 0:
        # continue with following chunks if this range spans multiple chunks.
        idx += 1
        chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]
        this_read = min(chunk_len, to_read)
        if filepos is not None:
          f.seek(filepos, os.SEEK_SET)
          yield f.read(this_read * self.blocksize)
        else:
          yield fill_data * (this_read * (self.blocksize >> 2))
        to_read -= this_read

  def LoadFileBlockMap(self, fn):
    """Populate self.file_map from the block map file 'fn'.

    Each line of the file is split on its first run of whitespace into a
    name and a range string, which is parsed with RangeSet.parse.  Blocks
    of the care map that no line claims are classified one by one and
    stored under "__ZERO" (block is entirely zero bytes) or "__NONZERO".
    """
    remaining = self.care_map
    self.file_map = out = {}

    with open(fn) as map_f:
      for line in map_f:
        name, ranges = line.split(None, 1)
        ranges = RangeSet.parse(ranges)
        out[name] = ranges
        # Every block listed for this file must still be unclaimed.
        assert ranges.size() == ranges.intersect(remaining).size()
        remaining = remaining.subtract(ranges)

    # For all the remaining blocks in the care_map (ie, those that
    # aren't part of the data for any file), divide them into blocks
    # that are all zero and blocks that aren't.  (Zero blocks are
    # handled specially because (1) there are usually a lot of them
    # and (2) bsdiff handles files with long sequences of repeated
    # bytes especially poorly.)

    zero_blocks = []
    nonzero_blocks = []
    # Must be bytes: the image is read in binary mode, and on Python 3 a
    # str never compares equal to bytes, which would put every block in
    # __NONZERO.  On Python 2 b'...' is the same type as '...'.
    reference = b'\0' * self.blocksize

    f = self.simg_f
    for s, e in remaining:
      for b in range(s, e):
        idx = bisect.bisect_right(self.offset_index, b) - 1
        chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]
        if filepos is not None:
          filepos += (b - chunk_start) * self.blocksize
          f.seek(filepos, os.SEEK_SET)
          data = f.read(self.blocksize)
        else:
          if fill_data == reference[:4]:
            # fill with all zeros
            data = reference
          else:
            data = None

        if data == reference:
          zero_blocks.append(b)
          zero_blocks.append(b + 1)
        else:
          nonzero_blocks.append(b)
          nonzero_blocks.append(b + 1)

    out["__ZERO"] = RangeSet(data=zero_blocks)
    out["__NONZERO"] = RangeSet(data=nonzero_blocks)

  def ResetFileMap(self):
    """Throw away the file map and treat the entire image as
    undifferentiated data."""
    self.file_map = {"__DATA": self.care_map}