#!/usr/bin/env python
# Merge or print the coverage data collected by asan's coverage.
# Each input file starts with an 8-byte magic that encodes the PC width
# (32 or 64 bits) and is followed by a sequence of PCs of that width.
# We need to merge these integers into a set and then
# either print them (as hex) or dump them into another file.
#
# The code below is written to run unchanged on Python 2.7 and Python 3.
import array
import bisect
import glob
import os.path
import re
import struct
import subprocess
import sys

prog_name = ""


def _Log(message):
    """Write one diagnostic line to stderr."""
    sys.stderr.write(message + "\n")


def Usage():
    """Print the command-line synopsis to stderr and exit with status 1."""
    commands = (
        "merge FILE [FILE...] > OUTPUT",
        "print FILE [FILE...]",
        "unpack FILE [FILE...]",
        "rawunpack FILE [FILE ...]",
        "missing BINARY < LIST_OF_PCS",
    )
    text = "Usage: \n" + "".join(
        " " + prog_name + " " + command + "\n" for command in commands)
    _Log(text)
    sys.exit(1)


def CheckBits(bits):
    """Raise Exception unless bits is 32 or 64."""
    if bits not in (32, 64):
        raise Exception("Wrong bitness: %d" % bits)


def TypeCodeForBits(bits):
    """Return the type code 'L' for 64-bit PCs and 'I' for 32-bit PCs.

    NOTE(review): nothing in this file calls this function; it presumably
    targets the array module, where the width of 'L' is platform dependent
    -- confirm before relying on it for 64-bit data.
    """
    CheckBits(bits)
    return 'L' if bits == 64 else 'I'


def TypeCodeForStruct(bits):
    """Return the struct format character for a PC of the given width."""
    CheckBits(bits)
    return 'Q' if bits == 64 else 'I'


kMagic32SecondHalf = 0xFFFFFF32
kMagic64SecondHalf = 0xFFFFFF64
kMagicFirstHalf = 0xC0BFFFFF

# Matches an objdump disassembly line that calls __sanitizer_cov() or
# __sanitizer_cov_with_check(), with call or callq, directly or via PLT.
# Group 1 is the (hex) address of the call instruction.
kInstrumentedCallRe = re.compile(
    br'^\s+([0-9a-f]+):.*\scallq?\s+[0-9a-f]+ '
    br'<__sanitizer_cov(?:_with_check)?(?:@plt)?>')


def MagicForBits(bits):
    """Return the file magic as two 32-bit words in native word order.

    The word order follows sys.byteorder so that the pair matches what
    ReadMagicAndReturnBitness() expects to read back on this host.
    """
    CheckBits(bits)
    second_half = kMagic64SecondHalf if bits == 64 else kMagic32SecondHalf
    if sys.byteorder == 'little':
        return [second_half, kMagicFirstHalf]
    return [kMagicFirstHalf, second_half]


def ReadMagicAndReturnBitness(f, path):
    """Consume the 8-byte magic from f and return the PC width (32 or 64).

    Args:
      f: binary file object positioned at the magic.
      path: file name, used only in the error message.

    Raises:
      Exception: if fewer than 8 bytes are available or the magic is unknown.
    """
    magic_bytes = f.read(8)
    if len(magic_bytes) != 8:
        # Report a short read the same way as a corrupt magic instead of
        # leaking a struct.error to the caller.
        raise Exception('Bad magic word in %s' % path)
    magic_words = struct.unpack('II', magic_bytes)
    idx = 1 if sys.byteorder == 'little' else 0
    if magic_words[idx] == kMagicFirstHalf:
        second_half = magic_words[1 - idx]
        if second_half == kMagic64SecondHalf:
            return 64
        if second_half == kMagic32SecondHalf:
            return 32
    raise Exception('Bad magic word in %s' % path)


def ReadOneFile(path):
    """Read one coverage file and return its PCs as a tuple, in file order.

    Trailing bytes that do not form a whole PC are ignored.

    Raises:
      Exception: if the file is shorter than the magic or the magic is bad.
    """
    with open(path, mode="rb") as f:
        f.seek(0, 2)
        size = f.tell()
        f.seek(0, 0)
        if size < 8:
            raise Exception('File %s is short (< 8 bytes)' % path)
        bits = ReadMagicAndReturnBitness(f, path)
        size -= 8
        # Floor division: "/" yields a float on Python 3, which cannot be
        # used as a repeat count.
        count = size * 8 // bits
        s = struct.unpack_from('%d%s' % (count, TypeCodeForStruct(bits)),
                               f.read(size))
    _Log("%s: read %d %d-bit PCs from %s" % (prog_name, count, bits, path))
    return s


def Merge(files):
    """Return the sorted list of distinct PCs found in all the given files."""
    s = set()
    for f in files:
        s.update(ReadOneFile(f))
    _Log("%s: %d files merged; %d PCs total" % (prog_name, len(files), len(s)))
    return sorted(s)


def PrintFiles(files):
    """Print the PCs of the given files to stdout, one "0x..." per line."""
    if len(files) > 1:
        s = Merge(files)
    else:  # If there is just one file, print the PCs in order.
        s = ReadOneFile(files[0])
        _Log("%s: 1 file merged; %d PCs total" % (prog_name, len(s)))
    for i in s:
        print("0x%x" % i)


def MergeAndPrint(files):
    """Merge the given files and write the result to stdout in file format.

    Refuses to write binary data to a terminal (prints usage and exits).
    The output is 64-bit if any PC does not fit in 32 bits, else 32-bit.
    """
    if sys.stdout.isatty():
        Usage()
    s = Merge(files)
    # s is sorted, so its last element is the maximum; an empty merge
    # (inputs holding only a magic) stays 32-bit instead of crashing.
    bits = 64 if s and s[-1] > 0xFFFFFFFF else 32
    # Python 3 text streams reject bytes; use the underlying binary buffer
    # when there is one (Python 2's sys.stdout has none and accepts bytes).
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    array.array('I', MagicForBits(bits)).tofile(out)
    out.write(struct.pack('%d%s' % (len(s), TypeCodeForStruct(bits)), *s))


def UnpackOneFile(path):
    """Split one packed file into per-module "<module>.<pid>.sancov" files.

    The packed file is a sequence of records: a 12-byte header (pid, module
    name length, blob size) followed by the module name and the blob.

    Raises:
      Exception: if the file ends in the middle of a record.
    """
    with open(path, mode="rb") as f:
        _Log("%s: unpacking %s" % (prog_name, path))
        while True:
            header = f.read(12)
            if not header:
                return
            if len(header) < 12:
                break
            pid, module_length, blob_size = struct.unpack('iII', header)
            module = f.read(module_length)
            blob = f.read(blob_size)
            if len(module) != module_length or len(blob) != blob_size:
                # Truncated record: report it like a truncated header
                # rather than through an assert that -O would strip.
                break
            if not isinstance(module, str):
                # Python 3 reads bytes; "%s" would render them as "b'...'".
                module = os.fsdecode(module)
            extracted_file = "%s.%d.sancov" % (module, pid)
            _Log("%s: extracting %s" % (prog_name, extracted_file))
            # The packed file may contain multiple blobs for the same
            # pid/module pair. Append to the end of the file instead of
            # overwriting.
            with open(extracted_file, 'ab') as f2:
                f2.write(blob)
        # fail
        raise Exception('Error reading file %s' % path)


def Unpack(files):
    """Unpack every packed file in files."""
    for f in files:
        UnpackOneFile(f)


def UnpackOneRawFile(path, map_path):
    """Convert one raw PC dump into per-module coverage files.

    Args:
      path: raw file of absolute PCs; its name must end in ".sancov.raw".
      map_path: text file whose first line is the PC width and whose other
        lines are "START END BASE MODULE_PATH" with hex addresses.

    Each PC that falls inside a mapping is rebased by that mapping's BASE
    and appended to "<MODULE_PATH>.<basename of path minus '.raw'>". PCs
    equal to zero are skipped; PCs outside every mapping produce a warning.

    Raises:
      Exception: if the width in the map is neither 32 nor 64.
    """
    mem_map = []
    with open(map_path, mode="rt") as f_map:
        _Log("%s: reading map %s" % (prog_name, map_path))
        bits = int(f_map.readline())
        if bits != 32 and bits != 64:
            raise Exception('Wrong bits size in the map')
        for line in f_map:
            parts = line.rstrip().split()
            if not parts:
                continue  # Tolerate blank lines.
            mem_map.append((int(parts[0], 16),
                            int(parts[1], 16),
                            int(parts[2], 16),
                            ' '.join(parts[3:])))
    mem_map.sort(key=lambda m: m[0])
    mem_map_keys = [m[0] for m in mem_map]

    with open(path, mode="rb") as f:
        _Log("%s: unpacking %s" % (prog_name, path))

        f.seek(0, 2)
        size = f.tell()
        f.seek(0, 0)
        count = size * 8 // bits
        pcs = struct.unpack_from('%d%s' % (count, TypeCodeForStruct(bits)),
                                 f.read(size))
    mem_map_pcs = [[] for _ in mem_map]

    for pc in pcs:
        if pc == 0:
            continue
        map_idx = bisect.bisect(mem_map_keys, pc) - 1
        # map_idx == -1 means the PC lies below the lowest mapping (or the
        # map is empty); indexing with it would wrap to the last mapping.
        if map_idx < 0 or pc >= mem_map[map_idx][1]:
            _Log("warning: %s: pc %x outside of any known mapping"
                 % (prog_name, pc))
            continue
        mem_map_pcs[map_idx].append(pc - mem_map[map_idx][2])

    for (_, _, _, module_path), pc_list in zip(mem_map, mem_map_pcs):
        if len(pc_list) == 0:
            continue
        assert path.endswith('.sancov.raw')
        # [:-4] drops the ".raw" suffix.
        dst_path = module_path + '.' + os.path.basename(path)[:-4]
        _Log("%s: writing %d PCs to %s" % (prog_name, len(pc_list), dst_path))
        sorted_pc_list = sorted(pc_list)
        pc_buffer = struct.pack(
            '%d%s' % (len(pc_list), TypeCodeForStruct(bits)), *sorted_pc_list)
        with open(dst_path, 'ab+') as f2:
            f2.seek(0, 2)
            # Only a new (empty) file gets the magic. Appending it again
            # to an existing file would make readers see it as PCs.
            if f2.tell() == 0:
                array.array('I', MagicForBits(bits)).tofile(f2)
            f2.write(pc_buffer)


def RawUnpack(files):
    """Unpack every "*.sancov.raw" file using its sibling "*.sancov.map".

    Raises:
      Exception: if a file name does not end in ".sancov.raw".
    """
    for f in files:
        if not f.endswith('.sancov.raw'):
            raise Exception('Unexpected raw file name %s' % f)
        f_map = f[:-3] + 'map'
        UnpackOneRawFile(f, f_map)


def GetInstrumentedPCs(binary):
    """Return the set of instrumented PCs found by disassembling binary.

    Runs "objdump -d" and keeps the lines matching kInstrumentedCallRe.
    objdump is invoked without a shell so the path needs no quoting.

    Raises:
      OSError: if objdump cannot be started.
    """
    # stdin is a closed pipe so that objdump cannot consume this script's
    # own stdin, which carries the list of covered PCs.
    proc = subprocess.Popen(["objdump", "-d", binary],
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    proc.stdin.close()
    pcs = set()
    for line in proc.stdout:
        match = kInstrumentedCallRe.match(line)
        if match:
            # The PCs we get from objdump are off by 4 bytes, as they point
            # to the beginning of the callq instruction. Empirically this is
            # true on x86 and x86_64.
            pcs.add(int(match.group(1), 16) + 4)
    proc.stdout.close()
    proc.wait()
    return pcs


def PrintMissing(binary):
    """Print the instrumented PCs of binary that are absent from stdin.

    stdin holds one hexadecimal PC per line; blank lines are ignored.

    Raises:
      Exception: if binary is not an existing file.
    """
    if not os.path.isfile(binary):
        raise Exception('File not found: %s' % binary)
    instrumented = GetInstrumentedPCs(binary)
    _Log("%s: found %d instrumented PCs in %s"
         % (prog_name, len(instrumented), binary))
    covered = set(int(line, 16) for line in sys.stdin if line.strip())
    _Log("%s: read %d PCs from stdin" % (prog_name, len(covered)))
    missing = instrumented - covered
    _Log("%s: %d PCs missing from coverage" % (prog_name, len(missing)))
    # The difference is larger than the plain size difference exactly when
    # some covered PC is not an instrumented one.
    if len(missing) > len(instrumented) - len(covered):
        _Log("%s: WARNING: stdin contains PCs not found in binary" % prog_name)
    for pc in sorted(missing):
        print("0x%x" % pc)


if __name__ == '__main__':
    prog_name = sys.argv[0]
    if len(sys.argv) <= 2:
        Usage()

    if sys.argv[1] == "missing":
        if len(sys.argv) != 3:
            Usage()
        PrintMissing(sys.argv[2])
        sys.exit(0)

    file_list = []
    for f in sys.argv[2:]:
        file_list += glob.glob(f)
    if not file_list:
        Usage()

    handlers = {
        "print": PrintFiles,
        "merge": MergeAndPrint,
        "unpack": Unpack,
        "rawunpack": RawUnpack,
    }
    handler = handlers.get(sys.argv[1])
    if handler is None:
        Usage()
    handler(file_list)