Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python
      2 # Merge or print the coverage data collected by asan's coverage.
# Input files are an 8-byte magic header followed by a sequence of
# 32-bit or 64-bit integers (PCs).
# We need to merge these integers into a set and then
# either print them (as hex) or dump them into another file.
      6 import array
      7 import bisect
      8 import glob
      9 import os.path
     10 import struct
     11 import subprocess
     12 import sys
     13 
     14 prog_name = ""
     15 
     16 def Usage():
     17   print >> sys.stderr, "Usage: \n" + \
     18       " " + prog_name + " merge FILE [FILE...] > OUTPUT\n" \
     19       " " + prog_name + " print FILE [FILE...]\n" \
     20       " " + prog_name + " unpack FILE [FILE...]\n" \
     21       " " + prog_name + " rawunpack FILE [FILE ...]\n" \
     22       " " + prog_name + " missing BINARY < LIST_OF_PCS\n"
     23   exit(1)
     24 
     25 def CheckBits(bits):
     26   if bits != 32 and bits != 64:
     27     raise Exception("Wrong bitness: %d" % bits)
     28 
     29 def TypeCodeForBits(bits):
     30   CheckBits(bits)
     31   return 'L' if bits == 64 else 'I'
     32 
     33 kMagic32SecondHalf = 0xFFFFFF32;
     34 kMagic64SecondHalf = 0xFFFFFF64;
     35 kMagicFirstHalf    = 0xC0BFFFFF;
     36 
     37 def MagicForBits(bits):
     38   CheckBits(bits)
     39   if sys.byteorder == 'little':
     40     return [kMagic64SecondHalf if bits == 64 else kMagic32SecondHalf, kMagicFirstHalf]
     41   else:
     42     return [kMagicFirstHalf, kMagic64SecondHalf if bits == 64 else kMagic32SecondHalf]
     43 
     44 def ReadMagicAndReturnBitness(f, path):
     45   magic_bytes = f.read(8)
     46   magic_words = struct.unpack('II', magic_bytes);
     47   bits = 0
     48   idx = 1 if sys.byteorder == 'little' else 0
     49   if magic_words[idx] == kMagicFirstHalf:
     50     if magic_words[1-idx] == kMagic64SecondHalf:
     51       bits = 64
     52     elif magic_words[1-idx] == kMagic32SecondHalf:
     53       bits = 32
     54   if bits == 0:
     55     raise Exception('Bad magic word in %s' % path)
     56   return bits
     57 
     58 def ReadOneFile(path):
     59   with open(path, mode="rb") as f:
     60     f.seek(0, 2)
     61     size = f.tell()
     62     f.seek(0, 0)
     63     if size < 8:
     64       raise Exception('File %s is short (< 8 bytes)' % path)
     65     bits = ReadMagicAndReturnBitness(f, path)
     66     size -= 8
     67     s = array.array(TypeCodeForBits(bits), f.read(size))
     68   print >>sys.stderr, "%s: read %d %d-bit PCs from %s" % (prog_name, size * 8 / bits, bits, path)
     69   return s
     70 
     71 def Merge(files):
     72   s = set()
     73   for f in files:
     74     s = s.union(set(ReadOneFile(f)))
     75   print >> sys.stderr, "%s: %d files merged; %d PCs total" % \
     76     (prog_name, len(files), len(s))
     77   return sorted(s)
     78 
     79 def PrintFiles(files):
     80   if len(files) > 1:
     81     s = Merge(files)
     82   else:  # If there is just on file, print the PCs in order.
     83     s = ReadOneFile(files[0])
     84     print >> sys.stderr, "%s: 1 file merged; %d PCs total" % \
     85       (prog_name, len(s))
     86   for i in s:
     87     print "0x%x" % i
     88 
     89 def MergeAndPrint(files):
     90   if sys.stdout.isatty():
     91     Usage()
     92   s = Merge(files)
     93   bits = 32
     94   if max(s) > 0xFFFFFFFF:
     95     bits = 64
     96   array.array('I', MagicForBits(bits)).tofile(sys.stdout)
     97   a = array.array(TypeCodeForBits(bits), s)
     98   a.tofile(sys.stdout)
     99 
    100 
    101 def UnpackOneFile(path):
    102   with open(path, mode="rb") as f:
    103     print >> sys.stderr, "%s: unpacking %s" % (prog_name, path)
    104     while True:
    105       header = f.read(12)
    106       if not header: return
    107       if len(header) < 12:
    108         break
    109       pid, module_length, blob_size = struct.unpack('iII', header)
    110       module = f.read(module_length)
    111       blob = f.read(blob_size)
    112       assert(len(module) == module_length)
    113       assert(len(blob) == blob_size)
    114       extracted_file = "%s.%d.sancov" % (module, pid)
    115       print >> sys.stderr, "%s: extracting %s" % \
    116         (prog_name, extracted_file)
    117       # The packed file may contain multiple blobs for the same pid/module
    118       # pair. Append to the end of the file instead of overwriting.
    119       with open(extracted_file, 'ab') as f2:
    120         f2.write(blob)
    121     # fail
    122     raise Exception('Error reading file %s' % path)
    123 
    124 
    125 def Unpack(files):
    126   for f in files:
    127     UnpackOneFile(f)
    128 
    129 def UnpackOneRawFile(path, map_path):
    130   mem_map = []
    131   with open(map_path, mode="rt") as f_map:
    132     print >> sys.stderr, "%s: reading map %s" % (prog_name, map_path)
    133     bits = int(f_map.readline())
    134     if bits != 32 and bits != 64:
    135       raise Exception('Wrong bits size in the map')
    136     for line in f_map:
    137       parts = line.rstrip().split()
    138       mem_map.append((int(parts[0], 16),
    139                   int(parts[1], 16),
    140                   int(parts[2], 16),
    141                   ' '.join(parts[3:])))
    142   mem_map.sort(key=lambda m : m[0])
    143   mem_map_keys = [m[0] for m in mem_map]
    144 
    145   with open(path, mode="rb") as f:
    146     print >> sys.stderr, "%s: unpacking %s" % (prog_name, path)
    147 
    148     f.seek(0, 2)
    149     size = f.tell()
    150     f.seek(0, 0)
    151     pcs = array.array(TypeCodeForBits(bits), f.read(size))
    152     mem_map_pcs = [[] for i in range(0, len(mem_map))]
    153 
    154     for pc in pcs:
    155       if pc == 0: continue
    156       map_idx = bisect.bisect(mem_map_keys, pc) - 1
    157       (start, end, base, module_path) = mem_map[map_idx]
    158       assert pc >= start
    159       if pc >= end:
    160         print >> sys.stderr, "warning: %s: pc %x outside of any known mapping" % (prog_name, pc)
    161         continue
    162       mem_map_pcs[map_idx].append(pc - base)
    163 
    164     for ((start, end, base, module_path), pc_list) in zip(mem_map, mem_map_pcs):
    165       if len(pc_list) == 0: continue
    166       assert path.endswith('.sancov.raw')
    167       dst_path = module_path + '.' + os.path.basename(path)[:-4]
    168       print >> sys.stderr, "%s: writing %d PCs to %s" % (prog_name, len(pc_list), dst_path)
    169       arr = array.array(TypeCodeForBits(bits))
    170       arr.fromlist(sorted(pc_list))
    171       with open(dst_path, 'ab') as f2:
    172         array.array('I', MagicForBits(bits)).tofile(f2)
    173         arr.tofile(f2)
    174 
    175 def RawUnpack(files):
    176   for f in files:
    177     if not f.endswith('.sancov.raw'):
    178       raise Exception('Unexpected raw file name %s' % f)
    179     f_map = f[:-3] + 'map'
    180     UnpackOneRawFile(f, f_map)
    181 
    182 def GetInstrumentedPCs(binary):
    183   # This looks scary, but all it does is extract all offsets where we call:
    184   # - __sanitizer_cov() or __sanitizer_cov_with_check(),
    185   # - with call or callq,
    186   # - directly or via PLT.
    187   cmd = "objdump -d %s | " \
    188         "grep '^\s\+[0-9a-f]\+:.*\scall\(q\|\)\s\+[0-9a-f]\+ <__sanitizer_cov\(_with_check\|\)\(@plt\|\)>' | " \
    189         "grep '^\s\+[0-9a-f]\+' -o" % binary
    190   proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
    191                           shell=True)
    192   proc.stdin.close()
    193   # The PCs we get from objdump are off by 4 bytes, as they point to the
    194   # beginning of the callq instruction. Empirically this is true on x86 and
    195   # x86_64.
    196   return set(int(line.strip(), 16) + 4 for line in proc.stdout)
    197 
    198 def PrintMissing(binary):
    199   if not os.path.isfile(binary):
    200     raise Exception('File not found: %s' % binary)
    201   instrumented = GetInstrumentedPCs(binary)
    202   print >> sys.stderr, "%s: found %d instrumented PCs in %s" % (prog_name,
    203                                                                 len(instrumented),
    204                                                                 binary)
    205   covered = set(int(line, 16) for line in sys.stdin)
    206   print >> sys.stderr, "%s: read %d PCs from stdin" % (prog_name, len(covered))
    207   missing = instrumented - covered
    208   print >> sys.stderr, "%s: %d PCs missing from coverage" % (prog_name, len(missing))
    209   if (len(missing) > len(instrumented) - len(covered)):
    210     print >> sys.stderr, \
    211         "%s: WARNING: stdin contains PCs not found in binary" % prog_name
    212   for pc in sorted(missing):
    213     print "0x%x" % pc
    214 
    215 if __name__ == '__main__':
    216   prog_name = sys.argv[0]
    217   if len(sys.argv) <= 2:
    218     Usage();
    219 
    220   if sys.argv[1] == "missing":
    221     if len(sys.argv) != 3:
    222       Usage()
    223     PrintMissing(sys.argv[2])
    224     exit(0)
    225 
    226   file_list = []
    227   for f in sys.argv[2:]:
    228     file_list += glob.glob(f)
    229   if not file_list:
    230     Usage()
    231 
    232   if sys.argv[1] == "print":
    233     PrintFiles(file_list)
    234   elif sys.argv[1] == "merge":
    235     MergeAndPrint(file_list)
    236   elif sys.argv[1] == "unpack":
    237     Unpack(file_list)
    238   elif sys.argv[1] == "rawunpack":
    239     RawUnpack(file_list)
    240   else:
    241     Usage()
    242