Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python
      2 # Merge or print the coverage data collected by asan's coverage.
# Each input file starts with an 8-byte magic header that encodes the PC
# width (32 or 64 bits), followed by a sequence of PCs of that width.
# We need to merge these PCs into a set and then
# either print them (as hex) or dump them into another file.
      6 import array
      7 import bisect
      8 import glob
      9 import os.path
     10 import struct
     11 import subprocess
     12 import sys
     13 
# Name this script was invoked as. The __main__ block overwrites it with
# sys.argv[0]; it appears in the usage text and in diagnostics sent to stderr.
prog_name = ""
     15 
     16 def Usage():
     17   print >> sys.stderr, "Usage: \n" + \
     18       " " + prog_name + " merge FILE [FILE...] > OUTPUT\n" \
     19       " " + prog_name + " print FILE [FILE...]\n" \
     20       " " + prog_name + " unpack FILE [FILE...]\n" \
     21       " " + prog_name + " rawunpack FILE [FILE ...]\n" \
     22       " " + prog_name + " missing BINARY < LIST_OF_PCS\n"
     23   exit(1)
     24 
     25 def CheckBits(bits):
     26   if bits != 32 and bits != 64:
     27     raise Exception("Wrong bitness: %d" % bits)
     28 
     29 def TypeCodeForBits(bits):
     30   CheckBits(bits)
     31   return 'L' if bits == 64 else 'I'
     32 
     33 def TypeCodeForStruct(bits):
     34   CheckBits(bits)
     35   return 'Q' if bits == 64 else 'I'
     36 
     37 kMagic32SecondHalf = 0xFFFFFF32;
     38 kMagic64SecondHalf = 0xFFFFFF64;
     39 kMagicFirstHalf    = 0xC0BFFFFF;
     40 
     41 def MagicForBits(bits):
     42   CheckBits(bits)
     43   if sys.byteorder == 'little':
     44     return [kMagic64SecondHalf if bits == 64 else kMagic32SecondHalf, kMagicFirstHalf]
     45   else:
     46     return [kMagicFirstHalf, kMagic64SecondHalf if bits == 64 else kMagic32SecondHalf]
     47 
     48 def ReadMagicAndReturnBitness(f, path):
     49   magic_bytes = f.read(8)
     50   magic_words = struct.unpack('II', magic_bytes);
     51   bits = 0
     52   idx = 1 if sys.byteorder == 'little' else 0
     53   if magic_words[idx] == kMagicFirstHalf:
     54     if magic_words[1-idx] == kMagic64SecondHalf:
     55       bits = 64
     56     elif magic_words[1-idx] == kMagic32SecondHalf:
     57       bits = 32
     58   if bits == 0:
     59     raise Exception('Bad magic word in %s' % path)
     60   return bits
     61 
     62 def ReadOneFile(path):
     63   with open(path, mode="rb") as f:
     64     f.seek(0, 2)
     65     size = f.tell()
     66     f.seek(0, 0)
     67     if size < 8:
     68       raise Exception('File %s is short (< 8 bytes)' % path)
     69     bits = ReadMagicAndReturnBitness(f, path)
     70     size -= 8
     71     s = struct.unpack_from(TypeCodeForStruct(bits) * (size * 8 / bits), f.read(size))
     72   print >>sys.stderr, "%s: read %d %d-bit PCs from %s" % (prog_name, size * 8 / bits, bits, path)
     73   return s
     74 
     75 def Merge(files):
     76   s = set()
     77   for f in files:
     78     s = s.union(set(ReadOneFile(f)))
     79   print >> sys.stderr, "%s: %d files merged; %d PCs total" % \
     80     (prog_name, len(files), len(s))
     81   return sorted(s)
     82 
     83 def PrintFiles(files):
     84   if len(files) > 1:
     85     s = Merge(files)
     86   else:  # If there is just on file, print the PCs in order.
     87     s = ReadOneFile(files[0])
     88     print >> sys.stderr, "%s: 1 file merged; %d PCs total" % \
     89       (prog_name, len(s))
     90   for i in s:
     91     print "0x%x" % i
     92 
     93 def MergeAndPrint(files):
     94   if sys.stdout.isatty():
     95     Usage()
     96   s = Merge(files)
     97   bits = 32
     98   if max(s) > 0xFFFFFFFF:
     99     bits = 64
    100   array.array('I', MagicForBits(bits)).tofile(sys.stdout)
    101   a = struct.pack(TypeCodeForStruct(bits) * len(s), *s)
    102   sys.stdout.write(a)
    103 
    104 
    105 def UnpackOneFile(path):
    106   with open(path, mode="rb") as f:
    107     print >> sys.stderr, "%s: unpacking %s" % (prog_name, path)
    108     while True:
    109       header = f.read(12)
    110       if not header: return
    111       if len(header) < 12:
    112         break
    113       pid, module_length, blob_size = struct.unpack('iII', header)
    114       module = f.read(module_length)
    115       blob = f.read(blob_size)
    116       assert(len(module) == module_length)
    117       assert(len(blob) == blob_size)
    118       extracted_file = "%s.%d.sancov" % (module, pid)
    119       print >> sys.stderr, "%s: extracting %s" % \
    120         (prog_name, extracted_file)
    121       # The packed file may contain multiple blobs for the same pid/module
    122       # pair. Append to the end of the file instead of overwriting.
    123       with open(extracted_file, 'ab') as f2:
    124         f2.write(blob)
    125     # fail
    126     raise Exception('Error reading file %s' % path)
    127 
    128 
    129 def Unpack(files):
    130   for f in files:
    131     UnpackOneFile(f)
    132 
    133 def UnpackOneRawFile(path, map_path):
    134   mem_map = []
    135   with open(map_path, mode="rt") as f_map:
    136     print >> sys.stderr, "%s: reading map %s" % (prog_name, map_path)
    137     bits = int(f_map.readline())
    138     if bits != 32 and bits != 64:
    139       raise Exception('Wrong bits size in the map')
    140     for line in f_map:
    141       parts = line.rstrip().split()
    142       mem_map.append((int(parts[0], 16),
    143                   int(parts[1], 16),
    144                   int(parts[2], 16),
    145                   ' '.join(parts[3:])))
    146   mem_map.sort(key=lambda m : m[0])
    147   mem_map_keys = [m[0] for m in mem_map]
    148 
    149   with open(path, mode="rb") as f:
    150     print >> sys.stderr, "%s: unpacking %s" % (prog_name, path)
    151 
    152     f.seek(0, 2)
    153     size = f.tell()
    154     f.seek(0, 0)
    155     pcs = struct.unpack_from(TypeCodeForStruct(bits) * (size * 8 / bits), f.read(size))
    156     mem_map_pcs = [[] for i in range(0, len(mem_map))]
    157 
    158     for pc in pcs:
    159       if pc == 0: continue
    160       map_idx = bisect.bisect(mem_map_keys, pc) - 1
    161       (start, end, base, module_path) = mem_map[map_idx]
    162       assert pc >= start
    163       if pc >= end:
    164         print >> sys.stderr, "warning: %s: pc %x outside of any known mapping" % (prog_name, pc)
    165         continue
    166       mem_map_pcs[map_idx].append(pc - base)
    167 
    168     for ((start, end, base, module_path), pc_list) in zip(mem_map, mem_map_pcs):
    169       if len(pc_list) == 0: continue
    170       assert path.endswith('.sancov.raw')
    171       dst_path = module_path + '.' + os.path.basename(path)[:-4]
    172       print >> sys.stderr, "%s: writing %d PCs to %s" % (prog_name, len(pc_list), dst_path)
    173       sorted_pc_list = sorted(pc_list)
    174       pc_buffer = struct.pack(TypeCodeForStruct(bits) * len(pc_list), *sorted_pc_list)
    175       with open(dst_path, 'ab+') as f2:
    176         array.array('I', MagicForBits(bits)).tofile(f2)
    177         f2.seek(0, 2)
    178         f2.write(pc_buffer)
    179 
    180 def RawUnpack(files):
    181   for f in files:
    182     if not f.endswith('.sancov.raw'):
    183       raise Exception('Unexpected raw file name %s' % f)
    184     f_map = f[:-3] + 'map'
    185     UnpackOneRawFile(f, f_map)
    186 
    187 def GetInstrumentedPCs(binary):
    188   # This looks scary, but all it does is extract all offsets where we call:
    189   # - __sanitizer_cov() or __sanitizer_cov_with_check(),
    190   # - with call or callq,
    191   # - directly or via PLT.
    192   cmd = "objdump -d %s | " \
    193         "grep '^\s\+[0-9a-f]\+:.*\scall\(q\|\)\s\+[0-9a-f]\+ <__sanitizer_cov\(_with_check\|\)\(@plt\|\)>' | " \
    194         "grep '^\s\+[0-9a-f]\+' -o" % binary
    195   proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
    196                           shell=True)
    197   proc.stdin.close()
    198   # The PCs we get from objdump are off by 4 bytes, as they point to the
    199   # beginning of the callq instruction. Empirically this is true on x86 and
    200   # x86_64.
    201   return set(int(line.strip(), 16) + 4 for line in proc.stdout)
    202 
    203 def PrintMissing(binary):
    204   if not os.path.isfile(binary):
    205     raise Exception('File not found: %s' % binary)
    206   instrumented = GetInstrumentedPCs(binary)
    207   print >> sys.stderr, "%s: found %d instrumented PCs in %s" % (prog_name,
    208                                                                 len(instrumented),
    209                                                                 binary)
    210   covered = set(int(line, 16) for line in sys.stdin)
    211   print >> sys.stderr, "%s: read %d PCs from stdin" % (prog_name, len(covered))
    212   missing = instrumented - covered
    213   print >> sys.stderr, "%s: %d PCs missing from coverage" % (prog_name, len(missing))
    214   if (len(missing) > len(instrumented) - len(covered)):
    215     print >> sys.stderr, \
    216         "%s: WARNING: stdin contains PCs not found in binary" % prog_name
    217   for pc in sorted(missing):
    218     print "0x%x" % pc
    219 
    220 if __name__ == '__main__':
    221   prog_name = sys.argv[0]
    222   if len(sys.argv) <= 2:
    223     Usage();
    224 
    225   if sys.argv[1] == "missing":
    226     if len(sys.argv) != 3:
    227       Usage()
    228     PrintMissing(sys.argv[2])
    229     exit(0)
    230 
    231   file_list = []
    232   for f in sys.argv[2:]:
    233     file_list += glob.glob(f)
    234   if not file_list:
    235     Usage()
    236 
    237   if sys.argv[1] == "print":
    238     PrintFiles(file_list)
    239   elif sys.argv[1] == "merge":
    240     MergeAndPrint(file_list)
    241   elif sys.argv[1] == "unpack":
    242     Unpack(file_list)
    243   elif sys.argv[1] == "rawunpack":
    244     RawUnpack(file_list)
    245   else:
    246     Usage()
    247