Home | History | Annotate | Download | only in suite
      1 #!/usr/bin/python
      2 # Test tool to compare Capstone output with llvm-mc. By Nguyen Anh Quynh, 2014
      3 import array, os.path, sys
      4 from subprocess import Popen, PIPE, STDOUT
      5 from capstone import *
      6 
      7 
      8 # convert all hex numbers to decimal numbers in a text
      9 def normalize_hex(a):
     10     while(True):
     11         i = a.find('0x')
     12         if i == -1: # no more hex number
     13             break
     14         hexnum = '0x'
     15         for c in a[i + 2:]:
     16             if c in '0123456789abcdefABCDEF':
     17                 hexnum += c
     18             else:
     19                 break
     20         num = int(hexnum, 16)
     21         a = a.replace(hexnum, str(num))
     22     return a
     23 
     24 
     25 def run_mc(arch, hexcode, option, syntax=None):
     26     def normalize(text):
     27         # remove tabs
     28         text = text.lower()
     29         items = text.split()
     30         text = ' '.join(items)
     31         if arch == CS_ARCH_X86:
     32             # remove comment after #
     33             i = text.find('# ')
     34             if i != -1:
     35                 return text[:i].strip()
     36         if arch == CS_ARCH_ARM64:
     37             # remove comment after #
     38             i = text.find('// ')
     39             if i != -1:
     40                 return text[:i].strip()
     41         # remove some redundant spaces
     42         text = text.replace('{ ', '{')
     43         text = text.replace(' }', '}')
     44         return text.strip()
     45 
     46     #print("Trying to decode: %s" %hexcode)
     47     if syntax:
     48         if arch == CS_ARCH_MIPS:
     49             p = Popen(['llvm-mc', '-disassemble', '-print-imm-hex', '-mattr=+msa', syntax] + option, stdout=PIPE, stdin=PIPE, stderr=STDOUT)
     50         else:
     51             p = Popen(['llvm-mc', '-disassemble', '-print-imm-hex', syntax] + option, stdout=PIPE, stdin=PIPE, stderr=STDOUT)
     52     else:
     53         if arch == CS_ARCH_MIPS:
     54             p = Popen(['llvm-mc', '-disassemble', '-print-imm-hex', '-mattr=+msa'] + option, stdout=PIPE, stdin=PIPE, stderr=STDOUT)
     55         else:
     56             p = Popen(['llvm-mc', '-disassemble', '-print-imm-hex'] + option, stdout=PIPE, stdin=PIPE, stderr=STDOUT)
     57     output = p.communicate(input=hexcode)[0]
     58     lines = output.split('\n')
     59     #print lines
     60     if 'invalid' in lines[0]:
     61         #print 'invalid ----'
     62         return 'FAILED to disassemble (MC)'
     63     else:
     64         #print 'OK:', lines[1]
     65         return normalize(lines[1].strip())
     66 
     67 def test_file(fname):
     68     print("Test %s" %fname);
     69     f = open(fname)
     70     lines = f.readlines()
     71     f.close()
     72 
     73     if not lines[0].startswith('# '):
     74         print("ERROR: decoding information is missing")
     75         return
     76 
     77     # skip '# ' at the front, then split line to get out hexcode
     78     # Note: option can be '', or 'None'
     79     #print lines[0]
     80     #print lines[0][2:].split(', ')
     81     (arch, mode, option) = lines[0][2:].split(', ')
     82     mode = mode.replace(' ', '')
     83     option = option.strip()
     84 
     85     archs = {
     86         "CS_ARCH_ARM": CS_ARCH_ARM,
     87         "CS_ARCH_ARM64": CS_ARCH_ARM64,
     88         "CS_ARCH_MIPS": CS_ARCH_MIPS,
     89         "CS_ARCH_PPC": CS_ARCH_PPC,
     90         "CS_ARCH_SPARC": CS_ARCH_SPARC,
     91         "CS_ARCH_SYSZ": CS_ARCH_SYSZ,
     92         "CS_ARCH_X86": CS_ARCH_X86,
     93         "CS_ARCH_XCORE": CS_ARCH_XCORE,
     94     }
     95     
     96     modes = {
     97         "CS_MODE_16": CS_MODE_16,
     98         "CS_MODE_32": CS_MODE_32,
     99         "CS_MODE_64": CS_MODE_64,
    100         "CS_MODE_MIPS32": CS_MODE_MIPS32,
    101         "CS_MODE_MIPS64": CS_MODE_MIPS64,
    102         "0": CS_MODE_ARM,
    103         "CS_MODE_ARM": CS_MODE_ARM,
    104         "CS_MODE_THUMB": CS_MODE_THUMB,
    105         "CS_MODE_ARM+CS_MODE_V8": CS_MODE_ARM+CS_MODE_V8,
    106         "CS_MODE_THUMB+CS_MODE_V8": CS_MODE_THUMB+CS_MODE_V8,
    107         "CS_MODE_THUMB+CS_MODE_MCLASS": CS_MODE_THUMB+CS_MODE_MCLASS,
    108         "CS_MODE_LITTLE_ENDIAN": CS_MODE_LITTLE_ENDIAN,
    109         "CS_MODE_BIG_ENDIAN": CS_MODE_BIG_ENDIAN,
    110         "CS_MODE_64+CS_MODE_LITTLE_ENDIAN": CS_MODE_64+CS_MODE_LITTLE_ENDIAN,
    111         "CS_MODE_64+CS_MODE_BIG_ENDIAN": CS_MODE_64+CS_MODE_BIG_ENDIAN,
    112         "CS_MODE_MIPS32+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO,
    113         "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
    114         "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
    115         "CS_MODE_BIG_ENDIAN+CS_MODE_V9": CS_MODE_BIG_ENDIAN + CS_MODE_V9,
    116         "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN,
    117         "CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN,
    118         "CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN,
    119         "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN,
    120     }
    121     
    122     options = {
    123         "CS_OPT_SYNTAX_ATT": CS_OPT_SYNTAX_ATT,
    124         "CS_OPT_SYNTAX_NOREGNAME": CS_OPT_SYNTAX_NOREGNAME,
    125     }
    126 
    127     mc_modes = {
    128         ("CS_ARCH_X86", "CS_MODE_32"): ['-triple=i386'],
    129         ("CS_ARCH_X86", "CS_MODE_64"): ['-triple=x86_64'],
    130         ("CS_ARCH_ARM", "CS_MODE_ARM"): ['-triple=armv7'],
    131         ("CS_ARCH_ARM", "CS_MODE_THUMB"): ['-triple=thumbv7'],
    132         ("CS_ARCH_ARM", "CS_MODE_ARM+CS_MODE_V8"): ['-triple=armv8'],
    133         ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_V8"): ['-triple=thumbv8'],
    134         ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_MCLASS"): ['-triple=thumbv7m'],
    135         ("CS_ARCH_ARM64", "0"): ['-triple=aarch64'],
    136         ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN"): ['-triple=mips'],
    137         ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO"): ['-triple=mipsel', '-mattr=+micromips'],
    138         ("CS_ARCH_MIPS", "CS_MODE_MIPS64"): ['-triple=mips64el'],
    139         ("CS_ARCH_MIPS", "CS_MODE_MIPS32"): ['-triple=mipsel'],
    140         ("CS_ARCH_MIPS", "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN"): ['-triple=mips64'],
    141         ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): ['-triple=mips', '-mattr=+micromips'],
    142         ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO"): ['-triple=mips', '-mattr=+micromips'],
    143         ("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN"): ['-triple=powerpc64'],
    144         ('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN'): ['-triple=sparc'],
    145         ('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN+CS_MODE_V9'): ['-triple=sparcv9'],
    146         ('CS_ARCH_SYSZ', '0'): ['-triple=s390x', '-mcpu=z196'],
    147     }
    148 
    149     #if not option in ('', 'None'):
    150     #    print archs[arch], modes[mode], options[option]
    151     
    152     #print(arch, mode, option)
    153     md = Cs(archs[arch], modes[mode])
    154 
    155     mc_option = None
    156     if arch == 'CS_ARCH_X86':
    157         # tell llvm-mc to use Intel syntax
    158         mc_option = '-output-asm-variant=1'
    159 
    160     if arch == 'CS_ARCH_ARM' or arch == 'CS_ARCH_PPC' :
    161         md.syntax = CS_OPT_SYNTAX_NOREGNAME
    162 
    163     if fname.endswith('3DNow.s.cs'):
    164         md.syntax = CS_OPT_SYNTAX_ATT
    165 
    166     for line in lines[1:]:
    167         # ignore all the input lines having # in front.
    168         if line.startswith('#'):
    169             continue
    170         #print("Check %s" %line)
    171         code = line.split(' = ')[0]
    172         asm  = ''.join(line.split(' = ')[1:])
    173         hex_code = code.replace('0x', '')
    174         hex_code = hex_code.replace(',', '')
    175         hex_data = hex_code.decode('hex')
    176         #hex_bytes = array.array('B', hex_data)
    177 
    178         x = list(md.disasm(hex_data, 0))
    179         if len(x) > 0:
    180             if x[0].op_str != '':
    181                 cs_output = "%s %s" %(x[0].mnemonic, x[0].op_str)
    182             else:
    183                 cs_output = x[0].mnemonic
    184         else:
    185             cs_output = 'FAILED to disassemble'
    186 
    187         cs_output2 = normalize_hex(cs_output)
    188         cs_output2 = cs_output2.replace(' ', '')
    189 
    190         if arch == 'CS_ARCH_MIPS':
    191             # normalize register alias names
    192             cs_output2 = cs_output2.replace('$at', '$1')
    193             cs_output2 = cs_output2.replace('$v0', '$2')
    194             cs_output2 = cs_output2.replace('$v1', '$3')
    195 
    196             cs_output2 = cs_output2.replace('$a0', '$4')
    197             cs_output2 = cs_output2.replace('$a1', '$5')
    198             cs_output2 = cs_output2.replace('$a2', '$6')
    199             cs_output2 = cs_output2.replace('$a3', '$7')
    200 
    201             cs_output2 = cs_output2.replace('$t0', '$8')
    202             cs_output2 = cs_output2.replace('$t1', '$9')
    203             cs_output2 = cs_output2.replace('$t2', '$10')
    204             cs_output2 = cs_output2.replace('$t3', '$11')
    205             cs_output2 = cs_output2.replace('$t4', '$12')
    206             cs_output2 = cs_output2.replace('$t5', '$13')
    207             cs_output2 = cs_output2.replace('$t6', '$14')
    208             cs_output2 = cs_output2.replace('$t7', '$15')
    209             cs_output2 = cs_output2.replace('$t8', '$24')
    210             cs_output2 = cs_output2.replace('$t9', '$25')
    211 
    212             cs_output2 = cs_output2.replace('$s0', '$16')
    213             cs_output2 = cs_output2.replace('$s1', '$17')
    214             cs_output2 = cs_output2.replace('$s2', '$18')
    215             cs_output2 = cs_output2.replace('$s3', '$19')
    216             cs_output2 = cs_output2.replace('$s4', '$20')
    217             cs_output2 = cs_output2.replace('$s5', '$21')
    218             cs_output2 = cs_output2.replace('$s6', '$22')
    219             cs_output2 = cs_output2.replace('$s7', '$23')
    220 
    221             cs_output2 = cs_output2.replace('$k0', '$26')
    222             cs_output2 = cs_output2.replace('$k1', '$27')
    223 
    224         #print("Running MC ...")
    225         if fname.endswith('thumb-fp-armv8.s.cs'):
    226             mc_output = run_mc(archs[arch], code, ['-triple=thumbv8'], mc_option)
    227         elif fname.endswith('mips64-alu-instructions.s.cs'):
    228             mc_output = run_mc(archs[arch], code, ['-triple=mips64el', '-mcpu=mips64r2'], mc_option)
    229         else:
    230             mc_output = run_mc(archs[arch], code, mc_modes[(arch, mode)], mc_option)
    231         mc_output2 = normalize_hex(mc_output)
    232 
    233         if arch == 'CS_ARCH_MIPS':
    234             mc_output2 = mc_output2.replace(' 0(', '(')
    235 
    236         if arch == 'CS_ARCH_PPC':
    237             mc_output2 = mc_output2.replace('.+', '')
    238             mc_output2 = mc_output2.replace('.', '')
    239             mc_output2 = mc_output2.replace(' 0(', '(')
    240 
    241         mc_output2 = mc_output2.replace(' ', '')
    242         mc_output2 = mc_output2.replace('opaque', '')
    243 
    244 
    245         if (cs_output2 != mc_output2):
    246             asm = asm.replace(' ', '').strip().lower()
    247             if asm != cs_output2:
    248                 print("Mismatch: %s" %line.strip())
    249                 print("\tMC = %s" %mc_output)
    250                 print("\tCS = %s" %cs_output)
    251 
    252 
    253 if __name__ == '__main__':
    254     if len(sys.argv) == 1:
    255         fnames = sys.stdin.readlines()
    256         for fname in fnames:
    257             test_file(fname.strip())
    258     else:
    259         #print("Usage: ./test_mc.py <input-file.s.cs>")
    260         test_file(sys.argv[1])
    261 
    262