Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 
      3 """
strip_asm.py - Clean up ASM output for the specified file
      5 """
      6 
      7 from argparse import ArgumentParser
      8 import sys
      9 import os
     10 import re
     11 
     12 def find_used_labels(asm):
     13     found = set()
     14     label_re = re.compile("\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
     15     for l in asm.splitlines():
     16         m = label_re.match(l)
     17         if m:
     18             found.add('.L%s' % m.group(1))
     19     return found
     20 
     21 
     22 def normalize_labels(asm):
     23     decls = set()
     24     label_decl = re.compile("^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
     25     for l in asm.splitlines():
     26         m = label_decl.match(l)
     27         if m:
     28             decls.add(m.group(0))
     29     if len(decls) == 0:
     30         return asm
     31     needs_dot = next(iter(decls))[0] != '.'
     32     if not needs_dot:
     33         return asm
     34     for ld in decls:
     35         asm = re.sub("(^|\s+)" + ld + "(?=:|\s)", '\\1.' + ld, asm)
     36     return asm
     37 
     38 
     39 def transform_labels(asm):
     40     asm = normalize_labels(asm)
     41     used_decls = find_used_labels(asm)
     42     new_asm = ''
     43     label_decl = re.compile("^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
     44     for l in asm.splitlines():
     45         m = label_decl.match(l)
     46         if not m or m.group(0) in used_decls:
     47             new_asm += l
     48             new_asm += '\n'
     49     return new_asm
     50 
     51 
     52 def is_identifier(tk):
     53     if len(tk) == 0:
     54         return False
     55     first = tk[0]
     56     if not first.isalpha() and first != '_':
     57         return False
     58     for i in range(1, len(tk)):
     59         c = tk[i]
     60         if not c.isalnum() and c != '_':
     61             return False
     62     return True
     63 
     64 def process_identifiers(l):
     65     """
     66     process_identifiers - process all identifiers and modify them to have
     67     consistent names across all platforms; specifically across ELF and MachO.
     68     For example, MachO inserts an additional understore at the beginning of
     69     names. This function removes that.
     70     """
     71     parts = re.split(r'([a-zA-Z0-9_]+)', l)
     72     new_line = ''
     73     for tk in parts:
     74         if is_identifier(tk):
     75             if tk.startswith('__Z'):
     76                 tk = tk[1:]
     77             elif tk.startswith('_') and len(tk) > 1 and \
     78                     tk[1].isalpha() and tk[1] != 'Z':
     79                 tk = tk[1:]
     80         new_line += tk
     81     return new_line
     82 
     83 
     84 def process_asm(asm):
     85     """
     86     Strip the ASM of unwanted directives and lines
     87     """
     88     new_contents = ''
     89     asm = transform_labels(asm)
     90 
     91     # TODO: Add more things we want to remove
     92     discard_regexes = [
     93         re.compile("\s+\..*$"), # directive
     94         re.compile("\s*#(NO_APP|APP)$"), #inline ASM
     95         re.compile("\s*#.*$"), # comment line
     96         re.compile("\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"), #global directive
     97         re.compile("\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
     98     ]
     99     keep_regexes = [
    100 
    101     ]
    102     fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:")
    103     for l in asm.splitlines():
    104         # Remove Mach-O attribute
    105         l = l.replace('@GOTPCREL', '')
    106         add_line = True
    107         for reg in discard_regexes:
    108             if reg.match(l) is not None:
    109                 add_line = False
    110                 break
    111         for reg in keep_regexes:
    112             if reg.match(l) is not None:
    113                 add_line = True
    114                 break
    115         if add_line:
    116             if fn_label_def.match(l) and len(new_contents) != 0:
    117                 new_contents += '\n'
    118             l = process_identifiers(l)
    119             new_contents += l
    120             new_contents += '\n'
    121     return new_contents
    122 
    123 def main():
    124     parser = ArgumentParser(
    125         description='generate a stripped assembly file')
    126     parser.add_argument(
    127         'input', metavar='input', type=str, nargs=1,
    128         help='An input assembly file')
    129     parser.add_argument(
    130         'out', metavar='output', type=str, nargs=1,
    131         help='The output file')
    132     args, unknown_args = parser.parse_known_args()
    133     input = args.input[0]
    134     output = args.out[0]
    135     if not os.path.isfile(input):
    136         print(("ERROR: input file '%s' does not exist") % input)
    137         sys.exit(1)
    138     contents = None
    139     with open(input, 'r') as f:
    140         contents = f.read()
    141     new_contents = process_asm(contents)
    142     with open(output, 'w') as f:
    143         f.write(new_contents)
    144 
    145 
    146 if __name__ == '__main__':
    147     main()
    148 
    149 # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
    150 # kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
    151 # kate: indent-mode python; remove-trailing-spaces modified;
    152