Home | History | Annotate | Download | only in pydir
      1 #!/usr/bin/env python2
      2 
      3 import argparse
      4 import os
      5 import pipes
      6 import re
      7 import sys
      8 
      9 from utils import FindBaseNaCl, GetObjcopyCmd, get_sfi_string, shellcmd
     10 
     11 def NewerThanOrNotThere(old_path, new_path):
     12     """Returns whether old_path is newer than new_path.
     13 
     14     Also returns true if either path doesn't exist.
     15     """
     16     if not (os.path.exists(old_path) and os.path.exists(new_path)):
     17         return True
     18     return os.path.getmtime(old_path) > os.path.getmtime(new_path)
     19 
     20 def BuildRegex(patterns, syms):
     21     """Build a regular expression string for inclusion or exclusion.
     22 
     23     Creates a regex string from an array of patterns and an array
     24     of symbol names.  Each element in the patterns array is either a
     25     regex, or a range of entries in the symbol name array, e.g. '2:9'.
     26     """
     27     pattern_list = []
     28     for pattern in patterns:
     29         if pattern[0].isdigit() or pattern[0] == ':':
     30             # Legitimate symbols or regexes shouldn't start with a
     31             # digit or a ':', so interpret the pattern as a range.
     32             interval = pattern.split(':')
     33             if len(interval) == 1:
     34                 # Treat singleton 'n' as 'n:n+1'.
     35                 lower = int(interval[0])
     36                 upper = lower + 1
     37             elif len(interval) == 2:
     38                 # Handle 'a:b', 'a:', and ':b' with suitable defaults.
     39                 lower = int(interval[0]) if len(interval[0]) else 0
     40                 upper = int(interval[1]) if len(interval[1]) else len(syms)
     41             else:
     42                 print 'Invalid range syntax: {p}'.format(p=pattern)
     43                 exit(1)
     44             pattern = '$|^'.join([re.escape(p) for p in syms[lower:upper]])
     45         pattern_list.append('^' + pattern + '$')
     46     return '|'.join(pattern_list) if len(pattern_list) else '^$'
     47 
     48 def MatchSymbol(sym, re_include, re_exclude, default_match):
     49     """Match a symbol name against inclusion/exclusion rules.
     50 
     51     Returns True or False depending on whether the given symbol
     52     matches the compiled include or exclude regexes.  The default is
     53     returned if neither the include nor the exclude regex matches.
     54     """
     55     if re_exclude.match(sym):
     56         # Always honor an explicit exclude before considering
     57         # includes.
     58         return False
     59     if re_include.match(sym):
     60         return True
     61     return default_match
     62 
     63 def AddOptionalArgs(argparser):
     64     argparser.add_argument('--force', dest='force', type=int, choices=[0, 1],
     65                            default=1,
     66                            help='Force all re-translations of the pexe.' +
     67                                 ' Default %(default)s.')
     68     argparser.add_argument('--include', '-i', default=[], dest='include',
     69                            action='append',
     70                            help='Subzero symbols to include ' +
     71                                 '(regex or line range)')
     72     argparser.add_argument('--exclude', '-e', default=[], dest='exclude',
     73                            action='append',
     74                            help='Subzero symbols to exclude ' +
     75                                 '(regex or line range)')
     76     argparser.add_argument('--output', '-o', default='a.out', dest='output',
     77                            action='store',
     78                            help='Output executable. Default %(default)s.')
     79     argparser.add_argument('-O', default='2', dest='optlevel',
     80                            choices=['m1', '-1', '0', '1', '2'],
     81                            help='Optimization level ' +
     82                                 '(m1 and -1 are equivalent).' +
     83                                 ' Default %(default)s.')
     84     argparser.add_argument('--filetype', default='iasm', dest='filetype',
     85                            choices=['obj', 'asm', 'iasm'],
     86                            help='Output file type.  Default %(default)s.')
     87     argparser.add_argument('--sandbox', dest='sandbox', action='store_true',
     88                            help='Enable sandboxing in the translator')
     89     argparser.add_argument('--nonsfi', dest='nonsfi', action='store_true',
     90                            help='Enable Non-SFI in the translator')
     91     argparser.add_argument('--enable-block-profile',
     92                            dest='enable_block_profile', action='store_true',
     93                            help='Enable basic block profiling.')
     94     argparser.add_argument('--target', default='x8632', dest='target',
     95                            choices=['arm32', 'x8632', 'x8664'],
     96                            help='Generate code for specified target.')
     97     argparser.add_argument('--verbose', '-v', dest='verbose',
     98                            action='store_true',
     99                            help='Display some extra debugging output')
    100     argparser.add_argument('--sz', dest='sz_args', action='append', default=[],
    101                            help='Extra arguments for Subzero')
    102     argparser.add_argument('--llc', dest='llc_args', action='append',
    103                            default=[], help='Extra arguments for llc')
    104     argparser.add_argument('--no-sz', dest='nosz', action='store_true',
    105                            help='Run only post-Subzero build steps')
    106     argparser.add_argument('--fsanitize-address', dest='asan',
    107                            action='store_true',
    108                            help='Instrument with AddressSanitizer')
    109 
    110 def LinkSandbox(objs, exe, target, verbose=True):
    111     assert target in ('x8632', 'x8664', 'arm32'), \
    112         '-sandbox is not available for %s' % target
    113     nacl_root = FindBaseNaCl()
    114     gold = ('{root}/toolchain/linux_x86/pnacl_newlib_raw/bin/' +
    115             'le32-nacl-ld.gold').format(root=nacl_root)
    116     target_lib_dir = {
    117       'arm32': 'arm',
    118       'x8632': 'x86-32',
    119       'x8664': 'x86-64',
    120     }[target]
    121     linklib = ('{root}/toolchain/linux_x86/pnacl_newlib_raw/translator/' +
    122                '{target_dir}/lib').format(root=nacl_root,
    123                                           target_dir=target_lib_dir)
    124     shellcmd([gold,
    125               '-nostdlib',
    126               '--no-fix-cortex-a8',
    127               '--eh-frame-hdr',
    128               '-z', 'text',
    129               #'-z', 'noexecstack',
    130               '--build-id',
    131               '--entry=__pnacl_start',
    132               '-static', #'-pie',
    133               '{linklib}/crtbegin.o'.format(linklib=linklib)] +
    134              objs +
    135              [('{root}/toolchain_build/src/subzero/build/runtime/' +
    136                'szrt_sb_{target}.o').format(root=nacl_root, target=target),
    137               '{linklib}/libpnacl_irt_shim_dummy.a'.format(linklib=linklib),
    138               '--start-group',
    139               '{linklib}/libgcc.a'.format(linklib=linklib),
    140               '{linklib}/libcrt_platform.a'.format(linklib=linklib),
    141               '--end-group',
    142               '{linklib}/crtend.o'.format(linklib=linklib),
    143               '--undefined=_start',
    144               '--defsym=__Sz_AbsoluteZero=0',
    145               #'--defsym=_begin=0',
    146               '-o', exe
    147              ], echo=verbose)
    148 
    149 def LinkNonsfi(objs, exe, target, verbose=True):
    150     nacl_root = FindBaseNaCl()
    151     gold = ('{root}/toolchain/linux_x86/pnacl_newlib_raw/bin/' +
    152             'le32-nacl-ld.gold').format(root=nacl_root)
    153     target_lib_dir = {
    154       'arm32': 'arm-nonsfi',
    155       'x8632': 'x86-32-nonsfi',
    156     }[target]
    157     linklib = ('{root}/toolchain/linux_x86/pnacl_newlib_raw/translator/' +
    158                '{target_dir}/lib').format(root=nacl_root,
    159                                           target_dir=target_lib_dir)
    160     shellcmd([gold,
    161               '-nostdlib',
    162               '--no-fix-cortex-a8',
    163               '--eh-frame-hdr',
    164               '-z', 'text',
    165               '-z', 'noexecstack',
    166               '--build-id',
    167               '--entry=__pnacl_start',
    168               '-pie',
    169               '{linklib}/crtbegin.o'.format(linklib=linklib)] +
    170              objs +
    171              [('{root}/toolchain_build/src/subzero/build/runtime/' +
    172                'szrt_nonsfi_{target}.o').format(root=nacl_root, target=target),
    173               '{linklib}/libpnacl_irt_shim_dummy.a'.format(linklib=linklib),
    174               '--start-group',
    175               '{linklib}/libgcc.a'.format(linklib=linklib),
    176               '{linklib}/libcrt_platform.a'.format(linklib=linklib),
    177               '--end-group',
    178               '{linklib}/crtend.o'.format(linklib=linklib),
    179               '--undefined=_start',
    180               '--defsym=__Sz_AbsoluteZero=0',
    181               '--defsym=_begin=0',
    182               '-o', exe
    183              ], echo=verbose)
    184 
    185 def LinkNative(objs, exe, target, verbose=True):
    186     nacl_root = FindBaseNaCl()
    187     linker = {
    188       'arm32': '/usr/bin/arm-linux-gnueabihf-g++',
    189       'mips32': '/usr/bin/mipsel-linux-gnu-g++',
    190       'x8632': ('{root}/../third_party/llvm-build/Release+Asserts/bin/clang'
    191                ).format(root=nacl_root),
    192       'x8664': ('{root}/../third_party/llvm-build/Release+Asserts/bin/clang'
    193                ).format(root=nacl_root)
    194     }[target]
    195 
    196     extra_linker_args = {
    197       'arm32': ['-mcpu=cortex-a9'],
    198       'x8632': ['-m32'],
    199       'x8664': ['-mx32']
    200     }[target]
    201 
    202     lib_dir = {
    203       'arm32': 'arm-linux',
    204       'x8632': 'x86-32-linux',
    205       'x8664': 'x86-64-linux',
    206     }[target]
    207 
    208     shellcmd([linker] +
    209              extra_linker_args +
    210              objs +
    211              ['-o', exe,
    212               ('{root}/toolchain/linux_x86/pnacl_newlib_raw/translator/' +
    213                '{lib_dir}/lib/' +
    214                '{{unsandboxed_irt,irt_random,irt_query_list}}.o').format(
    215                    root=nacl_root, lib_dir=lib_dir),
    216               ('{root}/toolchain_build/src/subzero/build/runtime/' +
    217                'szrt_native_{target}.o').format(root=nacl_root, target=target),
    218               '-lm', '-lpthread', '-lrt',
    219               '-Wl,--defsym=__Sz_AbsoluteZero=0'
    220              ], echo=verbose)
    221 
    222 def main():
    223     """Create a hybrid translation from Subzero and llc.
    224 
    225     Takes a finalized pexe and builds a native executable as a hybrid of Subzero
    226     and llc translated bitcode.  Linker tricks are used to determine whether
    227     Subzero or llc generated symbols are used, on a per-symbol basis.
    228 
    229     By default, for every symbol, its Subzero version is used.  Subzero and llc
    230     symbols can be selectively enabled/disabled via regular expressions on the
    231     symbol name, or by ranges of lines in this program's auto-generated symbol
    232     file.
    233 
    234     For each symbol, the --exclude arguments are first checked (the symbol is
    235     'rejected' on a match), followed by the --include arguments (the symbol is
    236     'accepted' on a match), followed by unconditional 'rejection'.  The Subzero
    237     version is used for an 'accepted' symbol, and the llc version is used for a
    238     'rejected' symbol.
    239 
    240     Each --include and --exclude argument can be a regular expression or a range
    241     of lines in the symbol file.  Each regular expression is wrapped inside
    242     '^$', so if you want a substring match on 'foo', use '.*foo.*' instead.
    243     Ranges use python-style 'first:last' notation, so e.g. use '0:10' or ':10'
    244     for the first 10 lines of the file, or '1' for the second line of the file.
    245 
    246     If no --include or --exclude arguments are given, the executable is produced
    247     entirely using Subzero, without using llc or linker tricks.
    248 
    249     When using the --force=0 option, this script uses file modification
    250     timestamps to determine whether llc and Subzero re-translation are needed.
    251     It checks timestamps of llc, pnacl-sz, and the pexe against the translated
    252     object files to determine the minimal work necessary.  The --force=1 option
    253     (default) suppresses those checks and re-translates everything.
    254 
    255     This script expects various PNaCl and LLVM tools to be found within the
    256     native_client tree.  When changes are made to these tools, copy them this
    257     way:
    258       cd native_client
    259       toolchain_build/toolchain_build_pnacl.py llvm_x86_64_linux \\
    260       --install=toolchain/linux_x86/pnacl_newlib_raw
    261     """
    262     argparser = argparse.ArgumentParser(
    263         description='    ' + main.__doc__,
    264         formatter_class=argparse.RawTextHelpFormatter)
    265     AddOptionalArgs(argparser)
    266     argparser.add_argument('pexe', help='Finalized pexe to translate')
    267     args = argparser.parse_args()
    268     pexe = args.pexe
    269     exe = args.output
    270     ProcessPexe(args, pexe, exe)
    271 
    272 def ProcessPexe(args, pexe, exe):
    273     [pexe_base, ext] = os.path.splitext(pexe)
    274     if ext != '.pexe':
    275         pexe_base = pexe
    276     pexe_base_unescaped = pexe_base
    277     pexe_base = pipes.quote(pexe_base)
    278     pexe = pipes.quote(pexe)
    279 
    280     nacl_root = FindBaseNaCl()
    281     path_addition = (
    282         '{root}/toolchain/linux_x86/pnacl_newlib_raw/bin'
    283         ).format(root=nacl_root)
    284     obj_llc = pexe_base + '.llc.o'
    285     obj_sz = pexe_base + '.sz.o'
    286     asm_sz = pexe_base + '.sz.s'
    287     obj_llc_weak = pexe_base + '.weak.llc.o'
    288     obj_sz_weak = pexe_base + '.weak.sz.o'
    289     obj_partial = obj_sz  # overridden for hybrid mode
    290     sym_llc = pexe_base + '.sym.llc.txt'
    291     sym_sz = pexe_base + '.sym.sz.txt'
    292     sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt'
    293     whitelist_sz = pexe_base + '.wl.sz.txt'
    294     whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt'
    295     pnacl_sz = (
    296         '{root}/toolchain_build/src/subzero/pnacl-sz'
    297         ).format(root=nacl_root)
    298     llcbin = '{base}/pnacl-llc'.format(base=path_addition)
    299     gold = '{base}/le32-nacl-ld.gold'.format(base=path_addition)
    300     objcopy = '{base}/{objcopy}'.format(base=path_addition,
    301                                         objcopy=GetObjcopyCmd(args.target))
    302     opt_level = args.optlevel
    303     opt_level_map = { 'm1':'0', '-1':'0', '0':'0', '1':'1', '2':'2' }
    304     hybrid = args.include or args.exclude
    305     native = not args.sandbox and not args.nonsfi
    306     if args.asan:
    307         if args.sandbox or args.nonsfi:
    308             print 'Can only use AddressSanitizer with a native build'
    309             exit(1)
    310         if '-fsanitize-address' not in args.sz_args:
    311           args.sz_args.append('-fsanitize-address')
    312 
    313     if hybrid and (args.force or
    314                    NewerThanOrNotThere(pexe, obj_llc) or
    315                    NewerThanOrNotThere(llcbin, obj_llc)):
    316         arch = {
    317           'arm32': 'arm' + get_sfi_string(args, 'v7', '-nonsfi', '-nonsfi'),
    318           'x8632': 'x86-32' + get_sfi_string(args, '', '-nonsfi', '-linux'),
    319           'x8664': 'x86-64' + get_sfi_string(args, '', '', '-linux')
    320         }[args.target]
    321 
    322         # Only run pnacl-translate in hybrid mode.
    323         shellcmd(['{base}/pnacl-translate'.format(base=path_addition),
    324                   '-split-module=1',
    325                   '-ffunction-sections',
    326                   '-fdata-sections',
    327                   '-c',
    328                   '-arch',  arch,
    329                   '-O' + opt_level_map[opt_level],
    330                   '--pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize',
    331                   '-o', obj_llc] +
    332                  (['--pnacl-driver-verbose'] if args.verbose else []) +
    333                  args.llc_args +
    334                  [pexe],
    335                  echo=args.verbose)
    336         if native:
    337             shellcmd((
    338                 '{objcopy} --redefine-sym _start=_user_start {obj}'
    339                 ).format(objcopy=objcopy, obj=obj_llc), echo=args.verbose)
    340         # Generate llc syms file for consistency, even though it's not used.
    341         shellcmd((
    342             'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
    343             ).format(obj=obj_llc, sym=sym_llc), echo=args.verbose)
    344 
    345     if (args.force or
    346         NewerThanOrNotThere(pexe, obj_sz) or
    347         NewerThanOrNotThere(pnacl_sz, obj_sz)):
    348         if not args.nosz:
    349             # Run pnacl-sz regardless of hybrid mode.
    350             shellcmd([pnacl_sz,
    351                       '-O' + opt_level,
    352                       '-bitcode-format=pnacl',
    353                       '-filetype=' + args.filetype,
    354                       '-o', obj_sz if args.filetype == 'obj' else asm_sz,
    355                       '-target=' + args.target] +
    356                      (['-externalize',
    357                        '-ffunction-sections',
    358                        '-fdata-sections'] if hybrid else []) +
    359                      (['-sandbox'] if args.sandbox else []) +
    360                      (['-nonsfi'] if args.nonsfi else []) +
    361                      (['-enable-block-profile'] if
    362                           args.enable_block_profile and not args.sandbox
    363                           else []) +
    364                      args.sz_args +
    365                      [pexe],
    366                      echo=args.verbose)
    367         if args.filetype != 'obj':
    368             triple = {
    369               'arm32': 'arm' + get_sfi_string(args, '-nacl', '', ''),
    370               'x8632': 'i686' + get_sfi_string(args, '-nacl', '', ''),
    371               'x8664': 'x86_64' +
    372                         get_sfi_string(args, '-nacl', '-linux-gnux32',
    373                                        '-linux-gnux32'),
    374             }[args.target]
    375 
    376             shellcmd((
    377                 '{base}/llvm-mc -triple={triple} -filetype=obj -o {obj} {asm}'
    378                 ).format(base=path_addition, asm=asm_sz, obj=obj_sz,
    379                          triple=triple),
    380                      echo=args.verbose)
    381         if native:
    382             shellcmd((
    383                 '{objcopy} --redefine-sym _start=_user_start {obj}'
    384                 ).format(objcopy=objcopy, obj=obj_sz), echo=args.verbose)
    385         if hybrid:
    386             shellcmd((
    387                 'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
    388                 ).format(obj=obj_sz, sym=sym_sz), echo=args.verbose)
    389 
    390     if hybrid:
    391         with open(sym_sz_unescaped) as f:
    392             sz_syms = f.read().splitlines()
    393         re_include_str = BuildRegex(args.include, sz_syms)
    394         re_exclude_str = BuildRegex(args.exclude, sz_syms)
    395         re_include = re.compile(re_include_str)
    396         re_exclude = re.compile(re_exclude_str)
    397         # If a symbol doesn't explicitly match re_include or re_exclude,
    398         # the default MatchSymbol() result is True, unless some --include
    399         # args are provided.
    400         default_match = not args.include
    401 
    402         whitelist_has_items = False
    403         with open(whitelist_sz_unescaped, 'w') as f:
    404             for sym in sz_syms:
    405                 if MatchSymbol(sym, re_include, re_exclude, default_match):
    406                     f.write(sym + '\n')
    407                     whitelist_has_items = True
    408         shellcmd((
    409             '{objcopy} --weaken {obj} {weak}'
    410             ).format(objcopy=objcopy, obj=obj_sz, weak=obj_sz_weak),
    411             echo=args.verbose)
    412         if whitelist_has_items:
    413             # objcopy returns an error if the --weaken-symbols file is empty.
    414             shellcmd((
    415                 '{objcopy} --weaken-symbols={whitelist} {obj} {weak}'
    416                 ).format(objcopy=objcopy,
    417                          whitelist=whitelist_sz, obj=obj_llc,
    418                          weak=obj_llc_weak),
    419                      echo=args.verbose)
    420         else:
    421             shellcmd((
    422                 '{objcopy} {obj} {weak}'
    423                 ).format(objcopy=objcopy, obj=obj_llc, weak=obj_llc_weak),
    424                 echo=args.verbose)
    425         obj_partial = pexe_base + '.o'
    426         ld = {
    427           'arm32': 'arm-linux-gnueabihf-ld',
    428           'x8632': 'ld',
    429           'x8664': 'ld',
    430         }[args.target]
    431         emulation = {
    432           'arm32': 'armelf_linux_eabi',
    433           'x8632': 'elf_i386',
    434           'x8664': 'elf32_x86_64' if not args.sandbox else 'elf_x86_64',
    435         }[args.target]
    436         shellcmd((
    437             '{ld} -r -m {emulation} -o {partial} {sz} {llc}'
    438             ).format(ld=ld, emulation=emulation, partial=obj_partial,
    439                      sz=obj_sz_weak, llc=obj_llc_weak),
    440                  echo=args.verbose)
    441         shellcmd((
    442             '{objcopy} -w --localize-symbol="*" {partial}'
    443             ).format(objcopy=objcopy, partial=obj_partial),
    444             echo=args.verbose)
    445         shellcmd((
    446             '{objcopy} --globalize-symbol={start} ' +
    447             '--globalize-symbol=__Sz_block_profile_info {partial}'
    448             ).format(objcopy=objcopy, partial=obj_partial,
    449                      start=get_sfi_string(args, '_start', '_start',
    450                                           '_user_start')),
    451                  echo=args.verbose)
    452 
    453     # Run the linker regardless of hybrid mode.
    454     if args.sandbox:
    455         LinkSandbox([obj_partial], exe, args.target, args.verbose)
    456     elif args.nonsfi:
    457         LinkNonsfi([obj_partial], exe, args.target, args.verbose)
    458     else:
    459         objs = [obj_partial]
    460         if args.asan:
    461             objs.append(
    462                 ('{root}/toolchain_build/src/subzero/build/runtime/' +
    463                  'szrt_asan_{target}.o').format(root=nacl_root,
    464                                                 target=args.target))
    465         LinkNative(objs, exe, args.target, args.verbose)
    466 
    467     # Put the extra verbose printing at the end.
    468     if args.verbose and hybrid:
    469         print 'include={regex}'.format(regex=re_include_str)
    470         print 'exclude={regex}'.format(regex=re_exclude_str)
    471         print 'default_match={dm}'.format(dm=default_match)
    472         print 'Number of Subzero syms = {num}'.format(num=len(sz_syms))
    473 
# Run the translation driver only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()
    476