Home | History | Annotate | Download | only in jumper
      1 #!/usr/bin/env python2.7
      2 #
      3 # Copyright 2017 Google Inc.
      4 #
      5 # Use of this source code is governed by a BSD-style license that can be
      6 # found in the LICENSE file.
      7 
      8 import re
      9 import subprocess
     10 import sys
     11 
     12 clang         = 'clang-4.0'
     13 objdump       = 'gobjdump'
     14 ccache        = 'ccache'
     15 stages        = 'src/jumper/SkJumper_stages.cpp'
     16 stages_lowp   = 'src/jumper/SkJumper_stages_lowp.cpp'
     17 generated     = 'src/jumper/SkJumper_generated.S'
     18 generated_win = 'src/jumper/SkJumper_generated_win.S'
     19 
     20 clang         = sys.argv[1] if len(sys.argv) > 1 else clang
     21 objdump       = sys.argv[2] if len(sys.argv) > 2 else objdump
     22 ccache        = sys.argv[3] if len(sys.argv) > 3 else ccache
     23 stages        = sys.argv[4] if len(sys.argv) > 4 else stages
     24 stages_lowp   = sys.argv[5] if len(sys.argv) > 5 else stages_lowp
     25 generated     = sys.argv[6] if len(sys.argv) > 6 else generated
     26 generated_win = sys.argv[7] if len(sys.argv) > 7 else generated_win
     27 
     28 clang = [ccache, clang, '-x', 'c++']
     29 
     30 
     31 cflags = ['-std=c++11', '-Os', '-DJUMPER',
     32           '-momit-leaf-frame-pointer', '-ffp-contract=fast',
     33           '-fno-exceptions', '-fno-rtti', '-fno-unwind-tables']
     34 
     35 x86 = [ '-m32' ]
     36 win = ['-DWIN', '-mno-red-zone']
     37 sse2 = ['-msse2', '-mno-sse3', '-mno-ssse3', '-mno-sse4.1']
     38 subprocess.check_call(clang + cflags + sse2 +
     39                       ['-c', stages] +
     40                       ['-o', 'sse2.o'])
     41 subprocess.check_call(clang + cflags + sse2 + win +
     42                       ['-c', stages] +
     43                       ['-o', 'win_sse2.o'])
     44 subprocess.check_call(clang + cflags + sse2 + x86 +
     45                       ['-c', stages] +
     46                       ['-o', 'x86_sse2.o'])
     47 subprocess.check_call(clang + cflags + sse2 + win + x86 +
     48                       ['-c', stages] +
     49                       ['-o', 'win_x86_sse2.o'])
     50 
     51 ssse3 = ['-mssse3', '-mno-sse4.1']
     52 subprocess.check_call(clang + cflags + ssse3 +
     53                       ['-c', stages_lowp] +
     54                       ['-o', 'lowp_ssse3.o'])
     55 subprocess.check_call(clang + cflags + ssse3 + win +
     56                       ['-c', stages_lowp] +
     57                       ['-o', 'win_lowp_ssse3.o'])
     58 
     59 sse41 = ['-msse4.1']
     60 subprocess.check_call(clang + cflags + sse41 +
     61                       ['-c', stages] +
     62                       ['-o', 'sse41.o'])
     63 subprocess.check_call(clang + cflags + sse41 + win +
     64                       ['-c', stages] +
     65                       ['-o', 'win_sse41.o'])
     66 
     67 avx = ['-mavx']
     68 subprocess.check_call(clang + cflags + avx +
     69                       ['-c', stages] +
     70                       ['-o', 'avx.o'])
     71 subprocess.check_call(clang + cflags + avx + win +
     72                       ['-c', stages] +
     73                       ['-o', 'win_avx.o'])
     74 
     75 hsw = ['-mavx2', '-mfma', '-mf16c']
     76 subprocess.check_call(clang + cflags + hsw +
     77                       ['-c', stages] +
     78                       ['-o', 'hsw.o'])
     79 subprocess.check_call(clang + cflags + hsw + win +
     80                       ['-c', stages] +
     81                       ['-o', 'win_hsw.o'])
     82 subprocess.check_call(clang + cflags + hsw +
     83                       ['-c', stages_lowp] +
     84                       ['-o', 'lowp_hsw.o'])
     85 subprocess.check_call(clang + cflags + hsw + win +
     86                       ['-c', stages_lowp] +
     87                       ['-o', 'win_lowp_hsw.o'])
     88 
     89 aarch64 = [ '--target=aarch64' ]
     90 subprocess.check_call(clang + cflags + aarch64 +
     91                       ['-c', stages] +
     92                       ['-o', 'aarch64.o'])
     93 
     94 vfp4 = [
     95     '--target=armv7a-linux-gnueabihf',
     96     '-mfpu=neon-vfpv4',
     97 ]
     98 subprocess.check_call(clang + cflags + vfp4 +
     99                       ['-c', stages] +
    100                       ['-o', 'vfp4.o'])
    101 
    102 def parse_object_file(dot_o, directive, target=None):
    103   globl, hidden, label, comment, align = \
    104       '.globl', 'HIDDEN', ':', '// ', 'BALIGN'
    105   if 'win' in dot_o:
    106     globl, hidden, label, comment, align = \
    107         'PUBLIC', '', ' LABEL PROC', '; ', 'ALIGN '
    108 
    109   cmd = [objdump]
    110   if target:
    111     cmd += ['--target', target]
    112 
    113   # Look for sections we know we can't handle.
    114   section_headers = subprocess.check_output(cmd + ['-h', dot_o])
    115   for snippet in ['.rodata']:
    116     if snippet in section_headers:
    117       print >>sys.stderr, 'Found %s in section.' % snippet
    118       assert snippet not in section_headers
    119 
    120   if directive == '.long':
    121     disassemble = ['-d', dot_o]
    122     dehex = lambda h: '0x'+h
    123   else:
    124     # x86-64... as long as we're using %rip-relative addressing,
    125     # literal sections should be fine to just dump in with .text.
    126     disassemble = ['-d',               # DO NOT USE -D.
    127                    '-z',               # Print zero bytes instead of ...
    128                    '--insn-width=11',
    129                    '-j', '.text',
    130                    '-j', '.literal4',
    131                    '-j', '.literal16',
    132                    '-j', '.const',
    133                    dot_o]
    134     dehex = lambda h: str(int(h,16))
    135 
    136   # Ok.  Let's disassemble.
    137   for line in subprocess.check_output(cmd + disassemble).split('\n'):
    138     line = line.strip()
    139 
    140     if not line or line.startswith(dot_o) or line.startswith('Disassembly'):
    141       continue
    142 
    143     # E.g. 00000000000003a4 <_load_f16>:
    144     m = re.match('''[0-9a-f]+ <_?(.*)>:''', line)
    145     if m:
    146       print
    147       sym = m.group(1)
    148       if sym.startswith('.literal'):  # .literal4, .literal16, etc
    149         print sym.replace('.literal', align)
    150       elif sym.startswith('.const'):  # 32-byte constants
    151         print align + '32'
    152       elif not sym.startswith('sk_'):
    153         print >>sys.stderr, "build_stages.py can't handle '%s' (yet?)." % sym
    154         assert sym.startswith('sk_')
    155       else:  # a stage function
    156         if hidden:
    157           print hidden + ' _' + sym
    158         print globl + ' _' + sym
    159         if 'win' not in dot_o:
    160           print 'FUNCTION(_' + sym + ')'
    161         print '_' + sym + label
    162       continue
    163 
    164     columns = line.split('\t')
    165    #print >>sys.stderr, columns
    166     code = columns[1]
    167     if len(columns) >= 4:
    168       inst = columns[2]
    169       args = columns[3]
    170     else:
    171       inst, args = columns[2], ''
    172       if ' ' in columns[2]:
    173         inst, args = columns[2].split(' ', 1)
    174     code, inst, args = code.strip(), inst.strip(), args.strip()
    175 
    176     hexed = ','.join(dehex(x) for x in code.split(' '))
    177     print '  ' + directive + '  ' + hexed + ' '*(36-len(hexed)) + \
    178           comment + inst + (' '*(14-len(inst)) + args if args else '')
    179 
    180 sys.stdout = open(generated, 'w')
    181 
    182 print '''# Copyright 2017 Google Inc.
    183 #
    184 # Use of this source code is governed by a BSD-style license that can be
    185 # found in the LICENSE file.
    186 
    187 # This file is generated semi-automatically with this command:
    188 #   $ src/jumper/build_stages.py
    189 '''
    190 print '#if defined(__MACH__)'
    191 print '    #define HIDDEN .private_extern'
    192 print '    #define FUNCTION(name)'
    193 print '    #define BALIGN4  .align 2'
    194 print '    #define BALIGN16 .align 4'
    195 print '    #define BALIGN32 .align 5'
    196 print '#else'
    197 print '    .section .note.GNU-stack,"",%progbits'
    198 print '    #define HIDDEN .hidden'
    199 print '    #define FUNCTION(name) .type name,%function'
    200 print '    #define BALIGN4  .balign 4'
    201 print '    #define BALIGN16 .balign 16'
    202 print '    #define BALIGN32 .balign 32'
    203 print '#endif'
    204 
    205 print '.text'
    206 print '#if defined(__aarch64__)'
    207 print 'BALIGN4'
    208 parse_object_file('aarch64.o', '.long')
    209 
    210 print '#elif defined(__arm__)'
    211 print 'BALIGN4'
    212 parse_object_file('vfp4.o', '.long', target='elf32-littlearm')
    213 
    214 print '#elif defined(__x86_64__)'
    215 print 'BALIGN32'
    216 parse_object_file('hsw.o',   '.byte')
    217 print 'BALIGN32'
    218 parse_object_file('avx.o',   '.byte')
    219 print 'BALIGN32'
    220 parse_object_file('sse41.o', '.byte')
    221 print 'BALIGN32'
    222 parse_object_file('sse2.o',  '.byte')
    223 print 'BALIGN32'
    224 parse_object_file('lowp_hsw.o',  '.byte')
    225 print 'BALIGN32'
    226 parse_object_file('lowp_ssse3.o',  '.byte')
    227 
    228 print '#elif defined(__i386__)'
    229 print 'BALIGN32'
    230 parse_object_file('x86_sse2.o', '.byte')
    231 
    232 print '#endif'
    233 
    234 sys.stdout = open(generated_win, 'w')
    235 print '''; Copyright 2017 Google Inc.
    236 ;
    237 ; Use of this source code is governed by a BSD-style license that can be
    238 ; found in the LICENSE file.
    239 
    240 ; This file is generated semi-automatically with this command:
    241 ;   $ src/jumper/build_stages.py
    242 '''
    243 print 'IFDEF RAX'
    244 print "_text32 SEGMENT ALIGN(32) 'CODE'"
    245 print 'ALIGN 32'
    246 parse_object_file('win_hsw.o',   'DB')
    247 print 'ALIGN 32'
    248 parse_object_file('win_avx.o',   'DB')
    249 print 'ALIGN 32'
    250 parse_object_file('win_sse41.o', 'DB')
    251 print 'ALIGN 32'
    252 parse_object_file('win_sse2.o',  'DB')
    253 print 'ALIGN 32'
    254 parse_object_file('win_lowp_hsw.o',  'DB')
    255 print 'ALIGN 32'
    256 parse_object_file('win_lowp_ssse3.o',  'DB')
    257 
    258 print 'ELSE'
    259 print '.MODEL FLAT,C'
    260 print "_text32 SEGMENT ALIGN(32) 'CODE'"
    261 print 'ALIGN 32'
    262 parse_object_file('win_x86_sse2.o', 'DB')
    263 
    264 print 'ENDIF'
    265 print 'END'
    266