#!/usr/bin/env python2.7
#
# Copyright 2017 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Compile the SkJumper stage sources and dump them as assembler data.

The script runs in three steps:

  1. Compile `stages` / `stages_lowp` with clang once per instruction-set
     variant, writing the object files into the current directory.
  2. Disassemble each object file with objdump and re-emit every
     instruction as a raw data directive (`.long`, `.byte` or `DB`) with
     the mnemonic kept as a trailing comment.
  3. Write the result to `generated` (preprocessed .S syntax) and
     `generated_win` (the `win_*` objects, using `DB` / `PUBLIC` / `ALIGN`).

Positional arguments, all optional, override the defaults below in this
order: clang, objdump, ccache, stages, stages_lowp, generated, generated_win.
"""

from __future__ import print_function

import contextlib
import re
import subprocess
import sys

clang         = 'clang-4.0'
objdump       = 'gobjdump'
ccache        = 'ccache'
stages        = 'src/jumper/SkJumper_stages.cpp'
stages_lowp   = 'src/jumper/SkJumper_stages_lowp.cpp'
generated     = 'src/jumper/SkJumper_generated.S'
generated_win = 'src/jumper/SkJumper_generated_win.S'

clang         = sys.argv[1] if len(sys.argv) > 1 else clang
objdump       = sys.argv[2] if len(sys.argv) > 2 else objdump
ccache        = sys.argv[3] if len(sys.argv) > 3 else ccache
stages        = sys.argv[4] if len(sys.argv) > 4 else stages
stages_lowp   = sys.argv[5] if len(sys.argv) > 5 else stages_lowp
generated     = sys.argv[6] if len(sys.argv) > 6 else generated
generated_win = sys.argv[7] if len(sys.argv) > 7 else generated_win

# From here on `clang` is the argv prefix shared by every compile command.
clang = [ccache, clang, '-x', 'c++']

cflags = ['-std=c++11', '-Os', '-DJUMPER',
          '-momit-leaf-frame-pointer', '-ffp-contract=fast',
          '-fno-exceptions', '-fno-rtti', '-fno-unwind-tables']

x86     = ['-m32']
win     = ['-DWIN', '-mno-red-zone']
sse2    = ['-msse2', '-mno-sse3', '-mno-ssse3', '-mno-sse4.1']
ssse3   = ['-mssse3', '-mno-sse4.1']
sse41   = ['-msse4.1']
avx     = ['-mavx']
hsw     = ['-mavx2', '-mfma', '-mf16c']
aarch64 = ['--target=aarch64']
vfp4    = [
    '--target=armv7a-linux-gnueabihf',
    '-mfpu=neon-vfpv4',
]


def _compile(flags, source, output):
    """Compile `source` to the object file `output` with cflags + `flags`."""
    subprocess.check_call(clang + cflags + flags +
                          ['-c', source] +
                          ['-o', output])


# (extra flags, source file, object file), compiled in this order.
_builds = [
    (sse2,             stages,      'sse2.o'),
    (sse2 + win,       stages,      'win_sse2.o'),
    (sse2 + x86,       stages,      'x86_sse2.o'),
    (sse2 + win + x86, stages,      'win_x86_sse2.o'),

    (ssse3,            stages_lowp, 'lowp_ssse3.o'),
    (ssse3 + win,      stages_lowp, 'win_lowp_ssse3.o'),

    (sse41,            stages,      'sse41.o'),
    (sse41 + win,      stages,      'win_sse41.o'),

    (avx,              stages,      'avx.o'),
    (avx + win,        stages,      'win_avx.o'),

    (hsw,              stages,      'hsw.o'),
    (hsw + win,        stages,      'win_hsw.o'),
    (hsw,              stages_lowp, 'lowp_hsw.o'),
    (hsw + win,        stages_lowp, 'win_lowp_hsw.o'),

    (aarch64,          stages,      'aarch64.o'),
    (vfp4,             stages,      'vfp4.o'),
]
for _flags, _source, _output in _builds:
    _compile(_flags, _source, _output)

# E.g. 00000000000003a4 <_load_f16>:
_symbol_re = re.compile(r'''[0-9a-f]+ <_?(.*)>:''')


def parse_object_file(dot_o, directive, target=None):
    """Disassemble `dot_o` and print it to sys.stdout as data directives.

    Arguments:
      dot_o:     path of the object file.  If the name contains 'win' the
                 output uses PUBLIC / LABEL PROC / ALIGN and ';' comments,
                 otherwise .globl / HIDDEN / FUNCTION / BALIGN and '//'.
      directive: data directive that introduces each instruction's bytes.
                 With '.long' the hex tokens printed by objdump are emitted
                 as 0x-prefixed hex; with any other directive each token is
                 converted to decimal and the literal/constant sections are
                 dumped along with .text.
      target:    optional value passed to objdump as `--target`.

    Exits the script with a message on stderr if the object file has a
    .rodata section or defines a symbol that is neither a literal/constant
    section nor an `sk_` stage function.
    """
    if 'win' in dot_o:
        globl, hidden, label, comment, align = \
            'PUBLIC', '', ' LABEL PROC', '; ', 'ALIGN '
    else:
        globl, hidden, label, comment, align = \
            '.globl', 'HIDDEN', ':', '// ', 'BALIGN'

    cmd = [objdump]
    if target:
        cmd += ['--target', target]

    # Look for sections we know we can't handle.  This is an explicit exit
    # rather than an assert so the check still runs under `python -O`.
    section_headers = subprocess.check_output(cmd + ['-h', dot_o],
                                              universal_newlines=True)
    for snippet in ['.rodata']:
        if snippet in section_headers:
            sys.exit('Found %s in section.' % snippet)

    if directive == '.long':
        disassemble = ['-d', dot_o]

        def dehex(h):
            return '0x' + h
    else:
        # x86-64... as long as we're using %rip-relative addressing,
        # literal sections should be fine to just dump in with .text.
        disassemble = ['-d',                # DO NOT USE -D.
                       '-z',                # Print zero bytes instead of ...
                       '--insn-width=11',
                       '-j', '.text',
                       '-j', '.literal4',
                       '-j', '.literal16',
                       '-j', '.const',
                       dot_o]

        def dehex(h):
            return str(int(h, 16))

    # Ok.  Let's disassemble.
    listing = subprocess.check_output(cmd + disassemble,
                                      universal_newlines=True)
    for line in listing.split('\n'):
        line = line.strip()

        # Skip blank lines and objdump's file / section banners.
        if not line or line.startswith((dot_o, 'Disassembly')):
            continue

        m = _symbol_re.match(line)
        if m:
            print()
            sym = m.group(1)
            if sym.startswith('.literal'):  # .literal4, .literal16, etc
                print(sym.replace('.literal', align))
            elif sym.startswith('.const'):  # 32-byte constants
                print(align + '32')
            elif not sym.startswith('sk_'):
                sys.exit("build_stages.py can't handle '%s' (yet?)." % sym)
            else:                           # a stage function
                if hidden:
                    print(hidden + ' _' + sym)
                print(globl + ' _' + sym)
                if 'win' not in dot_o:
                    print('FUNCTION(_' + sym + ')')
                print('_' + sym + label)
            continue

        # Tab-separated columns: address, encoded bytes, instruction and,
        # when objdump splits them out, the operands.
        columns = line.split('\t')
        code = columns[1]
        if len(columns) >= 4:
            inst = columns[2]
            args = columns[3]
        else:
            # Mnemonic and operands share a column; split at the first space.
            inst, _, args = columns[2].partition(' ')
        code, inst, args = code.strip(), inst.strip(), args.strip()

        hexed = ','.join(dehex(x) for x in code.split(' '))
        print(' ' + directive + ' ' + hexed + ' '*(36-len(hexed)) +
              comment + inst + (' '*(14-len(inst)) + args if args else ''))


@contextlib.contextmanager
def _stdout_to(path):
    """Send everything printed inside the `with` body to the file `path`.

    The file is closed and the previous sys.stdout restored on exit, even
    when the body fails, so no output is left unflushed.
    """
    saved = sys.stdout
    with open(path, 'w') as out:
        sys.stdout = out
        try:
            yield
        finally:
            sys.stdout = saved


# NOTE(review): the spacing inside the emitted string literals in this file
# is kept exactly as found; confirm it against the checked-in generated files.
_header = '''# Copyright 2017 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# This file is generated semi-automatically with this command:
# $ src/jumper/build_stages.py
'''

_header_win = '''; Copyright 2017 Google Inc.
;
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

; This file is generated semi-automatically with this command:
; $ src/jumper/build_stages.py
'''

with _stdout_to(generated):
    print(_header)
    print('#if defined(__MACH__)')
    print(' #define HIDDEN .private_extern')
    print(' #define FUNCTION(name)')
    print(' #define BALIGN4 .align 2')
    print(' #define BALIGN16 .align 4')
    print(' #define BALIGN32 .align 5')
    print('#else')
    print(' .section .note.GNU-stack,"",%progbits')
    print(' #define HIDDEN .hidden')
    print(' #define FUNCTION(name) .type name,%function')
    print(' #define BALIGN4 .balign 4')
    print(' #define BALIGN16 .balign 16')
    print(' #define BALIGN32 .balign 32')
    print('#endif')

    print('.text')
    print('#if defined(__aarch64__)')
    print('BALIGN4')
    parse_object_file('aarch64.o', '.long')

    print('#elif defined(__arm__)')
    print('BALIGN4')
    parse_object_file('vfp4.o', '.long', target='elf32-littlearm')

    print('#elif defined(__x86_64__)')
    for _obj in ['hsw.o', 'avx.o', 'sse41.o', 'sse2.o',
                 'lowp_hsw.o', 'lowp_ssse3.o']:
        print('BALIGN32')
        parse_object_file(_obj, '.byte')

    print('#elif defined(__i386__)')
    print('BALIGN32')
    parse_object_file('x86_sse2.o', '.byte')

    print('#endif')

with _stdout_to(generated_win):
    print(_header_win)
    print('IFDEF RAX')
    print("_text32 SEGMENT ALIGN(32) 'CODE'")
    for _obj in ['win_hsw.o', 'win_avx.o', 'win_sse41.o', 'win_sse2.o',
                 'win_lowp_hsw.o', 'win_lowp_ssse3.o']:
        print('ALIGN 32')
        parse_object_file(_obj, 'DB')

    print('ELSE')
    print('.MODEL FLAT,C')
    print("_text32 SEGMENT ALIGN(32) 'CODE'")
    print('ALIGN 32')
    parse_object_file('win_x86_sse2.o', 'DB')

    print('ENDIF')
    print('END')