"""ARM/NEON assembly emitter.

Used by code generators to produce ARM assembly with NEON simd code.
Provides tools for easier register management: named register variable
allocation/deallocation, and offers a more procedural/structured approach
to generating assembly.

TODO: right now neon emitter prints out assembly instructions immediately,
it might be beneficial to keep the whole structure and emit the assembly after
applying some optimizations like: instruction reordering or register reuse.

TODO: NeonRegister object assigns explicit registers at allocation time.
Similarly to emitting code, register mapping and reuse can be performed and
optimized lazily.
"""


class Error(Exception):
    """Module level error."""


class RegisterAllocationError(Error):
    """Cannot allocate registers."""


class RegisterDeallocationError(Error):
    """Cannot deallocate a register (unknown register kind)."""


class LaneError(Error):
    """Wrong lane number."""


def Low(register):
    """Return the name of the lower double register of a quad register.

    Quad register 'qN' maps to double registers 'd(2N)' and 'd(2N+1)'; this
    returns 'd(2N)'.

    Args:
      register: quad register name, e.g. 'q3'.

    Returns:
      Double register name, e.g. 'd6'.
    """
    assert register[0] == 'q'
    num = int(register[1:])
    return 'd%d' % (num * 2)


def High(register):
    """Return the name of the upper double register of a quad register.

    Quad register 'qN' maps to double registers 'd(2N)' and 'd(2N+1)'; this
    returns 'd(2N+1)'.

    Args:
      register: quad register name, e.g. 'q3'.

    Returns:
      Double register name, e.g. 'd7'.
    """
    assert register[0] == 'q'
    num = int(register[1:])
    return 'd%d' % (num * 2 + 1)


class NeonRegisters(object):
    """Utility that keeps track of used ARM/NEON registers.

    Registers are tracked by number. Quad registers are not tracked
    separately: quad register N occupies double registers 2N and 2N+1.
    The '*_ever' sets remember every register that was allocated at any
    point, even if it has been freed since; they feed the clobber list.
    """

    def __init__(self):
        self.double = set()        # Double registers currently allocated.
        self.double_ever = set()   # Double registers ever allocated.
        self.general = set()       # General registers currently allocated.
        self.general_ever = set()  # General registers ever allocated.
        self.parameters = set()    # Names of mapped asm parameters.

    def MapParameter(self, parameter):
        """Record a named parameter and return its '%[name]' reference."""
        self.parameters.add(parameter)
        return '%%[%s]' % parameter

    def DoubleRegister(self, min_val=0):
        """Allocate the lowest free double register numbered >= min_val.

        Args:
          min_val: lowest register number to consider.

        Returns:
          Register name, e.g. 'd4'.

        Raises:
          RegisterAllocationError: no free register in [min_val, 32).
        """
        for i in range(min_val, 32):
            if i not in self.double:
                self.double.add(i)
                self.double_ever.add(i)
                return 'd%d' % i
        raise RegisterAllocationError('Not enough double registers.')

    def QuadRegister(self, min_val=0):
        """Allocate the lowest free quad register numbered >= min_val.

        A quad register is free only if both of its double halves are free.

        Args:
          min_val: lowest quad register number to consider.

        Returns:
          Register name, e.g. 'q2'.

        Raises:
          RegisterAllocationError: no free register in [min_val, 16).
        """
        for i in range(min_val, 16):
            low = i * 2
            high = i * 2 + 1
            if (low not in self.double) and (high not in self.double):
                self.double.add(low)
                self.double.add(high)
                self.double_ever.add(low)
                self.double_ever.add(high)
                return 'q%d' % i
        raise RegisterAllocationError('Not enough quad registers.')

    def GeneralRegister(self):
        """Allocate the lowest free general purpose register.

        Returns:
          Register name, e.g. 'r0'.

        Raises:
          RegisterAllocationError: all of r0..r15 are allocated.
        """
        for i in range(0, 16):
            if i not in self.general:
                self.general.add(i)
                self.general_ever.add(i)
                return 'r%d' % i
        raise RegisterAllocationError('Not enough general registers.')

    def MappedParameters(self):
        """Return a list of all parameter names passed to MapParameter."""
        return list(self.parameters)

    def Clobbers(self):
        """Return names of every register ever allocated.

        General registers come first, followed by double registers; each
        group is sorted by register number so the output is deterministic.
        """
        general = ['r%d' % i for i in sorted(self.general_ever)]
        double = ['d%d' % i for i in self.DoubleClobbers()]
        return general + double

    def DoubleClobbers(self):
        """Return sorted numbers of every double register ever allocated."""
        return sorted(self.double_ever)

    def Low(self, register):
        """Return the lower double register name of a quad register."""
        return Low(register)

    def High(self, register):
        """Return the upper double register name of a quad register."""
        return High(register)

    def FreeRegister(self, register):
        """Mark a previously allocated register as free again.

        The register stays in the 'ever allocated' sets, so it is still
        reported by Clobbers().

        Args:
          register: register name: 'rN', 'dN' or 'qN'.

        Raises:
          RegisterDeallocationError: the register name does not start with
            'r', 'd' or 'q'.
          AssertionError: the name is too short or the register is not
            currently allocated.
        """
        assert len(register) > 1
        num = int(register[1:])

        if register[0] == 'r':
            assert num in self.general
            self.general.remove(num)
        elif register[0] == 'd':
            assert num in self.double
            self.double.remove(num)
        elif register[0] == 'q':
            # A quad register is released by freeing both double halves.
            assert num * 2 in self.double
            assert num * 2 + 1 in self.double
            self.double.remove(num * 2)
            self.double.remove(num * 2 + 1)
        else:
            raise RegisterDeallocationError(
                'Register not allocated: %s' % register)


class NeonEmitter(object):
    """Emits ARM/NEON assembly opcodes.

    Everything is written to standard output immediately. Assembly
    instructions are emitted as quoted C string literals (each ending with
    an escaped newline) meant to sit inside an 'asm volatile(...)' block.
    The emitter also counts how many times each opcode was emitted.
    """

    def __init__(self, debug=False):
        """Create an emitter.

        Args:
          debug: when true, EmitAssert emits assert() calls; otherwise it
            emits nothing.
        """
        self.ops = {}     # Opcode -> number of times emitted.
        self.indent = ''  # Current indentation prefix.
        self.debug = debug

    def PushIndent(self):
        """Increase indentation by one level (two spaces)."""
        self.indent += '  '

    def PopIndent(self):
        """Decrease indentation by one level (two spaces)."""
        self.indent = self.indent[:-2]

    def EmitIndented(self, what):
        """Print a line prefixed with the current indentation."""
        # Single-argument parenthesized print behaves the same on
        # Python 2 and Python 3.
        print(self.indent + what)

    def PushOp(self, op):
        """Increment the emission counter of the given opcode."""
        self.ops[op] = self.ops.get(op, 0) + 1

    def ClearCounters(self):
        """Reset all opcode counters."""
        self.ops.clear()

    def EmitNewline(self):
        """Print an empty line."""
        print('')

    def EmitPreprocessor1(self, op, param):
        """Print a one-argument preprocessor directive: '#op param'."""
        print('#%s %s' % (op, param))

    def EmitPreprocessor(self, op):
        """Print an argument-less preprocessor directive: '#op'."""
        print('#%s' % op)

    def EmitInclude(self, include):
        """Print '#include <include>'; the argument is emitted verbatim."""
        self.EmitPreprocessor1('include', include)

    def EmitCall1(self, function, param):
        """Emit a one-argument function call statement."""
        self.EmitIndented('%s(%s);' % (function, param))

    def EmitAssert(self, assert_expression):
        """Emit 'assert(expression);' but only in debug mode."""
        if self.debug:
            self.EmitCall1('assert', assert_expression)

    def EmitHeaderBegin(self, header_name, includes):
        """Emit the include guard opening and the include list.

        Args:
          header_name: base name of the guard; '_H_' is appended and the
            result is upper-cased.
          includes: iterable of include targets, emitted verbatim; may be
            empty or None.
        """
        guard = (header_name + '_H_').upper()
        self.EmitPreprocessor1('ifndef', guard)
        self.EmitPreprocessor1('define', guard)
        self.EmitNewline()
        if includes:
            for include in includes:
                self.EmitInclude(include)
            self.EmitNewline()

    def EmitHeaderEnd(self):
        """Emit the '#endif' closing the include guard."""
        self.EmitPreprocessor('endif')

    def EmitCode(self, code):
        """Emit a statement, appending the terminating semicolon."""
        self.EmitIndented('%s;' % code)

    def EmitFunctionBeginA(self, function_name, params, return_type):
        """Emit a function header and open its body.

        Args:
          function_name: name of the function.
          params: sequence of (type, name) pairs.
          return_type: return type of the function.
        """
        param_list = ', '.join(['%s %s' % (t, n) for (t, n) in params])
        self.EmitIndented('%s %s(%s) {' %
                          (return_type, function_name, param_list))
        self.PushIndent()

    def EmitFunctionEnd(self):
        """Close the function body opened by EmitFunctionBeginA."""
        self.PopIndent()
        self.EmitIndented('}')

    def EmitAsmBegin(self):
        """Open an 'asm volatile(' block and indent its content."""
        self.EmitIndented('asm volatile(')
        self.PushIndent()

    def EmitAsmMapping(self, elements, modifier):
        """Emit one operand section of the asm block.

        Each element is emitted as '[name] "modifier"(name)'. An empty
        section is emitted as a lone ':'.
        """
        if elements:
            self.EmitIndented(': ' + ', '.join(
                ['[%s] "%s"(%s)' % (d, modifier, d) for d in elements]))
        else:
            self.EmitIndented(':')

    def EmitClobbers(self, elements):
        """Emit the clobber section: quoted names, or a lone ':'."""
        if elements:
            self.EmitIndented(
                ': ' + ', '.join(['"%s"' % c for c in elements]))
        else:
            self.EmitIndented(':')

    def EmitAsmEnd(self, outputs, inputs, clobbers):
        """Emit operand/clobber sections and close the asm block.

        Args:
          outputs: names of output operands (constraint '+r').
          inputs: names of input operands (constraint 'r').
          clobbers: names of clobbered registers.
        """
        self.EmitAsmMapping(outputs, '+r')
        self.EmitAsmMapping(inputs, 'r')
        self.EmitClobbers(clobbers)
        self.PopIndent()
        self.EmitIndented(');')

    def EmitComment(self, comment):
        """Emit a '//' line comment."""
        self.EmitIndented('// ' + comment)

    def EmitNumericalLabel(self, label):
        """Emit a numerical assembly label, e.g. '"1:"'."""
        self.EmitIndented('"%d:"' % label)

    def EmitOp1(self, op, param1):
        """Emit (and count) an instruction with one operand."""
        self.PushOp(op)
        self.EmitIndented('"%s %s\\n"' % (op, param1))

    def EmitOp2(self, op, param1, param2):
        """Emit (and count) an instruction with two operands."""
        self.PushOp(op)
        self.EmitIndented('"%s %s, %s\\n"' % (op, param1, param2))

    def EmitOp3(self, op, param1, param2, param3):
        """Emit (and count) an instruction with three operands."""
        self.PushOp(op)
        self.EmitIndented('"%s %s, %s, %s\\n"' %
                          (op, param1, param2, param3))

    def EmitZip(self, size, param1, param2):
        """Emit 'vzip.<size>' for the two operands."""
        self.EmitOp2('vzip.%d' % size, param1, param2)

    def EmitZip8(self, param1, param2):
        """Emit 'vzip.8'."""
        self.EmitZip(8, param1, param2)

    def EmitZip16(self, param1, param2):
        """Emit 'vzip.16'."""
        self.EmitZip(16, param1, param2)

    def EmitZip32(self, param1, param2):
        """Emit 'vzip.32'."""
        self.EmitZip(32, param1, param2)

    def EmitAdd(self, destination, source, param):
        """Emit 'add destination, source, param'."""
        self.EmitOp3('add', destination, source, param)

    def EmitSubs(self, destination, source, param):
        """Emit 'subs destination, source, param'."""
        self.EmitOp3('subs', destination, source, param)

    def EmitSub(self, destination, source, param):
        """Emit 'sub destination, source, param'."""
        self.EmitOp3('sub', destination, source, param)

    def EmitMul(self, destination, source, param):
        """Emit 'mul destination, source, param'."""
        self.EmitOp3('mul', destination, source, param)

    def EmitMov(self, param1, param2):
        """Emit 'mov param1, param2'."""
        self.EmitOp2('mov', param1, param2)

    def EmitSkip(self, register, skip, stride):
        """Emit an 'add' advancing register by the constant skip * stride."""
        self.EmitOp3('add', register, register, '#%d' % (skip * stride))

    def EmitBeqBack(self, label):
        """Emit 'beq' to the numerical label with the 'b' suffix."""
        self.EmitOp1('beq', '%db' % label)

    def EmitBeqFront(self, label):
        """Emit 'beq' to the numerical label with the 'f' suffix."""
        self.EmitOp1('beq', '%df' % label)

    def EmitBneBack(self, label):
        """Emit 'bne' to the numerical label with the 'b' suffix."""
        self.EmitOp1('bne', '%db' % label)

    def EmitBneFront(self, label):
        """Emit 'bne' to the numerical label with the 'f' suffix."""
        self.EmitOp1('bne', '%df' % label)

    def EmitVAdd(self, add_type, destination, source_1, source_2):
        """Emit 'vadd.<add_type>'."""
        self.EmitOp3('vadd.%s' % add_type, destination, source_1, source_2)

    def EmitVAddw(self, add_type, destination, source_1, source_2):
        """Emit 'vaddw.<add_type>'."""
        self.EmitOp3('vaddw.%s' % add_type, destination, source_1, source_2)

    def EmitVCvt(self, cvt_to, cvt_from, destination, source):
        """Emit 'vcvt.<cvt_to>.<cvt_from>'."""
        self.EmitOp2('vcvt.%s.%s' % (cvt_to, cvt_from), destination, source)

    def EmitVDup(self, dup_type, destination, source):
        """Emit 'vdup.<dup_type>'."""
        self.EmitOp2('vdup.%s' % dup_type, destination, source)

    def EmitVMov(self, mov_type, destination, source):
        """Emit 'vmov.<mov_type>'."""
        self.EmitOp2('vmov.%s' % mov_type, destination, source)

    def EmitVQmovn(self, mov_type, destination, source):
        """Emit 'vqmovn.<mov_type>'."""
        self.EmitOp2('vqmovn.%s' % mov_type, destination, source)

    def EmitVQmovun(self, mov_type, destination, source):
        """Emit 'vqmovun.<mov_type>'."""
        self.EmitOp2('vqmovun.%s' % mov_type, destination, source)

    def EmitVMul(self, mul_type, destination, source_1, source_2):
        """Emit 'vmul.<mul_type>'."""
        self.EmitOp3('vmul.%s' % mul_type, destination, source_1, source_2)

    def EmitVMull(self, mul_type, destination, source_1, source_2):
        """Emit 'vmull.<mul_type>'."""
        self.EmitOp3('vmull.%s' % mul_type, destination, source_1, source_2)

    def EmitVPadd(self, add_type, destination, source_1, source_2):
        """Emit 'vpadd.<add_type>'."""
        self.EmitOp3('vpadd.%s' % add_type, destination, source_1, source_2)

    def EmitVPaddl(self, add_type, destination, source):
        """Emit 'vpaddl.<add_type>'."""
        self.EmitOp2('vpaddl.%s' % add_type, destination, source)

    def EmitVPadal(self, add_type, destination, source):
        """Emit 'vpadal.<add_type>'."""
        self.EmitOp2('vpadal.%s' % add_type, destination, source)

    def EmitVLoad(self, load_type, destination, source):
        """Emit 'vld<load_type> {destination}, source'."""
        self.EmitOp2('vld%s' % load_type, '{%s}' % destination,
                     '%s' % source)

    def EmitVLoadA(self, load_type, destinations, source):
        """Emit a vector load into a list of destination registers."""
        self.EmitVLoad(load_type, ', '.join(destinations), source)

    def EmitPld(self, load_address_register):
        """Emit 'pld [register]'."""
        self.EmitOp1('pld', '[%s]' % load_address_register)

    def EmitPldOffset(self, load_address_register, offset):
        """Emit 'pld [register, offset]'."""
        self.EmitOp1('pld', '[%s, %s]' % (load_address_register, offset))

    def EmitInstructionPreload(self, label):
        """Emit 'pli label'."""
        self.EmitOp1('pli', label)

    def EmitVShl(self, shift_type, destination, source, shift):
        """Emit 'vshl.<shift_type>'."""
        self.EmitOp3('vshl.%s' % shift_type, destination, source, shift)

    def EmitVStore(self, store_type, source, destination):
        """Emit 'vst<store_type> {source}, destination'."""
        self.EmitOp2('vst%s' % store_type, '{%s}' % source, destination)

    def EmitVStoreA(self, store_type, sources, destination):
        """Emit a vector store from a list of source registers."""
        self.EmitVStore(store_type, ', '.join(sources), destination)

    def EmitVStoreOffset(self, store_type, source, destination, offset):
        """Emit 'vst<store_type> {source}, destination, offset'."""
        self.EmitOp3('vst%s' % store_type, '{%s}' % source, destination,
                     offset)

    def Dereference(self, value, alignment):
        """Return '[value:alignment]', or '[value]' if alignment is falsy."""
        if alignment:
            return '[%s:%d]' % (value, alignment)
        else:
            return '[%s]' % value

    def DereferenceIncrement(self, value, alignment):
        """Return the dereference of value followed by '!'."""
        return '%s!' % self.Dereference(value, alignment)

    def ImmediateConstant(self, value):
        """Return the integer formatted as an immediate: '#value'."""
        return '#%d' % value

    def AllLanes(self, value):
        """Return the all-lanes form of a register: 'value[]'."""
        return '%s[]' % value

    def Lane(self, value, lane):
        """Return the single-lane form of a register: 'value[lane]'."""
        return '%s[%d]' % (value, lane)