Home | History | Annotate | Download | only in generators
      1 """ARM/NEON assembly emitter.
      2 
      3 Used by code generators to produce ARM assembly with NEON simd code.
      4 Provides tools for easier register management: named register variable
      5 allocation/deallocation, and offers a more procedural/structured approach
      6 to generating assembly.
      7 
      8 TODO: right now neon emitter prints out assembly instructions immediately,
      9 it might be beneficial to keep the whole structure and emit the assembly after
     10 applying some optimizations like: instruction reordering or register reuse.
     11 
TODO: NeonRegisters object assigns explicit registers at allocation time.
Similarly to emitting code, register mapping and reuse can be performed and
optimized lazily.
     15 """
     16 
     17 
     18 class Error(Exception):
     19   """Module level error."""
     20 
     21 
     22 class RegisterAllocationError(Error):
     23   """Cannot alocate registers."""
     24 
     25 
     26 class LaneError(Error):
     27   """Wrong lane number."""
     28 
     29 
     30 def Low(register):
     31   assert register[0] == 'q'
     32   num = int(register[1:])
     33   return 'd%d' % (num * 2)
     34 
     35 
     36 def High(register):
     37   assert register[0] == 'q'
     38   num = int(register[1:])
     39   return 'd%d' % (num * 2 + 1)
     40 
     41 
     42 class NeonRegisters(object):
     43   """Utility that keeps track of used ARM/NEON registers."""
     44 
     45   def __init__(self):
     46     self.double = set()
     47     self.double_ever = set()
     48     self.general = set()
     49     self.general_ever = set()
     50     self.parameters = set()
     51 
     52   def MapParameter(self, parameter):
     53     self.parameters.add(parameter)
     54     return '%%[%s]' % parameter
     55 
     56   def DoubleRegister(self, min_val=0):
     57     for i in range(min_val, 32):
     58       if i not in self.double:
     59         self.double.add(i)
     60         self.double_ever.add(i)
     61         return 'd%d' % i
     62     raise RegisterAllocationError('Not enough double registers.')
     63 
     64   def QuadRegister(self, min_val=0):
     65     for i in range(min_val, 16):
     66       if ((i * 2) not in self.double) and ((i * 2 + 1) not in self.double):
     67         self.double.add(i * 2)
     68         self.double.add(i * 2 + 1)
     69         self.double_ever.add(i * 2)
     70         self.double_ever.add(i * 2 + 1)
     71         return 'q%d' % i
     72     raise RegisterAllocationError('Not enough quad registers.')
     73 
     74   def GeneralRegister(self):
     75     for i in range(0, 16):
     76       if i not in self.general:
     77         self.general.add(i)
     78         self.general_ever.add(i)
     79         return 'r%d' % i
     80     raise RegisterAllocationError('Not enough general registers.')
     81 
     82   def MappedParameters(self):
     83     return [x for x in self.parameters]
     84 
     85   def Clobbers(self):
     86     return (['r%d' % i
     87              for i in self.general_ever] + ['d%d' % i
     88                                             for i in self.DoubleClobbers()])
     89 
     90   def DoubleClobbers(self):
     91     return sorted(self.double_ever)
     92 
     93   def Low(self, register):
     94     return Low(register)
     95 
     96   def High(self, register):
     97     return High(register)
     98 
     99   def FreeRegister(self, register):
    100     assert len(register) > 1
    101     num = int(register[1:])
    102 
    103     if register[0] == 'r':
    104       assert num in self.general
    105       self.general.remove(num)
    106     elif register[0] == 'd':
    107       assert num in self.double
    108       self.double.remove(num)
    109     elif register[0] == 'q':
    110       assert num * 2 in self.double
    111       assert num * 2 + 1 in self.double
    112       self.double.remove(num * 2)
    113       self.double.remove(num * 2 + 1)
    114     else:
    115       raise RegisterDeallocationError('Register not allocated: %s' % register)
    116 
    117 
    118 class NeonEmitter(object):
    119   """Emits ARM/NEON assembly opcodes."""
    120 
    121   def __init__(self, debug=False):
    122     self.ops = {}
    123     self.indent = ''
    124     self.debug = debug
    125 
    126   def PushIndent(self):
    127     self.indent += '  '
    128 
    129   def PopIndent(self):
    130     self.indent = self.indent[:-2]
    131 
    132   def EmitIndented(self, what):
    133     print self.indent + what
    134 
    135   def PushOp(self, op):
    136     if op in self.ops.keys():
    137       self.ops[op] += 1
    138     else:
    139       self.ops[op] = 1
    140 
    141   def ClearCounters(self):
    142     self.ops.clear()
    143 
    144   def EmitNewline(self):
    145     print ''
    146 
    147   def EmitPreprocessor1(self, op, param):
    148     print '#%s %s' % (op, param)
    149 
    150   def EmitPreprocessor(self, op):
    151     print '#%s' % op
    152 
    153   def EmitInclude(self, include):
    154     self.EmitPreprocessor1('include', include)
    155 
    156   def EmitCall1(self, function, param):
    157     self.EmitIndented('%s(%s);' % (function, param))
    158 
    159   def EmitAssert(self, assert_expression):
    160     if self.debug:
    161       self.EmitCall1('assert', assert_expression)
    162 
    163   def EmitHeaderBegin(self, header_name, includes):
    164     self.EmitPreprocessor1('ifndef', (header_name + '_H_').upper())
    165     self.EmitPreprocessor1('define', (header_name + '_H_').upper())
    166     self.EmitNewline()
    167     if includes:
    168       for include in includes:
    169         self.EmitInclude(include)
    170       self.EmitNewline()
    171 
    172   def EmitHeaderEnd(self):
    173     self.EmitPreprocessor('endif')
    174 
    175   def EmitCode(self, code):
    176     self.EmitIndented('%s;' % code)
    177 
    178   def EmitFunctionBeginA(self, function_name, params, return_type):
    179     self.EmitIndented('%s %s(%s) {' %
    180                       (return_type, function_name,
    181                        ', '.join(['%s %s' % (t, n) for (t, n) in params])))
    182     self.PushIndent()
    183 
    184   def EmitFunctionEnd(self):
    185     self.PopIndent()
    186     self.EmitIndented('}')
    187 
    188   def EmitAsmBegin(self):
    189     self.EmitIndented('asm volatile(')
    190     self.PushIndent()
    191 
    192   def EmitAsmMapping(self, elements, modifier):
    193     if elements:
    194       self.EmitIndented(': ' + ', '.join(['[%s] "%s"(%s)' % (d, modifier, d)
    195                                           for d in elements]))
    196     else:
    197       self.EmitIndented(':')
    198 
    199   def EmitClobbers(self, elements):
    200     if elements:
    201       self.EmitIndented(': ' + ', '.join(['"%s"' % c for c in elements]))
    202     else:
    203       self.EmitIndented(':')
    204 
    205   def EmitAsmEnd(self, outputs, inputs, clobbers):
    206     self.EmitAsmMapping(outputs, '+r')
    207     self.EmitAsmMapping(inputs, 'r')
    208     self.EmitClobbers(clobbers)
    209     self.PopIndent()
    210     self.EmitIndented(');')
    211 
    212   def EmitComment(self, comment):
    213     self.EmitIndented('// ' + comment)
    214 
    215   def EmitNumericalLabel(self, label):
    216     self.EmitIndented('"%d:"' % label)
    217 
    218   def EmitOp1(self, op, param1):
    219     self.PushOp(op)
    220     self.EmitIndented('"%s %s\\n"' % (op, param1))
    221 
    222   def EmitOp2(self, op, param1, param2):
    223     self.PushOp(op)
    224     self.EmitIndented('"%s %s, %s\\n"' % (op, param1, param2))
    225 
    226   def EmitOp3(self, op, param1, param2, param3):
    227     self.PushOp(op)
    228     self.EmitIndented('"%s %s, %s, %s\\n"' % (op, param1, param2, param3))
    229 
    230   def EmitZip(self, size, param1, param2):
    231     self.EmitOp2('vzip.%d' % size, param1, param2)
    232 
    233   def EmitZip8(self, param1, param2):
    234     self.EmitZip(8, param1, param2)
    235 
    236   def EmitZip16(self, param1, param2):
    237     self.EmitZip(16, param1, param2)
    238 
    239   def EmitZip32(self, param1, param2):
    240     self.EmitZip(32, param1, param2)
    241 
    242   def EmitAdd(self, destination, source, param):
    243     self.EmitOp3('add', destination, source, param)
    244 
    245   def EmitSubs(self, destination, source, param):
    246     self.EmitOp3('subs', destination, source, param)
    247 
    248   def EmitSub(self, destination, source, param):
    249     self.EmitOp3('sub', destination, source, param)
    250 
    251   def EmitMul(self, destination, source, param):
    252     self.EmitOp3('mul', destination, source, param)
    253 
    254   def EmitMov(self, param1, param2):
    255     self.EmitOp2('mov', param1, param2)
    256 
    257   def EmitSkip(self, register, skip, stride):
    258     self.EmitOp3('add', register, register, '#%d' % (skip * stride))
    259 
    260   def EmitBeqBack(self, label):
    261     self.EmitOp1('beq', '%db' % label)
    262 
    263   def EmitBeqFront(self, label):
    264     self.EmitOp1('beq', '%df' % label)
    265 
    266   def EmitBneBack(self, label):
    267     self.EmitOp1('bne', '%db' % label)
    268 
    269   def EmitBneFront(self, label):
    270     self.EmitOp1('bne', '%df' % label)
    271 
    272   def EmitVAdd(self, add_type, destination, source_1, source_2):
    273     self.EmitOp3('vadd.%s' % add_type, destination, source_1, source_2)
    274 
    275   def EmitVAddw(self, add_type, destination, source_1, source_2):
    276     self.EmitOp3('vaddw.%s' % add_type, destination, source_1, source_2)
    277 
    278   def EmitVCvt(self, cvt_to, cvt_from, destination, source):
    279     self.EmitOp2('vcvt.%s.%s' % (cvt_to, cvt_from), destination, source)
    280 
    281   def EmitVDup(self, dup_type, destination, source):
    282     self.EmitOp2('vdup.%s' % dup_type, destination, source)
    283 
    284   def EmitVMov(self, mov_type, destination, source):
    285     self.EmitOp2('vmov.%s' % mov_type, destination, source)
    286 
    287   def EmitVQmovn(self, mov_type, destination, source):
    288     self.EmitOp2('vqmovn.%s' % mov_type, destination, source)
    289 
    290   def EmitVQmovun(self, mov_type, destination, source):
    291     self.EmitOp2('vqmovun.%s' % mov_type, destination, source)
    292 
    293   def EmitVMul(self, mul_type, destination, source_1, source_2):
    294     self.EmitOp3('vmul.%s' % mul_type, destination, source_1, source_2)
    295 
    296   def EmitVMull(self, mul_type, destination, source_1, source_2):
    297     self.EmitOp3('vmull.%s' % mul_type, destination, source_1, source_2)
    298 
    299   def EmitVPadd(self, add_type, destination, source_1, source_2):
    300     self.EmitOp3('vpadd.%s' % add_type, destination, source_1, source_2)
    301 
    302   def EmitVPaddl(self, add_type, destination, source):
    303     self.EmitOp2('vpaddl.%s' % add_type, destination, source)
    304 
    305   def EmitVPadal(self, add_type, destination, source):
    306     self.EmitOp2('vpadal.%s' % add_type, destination, source)
    307 
    308   def EmitVLoad(self, load_type, destination, source):
    309     self.EmitOp2('vld%s' % load_type, '{%s}' % destination, '%s' % source)
    310 
    311   def EmitVLoadA(self, load_type, destinations, source):
    312     self.EmitVLoad(load_type, ', '.join(destinations), source)
    313 
    314   def EmitPld(self, load_address_register):
    315     self.EmitOp1('pld', '[%s]' % load_address_register)
    316 
    317   def EmitPldOffset(self, load_address_register, offset):
    318     self.EmitOp1('pld', '[%s, %s]' % (load_address_register, offset))
    319 
    320   def EmitInstructionPreload(self, label):
    321     self.EmitOp1('pli', label)
    322 
    323   def EmitVShl(self, shift_type, destination, source, shift):
    324     self.EmitOp3('vshl.%s' % shift_type, destination, source, shift)
    325 
    326   def EmitVStore(self, store_type, source, destination):
    327     self.EmitOp2('vst%s' % store_type, '{%s}' % source, destination)
    328 
    329   def EmitVStoreA(self, store_type, sources, destination):
    330     self.EmitVStore(store_type, ', '.join(sources), destination)
    331 
    332   def EmitVStoreOffset(self, store_type, source, destination, offset):
    333     self.EmitOp3('vst%s' % store_type, '{%s}' % source, destination, offset)
    334 
    335   def Dereference(self, value, alignment):
    336     if alignment:
    337       return '[%s:%d]' % (value, alignment)
    338     else:
    339       return '[%s]' % value
    340 
    341   def DereferenceIncrement(self, value, alignment):
    342     return '%s!' % self.Dereference(value, alignment)
    343 
    344   def ImmediateConstant(self, value):
    345     return '#%d' % value
    346 
    347   def AllLanes(self, value):
    348     return '%s[]' % value
    349 
    350   def Lane(self, value, lane):
    351     return '%s[%d]' % (value, lane)
    352