Home | History | Annotate | Download | only in AMDGPU
      1 //===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 //===----------------------------------------------------------------------===//
     11 //  Declarations that describe the SI registers
     12 //===----------------------------------------------------------------------===//
     13 class SIReg <string n, bits<16> regIdx = 0> : Register<n>,
     14   DwarfRegNum<[!cast<int>(HWEncoding)]> {
          // Common base for the individual registers defined in this file.
          //   n      - register name forwarded to Register<>.
          //   regIdx - value stored in HWEncoding (defaults to 0); the DwarfRegNum
          //            list above is that same value cast to int.
     15   let Namespace = "AMDGPU";
     16 
     17   // This is not yet the complete register encoding. An additional
     18   // bit (HWEncoding{8}) is set for VGPRs where they are defined.
     19   let HWEncoding = regIdx;
     20 }
     21 
     22 // Special Registers
        //
        // Each 64-bit special register is declared as two 32-bit SIReg halves plus
        // a RegisterWithSubRegs record that maps the halves to sub0 (low) and sub1
        // (high), reuses the low half's hardware encoding, and is declared as a
        // DwarfRegAlias of the low half.
     23 def VCC_LO : SIReg<"vcc_lo", 106>;
     24 def VCC_HI : SIReg<"vcc_hi", 107>;
     25 
     26 // VCC for 64-bit instructions
     27 def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
     28           DwarfRegAlias<VCC_LO> {
     29   let Namespace = "AMDGPU";
     30   let SubRegIndices = [sub0, sub1];
     31   let HWEncoding = 106;
     32 }
     33 
     34 def EXEC_LO : SIReg<"exec_lo", 126>;
     35 def EXEC_HI : SIReg<"exec_hi", 127>;
     36 
        // 64-bit EXEC, built from EXEC_LO (sub0) and EXEC_HI (sub1).
        // NOTE(review): the name string here is upper-case "EXEC" while the other
        // special registers in this section use lower-case names - confirm that
        // this is intentional.
     37 def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
     38            DwarfRegAlias<EXEC_LO> {
     39   let Namespace = "AMDGPU";
     40   let SubRegIndices = [sub0, sub1];
     41   let HWEncoding = 126;
     42 }
     43 
        // Special registers declared without sub-registers.
     44 def SCC : SIReg<"scc", 253>;
     45 def M0 : SIReg <"m0", 124>;
     46 
     47 // Trap handler registers
        //
        // TBA and TMA follow the same lo/hi + 64-bit wrapper layout as VCC and EXEC:
        // the 64-bit record uses the low half's encoding and exposes the halves as
        // sub0/sub1. TTMP0..TTMP11 are plain 32-bit registers with consecutive
        // encodings 112..123.
     48 def TBA_LO : SIReg<"tba_lo", 108>;
     49 def TBA_HI : SIReg<"tba_hi", 109>;
     50 
     51 def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
     52           DwarfRegAlias<TBA_LO> {
     53   let Namespace = "AMDGPU";
     54   let SubRegIndices = [sub0, sub1];
     55   let HWEncoding = 108;
     56 }
     57 
     58 def TMA_LO : SIReg<"tma_lo", 110>;
     59 def TMA_HI : SIReg<"tma_hi", 111>;
     60 
     61 def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
     62           DwarfRegAlias<TMA_LO> {
     63   let Namespace = "AMDGPU";
     64   let SubRegIndices = [sub0, sub1];
     65   let HWEncoding = 110;
     66 }
     67 
     68 def TTMP0 : SIReg <"ttmp0", 112>;
     69 def TTMP1 : SIReg <"ttmp1", 113>;
     70 def TTMP2 : SIReg <"ttmp2", 114>;
     71 def TTMP3 : SIReg <"ttmp3", 115>;
     72 def TTMP4 : SIReg <"ttmp4", 116>;
     73 def TTMP5 : SIReg <"ttmp5", 117>;
     74 def TTMP6 : SIReg <"ttmp6", 118>;
     75 def TTMP7 : SIReg <"ttmp7", 119>;
     76 def TTMP8 : SIReg <"ttmp8", 120>;
     77 def TTMP9 : SIReg <"ttmp9", 121>;
     78 def TTMP10 : SIReg <"ttmp10", 122>;
     79 def TTMP11 : SIReg <"ttmp11", 123>;
     80 
     81 multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
          // Each defm of this multiclass produces three SIReg records:
          //   <prefix>_ci - name n, hardware encoding ci_e
          //   <prefix>_vi - name n, hardware encoding vi_e
          //   <prefix>    - empty name, hardware encoding 0
          // NOTE(review): the _ci/_vi suffixes presumably select per-generation
          // encodings and the unsuffixed record is presumably a placeholder that
          // is resolved to one of them elsewhere - confirm against the users.
     82   def _ci : SIReg<n, ci_e>;
     83   def _vi : SIReg<n, vi_e>;
     84   def "" : SIReg<"", 0>;
     85 }
     86 
     87 class FlatReg <Register lo, Register hi, bits<16> encoding> :
     88     RegisterWithSubRegs<"flat_scratch", [lo, hi]>,
     89     DwarfRegAlias<lo> {
          // 64-bit register named "flat_scratch" made of two 32-bit halves.
          //   lo, hi   - registers exposed as sub0 and sub1 respectively; the
          //              record is also a DwarfRegAlias of lo.
          //   encoding - value assigned to HWEncoding.
     90   let Namespace = "AMDGPU";
     91   let SubRegIndices = [sub0, sub1];
     92   let HWEncoding = encoding;
     93 }
     94 
     95 defm FLAT_SCR_LO : FLAT_SCR_LOHI_m<"flat_scratch_lo", 104, 102>; // Offset in units of 256-bytes.
     96 defm FLAT_SCR_HI : FLAT_SCR_LOHI_m<"flat_scratch_hi", 105, 103>; // Size is the per-thread scratch size, in bytes.
     97 
        // 64-bit flat_scratch records. The _ci and _vi variants pair the matching
        // lo/hi halves and reuse the low half's encoding (104 and 102); the
        // unsuffixed FLAT_SCR pairs the unsuffixed halves and has encoding 0.
     98 def FLAT_SCR_ci : FlatReg<FLAT_SCR_LO_ci, FLAT_SCR_HI_ci, 104>;
     99 def FLAT_SCR_vi : FlatReg<FLAT_SCR_LO_vi, FLAT_SCR_HI_vi, 102>;
    100 def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
    101 
    102 // SGPR registers
        // Defines SGPR0 through SGPR103; each register's hardware encoding is its
        // index.
    103 foreach Index = 0-103 in {
    104   def SGPR#Index : SIReg <"SGPR"#Index, Index>;
    105 }
    106 
    107 // VGPR registers
        // Defines VGPR0 through VGPR255. The low bits of the hardware encoding are
        // the index; bit 8 is additionally set on every VGPR.
    108 foreach Index = 0-255 in {
    109   def VGPR#Index : SIReg <"VGPR"#Index, Index> {
    110     let HWEncoding{8} = 1;
    111   }
    112 }
    113 
    114 //===----------------------------------------------------------------------===//
    115 //  Groupings using register classes and tuples
    116 //===----------------------------------------------------------------------===//
    117 
    117 def SCC_CLASS : RegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
          // i1 register class whose only member is SCC. It is excluded from
          // register allocation, and the negative CopyCost marks the class as very
          // expensive to copy (see CopyCost in LLVM's Target.td).
    118   let CopyCost = -1;
    119   let isAllocatable = 0;
    120 }
    122 
    123 // TODO: Do we need to set DwarfRegAlias on register tuples?
    124 
    125 // SGPR 32-bit registers
        // Contains SGPR0..SGPR103 for i32/f32 values. AllocationPriority is 1 here;
        // the wider classes later in this file use larger values.
    126 def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
    127                             (add (sequence "SGPR%u", 0, 103))> {
    128   let AllocationPriority = 1;
    129 }
    130 
    131 // SGPR 64-bit registers
        // Pairs of consecutive SGPRs starting at every second register: sub0 comes
        // from every 2nd element of SGPR_32, sub1 from the same list with its first
        // element dropped, i.e. (SGPR0, SGPR1), (SGPR2, SGPR3), ...
    132 def SGPR_64Regs : RegisterTuples<[sub0, sub1],
    133                              [(add (decimate SGPR_32, 2)),
    134                               (add (decimate (shl SGPR_32, 1), 2))]>;
    135 
    136 // SGPR 128-bit registers
        // Groups of four consecutive SGPRs starting at every fourth register:
        // (SGPR0..SGPR3), (SGPR4..SGPR7), ...
    137 def SGPR_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
    138                               [(add (decimate SGPR_32, 4)),
    139                                (add (decimate (shl SGPR_32, 1), 4)),
    140                                (add (decimate (shl SGPR_32, 2), 4)),
    141                                (add (decimate (shl SGPR_32, 3), 4))]>;
    142 
    143 // SGPR 256-bit registers
        // Groups of eight consecutive SGPRs. Every sub-register list is decimated
        // by 4, so a tuple starts at every fourth SGPR (SGPR0, SGPR4, SGPR8, ...)
        // and neighbouring tuples overlap.
    144 def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
    145                               [(add (decimate SGPR_32, 4)),
    146                                (add (decimate (shl SGPR_32, 1), 4)),
    147                                (add (decimate (shl SGPR_32, 2), 4)),
    148                                (add (decimate (shl SGPR_32, 3), 4)),
    149                                (add (decimate (shl SGPR_32, 4), 4)),
    150                                (add (decimate (shl SGPR_32, 5), 4)),
    151                                (add (decimate (shl SGPR_32, 6), 4)),
    152                                (add (decimate (shl SGPR_32, 7), 4))]>;
    153 
    154 // SGPR 512-bit registers
        // Groups of sixteen consecutive SGPRs. As with the 256-bit tuples, each
        // list is decimated by 4, so a tuple starts at every fourth SGPR and
        // neighbouring tuples overlap.
    155 def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
    156                                sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
    157                               [(add (decimate SGPR_32, 4)),
    158                                (add (decimate (shl SGPR_32, 1), 4)),
    159                                (add (decimate (shl SGPR_32, 2), 4)),
    160                                (add (decimate (shl SGPR_32, 3), 4)),
    161                                (add (decimate (shl SGPR_32, 4), 4)),
    162                                (add (decimate (shl SGPR_32, 5), 4)),
    163                                (add (decimate (shl SGPR_32, 6), 4)),
    164                                (add (decimate (shl SGPR_32, 7), 4)),
    165                                (add (decimate (shl SGPR_32, 8), 4)),
    166                                (add (decimate (shl SGPR_32, 9), 4)),
    167                                (add (decimate (shl SGPR_32, 10), 4)),
    168                                (add (decimate (shl SGPR_32, 11), 4)),
    169                                (add (decimate (shl SGPR_32, 12), 4)),
    170                                (add (decimate (shl SGPR_32, 13), 4)),
    171                                (add (decimate (shl SGPR_32, 14), 4)),
    172                                (add (decimate (shl SGPR_32, 15), 4))]>;
    173 
    174 // Trap handler TMP 32-bit registers
        // Contains TTMP0..TTMP11; the class is not allocatable.
    175 def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
    176                             (add (sequence "TTMP%u", 0, 11))> {
    177   let isAllocatable = 0;
    178 }
    179 
    180 // Trap handler TMP 64-bit registers
        // Pairs of consecutive TTMP registers starting at every second register,
        // built the same way as SGPR_64Regs.
    181 def TTMP_64Regs : RegisterTuples<[sub0, sub1],
    182                              [(add (decimate TTMP_32, 2)),
    183                               (add (decimate (shl TTMP_32, 1), 2))]>;
    184 
    185 // Trap handler TMP 128-bit registers
        // Groups of four consecutive TTMP registers starting at every fourth
        // register, built the same way as SGPR_128Regs.
    186 def TTMP_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
    187                               [(add (decimate TTMP_32, 4)),
    188                                (add (decimate (shl TTMP_32, 1), 4)),
    189                                (add (decimate (shl TTMP_32, 2), 4)),
    190                                (add (decimate (shl TTMP_32, 3), 4))]>;
    191 
    192 // VGPR 32-bit registers
        // Contains VGPR0..VGPR255 for i32/f32 values, with AllocationPriority 1.
    193 def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
    194                             (add (sequence "VGPR%u", 0, 255))> {
    195   let AllocationPriority = 1;
    196 }
    197 
    198 // VGPR 64-bit registers
        // Unlike the SGPR tuples, no decimation is applied: sub0 ranges over the
        // first 255 VGPRs and sub1 over the list shifted by one, so a tuple starts
        // at every VGPR: (VGPR0, VGPR1), (VGPR1, VGPR2), ...
    199 def VGPR_64 : RegisterTuples<[sub0, sub1],
    200                              [(add (trunc VGPR_32, 255)),
    201                               (add (shl VGPR_32, 1))]>;
    202 
    203 // VGPR 96-bit registers
        // Three consecutive VGPRs; sub0 is limited to the first 254 registers.
    204 def VGPR_96 : RegisterTuples<[sub0, sub1, sub2],
    205                              [(add (trunc VGPR_32, 254)),
    206                               (add (shl VGPR_32, 1)),
    207                               (add (shl VGPR_32, 2))]>;
    208 
    209 // VGPR 128-bit registers
        // Four consecutive VGPRs; sub0 is limited to the first 253 registers.
    210 def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
    211                               [(add (trunc VGPR_32, 253)),
    212                                (add (shl VGPR_32, 1)),
    213                                (add (shl VGPR_32, 2)),
    214                                (add (shl VGPR_32, 3))]>;
    215 
    216 // VGPR 256-bit registers
        // Eight consecutive VGPRs starting at any register; sub0 is limited to the
        // first 249 registers.
    217 def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
    218                               [(add (trunc VGPR_32, 249)),
    219                                (add (shl VGPR_32, 1)),
    220                                (add (shl VGPR_32, 2)),
    221                                (add (shl VGPR_32, 3)),
    222                                (add (shl VGPR_32, 4)),
    223                                (add (shl VGPR_32, 5)),
    224                                (add (shl VGPR_32, 6)),
    225                                (add (shl VGPR_32, 7))]>;
    226 
    227 // VGPR 512-bit registers
        // Sixteen consecutive VGPRs starting at any register; sub0 is limited to
        // the first 241 registers.
    228 def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
    229                                sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
    230                               [(add (trunc VGPR_32, 241)),
    231                                (add (shl VGPR_32, 1)),
    232                                (add (shl VGPR_32, 2)),
    233                                (add (shl VGPR_32, 3)),
    234                                (add (shl VGPR_32, 4)),
    235                                (add (shl VGPR_32, 5)),
    236                                (add (shl VGPR_32, 6)),
    237                                (add (shl VGPR_32, 7)),
    238                                (add (shl VGPR_32, 8)),
    239                                (add (shl VGPR_32, 9)),
    240                                (add (shl VGPR_32, 10)),
    241                                (add (shl VGPR_32, 11)),
    242                                (add (shl VGPR_32, 12)),
    243                                (add (shl VGPR_32, 13)),
    244                                (add (shl VGPR_32, 14)),
    245                                (add (shl VGPR_32, 15))]>;
    246 
    247 //===----------------------------------------------------------------------===//
    248 //  Register classes used as source and destination
    249 //===----------------------------------------------------------------------===//
    250 
    251 class RegImmMatcher<string name> : AsmOperandClass {
          // AsmOperandClass used as the ParserMatchClass of the source operands
          // defined later in this file. `name` becomes the operand class Name; the
          // render method is always "addRegOrImmOperands".
    252   let Name = name;
    253   let RenderMethod = "addRegOrImmOperands";
    254 }
    255 
    256 // Subset of SReg_32 without M0 for SMRD instructions and the like.
    257 // See comments in SIInstructions.td for more info.
        // Members: all SGPRs, the lo/hi halves of VCC, EXEC, FLAT_SCR, TMA and TBA,
        // and the TTMP registers.
    258 def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32], 32,
    259   (add SGPR_32, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
    260    TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)> {
    261   let AllocationPriority = 1;
    262 }
    263 
    264 // Register class for all scalar registers (SGPRs + Special Registers)
        // Defined as SReg_32_XM0 plus M0.
    265 def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
    266   (add SReg_32_XM0, M0)> {
    267   let AllocationPriority = 1;
    268 }
    269 
    270 def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)> {
          // 64-bit class containing only the SGPR pairs from SGPR_64Regs.
    271   let AllocationPriority = 2;
    272 }
    273 
        // 64-bit class containing only the TTMP pairs; not allocatable.
    274 def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add TTMP_64Regs)> {
    275   let isAllocatable = 0;
    276 }
    277 
        // All 64-bit scalar registers: SGPR pairs, TTMP pairs and the 64-bit special
        // registers VCC, EXEC, FLAT_SCR, TBA and TMA. Unlike SGPR_64 and TTMP_64,
        // this class also lists i1 among its value types.
    278 def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
    279   (add SGPR_64, VCC, EXEC, FLAT_SCR, TTMP_64, TBA, TMA)> {
    280   let AllocationPriority = 2;
    281 }
    282 
    283 // Requires 2 s_mov_b64 to copy
        // The `let ... in` block applies CopyCost = 2 to all three 128-bit scalar
        // classes defined inside it.
    284 let CopyCost = 2 in {
    285 
        // 128-bit class containing only the SGPR quads from SGPR_128Regs.
    286 def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128Regs)> {
    287   let AllocationPriority = 4;
    288 }
    289 
        // 128-bit class containing only the TTMP quads; not allocatable.
    290 def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128Regs)> {
    291   let isAllocatable = 0;
    292 }
    293 
        // Union of the SGPR and TTMP 128-bit classes.
    294 def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128, TTMP_128)> {
    295   let AllocationPriority = 4;
    296 }
    297 
    298 } // End CopyCost = 2
    299 
    300 def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
          // 256-bit scalar class; members are the SGPR_256 tuples only.
    301   // Requires 4 s_mov_b64 to copy
    302   let CopyCost = 4;
    303   let AllocationPriority = 5;
    304 }
    305 
        // 512-bit scalar class; members are the SGPR_512 tuples only.
    306 def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 32, (add SGPR_512)> {
    307   // Requires 8 s_mov_b64 to copy
    308   let CopyCost = 8;
    309   let AllocationPriority = 6;
    310 }
    311 
    312 // Register class for all vector registers (VGPRs + Interpolation Registers)
        // NOTE(review): the line above does not match the definition that follows -
        // VReg_64 contains only the VGPR_64 tuples. The comment looks stale; confirm
        // and reword or remove it.
    313 def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 32, (add VGPR_64)> {
    314   // Requires 2 v_mov_b32 to copy
    315   let CopyCost = 2;
    316   let AllocationPriority = 2;
    317 }
    318 
        // 96-bit VGPR tuples. The value type list is [untyped] and the class Size is
        // set explicitly to 96.
    319 def VReg_96 : RegisterClass<"AMDGPU", [untyped], 32, (add VGPR_96)> {
    320   let Size = 96;
    321 
    322   // Requires 3 v_mov_b32 to copy
    323   let CopyCost = 3;
    324   let AllocationPriority = 3;
    325 }
    326 
        // 128-bit VGPR tuples.
    327 def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VGPR_128)> {
    328   // Requires 4 v_mov_b32 to copy
    329   let CopyCost = 4;
    330   let AllocationPriority = 4;
    331 }
    332 
        // 256-bit VGPR tuples. CopyCost follows the pattern of the narrower classes
        // above: one per 32-bit sub-register.
    333 def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> {
    334   let CopyCost = 8;
    335   let AllocationPriority = 5;
    336 }
    337 
        // 512-bit VGPR tuples; CopyCost is again one per 32-bit sub-register.
    338 def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> {
    339   let CopyCost = 16;
    340   let AllocationPriority = 6;
    341 }
    342 
        // i1 values held in the 32-bit VGPRs; Size is set explicitly to 32.
    343 def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
    344   let Size = 32;
    345 }
    346 
    347 class RegImmOperand <RegisterClass rc> : RegisterOperand<rc> {
          // RegisterOperand over `rc` tagged with operand type
          // AMDGPU::OPERAND_REG_IMM32.
    348   let OperandNamespace = "AMDGPU";
    349   let OperandType = "OPERAND_REG_IMM32";
    350 }
    351 
        // RegisterOperand over `rc` tagged with operand type
        // AMDGPU::OPERAND_REG_INLINE_C.
    352 class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
    353   let OperandNamespace = "AMDGPU";
    354   let OperandType = "OPERAND_REG_INLINE_C";
    355 }
    356 
    357 //===----------------------------------------------------------------------===//
    358 //  SSrc_* Operands with an SGPR or a 32-bit immediate
    359 //===----------------------------------------------------------------------===//
    360 
        // 32-bit scalar source: SReg_32 register class, OPERAND_REG_IMM32 operand
        // type, parsed via the "SSrc32" match class.
    361 def SSrc_32 : RegImmOperand<SReg_32> {
    362   let ParserMatchClass = RegImmMatcher<"SSrc32">;
    363 }
    364 
        // 64-bit scalar source: SReg_64 register class, OPERAND_REG_IMM32 operand
        // type, parsed via the "SSrc64" match class.
    365 def SSrc_64 : RegImmOperand<SReg_64> {
    366   let ParserMatchClass = RegImmMatcher<"SSrc64">;
    367 }
    368 
    369 //===----------------------------------------------------------------------===//
    370 //  SCSrc_* Operands with an SGPR or an inline constant
    371 //===----------------------------------------------------------------------===//
    372 
        // 32-bit scalar source: SReg_32 register class, OPERAND_REG_INLINE_C operand
        // type, parsed via the "SCSrc32" match class. The 64-bit counterpart,
        // SCSrc_64, is defined at the end of this file.
    373 def SCSrc_32 : RegInlineOperand<SReg_32> {
    374   let ParserMatchClass = RegImmMatcher<"SCSrc32">;
    375 }
    376 
    377 //===----------------------------------------------------------------------===//
    378 //  VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
    379 //===----------------------------------------------------------------------===//
    380 
        // Union of the 32-bit vector and scalar register classes.
    381 def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;
    382 
        // Union of the 64-bit vector and scalar register classes.
    383 def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
    384   let CopyCost = 2;
    385 }
    386 
        // The two operands below set the same OperandNamespace/OperandType fields
        // that RegImmOperand sets, but are spelled out directly on RegisterOperand.
    387 def VSrc_32 : RegisterOperand<VS_32> {
    388   let OperandNamespace = "AMDGPU";
    389   let OperandType = "OPERAND_REG_IMM32";
    390   let ParserMatchClass = RegImmMatcher<"VSrc32">;
    391 }
    392 
    393 def VSrc_64 : RegisterOperand<VS_64> {
    394   let OperandNamespace = "AMDGPU";
    395   let OperandType = "OPERAND_REG_IMM32";
    396   let ParserMatchClass = RegImmMatcher<"VSrc64">;
    397 }
    398 
    399 //===----------------------------------------------------------------------===//
    400 //  VCSrc_* Operands with an SGPR, VGPR or an inline constant
    401 //===----------------------------------------------------------------------===//
    402 
        // Same register classes as VSrc_32/VSrc_64 (VS_32, VS_64), but with operand
        // type OPERAND_REG_INLINE_C instead of OPERAND_REG_IMM32.
    403 def VCSrc_32 : RegisterOperand<VS_32> {
    404   let OperandNamespace = "AMDGPU";
    405   let OperandType = "OPERAND_REG_INLINE_C";
    406   let ParserMatchClass = RegImmMatcher<"VCSrc32">;
    407 }
    408 
    409 def VCSrc_64 : RegisterOperand<VS_64> {
    410   let OperandNamespace = "AMDGPU";
    411   let OperandType = "OPERAND_REG_INLINE_C";
    412   let ParserMatchClass = RegImmMatcher<"VCSrc64">;
    413 }
    414 
    415 //===----------------------------------------------------------------------===//
    416 //  SCSrc_* Operands with an SGPR or an inline constant
    417 //===----------------------------------------------------------------------===//
    418 
        // 64-bit counterpart of SCSrc_32: SReg_64 register class with operand type
        // OPERAND_REG_INLINE_C. It sets the same fields that RegInlineOperand sets,
        // but is spelled out directly on RegisterOperand.
    419 def SCSrc_64 : RegisterOperand<SReg_64> {
    420   let OperandNamespace = "AMDGPU";
    421   let OperandType = "OPERAND_REG_INLINE_C";
    422   let ParserMatchClass = RegImmMatcher<"SCSrc64">;
    423 }
    424