//===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Declarations that describe the SI registers
//===----------------------------------------------------------------------===//

// Base class for a single SI register.
//   n      - assembly name of the register.
//   regIdx - value stored in HWEncoding (defaults to 0). The same value is
//            also used as the register's DWARF number.
class SIReg <string n, bits<16> regIdx = 0> : Register<n>,
  DwarfRegNum<[!cast<int>(HWEncoding)]> {
  let Namespace = "AMDGPU";

  // This is not yet the complete register encoding. An additional
  // bit is set for VGPRs.
  let HWEncoding = regIdx;
}

// Special Registers
def VCC_LO : SIReg<"vcc_lo", 106>;
def VCC_HI : SIReg<"vcc_hi", 107>;

// VCC for 64-bit instructions
def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
          DwarfRegAlias<VCC_LO> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1];
  // Same encoding as the low half (VCC_LO).
  let HWEncoding = 106;
}

def EXEC_LO : SIReg<"exec_lo", 126>;
def EXEC_HI : SIReg<"exec_hi", 127>;

// 64-bit EXEC register built from EXEC_LO (sub0) and EXEC_HI (sub1).
def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
           DwarfRegAlias<EXEC_LO> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1];
  // Same encoding as the low half (EXEC_LO).
  let HWEncoding = 126;
}

def SCC : SIReg<"scc", 253>;
def M0 : SIReg <"m0", 124>;

// Trap handler registers
def TBA_LO : SIReg<"tba_lo", 108>;
def TBA_HI : SIReg<"tba_hi", 109>;

// 64-bit TBA register built from TBA_LO (sub0) and TBA_HI (sub1).
def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
          DwarfRegAlias<TBA_LO> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1];
  let HWEncoding = 108;
}

def TMA_LO : SIReg<"tma_lo", 110>;
def TMA_HI : SIReg<"tma_hi", 111>;

// 64-bit TMA register built from TMA_LO (sub0) and TMA_HI (sub1).
def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
          DwarfRegAlias<TMA_LO> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1];
  let HWEncoding = 110;
}

// Trap handler temporaries; encodings 112..123 (TTMP<i> = 112 + i).
def TTMP0 : SIReg <"ttmp0", 112>;
def TTMP1 : SIReg <"ttmp1", 113>;
def TTMP2 : SIReg <"ttmp2", 114>;
def TTMP3 : SIReg <"ttmp3", 115>;
def TTMP4 : SIReg <"ttmp4", 116>;
def TTMP5 : SIReg <"ttmp5", 117>;
def TTMP6 : SIReg <"ttmp6", 118>;
def TTMP7 : SIReg <"ttmp7", 119>;
def TTMP8 : SIReg <"ttmp8", 120>;
def TTMP9 : SIReg <"ttmp9", 121>;
def TTMP10 : SIReg <"ttmp10", 122>;
def TTMP11 : SIReg <"ttmp11", 123>;

// Defines three registers for one half of flat_scratch:
//   <NAME>_ci - named n, encoded as ci_e
//   <NAME>_vi - named n, encoded as vi_e
//   <NAME>    - unsuffixed record with an empty name and encoding 0
// NOTE(review): the _ci/_vi suffixes presumably select the per-generation
// encoding and the unsuffixed record is a placeholder -- confirm.
multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
  def _ci : SIReg<n, ci_e>;
  def _vi : SIReg<n, vi_e>;
  def "" : SIReg<"", 0>;
}

// 64-bit "flat_scratch" register made of the halves lo (sub0) and hi (sub1),
// with the given hardware encoding.
class FlatReg <Register lo, Register hi, bits<16> encoding> :
    RegisterWithSubRegs<"flat_scratch", [lo, hi]>,
    DwarfRegAlias<lo> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1];
  let HWEncoding = encoding;
}

defm FLAT_SCR_LO : FLAT_SCR_LOHI_m<"flat_scratch_lo", 104, 102>; // Offset in units of 256-bytes.
defm FLAT_SCR_HI : FLAT_SCR_LOHI_m<"flat_scratch_hi", 105, 103>; // Size is the per-thread scratch size, in bytes.

def FLAT_SCR_ci : FlatReg<FLAT_SCR_LO_ci, FLAT_SCR_HI_ci, 104>;
def FLAT_SCR_vi : FlatReg<FLAT_SCR_LO_vi, FLAT_SCR_HI_vi, 102>;
def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;

// SGPR registers
foreach Index = 0-103 in {
  def SGPR#Index : SIReg <"SGPR"#Index, Index>;
}

// VGPR registers
foreach Index = 0-255 in {
  def VGPR#Index : SIReg <"VGPR"#Index, Index> {
    // The additional encoding bit that distinguishes VGPRs (see SIReg).
    let HWEncoding{8} = 1;
  }
}

//===----------------------------------------------------------------------===//
//  Groupings using register classes and tuples
//===----------------------------------------------------------------------===//

// Non-allocatable class containing only SCC.
def SCC_CLASS : RegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
  let CopyCost = -1;
  let isAllocatable = 0;
}

// TODO: Do we need to set DwarfRegAlias on register tuples?

// SGPR 32-bit registers
def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
                            (add (sequence "SGPR%u", 0, 103))> {
  let AllocationPriority = 1;
}

// The tuple definitions below use the register set operators:
//   (decimate S, N) - every N-th element of S, starting with the first
//   (shl S, N)      - S without its first N elements
//   (trunc S, N)    - only the first N elements of S

// SGPR 64-bit registers
// Tuples start at every 2nd SGPR.
def SGPR_64Regs : RegisterTuples<[sub0, sub1],
                                 [(add (decimate SGPR_32, 2)),
                                  (add (decimate (shl SGPR_32, 1), 2))]>;

// SGPR 128-bit registers
// Tuples start at every 4th SGPR.
def SGPR_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
                                  [(add (decimate SGPR_32, 4)),
                                   (add (decimate (shl SGPR_32, 1), 4)),
                                   (add (decimate (shl SGPR_32, 2), 4)),
                                   (add (decimate (shl SGPR_32, 3), 4))]>;

// SGPR 256-bit registers
// Tuples start at every 4th SGPR.
def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
                              [(add (decimate SGPR_32, 4)),
                               (add (decimate (shl SGPR_32, 1), 4)),
                               (add (decimate (shl SGPR_32, 2), 4)),
                               (add (decimate (shl SGPR_32, 3), 4)),
                               (add (decimate (shl SGPR_32, 4), 4)),
                               (add (decimate (shl SGPR_32, 5), 4)),
                               (add (decimate (shl SGPR_32, 6), 4)),
                               (add (decimate (shl SGPR_32, 7), 4))]>;

// SGPR 512-bit registers
// Tuples start at every 4th SGPR.
def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
                               sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
                              [(add (decimate SGPR_32, 4)),
                               (add (decimate (shl SGPR_32, 1), 4)),
                               (add (decimate (shl SGPR_32, 2), 4)),
                               (add (decimate (shl SGPR_32, 3), 4)),
                               (add (decimate (shl SGPR_32, 4), 4)),
                               (add (decimate (shl SGPR_32, 5), 4)),
                               (add (decimate (shl SGPR_32, 6), 4)),
                               (add (decimate (shl SGPR_32, 7), 4)),
                               (add (decimate (shl SGPR_32, 8), 4)),
                               (add (decimate (shl SGPR_32, 9), 4)),
                               (add (decimate (shl SGPR_32, 10), 4)),
                               (add (decimate (shl SGPR_32, 11), 4)),
                               (add (decimate (shl SGPR_32, 12), 4)),
                               (add (decimate (shl SGPR_32, 13), 4)),
                               (add (decimate (shl SGPR_32, 14), 4)),
                               (add (decimate (shl SGPR_32, 15), 4))]>;

// Trap handler TMP 32-bit registers
def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
                            (add (sequence "TTMP%u", 0, 11))> {
  let isAllocatable = 0;
}

// Trap handler TMP 64-bit registers
def TTMP_64Regs : RegisterTuples<[sub0, sub1],
                                 [(add (decimate TTMP_32, 2)),
                                  (add (decimate (shl TTMP_32, 1), 2))]>;

// Trap handler TMP 128-bit registers
def TTMP_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
                                  [(add (decimate TTMP_32, 4)),
                                   (add (decimate (shl TTMP_32, 1), 4)),
                                   (add (decimate (shl TTMP_32, 2), 4)),
                                   (add (decimate (shl TTMP_32, 3), 4))]>;

// VGPR 32-bit registers
def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
                            (add (sequence "VGPR%u", 0, 255))> {
  let AllocationPriority = 1;
}

// Unlike the SGPR tuples, the VGPR tuples are not decimated: a tuple may
// start at any VGPR that leaves room for the remaining sub-registers.

// VGPR 64-bit registers
def VGPR_64 : RegisterTuples<[sub0, sub1],
                             [(add (trunc VGPR_32, 255)),
                              (add (shl VGPR_32, 1))]>;

// VGPR 96-bit registers
def VGPR_96 : RegisterTuples<[sub0, sub1, sub2],
                             [(add (trunc VGPR_32, 254)),
                              (add (shl VGPR_32, 1)),
                              (add (shl VGPR_32, 2))]>;

// VGPR 128-bit registers
def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
                              [(add (trunc VGPR_32, 253)),
                               (add (shl VGPR_32, 1)),
                               (add (shl VGPR_32, 2)),
                               (add (shl VGPR_32, 3))]>;

// VGPR 256-bit registers
def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
                              [(add (trunc VGPR_32, 249)),
                               (add (shl VGPR_32, 1)),
                               (add (shl VGPR_32, 2)),
                               (add (shl VGPR_32, 3)),
                               (add (shl VGPR_32, 4)),
                               (add (shl VGPR_32, 5)),
                               (add (shl VGPR_32, 6)),
                               (add (shl VGPR_32, 7))]>;

// VGPR 512-bit registers
def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
                               sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
                              [(add (trunc VGPR_32, 241)),
                               (add (shl VGPR_32, 1)),
                               (add (shl VGPR_32, 2)),
                               (add (shl VGPR_32, 3)),
                               (add (shl VGPR_32, 4)),
                               (add (shl VGPR_32, 5)),
                               (add (shl VGPR_32, 6)),
                               (add (shl VGPR_32, 7)),
                               (add (shl VGPR_32, 8)),
                               (add (shl VGPR_32, 9)),
                               (add (shl VGPR_32, 10)),
                               (add (shl VGPR_32, 11)),
                               (add (shl VGPR_32, 12)),
                               (add (shl VGPR_32, 13)),
                               (add (shl VGPR_32, 14)),
                               (add (shl VGPR_32, 15))]>;

//===----------------------------------------------------------------------===//
//  Register classes used as source and destination
//===----------------------------------------------------------------------===//

// Assembler operand class named `name` whose operands are rendered with
// addRegOrImmOperands.
class RegImmMatcher<string name> : AsmOperandClass {
  let Name = name;
  let RenderMethod = "addRegOrImmOperands";
}

// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32], 32,
  (add SGPR_32, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
   TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)> {
  let AllocationPriority = 1;
}

// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
  (add SReg_32_XM0, M0)> {
  let AllocationPriority = 1;
}

def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)> {
  let AllocationPriority = 2;
}

def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add TTMP_64Regs)> {
  let isAllocatable = 0;
}

// All 64-bit scalar registers: SGPR pairs, the 64-bit special registers and
// the trap handler pairs.
def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
  (add SGPR_64, VCC, EXEC, FLAT_SCR, TTMP_64, TBA, TMA)> {
  let AllocationPriority = 2;
}

// Requires 2 s_mov_b64 to copy
let CopyCost = 2 in {

def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128Regs)> {
  let AllocationPriority = 4;
}

def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128Regs)> {
  let isAllocatable = 0;
}

def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128, TTMP_128)> {
  let AllocationPriority = 4;
}

} // End CopyCost = 2

def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
  // Requires 4 s_mov_b64 to copy
  let CopyCost = 4;
  let AllocationPriority = 5;
}

def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 32, (add SGPR_512)> {
  // Requires 8 s_mov_b64 to copy
  let CopyCost = 8;
  let AllocationPriority = 6;
}

// Register class for all vector registers (VGPRs + Interpolation Registers)
def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 32, (add VGPR_64)> {
  // Requires 2 v_mov_b32 to copy
  let CopyCost = 2;
  let AllocationPriority = 2;
}

def VReg_96 : RegisterClass<"AMDGPU", [untyped], 32, (add VGPR_96)> {
  // The class is untyped, so the size is given explicitly.
  let Size = 96;

  // Requires 3 v_mov_b32 to copy
  let CopyCost = 3;
  let AllocationPriority = 3;
}

def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VGPR_128)> {
  // Requires 4 v_mov_b32 to copy
  let CopyCost = 4;
  let AllocationPriority = 4;
}

def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> {
  // NOTE(review): presumably 8 v_mov_b32 to copy, following the pattern of
  // the smaller VReg classes -- confirm.
  let CopyCost = 8;
  let AllocationPriority = 5;
}

def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> {
  // NOTE(review): presumably 16 v_mov_b32 to copy, following the pattern of
  // the smaller VReg classes -- confirm.
  let CopyCost = 16;
  let AllocationPriority = 6;
}

// i1 values held in 32-bit VGPRs; the size is set explicitly to 32.
def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
  let Size = 32;
}

// Register operand over rc tagged as AMDGPU::OPERAND_REG_IMM32.
class RegImmOperand <RegisterClass rc> : RegisterOperand<rc> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_IMM32";
}

// Register operand over rc tagged as AMDGPU::OPERAND_REG_INLINE_C.
class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_INLINE_C";
}

//===----------------------------------------------------------------------===//
//  SSrc_* Operands with an SGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//

def SSrc_32 : RegImmOperand<SReg_32> {
  let ParserMatchClass = RegImmMatcher<"SSrc32">;
}

def SSrc_64 : RegImmOperand<SReg_64> {
  let ParserMatchClass = RegImmMatcher<"SSrc64">;
}

//===----------------------------------------------------------------------===//
//  SCSrc_* Operands with an SGPR or an inline constant
//===----------------------------------------------------------------------===//

def SCSrc_32 : RegInlineOperand<SReg_32> {
  let ParserMatchClass = RegImmMatcher<"SCSrc32">;
}

//===----------------------------------------------------------------------===//
//  VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//

// Union of the 32-bit vector and scalar register classes.
def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;

// Union of the 64-bit vector and scalar register classes.
def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
  let CopyCost = 2;
}

// RegImmOperand supplies OperandNamespace = "AMDGPU" and
// OperandType = "OPERAND_REG_IMM32", matching the SSrc_* operands.
def VSrc_32 : RegImmOperand<VS_32> {
  let ParserMatchClass = RegImmMatcher<"VSrc32">;
}

def VSrc_64 : RegImmOperand<VS_64> {
  let ParserMatchClass = RegImmMatcher<"VSrc64">;
}

//===----------------------------------------------------------------------===//
//  VCSrc_* Operands with an SGPR, VGPR or an inline constant
//===----------------------------------------------------------------------===//

// RegInlineOperand supplies OperandNamespace = "AMDGPU" and
// OperandType = "OPERAND_REG_INLINE_C", matching SCSrc_32.
def VCSrc_32 : RegInlineOperand<VS_32> {
  let ParserMatchClass = RegImmMatcher<"VCSrc32">;
}

def VCSrc_64 : RegInlineOperand<VS_64> {
  let ParserMatchClass = RegImmMatcher<"VCSrc64">;
}

//===----------------------------------------------------------------------===//
//  SCSrc_* Operands with an SGPR or an inline constant
//===----------------------------------------------------------------------===//

// 64-bit counterpart of SCSrc_32; kept after VS_64/VCSrc_64 to preserve the
// original definition order.
def SCSrc_64 : RegInlineOperand<SReg_64> {
  let ParserMatchClass = RegImmMatcher<"SCSrc64">;
}