//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Tablegen instruction definitions
//
//===----------------------------------------------------------------------===//

include "R600Intrinsics.td"

// Base class for instructions carrying a 32-bit `Inst` encoding field.
// Besides forwarding operands/asm/pattern to AMDGPUInst, it publishes four
// per-instruction properties through TSFlags:
//   TSFlags{4}   = Trig
//   TSFlags{5}   = Op3
//   TSFlags{6}   = isVector
//   TSFlags{8-7} = FlagOperandIdx
class InstR600 <bits<32> inst, dag outs, dag ins, string asm, list<dag> pattern,
                InstrItinClass itin>
    : AMDGPUInst <outs, ins, asm, pattern> {

  field bits<32> Inst;
  bit Trig = 0;
  bit Op3 = 0;
  bit isVector = 0;
  bits<2> FlagOperandIdx = 0;

  let Inst = inst;
  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = itin;

  let TSFlags{4} = Trig;
  let TSFlags{5} = Op3;

  // Vector instructions are instructions that must fill all slots in an
  // instruction group
  let TSFlags{6} = isVector;
  let TSFlags{8-7} = FlagOperandIdx;
}

// Base class for instructions whose `Inst` encoding field is 64 bits wide.
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
    AMDGPUInst <outs, ins, asm, pattern>
{
  field bits<64> Inst;

  let Namespace = "AMDGPU";
}

// Memory operand: X-channel register pointer plus an immediate index.
def MEMxi : Operand<iPTR> {
  let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
}

// Memory operand: register pointer plus a register index.
def MEMrr : Operand<iPTR> {
  let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
}

// Address-selection complex patterns; the quoted names are the C++ selector
// functions that implement them.
def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;

// Named ALU encoding fields, all defaulting to zero.
class R600_ALU {

  bits<7> DST_GPR = 0;
  bits<9> SRC0_SEL = 0;
  bits<1> SRC0_NEG = 0;
  bits<9> SRC1_SEL = 0;
  bits<1> SRC1_NEG = 0;
  bits<1> CLAMP = 0;

}

// Predicate operand; PRED_SEL_OFF is the default ("always execute") value.
def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
                                 (ops PRED_SEL_OFF)>;


// One-source ALU instruction. The asm string is opName followed by the
// operand list, so callers pass only the mnemonic in opName.
class R600_1OP <bits<32> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
  InstR600 <inst,
          (outs R600_Reg32:$dst),
          (ins R600_Reg32:$src, R600_Pred:$p, variable_ops),
          !strconcat(opName, " $dst, $src ($p)"),
          pattern,
          itin
  >;

// Two-source ALU instruction. The asm string is opName followed by the
// operand list, so callers pass only the mnemonic in opName.
class R600_2OP <bits<32> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
  InstR600 <inst,
          (outs R600_Reg32:$dst),
          (ins R600_Reg32:$src0, R600_Reg32:$src1,R600_Pred:$p, variable_ops),
          !strconcat(opName, " $dst, $src0, $src1"),
          pattern,
          itin
  >;

// Three-source ALU instruction; sets the Op3 flag (TSFlags{5}).
class R600_3OP <bits<32> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
  InstR600 <inst,
          (outs R600_Reg32:$dst),
          (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2,R600_Pred:$p, variable_ops),
          !strconcat(opName, " $dst, $src0, $src1, $src2"),
          pattern,
          itin>{

    let Op3 = 1;
  }



// Predicate-setting instruction. The encoding is taken from the $src1
// immediate ($src0 is excluded from encoding); the flags operand is
// operand index 3.
def PRED_X : InstR600 <0, (outs R600_Predicate_Bit:$dst),
           (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
           "PRED $dst, $src0, $src1",
           [], NullALU>
{
  let DisableEncoding = "$src0";
  field bits<32> Inst;
  bits<32> src1;

  let Inst = src1;
  let FlagOperandIdx = 3;
}

let isTerminator = 1, isBranch = 1, isPseudo = 1 in {
def JUMP : InstR600 <0x10,
          (outs),
          (ins brtarget:$target, R600_Pred:$p),
          "JUMP $target ($p)",
          [], AnyALU
  >;
}

// Reduction-style instruction: caller-supplied inputs, a single 32-bit
// result, VecALU itinerary by default.
class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern,
                      InstrItinClass itin = VecALU> :
  InstR600 <inst,
          (outs R600_Reg32:$dst),
          ins,
          asm,
          pattern,
          itin

  >;

// Texture instruction: 128-bit source/destination plus two immediates.
// The operand list starts with a space so the mnemonic and $dst are
// separated in the printed form, matching R600_1OP/2OP/3OP.
class R600_TEX <bits<32> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
  InstR600 <inst,
          (outs R600_Reg128:$dst),
          (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2),
          !strconcat(opName, " $dst, $src0, $src1, $src2"),
          pattern,
          itin
  >;

// Immediate leaf that matches texture-type values 6, 7, 8, 11 and 12; used by
// the *_C (shadow/compare) texture patterns.
def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 11 || TType == 12;
  }]
>;

// Evergreen CF RAT instruction encoding (two 32-bit words).
class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
                 dag ins, string asm, list<dag> pattern> :
    InstR600ISA <outs, ins, asm, pattern>
{
  bits<7>  RW_GPR;
  bits<7>  INDEX_GPR;

  bits<2>  RIM;
  bits<2>  TYPE;
  bits<1>  RW_REL;
  bits<2>  ELEM_SIZE;

  bits<12> ARRAY_SIZE;
  bits<4>  COMP_MASK;
  bits<4>  BURST_COUNT;
  bits<1>  VPM;
  bits<1>  eop;
  bits<1>  MARK;
  bits<1>  BARRIER;

  // CF_ALLOC_EXPORT_WORD0_RAT
  let Inst{3-0}   = rat_id;
  let Inst{9-4}   = rat_inst;
  let Inst{10}    = 0; // Reserved
  let Inst{12-11} = RIM;
  let Inst{14-13} = TYPE;
  let Inst{21-15} = RW_GPR;
  let Inst{22}    = RW_REL;
  let Inst{29-23} = INDEX_GPR;
  let Inst{31-30} = ELEM_SIZE;

  // CF_ALLOC_EXPORT_WORD1_BUF
  let Inst{43-32} = ARRAY_SIZE;
  let Inst{47-44} = COMP_MASK;
  let Inst{51-48} = BURST_COUNT;
  let Inst{52}    = VPM;
  let Inst{53}    = eop;
  let Inst{61-54} = cf_inst;
  let Inst{62}    = MARK;
  let Inst{63}    = BARRIER;
}

// Load whose source value is a pointer in PARAM_I_ADDRESS space. Loads with
// no recorded source value never match.
def load_param : PatFrag<(ops node:$ptr),
                         (load node:$ptr),
                          [{
                           const Value *Src = cast<LoadSDNode>(N)->getSrcValue();
                           if (Src) {
                                PointerType * PT = dyn_cast<PointerType>(Src->getType());
                                return PT && PT->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS;
                           }
                           return false;
                          }]>;

// Subtarget predicates. The adjacent string literals are concatenated into a
// single C++ condition.
def isR600 : Predicate<"Subtarget.device()"
                            "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;
def isR700 : Predicate<"Subtarget.device()"
                            "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
                            "Subtarget.device()->getDeviceFlag()"
                            ">= OCL_DEVICE_RV710">;
def isEG : Predicate<"Subtarget.device()"
                            "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
                            "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;
def isCayman : Predicate<"Subtarget.device()"
                            "->getDeviceFlag() == OCL_DEVICE_CAYMAN">;
def isEGorCayman : Predicate<"Subtarget.device()"
                            "->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
                            "|| Subtarget.device()->getGeneration() =="
                            "AMDGPUDeviceInfo::HD6XXX">;

def isR600toCayman : Predicate<
                     "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;


let Predicates = [isR600toCayman] in {

//===----------------------------------------------------------------------===//
// Common Instructions R600, R700, Evergreen, Cayman
//===----------------------------------------------------------------------===//

def ADD : R600_2OP <
  0x0, "ADD",
  [(set R600_Reg32:$dst, (fadd R600_Reg32:$src0, R600_Reg32:$src1))]
>;

// Non-IEEE MUL: 0 * anything = 0
def MUL : R600_2OP <
  0x1, "MUL NON-IEEE",
  [(set R600_Reg32:$dst, (int_AMDGPU_mul R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def MUL_IEEE : R600_2OP <
  0x2, "MUL_IEEE",
  [(set R600_Reg32:$dst, (fmul R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def MAX : R600_2OP <
  0x3, "MAX",
  [(set R600_Reg32:$dst, (AMDGPUfmax R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def MIN : R600_2OP <
  0x4, "MIN",
  [(set R600_Reg32:$dst, (AMDGPUfmin R600_Reg32:$src0, R600_Reg32:$src1))]
>;

// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
// so some of the instruction names don't match the asm string.
// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.

// Floating-point compares: produce FP_ONE when the condition holds and
// FP_ZERO otherwise.
def SETE : R600_2OP <
  0x08, "SETE",
  [(set R600_Reg32:$dst,
   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
             COND_EQ))]
>;

def SGT : R600_2OP <
  0x09, "SETGT",
  [(set R600_Reg32:$dst,
   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
              COND_GT))]
>;

def SGE : R600_2OP <
  0xA, "SETGE",
  [(set R600_Reg32:$dst,
   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
              COND_GE))]
>;

def SNE : R600_2OP <
  0xB, "SETNE",
  [(set R600_Reg32:$dst,
   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
    COND_NE))]
>;

def FRACT : R600_1OP <
  0x10, "FRACT",
  [(set R600_Reg32:$dst, (AMDGPUfract R600_Reg32:$src))]
>;

def TRUNC : R600_1OP <
  0x11, "TRUNC",
  [(set R600_Reg32:$dst, (int_AMDGPU_trunc R600_Reg32:$src))]
>;

def CEIL : R600_1OP <
  0x12, "CEIL",
  [(set R600_Reg32:$dst, (fceil R600_Reg32:$src))]
>;

def RNDNE : R600_1OP <
  0x13, "RNDNE",
  [(set R600_Reg32:$dst, (frint R600_Reg32:$src))]
>;

def FLOOR : R600_1OP <
  0x14, "FLOOR",
  [(set R600_Reg32:$dst, (int_AMDGPU_floor R600_Reg32:$src))]
>;

// Register move; the flags immediate is operand index 2.
def MOV : InstR600 <0x19, (outs R600_Reg32:$dst),
                          (ins R600_Reg32:$src0, i32imm:$flags,
                               R600_Pred:$p),
                          "MOV $dst, $src0", [], AnyALU> {
  let FlagOperandIdx = 2;
}

// Immediate move. Shares opcode 0x19 with MOV; the register source is
// expected to be ALU_LITERAL_X (see the patterns below) and the literal value
// travels in $imm.
class MOV_IMM <ValueType vt, Operand immType> : InstR600 <0x19,
  (outs R600_Reg32:$dst),
  (ins R600_Reg32:$alu_literal, R600_Pred:$p, immType:$imm),
  "MOV_IMM $dst, $imm",
  [], AnyALU
>;

def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
def : Pat <
  (imm:$val),
  (MOV_IMM_I32 (i32 ALU_LITERAL_X), imm:$val)
>;

def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
def : Pat <
  (fpimm:$val),
  (MOV_IMM_F32 (i32 ALU_LITERAL_X), fpimm:$val)
>;

// The flags immediate is operand index 3.
def KILLGT : InstR600 <0x2D,
          (outs R600_Reg32:$dst),
          (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags, R600_Pred:$p,
               variable_ops),
          "KILLGT $dst, $src0, $src1, $flags ($p)",
          [],
          NullALU>{
  let FlagOperandIdx = 3;
}

def AND_INT : R600_2OP <
  0x30, "AND_INT",
  [(set R600_Reg32:$dst, (and R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def OR_INT : R600_2OP <
  0x31, "OR_INT",
  [(set R600_Reg32:$dst, (or R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def XOR_INT : R600_2OP <
  0x32, "XOR_INT",
  [(set R600_Reg32:$dst, (xor R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def NOT_INT : R600_1OP <
  0x33, "NOT_INT",
  [(set R600_Reg32:$dst, (not R600_Reg32:$src))]
>;

def ADD_INT : R600_2OP <
  0x34, "ADD_INT",
  [(set R600_Reg32:$dst, (add R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def SUB_INT : R600_2OP <
  0x35, "SUB_INT",
  [(set R600_Reg32:$dst, (sub R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def MAX_INT : R600_2OP <
  0x36, "MAX_INT",
  [(set R600_Reg32:$dst, (AMDGPUsmax R600_Reg32:$src0, R600_Reg32:$src1))]>;

def MIN_INT : R600_2OP <
  0x37, "MIN_INT",
  [(set R600_Reg32:$dst, (AMDGPUsmin R600_Reg32:$src0, R600_Reg32:$src1))]>;

// Unsigned max must match the unsigned node; matching AMDGPUsmax here would
// duplicate MAX_INT's pattern and leave unsigned max unselectable.
// NOTE(review): AMDGPUumax is assumed to be defined alongside AMDGPUumin.
def MAX_UINT : R600_2OP <
  0x38, "MAX_UINT",
  [(set R600_Reg32:$dst, (AMDGPUumax R600_Reg32:$src0, R600_Reg32:$src1))]
>;

def MIN_UINT : R600_2OP <
  0x39, "MIN_UINT",
  [(set R600_Reg32:$dst, (AMDGPUumin R600_Reg32:$src0, R600_Reg32:$src1))]
>;

// Integer compares: produce -1 when the condition holds and 0 otherwise.
def SETE_INT : R600_2OP <
  0x3A, "SETE_INT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))]
>;

def SETGT_INT : R600_2OP <
  0x3B, "SGT_INT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
>;

def SETGE_INT : R600_2OP <
  0x3C, "SETGE_INT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))]
>;

def SETNE_INT : R600_2OP <
  0x3D, "SETNE_INT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))]
>;

def SETGT_UINT : R600_2OP <
  0x3E, "SETGT_UINT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))]
>;

def SETGE_UINT : R600_2OP <
  0x3F, "SETGE_UINT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))]
>;

// Note the operand swap: `select c, t, f` maps to (src0 = c, src1 = f,
// src2 = t).
def CNDE_INT : R600_3OP <
  0x1C, "CNDE_INT",
  [(set (i32 R600_Reg32:$dst),
   (select R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
>;

//===----------------------------------------------------------------------===//
// Texture instructions
//===----------------------------------------------------------------------===//

// TEX_LD takes five immediates, so it overrides the asm string and input
// operand list inherited from R600_TEX.
def TEX_LD : R600_TEX <
  0x03, "TEX_LD",
  [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$src4, imm:$src5))]
> {
  let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $src4, $src5";
  let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5);
}

def TEX_GET_TEXTURE_RESINFO : R600_TEX <
  0x04, "TEX_GET_TEXTURE_RESINFO",
  [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$src1, imm:$src2))]
>;

def TEX_GET_GRADIENTS_H : R600_TEX <
  0x07, "TEX_GET_GRADIENTS_H",
  [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$src1, imm:$src2))]
>;

def TEX_GET_GRADIENTS_V : R600_TEX <
  0x08, "TEX_GET_GRADIENTS_V",
  [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$src1, imm:$src2))]
>;

def TEX_SET_GRADIENTS_H : R600_TEX <
  0x0B, "TEX_SET_GRADIENTS_H",
  []
>;

def TEX_SET_GRADIENTS_V : R600_TEX <
  0x0C, "TEX_SET_GRADIENTS_V",
  []
>;

def TEX_SAMPLE : R600_TEX <
  0x10, "TEX_SAMPLE",
  [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, imm:$src2))]
>;

def TEX_SAMPLE_C : R600_TEX <
  0x18, "TEX_SAMPLE_C",
  [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
>;

def TEX_SAMPLE_L : R600_TEX <
  0x11, "TEX_SAMPLE_L",
  [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, imm:$src2))]
>;

def TEX_SAMPLE_C_L : R600_TEX <
  0x19, "TEX_SAMPLE_C_L",
  [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
>;

def TEX_SAMPLE_LB : R600_TEX <
  0x12, "TEX_SAMPLE_LB",
  [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, imm:$src2))]
>;

def TEX_SAMPLE_C_LB : R600_TEX <
  0x1A, "TEX_SAMPLE_C_LB",
  [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))]
>;

def TEX_SAMPLE_G : R600_TEX <
  0x14, "TEX_SAMPLE_G",
  []
>;

def TEX_SAMPLE_C_G : R600_TEX <
  0x1C, "TEX_SAMPLE_C_G",
  []
>;

//===----------------------------------------------------------------------===//
// Helper classes for common instructions
//===----------------------------------------------------------------------===//
//
// Each *_Common class fixes the mnemonic and selection pattern and leaves the
// opcode as a parameter, because the same operation has different opcodes in
// the per-generation sections below.

class MUL_LIT_Common <bits<32> inst> : R600_3OP <
  inst, "MUL_LIT",
  []
>;

class MULADD_Common <bits<32> inst> : R600_3OP <
  inst, "MULADD",
  [(set (f32 R600_Reg32:$dst),
   (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
>;

class CNDE_Common <bits<32> inst> : R600_3OP <
  inst, "CNDE",
  [(set (f32 R600_Reg32:$dst),
   (select (i32 (fp_to_sint (fneg R600_Reg32:$src0))), (f32 R600_Reg32:$src2), (f32 R600_Reg32:$src1)))]
>;

class CNDGT_Common <bits<32> inst> : R600_3OP <
  inst, "CNDGT",
  []
>;

class CNDGE_Common <bits<32> inst> : R600_3OP <
  inst, "CNDGE",
  [(set R600_Reg32:$dst, (int_AMDGPU_cndlt R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
>;

// The flags immediate is operand index 3.
class DOT4_Common <bits<32> inst> : R600_REDUCTION <
  inst,
  (ins R600_Reg128:$src0, R600_Reg128:$src1, i32imm:$flags),
  "DOT4 $dst, $src0, $src1",
  []
  > {
  let FlagOperandIdx = 3;
}

// Selects int_AMDGPU_dp4 to the given DOT4 instruction with flags = 0.
class DOT4_Pat <Instruction dot4> : Pat <
  (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1),
  (dot4 R600_Reg128:$src0, R600_Reg128:$src1, 0)
>;

// Emits two records per use: <name>_pseudo, a 128-bit form that carries the
// int_AMDGPU_cube selection pattern, and <name>_real, a 32-bit form with a
// flags operand and no pattern.
multiclass CUBE_Common <bits<32> inst> {

  def _pseudo : InstR600 <
    inst,
    (outs R600_Reg128:$dst),
    (ins R600_Reg128:$src),
    "CUBE $dst, $src",
    [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
    VecALU
  >;

  def _real : InstR600 <
    inst,
    (outs R600_Reg32:$dst),
    (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
    "CUBE $dst, $src0, $src1",
    [], VecALU
  >{
    let FlagOperandIdx = 3;
  }
}

class EXP_IEEE_Common <bits<32> inst> : R600_1OP <
  inst, "EXP_IEEE",
  [(set R600_Reg32:$dst, (fexp2 R600_Reg32:$src))]
>;

class FLT_TO_INT_Common <bits<32> inst> : R600_1OP <
  inst, "FLT_TO_INT",
  [(set R600_Reg32:$dst, (fp_to_sint R600_Reg32:$src))]
>;

class INT_TO_FLT_Common <bits<32> inst> : R600_1OP <
  inst, "INT_TO_FLT",
  [(set R600_Reg32:$dst, (sint_to_fp R600_Reg32:$src))]
>;

class FLT_TO_UINT_Common <bits<32> inst> : R600_1OP <
  inst, "FLT_TO_UINT",
  [(set R600_Reg32:$dst, (fp_to_uint R600_Reg32:$src))]
>;

class UINT_TO_FLT_Common <bits<32> inst> : R600_1OP <
  inst, "UINT_TO_FLT",
  [(set R600_Reg32:$dst, (uint_to_fp R600_Reg32:$src))]
>;

class LOG_CLAMPED_Common <bits<32> inst> : R600_1OP <
  inst, "LOG_CLAMPED",
  []
>;

class LOG_IEEE_Common <bits<32> inst> : R600_1OP <
  inst, "LOG_IEEE",
  [(set R600_Reg32:$dst, (int_AMDIL_log R600_Reg32:$src))]
>;

// For the classes below only the mnemonic is passed as opName:
// R600_1OP/R600_2OP append the operand list themselves, so repeating it here
// would print the operands twice.

class LSHL_Common <bits<32> inst> : R600_2OP <
  inst, "LSHL",
  [(set R600_Reg32:$dst, (shl R600_Reg32:$src0, R600_Reg32:$src1))]
>;

class LSHR_Common <bits<32> inst> : R600_2OP <
  inst, "LSHR",
  [(set R600_Reg32:$dst, (srl R600_Reg32:$src0, R600_Reg32:$src1))]
>;

class ASHR_Common <bits<32> inst> : R600_2OP <
  inst, "ASHR",
  [(set R600_Reg32:$dst, (sra R600_Reg32:$src0, R600_Reg32:$src1))]
>;

class MULHI_INT_Common <bits<32> inst> : R600_2OP <
  inst, "MULHI_INT",
  [(set R600_Reg32:$dst, (mulhs R600_Reg32:$src0, R600_Reg32:$src1))]
>;

class MULHI_UINT_Common <bits<32> inst> : R600_2OP <
  inst, "MULHI",
  [(set R600_Reg32:$dst, (mulhu R600_Reg32:$src0, R600_Reg32:$src1))]
>;

class MULLO_INT_Common <bits<32> inst> : R600_2OP <
  inst, "MULLO_INT",
  [(set R600_Reg32:$dst, (mul R600_Reg32:$src0, R600_Reg32:$src1))]
>;

class MULLO_UINT_Common <bits<32> inst> : R600_2OP <
  inst, "MULLO_UINT",
  []
>;

class RECIP_CLAMPED_Common <bits<32> inst> : R600_1OP <
  inst, "RECIP_CLAMPED",
  []
>;

class RECIP_IEEE_Common <bits<32> inst> : R600_1OP <
  inst, "RECIP_IEEE",
  [(set R600_Reg32:$dst, (int_AMDGPU_rcp R600_Reg32:$src))]
>;

class RECIP_UINT_Common <bits<32> inst> : R600_1OP <
  inst, "RECIP_INT",
  [(set R600_Reg32:$dst, (AMDGPUurecip R600_Reg32:$src))]
>;

class RECIPSQRT_CLAMPED_Common <bits<32> inst> : R600_1OP <
  inst, "RECIPSQRT_CLAMPED",
  [(set R600_Reg32:$dst, (int_AMDGPU_rsq R600_Reg32:$src))]
>;

class RECIPSQRT_IEEE_Common <bits<32> inst> : R600_1OP <
  inst, "RECIPSQRT_IEEE",
  []
>;

// SIN/COS set the Trig flag (TSFlags{4}).
class SIN_Common <bits<32> inst> : R600_1OP <
  inst, "SIN", []>{
  let Trig = 1;
}

class COS_Common <bits<32> inst> : R600_1OP <
  inst, "COS", []> {
  let Trig = 1;
}

//===----------------------------------------------------------------------===//
// Helper patterns for complex intrinsics
//===----------------------------------------------------------------------===//

// div(a, b) -> MUL(a, recip(b))
class DIV_Common <InstR600 recip_ieee> : Pat<
  (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
  (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
>;

class SSG_Common <InstR600 cndgt, InstR600 cndge> : Pat <
  (int_AMDGPU_ssg R600_Reg32:$src),
  (cndgt R600_Reg32:$src, (f32 ONE), (cndge R600_Reg32:$src, (f32 ZERO), (f32 NEG_ONE)))
>;

class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
  (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w),
  (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x))
>;

//===----------------------------------------------------------------------===//
// R600 / R700 Instructions
//===----------------------------------------------------------------------===//

let Predicates = [isR600] in {

  def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
  def MULADD_r600 : MULADD_Common<0x10>;
  def CNDE_r600 : CNDE_Common<0x18>;
  def CNDGT_r600 : CNDGT_Common<0x19>;
  def CNDGE_r600 : CNDGE_Common<0x1A>;
  def DOT4_r600 : DOT4_Common<0x50>;
  def : DOT4_Pat <DOT4_r600>;
  defm CUBE_r600 : CUBE_Common<0x52>;
  def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
  def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
  def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
  def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
  def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
  def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
  def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
  def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
  def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
  def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
  def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
  def SIN_r600 : SIN_Common<0x6E>;
  def COS_r600 : COS_Common<0x6F>;
  def ASHR_r600 : ASHR_Common<0x70>;
  def LSHR_r600 : LSHR_Common<0x71>;
  def LSHL_r600 : LSHL_Common<0x72>;
  def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
  def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
  def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
  def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
  def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;

  def DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
  def POW_r600 : POW_Common<LOG_IEEE_r600, EXP_IEEE_r600, MUL, GPRF32>;
  def SSG_r600 : SSG_Common<CNDGT_r600, CNDGE_r600>;
  def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;

}

// Helper pattern for normalizing inputs to trigonometric instructions for
// R700+ cards: the source is multiplied by CONST.TWO_PI_INV before being fed
// to the trig instruction.
class TRIG_eg <InstR600 trig, Intrinsic intr> : Pat<
  (intr R600_Reg32:$src),
  (trig (MUL (MOV_IMM_I32 (i32 ALU_LITERAL_X), CONST.TWO_PI_INV), R600_Reg32:$src))
>;

//===----------------------------------------------------------------------===//
// R700 Only instructions
//===----------------------------------------------------------------------===//

let Predicates = [isR700] in {
  def SIN_r700 : SIN_Common<0x6E>;
  def COS_r700 : COS_Common<0x6F>;

  // R700 normalizes inputs to SIN/COS the same as EG
  def : TRIG_eg <SIN_r700, int_AMDGPU_sin>;
  def : TRIG_eg <COS_r700, int_AMDGPU_cos>;
}

//===----------------------------------------------------------------------===//
// Evergreen Only instructions
//===----------------------------------------------------------------------===//

let Predicates = [isEG] in {

def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;

def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
def MULHI_INT_eg : MULHI_INT_Common<0x90>;
def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;

} // End Predicates = [isEG]

//===----------------------------------------------------------------------===//
// Evergreen / Cayman Instructions
//===----------------------------------------------------------------------===//

let Predicates = [isEGorCayman] in {

  // BFE_UINT - bit_extract, an optimization for mask and shift
  // Src0 = Input
  // Src1 = Offset
  // Src2 = Width
  //
  // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
  //
  // Example Usage:
  // (Offset, Width)
  //
  // (0, 8)           = (Input << 24) >> 24  = (Input &  0xff)       >> 0
  // (8, 8)           = (Input << 16) >> 24  = (Input &  0xffff)     >> 8
  // (16,8)           = (Input <<  8) >> 24  = (Input &  0xffffff)   >> 16
  // (24,8)           = (Input <<  0) >> 24  = (Input &  0xffffffff) >> 24
  def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
    [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
                                                      R600_Reg32:$src1,
                                                      R600_Reg32:$src2))],
    VecALU
  >;

  def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
    [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
                                          R600_Reg32:$src2))],
    VecALU
  >;

  def MULADD_eg : MULADD_Common<0x14>;
  def ASHR_eg : ASHR_Common<0x15>;
  def LSHR_eg : LSHR_Common<0x16>;
  def LSHL_eg : LSHL_Common<0x17>;
  def CNDE_eg : CNDE_Common<0x19>;
  def CNDGT_eg : CNDGT_Common<0x1A>;
  def CNDGE_eg : CNDGE_Common<0x1B>;
  def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
  def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
  def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
  def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
  def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
  def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
  def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
  def SIN_eg : SIN_Common<0x8D>;
  def COS_eg : COS_Common<0x8E>;
846 def DOT4_eg : DOT4_Common<0xBE>; 847 def : DOT4_Pat <DOT4_eg>; 848 defm CUBE_eg : CUBE_Common<0xC0>; 849 850 def DIV_eg : DIV_Common<RECIP_IEEE_eg>; 851 def POW_eg : POW_Common<LOG_IEEE_eg, EXP_IEEE_eg, MUL, GPRF32>; 852 def SSG_eg : SSG_Common<CNDGT_eg, CNDGE_eg>; 853 def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>; 854 855 def : TRIG_eg <SIN_eg, int_AMDGPU_sin>; 856 def : TRIG_eg <COS_eg, int_AMDGPU_cos>; 857 858 def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { 859 let Pattern = []; 860 } 861 862 def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; 863 864 def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> { 865 let Pattern = []; 866 } 867 868 def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>; 869 870 def : Pat<(fp_to_sint R600_Reg32:$src), 871 (FLT_TO_INT_eg (TRUNC R600_Reg32:$src))>; 872 873 def : Pat<(fp_to_uint R600_Reg32:$src), 874 (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src))>; 875 876 //===----------------------------------------------------------------------===// 877 // Memory read/write instructions 878 //===----------------------------------------------------------------------===// 879 880 let usesCustomInserter = 1 in { 881 882 def RAT_WRITE_CACHELESS_eg : EG_CF_RAT <0x57, 0x2, 0, (outs), 883 (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$eop), 884 "RAT_WRITE_CACHELESS_eg $rw_gpr, $index_gpr, $eop", 885 []> 886 { 887 let RIM = 0; 888 // XXX: Have a separate instruction for non-indexed writes. 
889 let TYPE = 1; 890 let RW_REL = 0; 891 let ELEM_SIZE = 0; 892 893 let ARRAY_SIZE = 0; 894 let COMP_MASK = 1; 895 let BURST_COUNT = 0; 896 let VPM = 0; 897 let MARK = 0; 898 let BARRIER = 1; 899 } 900 901 } // End usesCustomInserter = 1 902 903 // i32 global_store 904 def : Pat < 905 (global_store (i32 R600_TReg32_X:$val), R600_TReg32_X:$ptr), 906 (RAT_WRITE_CACHELESS_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0) 907 >; 908 909 // Floating point global_store 910 def : Pat < 911 (global_store (f32 R600_TReg32_X:$val), R600_TReg32_X:$ptr), 912 (RAT_WRITE_CACHELESS_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0) 913 >; 914 915 class VTX_READ_eg <bits<8> buffer_id, dag outs, list<dag> pattern> 916 : InstR600ISA <outs, (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr", pattern> { 917 918 // Operands 919 bits<7> DST_GPR; 920 bits<7> SRC_GPR; 921 922 // Static fields 923 bits<5> VC_INST = 0; 924 bits<2> FETCH_TYPE = 2; 925 bits<1> FETCH_WHOLE_QUAD = 0; 926 bits<8> BUFFER_ID = buffer_id; 927 bits<1> SRC_REL = 0; 928 // XXX: We can infer this field based on the SRC_GPR. This would allow us 929 // to store vertex addresses in any channel, not just X. 930 bits<2> SRC_SEL_X = 0; 931 bits<6> MEGA_FETCH_COUNT; 932 bits<1> DST_REL = 0; 933 bits<3> DST_SEL_X; 934 bits<3> DST_SEL_Y; 935 bits<3> DST_SEL_Z; 936 bits<3> DST_SEL_W; 937 // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, 938 // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, 939 // however, based on my testing if USE_CONST_FIELDS is set, then all 940 // these fields need to be set to 0. 
941 bits<1> USE_CONST_FIELDS = 0; 942 bits<6> DATA_FORMAT; 943 bits<2> NUM_FORMAT_ALL = 1; 944 bits<1> FORMAT_COMP_ALL = 0; 945 bits<1> SRF_MODE_ALL = 0; 946 947 // LLVM can only encode 64-bit instructions, so these fields are manually 948 // encoded in R600CodeEmitter 949 // 950 // bits<16> OFFSET; 951 // bits<2> ENDIAN_SWAP = 0; 952 // bits<1> CONST_BUF_NO_STRIDE = 0; 953 // bits<1> MEGA_FETCH = 0; 954 // bits<1> ALT_CONST = 0; 955 // bits<2> BUFFER_INDEX_MODE = 0; 956 957 // VTX_WORD0 958 let Inst{4-0} = VC_INST; 959 let Inst{6-5} = FETCH_TYPE; 960 let Inst{7} = FETCH_WHOLE_QUAD; 961 let Inst{15-8} = BUFFER_ID; 962 let Inst{22-16} = SRC_GPR; 963 let Inst{23} = SRC_REL; 964 let Inst{25-24} = SRC_SEL_X; 965 let Inst{31-26} = MEGA_FETCH_COUNT; 966 967 // VTX_WORD1_GPR 968 let Inst{38-32} = DST_GPR; 969 let Inst{39} = DST_REL; 970 let Inst{40} = 0; // Reserved 971 let Inst{43-41} = DST_SEL_X; 972 let Inst{46-44} = DST_SEL_Y; 973 let Inst{49-47} = DST_SEL_Z; 974 let Inst{52-50} = DST_SEL_W; 975 let Inst{53} = USE_CONST_FIELDS; 976 let Inst{59-54} = DATA_FORMAT; 977 let Inst{61-60} = NUM_FORMAT_ALL; 978 let Inst{62} = FORMAT_COMP_ALL; 979 let Inst{63} = SRF_MODE_ALL; 980 981 // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding 982 // is done in R600CodeEmitter 983 // 984 // Inst{79-64} = OFFSET; 985 // Inst{81-80} = ENDIAN_SWAP; 986 // Inst{82} = CONST_BUF_NO_STRIDE; 987 // Inst{83} = MEGA_FETCH; 988 // Inst{84} = ALT_CONST; 989 // Inst{86-85} = BUFFER_INDEX_MODE; 990 // Inst{95-86} = 0; Reserved 991 992 // VTX_WORD3 (Padding) 993 // 994 // Inst{127-96} = 0; 995 } 996 997 class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern> 998 : VTX_READ_eg <buffer_id, (outs R600_TReg32_X:$dst), pattern> { 999 1000 let MEGA_FETCH_COUNT = 4; 1001 let DST_SEL_X = 0; 1002 let DST_SEL_Y = 7; // Masked 1003 let DST_SEL_Z = 7; // Masked 1004 let DST_SEL_W = 7; // Masked 1005 let DATA_FORMAT = 0xD; // COLOR_32 1006 1007 // This is not really necessary, but there 
were some GPU hangs that appeared 1008 // to be caused by ALU instructions in the next instruction group that wrote 1009 // to the $ptr registers of the VTX_READ. 1010 // e.g. 1011 // %T3_X<def> = VTX_READ_PARAM_i32_eg %T2_X<kill>, 24 1012 // %T2_X<def> = MOV %ZERO 1013 //Adding this constraint prevents this from happening. 1014 let Constraints = "$ptr.ptr = $dst"; 1015 } 1016 1017 class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> 1018 : VTX_READ_eg <buffer_id, (outs R600_Reg128:$dst), pattern> { 1019 1020 let MEGA_FETCH_COUNT = 16; 1021 let DST_SEL_X = 0; 1022 let DST_SEL_Y = 1; 1023 let DST_SEL_Z = 2; 1024 let DST_SEL_W = 3; 1025 let DATA_FORMAT = 0x22; // COLOR_32_32_32_32 1026 1027 // XXX: Need to force VTX_READ_128 instructions to write to the same register 1028 // that holds its buffer address to avoid potential hangs. We can't use 1029 // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst 1030 // registers are different sizes. 1031 } 1032 1033 //===----------------------------------------------------------------------===// 1034 // VTX Read from parameter memory space 1035 //===----------------------------------------------------------------------===// 1036 1037 class VTX_READ_PARAM_32_eg <ValueType vt> : VTX_READ_32_eg <0, 1038 [(set (vt R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))] 1039 >; 1040 1041 def VTX_READ_PARAM_i32_eg : VTX_READ_PARAM_32_eg<i32>; 1042 def VTX_READ_PARAM_f32_eg : VTX_READ_PARAM_32_eg<f32>; 1043 1044 1045 //===----------------------------------------------------------------------===// 1046 // VTX Read from global memory space 1047 //===----------------------------------------------------------------------===// 1048 1049 // 32-bit reads 1050 1051 class VTX_READ_GLOBAL_eg <ValueType vt> : VTX_READ_32_eg <1, 1052 [(set (vt R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))] 1053 >; 1054 1055 def VTX_READ_GLOBAL_i32_eg : VTX_READ_GLOBAL_eg<i32>; 1056 def VTX_READ_GLOBAL_f32_eg : 
VTX_READ_GLOBAL_eg<f32>;

// 128-bit reads

// 128-bit global_load of vector type vt, fetched through buffer id 1.
class VTX_READ_GLOBAL_128_eg <ValueType vt>
  : VTX_READ_128_eg <1,
      [(set (vt R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))]
    >;

def VTX_READ_GLOBAL_v4i32_eg : VTX_READ_GLOBAL_128_eg<v4i32>;
def VTX_READ_GLOBAL_v4f32_eg : VTX_READ_GLOBAL_128_eg<v4f32>;

}

let Predicates = [isCayman] in {

// These definitions are marked isVector (see the isVector field of InstR600:
// vector instructions must fill all slots in an instruction group).
let isVector = 1 in {

def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;

def MULLO_INT_cm  : MULLO_INT_Common<0x8F>;
def MULHI_INT_cm  : MULHI_INT_Common<0x90>;
def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;

} // End isVector = 1

// RECIP_UINT emulation for Cayman: convert the operand to float, take the
// IEEE reciprocal, multiply by the literal 0x4f800000 (the IEEE-754
// single-precision encoding of 2^32) and convert the product back to uint.
def : Pat <
  (AMDGPUurecip R600_Reg32:$src0),
  (FLT_TO_UINT_eg
    (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
              (MOV_IMM_I32 (i32 ALU_LITERAL_X), 0x4f800000)))
>;

} // End isCayman

let isCodeGenOnly = 1 in {

// Selected for the int_AMDGPU_mullit intrinsic.  Note that the pattern takes
// three sources while the asm string only prints the first two.
def MULLIT : AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
  "MULLIT $dst, $src0, $src1",
  [(set R600_Reg128:$dst,
        (int_AMDGPU_mullit R600_Reg32:$src0, R600_Reg32:$src1,
                           R600_Reg32:$src2))]
>;

let usesCustomInserter = 1, isPseudo = 1 in {

// Operand-less pseudo that produces the value of intrinsic intr in $dst.
class R600PreloadInst <string asm, Intrinsic intr>
  : AMDGPUInst <
      (outs R600_TReg32:$dst),
      (ins),
      asm,
      [(set R600_TReg32:$dst, (intr))]
    >;

// Selected for int_AMDGPU_load_const with an immediate operand.
def R600_LOAD_CONST : AMDGPUShaderInst <
  (outs R600_Reg32:$dst),
  (ins i32imm:$src0),
  "R600_LOAD_CONST $dst, $src0",
  [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
>;

// Selected for int_AMDGPU_reserve_reg with an immediate operand; defines no
// outputs.
def RESERVE_REG : AMDGPUShaderInst <
  (outs),
  (ins i32imm:$src),
  "RESERVE_REG $src",
  [(int_AMDGPU_reserve_reg imm:$src)]
>;

// Selected for int_AMDGPU_txd when $src4 is a plain immediate.
def TXD : AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins
R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
       i32imm:$src3, i32imm:$src4),
  "TXD $dst, $src0, $src1, $src2, $src3, $src4",
  [(set R600_Reg128:$dst,
        (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1,
                        R600_Reg128:$src2, imm:$src3, imm:$src4))]
>;

// Same intrinsic as TXD, but matched when $src4 satisfies TEX_SHADOW instead
// of being an arbitrary immediate.
def TXD_SHADOW : AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
       i32imm:$src3, i32imm:$src4),
  "TXD_SHADOW $dst, $src0, $src1, $src2, $src3, $src4",
  [(set R600_Reg128:$dst,
        (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1,
                        R600_Reg128:$src2, imm:$src3, TEX_SHADOW:$src4))]
>;

} // End usesCustomInserter = 1, isPseudo = 1

} // End isCodeGenOnly = 1

// R600_Reg32 instantiations of the CLAMP / FABS / FNEG classes.
def CLAMP_R600 : CLAMP<R600_Reg32>;
def FABS_R600  : FABS<R600_Reg32>;
def FNEG_R600  : FNEG<R600_Reg32>;

let usesCustomInserter = 1 in {

// No selection pattern of its own; it is only produced by the KILP / KIL
// patterns further down.
def MASK_WRITE : AMDGPUShaderInst <
  (outs),
  (ins R600_Reg32:$src),
  "MASK_WRITE $src",
  []
>;

} // End usesCustomInserter = 1

//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//
let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
  def RETURN : ILFormat<(outs), (ins variable_ops), "RETURN", [(IL_retflag)]>;
}

//===----------------------------------------------------------------------===//
// ISel Patterns
//===----------------------------------------------------------------------===//

// KIL Patterns
// Both kill intrinsics become a KILLGT wrapped in MASK_WRITE.

// int_AMDGPU_kilp takes no operand: KILLGT compares the constants ONE and
// ZERO.
def KILP : Pat <
  (int_AMDGPU_kilp),
  (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO), 0))
>;

// int_AMDGPU_kill: KILLGT compares ZERO against the intrinsic's operand.
def KIL : Pat <
  (int_AMDGPU_kill R600_Reg32:$src0),
  (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0), 0))
>;

// SGT Reverse args
def : Pat <
  (selectcc (f32
R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT),
  (SGT R600_Reg32:$src1, R600_Reg32:$src0)
>;

// Every "reverse args" pattern in this group (including the SGT one just
// above) selects a less-than style condition by emitting the matching
// greater-than style instruction with its two operands swapped:
//   a <  b  ==>  GT(b, a)
//   a <= b  ==>  GE(b, a)
// The result operands ($src1, $src0) must therefore always appear in the
// opposite order from the selectcc operands ($src0, $src1).

// SGE Reverse args
def : Pat <
  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
  (SGE R600_Reg32:$src1, R600_Reg32:$src0)
>;

// SETGT_INT reverse args
def : Pat <
  (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
  (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0)
>;

// SETGE_INT reverse args
def : Pat <
  (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE),
  (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0)
>;

// SETGT_UINT reverse args
def : Pat <
  (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT),
  (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0)
>;

// SETGE_UINT reverse args
// $src0 <=u $src1 is the same condition as $src1 >=u $src0, so the operands
// have to be swapped here exactly as in the other reverse-args patterns.
// Emitting (SETGE_UINT $src0, $src1) would test the opposite ordering.
def : Pat <
  (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE),
  (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0)
>;

// The next two patterns are special cases for handling 'true if ordered' and
// 'true if unordered' conditionals.
// The assumption here is that the behavior of
// SETE and SNE conforms to the Direct3D 10 rules for floating point values
// described here:
// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
// We assume that SETE returns false when one of the operands is NAN and
// SNE returns true when one of the operands is NAN

// SETE - 'true if ordered'
def : Pat <
  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO),
  (SETE R600_Reg32:$src0, R600_Reg32:$src1)
>;

// SNE - 'true if unordered'
def : Pat <
  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
  (SNE R600_Reg32:$src0, R600_Reg32:$src1)
>;

// Per-channel element access for the 128-bit vector types.  Extraction uses
// indices 0-3 and insertion uses indices 4-7, each paired with sel_x..sel_w.
// NOTE(review): the differing index ranges are reproduced as-is; confirm
// against the Extract_Element / Insert_Element class definitions.

// v4f32
def : Extract_Element<f32, v4f32, R600_Reg128, 0, sel_x>;
def : Extract_Element<f32, v4f32, R600_Reg128, 1, sel_y>;
def : Extract_Element<f32, v4f32, R600_Reg128, 2, sel_z>;
def : Extract_Element<f32, v4f32, R600_Reg128, 3, sel_w>;

def : Insert_Element<f32, v4f32, R600_Reg32, R600_Reg128, 4, sel_x>;
def : Insert_Element<f32, v4f32, R600_Reg32, R600_Reg128, 5, sel_y>;
def : Insert_Element<f32, v4f32, R600_Reg32, R600_Reg128, 6, sel_z>;
def : Insert_Element<f32, v4f32, R600_Reg32, R600_Reg128, 7, sel_w>;

// v4i32
def : Extract_Element<i32, v4i32, R600_Reg128, 0, sel_x>;
def : Extract_Element<i32, v4i32, R600_Reg128, 1, sel_y>;
def : Extract_Element<i32, v4i32, R600_Reg128, 2, sel_z>;
def : Extract_Element<i32, v4i32, R600_Reg128, 3, sel_w>;

def : Insert_Element<i32, v4i32, R600_Reg32, R600_Reg128, 4, sel_x>;
def : Insert_Element<i32, v4i32, R600_Reg32, R600_Reg128, 5, sel_y>;
def : Insert_Element<i32, v4i32, R600_Reg32, R600_Reg128, 6, sel_z>;
def : Insert_Element<i32, v4i32, R600_Reg32, R600_Reg128, 7, sel_w>;

// Vector construction from R600_Reg32 values for both 128-bit vector types.
def : Vector_Build<v4f32, R600_Reg32>;
def : Vector_Build<v4i32, R600_Reg32>;

//
// bitconvert patterns
//
// The scalar i32/f32 pair is listed in both orders; the 128-bit vector pair
// appears only as <v4f32, v4i32>.
// NOTE(review): confirm whether the <v4i32, v4f32> direction is intentionally
// absent or handled elsewhere.

def : BitConvert<i32,   f32,   R600_Reg32>;
def : BitConvert<f32,   i32,   R600_Reg32>;
def : BitConvert<v4f32, v4i32, R600_Reg128>;

} // End isR600toCayman Predicate
