//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Tablegen instruction definitions
//
//===----------------------------------------------------------------------===//

include "R600Intrinsics.td"

// Common base of the R600 instruction classes defined in this file.
// Besides forwarding the operand lists, asm string, pattern and itinerary to
// the generic record fields, it publishes a set of per-instruction flags
// through TSFlags bits 4..11 (see the individual `let TSFlags` lines).
class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
                InstrItinClass itin>
    : AMDGPUInst <outs, ins, asm, pattern> {

  // 64-bit encoding; the bits are assigned by the derived classes.
  field bits<64> Inst;

  // Flags, all cleared by default; derived classes set the ones that apply.
  bit Trig = 0;
  bit Op3 = 0;
  bit isVector = 0;
  bits<2> FlagOperandIdx = 0;
  bit Op1 = 0;
  bit Op2 = 0;
  bit HasNativeOperands = 0;

  // The opcode is only recorded here; it is not copied into Inst.
  bits<11> op_code = inst;
  //let Inst = inst;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = itin;

  let TSFlags{4} = Trig;
  let TSFlags{5} = Op3;

  // Vector instructions are instructions that must fill all slots in an
  // instruction group
  let TSFlags{6} = isVector;
  let TSFlags{8-7} = FlagOperandIdx;
  let TSFlags{9} = HasNativeOperands;
  let TSFlags{10} = Op1;
  let TSFlags{11} = Op2;
}

// Base for instructions that only need a 64-bit Inst field and the AMDGPU
// namespace; no TSFlags are assigned here.
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
    AMDGPUInst <outs, ins, asm, pattern> {
  field bits<64> Inst;

  let Namespace = "AMDGPU";
}

// Memory operand made of an X-channel register pointer and an immediate index.
def MEMxi : Operand<iPTR> {
  let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
  let PrintMethod = "printMemOperand";
}

// Memory operand made of a register pointer and a register index.
def MEMrr : Operand<iPTR> {
  let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
}

// Operands that are not registers.

// An i32 operand with a default value (0 unless overridden) and a selectable
// print method.
class InstFlag<string PM = "printOperand", int Default = 0>
    : OperandWithDefaultOps <i32, (ops (i32 Default))> {
  let PrintMethod = PM;
}

// src_sel for ALU source operands (see also the ALU_CONST and ALU_PARAM
// registers).  Defaults to -1.
def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
  let PrintMethod = "printSel";
}

def LITERAL : InstFlag<"printLiteral">;

def WRITE : InstFlag <"printWrite", 1>;
def OMOD : InstFlag <"printOMOD">;
def REL : InstFlag <"printRel">;
def CLAMP : InstFlag <"printClamp">;
def NEG : InstFlag <"printNeg">;
def ABS : InstFlag <"printAbs">;
def UEM : InstFlag <"printUpdateExecMask">;
def UP : InstFlag <"printUpdatePred">;

// XXX: Mesa's r600g finalizer expects `last` to be one in most cases.  The
// default should become 0 once this backend starts using the packetizer.
def LAST : InstFlag<"printLast", 1>;

// Frame operand: register pointer plus immediate index.
def FRAMEri : Operand<iPTR> {
  let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
}

// Addressing-mode selectors; each names the C++ selection routine to call.
def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;

// First 32-bit word of an ALU instruction encoding.
class R600ALU_Word0 {
  field bits<32> Word0;

  bits<11> src0;
  bits<1>  src0_neg;
  bits<1>  src0_rel;
  bits<11> src1;
  bits<1>  src1_rel;
  bits<1>  src1_neg;
  bits<3>  index_mode = 0;
  bits<2>  pred_sel;
  bits<1>  last;

  // Each 11-bit source is split into a 9-bit selector (low bits) and a
  // 2-bit channel (top bits).
  bits<9>  src0_sel  = src0{8-0};
  bits<2>  src0_chan = src0{10-9};
  bits<9>  src1_sel  = src1{8-0};
  bits<2>  src1_chan = src1{10-9};

  let Word0{8-0}   = src0_sel;
  let Word0{9}     = src0_rel;
  let Word0{11-10} = src0_chan;
  let Word0{12}    = src0_neg;
  let Word0{21-13} = src1_sel;
  let Word0{22}    = src1_rel;
  let Word0{24-23} = src1_chan;
  let Word0{25}    = src1_neg;
  let Word0{28-26} = index_mode;
  let Word0{30-29} = pred_sel;
  let Word0{31}    = last;
}

// Bits of the second ALU word that are shared by the OP2 and OP3 layouts
// (bits 18..31).  Bits 0..17 are filled in by the subclasses.
class R600ALU_Word1 {
  field bits<32> Word1;

  bits<11> dst;
  bits<3>  bank_swizzle = 0;
  bits<1>  dst_rel;
  bits<1>  clamp;

  // The destination selector uses the low 7 bits of dst, the channel the
  // top 2 bits.
  bits<7>  dst_sel  = dst{6-0};
  bits<2>  dst_chan = dst{10-9};

  let Word1{20-18} = bank_swizzle;
  let Word1{27-21} = dst_sel;
  let Word1{28}    = dst_rel;
  let Word1{30-29} = dst_chan;
  let Word1{31}    = clamp;
}

// Low 18 bits of the second word for the OP2 layout: modifier bits plus an
// 11-bit opcode.
class R600ALU_Word1_OP2 <bits<11> alu_inst> : R600ALU_Word1{

  bits<1> src0_abs;
  bits<1> src1_abs;
  bits<1> update_exec_mask;
  bits<1> update_pred;
  bits<1> write;
  bits<2> omod;

  let Word1{0}    = src0_abs;
  let Word1{1}    = src1_abs;
  let Word1{2}    = update_exec_mask;
  let Word1{3}    = update_pred;
  let Word1{4}    = write;
  let Word1{6-5}  = omod;
  let Word1{17-7} = alu_inst;
}

// Low 18 bits of the second word for the OP3 layout: a third source operand
// plus a 5-bit opcode.
class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{

  bits<11> src2;
  bits<1>  src2_rel;
  bits<1>  src2_neg;

  bits<9>  src2_sel  = src2{8-0};
  bits<2>  src2_chan = src2{10-9};

  let Word1{8-0}   = src2_sel;
  let Word1{9}     = src2_rel;
  let Word1{11-10} = src2_chan;
  let Word1{12}    = src2_neg;
  let Word1{17-13} = alu_inst;
}

// First word of a vertex-fetch instruction.
class VTX_WORD0 {
  field bits<32> Word0;
  bits<7> SRC_GPR;
  bits<5> VC_INST;
  bits<2> FETCH_TYPE;
  bits<1> FETCH_WHOLE_QUAD;
  bits<8> BUFFER_ID;
  bits<1> SRC_REL;
  bits<2> SRC_SEL_X;
  bits<6> MEGA_FETCH_COUNT;

  let Word0{4-0}   = VC_INST;
  let Word0{6-5}   = FETCH_TYPE;
  let Word0{7}     = FETCH_WHOLE_QUAD;
  let Word0{15-8}  = BUFFER_ID;
  let Word0{22-16} = SRC_GPR;
  let Word0{23}    = SRC_REL;
  let Word0{25-24} = SRC_SEL_X;
  let Word0{31-26} = MEGA_FETCH_COUNT;
}

// Second word of a vertex-fetch instruction (GPR destination form).
class VTX_WORD1_GPR {
  field bits<32> Word1;
  bits<7> DST_GPR;
  bits<1> DST_REL;
  bits<3> DST_SEL_X;
  bits<3> DST_SEL_Y;
  bits<3> DST_SEL_Z;
  bits<3> DST_SEL_W;
  bits<1> USE_CONST_FIELDS;
  bits<6> DATA_FORMAT;
  bits<2> NUM_FORMAT_ALL;
  bits<1> FORMAT_COMP_ALL;
  bits<1> SRF_MODE_ALL;

  let Word1{6-0}   = DST_GPR;
  let Word1{7}     = DST_REL;
  let Word1{8}     = 0; // Reserved
  let Word1{11-9}  = DST_SEL_X;
  let Word1{14-12} = DST_SEL_Y;
  let Word1{17-15} = DST_SEL_Z;
  let Word1{20-18} = DST_SEL_W;
  let Word1{21}    = USE_CONST_FIELDS;
  let Word1{27-22} = DATA_FORMAT;
  let Word1{29-28} = NUM_FORMAT_ALL;
  let Word1{30}    = FORMAT_COMP_ALL;
  let Word1{31}    = SRF_MODE_ALL;
}

/*
XXX: R600 subtarget uses a slightly different encoding than the other
subtargets.  We currently handle this in R600MCCodeEmitter, but we may
want to use these instruction classes in the future.

class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 {

  bits<1>  fog_merge;
  bits<10> alu_inst;

  let Inst{37}    = fog_merge;
  let Inst{39-38} = omod;
  let Inst{49-40} = alu_inst;
}

class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 {

  bits<11> alu_inst;

  let Inst{38-37} = omod;
  let Inst{49-39} = alu_inst;
}
*/

// Predicate operand; defaults to PRED_SEL_OFF.
def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
                                     (ops PRED_SEL_OFF)>;


let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {

// Class for instructions with only one source register.
// If you add new ins to this instruction, make sure they are listed before
// $literal, because the backend currently assumes that the last operand is
// a literal.  Also be sure to update the enum R600Op1OperandIndex::ROI in
// R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
// and R600InstrInfo::getOperandIdx().
class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <0,
              (outs R600_Reg32:$dst),
              (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
              !strconcat(opName,
                   "$clamp $dst$write$dst_rel$omod, "
                   "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
                   "$literal $pred_sel$last"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP2 <inst> {

  // A one-operand instruction reuses the OP2 encoding; the fields that have
  // no matching operand are tied to zero.
  let src1 = 0;
  let src1_rel = 0;
  let src1_neg = 0;
  let src1_abs = 0;
  let update_exec_mask = 0;
  let update_pred = 0;

  let HasNativeOperands = 1;
  let Op1 = 1;
  // $literal is an operand but is excluded from the generated encoding.
  let DisableEncoding = "$literal";

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}

// Convenience wrapper around R600_1OP for instructions whose selection
// pattern is simply `dst = node(src0)`.
//   inst   - 11-bit ALU opcode.
//   opName - mnemonic used as the start of the asm string.
//   node   - SelectionDAG operator to match.
//   itin   - itinerary class; forwarded to R600_1OP (it used to be accepted
//            but dropped, so a non-default value had no effect).
class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
                    InstrItinClass itin = AnyALU> :
    R600_1OP <inst, opName,
              [(set R600_Reg32:$dst, (node R600_Reg32:$src0))],
              itin
>;

// If you add or change the operands for R600_2OP instructions, you must
// also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
// Class for ALU instructions with two source registers, encoded with the
// OP2 word layout.  $literal must stay the last input operand.
class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <inst,
              (outs R600_Reg32:$dst),
              (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
                   OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
                   R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
              !strconcat(opName,
                   "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
                   "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
                   "$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, "
                   "$literal $pred_sel$last"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP2 <inst> {

  let HasNativeOperands = 1;
  let Op2 = 1;
  // $literal is an operand but is excluded from the generated encoding.
  let DisableEncoding = "$literal";

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}

// Convenience wrapper around R600_2OP for instructions whose selection
// pattern is simply `dst = node(src0, src1)`.
//   inst   - 11-bit ALU opcode.
//   opName - mnemonic used as the start of the asm string.
//   node   - SelectionDAG operator to match.
//   itin   - itinerary class; forwarded to R600_2OP.  (The argument used to
//            be misspelled `itim` and was never passed on.)
class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
                       InstrItinClass itin = AnyALU> :
    R600_2OP <inst, opName,
              [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
                                           R600_Reg32:$src1))],
              itin
>;

// If you add or change the operands for R600_3OP instructions, you must
// also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
// R600InstrInfo::buildDefaultInstruction(), and
// R600InstrInfo::getOperandIdx().
// Class for ALU instructions with three source registers, encoded with the
// OP3 word layout (5-bit opcode).  $literal must stay the last input operand.
class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <0,
              (outs R600_Reg32:$dst),
              (ins REL:$dst_rel, CLAMP:$clamp,
                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
                   R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
                   R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
              !strconcat(opName, "$clamp $dst$dst_rel, "
                   "$src0_neg$src0$src0_sel$src0_rel, "
                   "$src1_neg$src1$src1_sel$src1_rel, "
                   "$src2_neg$src2$src2_sel$src2_rel, "
                   "$literal $pred_sel$last"),
              pattern,
              itin>,
    R600ALU_Word0,
    R600ALU_Word1_OP3<inst>{

  let HasNativeOperands = 1;
  // $literal is an operand but is excluded from the generated encoding.
  let DisableEncoding = "$literal";
  let Op3 = 1;

  let Inst{31-0}  = Word0;
  let Inst{63-32} = Word1;
}

// Instruction with a single 32-bit result and caller-supplied inputs, asm
// string and pattern.  Defaults to the VecALU itinerary.
class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
                      InstrItinClass itin = VecALU> :
    InstR600 <inst,
              (outs R600_Reg32:$dst),
              ins,
              asm,
              pattern,
              itin>;

// Texture instruction: 128-bit source and destination plus resource id,
// sampler id and texture target immediates.  Only the opcode is placed in
// the encoding (bits 10..0).
class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
                InstrItinClass itin = AnyALU> :
    InstR600 <inst,
              (outs R600_Reg128:$dst),
              (ins R600_Reg128:$src0, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
              !strconcat(opName, "$dst, $src0, $resourceId, $samplerId, $textureTarget"),
              pattern,
              itin>{
  let Inst {10-0} = inst;
}

} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0

// Immediate predicates on the texture-target operand.  Each one matches a
// fixed set of target ids.

// Matches targets 6..8 and 11..13.
def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13);
  }]
>;

// Matches target 5.
def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

// Matches targets 9, 10, 15 and 16.
def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 15 || TType == 16;
  }]
>;

// Matches targets 11, 12 and 17.
def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;

// Control-flow RAT instruction.  The three template opcodes/ids are written
// straight into the encoding; the remaining fields are left for derived
// records to set.
class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
                 dag ins, string asm, list<dag> pattern> :
    InstR600ISA <outs, ins, asm, pattern> {
  bits<7>  RW_GPR;
  bits<7>  INDEX_GPR;

  bits<2>  RIM;
  bits<2>  TYPE;
  bits<1>  RW_REL;
  bits<2>  ELEM_SIZE;

  bits<12> ARRAY_SIZE;
  bits<4>  COMP_MASK;
  bits<4>  BURST_COUNT;
  bits<1>  VPM;
  bits<1>  eop;
  bits<1>  MARK;
  bits<1>  BARRIER;

  // CF_ALLOC_EXPORT_WORD0_RAT
  let Inst{3-0}   = rat_id;
  let Inst{9-4}   = rat_inst;
  let Inst{10}    = 0; // Reserved
  let Inst{12-11} = RIM;
  let Inst{14-13} = TYPE;
  let Inst{21-15} = RW_GPR;
  let Inst{22}    = RW_REL;
  let Inst{29-23} = INDEX_GPR;
  let Inst{31-30} = ELEM_SIZE;

  // CF_ALLOC_EXPORT_WORD1_BUF
  let Inst{43-32} = ARRAY_SIZE;
  let Inst{47-44} = COMP_MASK;
  let Inst{51-48} = BURST_COUNT;
  let Inst{52}    = VPM;
  let Inst{53}    = eop;
  let Inst{61-54} = cf_inst;
  let Inst{62}    = MARK;
  let Inst{63}    = BARRIER;
}

// Wraps a load fragment so that it only matches when isParamLoad() accepts
// the node.
class LoadParamFrag <PatFrag load_type> : PatFrag <
  (ops node:$ptr), (load_type node:$ptr),
  [{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }]
>;

def load_param : LoadParamFrag<load>;
def load_param_zexti8 : LoadParamFrag<zextloadi8>;
def load_param_zexti16 : LoadParamFrag<zextloadi16>;

// Subtarget predicates.  The adjacent string literals are concatenated into
// one C++ condition.
def isR600 : Predicate<"Subtarget.device()"
                            "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;
def isR700 : Predicate<"Subtarget.device()"
                            "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
                            "Subtarget.device()->getDeviceFlag()"
                            ">= OCL_DEVICE_RV710">;
def isEG : Predicate<
  "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
  "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && "
  "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;

def isCayman : Predicate<"Subtarget.device()"
                            "->getDeviceFlag() == OCL_DEVICE_CAYMAN">;
def isEGorCayman : Predicate<"Subtarget.device()"
                            "->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
                            "|| Subtarget.device()->getGeneration() =="
                            "AMDGPUDeviceInfo::HD6XXX">;

def isR600toCayman : Predicate<
                     "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;

//===----------------------------------------------------------------------===//
// R600 SDNodes
//===----------------------------------------------------------------------===//

// Two-result interpolation pseudo writing an X/Y register pair.
// The asm string now refers to $dst1; it used to print the literal "dst1".
def INTERP_PAIR_XY :  AMDGPUShaderInst <
  (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
  (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
  "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 $dst1",
  []>;

// Two-result interpolation pseudo writing a Z/W register pair.
// The asm string now refers to $dst1; it used to print the literal "dst1".
def INTERP_PAIR_ZW :  AMDGPUShaderInst <
  (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
  (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
  "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 $dst1",
  []>;

// Variadic node: one integer result, first operand of pointer type.
def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
  SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
  [SDNPVariadic]
>;

//===----------------------------------------------------------------------===//
// Interpolation Instructions
//===----------------------------------------------------------------------===//

def INTERP_VEC_LOAD :  AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins i32imm:$src0),
  "INTERP_LOAD $src0 : $dst",
  []>;

def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
  let bank_swizzle = 5;
}

def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
  let bank_swizzle = 5;
}

def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;

//===----------------------------------------------------------------------===//
// Export Instructions
//===----------------------------------------------------------------------===//

// EXPORT takes seven operands and produces no result; operand 0 is floating
// point and operand 1 is an integer.
def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;

def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
  [SDNPHasChain, SDNPSideEffect]>;

// First word shared by both export instruction forms.
class ExportWord0 {
  field bits<32> Word0;

  bits<13> arraybase;
  bits<2>  type;
  bits<7>  gpr;
  bits<2>  elem_size;

  let Word0{12-0}  = arraybase;
  let Word0{14-13} = type;
  let Word0{21-15} = gpr;
  let Word0{22}    = 0; // RW_REL
  let Word0{29-23} = 0; // INDEX_GPR
  let Word0{31-30} = elem_size;
}

// Second word, swizzle form.  Only the four swizzle selectors are placed
// here; `eop` and `inst` are declared but not assigned to Word1 bits in this
// class.
class ExportSwzWord1 {
  field bits<32> Word1;

  bits<3> sw_x;
  bits<3> sw_y;
  bits<3> sw_z;
  bits<3> sw_w;
  bits<1> eop;
  bits<8> inst;

  let Word1{2-0}  = sw_x;
  let Word1{5-3}  = sw_y;
  let Word1{8-6}  = sw_z;
  let Word1{11-9} = sw_w;
}

// Second word, buffer form.  Only array size and component mask are placed
// here; `eop` and `inst` are declared but not assigned to Word1 bits in this
// class.
class ExportBufWord1 {
  field bits<32> Word1;

  bits<12> arraySize;
  bits<4>  compMask;
  bits<1>  eop;
  bits<8>  inst;

  let Word1{11-0}  = arraySize;
  let Word1{15-12} = compMask;
}

// Selection patterns that lower the pixel/dummy store intrinsics and the
// EXPORT node to `ExportInst`.  The operand order of ExportInst is
// (gpr, type, arraybase, sw_x, sw_y, sw_z, sw_w, inst, eop).
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
  def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
    (ExportInst
        (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
        0, 61, 0, 7, 7, 7, cf_inst, 0)
  >;

  def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
    (ExportInst
        (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
        0, 61, 7, 0, 7, 7, cf_inst, 0)
  >;

  def : Pat<(int_R600_store_dummy (i32 imm:$type)),
    (ExportInst
        (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
  >;

  def : Pat<(int_R600_store_dummy 1),
    (ExportInst
        (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
  >;

  def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
    (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
        (ExportInst R600_Reg128:$src, imm:$type, imm:$base,
        imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
  >;

}

// Selection patterns for the stream-output store intrinsic, one per stream
// index (0..3); each stream uses its own instruction opcode argument.
multiclass SteamOutputExportPattern<Instruction ExportInst,
    bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
// Stream0
  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
      (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
      4095, imm:$mask, buf0inst, 0)>;
// Stream1
  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
      (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
      4095, imm:$mask, buf1inst, 0)>;
// Stream2
  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
      (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
      4095, imm:$mask, buf2inst, 0)>;
// Stream3
  def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
      (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
      4095, imm:$mask, buf3inst, 0)>;
}

let usesCustomInserter = 1 in {

// Export instruction, swizzle form (elem_size fixed to 3).
class ExportSwzInst : InstR600ISA<(
    outs),
    (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
    i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst,
    i32imm:$eop),
    !strconcat("EXPORT", " $gpr"),
    []>, ExportWord0, ExportSwzWord1 {
  let elem_size = 3;
  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}

} // End usesCustomInserter = 1

// Export instruction, buffer form (elem_size fixed to 0).
class ExportBufInst : InstR600ISA<(
    outs),
    (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
    i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
    !strconcat("EXPORT", " $gpr"),
    []>, ExportWord0, ExportBufWord1 {
  let elem_size = 0;
  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
}

let Predicates = [isR600toCayman] in {

//===----------------------------------------------------------------------===//
// Common Instructions R600, R700, Evergreen, Cayman
//===----------------------------------------------------------------------===//

def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
// Non-IEEE MUL: 0 * anything = 0
def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;

// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
// so some of the instruction names don't match the asm string.
// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
def SETE : R600_2OP <
  0x08, "SETE",
  [(set R600_Reg32:$dst,
   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
             COND_EQ))]
>;

def SGT : R600_2OP <
  0x09, "SETGT",
  [(set R600_Reg32:$dst,
   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
              COND_GT))]
>;

def SGE : R600_2OP <
  0xA, "SETGE",
  [(set R600_Reg32:$dst,
   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
              COND_GE))]
>;

def SNE : R600_2OP <
  0xB, "SETNE",
  [(set R600_Reg32:$dst,
   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
    COND_NE))]
>;

// The *_DX10 compares select between integer -1 and 0 instead of FP 1.0/0.0.
def SETE_DX10 : R600_2OP <
  0xC, "SETE_DX10",
  [(set R600_Reg32:$dst,
   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
    COND_EQ))]
>;

def SETGT_DX10 : R600_2OP <
  0xD, "SETGT_DX10",
  [(set R600_Reg32:$dst,
   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
    COND_GT))]
>;

def SETGE_DX10 : R600_2OP <
  0xE, "SETGE_DX10",
  [(set R600_Reg32:$dst,
   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
    COND_GE))]
>;

def SETNE_DX10 : R600_2OP <
  0xF, "SETNE_DX10",
  [(set R600_Reg32:$dst,
    (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
     COND_NE))]
>;

def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;

def MOV : R600_1OP <0x19, "MOV", []>;

let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {

// Pseudo that materializes an immediate into a register; it has no asm
// string and no pattern of its own (the patterns are attached below).
class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
  (outs R600_Reg32:$dst),
  (ins immType:$imm),
  "",
  []
>;

} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1

def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
def : Pat <
  (imm:$val),
  (MOV_IMM_I32 imm:$val)
>;

def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
def : Pat <
  (fpimm:$val),
  (MOV_IMM_F32 fpimm:$val)
>;

def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>;
def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>;
def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>;
def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>;

let hasSideEffects = 1 in {

def KILLGT : R600_2OP <0x2D, "KILLGT", []>;

} // end hasSideEffects

def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>;
def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>;
def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>;
def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>;
def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>;
def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>;
def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>;
def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>;
def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>;
def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>;

def SETE_INT : R600_2OP <
  0x3A, "SETE_INT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))]
>;

def SETGT_INT : R600_2OP <
  0x3B, "SETGT_INT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
>;

def SETGE_INT : R600_2OP <
  0x3C, "SETGE_INT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))]
>;

def SETNE_INT : R600_2OP <
  0x3D, "SETNE_INT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))]
>;

def SETGT_UINT : R600_2OP <
  0x3E, "SETGT_UINT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))]
>;

def SETGE_UINT : R600_2OP <
  0x3F, "SETGE_UINT",
  [(set (i32 R600_Reg32:$dst),
    (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))]
>;

def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
// The asm string used to read "PRED_SETGE_INT", which made opcodes 0x43 and
// 0x44 print identically.
def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGT_INT", []>;
def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>;
def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;

// Integer conditional moves: compare src0 against 0 and select src1 or src2.
def CNDE_INT : R600_3OP <
  0x1C, "CNDE_INT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), 0,
       (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
       COND_EQ))]
>;

def CNDGE_INT : R600_3OP <
  0x1E, "CNDGE_INT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), 0,
       (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
       COND_GE))]
>;

def CNDGT_INT : R600_3OP <
  0x1D, "CNDGT_INT",
  [(set (i32 R600_Reg32:$dst),
   (selectcc (i32 R600_Reg32:$src0), 0,
       (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
       COND_GT))]
>;

//===----------------------------------------------------------------------===//
// Texture instructions
//===----------------------------------------------------------------------===//

// TEX_LD carries three extra immediates, so it replaces the operand list and
// asm string inherited from R600_TEX.
def TEX_LD : R600_TEX <
  0x03, "TEX_LD",
  [(set R600_Reg128:$dst,
        (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3,
                        imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
> {
  let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $resourceId, $samplerId, $textureTarget";
  let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2,
                           i32imm:$src3, i32imm:$resourceId, i32imm:$samplerId,
                           i32imm:$textureTarget);
}

def TEX_GET_TEXTURE_RESINFO : R600_TEX <
  0x04, "TEX_GET_TEXTURE_RESINFO",
  [(set R600_Reg128:$dst,
        (int_AMDGPU_txq R600_Reg128:$src0, imm:$resourceId, imm:$samplerId,
                        imm:$textureTarget))]
>;

def TEX_GET_GRADIENTS_H : R600_TEX <
  0x07, "TEX_GET_GRADIENTS_H",
  [(set R600_Reg128:$dst,
        (int_AMDGPU_ddx R600_Reg128:$src0, imm:$resourceId, imm:$samplerId,
                        imm:$textureTarget))]
>;

def TEX_GET_GRADIENTS_V : R600_TEX <
  0x08, "TEX_GET_GRADIENTS_V",
  [(set R600_Reg128:$dst,
        (int_AMDGPU_ddy R600_Reg128:$src0, imm:$resourceId, imm:$samplerId,
                        imm:$textureTarget))]
>;

// No selection pattern is attached to the SET_GRADIENTS instructions.
def TEX_SET_GRADIENTS_H : R600_TEX <0x0B, "TEX_SET_GRADIENTS_H", []>;

def TEX_SET_GRADIENTS_V : R600_TEX <0x0C, "TEX_SET_GRADIENTS_V", []>;

// Each sampling intrinsic has a plain form and a "_C" form; the "_C" form is
// the one whose texture target must satisfy TEX_SHADOW.
def TEX_SAMPLE : R600_TEX <
  0x10, "TEX_SAMPLE",
  [(set R600_Reg128:$dst,
        (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId,
                        imm:$textureTarget))]
>;

def TEX_SAMPLE_C : R600_TEX <
  0x18, "TEX_SAMPLE_C",
  [(set R600_Reg128:$dst,
        (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId,
                        TEX_SHADOW:$textureTarget))]
>;

def TEX_SAMPLE_L : R600_TEX <
  0x11, "TEX_SAMPLE_L",
  [(set R600_Reg128:$dst,
        (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId,
                        imm:$textureTarget))]
>;

def TEX_SAMPLE_C_L : R600_TEX <
  0x19, "TEX_SAMPLE_C_L",
  [(set R600_Reg128:$dst,
        (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId,
                        TEX_SHADOW:$textureTarget))]
>;

def TEX_SAMPLE_LB : R600_TEX <
  0x12, "TEX_SAMPLE_LB",
  [(set R600_Reg128:$dst,
        (int_AMDGPU_txb R600_Reg128:$src0, imm:$resourceId, imm:$samplerId,
                        imm:$textureTarget))]
>;

def TEX_SAMPLE_C_LB : R600_TEX <
  0x1A, "TEX_SAMPLE_C_LB",
  [(set R600_Reg128:$dst,
        (int_AMDGPU_txb R600_Reg128:$src0, imm:$resourceId, imm:$samplerId,
                        TEX_SHADOW:$textureTarget))]
>;

def TEX_SAMPLE_G : R600_TEX <0x14, "TEX_SAMPLE_G", []>;

def TEX_SAMPLE_C_G : R600_TEX <0x1C, "TEX_SAMPLE_C_G", []>;

//===----------------------------------------------------------------------===//
// Helper classes for common instructions
//===----------------------------------------------------------------------===//

// The *_Common classes fix the mnemonic and selection pattern and leave only
// the opcode to be supplied by the per-generation definitions.

class MUL_LIT_Common <bits<5> inst> : R600_3OP <inst, "MUL_LIT", []>;

class MULADD_Common <bits<5> inst> : R600_3OP <inst, "MULADD", []>;

// Matches fadd(fmul(src0, src1), src2).
class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
  inst, "MULADD_IEEE",
  [(set (f32 R600_Reg32:$dst),
        (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))]
>;

// Floating-point conditional moves: compare src0 against FP_ZERO and select
// src1 or src2.
class CNDE_Common <bits<5> inst> : R600_3OP <
  inst, "CNDE",
  [(set R600_Reg32:$dst,
        (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
                  (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
                  COND_EQ))]
>;

class CNDGT_Common <bits<5> inst> : R600_3OP <
  inst, "CNDGT",
  [(set R600_Reg32:$dst,
        (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
                  (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
                  COND_GT))]
>;

class CNDGE_Common <bits<5> inst> : R600_3OP <
  inst, "CNDGE",
  [(set R600_Reg32:$dst,
        (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
                  (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
                  COND_GE))]
>;

// DOT4 comes as a pair: a `_pseudo` record on 128-bit operands that carries
// the int_AMDGPU_dp4 pattern, and a `_real` two-operand ALU record.
multiclass DOT4_Common <bits<11> inst> {

  def _pseudo : R600_REDUCTION <inst,
    (ins R600_Reg128:$src0, R600_Reg128:$src1),
    "DOT4 $dst $src0, $src1",
    [(set R600_Reg32:$dst,
          (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
  >;

  def _real : R600_2OP <inst, "DOT4", []>;
}

let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
// CUBE follows the same pseudo/real split; the pseudo record carries the
// int_AMDGPU_cube pattern and is marked isPseudo.
multiclass CUBE_Common <bits<11> inst> {

  def _pseudo : InstR600 <
    inst,
    (outs R600_Reg128:$dst),
    (ins R600_Reg128:$src),
    "CUBE $dst $src",
    [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
    VecALU
  > {
    let isPseudo = 1;
  }

  def _real : R600_2OP <inst, "CUBE", []>;
}
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0

class EXP_IEEE_Common <bits<11> inst>
    : R600_1OP_Helper <inst, "EXP_IEEE", fexp2>;

class FLT_TO_INT_Common <bits<11> inst>
    : R600_1OP_Helper <inst, "FLT_TO_INT", fp_to_sint>;

class INT_TO_FLT_Common <bits<11> inst>
    : R600_1OP_Helper <inst, "INT_TO_FLT", sint_to_fp>;

class FLT_TO_UINT_Common <bits<11> inst>
    : R600_1OP_Helper <inst, "FLT_TO_UINT", fp_to_uint>;

class UINT_TO_FLT_Common <bits<11> inst>
    : R600_1OP_Helper <inst, "UINT_TO_FLT", uint_to_fp>;

class LOG_CLAMPED_Common <bits<11> inst>
    : R600_1OP <inst, "LOG_CLAMPED", []>;

class LOG_IEEE_Common <bits<11> inst>
    : R600_1OP_Helper <inst, "LOG_IEEE", flog2>;

class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;
class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;
class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;

class MULHI_INT_Common <bits<11> inst>
    : R600_2OP_Helper <inst, "MULHI_INT", mulhs>;

class MULHI_UINT_Common <bits<11> inst>
    : R600_2OP_Helper <inst, "MULHI", mulhu>;

class MULLO_INT_Common <bits<11> inst>
    : R600_2OP_Helper <inst, "MULLO_INT", mul>;

class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []>;

class RECIP_CLAMPED_Common <bits<11> inst>
    : R600_1OP <inst, "RECIP_CLAMPED", []>;

// Matches fdiv(FP_ONE, src0).
class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
  inst, "RECIP_IEEE",
  [(set R600_Reg32:$dst, (fdiv FP_ONE, R600_Reg32:$src0))]
>;

class RECIP_UINT_Common <bits<11> inst>
    : R600_1OP_Helper <inst, "RECIP_UINT", AMDGPUurecip>;

class RECIPSQRT_CLAMPED_Common <bits<11> inst>
    : R600_1OP_Helper <inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq>;

class RECIPSQRT_IEEE_Common <bits<11> inst>
    : R600_1OP <inst, "RECIPSQRT_IEEE", []>;

// SIN and COS additionally set the Trig flag.
class SIN_Common <bits<11> inst> : R600_1OP <inst, "SIN", []> {
  let Trig = 1;
}

class COS_Common <bits<11> inst> : R600_1OP <inst, "COS", []> {
  let Trig = 1;
}

//===----------------------------------------------------------------------===//
// Helper patterns for complex intrinsics
//===----------------------------------------------------------------------===//

// Lowers both int_AMDGPU_div and fdiv to MUL_IEEE(src0, recip_ieee(src1)).
multiclass DIV_Common <InstR600 recip_ieee> {
  def : Pat<
    (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
    (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
  >;

  def : Pat<
    (fdiv R600_Reg32:$src0, R600_Reg32:$src1),
    (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
  >;
}

// Lowers int_TGSI_lit_z to
//   exp_ieee(mul_lit(log_clamped(MAX(src_y, 0)), src_w, src_x)).
class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
  (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w),
  (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))),
                     R600_Reg32:$src_w, R600_Reg32:$src_x))
>;

//===----------------------------------------------------------------------===//
// R600 / R700 Instructions
//===----------------------------------------------------------------------===//

// Opcode assignments for the R600 / R700 encoding. Each def instantiates one
// of the *_Common classes with the opcode used by this hardware generation.
let Predicates = [isR600] in {

  def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
  def MULADD_r600 : MULADD_Common<0x10>;
  def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
  def CNDE_r600 : CNDE_Common<0x18>;
  def CNDGT_r600 : CNDGT_Common<0x19>;
  def CNDGE_r600 : CNDGE_Common<0x1A>;
  defm DOT4_r600 : DOT4_Common<0x50>;
  defm CUBE_r600 : CUBE_Common<0x52>;
  def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
  def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
  def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
  def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
  def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
  def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
  def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
  def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
  def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
  def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
  def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
  def SIN_r600 : SIN_Common<0x6E>;
  def COS_r600 : COS_Common<0x6F>;
  def ASHR_r600 : ASHR_Common<0x70>;
  def LSHR_r600 : LSHR_Common<0x71>;
  def LSHL_r600 : LSHL_Common<0x72>;
  def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
  def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
  def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
  def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
  def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;

  defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
  def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;

  // sqrt(x) is selected as x * rsq(x).
  def : Pat<(fsqrt R600_Reg32:$src),
    (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src))>;

  // Export instructions: Word1 field layout for the R600 / R700 encoding.
  // The Evergreen definitions place the same fields at different bit
  // positions.
  def R600_ExportSwz : ExportSwzInst {
    let Word1{20-17} = 1; // BURST_COUNT
    let Word1{21} = eop;
    let Word1{22} = 1; // VALID_PIXEL_MODE
    let Word1{30-23} = inst;
    let Word1{31} = 1; // BARRIER
  }
  defm : ExportPattern<R600_ExportSwz, 39>;

  def R600_ExportBuf : ExportBufInst {
    let Word1{20-17} = 1; // BURST_COUNT
    let Word1{21} = eop;
    let Word1{22} = 1; // VALID_PIXEL_MODE
    let Word1{30-23} = inst;
    let Word1{31} = 1; // BARRIER
  }
  defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
} // End Predicates = [isR600]

// Helper patterns for normalizing inputs to trigonometric instructions for
// R700+ cards. The argument is multiplied by the TWO_PI_INV constant before
// being passed to the hardware SIN / COS instruction given as `trig`.
class COS_PAT <InstR600 trig> : Pat<
  (fcos R600_Reg32:$src),
  (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
>;

class SIN_PAT <InstR600 trig> : Pat<
  (fsin R600_Reg32:$src),
  (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
>;

//===----------------------------------------------------------------------===//
// R700 Only instructions
//===----------------------------------------------------------------------===//

let Predicates = [isR700] in {
  // Same opcodes as SIN_r600 / COS_r600; separate defs so the normalizing
  // patterns below can be attached for R700 only.
  def SIN_r700 : SIN_Common<0x6E>;
  def COS_r700 : COS_Common<0x6F>;

  // R700 normalizes inputs to SIN/COS the same as EG
  def : SIN_PAT <SIN_r700>;
  def : COS_PAT <COS_r700>;
} // End Predicates = [isR700]

//===----------------------------------------------------------------------===//
// Evergreen Only instructions
//===----------------------------------------------------------------------===//

// Opcode assignments that apply to Evergreen but not Cayman; the Cayman block
// later in the file defines its own *_cm variants of these instructions.
let Predicates = [isEG] in {

def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;

def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
def MULHI_INT_eg : MULHI_INT_Common<0x90>;
def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;

def : SIN_PAT <SIN_eg>;
def : COS_PAT <COS_eg>;
// sqrt(x) is selected as x * rsq(x).
def : Pat<(fsqrt R600_Reg32:$src),
  (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>;
} // End Predicates = [isEG]

//===----------------------------------------------------------------------===//
// Evergreen / Cayman Instructions
//===----------------------------------------------------------------------===//

let Predicates = [isEGorCayman] in {

  // BFE_UINT - bit_extract, an optimization for mask and shift
  // Src0 = Input
  // Src1 = Offset
  // Src2 = Width
  //
  // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
  //
  // Example Usage:
  // (Offset, Width)
  //
  // (0, 8)           = (Input << 24) >> 24  = (Input &       0xff) >> 0
  // (8, 8)           = (Input << 16) >> 24  = (Input &     0xffff) >> 8
  // (16,8)           = (Input <<  8) >> 24  = (Input &   0xffffff) >> 16
  // (24,8)           = (Input <<  0) >> 24  = (Input & 0xffffffff) >> 24
  def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
    [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
                                                      R600_Reg32:$src1,
                                                      R600_Reg32:$src2))],
    VecALU
  >;

  def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
    [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
                                          R600_Reg32:$src2))],
    VecALU
  >;

  // Opcode assignments shared by Evergreen and Cayman.
  def MULADD_eg : MULADD_Common<0x14>;
  def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
  def ASHR_eg : ASHR_Common<0x15>;
  def LSHR_eg : LSHR_Common<0x16>;
  def LSHL_eg : LSHL_Common<0x17>;
  def CNDE_eg : CNDE_Common<0x19>;
  def CNDGT_eg : CNDGT_Common<0x1A>;
  def CNDGE_eg : CNDGE_Common<0x1B>;
  def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
  def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
  defm DOT4_eg : DOT4_Common<0xBE>;
  defm CUBE_eg : CUBE_Common<0xC0>;

let hasSideEffects = 1 in {
  def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
}

  def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;

  // The pattern inherited from FLT_TO_INT_Common is cleared; fp_to_sint is
  // selected through the explicit TRUNC pattern that follows these defs.
  def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
    let Pattern = [];
  }

  def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;

  // Pattern cleared for the same reason as FLT_TO_INT_eg.
  def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
    let Pattern = [];
  }

  def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;

  // TRUNC is used for the FLT_TO_INT instructions to work around a
  // perceived problem where the rounding modes are applied differently
  // depending on the instruction and the slot they are in.
  // See:
  // https://bugs.freedesktop.org/show_bug.cgi?id=50232
  // Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c
  //
  // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
  // which do not need to be truncated since the fp values are 0.0f or 1.0f.
  // We should look into handling these cases separately.
  // fp_to_sint / fp_to_uint: truncate first, then convert.
  def : Pat<(fp_to_sint R600_Reg32:$src0),
    (FLT_TO_INT_eg (TRUNC R600_Reg32:$src0))>;

  def : Pat<(fp_to_uint R600_Reg32:$src0),
    (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>;

  // Export instructions: Word1 field layout for the Evergreen / Cayman
  // encoding. Compared with the R600 definitions the fields sit at different
  // bit positions and an explicit MARK bit is present.
  def EG_ExportSwz : ExportSwzInst {
    let Word1{19-16} = 1; // BURST_COUNT
    let Word1{20} = 1; // VALID_PIXEL_MODE
    let Word1{21} = eop;
    let Word1{29-22} = inst;
    let Word1{30} = 0; // MARK
    let Word1{31} = 1; // BARRIER
  }
  defm : ExportPattern<EG_ExportSwz, 83>;

  def EG_ExportBuf : ExportBufInst {
    let Word1{19-16} = 1; // BURST_COUNT
    let Word1{20} = 1; // VALID_PIXEL_MODE
    let Word1{21} = eop;
    let Word1{29-22} = inst;
    let Word1{30} = 0; // MARK
    let Word1{31} = 1; // BARRIER
  }
  defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;

//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//
let usesCustomInserter = 1 in {

// Common base for the RAT_WRITE_CACHELESS stores.
//   ins       - input operand list supplied by the concrete store
//   comp_mask - value written to the COMP_MASK field
//   name      - asm mnemonic; " $rw_gpr, $index_gpr, $eop" is appended
//   pattern   - selection pattern for the store
// All remaining encoding fields are fixed below.
class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name,
                              list<dag> pattern>
    : EG_CF_RAT <0x57, 0x2, 0, (outs), ins,
                 !strconcat(name, " $rw_gpr, $index_gpr, $eop"), pattern> {
  let RIM         = 0;
  // XXX: Have a separate instruction for non-indexed writes.
  let TYPE        = 1;
  let RW_REL      = 0;
  let ELEM_SIZE   = 0;

  let ARRAY_SIZE  = 0;
  let COMP_MASK   = comp_mask;
  let BURST_COUNT = 0;
  let VPM         = 0;
  let MARK        = 0;
  let BARRIER     = 1;
}

} // End usesCustomInserter = 1

// 32-bit store
// NOTE(review): the asm name carries an "_eg" suffix while the 128-bit store
// below does not; confirm whether the difference is intentional.
def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
  (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
  0x1, "RAT_WRITE_CACHELESS_32_eg",
  [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)]
>;

// 128-bit store
def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
  (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
  0xf, "RAT_WRITE_CACHELESS_128",
  [(global_store (v4i32 R600_Reg128:$rw_gpr), R600_TReg32_X:$index_gpr)]
>;

// Common base for vertex-fetch reads.
//   name      - asm mnemonic; " $dst, $ptr" is appended
//   buffer_id - value written to the BUFFER_ID field
//   outs      - output operand list (register class of $dst)
//   pattern   - selection pattern for the load
// Subclasses set MEGA_FETCH_COUNT, the DST_SEL_* swizzle and DATA_FORMAT.
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
    : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern>,
      VTX_WORD1_GPR, VTX_WORD0 {

  // Static fields
  let VC_INST = 0;
  let FETCH_TYPE = 2;
  let FETCH_WHOLE_QUAD = 0;
  let BUFFER_ID = buffer_id;
  let SRC_REL = 0;
  // XXX: We can infer this field based on the SRC_GPR.  This would allow us
  // to store vertex addresses in any channel, not just X.
  let SRC_SEL_X = 0;
  let DST_REL = 0;
  // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
  // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
  // however, based on my testing if USE_CONST_FIELDS is set, then all
  // these fields need to be set to 0.
  let USE_CONST_FIELDS = 0;
  let NUM_FORMAT_ALL = 1;
  let FORMAT_COMP_ALL = 0;
  let SRF_MODE_ALL = 0;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;
  // LLVM can only encode 64-bit instructions, so these fields are manually
  // encoded in R600CodeEmitter
  //
  // bits<16> OFFSET;
  // bits<2>  ENDIAN_SWAP = 0;
  // bits<1>  CONST_BUF_NO_STRIDE = 0;
  // bits<1>  MEGA_FETCH = 0;
  // bits<1>  ALT_CONST = 0;
  // bits<2>  BUFFER_INDEX_MODE = 0;



  // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
  // is done in R600CodeEmitter
  //
  // Inst{79-64} = OFFSET;
  // Inst{81-80} = ENDIAN_SWAP;
  // Inst{82}    = CONST_BUF_NO_STRIDE;
  // Inst{83}    = MEGA_FETCH;
  // Inst{84}    = ALT_CONST;
  // Inst{86-85} = BUFFER_INDEX_MODE;
  // Inst{95-87} = 0; Reserved

  // VTX_WORD3 (Padding)
  //
  // Inst{127-96} = 0;
}

// 8-bit read: only the X destination channel is written; Y, Z and W are
// masked.
class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg <"VTX_READ_8", buffer_id, (outs R600_TReg32_X:$dst),
                   pattern> {

  let MEGA_FETCH_COUNT = 1;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 7;   // Masked
  let DST_SEL_Z = 7;   // Masked
  let DST_SEL_W = 7;   // Masked
  let DATA_FORMAT = 1; // FMT_8
}

// 16-bit read: only the X destination channel is written.
class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg <"VTX_READ_16", buffer_id, (outs R600_TReg32_X:$dst),
                    pattern> {
  let MEGA_FETCH_COUNT = 2;
  let DST_SEL_X = 0;
  let DST_SEL_Y = 7;   // Masked
  let DST_SEL_Z = 7;   // Masked
  let DST_SEL_W = 7;   // Masked
  let DATA_FORMAT = 5; // FMT_16

}

// 32-bit read: only the X destination channel is written.
class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg <"VTX_READ_32", buffer_id, (outs R600_TReg32_X:$dst),
                   pattern> {

  let MEGA_FETCH_COUNT = 4;
  let DST_SEL_X        = 0;
  let DST_SEL_Y        = 7;   // Masked
  let DST_SEL_Z        = 7;   // Masked
  let DST_SEL_W        = 7;   // Masked
  let DATA_FORMAT      = 0xD; // COLOR_32

  // This is not really necessary, but there were some GPU hangs that appeared
  // to be caused by ALU instructions in the next instruction group that wrote
  // to the $ptr registers of the VTX_READ.
  // e.g.
  // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
  // %T2_X<def> = MOV %ZERO
  // Adding this constraint prevents this from happening.
  let Constraints = "$ptr.ptr = $dst";
}

// 128-bit read: all four destination channels are written (X, Y, Z, W map to
// selects 0..3).
class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
    : VTX_READ_eg <"VTX_READ_128", buffer_id, (outs R600_Reg128:$dst),
                   pattern> {

  let MEGA_FETCH_COUNT = 16;
  let DST_SEL_X        =  0;
  let DST_SEL_Y        =  1;
  let DST_SEL_Z        =  2;
  let DST_SEL_W        =  3;
  let DATA_FORMAT      =  0x22; // COLOR_32_32_32_32

  // XXX: Need to force VTX_READ_128 instructions to write to the same register
  // that holds its buffer address to avoid potential hangs.  We can't use
  // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst
  // registers are different sizes.
}

//===----------------------------------------------------------------------===//
// VTX Read from parameter memory space
//===----------------------------------------------------------------------===//

// Parameter-space loads use buffer id 0.
def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
  [(set (i32 R600_TReg32_X:$dst), (load_param_zexti8 ADDRVTX_READ:$ptr))]
>;

def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
  [(set (i32 R600_TReg32_X:$dst), (load_param_zexti16 ADDRVTX_READ:$ptr))]
>;

def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
  [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
>;

def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
  [(set (v4i32 R600_Reg128:$dst), (load_param ADDRVTX_READ:$ptr))]
>;

//===----------------------------------------------------------------------===//
// VTX Read from global memory space
//===----------------------------------------------------------------------===//

// Global-space loads use buffer id 1.

// 8-bit reads
def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
  [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))]
>;

// 32-bit reads
def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
  [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
>;

// 128-bit reads
def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
  [(set (v4i32 R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))]
>;

//===----------------------------------------------------------------------===//
// Constant Loads
// XXX: We are currently storing all constants in the global address space.
//===----------------------------------------------------------------------===//

// Constant loads use buffer id 1, the same as the global-space reads.
def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
  [(set (i32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))]
>;

} // End Predicates = [isEGorCayman]

//===----------------------------------------------------------------------===//
// Register loads and stores - for indirect addressing
//===----------------------------------------------------------------------===//

defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;

// Cayman-specific variants. The opcodes match the Evergreen-only (_eg) defs
// of the same instructions; the difference is that every def in the inner
// block is marked isVector (must fill all slots of an instruction group, see
// InstR600).
let Predicates = [isCayman] in {

let isVector = 1 in {

def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;

def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
def MULHI_INT_cm : MULHI_INT_Common<0x90>;
def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
// NOTE(review): this name lacks the "cm" suffix used by every other def in
// this block (looks like a typo for LOG_IEEE_cm). Left unchanged because the
// record name is externally visible; confirm there are no users before
// renaming.
def LOG_IEEE_ : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
def SIN_cm : SIN_Common<0x8D>;
def COS_cm : COS_Common<0x8E>;
} // End isVector = 1

def : SIN_PAT <SIN_cm>;
def : COS_PAT <COS_cm>;

defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;

// RECIP_UINT emulation for Cayman: convert to float, take the reciprocal,
// scale by the float constant 0x4f800000 and convert back to unsigned.
def : Pat <
  (AMDGPUurecip R600_Reg32:$src0),
  (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
                            (MOV_IMM_I32 0x4f800000)))
>;


// sqrt(x) is selected as x * rsq(x).
def : Pat<(fsqrt R600_Reg32:$src),
  (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>;

} // End isCayman

//===----------------------------------------------------------------------===//
// Branch Instructions
//===----------------------------------------------------------------------===//


def IF_PREDICATE_SET  : ILFormat<(outs), (ins GPRI32:$src),
  "IF_PREDICATE_SET $src", []>;

def PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src),
  "PREDICATED_BREAK $src", []>;

//===----------------------------------------------------------------------===//
// Pseudo instructions
//===----------------------------------------------------------------------===//

let isPseudo = 1 in {

// FlagOperandIdx is stored in TSFlags{8-7} (see InstR600).
// NOTE(review): index 3 presumably designates $flags, counting $dst as
// operand 0; confirm against the code that reads FlagOperandIdx.
def PRED_X : InstR600 <
  0, (outs R600_Predicate_Bit:$dst),
  (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
  "", [], NullALU> {
  let FlagOperandIdx = 3;
}

let isTerminator = 1, isBranch = 1 in {
def JUMP_COND : InstR600 <0x10,
          (outs),
          (ins brtarget:$target, R600_Predicate_Bit:$p),
          "JUMP $target ($p)",
          [], AnyALU
  >;

// Unconditional form: additionally marked predicable and a barrier.
def JUMP : InstR600 <0x10,
          (outs),
          (ins brtarget:$target),
          "JUMP $target",
          [], AnyALU
  >
{
  let isPredicable = 1;
  let isBarrier = 1;
}

}  // End isTerminator = 1, isBranch = 1

let usesCustomInserter = 1 in {

let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {

def MASK_WRITE : AMDGPUShaderInst <
    (outs),
    (ins R600_Reg32:$src),
    "MASK_WRITE $src",
    []
>;

} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1


// TXD and TXD_SHADOW both select int_AMDGPU_txd; TXD_SHADOW matches only when
// the texture target operand satisfies TEX_SHADOW.
def TXD: AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
  "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
  [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
>;

def TXD_SHADOW: AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
  "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
  [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
>;

} // End usesCustomInserter = 1
} // End isPseudo = 1

def CLAMP_R600 :  CLAMP <R600_Reg32>;
def FABS_R600 : FABS<R600_Reg32>;
def FNEG_R600 : FNEG<R600_Reg32>;

//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//
let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
    usesCustomInserter = 1 in {
  def RETURN          : ILFormat<(outs), (ins variable_ops),
      "RETURN", [(IL_retflag)]>;
}


//===----------------------------------------------------------------------===//
// Constant Buffer Addressing Support
//===----------------------------------------------------------------------===//

// Pseudo selected for CONST_ADDRESS with a constant offset; it is handled by
// the custom inserter (usesCustomInserter = 1).
let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"  in {
def CONST_COPY : Instruction {
  let OutOperandList = (outs R600_Reg32:$dst);
  let InOperandList = (ins i32imm:$src);
  let Pattern =
      [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
  let AsmString = "CONST_COPY";
  let neverHasSideEffects = 1;
  let isAsCheapAsAMove = 1;
  let Itinerary = NullALU;
}
} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"

// Vertex fetch selected for CONST_ADDRESS with a variable offset. The buffer
// id is an instruction operand ($BUFFER_ID) rather than a fixed field value.
def TEX_VTX_CONSTBUF :
  InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
      [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
  VTX_WORD1_GPR, VTX_WORD0 {

  let VC_INST = 0;
  let FETCH_TYPE = 2;
  let FETCH_WHOLE_QUAD = 0;
  let SRC_REL = 0;
  let SRC_SEL_X = 0;
  let DST_REL = 0;
  let USE_CONST_FIELDS = 0;
  let NUM_FORMAT_ALL = 2;
  let FORMAT_COMP_ALL = 1;
  let SRF_MODE_ALL = 1;
  let MEGA_FETCH_COUNT = 16;
  let DST_SEL_X        = 0;
  let DST_SEL_Y        = 1;
  let DST_SEL_Z        = 2;
  let DST_SEL_W        = 3;
  let DATA_FORMAT      = 35;

  let Inst{31-0} = Word0;
  let Inst{63-32} = Word1;

// LLVM can only encode 64-bit instructions, so these fields are manually
// encoded in R600CodeEmitter
//
// bits<16> OFFSET;
// bits<2>  ENDIAN_SWAP = 0;
// bits<1>  CONST_BUF_NO_STRIDE = 0;
// bits<1>  MEGA_FETCH = 0;
// bits<1>  ALT_CONST = 0;
// bits<2>  BUFFER_INDEX_MODE = 0;



// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
// is done in R600CodeEmitter
//
// Inst{79-64} = OFFSET;
// Inst{81-80} = ENDIAN_SWAP;
// Inst{82}    = CONST_BUF_NO_STRIDE;
// Inst{83}    = MEGA_FETCH;
// Inst{84}    = ALT_CONST;
// Inst{86-85} = BUFFER_INDEX_MODE;
// Inst{95-87} = 0; Reserved

// VTX_WORD3 (Padding)
//
// Inst{127-96} = 0;
}

// Vertex fetch selected for int_R600_load_texbuf. Unlike TEX_VTX_CONSTBUF it
// sets USE_CONST_FIELDS and leaves DATA_FORMAT, NUM_FORMAT_ALL and
// FORMAT_COMP_ALL at 0.
def TEX_VTX_TEXBUF:
  InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
      [(set R600_Reg128:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
VTX_WORD1_GPR, VTX_WORD0 {

let VC_INST = 0;
let FETCH_TYPE = 2;
let FETCH_WHOLE_QUAD = 0;
let SRC_REL = 0;
let SRC_SEL_X = 0;
let DST_REL = 0;
let USE_CONST_FIELDS = 1;
let NUM_FORMAT_ALL = 0;
let FORMAT_COMP_ALL = 0;
let SRF_MODE_ALL = 1;
let MEGA_FETCH_COUNT = 16;
let DST_SEL_X        = 0;
let DST_SEL_Y        = 1;
let DST_SEL_Z        = 2;
let DST_SEL_W        = 3;
let DATA_FORMAT      = 0;

let Inst{31-0} = Word0;
let Inst{63-32} = Word1;

// LLVM can only encode 64-bit instructions, so these fields are manually
// encoded in R600CodeEmitter
//
// bits<16> OFFSET;
// bits<2>  ENDIAN_SWAP = 0;
// bits<1>  CONST_BUF_NO_STRIDE = 0;
// bits<1>  MEGA_FETCH = 0;
// bits<1>  ALT_CONST = 0;
// bits<2>  BUFFER_INDEX_MODE = 0;



// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
// is done in R600CodeEmitter
//
// Inst{79-64} = OFFSET;
// Inst{81-80} = ENDIAN_SWAP;
// Inst{82}    = CONST_BUF_NO_STRIDE;
// Inst{83}    = MEGA_FETCH;
// Inst{84}    = ALT_CONST;
// Inst{86-85} = BUFFER_INDEX_MODE;
// Inst{95-87} = 0; Reserved

// VTX_WORD3 (Padding)
//
// Inst{127-96} = 0;
}



//===--------------------------------------------------------------------===//
// Instructions support
//===--------------------------------------------------------------------===//
//===---------------------------------------------------------------------===//
// Custom Inserter for Branches and returns, this eventually will be a
// separate pass
//===---------------------------------------------------------------------===//
let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
  def BRANCH : ILFormat<(outs), (ins brtarget:$target),
      "; Pseudo unconditional branch instruction",
      [(br bb:$target)]>;
  defm BRANCH_COND : BranchConditional<IL_brcond>;
}

//===---------------------------------------------------------------------===//
// Flow and Program control Instructions
//===---------------------------------------------------------------------===//
// Structured control-flow markers. None of them carries a selection pattern.
let isTerminator=1 in {
  def SWITCH      : ILFormat< (outs), (ins GPRI32:$src),
  !strconcat("SWITCH", " $src"), []>;
  def CASE        : ILFormat< (outs), (ins GPRI32:$src),
      !strconcat("CASE", " $src"), []>;
  def BREAK       : ILFormat< (outs), (ins),
      "BREAK", []>;
  def CONTINUE    : ILFormat< (outs), (ins),
      "CONTINUE", []>;
  def DEFAULT     : ILFormat< (outs), (ins),
      "DEFAULT", []>;
  def ELSE        : ILFormat< (outs), (ins),
      "ELSE", []>;
  def ENDSWITCH   : ILFormat< (outs), (ins),
      "ENDSWITCH", []>;
  def ENDMAIN     : ILFormat< (outs), (ins),
      "ENDMAIN", []>;
  def END         : ILFormat< (outs), (ins),
      "END", []>;
  def ENDFUNC     : ILFormat< (outs), (ins),
      "ENDFUNC", []>;
  def ENDIF       : ILFormat< (outs), (ins),
      "ENDIF", []>;
  def WHILELOOP   : ILFormat< (outs), (ins),
      "WHILE", []>;
  def ENDLOOP     : ILFormat< (outs), (ins),
      "ENDLOOP", []>;
  def FUNC        : ILFormat< (outs), (ins),
      "FUNC", []>;
  def RETDYN      : ILFormat< (outs), (ins),
      "RET_DYN", []>;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm IF_LOGICALNZ  : BranchInstr<"IF_LOGICALNZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm IF_LOGICALZ   : BranchInstr<"IF_LOGICALZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm BREAK_LOGICALZ  : BranchInstr<"BREAK_LOGICALZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm CONTINUE_LOGICALZ  : BranchInstr<"CONTINUE_LOGICALZ">;
  defm IFC         : BranchInstr2<"IFC">;
  defm BREAKC      : BranchInstr2<"BREAKC">;
  defm CONTINUEC   : BranchInstr2<"CONTINUEC">;
}

//===----------------------------------------------------------------------===//
// ISel Patterns
//===----------------------------------------------------------------------===//

// CND*_INT Patterns for f32 True / False values
// Selects an integer compare-against-zero with f32 result operands onto the
// given CND*_INT instruction.

class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
  (selectcc (i32 R600_Reg32:$src0), 0, (f32 R600_Reg32:$src1),
                                            R600_Reg32:$src2, cc),
  (cnd R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
>;

def : CND_INT_f32 <CNDE_INT,  SETEQ>;
def : CND_INT_f32 <CNDGT_INT, SETGT>;
def : CND_INT_f32 <CNDGE_INT, SETGE>;

// CNDGE_INT extra pattern: a "greater than -1" compare is also selected to
// CNDGE_INT.
// NOTE(review): presumably relies on x > -1 being the same as x >= 0 for
// signed i32; confirm CNDGE_INT compares src0 against zero.
def : Pat <
  (selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1),
                                        (i32 R600_Reg32:$src2), COND_GT),
  (CNDGE_INT R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
>;

// KIL Patterns
// Unconditional kill: MASK_WRITE of KILLGT applied to the constants ONE and
// ZERO.
def KILP : Pat <
  (int_AMDGPU_kilp),
  (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
>;

// Conditional kill: MASK_WRITE of KILLGT applied to ZERO and $src0.
def KIL : Pat <
  (int_AMDGPU_kill R600_Reg32:$src0),
  (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0)))
>;

// The "reverse args" patterns below select a less-than / less-or-equal
// compare onto the corresponding greater-than / greater-or-equal instruction
// by swapping the two source operands.

// SGT Reverse args
def : Pat <
  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT),
  (SGT R600_Reg32:$src1, R600_Reg32:$src0)
>;

// SGE Reverse args
def : Pat <
  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
  (SGE R600_Reg32:$src1, R600_Reg32:$src0)
>;

// SETGT_DX10 reverse args
def : Pat <
  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT),
  (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
>;

// SETGE_DX10 reverse args
def : Pat <
  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE),
  (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
>;

// SETGT_INT reverse args
def : Pat <
  (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
  (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0)
>;

// SETGE_INT reverse args
def : Pat <
  (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE),
  (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0)
>;

// SETGT_UINT reverse args
def : Pat <
  (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT),
  (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0)
>;

// SETGE_UINT reverse args
def : Pat <
  (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE),
  (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0)
>;

// The next four patterns are special cases for handling 'true if ordered' and
// 'true if unordered' conditionals.  The assumption here is that the behavior
// of SETE and SNE conforms to the Direct3D 10 rules for floating point values
// described here:
// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
// We assume that  SETE returns false when one of the operands is NAN and
// SNE returns true when one of the operands is NAN

// SETE - 'true if ordered'
def : Pat <
  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO),
  (SETE R600_Reg32:$src0, R600_Reg32:$src1)
>;

// SETE_DX10 - 'true if ordered'
def : Pat <
  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO),
  (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
>;

// SNE - 'true if unordered'
def : Pat <
  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
  (SNE R600_Reg32:$src0, R600_Reg32:$src1)
>;

// SETNE_DX10 - 'true if unordered'
def : Pat <
  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO),
  (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
>;

// Element extract / insert for the four sub-registers of a 128-bit vector
// register, for both f32 and i32 element types.
def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>;
def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>;
def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>;
def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>;

def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>;
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>;
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>;
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>;

def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>;
def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>;
def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>;
def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>;

def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;

def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>;

// bitconvert patterns

def : BitConvert <i32, f32, R600_Reg32>;
def : BitConvert <f32, i32, R600_Reg32>;
def : BitConvert <v4f32, v4i32, R600_Reg128>;
def : BitConvert <v4i32, v4f32, R600_Reg128>;

// DWORDADDR pattern
def : DwordAddrPat  <i32, R600_Reg32>;

} // End isR600toCayman Predicate