//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "R600InstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

#define GET_INSTRINFO_CTOR
#include "AMDGPUGenDFAPacketizer.inc"

using namespace llvm;

R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm),
    ST(tm.getSubtarget<AMDGPUSubtarget>())
  { }

const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
  return RI;
}

bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}

bool R600InstrInfo::isVector(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}

void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const {
  unsigned VectorComponents = 0;
  if (AMDGPU::R600_Reg128RegClass.contains(DestReg) &&
      AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
    VectorComponents = 4;
  } else if (AMDGPU::R600_Reg64RegClass.contains(DestReg) &&
             AMDGPU::R600_Reg64RegClass.contains(SrcReg)) {
    VectorComponents = 2;
  }

  if (VectorComponents > 0) {
    for (unsigned I = 0; I < VectorComponents; I++) {
      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
      buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                              RI.getSubReg(DestReg, SubRegIndex),
                              RI.getSubReg(SrcReg, SubRegIndex))
                              .addReg(DestReg,
                                      RegState::Define | RegState::Implicit);
    }
  } else {
    MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                                                  DestReg, SrcReg);
    NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
        .setIsKill(KillSrc);
  }
}

MachineInstr *R600InstrInfo::getMovImmInstr(MachineFunction *MF,
                                            unsigned DstReg,
                                            int64_t Imm) const {
  MachineInstr *MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
  MachineInstrBuilder MIB(*MF, MI);
  MIB.addReg(DstReg, RegState::Define);
  MIB.addReg(AMDGPU::ALU_LITERAL_X);
  MIB.addImm(Imm);
  MIB.addReg(0); // PREDICATE_BIT

  return MI;
}

unsigned R600InstrInfo::getIEQOpcode() const {
  return AMDGPU::SETE_INT;
}

bool R600InstrInfo::isMov(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}

// Some instructions act as place holders to emulate operations that the GPU
// hardware does automatically. This function can be used to check if
// an opcode falls into this category.
bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::RETURN:
    return true;
  }
}

bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
  return false;
}

bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::CUBE_r600_pseudo:
  case AMDGPU::CUBE_r600_real:
  case AMDGPU::CUBE_eg_pseudo:
  case AMDGPU::CUBE_eg_real:
    return true;
  }
}

bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return (TargetFlags & R600_InstFlag::ALU_INST);
}

bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::OP1) |
          (TargetFlags & R600_InstFlag::OP2) |
          (TargetFlags & R600_InstFlag::OP3));
}

bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::LDS_1A) |
          (TargetFlags & R600_InstFlag::LDS_1A1D));
}

bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
  return (get(Opcode).TSFlags & R600_InstFlag::TRANS_ONLY);
}

bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
  return isTransOnly(MI->getOpcode());
}

bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
  return ST.hasVertexCache() && IS_VTX(get(Opcode));
}

bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
  const R600MachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
  return MFI->ShaderType != ShaderType::COMPUTE &&
         usesVertexCache(MI->getOpcode());
}

bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
  return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
}

bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
  const R600MachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
  return (MFI->ShaderType == ShaderType::COMPUTE &&
          usesVertexCache(MI->getOpcode())) ||
         usesTextureCache(MI->getOpcode());
}

bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::KILLGT:
  case AMDGPU::GROUP_BARRIER:
    return true;
  default:
    return false;
  }
}

int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
  static const unsigned OpTable[] = {
    AMDGPU::OpName::src0,
    AMDGPU::OpName::src1,
    AMDGPU::OpName::src2
  };

  assert(SrcNum < 3);
  return getOperandIdx(Opcode, OpTable[SrcNum]);
}

#define SRC_SEL_ROWS 11
int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
  static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
    {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
    {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
    {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
    {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
    {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
    {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
    {AMDGPU::OpName::src1_Z,
     AMDGPU::OpName::src1_sel_Z},
    {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
  };

  for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
    if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
      return getOperandIdx(Opcode, SrcSelTable[i][1]);
    }
  }
  return -1;
}
#undef SRC_SEL_ROWS

SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr *MI) const {
  SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;

  if (MI->getOpcode() == AMDGPU::DOT_4) {
    static const unsigned OpTable[8][2] = {
      {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
      {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
      {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
      {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
      {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
      {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
      {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
      {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
    };

    for (unsigned j = 0; j < 8; j++) {
      MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
                                                        OpTable[j][0]));
      unsigned Reg = MO.getReg();
      if (Reg == AMDGPU::ALU_CONST) {
        unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(),
                                                    OpTable[j][1])).getImm();
        Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
        continue;
      }
    }
    return Result;
  }

  static const unsigned OpTable[3][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
  };

  for (unsigned j = 0; j < 3; j++) {
    int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
    if (SrcIdx < 0)
      break;
    MachineOperand &MO = MI->getOperand(SrcIdx);
    unsigned Reg = MI->getOperand(SrcIdx).getReg();
    if (Reg == AMDGPU::ALU_CONST) {
      unsigned Sel = MI->getOperand(
          getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
      continue;
    }
    if (Reg == AMDGPU::ALU_LITERAL_X) {
      unsigned Imm = MI->getOperand(
          getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm();
      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
      continue;
    }
    Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
  }
  return Result;
}

std::vector<std::pair<int, unsigned> >
R600InstrInfo::ExtractSrcs(MachineInstr *MI,
                           const DenseMap<unsigned, unsigned> &PV,
                           unsigned &ConstCount) const {
  ConstCount = 0;
  const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
  const std::pair<int, unsigned> DummyPair(-1, 0);
  std::vector<std::pair<int, unsigned> > Result;
  unsigned i = 0;
  for (unsigned n = Srcs.size(); i < n; ++i) {
    unsigned Reg = Srcs[i].first->getReg();
    unsigned Index = RI.getEncodingValue(Reg) & 0xff;
    if (Reg == AMDGPU::OQAP) {
      Result.push_back(std::pair<int, unsigned>(Index, 0));
    }
    if (PV.find(Reg) != PV.end()) {
      // 255 is used to tell it's a PS/PV reg
      Result.push_back(std::pair<int, unsigned>(255, 0));
      continue;
    }
    if (Index > 127) {
      ConstCount++;
      Result.push_back(DummyPair);
      continue;
    }
    unsigned Chan = RI.getHWRegChan(Reg);
    Result.push_back(std::pair<int, unsigned>(Index, Chan));
  }
  for (; i < 3; ++i)
    Result.push_back(DummyPair);
  return Result;
}

static std::vector<std::pair<int, unsigned> >
Swizzle(std::vector<std::pair<int, unsigned> > Src,
        R600InstrInfo::BankSwizzle Swz) {
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210:
    break;
  case R600InstrInfo::ALU_VEC_021_SCL_122:
    std::swap(Src[1], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_102_SCL_221:
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_120_SCL_212:
    std::swap(Src[0], Src[1]);
    std::swap(Src[0], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_201:
    std::swap(Src[0], Src[2]);
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_210:
    std::swap(Src[0], Src[2]);
    break;
  }
  return Src;
}

static unsigned
getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210: {
    unsigned Cycles[3] = { 2, 1, 0};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_021_SCL_122: {
    unsigned Cycles[3] = { 1, 2, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_120_SCL_212: {
    unsigned Cycles[3] = { 2, 1, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_102_SCL_221: {
    unsigned Cycles[3] = { 2, 2, 1};
    return Cycles[Op];
  }
  default:
    llvm_unreachable("Wrong Swizzle for Trans Slot");
    return 0;
  }
}

/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
/// in the same Instruction Group while meeting read port limitations given a
/// Swz swizzle sequence.
unsigned R600InstrInfo::isLegalUpTo(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    const std::vector<R600InstrInfo::BankSwizzle> &Swz,
    const std::vector<std::pair<int, unsigned> > &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  int Vector[4][3];
  memset(Vector, -1, sizeof(Vector));
  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
    const std::vector<std::pair<int, unsigned> > &Srcs =
        Swizzle(IGSrcs[i], Swz[i]);
    for (unsigned j = 0; j < 3; j++) {
      const std::pair<int, unsigned> &Src = Srcs[j];
      if (Src.first < 0 || Src.first == 255)
        continue;
      if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
        if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
            Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
          // The value from output queue A (denoted by register OQAP) can
          // only be fetched during the first cycle.
          return false;
        }
        // OQAP does not count towards the normal read port restrictions
        continue;
      }
      if (Vector[Src.second][j] < 0)
        Vector[Src.second][j] = Src.first;
      if (Vector[Src.second][j] != Src.first)
        return i;
    }
  }
  // Now check Trans Alu
  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransSrcs[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (Src.first == 255)
      continue;
    if (Vector[Src.second][Cycle] < 0)
      Vector[Src.second][Cycle] = Src.first;
    if (Vector[Src.second][Cycle] != Src.first)
      return IGSrcs.size() - 1;
  }
  return IGSrcs.size();
}

/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
/// (in lexicographic order) swizzle sequence, assuming that all swizzles after
/// Idx can be skipped.
static bool
NextPossibleSolution(
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    unsigned Idx) {
  assert(Idx < SwzCandidate.size());
  int ResetIdx = Idx;
  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
    ResetIdx--;
  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
  }
  if (ResetIdx == -1)
    return false;
  int NextSwizzle = SwzCandidate[ResetIdx] + 1;
  SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
  return true;
}

/// Enumerate all possible swizzle sequences to find one that meets all
/// read port requirements.
bool R600InstrInfo::FindSwizzleForVectorSlot(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    const std::vector<std::pair<int, unsigned> > &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  unsigned ValidUpTo = 0;
  do {
    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
    if (ValidUpTo == IGSrcs.size())
      return true;
  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
  return false;
}

/// Instructions in the Trans slot can't read a gpr at cycle 0 if they also
/// read a const, and can't read a gpr at cycle 1 if they read two consts.
static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
                  const std::vector<std::pair<int, unsigned> > &TransOps,
                  unsigned ConstCount) {
  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransOps[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (ConstCount > 0 && Cycle == 0)
      return false;
    if (ConstCount > 1 && Cycle == 1)
      return false;
  }
  return true;
}

bool
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
                                       const DenseMap<unsigned, unsigned> &PV,
                                       std::vector<BankSwizzle> &ValidSwizzle,
                                       bool isLastAluTrans)
    const {
  // TODO: support shared src0 - src1 operand

  std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
  ValidSwizzle.clear();
  unsigned ConstCount;
  BankSwizzle TransBS = ALU_VEC_012_SCL_210;
  for (unsigned i = 0, e = IG.size(); i < e; ++i) {
    IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
    unsigned Op = getOperandIdx(IG[i]->getOpcode(),
                                AMDGPU::OpName::bank_swizzle);
    ValidSwizzle.push_back((R600InstrInfo::BankSwizzle)
                           IG[i]->getOperand(Op).getImm());
  }
  std::vector<std::pair<int, unsigned> > TransOps;
  if (!isLastAluTrans)
    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);

  TransOps = IGSrcs.back();
  IGSrcs.pop_back();
  ValidSwizzle.pop_back();

  static const R600InstrInfo::BankSwizzle TransSwz[] = {
    ALU_VEC_012_SCL_210,
    ALU_VEC_021_SCL_122,
    ALU_VEC_120_SCL_212,
    ALU_VEC_102_SCL_221
  };
  for (unsigned i = 0; i < 4; i++) {
    TransBS = TransSwz[i];
    if (!isConstCompatible(TransBS, TransOps, ConstCount))
      continue;
    bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
                                           TransBS);
    if (Result) {
      ValidSwizzle.push_back(TransBS);
      return true;
    }
  }

  return false;
}


bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
    const {
  assert(Consts.size() <= 12 && "Too many operands in instructions group");
  unsigned Pair1 = 0, Pair2 = 0;
  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
    unsigned ReadConstHalf = Consts[i] & 2;
    unsigned ReadConstIndex = Consts[i] & (~3);
    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
    if (!Pair1) {
      Pair1 = ReadHalfConst;
      continue;
    }
    if (Pair1 == ReadHalfConst)
      continue;
    if (!Pair2) {
      Pair2 = ReadHalfConst;
      continue;
    }
    if (Pair2 != ReadHalfConst)
      return false;
  }
  return true;
}

bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
    const {
  std::vector<unsigned> Consts;
  SmallSet<int64_t, 4> Literals;
  for (unsigned i = 0, n = MIs.size(); i < n; i++) {
    MachineInstr *MI = MIs[i];
    if (!isALUInstr(MI->getOpcode()))
      continue;

    const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Srcs =
        getSrcs(MI);

    for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
      std::pair<MachineOperand *, unsigned> Src = Srcs[j];
      if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
        Literals.insert(Src.second);
      if (Literals.size() > 4)
        return false;
      if (Src.first->getReg() == AMDGPU::ALU_CONST)
        Consts.push_back(Src.second);
      if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
          AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
        unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
        unsigned Chan = RI.getHWRegChan(Src.first->getReg());
        Consts.push_back((Index << 2) | Chan);
      }
    }
  }
  return fitsConstReadLimitations(Consts);
}

DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
    const ScheduleDAG *DAG) const {
  const InstrItineraryData *II = TM->getInstrItineraryData();
  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}

static bool
isPredicateSetter(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I) {
  while (I != MBB.begin()) {
    --I;
    MachineInstr *MI = I;
    if (isPredicateSetter(MI->getOpcode()))
      return MI;
  }

  return NULL;
}

static
bool isJump(unsigned Opcode) {
  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
}

bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const {
  // Most of the following comes from the ARM implementation of AnalyzeBranch

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
      !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
    if (LastOpc == AMDGPU::JUMP) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (LastOpc == AMDGPU::JUMP_COND) {
      MachineInstr *predSet = I;
      while (!isPredicateSetter(predSet->getOpcode())) {
        predSet = --I;
      }
      TBB = LastInst->getOperand(0).getMBB();
      Cond.push_back(predSet->getOperand(1));
      Cond.push_back(predSet->getOperand(2));
      Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
      return false;
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a B and a Bcc, handle it.
  if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
  const MachineInstr *MI = op.getParent();

  switch (MI->getDesc().OpInfo->RegClass) {
  default: // FIXME: fallthrough??
  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
  };
}

static
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
       It != E; ++It) {
    if (It->getOpcode() == AMDGPU::CF_ALU ||
        It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
      return llvm::prior(It.base());
  }
  return MBB.end();
}

unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const {
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (FBB == 0) {
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
      return 1;
    } else {
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      addFlag(PredSet, 0, MO_FLAG_PUSH);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
      if (CfAlu == MBB.end())
        return 1;
      assert(CfAlu->getOpcode() == AMDGPU::CF_ALU);
      CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
      return 1;
    }
  } else {
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    addFlag(PredSet, 0, MO_FLAG_PUSH);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
            .addMBB(TBB)
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      return 2;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU);
    CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
    return 2;
  }
}

unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {

  // Note: we leave PRED* instructions there.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  return 2;
}

bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const {
  int idx = MI->findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI->getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool
R600InstrInfo::isPredicable(MachineInstr *MI) const {
  // XXX: KILL* instructions can be predicated, but they must be the last
  // instruction in a clause, so this means any instructions after them cannot
  // be predicated.  Until we have proper support for instruction clauses in
  // the backend, we will mark KILL* instructions as unpredicable.

  if (MI->getOpcode() == AMDGPU::KILLGT) {
    return false;
  } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
    // If the clause starts in the middle of the MBB, then the MBB contains
    // more than a single clause, and we are unable to predicate several
    // clauses.
    if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
      return false;
    // TODO: We don't support KC merging atm
    if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
      return false;
    return true;
  } else if (isVector(*MI)) {
    return false;
  } else {
    return AMDGPUInstrInfo::isPredicable(MI);
  }
}


bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCyles,
                                   unsigned ExtraPredCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCyles,
                                         const BranchProbability &Probability)
                                         const {
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const {
  return false;
}


bool
R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case OPCODE_IS_ZERO_INT:
    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
    break;
  case OPCODE_IS_NOT_ZERO_INT:
    MO.setImm(OPCODE_IS_ZERO_INT);
    break;
  case OPCODE_IS_ZERO:
    MO.setImm(OPCODE_IS_NOT_ZERO);
    break;
  case OPCODE_IS_NOT_ZERO:
    MO.setImm(OPCODE_IS_ZERO);
    break;
  default:
    return true;
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }
  return false;
}

bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
                                std::vector<MachineOperand> &Pred) const {
  return isPredicateSetter(MI->getOpcode());
}


bool
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                                 const SmallVectorImpl<MachineOperand> &Pred2) const {
  return false;
}


bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                                    const SmallVectorImpl<MachineOperand> &Pred) const {
  int PIdx = MI->findFirstPredOperandIdx();

  if (MI->getOpcode() == AMDGPU::CF_ALU) {
    MI->getOperand(8).setImm(0);
    return true;
  }

  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                            const MachineInstr *MI,
                                            unsigned *PredCost) const {
  if (PredCost)
    *PredCost = 2;
  return 2;
}

int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  int Offset = 0;

  if (MFI->getNumObjects() == 0) {
    return -1;
  }

  if (MRI.livein_empty()) {
    return 0;
  }

  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
                                            LE = MRI.livein_end();
       LI != LE; ++LI) {
    Offset = std::max(Offset,
                      GET_REG_INDEX(RI.getEncodingValue(LI->first)));
  }

  return Offset + 1;
}

int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
  int Offset = 0;
  const MachineFrameInfo *MFI = MF.getFrameInfo();

  // Variable sized objects are not supported
  assert(!MFI->hasVarSizedObjects());

  if (MFI->getNumObjects() == 0) {
    return -1;
  }

  Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);

  return getIndirectIndexBegin(MF) + Offset;
}

std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
                                             const MachineFunction &MF) const {
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
  std::vector<unsigned> Regs;

  unsigned StackWidth = TFL->getStackWidth(MF);
  int End = getIndirectIndexEnd(MF);

  if (End == -1) {
    return Regs;
  }

  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
    Regs.push_back(SuperReg);
    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
      Regs.push_back(Reg);
    }
  }
  return Regs;
}

unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                                 unsigned Channel) const {
  // XXX: Remove when we support a stack width > 2
  assert(Channel == 0);
  return RegIndex;
}

const TargetRegisterClass *R600InstrInfo::getIndirectAddrStoreRegClass(
                                                     unsigned SourceReg) const {
  return &AMDGPU::R600_TReg32RegClass;
}

const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
  return &AMDGPU::TRegMemRegClass;
}

MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg)
                                       const {
  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X, OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);

  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                      AddrReg, ValueReg)
                                      .addReg(AMDGPU::AR_X,
                                           RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1);
  return Mov;
}

MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X,
                                               OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);
  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                      ValueReg,
                                      AddrReg)
                                      .addReg(AMDGPU::AR_X,
                                           RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1);

  return Mov;
}

const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
  return &AMDGPU::IndirectRegRegClass;
}

unsigned R600InstrInfo::getMaxAlusPerClause() const {
  return 115;
}

MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
                                                  MachineBasicBlock::iterator I,
                                                  unsigned Opcode,
                                                  unsigned DstReg,
                                                  unsigned Src0Reg,
                                                  unsigned Src1Reg) const {
  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
    DstReg);            // $dst

  if (Src1Reg) {
    MIB.addImm(0)       // $update_exec_mask
       .addImm(0);      // $update_predicate
  }
  MIB.addImm(1)         // $write
     .addImm(0)         // $omod
     .addImm(0)         // $dst_rel
     .addImm(0)         // $dst_clamp
     .addReg(Src0Reg)   // $src0
     .addImm(0)         // $src0_neg
     .addImm(0)         // $src0_rel
     .addImm(0)         // $src0_abs
     .addImm(-1);       // $src0_sel

  if (Src1Reg) {
    MIB.addReg(Src1Reg) // $src1
       .addImm(0)       // $src1_neg
       .addImm(0)       // $src1_rel
       .addImm(0)       // $src1_abs
       .addImm(-1);     // $src1_sel
  }

  // XXX: The r600g finalizer expects this to be 1; once we've moved the
  // scheduling to the backend, we can change the default to 0.
  MIB.addImm(1)                          // $last
     .addReg(AMDGPU::PRED_SEL_OFF)       // $pred_sel
     .addImm(0)                          // $literal
     .addImm(0);                         // $bank_swizzle

  return MIB;
}

#define OPERAND_CASE(Label) \
  case Label: { \
    static const unsigned Ops[] = \
    { \
      Label##_X, \
      Label##_Y, \
      Label##_Z, \
      Label##_W \
    }; \
    return Ops[Slot]; \
  }

static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
  switch (Op) {
  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
  OPERAND_CASE(AMDGPU::OpName::update_pred)
  OPERAND_CASE(AMDGPU::OpName::write)
  OPERAND_CASE(AMDGPU::OpName::omod)
  OPERAND_CASE(AMDGPU::OpName::dst_rel)
  OPERAND_CASE(AMDGPU::OpName::clamp)
  OPERAND_CASE(AMDGPU::OpName::src0)
  OPERAND_CASE(AMDGPU::OpName::src0_neg)
  OPERAND_CASE(AMDGPU::OpName::src0_rel)
  OPERAND_CASE(AMDGPU::OpName::src0_abs)
  OPERAND_CASE(AMDGPU::OpName::src0_sel)
  OPERAND_CASE(AMDGPU::OpName::src1)
  OPERAND_CASE(AMDGPU::OpName::src1_neg)
  OPERAND_CASE(AMDGPU::OpName::src1_rel)
  OPERAND_CASE(AMDGPU::OpName::src1_abs)
  OPERAND_CASE(AMDGPU::OpName::src1_sel)
  OPERAND_CASE(AMDGPU::OpName::pred_sel)
  default:
    llvm_unreachable("Wrong Operand");
  }
}

#undef OPERAND_CASE

MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
    const {
  assert(MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
  unsigned Opcode;
  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.getGeneration() <= AMDGPUSubtarget::R700)
    Opcode = AMDGPU::DOT4_r600;
  else
    Opcode = AMDGPU::DOT4_eg;
  MachineBasicBlock::iterator I = MI;
  MachineOperand &Src0 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
  MachineOperand &Src1 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
  MachineInstr *MIB = buildDefaultInstruction(
      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
  static const unsigned Operands[14] = {
    AMDGPU::OpName::update_exec_mask,
    AMDGPU::OpName::update_pred,
    AMDGPU::OpName::write,
    AMDGPU::OpName::omod,
    AMDGPU::OpName::dst_rel,
    AMDGPU::OpName::clamp,
    AMDGPU::OpName::src0_neg,
    AMDGPU::OpName::src0_rel,
    AMDGPU::OpName::src0_abs,
    AMDGPU::OpName::src0_sel,
    AMDGPU::OpName::src1_neg,
    AMDGPU::OpName::src1_rel,
    AMDGPU::OpName::src1_abs,
    AMDGPU::OpName::src1_sel,
  };

  for (unsigned i = 0; i < 14; i++) {
    MachineOperand &MO = MI->getOperand(
        getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
    assert(MO.isImm());
    setImmOperand(MIB, Operands[i], MO.getImm());
  }
  MIB->getOperand(20).setImm(0);
  return MIB;
}

MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         uint64_t Imm) const {
  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
                                                 AMDGPU::ALU_LITERAL_X);
  setImmOperand(MovImm, AMDGPU::OpName::literal, Imm);
  return MovImm;
}

int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
  return getOperandIdx(MI.getOpcode(), Op);
}

int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
  return
      AMDGPU::getNamedOperandIdx(Opcode, Op);
}

void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op,
                                  int64_t Imm) const {
  int Idx = getOperandIdx(*MI, Op);
  assert(Idx != -1 && "Operand not supported for this instruction.");
  assert(MI->getOperand(Idx).isImm());
  MI->getOperand(Idx).setImm(Imm);
}

//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//

bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
}

MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
                                         unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  int FlagIndex = 0;
  if (Flag != 0) {
    // If we pass something other than the default value of Flag to this
    // function, it means we want to set a flag on an instruction
    // that uses native encoding.
    assert(HAS_NATIVE_OPERANDS(TargetFlags));
    bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
    switch (Flag) {
    case MO_FLAG_CLAMP:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp);
      break;
    case MO_FLAG_MASK:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write);
      break;
    case MO_FLAG_NOT_LAST:
    case MO_FLAG_LAST:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last);
      break;
    case MO_FLAG_NEG:
      switch (SrcIdx) {
      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break;
      case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break;
      }
      break;

    case MO_FLAG_ABS:
      assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
                       "instructions.");
      (void)IsOP3;
      switch (SrcIdx) {
      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break;
      }
      break;

    default:
      FlagIndex = -1;
      break;
    }
    assert(FlagIndex != -1 && "Flag not supported for this instruction");
  } else {
    FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
    assert(FlagIndex != 0 &&
           "Instruction flags not supported for this instruction");
  }

  MachineOperand &FlagOp = MI->getOperand(FlagIndex);
  assert(FlagOp.isImm());
  return FlagOp;
}

void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
                            unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (Flag == 0) {
    return;
  }
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    if (Flag == MO_FLAG_NOT_LAST) {
      clearFlag(MI, Operand, MO_FLAG_LAST);
    } else if (Flag == MO_FLAG_MASK) {
      clearFlag(MI, Operand, Flag);
    } else {
      FlagOp.setImm(1);
    }
  } else {
    MachineOperand &FlagOp = getFlagOp(MI, Operand);
    FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
  }
}

void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
                              unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    FlagOp.setImm(0);
  } else {
    MachineOperand &FlagOp = getFlagOp(MI);
    unsigned InstFlags = FlagOp.getImm();
    InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
    FlagOp.setImm(InstFlags);
  }
}