//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "R600InstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "AMDILUtilityFunctions.h"

#define GET_INSTRINFO_CTOR
#include "AMDGPUGenDFAPacketizer.inc"

using namespace llvm;

R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm, *this),
    TM(tm)
  { }

const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
{
  return RI;
}

bool R600InstrInfo::isTrig(const MachineInstr &MI) const
{
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}

bool R600InstrInfo::isVector(const MachineInstr &MI) const
{
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}

void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const
{
  if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
      && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
    // Copy vec4 registers one channel at a time.
    for (unsigned I = 0; I < 4; I++) {
      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
      BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
              .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define)
              .addReg(RI.getSubReg(SrcReg, SubRegIndex))
              .addImm(0) // Flag
              .addReg(0) // PREDICATE_BIT
              .addReg(DestReg, RegState::Define | RegState::Implicit);
    }
  } else {
    // Mixed vec4/scalar copies are not supported.
    assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
           && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));

    BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc))
      .addImm(0) // Flag
      .addReg(0); // PREDICATE_BIT
  }
}

// Build (but do not insert) a MOV of an immediate: DstReg = ALU_LITERAL_X, Imm.
MachineInstr *R600InstrInfo::getMovImmInstr(MachineFunction *MF,
                                            unsigned DstReg, int64_t Imm) const
{
  MachineInstr *MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
  MachineInstrBuilder(MI).addReg(DstReg, RegState::Define)
                         .addReg(AMDGPU::ALU_LITERAL_X)
                         .addImm(Imm)
                         .addReg(0); // PREDICATE_BIT

  return MI;
}

unsigned R600InstrInfo::getIEQOpcode() const
{
  return AMDGPU::SETE_INT;
}

bool R600InstrInfo::isMov(unsigned Opcode) const
{
  switch (Opcode) {
  default: return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}

// Some instructions act as placeholders to emulate operations that the GPU
// hardware does automatically. This function can be used to check if
// an opcode falls into this category.
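// For example, a pass estimating how much real work a block does might skip
// placeholders (an illustrative sketch, not code from this file):
//   for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I)
//     if (!TII->isPlaceHolderOpcode(I->getOpcode()))
//       ++NumRealInstrs;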
bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const
{
  switch (Opcode) {
  default: return false;
  case AMDGPU::RETURN:
  case AMDGPU::MASK_WRITE:
  case AMDGPU::RESERVE_REG:
    return true;
  }
}

bool R600InstrInfo::isReductionOp(unsigned Opcode) const
{
  switch (Opcode) {
  default: return false;
  case AMDGPU::DOT4_r600:
  case AMDGPU::DOT4_eg:
    return true;
  }
}

bool R600InstrInfo::isCubeOp(unsigned Opcode) const
{
  switch (Opcode) {
  default: return false;
  case AMDGPU::CUBE_r600_pseudo:
  case AMDGPU::CUBE_r600_real:
  case AMDGPU::CUBE_eg_pseudo:
  case AMDGPU::CUBE_eg_real:
    return true;
  }
}

DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
                                                        const ScheduleDAG *DAG) const
{
  const InstrItineraryData *II = TM->getInstrItineraryData();
  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}

static bool
isPredicateSetter(unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

// Walk backwards from I and return the closest preceding predicate setter
// (PRED_X), or NULL if there is none in this block.
static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I)
{
  while (I != MBB.begin()) {
    --I;
    MachineInstr *MI = I;
    if (isPredicateSetter(MI->getOpcode()))
      return MI;
  }

  return NULL;
}

bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const
{
  // Most of the following comes from the ARM implementation of AnalyzeBranch.

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
      static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
    if (LastOpc == AMDGPU::JUMP) {
      if (!isPredicated(LastInst)) {
        // Unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        // Conditional branch: recover the condition from the predicate setter.
        MachineInstr *predSet = I;
        while (!isPredicateSetter(predSet->getOpcode())) {
          predSet = --I;
        }
        TBB = LastInst->getOperand(0).getMBB();
        Cond.push_back(predSet->getOperand(1));
        Cond.push_back(predSet->getOperand(2));
        Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
        return false;
      }
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a conditional and an unconditional branch, handle it.
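  // In R600 terms that is a predicated JUMP to TBB immediately followed by an
  // unpredicated JUMP to FBB, which is the pattern matched below.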
  if (SecondLastOpc == AMDGPU::JUMP &&
      isPredicated(SecondLastInst) &&
      LastOpc == AMDGPU::JUMP &&
      !isPredicated(LastInst)) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
  const MachineInstr *MI = op.getParent();

  switch (MI->getDesc().OpInfo->RegClass) {
  default: // FIXME: fallthrough??
  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
  }
}

unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const
{
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (FBB == 0) {
    if (Cond.empty()) {
      // Unconditional branch.
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
      return 1;
    } else {
      // Conditional branch: update the predicate setter and emit a JUMP that
      // consumes the predicate bit.
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate!");
      addFlag(PredSet, 1, MO_FLAG_PUSH);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      return 1;
    }
  } else {
    // Two-way branch: a predicated JUMP to TBB followed by an unconditional
    // JUMP to FBB.
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate!");
    addFlag(PredSet, 1, MO_FLAG_PUSH);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP))
           .addMBB(TBB)
           .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
    return 2;
  }
}

unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
{
  // Note: we leave the PRED* instructions in place.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      clearFlag(predSet, 1, MO_FLAG_PUSH);
    }
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
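  // In a two-way branch the unconditional JUMP was removed above, so a JUMP
  // remaining here is the conditional half; clear its setter's push flag and
  // remove it as well.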
  default:
    return 1;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      clearFlag(predSet, 1, MO_FLAG_PUSH);
    }
    I->eraseFromParent();
    break;
  }
  return 2;
}

bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const
{
  int idx = MI->findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI->getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool
R600InstrInfo::isPredicable(MachineInstr *MI) const
{
  return AMDGPUInstrInfo::isPredicable(MI);
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCycles,
                                   unsigned ExtraPredCycles,
                                   const BranchProbability &Probability) const
{
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   const BranchProbability &Probability) const
{
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCycles,
                                         const BranchProbability &Probability) const
{
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const
{
  return false;
}

// Invert the condition produced by AnalyzeBranch: negate the compare opcode
// and flip the PRED_SEL register.
bool
R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
{
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case OPCODE_IS_ZERO_INT:
    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
    break;
  case OPCODE_IS_NOT_ZERO_INT:
    MO.setImm(OPCODE_IS_ZERO_INT);
    break;
  case OPCODE_IS_ZERO:
    MO.setImm(OPCODE_IS_NOT_ZERO);
    break;
  case OPCODE_IS_NOT_ZERO:
    MO.setImm(OPCODE_IS_ZERO);
    break;
  default:
    return true;
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }
  return false;
}

bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
                                std::vector<MachineOperand> &Pred) const
{
  return isPredicateSetter(MI->getOpcode());
}

bool
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                                 const SmallVectorImpl<MachineOperand> &Pred2) const
{
  return false;
}

// Rewrite MI's predicate operand to use the PRED_SEL register from Pred and
// add an implicit use of PREDICATE_BIT.
bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                                    const SmallVectorImpl<MachineOperand> &Pred) const
{
  int PIdx = MI->findFirstPredOperandIdx();

  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                   const MachineInstr *MI,
                                   unsigned *PredCost) const
{
  if (PredCost)
    *PredCost = 2;
  return 2;
}

//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//
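// An instruction's per-operand MO_FLAG_* modifiers are packed into a single
// immediate "flag" operand: operand i's flags occupy the bit slice starting
// at NUM_MO_FLAGS * i, so addFlag() and clearFlag() below shift the flag mask
// by that amount before OR-ing it in or masking it out.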
bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const
{
  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
}

MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI) const
{
  unsigned FlagIndex = GET_FLAG_OPERAND_IDX(get(MI->getOpcode()).TSFlags);
  assert(FlagIndex != 0 &&
         "Instruction flags not supported for this instruction");
  MachineOperand &FlagOp = MI->getOperand(FlagIndex);
  assert(FlagOp.isImm());
  return FlagOp;
}

void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
                            unsigned Flag) const
{
  MachineOperand &FlagOp = getFlagOp(MI);
  FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
}

void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
                              unsigned Flag) const
{
  MachineOperand &FlagOp = getFlagOp(MI);
  unsigned InstFlags = FlagOp.getImm();
  InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
  FlagOp.setImm(InstFlags);
}
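
// Example: InsertBranch() and RemoveBranch() above use this pair to toggle
// the push flag on operand 1 of a PRED_X predicate setter:
//   addFlag(PredSet, 1, MO_FLAG_PUSH);   // when inserting a conditional branch
//   clearFlag(PredSet, 1, MO_FLAG_PUSH); // when removing it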