//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//


#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrDesc.h"

using namespace llvm;

SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
  : AMDGPUInstrInfo(st),
    RI(st) { }

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MI, DebugLoc DL,
                         unsigned DestReg, unsigned SrcReg,
                         bool KillSrc) const {

  // If we are trying to copy to or from SCC, there is a bug somewhere else in
  // the backend. While it may be theoretically possible to do this, it should
  // never be necessary.
  assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
  };

  static const int16_t Sub0_7[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
  };

  static const int16_t Sub0_3[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
  };

  static const int16_t Sub0_2[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
  };

  static const int16_t Sub0_1[] = {
    AMDGPU::sub0, AMDGPU::sub1, 0
  };

  unsigned Opcode;
  const int16_t *SubIndices;

  if (AMDGPU::M0 == DestReg) {
    // Check if M0 isn't already set to this value
    for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
         I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {

      if (!I->definesRegister(AMDGPU::M0))
        continue;

      unsigned Opc = I->getOpcode();
      if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
        break;

      if (!I->readsRegister(SrcReg))
        break;

      // The copy isn't necessary
      return;
    }
  }

  if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_15;

  } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
           AMDGPU::SReg_64RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_1;

  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_2;

  } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
           AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
           AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
           AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_15;

  } else {
    llvm_unreachable("Can't copy register!");
  }

  while (unsigned SubIdx = *SubIndices++) {
    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
      get(Opcode), RI.getSubReg(DestReg, SubIdx));

    Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));

    if (*SubIndices)
      Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
  }
}

unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
  int NewOpc;

  // Try to map original to commuted opcode
  if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
    return NewOpc;

  // Try to map commuted to original opcode
  if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
    return NewOpc;

  return Opcode;
}

void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned SrcReg, bool isKill,
                                      int FrameIndex,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned KillFlag = isKill ? RegState::Kill : 0;

  if (RI.hasVGPRs(RC)) {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Can't spill VGPR!");
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
            .addReg(SrcReg);
  } else if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
    unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MF);
    unsigned TgtReg = MFI->SpillTracker.LaneVGPR;

    BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), TgtReg)
            .addReg(SrcReg, KillFlag)
            .addImm(Lane);
    MFI->SpillTracker.addSpilledReg(FrameIndex, TgtReg, Lane);
  } else if (RI.isSGPRClass(RC)) {
    // We are only allowed to create one new instruction when spilling
    // registers, so we need to use a pseudo instruction for vector
    // registers.
    //
    // Reserve a spot in the spill tracker for each sub-register of
    // the vector register.
    unsigned NumSubRegs = RC->getSize() / 4;
    unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MF, NumSubRegs);
    MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
                                    FirstLane);

    unsigned Opcode;
    switch (RC->getSize() * 8) {
    case 64:  Opcode = AMDGPU::SI_SPILL_S64_SAVE;  break;
    case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
    case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
    case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
    default: llvm_unreachable("Cannot spill register class");
    }

    BuildMI(MBB, MI, DL, get(Opcode), MFI->SpillTracker.LaneVGPR)
            .addReg(SrcReg)
            .addImm(FrameIndex);
  } else {
    llvm_unreachable("VGPR spilling not supported");
  }
}

void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       unsigned DestReg, int FrameIndex,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  DebugLoc DL = MBB.findDebugLoc(MI);

  if (RI.hasVGPRs(RC)) {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Can't retrieve spilled VGPR!");
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
            .addImm(0);
  } else if (RI.isSGPRClass(RC)) {
    unsigned Opcode;
    switch (RC->getSize() * 8) {
    case 32:  Opcode = AMDGPU::SI_SPILL_S32_RESTORE;  break;
    case 64:  Opcode = AMDGPU::SI_SPILL_S64_RESTORE;  break;
    case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
    case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
    case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
    default: llvm_unreachable("Cannot spill register class");
    }

    SIMachineFunctionInfo::SpilledReg Spill =
        MFI->SpillTracker.getSpilledReg(FrameIndex);

    BuildMI(MBB, MI, DL, get(Opcode), DestReg)
            .addReg(Spill.VGPR)
            .addImm(FrameIndex);
  } else {
    llvm_unreachable("VGPR spilling not supported");
  }
}

static unsigned getNumSubRegsForSpillOp(unsigned Op) {

  switch (Op) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_RESTORE:
    return 1;
  default: llvm_unreachable("Invalid spill opcode");
  }
}

void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
                             int Count) const {
  while (Count > 0) {
    int Arg;
    if (Count >= 8)
      Arg = 7;
    else
      Arg = Count - 1;
    Count -= 8;
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
            .addImm(Arg);
  }
}

bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  SIMachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MBB.findDebugLoc(MI);
  switch (MI->getOpcode()) {
  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);

  // SGPR register spill
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
    unsigned FrameIndex = MI->getOperand(2).getImm();

    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      SIMachineFunctionInfo::SpilledReg Spill;
      unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(1).getReg(),
                                            &AMDGPU::SGPR_32RegClass, i);
      Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);

      BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
              MI->getOperand(0).getReg())
              .addReg(SubReg)
              .addImm(Spill.Lane + i);
    }
    MI->eraseFromParent();
    break;
  }

  // SGPR register restore
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      SIMachineFunctionInfo::SpilledReg Spill;
      unsigned FrameIndex = MI->getOperand(2).getImm();
      unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(0).getReg(),
                                            &AMDGPU::SGPR_32RegClass, i);
      Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);

      BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), SubReg)
              .addReg(MI->getOperand(1).getReg())
              .addImm(Spill.Lane + i);
    }
    insertNOPs(MI, 3);
    MI->eraseFromParent();
    break;
  }
  }
  return true;
}

MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
                                              bool NewMI) const {

  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg())
    return nullptr;

  // Cannot commute VOP2 if src0 is SGPR.
  if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() &&
      RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg())))
    return nullptr;

  if (!MI->getOperand(2).isReg()) {
    // XXX: Commute instructions with FPImm operands
    if (NewMI || MI->getOperand(2).isFPImm() ||
        (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
      return nullptr;
    }

    // XXX: Commute VOP3 instructions with abs and neg set.
    if (isVOP3(MI->getOpcode()) &&
        (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                        AMDGPU::OpName::abs)).getImm() ||
         MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                        AMDGPU::OpName::neg)).getImm()))
      return nullptr;

    unsigned Reg = MI->getOperand(1).getReg();
    unsigned SubReg = MI->getOperand(1).getSubReg();
    MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
    MI->getOperand(2).ChangeToRegister(Reg, false);
    MI->getOperand(2).setSubReg(SubReg);
  } else {
    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
  }

  if (MI)
    MI->setDesc(get(commuteOpcode(MI->getOpcode())));

  return MI;
}

MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         unsigned SrcReg) const {
  return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
                 DstReg).addReg(SrcReg);
}

bool SIInstrInfo::isMov(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
    return true;
  }
}

bool
SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
  return RC != &AMDGPU::EXECRegRegClass;
}

bool
SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
                                         AliasAnalysis *AA) const {
  switch (MI->getOpcode()) {
  default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA);
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B32_e32:
    return MI->getOperand(1).isImm();
  }
}

namespace llvm {
namespace AMDGPU {
// Helper function generated by tablegen. We are wrapping this with
// an SIInstrInfo function that returns bool rather than int.
int isDS(uint16_t Opcode);
}
}

bool SIInstrInfo::isDS(uint16_t Opcode) const {
  return ::AMDGPU::isDS(Opcode) != -1;
}

int SIInstrInfo::isMIMG(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::MIMG;
}

int SIInstrInfo::isSMRD(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::SMRD;
}

bool SIInstrInfo::isVOP1(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP1;
}

bool SIInstrInfo::isVOP2(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP2;
}

bool SIInstrInfo::isVOP3(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP3;
}

bool SIInstrInfo::isVOPC(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOPC;
}

bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU;
}

bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
  int32_t Val = Imm.getSExtValue();
  if (Val >= -16 && Val <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.
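  //
  // By contrast, a pattern like 0x40490fdb (~3.14159f) matches neither the
  // integer range [-16, 64] nor any of the float encodings checked below, so
  // it has to be emitted as a 32-bit literal instead.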

  return (APInt::floatToBits(0.0f) == Imm) ||
         (APInt::floatToBits(1.0f) == Imm) ||
         (APInt::floatToBits(-1.0f) == Imm) ||
         (APInt::floatToBits(0.5f) == Imm) ||
         (APInt::floatToBits(-0.5f) == Imm) ||
         (APInt::floatToBits(2.0f) == Imm) ||
         (APInt::floatToBits(-2.0f) == Imm) ||
         (APInt::floatToBits(4.0f) == Imm) ||
         (APInt::floatToBits(-4.0f) == Imm);
}

bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
  if (MO.isImm())
    return isInlineConstant(APInt(32, MO.getImm(), true));

  if (MO.isFPImm()) {
    APFloat FpImm = MO.getFPImm()->getValueAPF();
    return isInlineConstant(FpImm.bitcastToAPInt());
  }

  return false;
}

bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
  return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO);
}

static bool compareMachineOp(const MachineOperand &Op0,
                             const MachineOperand &Op1) {
  if (Op0.getType() != Op1.getType())
    return false;

  switch (Op0.getType()) {
  case MachineOperand::MO_Register:
    return Op0.getReg() == Op1.getReg();
  case MachineOperand::MO_Immediate:
    return Op0.getImm() == Op1.getImm();
  case MachineOperand::MO_FPImmediate:
    return Op0.getFPImm() == Op1.getFPImm();
  default:
    llvm_unreachable("Didn't expect to be comparing these operand types");
  }
}

bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
                                    StringRef &ErrInfo) const {
  uint16_t Opcode = MI->getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  // Make sure the number of operands is correct.
  const MCInstrDesc &Desc = get(Opcode);
  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI->getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";
    return false;
  }

  // Make sure the register classes are correct
  for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
    switch (Desc.OpInfo[i].OperandType) {
    case MCOI::OPERAND_REGISTER: {
      int RegClass = Desc.OpInfo[i].RegClass;
      if (!RI.regClassCanUseImmediate(RegClass) &&
          (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm())) {
        ErrInfo = "Expected register, but got immediate";
        return false;
      }
    }
      break;
    case MCOI::OPERAND_IMMEDIATE:
      if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm()) {
        ErrInfo = "Expected immediate, but got non-immediate";
        return false;
      }
      // Fall-through
    default:
      continue;
    }

    if (!MI->getOperand(i).isReg())
      continue;

    int RegClass = Desc.OpInfo[i].RegClass;
    if (RegClass != -1) {
      unsigned Reg = MI->getOperand(i).getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg))
        continue;

      const TargetRegisterClass *RC = RI.getRegClass(RegClass);
      if (!RC->contains(Reg)) {
        ErrInfo = "Operand has incorrect register class.";
        return false;
      }
    }
  }


  // Verify VOP*
  if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
    unsigned ConstantBusCount = 0;
    unsigned SGPRUsed = AMDGPU::NoRegister;
    for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isUse() &&
          !TargetRegisterInfo::isVirtualRegister(MO.getReg())) {

        // EXEC register uses the constant bus.
        if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
          ++ConstantBusCount;

        // SGPRs use the constant bus
        if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
            (!MO.isImplicit() &&
             (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
              AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
          if (SGPRUsed != MO.getReg()) {
            ++ConstantBusCount;
            SGPRUsed = MO.getReg();
          }
        }
      }
      // Literal constants use the constant bus.
      if (isLiteralConstant(MO))
        ++ConstantBusCount;
    }
    if (ConstantBusCount > 1) {
      ErrInfo = "VOP* instruction uses the constant bus more than once";
      return false;
    }
  }

  // Verify SRC1 for VOP2 and VOPC
  if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
    const MachineOperand &Src1 = MI->getOperand(Src1Idx);
    if (Src1.isImm() || Src1.isFPImm()) {
      ErrInfo = "VOP[2C] src1 cannot be an immediate.";
      return false;
    }
  }

  // Verify VOP3
  if (isVOP3(Opcode)) {
    if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
      ErrInfo = "VOP3 src0 cannot be a literal constant.";
      return false;
    }
    if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
      ErrInfo = "VOP3 src1 cannot be a literal constant.";
      return false;
    }
    if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
      ErrInfo = "VOP3 src2 cannot be a literal constant.";
      return false;
    }
  }

  // Verify misc. restrictions on specific instructions.
  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
    MI->dump();

    const MachineOperand &Src0 = MI->getOperand(2);
    const MachineOperand &Src1 = MI->getOperand(3);
    const MachineOperand &Src2 = MI->getOperand(4);
    if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
      if (!compareMachineOp(Src0, Src1) &&
          !compareMachineOp(Src0, Src2)) {
        ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
        return false;
      }
    }
  }

  return true;
}

unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::S_MOV_B32:
    return MI.getOperand(1).isReg() ?
           AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32;
  case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
  case AMDGPU::S_LOAD_DWORD_IMM:
  case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
  case AMDGPU::S_LOAD_DWORDX2_IMM:
  case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
  case AMDGPU::S_LOAD_DWORDX4_IMM:
  case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  }
}

bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
  return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
}

const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
                                                      unsigned OpNo) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  const MCInstrDesc &Desc = get(MI.getOpcode());
  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
      Desc.OpInfo[OpNo].RegClass == -1)
    return MRI.getRegClass(MI.getOperand(OpNo).getReg());

  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return RI.getRegClass(RCID);
}

bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::REG_SEQUENCE:
  case AMDGPU::PHI:
  case AMDGPU::INSERT_SUBREG:
    return RI.hasVGPRs(getOpRegClass(MI, 0));
  default:
    return RI.hasVGPRs(getOpRegClass(MI, OpNo));
  }
}

void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
  MachineBasicBlock::iterator I = MI;
  MachineOperand &MO = MI->getOperand(OpIdx);
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
  const TargetRegisterClass *RC = RI.getRegClass(RCID);
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (MO.isReg()) {
    Opcode = AMDGPU::COPY;
  } else if (RI.isSGPRClass(RC)) {
    Opcode = AMDGPU::S_MOV_B32;
  }

  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
  unsigned Reg = MRI.createVirtualRegister(VRC);
  BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
          Reg).addOperand(MO);
  MO.ChangeToRegister(Reg, false);
}

unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC)
                                         const {
  assert(SuperReg.isReg());

  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
  unsigned SubReg = MRI.createVirtualRegister(SubRC);

  // Just in case the super register is itself a sub-register, copy it to a new
  // value so we don't need to worry about merging its subreg index with the
  // SubIdx passed to this function. The register coalescer should be able to
  // eliminate this extra copy.
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
          NewSuperReg)
          .addOperand(SuperReg);

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
          SubReg)
          .addReg(NewSuperReg, 0, SubIdx);
  return SubReg;
}

MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
  MachineBasicBlock::iterator MII,
  MachineRegisterInfo &MRI,
  MachineOperand &Op,
  const TargetRegisterClass *SuperRC,
  unsigned SubIdx,
  const TargetRegisterClass *SubRC) const {
  if (Op.isImm()) {
    // XXX - Is there a better way to do this?
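    // For a 64-bit immediate, sub0 selects the low 32 bits and sub1 the high
    // 32 bits; e.g. Op.getImm() == 0x0000000500000003 yields 0x3 for sub0 and
    // 0x5 for sub1.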
    if (SubIdx == AMDGPU::sub0)
      return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
    if (SubIdx == AMDGPU::sub1)
      return MachineOperand::CreateImm(Op.getImm() >> 32);

    llvm_unreachable("Unhandled register index for immediate");
  }

  unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
                                       SubIdx, SubRC);
  return MachineOperand::CreateReg(SubReg, false);
}

unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
                                    MachineBasicBlock::iterator MI,
                                    MachineRegisterInfo &MRI,
                                    const TargetRegisterClass *RC,
                                    const MachineOperand &Op) const {
  MachineBasicBlock *MBB = MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
  unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned Dst = MRI.createVirtualRegister(RC);

  MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
                             LoDst)
                     .addImm(Op.getImm() & 0xFFFFFFFF);
  MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
                             HiDst)
                     .addImm(Op.getImm() >> 32);

  BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
          .addReg(LoDst)
          .addImm(AMDGPU::sub0)
          .addReg(HiDst)
          .addImm(AMDGPU::sub1);

  Worklist.push_back(Lo);
  Worklist.push_back(Hi);

  return Dst;
}

void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src2);

  // Legalize VOP2
  if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
    MachineOperand &Src0 = MI->getOperand(Src0Idx);
    MachineOperand &Src1 = MI->getOperand(Src1Idx);

    // If the instruction implicitly reads VCC, we can't have any SGPR
    // operands, so move any that are present to VGPRs.
    bool ReadsVCC = MI->readsRegister(AMDGPU::VCC, &RI);
    if (ReadsVCC && Src0.isReg() &&
        RI.isSGPRClass(MRI.getRegClass(Src0.getReg()))) {
      legalizeOpWithMove(MI, Src0Idx);
      return;
    }

    if (ReadsVCC && Src1.isReg() &&
        RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
      legalizeOpWithMove(MI, Src1Idx);
      return;
    }

    // Legalize VOP2 instructions where src1 is not a VGPR. An SGPR input must
    // be the first operand, and there can only be one.
    if (Src1.isImm() || Src1.isFPImm() ||
        (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) {
      if (MI->isCommutable()) {
        if (commuteInstruction(MI))
          return;
      }
      legalizeOpWithMove(MI, Src1Idx);
    }
  }

  // XXX - Do any VOP3 instructions read VCC?
  // Legalize VOP3
  if (isVOP3(MI->getOpcode())) {
    int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx};
    unsigned SGPRReg = AMDGPU::NoRegister;
    for (unsigned i = 0; i < 3; ++i) {
      int Idx = VOP3Idx[i];
      if (Idx == -1)
        continue;
      MachineOperand &MO = MI->getOperand(Idx);

      if (MO.isReg()) {
        if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
          continue; // VGPRs are legal

        assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");

        if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
          SGPRReg = MO.getReg();
          // We can use one SGPR in each VOP3 instruction.
          continue;
        }
      } else if (!isLiteralConstant(MO)) {
        // If it is not a register and not a literal constant, then it must be
        // an inline constant which is always legal.
        continue;
      }
      // If we make it this far, then the operand is not legal and we must
      // legalize it.
      legalizeOpWithMove(MI, Idx);
    }
  }

  // Legalize REG_SEQUENCE and PHI
  // The register class of the operands must be the same type as the register
  // class of the output.
  if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
      MI->getOpcode() == AMDGPU::PHI) {
    const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
      if (!MI->getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
        continue;
      const TargetRegisterClass *OpRC =
          MRI.getRegClass(MI->getOperand(i).getReg());
      if (RI.hasVGPRs(OpRC)) {
        VRC = OpRC;
      } else {
        SRC = OpRC;
      }
    }

    // If any of the operands are VGPR registers, then they all must be VGPRs;
    // otherwise we will create illegal VGPR->SGPR copies when legalizing
    // them.
    if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
      if (!VRC) {
        assert(SRC);
        VRC = RI.getEquivalentVGPRClass(SRC);
      }
      RC = VRC;
    } else {
      RC = SRC;
    }

    // Update all the operands so they have the same type.
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
      if (!MI->getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
        continue;
      unsigned DstReg = MRI.createVirtualRegister(RC);
      MachineBasicBlock *InsertBB;
      MachineBasicBlock::iterator Insert;
      if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
        InsertBB = MI->getParent();
        Insert = MI;
      } else {
        // MI is a PHI instruction.
        InsertBB = MI->getOperand(i + 1).getMBB();
        Insert = InsertBB->getFirstTerminator();
      }
      BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
              get(AMDGPU::COPY), DstReg)
              .addOperand(MI->getOperand(i));
      MI->getOperand(i).setReg(DstReg);
    }
  }

  // Legalize INSERT_SUBREG
  // src0 must have the same register class as dst
  if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned Src0 = MI->getOperand(1).getReg();
    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
    const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
    if (DstRC != Src0RC) {
      MachineBasicBlock &MBB = *MI->getParent();
      unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
              .addReg(Src0);
      MI->getOperand(1).setReg(NewSrc0);
    }
    return;
  }

  // Legalize MUBUF* instructions
  // FIXME: If we start using the non-addr64 instructions for compute, we
  // may need to legalize them here.

  int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::srsrc);
  int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::vaddr);
  if (SRsrcIdx != -1 && VAddrIdx != -1) {
    const TargetRegisterClass *VAddrRC =
        RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);

    if (VAddrRC->getSize() == 8 &&
        MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
      // We have a MUBUF instruction that uses a 64-bit vaddr register and
      // srsrc has the incorrect register class.
      // In order to fix this, we need to extract the pointer from the
      // resource descriptor (srsrc), add it to the value of vaddr, then store
      // the result in the vaddr operand. Then, we need to set the pointer
      // field of the resource descriptor to zero.

      MachineBasicBlock &MBB = *MI->getParent();
      MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
      MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
      unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
      unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
      unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
      unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
      unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);

      // SRsrcPtrLo = srsrc:sub0
      SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
          &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);

      // SRsrcPtrHi = srsrc:sub1
      SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
          &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);

      // VAddrLo = vaddr:sub0
      VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
          &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);

      // VAddrHi = vaddr:sub1
      VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
          &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);

      // NewVaddrLo = SRsrcPtrLo + VAddrLo
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
              NewVAddrLo)
              .addReg(SRsrcPtrLo)
              .addReg(VAddrLo)
              .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);

      // NewVaddrHi = SRsrcPtrHi + VAddrHi
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
              NewVAddrHi)
              .addReg(SRsrcPtrHi)
              .addReg(VAddrHi)
              .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
              .addReg(AMDGPU::VCC, RegState::Implicit);

      // NewVaddr = {NewVaddrHi, NewVaddrLo}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
              NewVAddr)
              .addReg(NewVAddrLo)
              .addImm(AMDGPU::sub0)
              .addReg(NewVAddrHi)
              .addImm(AMDGPU::sub1);

      // Zero64 = 0
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
              Zero64)
              .addImm(0);

      // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
              SRsrcFormatLo)
              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);

      // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
              SRsrcFormatHi)
              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);

      // NewSRsrc = {Zero64, SRsrcFormat}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
              NewSRsrc)
              .addReg(Zero64)
              .addImm(AMDGPU::sub0_sub1)
              .addReg(SRsrcFormatLo)
              .addImm(AMDGPU::sub2)
              .addReg(SRsrcFormatHi)
              .addImm(AMDGPU::sub3);

      // Update the instruction to use NewVaddr
      MI->getOperand(VAddrIdx).setReg(NewVAddr);
      // Update the instruction to use NewSRsrc
      MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
    }
  }
}

void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI)
                                 const {
  MachineBasicBlock *MBB = MI->getParent();
  switch (MI->getOpcode()) {
  case AMDGPU::S_LOAD_DWORD_IMM:
  case AMDGPU::S_LOAD_DWORD_SGPR:
  case AMDGPU::S_LOAD_DWORDX2_IMM:
  case AMDGPU::S_LOAD_DWORDX2_SGPR:
  case AMDGPU::S_LOAD_DWORDX4_IMM:
  case AMDGPU::S_LOAD_DWORDX4_SGPR:
    unsigned NewOpcode = getVALUOp(*MI);
    unsigned RegOffset;
    unsigned ImmOffset;

    if (MI->getOperand(2).isReg()) {
      RegOffset = MI->getOperand(2).getReg();
      ImmOffset = 0;
    } else {
      assert(MI->getOperand(2).isImm());
      // SMRD instructions take a dword offset and MUBUF instructions
      // take a byte offset.
      ImmOffset = MI->getOperand(2).getImm() << 2;
      RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      if (isUInt<12>(ImmOffset)) {
        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
                RegOffset)
                .addImm(0);
      } else {
        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
                RegOffset)
                .addImm(ImmOffset);
        ImmOffset = 0;
      }
    }

    unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
    unsigned DWord0 = RegOffset;
    unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
            .addImm(0);
    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
            .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
            .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
            .addReg(DWord0)
            .addImm(AMDGPU::sub0)
            .addReg(DWord1)
            .addImm(AMDGPU::sub1)
            .addReg(DWord2)
            .addImm(AMDGPU::sub2)
            .addReg(DWord3)
            .addImm(AMDGPU::sub3);
    MI->setDesc(get(NewOpcode));
    if (MI->getOperand(2).isReg()) {
      MI->getOperand(2).setReg(MI->getOperand(1).getReg());
    } else {
      MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
    }
    MI->getOperand(1).setReg(SRsrc);
    MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
  }
}

void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
  SmallVector<MachineInstr *, 128> Worklist;
  Worklist.push_back(&TopInst);

  while (!Worklist.empty()) {
    MachineInstr *Inst = Worklist.pop_back_val();
    MachineBasicBlock *MBB = Inst->getParent();
    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

    unsigned Opcode = Inst->getOpcode();
    unsigned NewOpcode = getVALUOp(*Inst);

    // Handle some special cases
    switch (Opcode) {
    default:
      if (isSMRD(Inst->getOpcode())) {
        moveSMRDToVALU(Inst, MRI);
      }
      break;
    case AMDGPU::S_MOV_B64: {
      DebugLoc DL = Inst->getDebugLoc();

      // If the source operand is a register we can replace this with a
      // copy.
      if (Inst->getOperand(1).isReg()) {
        MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
          .addOperand(Inst->getOperand(0))
          .addOperand(Inst->getOperand(1));
        Worklist.push_back(Copy);
      } else {
        // Otherwise, we need to split this into two movs, because there is
        // no 64-bit VALU move instruction.
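        // For example, an s_mov_b64 of 0x123400000001 is rewritten as two
        // s_mov_b32 instructions (0x00000001 and 0x1234) whose results are
        // recombined with a REG_SEQUENCE by split64BitImm.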
        unsigned Reg = Inst->getOperand(0).getReg();
        unsigned Dst = split64BitImm(Worklist,
                                     Inst,
                                     MRI,
                                     MRI.getRegClass(Reg),
                                     Inst->getOperand(1));
        MRI.replaceRegWith(Reg, Dst);
      }
      Inst->eraseFromParent();
      continue;
    }
    case AMDGPU::S_AND_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_OR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_XOR_B64:
      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_NOT_B64:
      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_BCNT1_I32_B64:
      splitScalar64BitBCNT(Worklist, Inst);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_BFE_U64:
    case AMDGPU::S_BFE_I64:
    case AMDGPU::S_BFM_B64:
      llvm_unreachable("Moving this op to VALU not implemented");
    }

    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
      // We cannot move this instruction to the VALU, so we should try to
      // legalize its operands instead.
      legalizeOperands(Inst);
      continue;
    }

    // Use the new VALU Opcode.
    const MCInstrDesc &NewDesc = get(NewOpcode);
    Inst->setDesc(NewDesc);

    // Remove any references to SCC. Vector instructions can't read from it,
    // and we're just about to add the implicit use / defs of VCC, and we
    // don't want both.
    for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
      MachineOperand &Op = Inst->getOperand(i);
      if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
        Inst->RemoveOperand(i);
    }

    if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
      // We are converting these to a BFE, so we need to add the missing
      // operands for the size and offset.
      unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
      Inst->addOperand(Inst->getOperand(1));
      Inst->getOperand(1).ChangeToImmediate(0);
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(Size));

      // XXX - Other pointless operands. There are 4, but it seems you only
      // need 3 to not hit an assertion later in MCInstLower.
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(0));
    } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
      // The VALU version adds the second operand to the result, so insert an
      // extra 0 operand.
      Inst->addOperand(MachineOperand::CreateImm(0));
    }

    addDescImplicitUseDef(NewDesc, Inst);

    if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
      const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
      // If we need to move this to VGPRs, we need to unpack the second
      // operand back into the 2 separate ones for bit offset and width.
      assert(OffsetWidthOp.isImm() &&
             "Scalar BFE is only implemented for constant width and offset");
      uint32_t Imm = OffsetWidthOp.getImm();

      uint32_t Offset = Imm & 0x3f;               // Extract bits [5:0].
      uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
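      // e.g. an S_BFE immediate of 0x00080003 unpacks to Offset = 3 and
      // BitWidth = 8.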

      Inst->RemoveOperand(2); // Remove old immediate.
      Inst->addOperand(Inst->getOperand(1));
      Inst->getOperand(1).ChangeToImmediate(0);
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(Offset));
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(BitWidth));
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(0));
    }

    // Update the destination register class.

    const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);

    switch (Opcode) {
    // For target instructions, getOpRegClass just returns the virtual
    // register class associated with the operand, so we need to find an
    // equivalent VGPR register class in order to move the instruction to the
    // VALU.
    case AMDGPU::COPY:
    case AMDGPU::PHI:
    case AMDGPU::REG_SEQUENCE:
    case AMDGPU::INSERT_SUBREG:
      if (RI.hasVGPRs(NewDstRC))
        continue;
      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
      if (!NewDstRC)
        continue;
      break;
    default:
      break;
    }

    unsigned DstReg = Inst->getOperand(0).getReg();
    unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(DstReg, NewDstReg);

    // Legalize the operands
    legalizeOperands(Inst);

    for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
           E = MRI.use_end(); I != E; ++I) {
      MachineInstr &UseMI = *I->getParent();
      if (!canReadVGPR(UseMI, I.getOperandNo())) {
        Worklist.push_back(&UseMI);
      }
    }
  }
}

//===----------------------------------------------------------------------===//
// Indirect addressing callbacks
//===----------------------------------------------------------------------===//

unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                               unsigned Channel) const {
  assert(Channel == 0);
  return RegIndex;
}

const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::VReg_32RegClass;
}

void SIInstrInfo::splitScalar64BitUnaryOp(
  SmallVectorImpl<MachineInstr *> &Worklist,
  MachineInstr *Inst,
  unsigned Opcode) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src0 = Inst->getOperand(1);
  DebugLoc DL = Inst->getDebugLoc();

  MachineBasicBlock::iterator MII = Inst;

  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg() ?
    MRI.getRegClass(Src0.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);

  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub0, Src0SubRC);

  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);

  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
    .addOperand(SrcReg0Sub0);

  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub1, Src0SubRC);

  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
    .addOperand(SrcReg0Sub1);

  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
    .addReg(DestSub0)
    .addImm(AMDGPU::sub0)
    .addReg(DestSub1)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  // Try to legalize the operands in case we need to swap the order to keep it
  // valid.
  Worklist.push_back(LoHalf);
  Worklist.push_back(HiHalf);
}

void SIInstrInfo::splitScalar64BitBinaryOp(
  SmallVectorImpl<MachineInstr *> &Worklist,
  MachineInstr *Inst,
  unsigned Opcode) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src0 = Inst->getOperand(1);
  MachineOperand &Src1 = Inst->getOperand(2);
  DebugLoc DL = Inst->getDebugLoc();

  MachineBasicBlock::iterator MII = Inst;

  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg() ?
    MRI.getRegClass(Src0.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
  const TargetRegisterClass *Src1RC = Src1.isReg() ?
    MRI.getRegClass(Src1.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);

  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub0, Src0SubRC);
  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub0, Src1SubRC);

  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);

  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
    .addOperand(SrcReg0Sub0)
    .addOperand(SrcReg1Sub0);

  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub1, Src0SubRC);
  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub1, Src1SubRC);

  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
    .addOperand(SrcReg0Sub1)
    .addOperand(SrcReg1Sub1);

  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
    .addReg(DestSub0)
    .addImm(AMDGPU::sub0)
    .addReg(DestSub1)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  // Try to legalize the operands in case we need to swap the order to keep it
  // valid.
  Worklist.push_back(LoHalf);
  Worklist.push_back(HiHalf);
}

void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
                                       MachineInstr *Inst) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineBasicBlock::iterator MII = Inst;
  DebugLoc DL = Inst->getDebugLoc();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src = Inst->getOperand(1);

  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e32);
  const TargetRegisterClass *SrcRC = Src.isReg() ?
    MRI.getRegClass(Src.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);

  MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
                                                      AMDGPU::sub0, SrcSubRC);
  MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
                                                      AMDGPU::sub1, SrcSubRC);

  MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
    .addOperand(SrcRegSub0)
    .addImm(0);

  MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
    .addOperand(SrcRegSub1)
    .addReg(MidReg);

  MRI.replaceRegWith(Dest.getReg(), ResultReg);

  Worklist.push_back(First);
  Worklist.push_back(Second);
}

void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
                                        MachineInstr *Inst) const {
  // Add the implicit register uses and definitions from the new descriptor.
  if (NewDesc.ImplicitUses) {
    for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
      unsigned Reg = NewDesc.ImplicitUses[i];
      Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
    }
  }

  if (NewDesc.ImplicitDefs) {
    for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
      unsigned Reg = NewDesc.ImplicitDefs[i];
      Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
    }
  }
}

MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
                                   MachineBasicBlock *MBB,
                                   MachineBasicBlock::iterator I,
                                   unsigned ValueReg,
                                   unsigned Address, unsigned OffsetReg) const {
  const DebugLoc &DL = MBB->findDebugLoc(I);
  unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
                                      getIndirectIndexBegin(*MBB->getParent()));

  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
          .addReg(IndirectBaseReg, RegState::Define)
          .addOperand(I->getOperand(0))
          .addReg(IndirectBaseReg)
          .addReg(OffsetReg)
          .addImm(0)
          .addReg(ValueReg);
}

MachineInstrBuilder SIInstrInfo::buildIndirectRead(
                                   MachineBasicBlock *MBB,
                                   MachineBasicBlock::iterator I,
                                   unsigned ValueReg,
                                   unsigned Address, unsigned OffsetReg) const {
  const DebugLoc &DL = MBB->findDebugLoc(I);
  unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
                                      getIndirectIndexBegin(*MBB->getParent()));

  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
          .addOperand(I->getOperand(0))
          .addOperand(I->getOperand(1))
          .addReg(IndirectBaseReg)
          .addReg(OffsetReg)
          .addImm(0);
}

void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                           const MachineFunction &MF) const {
  int End = getIndirectIndexEnd(MF);
  int Begin = getIndirectIndexBegin(MF);

  if (End == -1)
    return;


  for (int Index = Begin; Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}