//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "SIISelLowering.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget &Subtarget;
public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();

  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;
  inline SDValue getSmallIPtrImm(unsigned Imm);
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue &R1, SDValue &R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  static bool checkType(const Value *Ptr, unsigned AS);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int cbID) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  /// \returns True if the current basic block being selected is at control
  /// flow depth 0, meaning that the current block dominates the exit block.
  bool isCFDepth0() const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr, SDValue &Offset,
                         SDValue &ImmOffset) const;

  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace

/// \brief This pass converts a legalized DAG into an AMDGPU-specific DAG,
/// ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
  : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const SITargetLowering *TL
      = static_cast<const SITargetLowering *>(getTargetLowering());
  return TL->analyzeImmediate(N) == 0;
}

/// \brief Determine the register class for \p OpNo.
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo, or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc = TM.getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return TM.getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        TM.getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return TM.getRegisterInfo()->getSubClassWithSubReg(SuperRC, SubRegIdx);
  }
  }
}

SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned Imm) {
  return CurDAG->getTargetConstant(Imm, MVT::i32);
}

bool AMDGPUDAGToDAGISel::SelectADDRParam(
    SDValue Addr, SDValue &R1, SDValue &R2) {

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, MVT::i32);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, MVT::i32);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, MVT::i32);
  }
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  return SelectADDRParam(Addr, R1, R2);
}


bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, MVT::i64);
  }
  return true;
}

SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr;   // Already selected.
  }

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI =
        static_cast<const AMDGPURegisterInfo *>(TM.getRegisterInfo());
    const SIRegisterInfo *SIRI =
        static_cast<const SIRegisterInfo *>(TM.getRegisterInfo());
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      bool UseVReg = true;
      for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
           U != E; ++U) {
        if (!U->isMachineOpcode()) {
          continue;
        }
        const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
        if (!RC) {
          continue;
        }
        if (SIRI->isSGPRClass(RC)) {
          UseVReg = false;
        }
      }
      switch(NumVectorElts) {
      case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID :
                                     AMDGPU::SReg_32RegClassID;
        break;
      case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
                                     AMDGPU::SReg_64RegClassID;
        break;
      case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
                                     AMDGPU::SReg_128RegClassID;
        break;
      case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
                                     AMDGPU::SReg_256RegClassID;
        break;
      case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
                                      AMDGPU::SReg_512RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128-bit register copy when going through the
      // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
      // as possible because they can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);

    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     SDLoc(N), EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  SDLoc(N), N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

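    // Materialize the 64-bit immediate as two 32-bit S_MOV_B32 moves and
    // recombine the halves with a REG_SEQUENCE (low half in sub0, high half
    // in sub1).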
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
                              CurDAG->getConstant(Imm & 0xFFFFFFFF, MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
                                  N->getValueType(0), Ops);
  }

  case AMDGPUISD::REGISTER_LOAD: {
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;

    SelectADDRIndirect(N->getOperand(1), Addr, Offset);
    const SDValue Ops[] = {
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
                                  CurDAG->getVTList(MVT::i32, MVT::i64,
                                                    MVT::Other),
                                  Ops);
  }
  case AMDGPUISD::REGISTER_STORE: {
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;
    SelectADDRIndirect(N->getOperand(2), Addr, Offset);
    const SDValue Ops[] = {
      N->getOperand(1),
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
                                  CurDAG->getVTList(MVT::Other),
                                  Ops);
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version,
    // which has a separate operand for the offset and width, the scalar
    // version packs the width and offset into a single operand. Try to move
    // to the scalar version if the offsets are constant, so that we can try
    // to keep extended loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    // Pack the offset and width of the BFE into the format expected by the
    // second source operand of S_BFE_I32 / S_BFE_U32: bits [5:0] contain the
    // offset and bits [22:16] contain the width.
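    //
    // For example, offset = 4 and width = 8 would be packed as
    // (4 | (8 << 16)) = 0x00080004.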

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    uint32_t PackedVal = OffsetVal | (WidthVal << 16);

    SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32);
    return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                                  SDLoc(N),
                                  MVT::i32,
                                  N->getOperand(0),
                                  PackedOffsetWidth);

  }
  case AMDGPUISD::DIV_SCALE: {
    return SelectDIV_SCALE(N);
  }
  }
  return SelectCode(N);
}


bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
  assert(AS != 0 && "Use checkPrivateAddress instead.");
  if (!Ptr)
    return false;

  return Ptr->getType()->getPointerAddressSpace() == AS;
}

bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
  if (Op->getPseudoValue())
    return true;

  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;

  return false;
}

bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
  const Value *MemVal = N->getMemOperand()->getValue();
  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}

bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
  const Value *MemVal = N->getMemOperand()->getValue();
  if (CbId == -1)
    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}

bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getMemoryVT().bitsLT(MVT::i32)) {
      return true;
    }
  }
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
  MachineMemOperand *MMO = N->getMemOperand();
  if (checkPrivateAddress(N->getMemOperand())) {
    if (MMO) {
      const PseudoSourceValue *PSV = MMO->getPseudoValue();
      if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
  if (checkPrivateAddress(N->getMemOperand())) {
    // Check to make sure we are not a constant pool load or a constant load
    // that is marked as a private load.
    if (isCPLoad(N) || isConstantLoad(N, -1)) {
      return false;
    }
  }

  const Value *MemVal = N->getMemOperand()->getValue();
  if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isCFDepth0() const {
  // FIXME: Figure out a way to use DominatorTree analysis here.
  const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock();
  const Function *Fn = FuncInfo->Fn;
  return &Fn->front() == CurBlock || &Fn->back() == CurBlock;
}


const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

#ifdef DEBUGTMP
#undef INT64_C
#endif
#undef DEBUGTMP

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue &BaseReg,
                                                         SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, true);
    return true;
  }
  return false;
}

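// Match an address for the ADDRVTX_READ complex pattern as a base plus a
// 16-bit immediate offset. This always succeeds: if no constant offset can
// be folded, the whole address becomes the base and the offset is 0.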
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, MVT::i32);
  }

  return true;
}

// Split a 64-bit add/sub into a 32-bit add/sub of the low halves and an
// add/sub-with-carry of the high halves, then recombine the two 32-bit
// results with a REG_SEQUENCE.
SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_I32 : AMDGPU::S_SUB_I32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  if (!isCFDepth0()) {
    Opc = IsAdd ? AMDGPU::V_ADD_I32_e32 : AMDGPU::V_SUB_I32_e32;
    CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e32 : AMDGPU::V_SUBB_U32_e32;
  }

  SDNode *AddLo = CurDAG->getMachineNode(Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
    = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                             SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}

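// Select a DIV_SCALE node to V_DIV_SCALE_F32 / V_DIV_SCALE_F64; the four
// trailing zero operands supply default values for the remaining
// instruction operands.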
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);

  SDValue Ops[] = {
    N->getOperand(0),
    N->getOperand(1),
    N->getOperand(2),
    Zero,
    Zero,
    Zero,
    Zero
  };

  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}

static SDValue wrapAddr64Rsrc(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
  return SDValue(DAG->getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::v4i32,
                                     Ptr), 0);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
                                           SDValue &Offset,
                                           SDValue &ImmOffset) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (isUInt<12>(C1->getZExtValue())) {

      if (N0.getOpcode() == ISD::ADD) {
        // (add (add N2, N3), C1)
        SDValue N2 = N0.getOperand(0);
        SDValue N3 = N0.getOperand(1);
        Ptr = wrapAddr64Rsrc(CurDAG, DL, N2);
        Offset = N3;
        ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
        return true;
      }

      // (add N0, C1)
      Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getTargetConstant(0, MVT::i64));
      Offset = N0;
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
      return true;
    }
  }
  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1)
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Ptr = wrapAddr64Rsrc(CurDAG, DL, N0);
    Offset = N1;
    ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
    return true;
  }

  // Default case: no offset can be folded.
  Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getConstant(0, MVT::i64));
  Offset = Addr;
  ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
  return true;
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering &Lowering =
      *static_cast<const AMDGPUTargetLowering *>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more.
    for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ++I) {

      SDNode *Node = I;

      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}