//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUDiagnosticInfoUnsupported.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  bool runOnMachineFunction(MachineFunction &MF) override;
  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue &R1, SDValue &R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  static bool checkType(const Value *Ptr, unsigned int AddrSpace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const StoreSDNode *N);
  static bool isFlatStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int CbId) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isFlatLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
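  // The Select* helpers below implement "complex patterns": TableGen
  // patterns refer to them by name, and each one decomposes a single
  // address SDValue into the operands of one addressing mode. A sketch of
  // the TableGen side (illustrative only; see the .td files for the real
  // definitions):
  //   def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;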
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &GLC) const;
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  SDNode *SelectAddrSpaceCast(SDNode *N);
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  SDNode *SelectS_BFEFromShifts(SDNode *N);
  SDNode *SelectS_BFE(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace

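// A sketch of how this selector is typically created: the target's pass
// configuration calls createAMDGPUISelDag() from its addInstSelector() hook
// (illustrative only; the real hook lives in AMDGPUTargetMachine.cpp):
//   bool AMDGPUPassConfig::addInstSelector() {
//     addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
//     return false;
//   }
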
/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM) {}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
  return SelectionDAGISel::runOnMachineFunction(MF);
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const SITargetLowering *TL
      = static_cast<const SITargetLowering *>(getTargetLowering());
  return TL->analyzeImmediate(N) == 0;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

bool AMDGPUDAGToDAGISel::SelectADDRParam(
    SDValue Addr, SDValue &R1, SDValue &R2) {

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  }
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  return SelectADDRParam(Addr, R1, R2);
}

bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
  }
  return true;
}

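// On SI+, the m0 register bounds-checks local-memory (LDS) addressing.
// glueCopyToM0() below writes the maximum value (-1, i.e. 0xffffffff) to m0
// so that no valid address gets clamped, and glues the copy to the memory
// operation so the scheduler cannot separate the two.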
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
                 AMDGPUAS::LOCAL_ADDRESS))
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N),
                                                           MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

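// Select() below intercepts the nodes that need target-specific handling
// before deferring to SelectCode(), the matcher that TableGen generates from
// the patterns in the .td files (included above as AMDGPUGenDAGISel.inc).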
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr; // Already selected.
  }

  if (isa<AtomicSDNode>(N))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128-bit register copy when going through the
      // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
      // as possible because they can't be bundled by our scheduler.
      switch (NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                    MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                      MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

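  // A 64-bit scalar immediate that is not an inline constant is
  // materialized below as two 32-bit moves recombined with a REG_SEQUENCE,
  // e.g. (illustrative):
  //   s_mov_b32 s0, <imm & 0xffffffff>
  //   s_mov_b32 s1, <imm >> 32>
  //   REG_SEQUENCE SReg_64, s0, sub0, s1, sub1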
  case ISD::Constant:
  case ISD::ConstantFP: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF,
                                                            DL, MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL,
                                                            MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                  N->getValueType(0), Ops);
  }
  case ISD::LOAD:
  case ISD::STORE: {
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version,
    // which has separate operands for the offset and width, the scalar
    // version packs the width and offset into a single operand. Try to move
    // to the scalar version if the offsets are constant, so that we can try
    // to keep extended loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
                    N->getOperand(0), OffsetVal, WidthVal);
  }
  case AMDGPUISD::DIV_SCALE: {
    return SelectDIV_SCALE(N);
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::ADDRSPACECAST:
    return SelectAddrSpaceCast(N);
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectS_BFE(N);
  }

  return SelectCode(N);
}

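// The is*Load / is*Store predicates below classify memory operations by the
// IR address space of their pointer operand. TableGen load/store patterns
// reference them as PatFrag predicates; a sketch of that side (illustrative
// only, see the real definitions in the .td files):
//   def global_store : PatFrag<(ops node:$val, node:$ptr),
//                              (store node:$val, node:$ptr), [{
//     return isGlobalStore(cast<StoreSDNode>(N));
//   }]>;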
bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
  assert(AS != 0 && "Use checkPrivateAddress instead.");
  if (!Ptr)
    return false;

  return Ptr->getType()->getPointerAddressSpace() == AS;
}

bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
  if (Op->getPseudoValue())
    return true;

  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;

  return false;
}

bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
  const Value *MemVal = N->getMemOperand()->getValue();
  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}

bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
  const Value *MemVal = N->getMemOperand()->getValue();
  if (CbId == -1)
    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}

bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getMemoryVT().bitsLT(MVT::i32))
      return true;

  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
  MachineMemOperand *MMO = N->getMemOperand();
  if (checkPrivateAddress(N->getMemOperand())) {
    if (MMO) {
      const PseudoSourceValue *PSV = MMO->getPseudoValue();
      if (PSV && PSV->isConstantPool()) {
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
  if (checkPrivateAddress(N->getMemOperand())) {
    // Check to make sure we are not a constant pool load or a constant load
    // that is marked as a private load.
    if (isCPLoad(N) || isConstantLoad(N, -1)) {
      return false;
    }
  }

  // Private is the catch-all address space: anything that is not one of the
  // known named address spaces is treated as a private load.
  const Value *MemVal = N->getMemOperand()->getValue();
  if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
    return true;
  }
  return false;
}

const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

#ifdef DEBUGTMP
#undef INT64_C
#endif
#undef DEBUGTMP

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

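// For example, a constant address of 32 above yields IntPtr = 8: the byte
// offset is emitted in 32-bit-word units (divided by 4).
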
bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue &BaseReg,
                                                         SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo = CurDAG->getMachineNode(Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
      = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                               SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}

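// For example, an i64 add is expanded above into a carry chain over two
// 32-bit scalar ops (illustrative):
//   s_add_u32  lo, lhs.sub0, rhs.sub0  ; also defines SCC (carry out)
//   s_addc_u32 hi, lhs.sub1, rhs.sub1  ; consumes SCC as carry in
// and the halves are recombined with a REG_SEQUENCE into an SReg_64.
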
// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = N1;
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          MachineSDNode *MachineSub
              = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                       Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = Addr.getOperand(0);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = Addr;
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

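// For example, (add %ptr, 400) above selects Base = %ptr, Offset = 400,
// since 400 fits the 16-bit unsigned DS offset field; a byte offset of
// 0x10000 or more falls through to the default case instead.
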
// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          MachineSDNode *MachineSub
              = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                       Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
          = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                   DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isUInt<12>(Imm->getZExtValue());
}

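// SelectMUBUF() below decomposes one address into the operands of a MUBUF
// (untyped buffer) access. For example (illustrative):
//   (add (add %ptr64, %vaddr), 100)
//     -> Addr64 = 1, Ptr = %ptr64, VAddr = %vaddr, Offset = 100
// A constant that does not fit the 12-bit immediate offset field is moved
// into SOffset through an s_mov_b32 instead.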
void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  SDLoc DL(Addr);

  GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return;
    } else if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                          CurDAG->getTargetConstant(C1->getZExtValue(), DL,
                                                    MVT::i32)),
                        0);
      return;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // The addr64 bit was removed for Volcanic Islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

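// Scratch (private memory) accesses are selected below against the scratch
// resource descriptor and per-wave scratch offset registers that
// SIMachineFunctionInfo reserves for the function; only the vaddr and the
// small immediate offset vary per access.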
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
  SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);

  // (add n0, c1)
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    // Offsets in vaddr must be positive.
    if (CurDAG->SignBitIsZero(N0)) {
      ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
      if (isLegalMUBUFImmOffset(C1)) {
        VAddr = N0;
        ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
        return true;
      }
    }
  }

  // (node)
  VAddr = Addr;
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &GLC) const {
  SDValue SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

///
/// \param EncodedOffset This is the immediate value that will be encoded
///        directly into the instruction. On SI/CI the \p EncodedOffset
///        will be in units of dwords and on VI+ it will be in units of bytes.
static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
                                 int64_t EncodedOffset) {
  return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
}

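// Worked example of the encoding difference: a byte offset of 16 is encoded
// as 4 (dwords) on SI/CI and must fit in 8 bits; on VI it stays 16 (bytes)
// and may use up to 20 bits.
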
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
      ByteOffset >> 2 : ByteOffset;

  if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {

  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
                                              SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

// FIXME: This is incorrect and only enough to be able to compile.
SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
  SDLoc DL(N);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  DiagnosticInfoUnsupported NotImplemented(*MF.getFunction(),
                                           "addrspacecast not implemented");
  CurDAG->getContext()->diagnose(NotImplemented);

  assert(Subtarget->hasFlatAddressSpace() &&
         "addrspacecast only supported with flat address space!");

  assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
          ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
         "Can only cast to / from flat address space!");

  // The flat instructions read the address as the index of the VGPR holding
  // the address, so casting should just be reinterpreting the base VGPR;
  // just insert trunc / bitcast / zext.

  SDValue Src = ASC->getOperand(0);
  EVT DestVT = ASC->getValueType(0);
  EVT SrcVT = Src.getValueType();

  unsigned SrcSize = SrcVT.getSizeInBits();
  unsigned DestSize = DestVT.getSizeInBits();

  if (SrcSize > DestSize) {
    assert(SrcSize == 64 && DestSize == 32);
    return CurDAG->getMachineNode(
        TargetOpcode::EXTRACT_SUBREG,
        DL,
        DestVT,
        Src,
        CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
  }

  if (DestSize > SrcSize) {
    assert(SrcSize == 32 && DestSize == 64);

    // FIXME: This is probably wrong, we should never be defining
    // a register class with both VGPRs and SGPRs
    SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
                                           MVT::i32);

    const SDValue Ops[] = {
      RC,
      Src,
      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                     CurDAG->getConstant(0, DL, MVT::i32)), 0),
      CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  assert(SrcSize == 64 && DestSize == 64);
  return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                                     uint32_t Offset, uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

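// Worked example of the packing above: Offset = 8 and Width = 8 give
// PackedVal = 8 | (8 << 16) = 0x00080008, i.e. "extract 8 bits starting at
// bit 8".
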
SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "((a << b) srl c)" ---> "BFE_U32 a, (c - b), (32 - c)"
  // "((a << b) sra c)" ---> "BFE_I32 a, (c - b), (32 - c)"
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
                      CVal - BVal, 32 - CVal);
    }
  }
  return SelectCode(N);
}

SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  }

  return SelectCode(N);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {

  unsigned Mods = 0;

  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
                                          SDValue &SrcMods) const {
  bool Res = SelectVOP3Mods(In, Src, SrcMods);
  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  // FIXME: Handle Clamp and Omod
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
                                           SDValue &SrcMods, SDValue &Clamp,
                                           SDValue &Omod) const {
  bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);

  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
         cast<ConstantSDNode>(Clamp)->isNullValue() &&
         cast<ConstantSDNode>(Omod)->isNullValue();
}

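// For example, SelectVOP3Mods above matches In = (fneg (fabs %x)) by
// stripping both nodes, returning Src = %x with
// SrcMods = SISrcMods::NEG | SISrcMods::ABS; the VOP3 encoding then applies
// neg/abs as source modifiers instead of separate instructions.
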
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Omod) const {
  // FIXME: Handle Omod
  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  bool Modified = false;

  // XXX - Other targets seem to be able to do this without a worklist.
  SmallVector<LoadSDNode *, 8> LoadsToReplace;
  SmallVector<StoreSDNode *, 8> StoresToReplace;

  for (SDNode &Node : CurDAG->allnodes()) {
    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
      EVT VT = LD->getValueType(0);
      if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
        continue;

      // To simplify the TableGen patterns, we replace all i64 loads with
      // v2i32 loads. Alternatively, we could promote i64 loads to v2i32
      // during DAG legalization; however, places in the DAG legalizer
      // (e.g. ExpandUnalignedLoad) assume i64 loads stay i64 when the type
      // is legal, so doing this promotion early can cause problems.
      LoadsToReplace.push_back(LD);
    } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
      // Handle i64 stores here for the same reason mentioned above for loads.
      SDValue Value = ST->getValue();
      if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
        continue;
      StoresToReplace.push_back(ST);
    }
  }

  for (LoadSDNode *LD : LoadsToReplace) {
    SDLoc SL(LD);

    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
                                      LD->getBasePtr(), LD->getMemOperand());
    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
                                      MVT::i64, NewLoad);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
    Modified = true;
  }

  for (StoreSDNode *ST : StoresToReplace) {
    SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
                                       MVT::v2i32, ST->getValue());
    const SDValue StoreOps[] = {
      ST->getChain(),
      NewValue,
      ST->getBasePtr(),
      ST->getOffset()
    };

    CurDAG->UpdateNodeOperands(ST, StoreOps);
    Modified = true;
  }

  // XXX - Is this necessary?
  if (Modified)
    CurDAG->RemoveDeadNodes();
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
      *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}