//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    Gen(TM.getSubtarget<AMDGPUSubtarget>().getGeneration()) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties();

  setOperationAction(ISD::FADD, MVT::v4f32, Expand);
  setOperationAction(ISD::FADD, MVT::v2f32, Expand);
  setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
  setOperationAction(ISD::FMUL, MVT::v2f32, Expand);
  setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
  setOperationAction(ISD::FDIV, MVT::v2f32, Expand);
  setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
  setOperationAction(ISD::FSUB, MVT::v2f32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  setSchedulingPreference(Sched::VLIW);
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::LDS_READ_RET: {
    MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                                        TII->get(MI->getOpcode()),
                                        AMDGPU::OQAP);
    for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
      NewMI.addOperand(MI->getOperand(i));
    }
    TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
                                 MI->getOperand(0).getReg(),
                                 AMDGPU::OQAP);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, MVT::i32), // SWZ_X
        DAG.getConstant(1, MVT::i32), // SWZ_Y
        DAG.getConstant(2, MVT::i32), // SWZ_Z
        DAG.getConstant(3, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
                         Args, 8);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

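    // Descriptive note on the lowering below: a negative ijb operand reads the
    // parameter directly through INTERP_VEC_LOAD (no I/J pair is involved),
    // while a non-negative ijb names the I/J register pair (T-registers
    // 2*ijb and 2*ijb+1) consumed by INTERP_PAIR_XY/ZW; slot selects the
    // parameter (slot / 4) and the output channel (slot % 4).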
    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        const MachineFunction &MF = DAG.getMachineFunction();
        const R600InstrInfo *TII =
            static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      default:
        llvm_unreachable("Unknown Texture Operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, MVT::i32),
        DAG.getConstant(1, MVT::i32),
        DAG.getConstant(2, MVT::i32),
        DAG.getConstant(3, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(0, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(1, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(2, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(3, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(3, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
    }

    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
    // function
    DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
    return;
  }
  case ISD::STORE:
    SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    return;
  }
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT,
          DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
              DAG.getConstantFP(0.15915494309, MVT::f32)),
          DAG.getConstantFP(0.5, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, SDLoc(Op), VT,
      DAG.getNode(ISD::FADD, SDLoc(Op), VT, FractPart,
          DAG.getConstantFP(-0.5, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, SDLoc(Op), VT, TrigVal,
      DAG.getConstantFP(3.14159265359, MVT::f32));
}

SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(
      ISD::SETCC,
      SDLoc(Op),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   SDLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::CONSTANT_BUFFER_0);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {

  MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());

  FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
  assert(FIN);

  unsigned FrameIndex = FIN->getIndex();
  unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_any
  // select_cc f32, f32, 1.0f, 0.0f, cc_any
  // select_cc i32, i32, -1, 0, cc_any
  //

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    std::swap(False, True);
    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_any
  // select_cc f32, 0.0, i32, i32, cc_any
  // select_cc i32, 0, f32, f32, cc_any
  // select_cc i32, 0, i32, i32, cc_any
  //
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }


  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}

SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(ISD::SELECT_CC,
      SDLoc(Op),
      Op.getValueType(),
      Op.getOperand(0),
      DAG.getConstant(0, MVT::i32),
      Op.getOperand(1),
      Op.getOperand(2),
      DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers.  For indirect addressing, we need to
/// convert these pointers to a register index.  Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, MVT::i32));
}

void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
      Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
    // Convert pointer from byte address to dword address.
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                      DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                  Ptr, DAG.getConstant(2, MVT::i32)));

    if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
      assert(!"Truncated and indexed stores not supported yet");
    } else {
      Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
    }
    return Chain;
  }

  EVT ValueVT = Value.getValueType();

  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
      getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SDValue Stores[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, MVT::i32)); // Channel
  }

  return Chain;
}

// return (512 + (kc_bank << 12))
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue()) ||
        dyn_cast<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
    } else {
      // A non-constant ptr can't be folded; keep it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer.  This is not the case for ISD::LOAD, so
  // we need to manually expand loads that may be legal in some address spaces
  // and illegal in others.  SEXT loads from CONSTANT_BUFFER_0 are supported
  // for compute shaders, since the data is sign extended when it is uploaded
  // to the buffer.  However SEXT loads from other address spaces are not
  // supported, so we need to expand them here.
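  // The expansion below emits a plain extending load and then sign-extends the
  // result in registers: shift left so the sign bit of the memory type lands in
  // the MSB of VT, then arithmetic-shift right by the same amount.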
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue ShiftAmount =
        DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->getAlignment());
    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);

    SDValue MergedValues[2] = { Sra, Chain };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
      getTargetMachine().getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2];
  Ops[0] = LoweredLoad;
  Ops[1] = Chain;

  return DAG.getMergeValues(Ops, 2, DL);
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    SDLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  AnalyzeFormalArguments(CCInfo, Ins);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    EVT VT = VA.getLocVT();

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // The first 36 bytes of the input buffer contain information about
    // thread group and global sizes.
    SDValue Arg = DAG.getLoad(VT, DL, Chain,
                              DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
                              MachinePointerInfo(UndefValue::get(PtrTy)), false,
                              false, false, 4); // 4 is the preferred alignment for
                                                // the CONSTANT memory space.
    InVals.push_back(Arg);
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector()) return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}

static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };
  for (unsigned i = 0; i < 4; i++)
    RemapSwizzle[i] = i;

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (!isUnmovable[Idx]) {
        // Swap i and Idx
        std::swap(NewBldVec[Idx], NewBldVec[i]);
        std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
      }
      isUnmovable[Idx] = true;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec, 4);
}


SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
                                            SDValue Swz[4], SelectionDAG &DAG) const {
  assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
  }

  return BuildVector;
}


//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, MVT::i32), // True
                       DAG.getConstant(0, MVT::i32),  // False
                       SelectCC.getOperand(4)); // CC

    break;
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);
    SDLoc dl(N);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.getOpcode() == ISD::UNDEF)
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.getOpcode() == ISD::UNDEF) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
            DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
            DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getNode(ISD::BUILD_VECTOR, dl,
                       VT, &Ops[0], Ops.size());
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be custom combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
  }

  case ISD::SELECT_CC: {
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    //      selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    //      selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                   LHS.getOperand(0).getValueType().isInteger());
      return DAG.getSelectCC(SDLoc(N),
                             LHS.getOperand(0),
                             LHS.getOperand(1),
                             LHS.getOperand(2),
                             LHS.getOperand(3),
                             LHSCC);
    }
    }
  }
  case AMDGPUISD::EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7)  // SWZ_W
    };
    SDLoc DL(N);
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
    return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
                       NewArgs, 19);
  }
  }
  return SDValue();
}