//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//

#include "NVPTXISelDAGToDAG.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"

using namespace llvm;

#define DEBUG_TYPE "nvptx-isel"

unsigned FMAContractLevel = 0;

static cl::opt<unsigned, true>
FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
                    cl::desc("NVPTX Specific: FMA contraction (0: don't do it,"
                             " 1: do it, 2: do it aggressively)"),
                    cl::location(FMAContractLevel),
                    cl::init(2));

static cl::opt<int> UsePrecDivF32(
    "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
    cl::desc("NVPTX Specific: 0 use div.approx, 1 use div.full, 2 use"
             " IEEE Compliant F32 div.rnd if available."),
    cl::init(2));

static cl::opt<bool>
UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
               cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
               cl::init(true));

static cl::opt<bool>
FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
           cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
           cl::init(false));
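
// Illustrative usage (not part of the file): the options above are typically
// passed to llc when compiling NVVM IR, e.g.
//   llc -march=nvptx64 -mcpu=sm_35 -nvptx-fma-level=1 -nvptx-prec-divf32=0 kernel.ll
// The target/CPU names here are placeholders; any NVPTX subtarget works.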

/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
                                       llvm::CodeGenOpt::Level OptLevel) {
  return new NVPTXDAGToDAGISel(TM, OptLevel);
}

NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                                     CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(tm, OptLevel),
      Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {

  doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
  doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
  doFMAF32AGG =
      (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
  doFMAF64AGG =
      (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);

  allowFMA = (FMAContractLevel >= 1);

  doMulWide = (OptLevel > 0);
}

int NVPTXDAGToDAGISel::getDivF32Level() const {
  if (UsePrecDivF32.getNumOccurrences() > 0) {
    // If nvptx-prec-divf32=N is used on the command-line, always honor it
    return UsePrecDivF32;
  } else {
    // Otherwise, use div.approx if fast math is enabled
    if (TM.Options.UnsafeFPMath)
      return 0;
    else
      return 2;
  }
}

bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
  if (UsePrecSqrtF32.getNumOccurrences() > 0) {
    // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
    return UsePrecSqrtF32;
  } else {
    // Otherwise, use sqrt.approx if fast math is enabled
    if (TM.Options.UnsafeFPMath)
      return false;
    else
      return true;
  }
}

bool NVPTXDAGToDAGISel::useF32FTZ() const {
  if (FtzEnabled.getNumOccurrences() > 0) {
    // If nvptx-f32ftz is used on the command-line, always honor it
    return FtzEnabled;
  } else {
    const Function *F = MF->getFunction();
    // Otherwise, check for an nvptx-f32ftz attribute on the function
    if (F->hasFnAttribute("nvptx-f32ftz"))
      return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
                                              "nvptx-f32ftz")
                  .getValueAsString() == "true");
    else
      return false;
  }
}
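
// Illustrative IR (not part of the file): useF32FTZ() above honors a string
// function attribute, which in textual IR looks roughly like
//   define float @foo(float %x) #0 { ... }
//   attributes #0 = { "nvptx-f32ftz"="true" }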

/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr; // Already selected.
  }

  SDNode *ResNode = nullptr;
  switch (N->getOpcode()) {
  case ISD::LOAD:
    ResNode = SelectLoad(N);
    break;
  case ISD::STORE:
    ResNode = SelectStore(N);
    break;
  case NVPTXISD::LoadV2:
  case NVPTXISD::LoadV4:
    ResNode = SelectLoadVector(N);
    break;
  case NVPTXISD::LDGV2:
  case NVPTXISD::LDGV4:
  case NVPTXISD::LDUV2:
  case NVPTXISD::LDUV4:
    ResNode = SelectLDGLDU(N);
    break;
  case NVPTXISD::StoreV2:
  case NVPTXISD::StoreV4:
    ResNode = SelectStoreVector(N);
    break;
  case NVPTXISD::LoadParam:
  case NVPTXISD::LoadParamV2:
  case NVPTXISD::LoadParamV4:
    ResNode = SelectLoadParam(N);
    break;
  case NVPTXISD::StoreRetval:
  case NVPTXISD::StoreRetvalV2:
  case NVPTXISD::StoreRetvalV4:
    ResNode = SelectStoreRetval(N);
    break;
  case NVPTXISD::StoreParam:
  case NVPTXISD::StoreParamV2:
  case NVPTXISD::StoreParamV4:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParamU32:
    ResNode = SelectStoreParam(N);
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    ResNode = SelectIntrinsicNoChain(N);
    break;
  case ISD::INTRINSIC_W_CHAIN:
    ResNode = SelectIntrinsicChain(N);
    break;
  case NVPTXISD::Tex1DFloatI32:
  case NVPTXISD::Tex1DFloatFloat:
  case NVPTXISD::Tex1DFloatFloatLevel:
  case NVPTXISD::Tex1DFloatFloatGrad:
  case NVPTXISD::Tex1DI32I32:
  case NVPTXISD::Tex1DI32Float:
  case NVPTXISD::Tex1DI32FloatLevel:
  case NVPTXISD::Tex1DI32FloatGrad:
  case NVPTXISD::Tex1DArrayFloatI32:
  case NVPTXISD::Tex1DArrayFloatFloat:
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
  case NVPTXISD::Tex1DArrayI32I32:
  case NVPTXISD::Tex1DArrayI32Float:
  case NVPTXISD::Tex1DArrayI32FloatLevel:
  case NVPTXISD::Tex1DArrayI32FloatGrad:
  case NVPTXISD::Tex2DFloatI32:
  case NVPTXISD::Tex2DFloatFloat:
  case NVPTXISD::Tex2DFloatFloatLevel:
  case NVPTXISD::Tex2DFloatFloatGrad:
  case NVPTXISD::Tex2DI32I32:
  case NVPTXISD::Tex2DI32Float:
  case NVPTXISD::Tex2DI32FloatLevel:
  case NVPTXISD::Tex2DI32FloatGrad:
  case NVPTXISD::Tex2DArrayFloatI32:
  case NVPTXISD::Tex2DArrayFloatFloat:
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
  case NVPTXISD::Tex2DArrayI32I32:
  case NVPTXISD::Tex2DArrayI32Float:
  case NVPTXISD::Tex2DArrayI32FloatLevel:
  case NVPTXISD::Tex2DArrayI32FloatGrad:
  case NVPTXISD::Tex3DFloatI32:
  case NVPTXISD::Tex3DFloatFloat:
  case NVPTXISD::Tex3DFloatFloatLevel:
  case NVPTXISD::Tex3DFloatFloatGrad:
  case NVPTXISD::Tex3DI32I32:
  case NVPTXISD::Tex3DI32Float:
  case NVPTXISD::Tex3DI32FloatLevel:
  case NVPTXISD::Tex3DI32FloatGrad:
    ResNode = SelectTextureIntrinsic(N);
    break;
  case NVPTXISD::Suld1DI8Trap:
  case NVPTXISD::Suld1DI16Trap:
  case NVPTXISD::Suld1DI32Trap:
  case NVPTXISD::Suld1DV2I8Trap:
  case NVPTXISD::Suld1DV2I16Trap:
  case NVPTXISD::Suld1DV2I32Trap:
  case NVPTXISD::Suld1DV4I8Trap:
  case NVPTXISD::Suld1DV4I16Trap:
  case NVPTXISD::Suld1DV4I32Trap:
  case NVPTXISD::Suld1DArrayI8Trap:
  case NVPTXISD::Suld1DArrayI16Trap:
  case NVPTXISD::Suld1DArrayI32Trap:
  case NVPTXISD::Suld1DArrayV2I8Trap:
  case NVPTXISD::Suld1DArrayV2I16Trap:
  case NVPTXISD::Suld1DArrayV2I32Trap:
  case NVPTXISD::Suld1DArrayV4I8Trap:
  case NVPTXISD::Suld1DArrayV4I16Trap:
  case NVPTXISD::Suld1DArrayV4I32Trap:
  case NVPTXISD::Suld2DI8Trap:
  case NVPTXISD::Suld2DI16Trap:
  case NVPTXISD::Suld2DI32Trap:
  case NVPTXISD::Suld2DV2I8Trap:
  case NVPTXISD::Suld2DV2I16Trap:
  case NVPTXISD::Suld2DV2I32Trap:
  case NVPTXISD::Suld2DV4I8Trap:
  case NVPTXISD::Suld2DV4I16Trap:
  case NVPTXISD::Suld2DV4I32Trap:
  case NVPTXISD::Suld2DArrayI8Trap:
  case NVPTXISD::Suld2DArrayI16Trap:
  case NVPTXISD::Suld2DArrayI32Trap:
  case NVPTXISD::Suld2DArrayV2I8Trap:
  case NVPTXISD::Suld2DArrayV2I16Trap:
  case NVPTXISD::Suld2DArrayV2I32Trap:
  case NVPTXISD::Suld2DArrayV4I8Trap:
  case NVPTXISD::Suld2DArrayV4I16Trap:
  case NVPTXISD::Suld2DArrayV4I32Trap:
  case NVPTXISD::Suld3DI8Trap:
  case NVPTXISD::Suld3DI16Trap:
  case NVPTXISD::Suld3DI32Trap:
  case NVPTXISD::Suld3DV2I8Trap:
  case NVPTXISD::Suld3DV2I16Trap:
  case NVPTXISD::Suld3DV2I32Trap:
  case NVPTXISD::Suld3DV4I8Trap:
  case NVPTXISD::Suld3DV4I16Trap:
  case NVPTXISD::Suld3DV4I32Trap:
    ResNode = SelectSurfaceIntrinsic(N);
    break;
  case ISD::AND:
  case ISD::SRA:
  case ISD::SRL:
    // Try to select BFE
    ResNode = SelectBFE(N);
    break;
  case ISD::ADDRSPACECAST:
    ResNode = SelectAddrSpaceCast(N);
    break;
  default:
    break;
  }
  if (ResNode)
    return ResNode;
  return SelectCode(N);
}

SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  switch (IID) {
  default:
    return nullptr;
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_p:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_p:
    return SelectLDGLDU(N);
  }
}

static unsigned int getCodeAddrSpace(MemSDNode *N,
                                     const NVPTXSubtarget &Subtarget) {
  const Value *Src = N->getMemOperand()->getValue();

  if (!Src)
    return NVPTX::PTXLdStInstCode::GENERIC;

  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
    switch (PT->getAddressSpace()) {
    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
    case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
    default: break;
    }
  }
  return NVPTX::PTXLdStInstCode::GENERIC;
}

SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
  switch (IID) {
  default:
    return nullptr;
  case Intrinsic::nvvm_texsurf_handle_internal:
    return SelectTexSurfHandle(N);
  }
}

SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
  // Op 0 is the intrinsic ID
  SDValue Wrapper = N->getOperand(1);
  SDValue GlobalVal = Wrapper.getOperand(0);
  return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
                                GlobalVal);
}
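
// SelectAddrSpaceCast (below) lowers ISD::ADDRSPACECAST to the PTX cvta
// family: casts *to* the generic space use cvta.<space>, and casts *from*
// generic to a specific space use cvta.to.<space>. Casting directly between
// two non-generic address spaces is rejected.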

SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  SDValue Src = N->getOperand(0);
  AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
  unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
  unsigned DstAddrSpace = CastN->getDestAddressSpace();

  assert(SrcAddrSpace != DstAddrSpace &&
         "addrspacecast must be between different address spaces");

  if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
    // Specific to generic
    unsigned Opc;
    switch (SrcAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64
                                : NVPTX::cvta_global_yes;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64
                                : NVPTX::cvta_shared_yes;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64
                                : NVPTX::cvta_const_yes;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64
                                : NVPTX::cvta_local_yes;
      break;
    }
    return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
  } else {
    // Generic to specific
    if (SrcAddrSpace != 0)
      report_fatal_error("Cannot cast between two non-generic address spaces");
    unsigned Opc;
    switch (DstAddrSpace) {
    default: report_fatal_error("Bad address space in addrspacecast");
    case ADDRESS_SPACE_GLOBAL:
      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64
                                : NVPTX::cvta_to_global_yes;
      break;
    case ADDRESS_SPACE_SHARED:
      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64
                                : NVPTX::cvta_to_shared_yes;
      break;
    case ADDRESS_SPACE_CONST:
      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_const_yes_64
                                : NVPTX::cvta_to_const_yes;
      break;
    case ADDRESS_SPACE_LOCAL:
      Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64
                                : NVPTX::cvta_to_local_yes;
      break;
    }
    return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
  }
}

SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
  SDLoc dl(N);
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  SDNode *NVPTXLD = nullptr;

  // do not support pre/post inc/dec
  if (LD->isIndexed())
    return nullptr;

  if (!LoadedVT.isSimple())
    return nullptr;

  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool isVolatile = LD->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  MVT SimpleVT = LoadedVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    unsigned num = SimpleVT.getVectorNumElements();
    if (num == 2)
      vecType = NVPTX::PTXLdStInstCode::V2;
    else if (num == 4)
      vecType = NVPTX::PTXLdStInstCode::V4;
    else
      return nullptr;
  }

  // Type Setting: fromType + fromTypeWidth
  //
  // Sign     : ISD::SEXTLOAD
  // Unsigned : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //            type is integer
  // Float    : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
  unsigned int fromType;
  if ((LD->getExtensionType() == ISD::SEXTLOAD))
    fromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    fromType = NVPTX::PTXLdStInstCode::Float;
  else
    fromType = NVPTX::PTXLdStInstCode::Unsigned;
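
  // Addressing-mode note: the code below tries, in order, a direct (avar)
  // address, a symbol+immediate (asi) form, a register+immediate (ari) form,
  // and finally a plain register (areg) form, picking the matching LD_*
  // opcode for the loaded value type.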

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue Addr;
  SDValue Offset, Base;
  unsigned Opcode;
  MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;

  if (SelectDirectAddr(N1, Addr)) {
    switch (TargetVT) {
    case MVT::i8:
      Opcode = NVPTX::LD_i8_avar;
      break;
    case MVT::i16:
      Opcode = NVPTX::LD_i16_avar;
      break;
    case MVT::i32:
      Opcode = NVPTX::LD_i32_avar;
      break;
    case MVT::i64:
      Opcode = NVPTX::LD_i64_avar;
      break;
    case MVT::f32:
      Opcode = NVPTX::LD_f32_avar;
      break;
    case MVT::f64:
      Opcode = NVPTX::LD_f64_avar;
      break;
    default:
      return nullptr;
    }
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Addr, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
  } else if (Subtarget.is64Bit()
                 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
                 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
    switch (TargetVT) {
    case MVT::i8:
      Opcode = NVPTX::LD_i8_asi;
      break;
    case MVT::i16:
      Opcode = NVPTX::LD_i16_asi;
      break;
    case MVT::i32:
      Opcode = NVPTX::LD_i32_asi;
      break;
    case MVT::i64:
      Opcode = NVPTX::LD_i64_asi;
      break;
    case MVT::f32:
      Opcode = NVPTX::LD_f32_asi;
      break;
    case MVT::f64:
      Opcode = NVPTX::LD_f64_asi;
      break;
    default:
      return nullptr;
    }
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
  } else if (Subtarget.is64Bit()
                 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
                 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (TargetVT) {
      case MVT::i8:
        Opcode = NVPTX::LD_i8_ari_64;
        break;
      case MVT::i16:
        Opcode = NVPTX::LD_i16_ari_64;
        break;
      case MVT::i32:
        Opcode = NVPTX::LD_i32_ari_64;
        break;
      case MVT::i64:
        Opcode = NVPTX::LD_i64_ari_64;
        break;
      case MVT::f32:
        Opcode = NVPTX::LD_f32_ari_64;
        break;
      case MVT::f64:
        Opcode = NVPTX::LD_f64_ari_64;
        break;
      default:
        return nullptr;
      }
    } else {
      switch (TargetVT) {
      case MVT::i8:
        Opcode = NVPTX::LD_i8_ari;
        break;
      case MVT::i16:
        Opcode = NVPTX::LD_i16_ari;
        break;
      case MVT::i32:
        Opcode = NVPTX::LD_i32_ari;
        break;
      case MVT::i64:
        Opcode = NVPTX::LD_i64_ari;
        break;
      case MVT::f32:
        Opcode = NVPTX::LD_f32_ari;
        break;
      case MVT::f64:
        Opcode = NVPTX::LD_f64_ari;
        break;
      default:
        return nullptr;
      }
    }
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
  } else {
    if (Subtarget.is64Bit()) {
      switch (TargetVT) {
      case MVT::i8:
        Opcode = NVPTX::LD_i8_areg_64;
        break;
      case MVT::i16:
        Opcode = NVPTX::LD_i16_areg_64;
        break;
      case MVT::i32:
        Opcode = NVPTX::LD_i32_areg_64;
        break;
      case MVT::i64:
        Opcode = NVPTX::LD_i64_areg_64;
        break;
      case MVT::f32:
        Opcode = NVPTX::LD_f32_areg_64;
        break;
      case MVT::f64:
        Opcode = NVPTX::LD_f64_areg_64;
        break;
      default:
        return nullptr;
      }
    } else {
      switch (TargetVT) {
      case MVT::i8:
        Opcode = NVPTX::LD_i8_areg;
        break;
      case MVT::i16:
        Opcode = NVPTX::LD_i16_areg;
        break;
      case MVT::i32:
        Opcode = NVPTX::LD_i32_areg;
        break;
      case MVT::i64:
        Opcode = NVPTX::LD_i64_areg;
        break;
      case MVT::f32:
        Opcode = NVPTX::LD_f32_areg;
        break;
      case MVT::f64:
        Opcode = NVPTX::LD_f64_areg;
        break;
      default:
        return nullptr;
      }
    }
    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(fromType),
                      getI32Imm(fromTypeWidth), N1, Chain };
    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
  }

  if (NVPTXLD) {
    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
  }

  return NVPTXLD;
}
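
// SelectLoadVector handles the NVPTXISD::LoadV2/LoadV4 nodes produced when
// vector loads are split during legalization; the last operand carries the
// original LoadSDNode extension type.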

SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  unsigned Opcode;
  SDLoc DL(N);
  SDNode *LD;
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT LoadedVT = MemSD->getMemoryVT();

  if (!LoadedVT.isSimple())
    return nullptr;

  // Address Space Setting
  unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Vector Setting
  MVT SimpleVT = LoadedVT.getSimpleVT();

  // Type Setting: fromType + fromTypeWidth
  //
  // Sign     : ISD::SEXTLOAD
  // Unsigned : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
  //            type is integer
  // Float    : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
  MVT ScalarVT = SimpleVT.getScalarType();
  // Read at least 8 bits (predicates are stored as 8-bit values)
  unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
  unsigned int FromType;
  // The last operand holds the original LoadSDNode::getExtensionType() value
  unsigned ExtensionType = cast<ConstantSDNode>(
      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
  if (ExtensionType == ISD::SEXTLOAD)
    FromType = NVPTX::PTXLdStInstCode::Signed;
  else if (ScalarVT.isFloatingPoint())
    FromType = NVPTX::PTXLdStInstCode::Float;
  else
    FromType = NVPTX::PTXLdStInstCode::Unsigned;

  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::LoadV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    break;
  case NVPTXISD::LoadV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    break;
  default:
    return nullptr;
  }

  EVT EltVT = N->getValueType(0);

  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    default:
      return nullptr;
    case NVPTXISD::LoadV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default:
        return nullptr;
      case MVT::i8:
        Opcode = NVPTX::LDV_i8_v2_avar;
        break;
      case MVT::i16:
        Opcode = NVPTX::LDV_i16_v2_avar;
        break;
      case MVT::i32:
        Opcode = NVPTX::LDV_i32_v2_avar;
        break;
      case MVT::i64:
        Opcode = NVPTX::LDV_i64_v2_avar;
        break;
      case MVT::f32:
        Opcode = NVPTX::LDV_f32_v2_avar;
        break;
      case MVT::f64:
        Opcode = NVPTX::LDV_f64_v2_avar;
        break;
      }
      break;
    case NVPTXISD::LoadV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default:
        return nullptr;
      case MVT::i8:
        Opcode = NVPTX::LDV_i8_v4_avar;
        break;
      case MVT::i16:
        Opcode = NVPTX::LDV_i16_v4_avar;
        break;
      case MVT::i32:
        Opcode = NVPTX::LDV_i32_v4_avar;
        break;
      case MVT::f32:
        Opcode = NVPTX::LDV_f32_v4_avar;
        break;
      }
      break;
    }

    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType), getI32Imm(FromType),
                      getI32Imm(FromTypeWidth), Addr, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  } else if (Subtarget.is64Bit()
                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
    switch (N->getOpcode()) {
    default:
      return nullptr;
    case NVPTXISD::LoadV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default:
        return nullptr;
      case MVT::i8:
        Opcode = NVPTX::LDV_i8_v2_asi;
        break;
      case MVT::i16:
        Opcode = NVPTX::LDV_i16_v2_asi;
        break;
      case MVT::i32:
        Opcode = NVPTX::LDV_i32_v2_asi;
        break;
      case MVT::i64:
        Opcode = NVPTX::LDV_i64_v2_asi;
        break;
      case MVT::f32:
        Opcode = NVPTX::LDV_f32_v2_asi;
        break;
      case MVT::f64:
        Opcode = NVPTX::LDV_f64_v2_asi;
        break;
      }
      break;
    case NVPTXISD::LoadV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default:
        return nullptr;
      case MVT::i8:
        Opcode = NVPTX::LDV_i8_v4_asi;
        break;
      case MVT::i16:
        Opcode = NVPTX::LDV_i16_v4_asi;
        break;
      case MVT::i32:
        Opcode = NVPTX::LDV_i32_v4_asi;
        break;
      case MVT::f32:
        Opcode = NVPTX::LDV_f32_v4_asi;
        break;
      }
      break;
    }

    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType), getI32Imm(FromType),
                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  } else if (Subtarget.is64Bit()
                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return nullptr;
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::LDV_i8_v2_ari_64;
          break;
        case MVT::i16:
          Opcode = NVPTX::LDV_i16_v2_ari_64;
          break;
        case MVT::i32:
          Opcode = NVPTX::LDV_i32_v2_ari_64;
          break;
        case MVT::i64:
          Opcode = NVPTX::LDV_i64_v2_ari_64;
          break;
        case MVT::f32:
          Opcode = NVPTX::LDV_f32_v2_ari_64;
          break;
        case MVT::f64:
          Opcode = NVPTX::LDV_f64_v2_ari_64;
          break;
        }
        break;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::LDV_i8_v4_ari_64;
          break;
        case MVT::i16:
          Opcode = NVPTX::LDV_i16_v4_ari_64;
          break;
        case MVT::i32:
          Opcode = NVPTX::LDV_i32_v4_ari_64;
          break;
        case MVT::f32:
          Opcode = NVPTX::LDV_f32_v4_ari_64;
          break;
        }
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return nullptr;
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::LDV_i8_v2_ari;
          break;
        case MVT::i16:
          Opcode = NVPTX::LDV_i16_v2_ari;
          break;
        case MVT::i32:
          Opcode = NVPTX::LDV_i32_v2_ari;
          break;
        case MVT::i64:
          Opcode = NVPTX::LDV_i64_v2_ari;
          break;
        case MVT::f32:
          Opcode = NVPTX::LDV_f32_v2_ari;
          break;
        case MVT::f64:
          Opcode = NVPTX::LDV_f64_v2_ari;
          break;
        }
        break;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::LDV_i8_v4_ari;
          break;
        case MVT::i16:
          Opcode = NVPTX::LDV_i16_v4_ari;
          break;
        case MVT::i32:
          Opcode = NVPTX::LDV_i32_v4_ari;
          break;
        case MVT::f32:
          Opcode = NVPTX::LDV_f32_v4_ari;
          break;
        }
        break;
      }
    }

    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType), getI32Imm(FromType),
                      getI32Imm(FromTypeWidth), Base, Offset, Chain };

    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  } else {
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return nullptr;
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::LDV_i8_v2_areg_64;
          break;
        case MVT::i16:
          Opcode = NVPTX::LDV_i16_v2_areg_64;
          break;
        case MVT::i32:
          Opcode = NVPTX::LDV_i32_v2_areg_64;
          break;
        case MVT::i64:
          Opcode = NVPTX::LDV_i64_v2_areg_64;
          break;
        case MVT::f32:
          Opcode = NVPTX::LDV_f32_v2_areg_64;
          break;
        case MVT::f64:
          Opcode = NVPTX::LDV_f64_v2_areg_64;
          break;
        }
        break;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::LDV_i8_v4_areg_64;
          break;
        case MVT::i16:
          Opcode = NVPTX::LDV_i16_v4_areg_64;
          break;
        case MVT::i32:
          Opcode = NVPTX::LDV_i32_v4_areg_64;
          break;
        case MVT::f32:
          Opcode = NVPTX::LDV_f32_v4_areg_64;
          break;
        }
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return nullptr;
      case NVPTXISD::LoadV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::LDV_i8_v2_areg;
          break;
        case MVT::i16:
          Opcode = NVPTX::LDV_i16_v2_areg;
          break;
        case MVT::i32:
          Opcode = NVPTX::LDV_i32_v2_areg;
          break;
        case MVT::i64:
          Opcode = NVPTX::LDV_i64_v2_areg;
          break;
        case MVT::f32:
          Opcode = NVPTX::LDV_f32_v2_areg;
          break;
        case MVT::f64:
          Opcode = NVPTX::LDV_f64_v2_areg;
          break;
        }
        break;
      case NVPTXISD::LoadV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::LDV_i8_v4_areg;
          break;
        case MVT::i16:
          Opcode = NVPTX::LDV_i16_v4_areg;
          break;
        case MVT::i32:
          Opcode = NVPTX::LDV_i32_v4_areg;
          break;
        case MVT::f32:
          Opcode = NVPTX::LDV_f32_v4_areg;
          break;
        }
        break;
      }
    }

    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                      getI32Imm(VecType), getI32Imm(FromType),
                      getI32Imm(FromTypeWidth), Op1, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  }

  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);

  return LD;
}
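
// SelectLDGLDU lowers both the nvvm_ldg/ldu intrinsics and the custom
// NVPTXISD::LDGV2/LDGV4/LDUV2/LDUV4 nodes to the corresponding ld.global.nc /
// ldu.global instructions, choosing an opcode by element type and addressing
// mode.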

SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {

  SDValue Chain = N->getOperand(0);
  SDValue Op1;
  MemSDNode *Mem;
  bool IsLDG = true;

  // If this is an LDG intrinsic, the address is the third operand. If it is an
  // LDG/LDU SD node (from custom vector handling), then it is the second
  // operand.
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    Op1 = N->getOperand(2);
    Mem = cast<MemIntrinsicSDNode>(N);
    unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    switch (IID) {
    default:
      return nullptr;
    case Intrinsic::nvvm_ldg_global_f:
    case Intrinsic::nvvm_ldg_global_i:
    case Intrinsic::nvvm_ldg_global_p:
      IsLDG = true;
      break;
    case Intrinsic::nvvm_ldu_global_f:
    case Intrinsic::nvvm_ldu_global_i:
    case Intrinsic::nvvm_ldu_global_p:
      IsLDG = false;
      break;
    }
  } else {
    Op1 = N->getOperand(1);
    Mem = cast<MemSDNode>(N);
  }

  unsigned Opcode;
  SDLoc DL(N);
  SDNode *LD;
  SDValue Base, Offset, Addr;

  EVT EltVT = Mem->getMemoryVT();
  if (EltVT.isVector()) {
    EltVT = EltVT.getVectorElementType();
  }

  if (SelectDirectAddr(Op1, Addr)) {
    switch (N->getOpcode()) {
    default:
      return nullptr;
    case ISD::INTRINSIC_W_CHAIN:
      if (IsLDG) {
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
          break;
        case MVT::i64:
          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
          break;
        case MVT::f64:
          Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
          break;
        }
      } else {
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
          break;
        case MVT::i64:
          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
          break;
        case MVT::f64:
          Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
          break;
        }
      }
      break;
    case NVPTXISD::LDGV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default:
        return nullptr;
      case MVT::i8:
        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
        break;
      case MVT::i16:
        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
        break;
      case MVT::i32:
        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
        break;
      case MVT::i64:
        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
        break;
      case MVT::f32:
        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
        break;
      case MVT::f64:
        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
        break;
      }
      break;
    case NVPTXISD::LDUV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default:
        return nullptr;
      case MVT::i8:
        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
        break;
      case MVT::i16:
        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
        break;
      case MVT::i32:
        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
        break;
      case MVT::i64:
        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
        break;
      case MVT::f32:
        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
        break;
      case MVT::f64:
        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
        break;
      }
      break;
    case NVPTXISD::LDGV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default:
        return nullptr;
      case MVT::i8:
        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
        break;
      case MVT::i16:
        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
        break;
      case MVT::i32:
        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
        break;
      case MVT::f32:
        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
        break;
      }
      break;
    case NVPTXISD::LDUV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default:
        return nullptr;
      case MVT::i8:
        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
        break;
      case MVT::i16:
        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
        break;
      case MVT::i32:
        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
        break;
      case MVT::f32:
        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
        break;
      }
      break;
    }

    SDValue Ops[] = { Addr, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  } else if (Subtarget.is64Bit()
                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return nullptr;
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG) {
          switch (EltVT.getSimpleVT().SimpleTy) {
          default:
            return nullptr;
          case MVT::i8:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
            break;
          case MVT::i16:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
            break;
          case MVT::i32:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
            break;
          case MVT::i64:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
            break;
          case MVT::f32:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
            break;
          case MVT::f64:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
            break;
          }
        } else {
          switch (EltVT.getSimpleVT().SimpleTy) {
          default:
            return nullptr;
          case MVT::i8:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
            break;
          case MVT::i16:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
            break;
          case MVT::i32:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
            break;
          case MVT::i64:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
            break;
          case MVT::f32:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
            break;
          case MVT::f64:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
            break;
          }
        }
        break;
      case NVPTXISD::LDGV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
          break;
        case MVT::i64:
          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
          break;
        case MVT::f64:
          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
          break;
        }
        break;
      case NVPTXISD::LDUV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
          break;
        case MVT::i64:
          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
          break;
        case MVT::f64:
          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
          break;
        }
        break;
      case NVPTXISD::LDGV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
          break;
        }
        break;
      case NVPTXISD::LDUV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
          break;
        }
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return nullptr;
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG) {
          switch (EltVT.getSimpleVT().SimpleTy) {
          default:
            return nullptr;
          case MVT::i8:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
            break;
          case MVT::i16:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
            break;
          case MVT::i32:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
            break;
          case MVT::i64:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
            break;
          case MVT::f32:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
            break;
          case MVT::f64:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
            break;
          }
        } else {
          switch (EltVT.getSimpleVT().SimpleTy) {
          default:
            return nullptr;
          case MVT::i8:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
            break;
          case MVT::i16:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
            break;
          case MVT::i32:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
            break;
          case MVT::i64:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
            break;
          case MVT::f32:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
            break;
          case MVT::f64:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
            break;
          }
        }
        break;
      case NVPTXISD::LDGV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
          break;
        case MVT::i64:
          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
          break;
        case MVT::f64:
          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
          break;
        }
        break;
      case NVPTXISD::LDUV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
          break;
        case MVT::i64:
          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
          break;
        case MVT::f64:
          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
          break;
        }
        break;
      case NVPTXISD::LDGV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
          break;
        }
        break;
      case NVPTXISD::LDUV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
          break;
        }
        break;
      }
    }

    SDValue Ops[] = { Base, Offset, Chain };

    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  } else {
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return nullptr;
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG) {
          switch (EltVT.getSimpleVT().SimpleTy) {
          default:
            return nullptr;
          case MVT::i8:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
            break;
          case MVT::i16:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
            break;
          case MVT::i32:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
            break;
          case MVT::i64:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
            break;
          case MVT::f32:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
            break;
          case MVT::f64:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
            break;
          }
        } else {
          switch (EltVT.getSimpleVT().SimpleTy) {
          default:
            return nullptr;
          case MVT::i8:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
            break;
          case MVT::i16:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
            break;
          case MVT::i32:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
            break;
          case MVT::i64:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
            break;
          case MVT::f32:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
            break;
          case MVT::f64:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
            break;
          }
        }
        break;
      case NVPTXISD::LDGV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
          break;
        case MVT::i64:
          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
          break;
        case MVT::f64:
          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
          break;
        }
        break;
      case NVPTXISD::LDUV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
          break;
        case MVT::i64:
          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
          break;
        case MVT::f64:
          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
          break;
        }
        break;
      case NVPTXISD::LDGV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
          break;
        }
        break;
      case NVPTXISD::LDUV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
          break;
        }
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return nullptr;
      case ISD::INTRINSIC_W_CHAIN:
        if (IsLDG) {
          switch (EltVT.getSimpleVT().SimpleTy) {
          default:
            return nullptr;
          case MVT::i8:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
            break;
          case MVT::i16:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
            break;
          case MVT::i32:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
            break;
          case MVT::i64:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
            break;
          case MVT::f32:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
            break;
          case MVT::f64:
            Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
            break;
          }
        } else {
          switch (EltVT.getSimpleVT().SimpleTy) {
          default:
            return nullptr;
          case MVT::i8:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
            break;
          case MVT::i16:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
            break;
          case MVT::i32:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
            break;
          case MVT::i64:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
            break;
          case MVT::f32:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
            break;
          case MVT::f64:
            Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
            break;
          }
        }
        break;
      case NVPTXISD::LDGV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
          break;
        case MVT::i64:
          Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
          break;
        case MVT::f64:
          Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
          break;
        }
        break;
      case NVPTXISD::LDUV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
          break;
        case MVT::i64:
          Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
          break;
        case MVT::f64:
          Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
          break;
        }
        break;
      case NVPTXISD::LDGV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
          break;
        }
        break;
      case NVPTXISD::LDUV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:
          return nullptr;
        case MVT::i8:
          Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
          break;
        case MVT::i16:
          Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
          break;
        case MVT::i32:
          Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
          break;
        case MVT::f32:
          Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
          break;
        }
        break;
      }
    }

    SDValue Ops[] = { Op1, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  }

  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = Mem->getMemOperand();
  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);

  return LD;
}
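
// SelectStore mirrors SelectLoad: it computes the address space, volatility,
// vector width, and value-type operands for a PTX st instruction, then picks
// an ST_* opcode based on the addressing mode that matches the store address.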

SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
  SDLoc dl(N);
  StoreSDNode *ST = cast<StoreSDNode>(N);
  EVT StoreVT = ST->getMemoryVT();
  SDNode *NVPTXST = nullptr;

  // do not support pre/post inc/dec
  if (ST->isIndexed())
    return nullptr;

  if (!StoreVT.isSimple())
    return nullptr;

  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool isVolatile = ST->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    unsigned num = SimpleVT.getVectorNumElements();
    if (num == 2)
      vecType = NVPTX::PTXLdStInstCode::V2;
    else if (num == 4)
      vecType = NVPTX::PTXLdStInstCode::V4;
    else
      return nullptr;
  }

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  //
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  unsigned int toType;
  if (ScalarVT.isFloatingPoint())
    toType = NVPTX::PTXLdStInstCode::Float;
  else
    toType = NVPTX::PTXLdStInstCode::Unsigned;

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue Addr;
  SDValue Offset, Base;
  unsigned Opcode;
  MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;

  if (SelectDirectAddr(N2, Addr)) {
    switch (SourceVT) {
    case MVT::i8:
      Opcode = NVPTX::ST_i8_avar;
      break;
    case MVT::i16:
      Opcode = NVPTX::ST_i16_avar;
      break;
    case MVT::i32:
      Opcode = NVPTX::ST_i32_avar;
      break;
    case MVT::i64:
      Opcode = NVPTX::ST_i64_avar;
      break;
    case MVT::f32:
      Opcode = NVPTX::ST_f32_avar;
      break;
    case MVT::f64:
      Opcode = NVPTX::ST_f64_avar;
      break;
    default:
      return nullptr;
    }
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Addr, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
  } else if (Subtarget.is64Bit()
                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    switch (SourceVT) {
    case MVT::i8:
      Opcode = NVPTX::ST_i8_asi;
      break;
    case MVT::i16:
      Opcode = NVPTX::ST_i16_asi;
      break;
    case MVT::i32:
      Opcode = NVPTX::ST_i32_asi;
      break;
    case MVT::i64:
      Opcode = NVPTX::ST_i64_asi;
      break;
    case MVT::f32:
      Opcode = NVPTX::ST_f32_asi;
      break;
    case MVT::f64:
      Opcode = NVPTX::ST_f64_asi;
      break;
    default:
      return nullptr;
    }
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
  } else if (Subtarget.is64Bit()
                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (SourceVT) {
      case MVT::i8:
        Opcode = NVPTX::ST_i8_ari_64;
        break;
      case MVT::i16:
        Opcode = NVPTX::ST_i16_ari_64;
        break;
      case MVT::i32:
        Opcode = NVPTX::ST_i32_ari_64;
        break;
      case MVT::i64:
        Opcode = NVPTX::ST_i64_ari_64;
        break;
      case MVT::f32:
        Opcode = NVPTX::ST_f32_ari_64;
        break;
      case MVT::f64:
        Opcode = NVPTX::ST_f64_ari_64;
        break;
      default:
        return nullptr;
      }
    } else {
      switch (SourceVT) {
      case MVT::i8:
        Opcode = NVPTX::ST_i8_ari;
        break;
      case MVT::i16:
        Opcode = NVPTX::ST_i16_ari;
        break;
      case MVT::i32:
        Opcode = NVPTX::ST_i32_ari;
        break;
      case MVT::i64:
        Opcode = NVPTX::ST_i64_ari;
        break;
      case MVT::f32:
        Opcode = NVPTX::ST_f32_ari;
        break;
      case MVT::f64:
        Opcode = NVPTX::ST_f64_ari;
        break;
      default:
        return nullptr;
      }
    }
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
  } else {
    if (Subtarget.is64Bit()) {
      switch (SourceVT) {
      case MVT::i8:
        Opcode = NVPTX::ST_i8_areg_64;
        break;
      case MVT::i16:
        Opcode = NVPTX::ST_i16_areg_64;
        break;
      case MVT::i32:
        Opcode = NVPTX::ST_i32_areg_64;
        break;
      case MVT::i64:
        Opcode = NVPTX::ST_i64_areg_64;
        break;
      case MVT::f32:
        Opcode = NVPTX::ST_f32_areg_64;
        break;
      case MVT::f64:
        Opcode = NVPTX::ST_f64_areg_64;
        break;
      default:
        return nullptr;
      }
    } else {
      switch (SourceVT) {
      case MVT::i8:
        Opcode = NVPTX::ST_i8_areg;
        break;
      case MVT::i16:
        Opcode = NVPTX::ST_i16_areg;
        break;
      case MVT::i32:
        Opcode = NVPTX::ST_i32_areg;
        break;
      case MVT::i64:
        Opcode = NVPTX::ST_i64_areg;
        break;
      case MVT::f32:
        Opcode = NVPTX::ST_f32_areg;
        break;
      case MVT::f64:
        Opcode = NVPTX::ST_f64_areg;
        break;
      default:
        return nullptr;
      }
    }
    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), N2, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
  }

  if (NVPTXST) {
    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
  }

  return NVPTXST;
}
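
// SelectStoreVector handles NVPTXISD::StoreV2/StoreV4. The value operands
// come first, followed by the usual volatile/address-space/vector/type
// immediates and finally the selected address operands.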
SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue Addr, Offset, Base;
  unsigned Opcode;
  SDLoc DL(N);
  SDNode *ST;
  EVT EltVT = Op1.getValueType();
  MemSDNode *MemSD = cast<MemSDNode>(N);
  EVT StoreVT = MemSD->getMemoryVT();

  // Address Space Setting
  unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);

  if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
    report_fatal_error("Cannot store to pointer that points to constant "
                       "memory space");
  }

  // Volatile Setting
  // - .volatile is only available for .global and .shared
  bool IsVolatile = MemSD->isVolatile();
  if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    IsVolatile = false;

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  assert(StoreVT.isSimple() && "Store value is not simple");
  MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
  unsigned ToType;
  if (ScalarVT.isFloatingPoint())
    ToType = NVPTX::PTXLdStInstCode::Float;
  else
    ToType = NVPTX::PTXLdStInstCode::Unsigned;

  SmallVector<SDValue, 12> StOps;
  SDValue N2;
  unsigned VecType;

  switch (N->getOpcode()) {
  case NVPTXISD::StoreV2:
    VecType = NVPTX::PTXLdStInstCode::V2;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    N2 = N->getOperand(3);
    break;
  case NVPTXISD::StoreV4:
    VecType = NVPTX::PTXLdStInstCode::V4;
    StOps.push_back(N->getOperand(1));
    StOps.push_back(N->getOperand(2));
    StOps.push_back(N->getOperand(3));
    StOps.push_back(N->getOperand(4));
    N2 = N->getOperand(5);
    break;
  default:
    return nullptr;
  }

  StOps.push_back(getI32Imm(IsVolatile));
  StOps.push_back(getI32Imm(CodeAddrSpace));
  StOps.push_back(getI32Imm(VecType));
  StOps.push_back(getI32Imm(ToType));
  StOps.push_back(getI32Imm(ToTypeWidth));

  if (SelectDirectAddr(N2, Addr)) {
    switch (N->getOpcode()) {
    default:
      return nullptr;
    case NVPTXISD::StoreV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default:       return nullptr;
      case MVT::i8:  Opcode = NVPTX::STV_i8_v2_avar;  break;
      case MVT::i16: Opcode = NVPTX::STV_i16_v2_avar; break;
      case MVT::i32: Opcode = NVPTX::STV_i32_v2_avar; break;
      case MVT::i64: Opcode = NVPTX::STV_i64_v2_avar; break;
      case MVT::f32: Opcode = NVPTX::STV_f32_v2_avar; break;
      case MVT::f64: Opcode = NVPTX::STV_f64_v2_avar; break;
      }
      break;
    case NVPTXISD::StoreV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default:       return nullptr;
      case MVT::i8:  Opcode = NVPTX::STV_i8_v4_avar;  break;
      case MVT::i16: Opcode = NVPTX::STV_i16_v4_avar; break;
      case MVT::i32: Opcode = NVPTX::STV_i32_v4_avar; break;
      case MVT::f32: Opcode = NVPTX::STV_f32_v4_avar; break;
      }
      break;
    }
    StOps.push_back(Addr);
  } else if (Subtarget.is64Bit()
                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    switch (N->getOpcode()) {
    default:
      return nullptr;
    case NVPTXISD::StoreV2:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default:       return nullptr;
      case MVT::i8:  Opcode = NVPTX::STV_i8_v2_asi;  break;
      case MVT::i16: Opcode = NVPTX::STV_i16_v2_asi; break;
      case MVT::i32: Opcode = NVPTX::STV_i32_v2_asi; break;
      case MVT::i64: Opcode = NVPTX::STV_i64_v2_asi; break;
      case MVT::f32: Opcode = NVPTX::STV_f32_v2_asi; break;
      case MVT::f64: Opcode = NVPTX::STV_f64_v2_asi; break;
      }
      break;
    case NVPTXISD::StoreV4:
      switch (EltVT.getSimpleVT().SimpleTy) {
      default:       return nullptr;
      case MVT::i8:  Opcode = NVPTX::STV_i8_v4_asi;  break;
      case MVT::i16: Opcode = NVPTX::STV_i16_v4_asi; break;
      case MVT::i32: Opcode = NVPTX::STV_i32_v4_asi; break;
      case MVT::f32: Opcode = NVPTX::STV_f32_v4_asi; break;
      }
      break;
    }
    StOps.push_back(Base);
    StOps.push_back(Offset);
  } else if (Subtarget.is64Bit()
                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return nullptr;
      case NVPTXISD::StoreV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:       return nullptr;
        case MVT::i8:  Opcode = NVPTX::STV_i8_v2_ari_64;  break;
        case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari_64; break;
        case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari_64; break;
        case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari_64; break;
        case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari_64; break;
        case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari_64; break;
        }
        break;
      case NVPTXISD::StoreV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:       return nullptr;
        case MVT::i8:  Opcode = NVPTX::STV_i8_v4_ari_64;  break;
        case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari_64; break;
        case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari_64; break;
        case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari_64; break;
        }
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return nullptr;
      case NVPTXISD::StoreV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:       return nullptr;
        case MVT::i8:  Opcode = NVPTX::STV_i8_v2_ari;  break;
        case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari; break;
        case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari; break;
        case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari; break;
        case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari; break;
        case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari; break;
        }
        break;
      case NVPTXISD::StoreV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:       return nullptr;
        case MVT::i8:  Opcode = NVPTX::STV_i8_v4_ari;  break;
        case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari; break;
        case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari; break;
        case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari; break;
        }
        break;
      }
    }
    StOps.push_back(Base);
    StOps.push_back(Offset);
  } else {
    if (Subtarget.is64Bit()) {
      switch (N->getOpcode()) {
      default:
        return nullptr;
      case NVPTXISD::StoreV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:       return nullptr;
        case MVT::i8:  Opcode = NVPTX::STV_i8_v2_areg_64;  break;
        case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg_64; break;
        case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg_64; break;
        case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg_64; break;
        case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg_64; break;
        case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg_64; break;
        }
        break;
      case NVPTXISD::StoreV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:       return nullptr;
        case MVT::i8:  Opcode = NVPTX::STV_i8_v4_areg_64;  break;
        case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg_64; break;
        case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg_64; break;
        case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg_64; break;
        }
        break;
      }
    } else {
      switch (N->getOpcode()) {
      default:
        return nullptr;
      case NVPTXISD::StoreV2:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:       return nullptr;
        case MVT::i8:  Opcode = NVPTX::STV_i8_v2_areg;  break;
        case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg; break;
        case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg; break;
        case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg; break;
        case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg; break;
        case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg; break;
        }
        break;
      case NVPTXISD::StoreV4:
        switch (EltVT.getSimpleVT().SimpleTy) {
        default:       return nullptr;
        case MVT::i8:  Opcode = NVPTX::STV_i8_v4_areg;  break;
        case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg; break;
        case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg; break;
        case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg; break;
        }
        break;
      }
    }
    StOps.push_back(N2);
  }

  StOps.push_back(Chain);

  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);

  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);

  return ST;
}
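
// Illustrative sketch: a StoreV4 of four f32 values through a direct global
// address would be selected to roughly
//
//   STV_f32_v4_avar %v0, %v1, %v2, %v3, <flag immediates>, @sym, chain
//
// with the four scalar value operands first, then the same flag immediates
// as the scalar store, and finally the address operands produced by the
// matched addressing mode.
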
SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
  SDValue Chain = Node->getOperand(0);
  SDValue Offset = Node->getOperand(2);
  SDValue Flag = Node->getOperand(3);
  SDLoc DL(Node);
  MemSDNode *Mem = cast<MemSDNode>(Node);

  unsigned VecSize;
  switch (Node->getOpcode()) {
  default:
    return nullptr;
  case NVPTXISD::LoadParam:
    VecSize = 1;
    break;
  case NVPTXISD::LoadParamV2:
    VecSize = 2;
    break;
  case NVPTXISD::LoadParamV4:
    VecSize = 4;
    break;
  }

  EVT EltVT = Node->getValueType(0);
  EVT MemVT = Mem->getMemoryVT();

  unsigned Opc = 0;

  switch (VecSize) {
  default:
    return nullptr;
  case 1:
    switch (MemVT.getSimpleVT().SimpleTy) {
    default:       return nullptr;
    case MVT::i1:  Opc = NVPTX::LoadParamMemI8;  break;
    case MVT::i8:  Opc = NVPTX::LoadParamMemI8;  break;
    case MVT::i16: Opc = NVPTX::LoadParamMemI16; break;
    case MVT::i32: Opc = NVPTX::LoadParamMemI32; break;
    case MVT::i64: Opc = NVPTX::LoadParamMemI64; break;
    case MVT::f32: Opc = NVPTX::LoadParamMemF32; break;
    case MVT::f64: Opc = NVPTX::LoadParamMemF64; break;
    }
    break;
  case 2:
    switch (MemVT.getSimpleVT().SimpleTy) {
    default:       return nullptr;
    case MVT::i1:  Opc = NVPTX::LoadParamMemV2I8;  break;
    case MVT::i8:  Opc = NVPTX::LoadParamMemV2I8;  break;
    case MVT::i16: Opc = NVPTX::LoadParamMemV2I16; break;
    case MVT::i32: Opc = NVPTX::LoadParamMemV2I32; break;
    case MVT::i64: Opc = NVPTX::LoadParamMemV2I64; break;
    case MVT::f32: Opc = NVPTX::LoadParamMemV2F32; break;
    case MVT::f64: Opc = NVPTX::LoadParamMemV2F64; break;
    }
    break;
  case 4:
    switch (MemVT.getSimpleVT().SimpleTy) {
    default:       return nullptr;
    case MVT::i1:  Opc = NVPTX::LoadParamMemV4I8;  break;
    case MVT::i8:  Opc = NVPTX::LoadParamMemV4I8;  break;
    case MVT::i16: Opc = NVPTX::LoadParamMemV4I16; break;
    case MVT::i32: Opc = NVPTX::LoadParamMemV4I32; break;
    case MVT::f32: Opc = NVPTX::LoadParamMemV4F32; break;
    }
    break;
  }

  SDVTList VTs;
  if (VecSize == 1) {
    VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
  } else if (VecSize == 2) {
    VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
  } else {
    EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
    VTs = CurDAG->getVTList(EVTs);
  }

  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();

  SmallVector<SDValue, 2> Ops;
  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
  Ops.push_back(Chain);
  Ops.push_back(Flag);

  SDNode *Ret = CurDAG->getMachineNode(Opc, DL, VTs, Ops);
  return Ret;
}
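
// Illustrative sketch: a LoadParam of an i32 at byte offset 4 becomes a
// LoadParamMemI32 node whose operands are {TargetConstant(4), chain, glue}
// and whose results are {i32, chain, glue}; i1 values are read with the
// 8-bit variant, mirroring the opcode table above.
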
SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
  SDLoc DL(N);
  SDValue Chain = N->getOperand(0);
  SDValue Offset = N->getOperand(1);
  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
  MemSDNode *Mem = cast<MemSDNode>(N);

  // How many elements do we have?
  unsigned NumElts = 1;
  switch (N->getOpcode()) {
  default:
    return nullptr;
  case NVPTXISD::StoreRetval:
    NumElts = 1;
    break;
  case NVPTXISD::StoreRetvalV2:
    NumElts = 2;
    break;
  case NVPTXISD::StoreRetvalV4:
    NumElts = 4;
    break;
  }

  // Build vector of operands
  SmallVector<SDValue, 6> Ops;
  for (unsigned i = 0; i < NumElts; ++i)
    Ops.push_back(N->getOperand(i + 2));
  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
  Ops.push_back(Chain);

  // Determine target opcode
  // If we have an i1, use an 8-bit store. The lowering code in
  // NVPTXISelLowering will have already emitted an upcast.
  unsigned Opcode = 0;
  switch (NumElts) {
  default:
    return nullptr;
  case 1:
    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
    default:       return nullptr;
    case MVT::i1:  Opcode = NVPTX::StoreRetvalI8;  break;
    case MVT::i8:  Opcode = NVPTX::StoreRetvalI8;  break;
    case MVT::i16: Opcode = NVPTX::StoreRetvalI16; break;
    case MVT::i32: Opcode = NVPTX::StoreRetvalI32; break;
    case MVT::i64: Opcode = NVPTX::StoreRetvalI64; break;
    case MVT::f32: Opcode = NVPTX::StoreRetvalF32; break;
    case MVT::f64: Opcode = NVPTX::StoreRetvalF64; break;
    }
    break;
  case 2:
    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
    default:       return nullptr;
    case MVT::i1:  Opcode = NVPTX::StoreRetvalV2I8;  break;
    case MVT::i8:  Opcode = NVPTX::StoreRetvalV2I8;  break;
    case MVT::i16: Opcode = NVPTX::StoreRetvalV2I16; break;
    case MVT::i32: Opcode = NVPTX::StoreRetvalV2I32; break;
    case MVT::i64: Opcode = NVPTX::StoreRetvalV2I64; break;
    case MVT::f32: Opcode = NVPTX::StoreRetvalV2F32; break;
    case MVT::f64: Opcode = NVPTX::StoreRetvalV2F64; break;
    }
    break;
  case 4:
    switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
    default:       return nullptr;
    case MVT::i1:  Opcode = NVPTX::StoreRetvalV4I8;  break;
    case MVT::i8:  Opcode = NVPTX::StoreRetvalV4I8;  break;
    case MVT::i16: Opcode = NVPTX::StoreRetvalV4I16; break;
    case MVT::i32: Opcode = NVPTX::StoreRetvalV4I32; break;
    case MVT::f32: Opcode = NVPTX::StoreRetvalV4F32; break;
    }
    break;
  }

  SDNode *Ret = CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);

  return Ret;
}
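
// Illustrative sketch: 'ret i1 %b' reaches here as a StoreRetval whose memory
// VT is i1; per the table above it is stored with StoreRetvalI8, relying on
// NVPTXISelLowering having already widened the value to at least 8 bits.
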
SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
  SDLoc DL(N);
  SDValue Chain = N->getOperand(0);
  SDValue Param = N->getOperand(1);
  unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
  SDValue Offset = N->getOperand(2);
  unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
  MemSDNode *Mem = cast<MemSDNode>(N);
  SDValue Flag = N->getOperand(N->getNumOperands() - 1);

  // How many elements do we have?
  unsigned NumElts = 1;
  switch (N->getOpcode()) {
  default:
    return nullptr;
  case NVPTXISD::StoreParamU32:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParam:
    NumElts = 1;
    break;
  case NVPTXISD::StoreParamV2:
    NumElts = 2;
    break;
  case NVPTXISD::StoreParamV4:
    NumElts = 4;
    break;
  }

  // Build vector of operands
  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0; i < NumElts; ++i)
    Ops.push_back(N->getOperand(i + 3));
  Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
  Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
  Ops.push_back(Chain);
  Ops.push_back(Flag);

  // Determine target opcode
  // If we have an i1, use an 8-bit store. The lowering code in
  // NVPTXISelLowering will have already emitted an upcast.
  unsigned Opcode = 0;
  switch (N->getOpcode()) {
  default:
    switch (NumElts) {
    default:
      return nullptr;
    case 1:
      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
      default:       return nullptr;
      case MVT::i1:  Opcode = NVPTX::StoreParamI8;  break;
      case MVT::i8:  Opcode = NVPTX::StoreParamI8;  break;
      case MVT::i16: Opcode = NVPTX::StoreParamI16; break;
      case MVT::i32: Opcode = NVPTX::StoreParamI32; break;
      case MVT::i64: Opcode = NVPTX::StoreParamI64; break;
      case MVT::f32: Opcode = NVPTX::StoreParamF32; break;
      case MVT::f64: Opcode = NVPTX::StoreParamF64; break;
      }
      break;
    case 2:
      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
      default:       return nullptr;
      case MVT::i1:  Opcode = NVPTX::StoreParamV2I8;  break;
      case MVT::i8:  Opcode = NVPTX::StoreParamV2I8;  break;
      case MVT::i16: Opcode = NVPTX::StoreParamV2I16; break;
      case MVT::i32: Opcode = NVPTX::StoreParamV2I32; break;
      case MVT::i64: Opcode = NVPTX::StoreParamV2I64; break;
      case MVT::f32: Opcode = NVPTX::StoreParamV2F32; break;
      case MVT::f64: Opcode = NVPTX::StoreParamV2F64; break;
      }
      break;
    case 4:
      switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
      default:       return nullptr;
      case MVT::i1:  Opcode = NVPTX::StoreParamV4I8;  break;
      case MVT::i8:  Opcode = NVPTX::StoreParamV4I8;  break;
      case MVT::i16: Opcode = NVPTX::StoreParamV4I16; break;
      case MVT::i32: Opcode = NVPTX::StoreParamV4I32; break;
      case MVT::f32: Opcode = NVPTX::StoreParamV4F32; break;
      }
      break;
    }
    break;
  // Special case: if we have a sign-extend/zero-extend node, insert the
  // conversion instruction first, and use that as the value operand to
  // the selected StoreParam node.
  case NVPTXISD::StoreParamU32: {
    Opcode = NVPTX::StoreParamI32;
    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
                                                MVT::i32);
    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
                                         MVT::i32, Ops[0], CvtNone);
    Ops[0] = SDValue(Cvt, 0);
    break;
  }
  case NVPTXISD::StoreParamS32: {
    Opcode = NVPTX::StoreParamI32;
    SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
                                                MVT::i32);
    SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
                                         MVT::i32, Ops[0], CvtNone);
    Ops[0] = SDValue(Cvt, 0);
    break;
  }
  }

  SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
  SDNode *Ret = CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);

  return Ret;
}
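
// Illustrative sketch of the StoreParamU32/S32 special case: passing a 16-bit
// value in a 32-bit parameter slot first materializes a CVT_u32_u16 (or
// CVT_s32_s16) node with a NONE conversion mode, and the converted value
// replaces Ops[0] before the StoreParamI32 is emitted, so the selected
// sequence is roughly
//
//   %t = CVT_u32_u16 %v, /*CvtMode=*/NONE
//   StoreParamI32 %t, param, offset, chain, glue
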
SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue TexRef = N->getOperand(1);
  SDValue SampRef = N->getOperand(2);
  SDNode *Ret = nullptr;
  unsigned Opc = 0;
  SmallVector<SDValue, 8> Ops;

  switch (N->getOpcode()) {
  default: return nullptr;
  case NVPTXISD::Tex1DFloatI32:
    Opc = NVPTX::TEX_1D_F32_I32; break;
  case NVPTXISD::Tex1DFloatFloat:
    Opc = NVPTX::TEX_1D_F32_F32; break;
  case NVPTXISD::Tex1DFloatFloatLevel:
    Opc = NVPTX::TEX_1D_F32_F32_LEVEL; break;
  case NVPTXISD::Tex1DFloatFloatGrad:
    Opc = NVPTX::TEX_1D_F32_F32_GRAD; break;
  case NVPTXISD::Tex1DI32I32:
    Opc = NVPTX::TEX_1D_I32_I32; break;
  case NVPTXISD::Tex1DI32Float:
    Opc = NVPTX::TEX_1D_I32_F32; break;
  case NVPTXISD::Tex1DI32FloatLevel:
    Opc = NVPTX::TEX_1D_I32_F32_LEVEL; break;
  case NVPTXISD::Tex1DI32FloatGrad:
    Opc = NVPTX::TEX_1D_I32_F32_GRAD; break;
  case NVPTXISD::Tex1DArrayFloatI32:
    Opc = NVPTX::TEX_1D_ARRAY_F32_I32; break;
  case NVPTXISD::Tex1DArrayFloatFloat:
    Opc = NVPTX::TEX_1D_ARRAY_F32_F32; break;
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL; break;
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD; break;
  case NVPTXISD::Tex1DArrayI32I32:
    Opc = NVPTX::TEX_1D_ARRAY_I32_I32; break;
  case NVPTXISD::Tex1DArrayI32Float:
    Opc = NVPTX::TEX_1D_ARRAY_I32_F32; break;
  case NVPTXISD::Tex1DArrayI32FloatLevel:
    Opc = NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL; break;
  case NVPTXISD::Tex1DArrayI32FloatGrad:
    Opc = NVPTX::TEX_1D_ARRAY_I32_F32_GRAD; break;
  case NVPTXISD::Tex2DFloatI32:
    Opc = NVPTX::TEX_2D_F32_I32; break;
  case NVPTXISD::Tex2DFloatFloat:
    Opc = NVPTX::TEX_2D_F32_F32; break;
  case NVPTXISD::Tex2DFloatFloatLevel:
    Opc = NVPTX::TEX_2D_F32_F32_LEVEL; break;
  case NVPTXISD::Tex2DFloatFloatGrad:
    Opc = NVPTX::TEX_2D_F32_F32_GRAD; break;
  case NVPTXISD::Tex2DI32I32:
    Opc = NVPTX::TEX_2D_I32_I32; break;
  case NVPTXISD::Tex2DI32Float:
    Opc = NVPTX::TEX_2D_I32_F32; break;
  case NVPTXISD::Tex2DI32FloatLevel:
    Opc = NVPTX::TEX_2D_I32_F32_LEVEL; break;
  case NVPTXISD::Tex2DI32FloatGrad:
    Opc = NVPTX::TEX_2D_I32_F32_GRAD; break;
  case NVPTXISD::Tex2DArrayFloatI32:
    Opc = NVPTX::TEX_2D_ARRAY_F32_I32; break;
  case NVPTXISD::Tex2DArrayFloatFloat:
    Opc = NVPTX::TEX_2D_ARRAY_F32_F32; break;
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL; break;
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD; break;
  case NVPTXISD::Tex2DArrayI32I32:
    Opc = NVPTX::TEX_2D_ARRAY_I32_I32; break;
  case NVPTXISD::Tex2DArrayI32Float:
    Opc = NVPTX::TEX_2D_ARRAY_I32_F32; break;
  case NVPTXISD::Tex2DArrayI32FloatLevel:
    Opc = NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL; break;
  case NVPTXISD::Tex2DArrayI32FloatGrad:
    Opc = NVPTX::TEX_2D_ARRAY_I32_F32_GRAD; break;
  case NVPTXISD::Tex3DFloatI32:
    Opc = NVPTX::TEX_3D_F32_I32; break;
  case NVPTXISD::Tex3DFloatFloat:
    Opc = NVPTX::TEX_3D_F32_F32; break;
  case NVPTXISD::Tex3DFloatFloatLevel:
    Opc = NVPTX::TEX_3D_F32_F32_LEVEL; break;
  case NVPTXISD::Tex3DFloatFloatGrad:
    Opc = NVPTX::TEX_3D_F32_F32_GRAD; break;
  case NVPTXISD::Tex3DI32I32:
    Opc = NVPTX::TEX_3D_I32_I32; break;
  case NVPTXISD::Tex3DI32Float:
    Opc = NVPTX::TEX_3D_I32_F32; break;
  case NVPTXISD::Tex3DI32FloatLevel:
    Opc = NVPTX::TEX_3D_I32_F32_LEVEL; break;
  case NVPTXISD::Tex3DI32FloatGrad:
    Opc = NVPTX::TEX_3D_I32_F32_GRAD; break;
  }

  Ops.push_back(TexRef);
  Ops.push_back(SampRef);

  // Copy over indices
  for (unsigned i = 3; i < N->getNumOperands(); ++i) {
    Ops.push_back(N->getOperand(i));
  }

  Ops.push_back(Chain);
  Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
  return Ret;
}
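
// Illustrative sketch: a 2D float-coordinate texture fetch, e.g.
// Tex2DFloatFloat(chain, texref, sampler, x, y), is selected to
// TEX_2D_F32_F32 with operands {texref, sampler, x, y, chain}; the loop
// above simply appends every coordinate operand after the two handles.
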
SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue TexHandle = N->getOperand(1);
  SDNode *Ret = nullptr;
  unsigned Opc = 0;
  SmallVector<SDValue, 8> Ops;
  // Number of coordinate operands that follow the surface handle:
  // 1 for 1D, 2 for 1D-array and 2D, 3 for 2D-array and 3D.
  unsigned NumCoords = 0;
  switch (N->getOpcode()) {
  default: return nullptr;
  case NVPTXISD::Suld1DI8Trap:
    Opc = NVPTX::SULD_1D_I8_TRAP; NumCoords = 1; break;
  case NVPTXISD::Suld1DI16Trap:
    Opc = NVPTX::SULD_1D_I16_TRAP; NumCoords = 1; break;
  case NVPTXISD::Suld1DI32Trap:
    Opc = NVPTX::SULD_1D_I32_TRAP; NumCoords = 1; break;
  case NVPTXISD::Suld1DV2I8Trap:
    Opc = NVPTX::SULD_1D_V2I8_TRAP; NumCoords = 1; break;
  case NVPTXISD::Suld1DV2I16Trap:
    Opc = NVPTX::SULD_1D_V2I16_TRAP; NumCoords = 1; break;
  case NVPTXISD::Suld1DV2I32Trap:
    Opc = NVPTX::SULD_1D_V2I32_TRAP; NumCoords = 1; break;
  case NVPTXISD::Suld1DV4I8Trap:
    Opc = NVPTX::SULD_1D_V4I8_TRAP; NumCoords = 1; break;
  case NVPTXISD::Suld1DV4I16Trap:
    Opc = NVPTX::SULD_1D_V4I16_TRAP; NumCoords = 1; break;
  case NVPTXISD::Suld1DV4I32Trap:
    Opc = NVPTX::SULD_1D_V4I32_TRAP; NumCoords = 1; break;
  case NVPTXISD::Suld1DArrayI8Trap:
    Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld1DArrayI16Trap:
    Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld1DArrayI32Trap:
    Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld1DArrayV2I8Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld1DArrayV2I16Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld1DArrayV2I32Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld1DArrayV4I8Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld1DArrayV4I16Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld1DArrayV4I32Trap:
    Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld2DI8Trap:
    Opc = NVPTX::SULD_2D_I8_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld2DI16Trap:
    Opc = NVPTX::SULD_2D_I16_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld2DI32Trap:
    Opc = NVPTX::SULD_2D_I32_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld2DV2I8Trap:
    Opc = NVPTX::SULD_2D_V2I8_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld2DV2I16Trap:
    Opc = NVPTX::SULD_2D_V2I16_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld2DV2I32Trap:
    Opc = NVPTX::SULD_2D_V2I32_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld2DV4I8Trap:
    Opc = NVPTX::SULD_2D_V4I8_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld2DV4I16Trap:
    Opc = NVPTX::SULD_2D_V4I16_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld2DV4I32Trap:
    Opc = NVPTX::SULD_2D_V4I32_TRAP; NumCoords = 2; break;
  case NVPTXISD::Suld2DArrayI8Trap:
    Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP; NumCoords = 3; break;
  case NVPTXISD::Suld2DArrayI16Trap:
    Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP; NumCoords = 3; break;
  case NVPTXISD::Suld2DArrayI32Trap:
    Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP; NumCoords = 3; break;
  case NVPTXISD::Suld2DArrayV2I8Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP; NumCoords = 3; break;
  case NVPTXISD::Suld2DArrayV2I16Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP; NumCoords = 3; break;
  case NVPTXISD::Suld2DArrayV2I32Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP; NumCoords = 3; break;
  case NVPTXISD::Suld2DArrayV4I8Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP; NumCoords = 3; break;
  case NVPTXISD::Suld2DArrayV4I16Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP; NumCoords = 3; break;
  case NVPTXISD::Suld2DArrayV4I32Trap:
    Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP; NumCoords = 3; break;
  case NVPTXISD::Suld3DI8Trap:
    Opc = NVPTX::SULD_3D_I8_TRAP; NumCoords = 3; break;
  case NVPTXISD::Suld3DI16Trap:
    Opc = NVPTX::SULD_3D_I16_TRAP; NumCoords = 3; break;
  case NVPTXISD::Suld3DI32Trap:
    Opc = NVPTX::SULD_3D_I32_TRAP; NumCoords = 3; break;
  case NVPTXISD::Suld3DV2I8Trap:
    Opc = NVPTX::SULD_3D_V2I8_TRAP; NumCoords = 3; break;
  case NVPTXISD::Suld3DV2I16Trap:
    Opc = NVPTX::SULD_3D_V2I16_TRAP; NumCoords = 3; break;
  case NVPTXISD::Suld3DV2I32Trap:
    Opc = NVPTX::SULD_3D_V2I32_TRAP; NumCoords = 3; break;
  case NVPTXISD::Suld3DV4I8Trap:
    Opc = NVPTX::SULD_3D_V4I8_TRAP; NumCoords = 3; break;
  case NVPTXISD::Suld3DV4I16Trap:
    Opc = NVPTX::SULD_3D_V4I16_TRAP; NumCoords = 3; break;
  case NVPTXISD::Suld3DV4I32Trap:
    Opc = NVPTX::SULD_3D_V4I32_TRAP; NumCoords = 3; break;
  }
  // Operand order is unchanged from the original per-case code:
  // surface handle, then the coordinate operands, then the chain.
  Ops.push_back(TexHandle);
  for (unsigned i = 0; i < NumCoords; ++i)
    Ops.push_back(N->getOperand(2 + i));
  Ops.push_back(Chain);

  Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
  return Ret;
}
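
// Illustrative sketch: Suld2DV4I8Trap(chain, surfhandle, x, y) carries two
// coordinates, so it is selected to SULD_2D_V4I8_TRAP with operands
// {surfhandle, x, y, chain}; the 1D variants carry one coordinate and the
// 2D-array/3D variants carry three, as encoded in the table above.
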
/// SelectBFE - Look for instruction sequences that can be made more efficient
/// by using the 'bfe' (bit-field extract) PTX instruction
SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Len;
  SDValue Start;
  SDValue Val;
  bool IsSigned = false;

  if (N->getOpcode() == ISD::AND) {
    // Canonicalize the operands
    // We want 'and %val, %mask'
    if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
      std::swap(LHS, RHS);
    }

    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
    if (!Mask) {
      // We need a constant mask on the RHS of the AND
      return nullptr;
    }

    // Extract the mask bits
    uint64_t MaskVal = Mask->getZExtValue();
    if (!isMask_64(MaskVal)) {
      // We *could* handle shifted masks here, but doing so would require an
      // 'and' operation to fix up the low-order bits, so we would trade
      // shr+and for bfe+and, which has the same throughput
      return nullptr;
    }

    // How many bits are in our mask?
    uint64_t NumBits = CountTrailingOnes_64(MaskVal);
    Len = CurDAG->getTargetConstant(NumBits, MVT::i32);

    if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
      // We have a 'srl/and' pair, extract the effective start bit and length
      Val = LHS.getNode()->getOperand(0);
      Start = LHS.getNode()->getOperand(1);
      ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
      if (StartConst) {
        uint64_t StartVal = StartConst->getZExtValue();
        // How many "good" bits do we have left? "good" is defined here as bits
        // that exist in the original value, not shifted in.
        uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
        if (NumBits > GoodBits) {
          // Do not handle the case where bits have been shifted in. In theory
          // we could handle this, but the cost is likely higher than just
          // emitting the srl/and pair.
          return nullptr;
        }
        Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
      } else {
        // Do not handle the case where the shift amount (can be zero if no srl
        // was found) is not constant. We could handle this case, but it would
        // require run-time logic that would be more expensive than just
        // emitting the srl/and pair.
        return nullptr;
      }
    } else {
      // Do not handle the case where the LHS of the and is not a shift. While
      // it would be trivial to handle this case, it would just transform
      // 'and' -> 'bfe', but 'and' has higher throughput.
      return nullptr;
    }
  } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
    if (LHS->getOpcode() == ISD::AND) {
      ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
      if (!ShiftCnst) {
        // Shift amount must be constant
        return nullptr;
      }

      uint64_t ShiftAmt = ShiftCnst->getZExtValue();

      SDValue AndLHS = LHS->getOperand(0);
      SDValue AndRHS = LHS->getOperand(1);

      // Canonicalize the AND to have the mask on the RHS
      if (isa<ConstantSDNode>(AndLHS)) {
        std::swap(AndLHS, AndRHS);
      }

      ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
      if (!MaskCnst) {
        // Mask must be constant
        return nullptr;
      }

      uint64_t MaskVal = MaskCnst->getZExtValue();
      uint64_t NumZeros;
      uint64_t NumBits;
      if (isMask_64(MaskVal)) {
        NumZeros = 0;
        // The number of bits in the result bitfield will be the number of
        // trailing ones (the AND) minus the number of bits we shift off
        NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt;
      } else if (isShiftedMask_64(MaskVal)) {
        NumZeros = countTrailingZeros(MaskVal);
        unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros);
        // The number of bits in the result bitfield will be the number of
        // trailing zeros plus the number of set bits in the mask minus the
        // number of bits we shift off
        NumBits = NumZeros + NumOnes - ShiftAmt;
      } else {
        // This is not a mask we can handle
        return nullptr;
      }

      if (ShiftAmt < NumZeros) {
        // Handling this case would require extra logic that would make this
        // transformation non-profitable
        return nullptr;
      }

      Val = AndLHS;
      Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
      Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
    } else if (LHS->getOpcode() == ISD::SHL) {
      // Here, we have a pattern like:
      //
      // (sra (shl val, NN), MM)
      // or
      // (srl (shl val, NN), MM)
      //
      // If MM >= NN, we can efficiently optimize this with bfe
      Val = LHS->getOperand(0);

      SDValue ShlRHS = LHS->getOperand(1);
      ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
      if (!ShlCnst) {
        // Shift amount must be constant
        return nullptr;
      }
      uint64_t InnerShiftAmt = ShlCnst->getZExtValue();

      SDValue ShrRHS = RHS;
      ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
      if (!ShrCnst) {
        // Shift amount must be constant
        return nullptr;
      }
      uint64_t OuterShiftAmt = ShrCnst->getZExtValue();

      // To avoid extra codegen and be profitable, we need Outer >= Inner
      if (OuterShiftAmt < InnerShiftAmt) {
        return nullptr;
      }

      // If the outer shift is more than the type size, we have no bitfield to
      // extract (since we also check that the inner shift is <= the outer
      // shift, this also implies that the inner shift is < the type size)
      if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
        return nullptr;
      }

      Start =
          CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
      Len =
          CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
                                        OuterShiftAmt, MVT::i32);

      if (N->getOpcode() == ISD::SRA) {
        // If we have an arithmetic right shift, we need to use the signed bfe
        // variant
        IsSigned = true;
      }
    } else {
      // No can do...
      return nullptr;
    }
  } else {
    // No can do...
    return nullptr;
  }

  unsigned Opc;
  // For the BFE operations we form here from "and" and "srl", always use the
  // unsigned variants.
  if (Val.getValueType() == MVT::i32) {
    if (IsSigned) {
      Opc = NVPTX::BFE_S32rii;
    } else {
      Opc = NVPTX::BFE_U32rii;
    }
  } else if (Val.getValueType() == MVT::i64) {
    if (IsSigned) {
      Opc = NVPTX::BFE_S64rii;
    } else {
      Opc = NVPTX::BFE_U64rii;
    }
  } else {
    // We cannot handle this type
    return nullptr;
  }

  SDValue Ops[] = {
    Val, Start, Len
  };

  SDNode *Ret =
      CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);

  return Ret;
}
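
// Worked example (illustrative): for (and (srl %x:i32, 3), 255) the mask has
// CountTrailingOnes_64(0xFF) == 8 bits and the constant shift gives start 3,
// so the node above is selected to BFE_U32rii %x, 3, 8, i.e. a PTX 'bfe.u32'
// that extracts the 8-bit field starting at bit 3. Likewise, for the pattern
// (srl (and %x, 0xFF0), 4): NumZeros = 4, NumOnes = 8, so
// NumBits = 4 + 8 - 4 = 8 with start 4.
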
// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
  // Return true if TGA or ES.
  if (N.getOpcode() == ISD::TargetGlobalAddress ||
      N.getOpcode() == ISD::TargetExternalSymbol) {
    Address = N;
    return true;
  }
  if (N.getOpcode() == NVPTXISD::Wrapper) {
    Address = N.getOperand(0);
    return true;
  }
  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
    if (IID == Intrinsic::nvvm_ptr_gen_to_param)
      if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
        return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
  }
  return false;
}

// symbol+offset
bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
  if (Addr.getOpcode() == ISD::ADD) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
      SDValue base = Addr.getOperand(0);
      if (SelectDirectAddr(base, Base)) {
        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
        return true;
      }
    }
  }
  return false;
}

// symbol+offset
bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
}

// symbol+offset
bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
}

// register+offset
bool NVPTXDAGToDAGISel::SelectADDRri_imp(
    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
    Offset = CurDAG->getTargetConstant(0, mvt);
    return true;
  }
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress)
    return false; // direct calls.

  if (Addr.getOpcode() == ISD::ADD) {
    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
      return false;
    }
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
      if (FrameIndexSDNode *FIN =
              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
        // Constant offset from frame ref.
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
      else
        Base = Addr.getOperand(0);
      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
      return true;
    }
  }
  return false;
}

// register+offset
bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
                                     SDValue &Base, SDValue &Offset) {
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
}

// register+offset
bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
                                       SDValue &Base, SDValue &Offset) {
  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
}
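
// Informal summary of the addressing-mode helpers above: the suffixes used by
// the load/store opcodes earlier in this file correspond to what these
// predicates match -- _avar for a direct symbol (SelectDirectAddr), _asi for
// symbol+immediate (SelectADDRsi/SelectADDRsi64), _ari for register+immediate
// or a frame index (SelectADDRri/SelectADDRri64), and _areg when the address
// stays in a plain register and none of the other forms apply.
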
bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
                                                 unsigned int spN) const {
  const Value *Src = nullptr;
  // Even though MemIntrinsicSDNode is a subclass of MemSDNode,
  // the classof() for MemSDNode does not include MemIntrinsicSDNode
  // (See SelectionDAGNodes.h). So we need to check for both.
  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
    if (spN == 0 && mN->getMemOperand()->getPseudoValue())
      return true;
    Src = mN->getMemOperand()->getValue();
  } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
    if (spN == 0 && mN->getMemOperand()->getPseudoValue())
      return true;
    Src = mN->getMemOperand()->getValue();
  }
  if (!Src)
    return false;
  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
    return (PT->getAddressSpace() == spN);
  return false;
}

/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
  SDValue Op0, Op1;
  switch (ConstraintCode) {
  default:
    return true;
  case 'm': // memory
    if (SelectDirectAddr(Op, Op0)) {
      OutOps.push_back(Op0);
      OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
      return false;
    }
    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
      OutOps.push_back(Op0);
      OutOps.push_back(Op1);
      return false;
    }
    break;
  }
  return true;
}
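
// Illustrative sketch: for an inline asm "m" constraint on a global such as
// @g, the direct-address path above emits {@g, TargetConstant(0)}, while a
// register+offset address emits {base, offset}; the false return reports a
// successfully selected memory operand, and true leaves it unhandled.
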