1 // 2 // The LLVM Compiler Infrastructure 3 // 4 // This file is distributed under the University of Illinois Open Source 5 // License. See LICENSE.TXT for details. 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that NVPTX uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "NVPTXISelLowering.h" 15 #include "NVPTX.h" 16 #include "NVPTXTargetMachine.h" 17 #include "NVPTXTargetObjectFile.h" 18 #include "NVPTXUtilities.h" 19 #include "llvm/CodeGen/Analysis.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineRegisterInfo.h" 24 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 25 #include "llvm/IR/CallSite.h" 26 #include "llvm/IR/DerivedTypes.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/IR/GlobalValue.h" 29 #include "llvm/IR/IntrinsicInst.h" 30 #include "llvm/IR/Intrinsics.h" 31 #include "llvm/IR/Module.h" 32 #include "llvm/MC/MCSectionELF.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/ErrorHandling.h" 36 #include "llvm/Support/MathExtras.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include <sstream> 39 40 #undef DEBUG_TYPE 41 #define DEBUG_TYPE "nvptx-lower" 42 43 using namespace llvm; 44 45 static unsigned int uniqueCallSite = 0; 46 47 static cl::opt<bool> sched4reg( 48 "nvptx-sched4reg", 49 cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); 50 51 static cl::opt<unsigned> 52 FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, 53 cl::desc("NVPTX Specific: FMA contraction (0: don't do it" 54 " 1: do it 2: do it aggressively"), 55 cl::init(2)); 56 57 static bool IsPTXVectorType(MVT VT) { 58 switch (VT.SimpleTy) { 59 default: 60 return false; 61 case 
MVT::v2i1: 62 case MVT::v4i1: 63 case MVT::v2i8: 64 case MVT::v4i8: 65 case MVT::v2i16: 66 case MVT::v4i16: 67 case MVT::v2i32: 68 case MVT::v4i32: 69 case MVT::v2i64: 70 case MVT::v2f32: 71 case MVT::v4f32: 72 case MVT::v2f64: 73 return true; 74 } 75 } 76 77 /// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive 78 /// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors 79 /// into their primitive components. 80 /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the 81 /// same number of types as the Ins/Outs arrays in LowerFormalArguments, 82 /// LowerCall, and LowerReturn. 83 static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty, 84 SmallVectorImpl<EVT> &ValueVTs, 85 SmallVectorImpl<uint64_t> *Offsets = nullptr, 86 uint64_t StartingOffset = 0) { 87 SmallVector<EVT, 16> TempVTs; 88 SmallVector<uint64_t, 16> TempOffsets; 89 90 ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset); 91 for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { 92 EVT VT = TempVTs[i]; 93 uint64_t Off = TempOffsets[i]; 94 if (VT.isVector()) 95 for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) { 96 ValueVTs.push_back(VT.getVectorElementType()); 97 if (Offsets) 98 Offsets->push_back(Off+j*VT.getVectorElementType().getStoreSize()); 99 } 100 else { 101 ValueVTs.push_back(VT); 102 if (Offsets) 103 Offsets->push_back(Off); 104 } 105 } 106 } 107 108 // NVPTXTargetLowering Constructor. 109 NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, 110 const NVPTXSubtarget &STI) 111 : TargetLowering(TM), nvTM(&TM), STI(STI) { 112 113 // always lower memset, memcpy, and memmove intrinsics to load/store 114 // instructions, rather 115 // then generating calls to memset, mempcy or memmove. 
  MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Jump is Expensive. Don't create extra control flow for 'and', 'or'
  // condition branches.
  setJumpIsExpensive(true);

  // By default, use the Source scheduling; -nvptx-sched4reg switches to
  // register-pressure scheduling.
  if (sched4reg)
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Source);

  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);

  // Operations not directly supported by NVPTX.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f64, Expand);
  setOperationAction(ISD::BR_CC, MVT::i1, Expand);
  setOperationAction(ISD::BR_CC, MVT::i8, Expand);
  setOperationAction(ISD::BR_CC, MVT::i16, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::i64, Expand);
  // Some SIGN_EXTEND_INREG can be done using cvt instruction.
  // For others we will expand to a SHL/SRA pair.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Multi-word shifts get custom lowering for both 32- and 64-bit parts.
  setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom);
  setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom);

  // Rotates are only legal when the subtarget has the corresponding
  // hardware rotate support; otherwise expand them.
  if (STI.hasROT64()) {
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i64, Expand);
    setOperationAction(ISD::ROTR, MVT::i64, Expand);
  }
  if (STI.hasROT32()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i32, Expand);
    setOperationAction(ISD::ROTR, MVT::i32, Expand);
  }

  setOperationAction(ISD::ROTL, MVT::i16, Expand);
  setOperationAction(ISD::ROTR, MVT::i16, Expand);
  setOperationAction(ISD::ROTL, MVT::i8, Expand);
  setOperationAction(ISD::ROTR, MVT::i8, Expand);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  // Indirect branch is not supported.
  // This also disables Jump Table creation.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

  // We want to legalize constant related memmove and memcpy
  // intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

  // Turn FP extload into load/fextend
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  // Turn FP truncstore into trunc + store.
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PTX does not support load / store predicate registers
  setOperationAction(ISD::LOAD, MVT::i1, Custom);
  setOperationAction(ISD::STORE, MVT::i1, Custom);

  // i1 extending loads are promoted; i1 truncating stores are expanded.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }

  // This is legal in NVPTX
  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);

  // TRAP can be lowered to PTX trap
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::ADDC, MVT::i64, Expand);
  setOperationAction(ISD::ADDE, MVT::i64, Expand);

  // Register custom handling for vector loads/stores, limited to the
  // vector shapes accepted by IsPTXVectorType.
  for (MVT VT : MVT::vector_valuetypes()) {
    if (IsPTXVectorType(VT)) {
      setOperationAction(ISD::LOAD, VT, Custom);
      setOperationAction(ISD::STORE, VT, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
    }
  }

  // Custom handling for i8 intrinsics
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);

  // Count-leading-zeros and popcount are legal; count-trailing-zeros is not
  // and gets expanded.
  setOperationAction(ISD::CTLZ, MVT::i16, Legal);
  setOperationAction(ISD::CTLZ, MVT::i32, Legal);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);
  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTPOP, MVT::i16, Legal);
  setOperationAction(ISD::CTPOP, MVT::i32, Legal);
  setOperationAction(ISD::CTPOP, MVT::i64, Legal);

  // PTX does not directly support SELP of i1, so promote to i32 first
  setOperationAction(ISD::SELECT, MVT::i1, Custom);

  // We have some custom DAG combine patterns for these nodes
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::FADD);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::SHL);

  // Now deduce the information based on the above mentioned
  // actions
  computeRegisterProperties(STI.getRegisterInfo());
}

// Returns the symbolic name of the given NVPTX DAG node opcode, or nullptr
// for opcodes not handled here (used for debug dumps of the selection DAG).
const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default:
    return nullptr;
  case NVPTXISD::CALL:
    return "NVPTXISD::CALL";
  case NVPTXISD::RET_FLAG:
    return "NVPTXISD::RET_FLAG";
  case NVPTXISD::Wrapper:
    return "NVPTXISD::Wrapper";
  case NVPTXISD::DeclareParam:
    return "NVPTXISD::DeclareParam";
  case NVPTXISD::DeclareScalarParam:
    return "NVPTXISD::DeclareScalarParam";
  case NVPTXISD::DeclareRet:
    return "NVPTXISD::DeclareRet";
  case NVPTXISD::DeclareRetParam:
    return "NVPTXISD::DeclareRetParam";
  case NVPTXISD::PrintCall:
    return "NVPTXISD::PrintCall";
  case NVPTXISD::LoadParam:
    return "NVPTXISD::LoadParam";
  case NVPTXISD::LoadParamV2:
    return "NVPTXISD::LoadParamV2";
  case NVPTXISD::LoadParamV4:
    return "NVPTXISD::LoadParamV4";
  case NVPTXISD::StoreParam:
return "NVPTXISD::StoreParam"; 305 case NVPTXISD::StoreParamV2: 306 return "NVPTXISD::StoreParamV2"; 307 case NVPTXISD::StoreParamV4: 308 return "NVPTXISD::StoreParamV4"; 309 case NVPTXISD::StoreParamS32: 310 return "NVPTXISD::StoreParamS32"; 311 case NVPTXISD::StoreParamU32: 312 return "NVPTXISD::StoreParamU32"; 313 case NVPTXISD::CallArgBegin: 314 return "NVPTXISD::CallArgBegin"; 315 case NVPTXISD::CallArg: 316 return "NVPTXISD::CallArg"; 317 case NVPTXISD::LastCallArg: 318 return "NVPTXISD::LastCallArg"; 319 case NVPTXISD::CallArgEnd: 320 return "NVPTXISD::CallArgEnd"; 321 case NVPTXISD::CallVoid: 322 return "NVPTXISD::CallVoid"; 323 case NVPTXISD::CallVal: 324 return "NVPTXISD::CallVal"; 325 case NVPTXISD::CallSymbol: 326 return "NVPTXISD::CallSymbol"; 327 case NVPTXISD::Prototype: 328 return "NVPTXISD::Prototype"; 329 case NVPTXISD::MoveParam: 330 return "NVPTXISD::MoveParam"; 331 case NVPTXISD::StoreRetval: 332 return "NVPTXISD::StoreRetval"; 333 case NVPTXISD::StoreRetvalV2: 334 return "NVPTXISD::StoreRetvalV2"; 335 case NVPTXISD::StoreRetvalV4: 336 return "NVPTXISD::StoreRetvalV4"; 337 case NVPTXISD::PseudoUseParam: 338 return "NVPTXISD::PseudoUseParam"; 339 case NVPTXISD::RETURN: 340 return "NVPTXISD::RETURN"; 341 case NVPTXISD::CallSeqBegin: 342 return "NVPTXISD::CallSeqBegin"; 343 case NVPTXISD::CallSeqEnd: 344 return "NVPTXISD::CallSeqEnd"; 345 case NVPTXISD::CallPrototype: 346 return "NVPTXISD::CallPrototype"; 347 case NVPTXISD::LoadV2: 348 return "NVPTXISD::LoadV2"; 349 case NVPTXISD::LoadV4: 350 return "NVPTXISD::LoadV4"; 351 case NVPTXISD::LDGV2: 352 return "NVPTXISD::LDGV2"; 353 case NVPTXISD::LDGV4: 354 return "NVPTXISD::LDGV4"; 355 case NVPTXISD::LDUV2: 356 return "NVPTXISD::LDUV2"; 357 case NVPTXISD::LDUV4: 358 return "NVPTXISD::LDUV4"; 359 case NVPTXISD::StoreV2: 360 return "NVPTXISD::StoreV2"; 361 case NVPTXISD::StoreV4: 362 return "NVPTXISD::StoreV4"; 363 case NVPTXISD::FUN_SHFL_CLAMP: 364 return "NVPTXISD::FUN_SHFL_CLAMP"; 365 case 
NVPTXISD::FUN_SHFR_CLAMP: 366 return "NVPTXISD::FUN_SHFR_CLAMP"; 367 case NVPTXISD::IMAD: 368 return "NVPTXISD::IMAD"; 369 case NVPTXISD::MUL_WIDE_SIGNED: 370 return "NVPTXISD::MUL_WIDE_SIGNED"; 371 case NVPTXISD::MUL_WIDE_UNSIGNED: 372 return "NVPTXISD::MUL_WIDE_UNSIGNED"; 373 case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32"; 374 case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat"; 375 case NVPTXISD::Tex1DFloatFloatLevel: 376 return "NVPTXISD::Tex1DFloatFloatLevel"; 377 case NVPTXISD::Tex1DFloatFloatGrad: 378 return "NVPTXISD::Tex1DFloatFloatGrad"; 379 case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32"; 380 case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float"; 381 case NVPTXISD::Tex1DS32FloatLevel: 382 return "NVPTXISD::Tex1DS32FloatLevel"; 383 case NVPTXISD::Tex1DS32FloatGrad: 384 return "NVPTXISD::Tex1DS32FloatGrad"; 385 case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32"; 386 case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float"; 387 case NVPTXISD::Tex1DU32FloatLevel: 388 return "NVPTXISD::Tex1DU32FloatLevel"; 389 case NVPTXISD::Tex1DU32FloatGrad: 390 return "NVPTXISD::Tex1DU32FloatGrad"; 391 case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32"; 392 case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat"; 393 case NVPTXISD::Tex1DArrayFloatFloatLevel: 394 return "NVPTXISD::Tex1DArrayFloatFloatLevel"; 395 case NVPTXISD::Tex1DArrayFloatFloatGrad: 396 return "NVPTXISD::Tex1DArrayFloatFloatGrad"; 397 case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32"; 398 case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float"; 399 case NVPTXISD::Tex1DArrayS32FloatLevel: 400 return "NVPTXISD::Tex1DArrayS32FloatLevel"; 401 case NVPTXISD::Tex1DArrayS32FloatGrad: 402 return "NVPTXISD::Tex1DArrayS32FloatGrad"; 403 case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32"; 404 case NVPTXISD::Tex1DArrayU32Float: return 
"NVPTXISD::Tex1DArrayU32Float"; 405 case NVPTXISD::Tex1DArrayU32FloatLevel: 406 return "NVPTXISD::Tex1DArrayU32FloatLevel"; 407 case NVPTXISD::Tex1DArrayU32FloatGrad: 408 return "NVPTXISD::Tex1DArrayU32FloatGrad"; 409 case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32"; 410 case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat"; 411 case NVPTXISD::Tex2DFloatFloatLevel: 412 return "NVPTXISD::Tex2DFloatFloatLevel"; 413 case NVPTXISD::Tex2DFloatFloatGrad: 414 return "NVPTXISD::Tex2DFloatFloatGrad"; 415 case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32"; 416 case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float"; 417 case NVPTXISD::Tex2DS32FloatLevel: 418 return "NVPTXISD::Tex2DS32FloatLevel"; 419 case NVPTXISD::Tex2DS32FloatGrad: 420 return "NVPTXISD::Tex2DS32FloatGrad"; 421 case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32"; 422 case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float"; 423 case NVPTXISD::Tex2DU32FloatLevel: 424 return "NVPTXISD::Tex2DU32FloatLevel"; 425 case NVPTXISD::Tex2DU32FloatGrad: 426 return "NVPTXISD::Tex2DU32FloatGrad"; 427 case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32"; 428 case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; 429 case NVPTXISD::Tex2DArrayFloatFloatLevel: 430 return "NVPTXISD::Tex2DArrayFloatFloatLevel"; 431 case NVPTXISD::Tex2DArrayFloatFloatGrad: 432 return "NVPTXISD::Tex2DArrayFloatFloatGrad"; 433 case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32"; 434 case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float"; 435 case NVPTXISD::Tex2DArrayS32FloatLevel: 436 return "NVPTXISD::Tex2DArrayS32FloatLevel"; 437 case NVPTXISD::Tex2DArrayS32FloatGrad: 438 return "NVPTXISD::Tex2DArrayS32FloatGrad"; 439 case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32"; 440 case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float"; 441 case NVPTXISD::Tex2DArrayU32FloatLevel: 442 
return "NVPTXISD::Tex2DArrayU32FloatLevel"; 443 case NVPTXISD::Tex2DArrayU32FloatGrad: 444 return "NVPTXISD::Tex2DArrayU32FloatGrad"; 445 case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32"; 446 case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat"; 447 case NVPTXISD::Tex3DFloatFloatLevel: 448 return "NVPTXISD::Tex3DFloatFloatLevel"; 449 case NVPTXISD::Tex3DFloatFloatGrad: 450 return "NVPTXISD::Tex3DFloatFloatGrad"; 451 case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32"; 452 case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float"; 453 case NVPTXISD::Tex3DS32FloatLevel: 454 return "NVPTXISD::Tex3DS32FloatLevel"; 455 case NVPTXISD::Tex3DS32FloatGrad: 456 return "NVPTXISD::Tex3DS32FloatGrad"; 457 case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32"; 458 case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float"; 459 case NVPTXISD::Tex3DU32FloatLevel: 460 return "NVPTXISD::Tex3DU32FloatLevel"; 461 case NVPTXISD::Tex3DU32FloatGrad: 462 return "NVPTXISD::Tex3DU32FloatGrad"; 463 case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat"; 464 case NVPTXISD::TexCubeFloatFloatLevel: 465 return "NVPTXISD::TexCubeFloatFloatLevel"; 466 case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float"; 467 case NVPTXISD::TexCubeS32FloatLevel: 468 return "NVPTXISD::TexCubeS32FloatLevel"; 469 case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float"; 470 case NVPTXISD::TexCubeU32FloatLevel: 471 return "NVPTXISD::TexCubeU32FloatLevel"; 472 case NVPTXISD::TexCubeArrayFloatFloat: 473 return "NVPTXISD::TexCubeArrayFloatFloat"; 474 case NVPTXISD::TexCubeArrayFloatFloatLevel: 475 return "NVPTXISD::TexCubeArrayFloatFloatLevel"; 476 case NVPTXISD::TexCubeArrayS32Float: 477 return "NVPTXISD::TexCubeArrayS32Float"; 478 case NVPTXISD::TexCubeArrayS32FloatLevel: 479 return "NVPTXISD::TexCubeArrayS32FloatLevel"; 480 case NVPTXISD::TexCubeArrayU32Float: 481 return "NVPTXISD::TexCubeArrayU32Float"; 482 case 
NVPTXISD::TexCubeArrayU32FloatLevel: 483 return "NVPTXISD::TexCubeArrayU32FloatLevel"; 484 case NVPTXISD::Tld4R2DFloatFloat: 485 return "NVPTXISD::Tld4R2DFloatFloat"; 486 case NVPTXISD::Tld4G2DFloatFloat: 487 return "NVPTXISD::Tld4G2DFloatFloat"; 488 case NVPTXISD::Tld4B2DFloatFloat: 489 return "NVPTXISD::Tld4B2DFloatFloat"; 490 case NVPTXISD::Tld4A2DFloatFloat: 491 return "NVPTXISD::Tld4A2DFloatFloat"; 492 case NVPTXISD::Tld4R2DS64Float: 493 return "NVPTXISD::Tld4R2DS64Float"; 494 case NVPTXISD::Tld4G2DS64Float: 495 return "NVPTXISD::Tld4G2DS64Float"; 496 case NVPTXISD::Tld4B2DS64Float: 497 return "NVPTXISD::Tld4B2DS64Float"; 498 case NVPTXISD::Tld4A2DS64Float: 499 return "NVPTXISD::Tld4A2DS64Float"; 500 case NVPTXISD::Tld4R2DU64Float: 501 return "NVPTXISD::Tld4R2DU64Float"; 502 case NVPTXISD::Tld4G2DU64Float: 503 return "NVPTXISD::Tld4G2DU64Float"; 504 case NVPTXISD::Tld4B2DU64Float: 505 return "NVPTXISD::Tld4B2DU64Float"; 506 case NVPTXISD::Tld4A2DU64Float: 507 return "NVPTXISD::Tld4A2DU64Float"; 508 509 case NVPTXISD::TexUnified1DFloatS32: 510 return "NVPTXISD::TexUnified1DFloatS32"; 511 case NVPTXISD::TexUnified1DFloatFloat: 512 return "NVPTXISD::TexUnified1DFloatFloat"; 513 case NVPTXISD::TexUnified1DFloatFloatLevel: 514 return "NVPTXISD::TexUnified1DFloatFloatLevel"; 515 case NVPTXISD::TexUnified1DFloatFloatGrad: 516 return "NVPTXISD::TexUnified1DFloatFloatGrad"; 517 case NVPTXISD::TexUnified1DS32S32: 518 return "NVPTXISD::TexUnified1DS32S32"; 519 case NVPTXISD::TexUnified1DS32Float: 520 return "NVPTXISD::TexUnified1DS32Float"; 521 case NVPTXISD::TexUnified1DS32FloatLevel: 522 return "NVPTXISD::TexUnified1DS32FloatLevel"; 523 case NVPTXISD::TexUnified1DS32FloatGrad: 524 return "NVPTXISD::TexUnified1DS32FloatGrad"; 525 case NVPTXISD::TexUnified1DU32S32: 526 return "NVPTXISD::TexUnified1DU32S32"; 527 case NVPTXISD::TexUnified1DU32Float: 528 return "NVPTXISD::TexUnified1DU32Float"; 529 case NVPTXISD::TexUnified1DU32FloatLevel: 530 return 
"NVPTXISD::TexUnified1DU32FloatLevel"; 531 case NVPTXISD::TexUnified1DU32FloatGrad: 532 return "NVPTXISD::TexUnified1DU32FloatGrad"; 533 case NVPTXISD::TexUnified1DArrayFloatS32: 534 return "NVPTXISD::TexUnified1DArrayFloatS32"; 535 case NVPTXISD::TexUnified1DArrayFloatFloat: 536 return "NVPTXISD::TexUnified1DArrayFloatFloat"; 537 case NVPTXISD::TexUnified1DArrayFloatFloatLevel: 538 return "NVPTXISD::TexUnified1DArrayFloatFloatLevel"; 539 case NVPTXISD::TexUnified1DArrayFloatFloatGrad: 540 return "NVPTXISD::TexUnified1DArrayFloatFloatGrad"; 541 case NVPTXISD::TexUnified1DArrayS32S32: 542 return "NVPTXISD::TexUnified1DArrayS32S32"; 543 case NVPTXISD::TexUnified1DArrayS32Float: 544 return "NVPTXISD::TexUnified1DArrayS32Float"; 545 case NVPTXISD::TexUnified1DArrayS32FloatLevel: 546 return "NVPTXISD::TexUnified1DArrayS32FloatLevel"; 547 case NVPTXISD::TexUnified1DArrayS32FloatGrad: 548 return "NVPTXISD::TexUnified1DArrayS32FloatGrad"; 549 case NVPTXISD::TexUnified1DArrayU32S32: 550 return "NVPTXISD::TexUnified1DArrayU32S32"; 551 case NVPTXISD::TexUnified1DArrayU32Float: 552 return "NVPTXISD::TexUnified1DArrayU32Float"; 553 case NVPTXISD::TexUnified1DArrayU32FloatLevel: 554 return "NVPTXISD::TexUnified1DArrayU32FloatLevel"; 555 case NVPTXISD::TexUnified1DArrayU32FloatGrad: 556 return "NVPTXISD::TexUnified1DArrayU32FloatGrad"; 557 case NVPTXISD::TexUnified2DFloatS32: 558 return "NVPTXISD::TexUnified2DFloatS32"; 559 case NVPTXISD::TexUnified2DFloatFloat: 560 return "NVPTXISD::TexUnified2DFloatFloat"; 561 case NVPTXISD::TexUnified2DFloatFloatLevel: 562 return "NVPTXISD::TexUnified2DFloatFloatLevel"; 563 case NVPTXISD::TexUnified2DFloatFloatGrad: 564 return "NVPTXISD::TexUnified2DFloatFloatGrad"; 565 case NVPTXISD::TexUnified2DS32S32: 566 return "NVPTXISD::TexUnified2DS32S32"; 567 case NVPTXISD::TexUnified2DS32Float: 568 return "NVPTXISD::TexUnified2DS32Float"; 569 case NVPTXISD::TexUnified2DS32FloatLevel: 570 return "NVPTXISD::TexUnified2DS32FloatLevel"; 571 case 
NVPTXISD::TexUnified2DS32FloatGrad: 572 return "NVPTXISD::TexUnified2DS32FloatGrad"; 573 case NVPTXISD::TexUnified2DU32S32: 574 return "NVPTXISD::TexUnified2DU32S32"; 575 case NVPTXISD::TexUnified2DU32Float: 576 return "NVPTXISD::TexUnified2DU32Float"; 577 case NVPTXISD::TexUnified2DU32FloatLevel: 578 return "NVPTXISD::TexUnified2DU32FloatLevel"; 579 case NVPTXISD::TexUnified2DU32FloatGrad: 580 return "NVPTXISD::TexUnified2DU32FloatGrad"; 581 case NVPTXISD::TexUnified2DArrayFloatS32: 582 return "NVPTXISD::TexUnified2DArrayFloatS32"; 583 case NVPTXISD::TexUnified2DArrayFloatFloat: 584 return "NVPTXISD::TexUnified2DArrayFloatFloat"; 585 case NVPTXISD::TexUnified2DArrayFloatFloatLevel: 586 return "NVPTXISD::TexUnified2DArrayFloatFloatLevel"; 587 case NVPTXISD::TexUnified2DArrayFloatFloatGrad: 588 return "NVPTXISD::TexUnified2DArrayFloatFloatGrad"; 589 case NVPTXISD::TexUnified2DArrayS32S32: 590 return "NVPTXISD::TexUnified2DArrayS32S32"; 591 case NVPTXISD::TexUnified2DArrayS32Float: 592 return "NVPTXISD::TexUnified2DArrayS32Float"; 593 case NVPTXISD::TexUnified2DArrayS32FloatLevel: 594 return "NVPTXISD::TexUnified2DArrayS32FloatLevel"; 595 case NVPTXISD::TexUnified2DArrayS32FloatGrad: 596 return "NVPTXISD::TexUnified2DArrayS32FloatGrad"; 597 case NVPTXISD::TexUnified2DArrayU32S32: 598 return "NVPTXISD::TexUnified2DArrayU32S32"; 599 case NVPTXISD::TexUnified2DArrayU32Float: 600 return "NVPTXISD::TexUnified2DArrayU32Float"; 601 case NVPTXISD::TexUnified2DArrayU32FloatLevel: 602 return "NVPTXISD::TexUnified2DArrayU32FloatLevel"; 603 case NVPTXISD::TexUnified2DArrayU32FloatGrad: 604 return "NVPTXISD::TexUnified2DArrayU32FloatGrad"; 605 case NVPTXISD::TexUnified3DFloatS32: 606 return "NVPTXISD::TexUnified3DFloatS32"; 607 case NVPTXISD::TexUnified3DFloatFloat: 608 return "NVPTXISD::TexUnified3DFloatFloat"; 609 case NVPTXISD::TexUnified3DFloatFloatLevel: 610 return "NVPTXISD::TexUnified3DFloatFloatLevel"; 611 case NVPTXISD::TexUnified3DFloatFloatGrad: 612 return 
"NVPTXISD::TexUnified3DFloatFloatGrad"; 613 case NVPTXISD::TexUnified3DS32S32: 614 return "NVPTXISD::TexUnified3DS32S32"; 615 case NVPTXISD::TexUnified3DS32Float: 616 return "NVPTXISD::TexUnified3DS32Float"; 617 case NVPTXISD::TexUnified3DS32FloatLevel: 618 return "NVPTXISD::TexUnified3DS32FloatLevel"; 619 case NVPTXISD::TexUnified3DS32FloatGrad: 620 return "NVPTXISD::TexUnified3DS32FloatGrad"; 621 case NVPTXISD::TexUnified3DU32S32: 622 return "NVPTXISD::TexUnified3DU32S32"; 623 case NVPTXISD::TexUnified3DU32Float: 624 return "NVPTXISD::TexUnified3DU32Float"; 625 case NVPTXISD::TexUnified3DU32FloatLevel: 626 return "NVPTXISD::TexUnified3DU32FloatLevel"; 627 case NVPTXISD::TexUnified3DU32FloatGrad: 628 return "NVPTXISD::TexUnified3DU32FloatGrad"; 629 case NVPTXISD::TexUnifiedCubeFloatFloat: 630 return "NVPTXISD::TexUnifiedCubeFloatFloat"; 631 case NVPTXISD::TexUnifiedCubeFloatFloatLevel: 632 return "NVPTXISD::TexUnifiedCubeFloatFloatLevel"; 633 case NVPTXISD::TexUnifiedCubeS32Float: 634 return "NVPTXISD::TexUnifiedCubeS32Float"; 635 case NVPTXISD::TexUnifiedCubeS32FloatLevel: 636 return "NVPTXISD::TexUnifiedCubeS32FloatLevel"; 637 case NVPTXISD::TexUnifiedCubeU32Float: 638 return "NVPTXISD::TexUnifiedCubeU32Float"; 639 case NVPTXISD::TexUnifiedCubeU32FloatLevel: 640 return "NVPTXISD::TexUnifiedCubeU32FloatLevel"; 641 case NVPTXISD::TexUnifiedCubeArrayFloatFloat: 642 return "NVPTXISD::TexUnifiedCubeArrayFloatFloat"; 643 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: 644 return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel"; 645 case NVPTXISD::TexUnifiedCubeArrayS32Float: 646 return "NVPTXISD::TexUnifiedCubeArrayS32Float"; 647 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: 648 return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel"; 649 case NVPTXISD::TexUnifiedCubeArrayU32Float: 650 return "NVPTXISD::TexUnifiedCubeArrayU32Float"; 651 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: 652 return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel"; 653 case 
NVPTXISD::Tld4UnifiedR2DFloatFloat: 654 return "NVPTXISD::Tld4UnifiedR2DFloatFloat"; 655 case NVPTXISD::Tld4UnifiedG2DFloatFloat: 656 return "NVPTXISD::Tld4UnifiedG2DFloatFloat"; 657 case NVPTXISD::Tld4UnifiedB2DFloatFloat: 658 return "NVPTXISD::Tld4UnifiedB2DFloatFloat"; 659 case NVPTXISD::Tld4UnifiedA2DFloatFloat: 660 return "NVPTXISD::Tld4UnifiedA2DFloatFloat"; 661 case NVPTXISD::Tld4UnifiedR2DS64Float: 662 return "NVPTXISD::Tld4UnifiedR2DS64Float"; 663 case NVPTXISD::Tld4UnifiedG2DS64Float: 664 return "NVPTXISD::Tld4UnifiedG2DS64Float"; 665 case NVPTXISD::Tld4UnifiedB2DS64Float: 666 return "NVPTXISD::Tld4UnifiedB2DS64Float"; 667 case NVPTXISD::Tld4UnifiedA2DS64Float: 668 return "NVPTXISD::Tld4UnifiedA2DS64Float"; 669 case NVPTXISD::Tld4UnifiedR2DU64Float: 670 return "NVPTXISD::Tld4UnifiedR2DU64Float"; 671 case NVPTXISD::Tld4UnifiedG2DU64Float: 672 return "NVPTXISD::Tld4UnifiedG2DU64Float"; 673 case NVPTXISD::Tld4UnifiedB2DU64Float: 674 return "NVPTXISD::Tld4UnifiedB2DU64Float"; 675 case NVPTXISD::Tld4UnifiedA2DU64Float: 676 return "NVPTXISD::Tld4UnifiedA2DU64Float"; 677 678 case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp"; 679 case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp"; 680 case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp"; 681 case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp"; 682 case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp"; 683 case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp"; 684 case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp"; 685 case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp"; 686 case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp"; 687 case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp"; 688 case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp"; 689 690 case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp"; 691 case 
NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp"; 692 case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp"; 693 case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp"; 694 case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp"; 695 case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp"; 696 case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp"; 697 case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp"; 698 case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp"; 699 case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp"; 700 case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp"; 701 702 case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp"; 703 case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp"; 704 case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp"; 705 case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp"; 706 case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp"; 707 case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp"; 708 case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp"; 709 case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp"; 710 case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp"; 711 case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp"; 712 case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp"; 713 714 case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp"; 715 case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp"; 716 case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp"; 717 case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp"; 718 case NVPTXISD::Suld2DArrayV2I8Clamp: return 
"NVPTXISD::Suld2DArrayV2I8Clamp"; 719 case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp"; 720 case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp"; 721 case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp"; 722 case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp"; 723 case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp"; 724 case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp"; 725 726 case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp"; 727 case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp"; 728 case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp"; 729 case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp"; 730 case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp"; 731 case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp"; 732 case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp"; 733 case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp"; 734 case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp"; 735 case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp"; 736 case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp"; 737 738 case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap"; 739 case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap"; 740 case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap"; 741 case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap"; 742 case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap"; 743 case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap"; 744 case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap"; 745 case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap"; 746 case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap"; 747 case NVPTXISD::Suld1DV4I16Trap: return 
"NVPTXISD::Suld1DV4I16Trap"; 748 case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap"; 749 750 case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap"; 751 case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap"; 752 case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap"; 753 case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap"; 754 case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap"; 755 case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap"; 756 case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap"; 757 case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap"; 758 case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap"; 759 case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap"; 760 case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap"; 761 762 case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap"; 763 case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap"; 764 case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap"; 765 case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap"; 766 case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap"; 767 case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap"; 768 case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap"; 769 case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap"; 770 case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap"; 771 case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap"; 772 case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap"; 773 774 case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap"; 775 case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap"; 776 case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap"; 777 
case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap"; 778 case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap"; 779 case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap"; 780 case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap"; 781 case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap"; 782 case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap"; 783 case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap"; 784 case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap"; 785 786 case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap"; 787 case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap"; 788 case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap"; 789 case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap"; 790 case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap"; 791 case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap"; 792 case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap"; 793 case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap"; 794 case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap"; 795 case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap"; 796 case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap"; 797 798 case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero"; 799 case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero"; 800 case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero"; 801 case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero"; 802 case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero"; 803 case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero"; 804 case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero"; 805 case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero"; 806 case 
NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero"; 807 case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero"; 808 case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero"; 809 810 case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero"; 811 case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero"; 812 case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero"; 813 case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero"; 814 case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero"; 815 case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero"; 816 case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero"; 817 case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero"; 818 case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero"; 819 case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero"; 820 case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero"; 821 822 case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero"; 823 case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero"; 824 case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero"; 825 case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero"; 826 case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero"; 827 case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero"; 828 case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero"; 829 case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero"; 830 case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero"; 831 case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero"; 832 case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero"; 833 834 case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero"; 835 case NVPTXISD::Suld2DArrayI16Zero: return 
"NVPTXISD::Suld2DArrayI16Zero"; 836 case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero"; 837 case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero"; 838 case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero"; 839 case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero"; 840 case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero"; 841 case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero"; 842 case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero"; 843 case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero"; 844 case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero"; 845 846 case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero"; 847 case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero"; 848 case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero"; 849 case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero"; 850 case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero"; 851 case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero"; 852 case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero"; 853 case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero"; 854 case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero"; 855 case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero"; 856 case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero"; 857 } 858 } 859 860 TargetLoweringBase::LegalizeTypeAction 861 NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const { 862 if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1) 863 return TypeSplitVector; 864 865 return TargetLoweringBase::getPreferredVectorAction(VT); 866 } 867 868 SDValue 869 NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { 870 SDLoc dl(Op); 871 const GlobalValue *GV = 
cast<GlobalAddressSDNode>(Op)->getGlobal(); 872 Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); 873 return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); 874 } 875 876 std::string 877 NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, 878 const SmallVectorImpl<ISD::OutputArg> &Outs, 879 unsigned retAlignment, 880 const ImmutableCallSite *CS) const { 881 882 bool isABI = (STI.getSmVersion() >= 20); 883 assert(isABI && "Non-ABI compilation is not supported"); 884 if (!isABI) 885 return ""; 886 887 std::stringstream O; 888 O << "prototype_" << uniqueCallSite << " : .callprototype "; 889 890 if (retTy->getTypeID() == Type::VoidTyID) { 891 O << "()"; 892 } else { 893 O << "("; 894 if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) { 895 unsigned size = 0; 896 if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) { 897 size = ITy->getBitWidth(); 898 if (size < 32) 899 size = 32; 900 } else { 901 assert(retTy->isFloatingPointTy() && 902 "Floating point type expected here"); 903 size = retTy->getPrimitiveSizeInBits(); 904 } 905 906 O << ".param .b" << size << " _"; 907 } else if (isa<PointerType>(retTy)) { 908 O << ".param .b" << getPointerTy().getSizeInBits() << " _"; 909 } else if ((retTy->getTypeID() == Type::StructTyID) || 910 isa<VectorType>(retTy)) { 911 O << ".param .align " 912 << retAlignment 913 << " .b8 _[" 914 << getDataLayout()->getTypeAllocSize(retTy) << "]"; 915 } else { 916 llvm_unreachable("Unknown return type"); 917 } 918 O << ") "; 919 } 920 O << "_ ("; 921 922 bool first = true; 923 MVT thePointerTy = getPointerTy(); 924 925 unsigned OIdx = 0; 926 for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { 927 Type *Ty = Args[i].Ty; 928 if (!first) { 929 O << ", "; 930 } 931 first = false; 932 933 if (!Outs[OIdx].Flags.isByVal()) { 934 if (Ty->isAggregateType() || Ty->isVectorTy()) { 935 unsigned align = 0; 936 const CallInst *CallI = cast<CallInst>(CS->getInstruction()); 937 const DataLayout *TD = 
/// Build the PTX ".callprototype" string required for an indirect call.
///
/// The emitted text has the shape
///   prototype_<N> : .callprototype (<ret-param>) _ (<arg-params>);
/// where each param is rendered exactly the way NVPTXAsmPrinter renders
/// declared params, so the prototype matches the callee's declaration.
///
/// \param retTy        IR return type of the callee.
/// \param Args         Call argument list (IR-level types).
/// \param Outs         Lowered outgoing-arg descriptors; may be longer than
///                     Args when an aggregate/vector expands to several parts.
/// \param retAlignment Alignment to advertise for an aggregate/vector return.
/// \param CS           The call site, used for per-argument align metadata.
/// \return The prototype string, or "" for (unsupported) non-ABI compilation.
std::string
NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  unsigned retAlignment,
                                  const ImmutableCallSite *CS) const {

  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return "";

  std::stringstream O;
  O << "prototype_" << uniqueCallSite << " : .callprototype ";

  // --- Return-value portion: "()" for void, "(.param ...)" otherwise. ---
  if (retTy->getTypeID() == Type::VoidTyID) {
    O << "()";
  } else {
    O << "(";
    if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
      // Scalar integer/FP returns become ".param .b<bits> _"; integers are
      // widened to at least 32 bits to match the PTX ABI.
      unsigned size = 0;
      if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
        size = ITy->getBitWidth();
        if (size < 32)
          size = 32;
      } else {
        assert(retTy->isFloatingPointTy() &&
               "Floating point type expected here");
        size = retTy->getPrimitiveSizeInBits();
      }

      O << ".param .b" << size << " _";
    } else if (isa<PointerType>(retTy)) {
      // Pointers are returned as a .b<pointer-size> scalar param.
      O << ".param .b" << getPointerTy().getSizeInBits() << " _";
    } else if ((retTy->getTypeID() == Type::StructTyID) ||
               isa<VectorType>(retTy)) {
      // Aggregates/vectors are returned through an aligned byte array.
      O << ".param .align "
        << retAlignment
        << " .b8 _["
        << getDataLayout()->getTypeAllocSize(retTy) << "]";
    } else {
      llvm_unreachable("Unknown return type");
    }
    O << ") ";
  }
  O << "_ (";

  // --- Argument portion. ---
  bool first = true;
  MVT thePointerTy = getPointerTy();

  // OIdx tracks the matching entry in Outs, which can run ahead of the Args
  // index when an argument lowers to multiple parts.
  unsigned OIdx = 0;
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    Type *Ty = Args[i].Ty;
    if (!first) {
      O << ", ";
    }
    first = false;

    if (!Outs[OIdx].Flags.isByVal()) {
      if (Ty->isAggregateType() || Ty->isVectorTy()) {
        // Non-byval aggregate/vector: aligned byte-array param.
        unsigned align = 0;
        const CallInst *CallI = cast<CallInst>(CS->getInstruction());
        const DataLayout *TD = getDataLayout();
        // +1 because index 0 is reserved for return type alignment
        if (!llvm::getAlign(*CallI, i + 1, align))
          align = TD->getABITypeAlignment(Ty);
        unsigned sz = TD->getTypeAllocSize(Ty);
        O << ".param .align " << align << " .b8 ";
        O << "_";
        O << "[" << sz << "]";
        // update the index for Outs: skip the extra parts this argument
        // contributed so Outs[OIdx] stays in sync with Args[i].
        SmallVector<EVT, 16> vtparts;
        ComputeValueVTs(*this, Ty, vtparts);
        if (unsigned len = vtparts.size())
          OIdx += len - 1;
        continue;
      }
      // i8 types in IR will be i16 types in SDAG
      assert((getValueType(Ty) == Outs[OIdx].VT ||
              (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
             "type mismatch between callee prototype and arguments");
      // scalar type: integers widen to >= 32 bits, pointers use the target
      // pointer width, FP uses its natural width.
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32)
          sz = 32;
      } else if (isa<PointerType>(Ty))
        sz = thePointerTy.getSizeInBits();
      else
        sz = Ty->getPrimitiveSizeInBits();
      O << ".param .b" << sz << " ";
      O << "_";
      continue;
    }
    // Byval argument: sized/aligned from the pointee type and the byval
    // alignment recorded in the out-flags.
    const PointerType *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy && "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    unsigned align = Outs[OIdx].Flags.getByValAlign();
    unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
    O << ".param .align " << align << " .b8 ";
    O << "_";
    O << "[" << sz << "]";
  }
  O << ");";
  return O.str();
}
996 const Instruction *CalleeI = CS->getInstruction(); 997 assert(CalleeI && "Call target is not a function or derived value?"); 998 999 // With bitcast'd call targets, the instruction will be the call 1000 if (isa<CallInst>(CalleeI)) { 1001 // Check if we have call alignment metadata 1002 if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align)) 1003 return Align; 1004 1005 const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue(); 1006 // Ignore any bitcast instructions 1007 while(isa<ConstantExpr>(CalleeV)) { 1008 const ConstantExpr *CE = cast<ConstantExpr>(CalleeV); 1009 if (!CE->isCast()) 1010 break; 1011 // Look through the bitcast 1012 CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0); 1013 } 1014 1015 // We have now looked past all of the bitcasts. Do we finally have a 1016 // Function? 1017 if (isa<Function>(CalleeV)) 1018 DirectCallee = CalleeV; 1019 } 1020 } 1021 1022 // Check for function alignment information if we found that the 1023 // ultimate target is a Function 1024 if (DirectCallee) 1025 if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align)) 1026 return Align; 1027 1028 // Call is indirect or alignment information is not available, fall back to 1029 // the ABI type alignment 1030 return TD->getABITypeAlignment(Ty); 1031 } 1032 1033 SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 1034 SmallVectorImpl<SDValue> &InVals) const { 1035 SelectionDAG &DAG = CLI.DAG; 1036 SDLoc dl = CLI.DL; 1037 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 1038 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 1039 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 1040 SDValue Chain = CLI.Chain; 1041 SDValue Callee = CLI.Callee; 1042 bool &isTailCall = CLI.IsTailCall; 1043 ArgListTy &Args = CLI.getArgs(); 1044 Type *retTy = CLI.RetTy; 1045 ImmutableCallSite *CS = CLI.CS; 1046 1047 bool isABI = (STI.getSmVersion() >= 20); 1048 assert(isABI && "Non-ABI compilation is not supported"); 1049 if (!isABI) 1050 return Chain; 1051 
/// Lower an outgoing call for NVPTX.
///
/// PTX calls are expressed as a sequence of pseudo operations glued together:
/// declare each outgoing .param, store argument values into param space,
/// print the call itself (with a .callprototype for indirect calls), then
/// load the result(s) back out of the return param. The Chain/InFlag (glue)
/// threading below enforces the exact emission order, so statement order is
/// load-bearing throughout.
SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  ArgListTy &Args = CLI.getArgs();
  Type *retTy = CLI.RetTy;
  ImmutableCallSite *CS = CLI.CS;

  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;
  const DataLayout *TD = getDataLayout();
  MachineFunction &MF = DAG.getMachineFunction();
  const Function *F = MF.getFunction();

  // Keep the pre-CALLSEQ chain: byval argument loads below are anchored to it
  // so they are not serialized behind the param declarations.
  SDValue tempChain = Chain;
  Chain =
      DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
                           dl);
  SDValue InFlag = Chain.getValue(1);

  unsigned paramCount = 0;
  // Args.size() and Outs.size() need not match.
  // Outs.size() will be larger
  //   * if there is an aggregate argument with multiple fields (each field
  //     showing up separately in Outs)
  //   * if there is a vector argument with more than typical vector-length
  //     elements (generally if more than 4) where each vector element is
  //     individually present in Outs.
  // So a different index should be used for indexing into Outs/OutVals.
  // See similar issue in LowerFormalArguments.
  unsigned OIdx = 0;
  // Declare the .params or .reg need to pass values
  // to the function
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    EVT VT = Outs[OIdx].VT;
    Type *Ty = Args[i].Ty;

    if (!Outs[OIdx].Flags.isByVal()) {
      if (Ty->isAggregateType()) {
        // aggregate: declare one byte-array param, then store each scalar
        // part at its offset within the param.
        SmallVector<EVT, 16> vtparts;
        SmallVector<uint64_t, 16> Offsets;
        ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0);

        // paramCount + 1 because index 0 is reserved for the return value.
        unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
        // declare .param .align <align> .b8 .param<n>[<size>];
        unsigned sz = TD->getTypeAllocSize(Ty);
        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32),
                                      DAG.getConstant(paramCount, MVT::i32),
                                      DAG.getConstant(sz, MVT::i32), InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                            DeclareParamOps);
        InFlag = Chain.getValue(1);
        for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
          EVT elemtype = vtparts[j];
          unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]);
          // NOTE(review): 'sz' is the aggregate's total alloc size and is not
          // used after the DeclareParam above; this clamp looks vestigial
          // (possibly intended for a per-element size) -- confirm upstream.
          if (elemtype.isInteger() && (sz < 8))
            sz = 8;
          SDValue StVal = OutVals[OIdx];
          // Sub-16-bit parts are widened: i8 values live as i16 in SDAG.
          if (elemtype.getSizeInBits() < 16) {
            StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
          }
          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, MVT::i32),
                                     DAG.getConstant(Offsets[j], MVT::i32),
                                     StVal, InFlag };
          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
                                          CopyParamVTs, CopyParamOps,
                                          elemtype, MachinePointerInfo(),
                                          ArgAlign);
          InFlag = Chain.getValue(1);
          ++OIdx;
        }
        // The loop advanced OIdx once per part; back up one because the
        // enclosing for-loop increments OIdx itself.
        if (vtparts.size() > 0)
          --OIdx;
        ++paramCount;
        continue;
      }
      if (Ty->isVectorTy()) {
        // Vector argument: declare a byte-array param and store elements
        // with v1/v2/v4 StoreParam nodes.
        EVT ObjectVT = getValueType(Ty);
        unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
        // declare .param .align <align> .b8 .param<n>[<size>];
        unsigned sz = TD->getTypeAllocSize(Ty);
        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32),
                                      DAG.getConstant(paramCount, MVT::i32),
                                      DAG.getConstant(sz, MVT::i32), InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                            DeclareParamOps);
        InFlag = Chain.getValue(1);
        unsigned NumElts = ObjectVT.getVectorNumElements();
        EVT EltVT = ObjectVT.getVectorElementType();
        EVT MemVT = EltVT;
        bool NeedExtend = false;
        // i1/i8 elements are stored through i16 registers; MemVT keeps the
        // true in-memory element type.
        if (EltVT.getSizeInBits() < 16) {
          NeedExtend = true;
          EltVT = MVT::i16;
        }

        // V1 store
        if (NumElts == 1) {
          SDValue Elt = OutVals[OIdx++];
          if (NeedExtend)
            Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt);

          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, MVT::i32),
                                     DAG.getConstant(0, MVT::i32), Elt,
                                     InFlag };
          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
                                          CopyParamVTs, CopyParamOps,
                                          MemVT, MachinePointerInfo());
          InFlag = Chain.getValue(1);
        } else if (NumElts == 2) {
          SDValue Elt0 = OutVals[OIdx++];
          SDValue Elt1 = OutVals[OIdx++];
          if (NeedExtend) {
            Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0);
            Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1);
          }

          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, MVT::i32),
                                     DAG.getConstant(0, MVT::i32), Elt0, Elt1,
                                     InFlag };
          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl,
                                          CopyParamVTs, CopyParamOps,
                                          MemVT, MachinePointerInfo());
          InFlag = Chain.getValue(1);
        } else {
          unsigned curOffset = 0;
          // V4 stores
          // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
          // the
          // vector will be expanded to a power of 2 elements, so we know we
          // can always round up to the next multiple of 4 when creating the
          // vector stores.
          // e.g.  4 elem => 1 st.v4
          //       6 elem => 2 st.v4
          //       8 elem => 2 st.v4
          //      11 elem => 3 st.v4
          unsigned VecSize = 4;
          if (EltVT.getSizeInBits() == 64)
            VecSize = 2;

          // This is potentially only part of a vector, so assume all elements
          // are packed together.
          unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize;

          for (unsigned i = 0; i < NumElts; i += VecSize) {
            // Get values
            SDValue StoreVal;
            SmallVector<SDValue, 8> Ops;
            Ops.push_back(Chain);
            Ops.push_back(DAG.getConstant(paramCount, MVT::i32));
            Ops.push_back(DAG.getConstant(curOffset, MVT::i32));

            unsigned Opc = NVPTXISD::StoreParamV2;

            StoreVal = OutVals[OIdx++];
            if (NeedExtend)
              StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
            Ops.push_back(StoreVal);

            // Lanes past NumElts are padded with UNDEF so the store is
            // always a full v2/v4.
            if (i + 1 < NumElts) {
              StoreVal = OutVals[OIdx++];
              if (NeedExtend)
                StoreVal =
                    DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
            } else {
              StoreVal = DAG.getUNDEF(EltVT);
            }
            Ops.push_back(StoreVal);

            if (VecSize == 4) {
              Opc = NVPTXISD::StoreParamV4;
              if (i + 2 < NumElts) {
                StoreVal = OutVals[OIdx++];
                if (NeedExtend)
                  StoreVal =
                      DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
              } else {
                StoreVal = DAG.getUNDEF(EltVT);
              }
              Ops.push_back(StoreVal);

              if (i + 3 < NumElts) {
                StoreVal = OutVals[OIdx++];
                if (NeedExtend)
                  StoreVal =
                      DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
              } else {
                StoreVal = DAG.getUNDEF(EltVT);
              }
              Ops.push_back(StoreVal);
            }

            Ops.push_back(InFlag);

            SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
            Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops,
                                            MemVT, MachinePointerInfo());
            InFlag = Chain.getValue(1);
            curOffset += PerStoreOffset;
          }
        }
        ++paramCount;
        // Compensate for the enclosing loop's ++OIdx; the element stores
        // above already advanced it past this argument's last part.
        --OIdx;
        continue;
      }
      // Plain scalar
      // for ABI,    declare .param .b<size> .param<n>;
      unsigned sz = VT.getSizeInBits();
      bool needExtend = false;
      if (VT.isInteger()) {
        if (sz < 16)
          needExtend = true;
        if (sz < 32)
          sz = 32;
      }
      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareParamOps[] = { Chain,
                                    DAG.getConstant(paramCount, MVT::i32),
                                    DAG.getConstant(sz, MVT::i32),
                                    DAG.getConstant(0, MVT::i32), InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                          DeclareParamOps);
      InFlag = Chain.getValue(1);
      SDValue OutV = OutVals[OIdx];
      if (needExtend) {
        // zext/sext i1 to i16
        unsigned opc = ISD::ZERO_EXTEND;
        if (Outs[OIdx].Flags.isSExt())
          opc = ISD::SIGN_EXTEND;
        OutV = DAG.getNode(opc, dl, MVT::i16, OutV);
      }
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
                                 DAG.getConstant(0, MVT::i32), OutV, InFlag };

      // Pick the StoreParam flavor that preserves the zero/sign extension
      // semantics recorded in the out-flags.
      unsigned opcode = NVPTXISD::StoreParam;
      if (Outs[OIdx].Flags.isZExt())
        opcode = NVPTXISD::StoreParamU32;
      else if (Outs[OIdx].Flags.isSExt())
        opcode = NVPTXISD::StoreParamS32;
      Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps,
                                      VT, MachinePointerInfo());

      InFlag = Chain.getValue(1);
      ++paramCount;
      continue;
    }
    // struct or vector (byval): load each part from the caller's memory and
    // store it into the declared param byte-array.
    SmallVector<EVT, 16> vtparts;
    SmallVector<uint64_t, 16> Offsets;
    const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
    assert(PTy && "Type of a byval parameter should be pointer");
    ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0);

    // declare .param .align <align> .b8 .param<n>[<size>];
    unsigned sz = Outs[OIdx].Flags.getByValSize();
    SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
    // The ByValAlign in the Outs[OIdx].Flags is alway set at this point,
    // so we don't need to worry about natural alignment or not.
    // See TargetLowering::LowerCallTo().
    SDValue DeclareParamOps[] = {
      Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), MVT::i32),
      DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32),
      InFlag
    };
    Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                        DeclareParamOps);
    InFlag = Chain.getValue(1);
    for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
      EVT elemtype = vtparts[j];
      int curOffset = Offsets[j];
      unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
      SDValue srcAddr =
          DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx],
                      DAG.getConstant(curOffset, getPointerTy()));
      // Load from the original chain (tempChain) so these loads are not
      // ordered after the call sequence started above.
      SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
                                   MachinePointerInfo(), false, false, false,
                                   PartAlign);
      if (elemtype.getSizeInBits() < 16) {
        theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
      }
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
                                 DAG.getConstant(curOffset, MVT::i32), theVal,
                                 InFlag };
      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
                                      CopyParamOps, elemtype,
                                      MachinePointerInfo());

      InFlag = Chain.getValue(1);
    }
    ++paramCount;
  }

  // Func is non-null exactly when this is a direct call.
  GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
  unsigned retAlignment = 0;

  // Handle Result
  if (Ins.size() > 0) {
    SmallVector<EVT, 16> resvtparts;
    ComputeValueVTs(*this, retTy, resvtparts);

    // Declare
    //  .param .align 16 .b8 retval0[<size-in-bytes>], or
    //  .param .b<size-in-bits> retval0
    unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
    // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
    // these three types to match the logic in
    // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
    // Plus, this behavior is consistent with nvcc's.
    if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
        retTy->isPointerTy()) {
      // Scalar needs to be at least 32bit wide
      if (resultsz < 32)
        resultsz = 32;
      SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
                                  DAG.getConstant(resultsz, MVT::i32),
                                  DAG.getConstant(0, MVT::i32), InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
                          DeclareRetOps);
      InFlag = Chain.getValue(1);
    } else {
      retAlignment = getArgumentAlignment(Callee, CS, retTy, 0);
      SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareRetOps[] = { Chain,
                                  DAG.getConstant(retAlignment, MVT::i32),
                                  DAG.getConstant(resultsz / 8, MVT::i32),
                                  DAG.getConstant(0, MVT::i32), InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
                          DeclareRetOps);
      InFlag = Chain.getValue(1);
    }
  }

  if (!Func) {
    // This is indirect function call case : PTX requires a prototype of the
    // form
    // proto_0 : .callprototype(.param .b32 _)  _ (.param .b32 _);
    // to be emitted, and the label has to used as the last arg of call
    // instruction.
    // The prototype is embedded in a string and put as the operand for a
    // CallPrototype SDNode which will print out to the value of the string.
    SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS);
    const char *ProtoStr =
      nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
    SDValue ProtoOps[] = {
      Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag,
    };
    Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
    InFlag = Chain.getValue(1);
  }
  // Op to just print "call"
  SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue PrintCallOps[] = {
    Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag
  };
  Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
                      dl, PrintCallVTs, PrintCallOps);
  InFlag = Chain.getValue(1);

  // Ops to print out the function name
  SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallVoidOps[] = { Chain, Callee, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
  InFlag = Chain.getValue(1);

  // Ops to print out the param list
  SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgBeginOps[] = { Chain, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
                      CallArgBeginOps);
  InFlag = Chain.getValue(1);

  for (unsigned i = 0, e = paramCount; i != e; ++i) {
    // The final argument uses LastCallArg so the printer can close the list.
    unsigned opcode;
    if (i == (e - 1))
      opcode = NVPTXISD::LastCallArg;
    else
      opcode = NVPTXISD::CallArg;
    SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
                             DAG.getConstant(i, MVT::i32), InFlag };
    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
    InFlag = Chain.getValue(1);
  }
  SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32),
                              InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
  InFlag = Chain.getValue(1);

  if (!Func) {
    // Indirect call: reference the prototype label emitted above.
    SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32),
                               InFlag };
    Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
    InFlag = Chain.getValue(1);
  }

  // Generate loads from param memory/moves from registers for result
  if (Ins.size() > 0) {
    if (retTy && retTy->isVectorTy()) {
      EVT ObjectVT = getValueType(retTy);
      unsigned NumElts = ObjectVT.getVectorNumElements();
      EVT EltVT = ObjectVT.getVectorElementType();
      assert(STI.getTargetLowering()->getNumRegisters(F->getContext(),
                                                      ObjectVT) == NumElts &&
             "Vector was not scalarized");
      unsigned sz = EltVT.getSizeInBits();
      bool needTruncate = sz < 8;

      if (NumElts == 1) {
        // Just a simple load
        SmallVector<EVT, 4> LoadRetVTs;
        if (EltVT == MVT::i1 || EltVT == MVT::i8) {
          // If loading i1/i8 result, generate
          //   load.b8 i16
          //   if i1
          //   trunc i16 to i1
          LoadRetVTs.push_back(MVT::i16);
        } else
          LoadRetVTs.push_back(EltVT);
        LoadRetVTs.push_back(MVT::Other);
        LoadRetVTs.push_back(MVT::Glue);
        SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, MVT::i32),
                                DAG.getConstant(0, MVT::i32), InFlag};
        SDValue retval = DAG.getMemIntrinsicNode(
            NVPTXISD::LoadParam, dl,
            DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
        Chain = retval.getValue(1);
        InFlag = retval.getValue(2);
        SDValue Ret0 = retval;
        if (needTruncate)
          Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0);
        InVals.push_back(Ret0);
      } else if (NumElts == 2) {
        // LoadV2
        SmallVector<EVT, 4> LoadRetVTs;
        if (EltVT == MVT::i1 || EltVT == MVT::i8) {
          // If loading i1/i8 result, generate
          //   load.b8 i16
          //   if i1
          //   trunc i16 to i1
          LoadRetVTs.push_back(MVT::i16);
          LoadRetVTs.push_back(MVT::i16);
        } else {
          LoadRetVTs.push_back(EltVT);
          LoadRetVTs.push_back(EltVT);
        }
        LoadRetVTs.push_back(MVT::Other);
        LoadRetVTs.push_back(MVT::Glue);
        SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, MVT::i32),
                                DAG.getConstant(0, MVT::i32), InFlag};
        SDValue retval = DAG.getMemIntrinsicNode(
            NVPTXISD::LoadParamV2, dl,
            DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
        Chain = retval.getValue(2);
        InFlag = retval.getValue(3);
        SDValue Ret0 = retval.getValue(0);
        SDValue Ret1 = retval.getValue(1);
        if (needTruncate) {
          Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0);
          InVals.push_back(Ret0);
          Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1);
          InVals.push_back(Ret1);
        } else {
          InVals.push_back(Ret0);
          InVals.push_back(Ret1);
        }
      } else {
        // Split into N LoadV4 (or LoadV2 for 64-bit elements).
        unsigned Ofst = 0;
        unsigned VecSize = 4;
        unsigned Opc = NVPTXISD::LoadParamV4;
        if (EltVT.getSizeInBits() == 64) {
          VecSize = 2;
          Opc = NVPTXISD::LoadParamV2;
        }
        EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
        for (unsigned i = 0; i < NumElts; i += VecSize) {
          SmallVector<EVT, 8> LoadRetVTs;
          if (EltVT == MVT::i1 || EltVT == MVT::i8) {
            // If loading i1/i8 result, generate
            //   load.b8 i16
            //   if i1
            //   trunc i16 to i1
            for (unsigned j = 0; j < VecSize; ++j)
              LoadRetVTs.push_back(MVT::i16);
          } else {
            for (unsigned j = 0; j < VecSize; ++j)
              LoadRetVTs.push_back(EltVT);
          }
          LoadRetVTs.push_back(MVT::Other);
          LoadRetVTs.push_back(MVT::Glue);
          SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, MVT::i32),
                                  DAG.getConstant(Ofst, MVT::i32), InFlag};
          SDValue retval = DAG.getMemIntrinsicNode(
              Opc, dl, DAG.getVTList(LoadRetVTs),
              LoadRetOps, EltVT, MachinePointerInfo());
          // Chain/Glue sit after the VecSize result values.
          if (VecSize == 2) {
            Chain = retval.getValue(2);
            InFlag = retval.getValue(3);
          } else {
            Chain = retval.getValue(4);
            InFlag = retval.getValue(5);
          }

          for (unsigned j = 0; j < VecSize; ++j) {
            if (i + j >= NumElts)
              break;
            SDValue Elt = retval.getValue(j);
            if (needTruncate)
              Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
            InVals.push_back(Elt);
          }
          Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
        }
      }
    } else {
      // Scalar or aggregate return: one LoadParam per decomposed part.
      SmallVector<EVT, 16> VTs;
      SmallVector<uint64_t, 16> Offsets;
      ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0);
      assert(VTs.size() == Ins.size() && "Bad value decomposition");
      unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0);
      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
        unsigned sz = VTs[i].getSizeInBits();
        unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
        bool needTruncate = sz < 8;
        if (VTs[i].isInteger() && (sz < 8))
          sz = 8;

        SmallVector<EVT, 4> LoadRetVTs;
        EVT TheLoadType = VTs[i];
        if (retTy->isIntegerTy() &&
            TD->getTypeAllocSizeInBits(retTy) < 32) {
          // This is for integer types only, and specifically not for
          // aggregates.
          LoadRetVTs.push_back(MVT::i32);
          TheLoadType = MVT::i32;
        } else if (sz < 16) {
          // If loading i1/i8 result, generate
          //   load i8 (-> i16)
          //   trunc i16 to i1/i8
          LoadRetVTs.push_back(MVT::i16);
        } else
          LoadRetVTs.push_back(Ins[i].VT);
        LoadRetVTs.push_back(MVT::Other);
        LoadRetVTs.push_back(MVT::Glue);

        SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, MVT::i32),
                                DAG.getConstant(Offsets[i], MVT::i32), InFlag};
        SDValue retval = DAG.getMemIntrinsicNode(
            NVPTXISD::LoadParam, dl,
            DAG.getVTList(LoadRetVTs), LoadRetOps,
            TheLoadType, MachinePointerInfo(), AlignI);
        Chain = retval.getValue(1);
        InFlag = retval.getValue(2);
        SDValue Ret0 = retval.getValue(0);
        if (needTruncate)
          Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0);
        InVals.push_back(Ret0);
      }
    }
  }

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
                             DAG.getIntPtrConstant(uniqueCallSite + 1, true),
                             InFlag, dl);
  uniqueCallSite++;

  // set isTailCall to false for now, until we figure out how to express
  // tail call optimization in PTX
  isTailCall = false;
  return Chain;
}
// We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
SDValue
NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  SDLoc dl(Node);
  SmallVector<SDValue, 8> Ops;
  unsigned NumOperands = Node->getNumOperands();
  // Extract every element of every input vector, in order, and rebuild the
  // result as one flat BUILD_VECTOR of the concatenated type.
  for (unsigned i = 0; i < NumOperands; ++i) {
    SDValue SubOp = Node->getOperand(i);
    EVT VVT = SubOp.getNode()->getValueType(0);
    EVT EltVT = VVT.getVectorElementType();
    unsigned NumSubElem = VVT.getVectorNumElements();
    for (unsigned j = 0; j < NumSubElem; ++j) {
      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
                                DAG.getIntPtrConstant(j)));
    }
  }
  return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops);
}

/// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
/// amount, or
/// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
/// amount.
SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);

  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getSizeInBits();
  SDLoc dl(Op);
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);
  // SRA_PARTS shifts copies of the sign bit into the high part; SRL_PARTS
  // shifts in zeros.
  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;

  if (VTBits == 32 && STI.getSmVersion() >= 35) {

    // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
    // {dHi, dLo} = {aHi, aLo} >> Amt
    //   dHi = aHi >> Amt
    //   dLo = shf.r.clamp aLo, aHi, Amt

    SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
    SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
                             ShAmt);

    SDValue Ops[2] = { Lo, Hi };
    return DAG.getMergeValues(Ops, dl);
  }
  else {

    // Generic expansion: compute both the "small shift" (Amt < size) and
    // "large shift" (Amt >= size) forms of the low part, then select between
    // them at run time.
    // {dHi, dLo} = {aHi, aLo} >> Amt
    // - if (Amt>=size) then
    //      dLo = aHi >> (Amt-size)
    //      dHi = aHi >> Amt (this is either all 0 or all 1)
    //   else
    //      dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
    //      dHi = aHi >> Amt

    SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                   DAG.getConstant(VTBits, MVT::i32), ShAmt);
    SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
    SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
                                     DAG.getConstant(VTBits, MVT::i32));
    SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
    SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
    SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);

    SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
                               DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
    SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
    SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);

    SDValue Ops[2] = { Lo, Hi };
    return DAG.getMergeValues(Ops, dl);
  }
}

/// LowerShiftLeftParts - Lower SHL_PARTS, which
/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
/// amount, or
/// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
/// amount.
1717 SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op, 1718 SelectionDAG &DAG) const { 1719 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 1720 assert(Op.getOpcode() == ISD::SHL_PARTS); 1721 1722 EVT VT = Op.getValueType(); 1723 unsigned VTBits = VT.getSizeInBits(); 1724 SDLoc dl(Op); 1725 SDValue ShOpLo = Op.getOperand(0); 1726 SDValue ShOpHi = Op.getOperand(1); 1727 SDValue ShAmt = Op.getOperand(2); 1728 1729 if (VTBits == 32 && STI.getSmVersion() >= 35) { 1730 1731 // For 32bit and sm35, we can use the funnel shift 'shf' instruction. 1732 // {dHi, dLo} = {aHi, aLo} << Amt 1733 // dHi = shf.l.clamp aLo, aHi, Amt 1734 // dLo = aLo << Amt 1735 1736 SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi, 1737 ShAmt); 1738 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 1739 1740 SDValue Ops[2] = { Lo, Hi }; 1741 return DAG.getMergeValues(Ops, dl); 1742 } 1743 else { 1744 1745 // {dHi, dLo} = {aHi, aLo} << Amt 1746 // - if (Amt>=size) then 1747 // dLo = aLo << Amt (all 0) 1748 // dLo = aLo << (Amt-size) 1749 // else 1750 // dLo = aLo << Amt 1751 // dHi = (aHi << Amt) | (aLo >> (size-Amt)) 1752 1753 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 1754 DAG.getConstant(VTBits, MVT::i32), ShAmt); 1755 SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 1756 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 1757 DAG.getConstant(VTBits, MVT::i32)); 1758 SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 1759 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 1760 SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 1761 1762 SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, 1763 DAG.getConstant(VTBits, MVT::i32), ISD::SETGE); 1764 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 1765 SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); 1766 1767 SDValue Ops[2] = { Lo, Hi }; 1768 return DAG.getMergeValues(Ops, dl); 1769 } 1770 } 

// Entry point for every operation this target marked as "Custom" lowering.
// RETURNADDR and FRAMEADDR are unsupported and lower to an empty SDValue;
// some opcodes (INTRINSIC_W_CHAIN, BUILD_VECTOR, EXTRACT_SUBVECTOR) are
// returned unchanged so the default handling keeps them as-is.
SDValue
NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::RETURNADDR:
    return SDValue();
  case ISD::FRAMEADDR:
    return SDValue();
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return Op;
  case ISD::BUILD_VECTOR:
  case ISD::EXTRACT_SUBVECTOR:
    return Op;
  case ISD::CONCAT_VECTORS:
    return LowerCONCAT_VECTORS(Op, DAG);
  case ISD::STORE:
    return LowerSTORE(Op, DAG);
  case ISD::LOAD:
    return LowerLOAD(Op, DAG);
  case ISD::SHL_PARTS:
    return LowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS:
    return LowerShiftRightParts(Op, DAG);
  case ISD::SELECT:
    return LowerSelect(Op, DAG);
  default:
    llvm_unreachable("Custom lowering not defined for operation");
  }
}

// Lower an i1 SELECT by widening both value operands to i32, selecting in
// i32, and truncating the result back to i1.
SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
  SDValue Op0 = Op->getOperand(0);
  SDValue Op1 = Op->getOperand(1);
  SDValue Op2 = Op->getOperand(2);
  SDLoc DL(Op.getNode());

  assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");

  Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
  Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
  SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);

  return Trunc;
}

// Only i1 loads need custom handling; every other load falls back to the
// default expansion (signalled by returning an empty SDValue).
SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType() == MVT::i1)
    return LowerLOADi1(Op, DAG);
  else
    return SDValue();
}

// v = ld i1* addr
//   =>
// v1 = ld i8* addr (-> i16)
// v = trunc i16 to i1
SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  LoadSDNode *LD = cast<LoadSDNode>(Node);
  SDLoc dl(Node);
  assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
  assert(Node->getValueType(0) == MVT::i1 &&
         "Custom lowering for i1 load only");
  // Load the value as i16 (from the original i1 address), preserving the
  // original load's volatility/temporality/invariance and alignment.
  SDValue newLD =
      DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
                  LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(),
                  LD->isInvariant(), LD->getAlignment());
  SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
  // The legalizer (the caller) is expecting two values from the legalized
  // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
  // in LegalizeDAG.cpp which also uses MergeValues.
  SDValue Ops[] = { result, LD->getChain() };
  return DAG.getMergeValues(Ops, dl);
}

// Custom-lower i1 stores and vector stores; all other stores use the default
// path (empty SDValue).
SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  EVT ValVT = Op.getOperand(1).getValueType();
  if (ValVT == MVT::i1)
    return LowerSTOREi1(Op, DAG);
  else if (ValVT.isVector())
    return LowerSTOREVector(Op, DAG);
  else
    return SDValue();
}

// Lower a vector store to a single StoreV2/StoreV4 target node when the
// vector type is natively supported and the store is sufficiently aligned;
// otherwise return an empty SDValue so the legalizer scalarizes/splits it.
SDValue
NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
  SDNode *N = Op.getNode();
  SDValue Val = N->getOperand(1);
  SDLoc DL(N);
  EVT ValVT = Val.getValueType();

  if (ValVT.isVector()) {
    // We only handle "native" vector sizes for now, e.g. <4 x double> is not
    // legal. We can (and should) split that into 2 stores of <2 x double> here
    // but I'm leaving that as a TODO for now.
    if (!ValVT.isSimple())
      return SDValue();
    switch (ValVT.getSimpleVT().SimpleTy) {
    default:
      return SDValue();
    case MVT::v2i8:
    case MVT::v2i16:
    case MVT::v2i32:
    case MVT::v2i64:
    case MVT::v2f32:
    case MVT::v2f64:
    case MVT::v4i8:
    case MVT::v4i16:
    case MVT::v4i32:
    case MVT::v4f32:
      // This is a "native" vector type
      break;
    }

    MemSDNode *MemSD = cast<MemSDNode>(N);
    const DataLayout *TD = getDataLayout();

    unsigned Align = MemSD->getAlignment();
    unsigned PrefAlign =
        TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
    if (Align < PrefAlign) {
      // This store is not sufficiently aligned, so bail out and let this vector
      // store be scalarized.  Note that we may still be able to emit smaller
      // vector stores. For example, if we are storing a <4 x float> with an
      // alignment of 8, this check will fail but the legalizer will try again
      // with 2 x <2 x float>, which will succeed with an alignment of 8.
      return SDValue();
    }

    unsigned Opcode = 0;
    EVT EltVT = ValVT.getVectorElementType();
    unsigned NumElts = ValVT.getVectorNumElements();

    // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
    // Therefore, we must ensure the type is legal.  For i1 and i8, we set the
    // stored type to i16 and propagate the "real" type as the memory type.
    bool NeedExt = false;
    if (EltVT.getSizeInBits() < 16)
      NeedExt = true;

    switch (NumElts) {
    default:
      return SDValue();
    case 2:
      Opcode = NVPTXISD::StoreV2;
      break;
    case 4: {
      Opcode = NVPTXISD::StoreV4;
      break;
    }
    }

    SmallVector<SDValue, 8> Ops;

    // First is the chain
    Ops.push_back(N->getOperand(0));

    // Then the split values
    for (unsigned i = 0; i < NumElts; ++i) {
      SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
                                   DAG.getIntPtrConstant(i));
      if (NeedExt)
        ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
      Ops.push_back(ExtVal);
    }

    // Then any remaining arguments
    Ops.append(N->op_begin() + 2, N->op_end());

    // The memory VT and memory operand carry the original ("real") element
    // type even when the operands were widened to i16 above.
    SDValue NewSt = DAG.getMemIntrinsicNode(
        Opcode, DL, DAG.getVTList(MVT::Other), Ops,
        MemSD->getMemoryVT(), MemSD->getMemOperand());

    //return DCI.CombineTo(N, NewSt, true);
    return NewSt;
  }

  return SDValue();
}

// st i1 v, addr
//    =>
// v1 = zxt v to i16
// st.u8 i16, addr
SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  SDLoc dl(Node);
  StoreSDNode *ST = cast<StoreSDNode>(Node);
  SDValue Tmp1 = ST->getChain();
  SDValue Tmp2 = ST->getBasePtr();
  SDValue Tmp3 = ST->getValue();
  assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
  unsigned Alignment = ST->getAlignment();
  bool isVolatile = ST->isVolatile();
  bool isNonTemporal = ST->isNonTemporal();
  // Widen the stored value to i16 and emit a truncating store with i8 as the
  // memory type, preserving the original store's flags and alignment.
  Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3);
  SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
                                     ST->getPointerInfo(), MVT::i8, isNonTemporal,
                                     isVolatile, Alignment);
  return Result;
}

// Build a target external symbol "<inname><idx>"; the composed string is
// interned in the target machine's managed string pool so the returned
// symbol's char* stays valid for the lifetime of the compilation.
SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
                                        int idx, EVT v)
    const {
  std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
  std::stringstream suffix;
  suffix << idx;
  *name += suffix.str();
  return DAG.getTargetExternalSymbol(name->c_str(), v);
}

// Build the external symbol "<function>_param_<idx>" naming the idx'th formal
// parameter of the current machine function; the string is interned in the
// managed string pool so the symbol's char* stays valid.
SDValue
NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
  std::string ParamSym;
  raw_string_ostream ParamStr(ParamSym);

  ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx;
  ParamStr.flush();

  std::string *SavedStr =
    nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str());
  return DAG.getTargetExternalSymbol(SavedStr->c_str(), v);
}

// Helper-parameter symbol: ".HLPPARAM<idx>".
SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
  return getExtSymb(DAG, ".HLPPARAM", idx);
}

// Check to see if the kernel argument is image*_t or sampler_t

bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
  static const char *const specialTypes[] = { "struct._image2d_t",
                                              "struct._image3d_t",
                                              "struct._sampler_t" };

  const Type *Ty = arg->getType();
  const PointerType *PTy = dyn_cast<PointerType>(Ty);

  if (!PTy)
    return false;

  if (!context)
    return false;

  const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
  // Literal (unnamed) structs can never match one of the special names.
  const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";

  for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
    if (TypeName == specialTypes[i])
      return true;

  return false;
}

SDValue NVPTXTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const DataLayout *TD = getDataLayout();

  const Function *F = MF.getFunction();
  const AttributeSet &PAL = F->getAttributes();
  const TargetLowering *TLI = STI.getTargetLowering();

  SDValue Root = DAG.getRoot();
  std::vector<SDValue> OutChains;

  bool isKernel = llvm::isKernelFunction(*F);
  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;

  std::vector<Type *> argTypes;
  std::vector<const Argument *> theArgs;
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I) {
    theArgs.push_back(I);
    argTypes.push_back(I->getType());
  }
  // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
  // Ins.size() will be larger
  //   * if there is an aggregate argument with multiple fields (each field
  //     showing up separately in Ins)
  //   * if there is a vector argument with more than typical vector-length
  //     elements (generally if more than 4) where each vector element is
  //     individually present in Ins.
  // So a different index should be used for indexing into Ins.
  // See similar issue in LowerCall.
  unsigned InsIdx = 0;

  int idx = 0;
  for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
    Type *Ty = argTypes[i];

    // If the kernel argument is image*_t or sampler_t, convert it to
    // a i32 constant holding the parameter position. This can later
    // matched in the AsmPrinter to output the correct mangled name.
    if (isImageOrSamplerVal(
            theArgs[i],
            (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
                                     : nullptr))) {
      assert(isKernel && "Only kernels can have image/sampler params");
      InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
      continue;
    }

    if (theArgs[i]->use_empty()) {
      // argument is dead
      if (Ty->isAggregateType()) {
        SmallVector<EVT, 16> vtparts;

        ComputePTXValueVTs(*this, Ty, vtparts);
        assert(vtparts.size() > 0 && "empty aggregate type not expected");
        // Emit one UNDEF per decomposed part of the aggregate.
        for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
             ++parti) {
          InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
          ++InsIdx;
        }
        // The inner loop advanced InsIdx one past the last part; compensate
        // for the enclosing for-loop's ++InsIdx.
        if (vtparts.size() > 0)
          --InsIdx;
        continue;
      }
      if (Ty->isVectorTy()) {
        EVT ObjectVT = getValueType(Ty);
        unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
        for (unsigned parti = 0; parti < NumRegs; ++parti) {
          InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
          ++InsIdx;
        }
        if (NumRegs > 0)
          --InsIdx;
        continue;
      }
      InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
      continue;
    }

    // In the following cases, assign a node order of "idx+1"
    // to newly created nodes. The SDNodes for params have to
    // appear in the same order as their order of appearance
    // in the original function. "idx+1" holds that order.
    if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) {
      if (Ty->isAggregateType()) {
        SmallVector<EVT, 16> vtparts;
        SmallVector<uint64_t, 16> offsets;

        // NOTE: Here, we lose the ability to issue vector loads for vectors
        // that are a part of a struct.  This should be investigated in the
        // future.
        ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0);
        assert(vtparts.size() > 0 && "empty aggregate type not expected");
        bool aggregateIsPacked = false;
        if (StructType *STy = llvm::dyn_cast<StructType>(Ty))
          aggregateIsPacked = STy->isPacked();

        SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
        // Load each decomposed part from the param symbol plus its offset.
        for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
             ++parti) {
          EVT partVT = vtparts[parti];
          Value *srcValue = Constant::getNullValue(
              PointerType::get(partVT.getTypeForEVT(F->getContext()),
                               llvm::ADDRESS_SPACE_PARAM));
          SDValue srcAddr =
              DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
                          DAG.getConstant(offsets[parti], getPointerTy()));
          unsigned partAlign =
              aggregateIsPacked ? 1
                                : TD->getABITypeAlignment(
                                      partVT.getTypeForEVT(F->getContext()));
          SDValue p;
          if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) {
            // The in-register type is wider than the in-memory part, so use
            // an extending load honoring the signedness flag.
            ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
                                     ISD::SEXTLOAD : ISD::ZEXTLOAD;
            p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr,
                               MachinePointerInfo(srcValue), partVT, false,
                               false, false, partAlign);
          } else {
            p = DAG.getLoad(partVT, dl, Root, srcAddr,
                            MachinePointerInfo(srcValue), false, false, false,
                            partAlign);
          }
          if (p.getNode())
            p.getNode()->setIROrder(idx + 1);
          InVals.push_back(p);
          ++InsIdx;
        }
        if (vtparts.size() > 0)
          --InsIdx;
        continue;
      }
      if (Ty->isVectorTy()) {
        EVT ObjectVT = getValueType(Ty);
        SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
        unsigned NumElts = ObjectVT.getVectorNumElements();
        assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
               "Vector was not scalarized");
        EVT EltVT = ObjectVT.getVectorElementType();

        // V1 load
        // f32 = load ...
        if (NumElts == 1) {
          // We only have one element, so just directly load it
          Value *SrcValue = Constant::getNullValue(PointerType::get(
              EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
          SDValue P = DAG.getLoad(
              EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
              false, true,
              TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
          if (P.getNode())
            P.getNode()->setIROrder(idx + 1);

          if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
            P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P);
          InVals.push_back(P);
          ++InsIdx;
        } else if (NumElts == 2) {
          // V2 load
          // f32,f32 = load ...
          EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2);
          Value *SrcValue = Constant::getNullValue(PointerType::get(
              VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
          SDValue P = DAG.getLoad(
              VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
              false, true,
              TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
          if (P.getNode())
            P.getNode()->setIROrder(idx + 1);

          SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
                                     DAG.getIntPtrConstant(0));
          SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
                                     DAG.getIntPtrConstant(1));

          if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) {
            Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0);
            Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1);
          }

          InVals.push_back(Elt0);
          InVals.push_back(Elt1);
          InsIdx += 2;
        } else {
          // V4 loads
          // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
          // the vector will be expanded to a power of 2 elements, so we know
          // we can always round up to the next multiple of 4 when creating the
          // vector loads.
          // e.g.  4 elem => 1 ld.v4
          //       6 elem => 2 ld.v4
          //       8 elem => 2 ld.v4
          //      11 elem => 3 ld.v4
          unsigned VecSize = 4;
          if (EltVT.getSizeInBits() == 64) {
            VecSize = 2;
          }
          EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
          unsigned Ofst = 0;
          for (unsigned i = 0; i < NumElts; i += VecSize) {
            Value *SrcValue = Constant::getNullValue(
                PointerType::get(VecVT.getTypeForEVT(F->getContext()),
                                 llvm::ADDRESS_SPACE_PARAM));
            SDValue SrcAddr =
                DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
                            DAG.getConstant(Ofst, getPointerTy()));
            SDValue P = DAG.getLoad(
                VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
                false, true,
                TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
            if (P.getNode())
              P.getNode()->setIROrder(idx + 1);

            // Scatter the loaded vector's elements into InVals, ignoring the
            // padding lanes past NumElts.
            for (unsigned j = 0; j < VecSize; ++j) {
              if (i + j >= NumElts)
                break;
              SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
                                        DAG.getIntPtrConstant(j));
              if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
                Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt);
              InVals.push_back(Elt);
            }
            Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
          }
          InsIdx += NumElts;
        }

        if (NumElts > 0)
          --InsIdx;
        continue;
      }
      // A plain scalar.
      EVT ObjectVT = getValueType(Ty);
      // If ABI, load from the param symbol
      SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
      Value *srcValue = Constant::getNullValue(PointerType::get(
          ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
      SDValue p;
      if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) {
        ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
                                 ISD::SEXTLOAD : ISD::ZEXTLOAD;
        p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg,
                           MachinePointerInfo(srcValue), ObjectVT, false, false,
                           false,
                           TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
      } else {
        p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg,
                        MachinePointerInfo(srcValue), false, false, false,
                        TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
      }
      if (p.getNode())
        p.getNode()->setIROrder(idx + 1);
      InVals.push_back(p);
      continue;
    }

    // Param has ByVal attribute
    // Return MoveParam(param symbol).
    // Ideally, the param symbol can be returned directly,
    // but when SDNode builder decides to use it in a CopyToReg(),
    // machine instruction fails because TargetExternalSymbol
    // (not lowered) is target dependent, and CopyToReg assumes
    // the source is lowered.
    EVT ObjectVT = getValueType(Ty);
    assert(ObjectVT == Ins[InsIdx].VT &&
           "Ins type did not match function type");
    SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
    SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
    if (p.getNode())
      p.getNode()->setIROrder(idx + 1);
    if (isKernel)
      InVals.push_back(p);
    else {
      // For device functions, convert the byval param pointer from the local
      // address space to the generic address space before use.
      SDValue p2 = DAG.getNode(
          ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
          DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p);
      InVals.push_back(p2);
    }
  }

  // Clang will check explicit VarArg and issue error if any. However, Clang
  // will let code with
  // implicit var arg like f() pass. See bug 617733.
  // We treat this case as if the arg list is empty.
  // if (F.isVarArg()) {
  // assert(0 && "VarArg not supported yet!");
  //}

  if (!OutChains.empty())
    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));

  return Chain;
}


SDValue
NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 SDLoc dl, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const Function *F = MF.getFunction();
  Type *RetTy = F->getReturnType();
  const DataLayout *TD = getDataLayout();

  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;

  if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) {
    // If we have a vector type, the OutVals array will be the scalarized
    // components and we have to combine them into 1 or more vector stores.
2351 unsigned NumElts = VTy->getNumElements(); 2352 assert(NumElts == Outs.size() && "Bad scalarization of return value"); 2353 2354 // const_cast can be removed in later LLVM versions 2355 EVT EltVT = getValueType(RetTy).getVectorElementType(); 2356 bool NeedExtend = false; 2357 if (EltVT.getSizeInBits() < 16) 2358 NeedExtend = true; 2359 2360 // V1 store 2361 if (NumElts == 1) { 2362 SDValue StoreVal = OutVals[0]; 2363 // We only have one element, so just directly store it 2364 if (NeedExtend) 2365 StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); 2366 SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal }; 2367 Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, 2368 DAG.getVTList(MVT::Other), Ops, 2369 EltVT, MachinePointerInfo()); 2370 2371 } else if (NumElts == 2) { 2372 // V2 store 2373 SDValue StoreVal0 = OutVals[0]; 2374 SDValue StoreVal1 = OutVals[1]; 2375 2376 if (NeedExtend) { 2377 StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0); 2378 StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1); 2379 } 2380 2381 SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0, 2382 StoreVal1 }; 2383 Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl, 2384 DAG.getVTList(MVT::Other), Ops, 2385 EltVT, MachinePointerInfo()); 2386 } else { 2387 // V4 stores 2388 // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the 2389 // vector will be expanded to a power of 2 elements, so we know we can 2390 // always round up to the next multiple of 4 when creating the vector 2391 // stores. 2392 // e.g. 
4 elem => 1 st.v4 2393 // 6 elem => 2 st.v4 2394 // 8 elem => 2 st.v4 2395 // 11 elem => 3 st.v4 2396 2397 unsigned VecSize = 4; 2398 if (OutVals[0].getValueType().getSizeInBits() == 64) 2399 VecSize = 2; 2400 2401 unsigned Offset = 0; 2402 2403 EVT VecVT = 2404 EVT::getVectorVT(F->getContext(), EltVT, VecSize); 2405 unsigned PerStoreOffset = 2406 TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); 2407 2408 for (unsigned i = 0; i < NumElts; i += VecSize) { 2409 // Get values 2410 SDValue StoreVal; 2411 SmallVector<SDValue, 8> Ops; 2412 Ops.push_back(Chain); 2413 Ops.push_back(DAG.getConstant(Offset, MVT::i32)); 2414 unsigned Opc = NVPTXISD::StoreRetvalV2; 2415 EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType(); 2416 2417 StoreVal = OutVals[i]; 2418 if (NeedExtend) 2419 StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); 2420 Ops.push_back(StoreVal); 2421 2422 if (i + 1 < NumElts) { 2423 StoreVal = OutVals[i + 1]; 2424 if (NeedExtend) 2425 StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); 2426 } else { 2427 StoreVal = DAG.getUNDEF(ExtendedVT); 2428 } 2429 Ops.push_back(StoreVal); 2430 2431 if (VecSize == 4) { 2432 Opc = NVPTXISD::StoreRetvalV4; 2433 if (i + 2 < NumElts) { 2434 StoreVal = OutVals[i + 2]; 2435 if (NeedExtend) 2436 StoreVal = 2437 DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); 2438 } else { 2439 StoreVal = DAG.getUNDEF(ExtendedVT); 2440 } 2441 Ops.push_back(StoreVal); 2442 2443 if (i + 3 < NumElts) { 2444 StoreVal = OutVals[i + 3]; 2445 if (NeedExtend) 2446 StoreVal = 2447 DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); 2448 } else { 2449 StoreVal = DAG.getUNDEF(ExtendedVT); 2450 } 2451 Ops.push_back(StoreVal); 2452 } 2453 2454 // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size()); 2455 Chain = 2456 DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops, 2457 EltVT, MachinePointerInfo()); 2458 Offset += PerStoreOffset; 2459 } 2460 } 2461 } else { 2462 
SmallVector<EVT, 16> ValVTs; 2463 SmallVector<uint64_t, 16> Offsets; 2464 ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0); 2465 assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition"); 2466 2467 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 2468 SDValue theVal = OutVals[i]; 2469 EVT TheValType = theVal.getValueType(); 2470 unsigned numElems = 1; 2471 if (TheValType.isVector()) 2472 numElems = TheValType.getVectorNumElements(); 2473 for (unsigned j = 0, je = numElems; j != je; ++j) { 2474 SDValue TmpVal = theVal; 2475 if (TheValType.isVector()) 2476 TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, 2477 TheValType.getVectorElementType(), TmpVal, 2478 DAG.getIntPtrConstant(j)); 2479 EVT TheStoreType = ValVTs[i]; 2480 if (RetTy->isIntegerTy() && 2481 TD->getTypeAllocSizeInBits(RetTy) < 32) { 2482 // The following zero-extension is for integer types only, and 2483 // specifically not for aggregates. 2484 TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal); 2485 TheStoreType = MVT::i32; 2486 } 2487 else if (TmpVal.getValueType().getSizeInBits() < 16) 2488 TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal); 2489 2490 SDValue Ops[] = { 2491 Chain, 2492 DAG.getConstant(Offsets[i], MVT::i32), 2493 TmpVal }; 2494 Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, 2495 DAG.getVTList(MVT::Other), Ops, 2496 TheStoreType, 2497 MachinePointerInfo()); 2498 } 2499 } 2500 } 2501 2502 return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); 2503 } 2504 2505 2506 void NVPTXTargetLowering::LowerAsmOperandForConstraint( 2507 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, 2508 SelectionDAG &DAG) const { 2509 if (Constraint.length() > 1) 2510 return; 2511 else 2512 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 2513 } 2514 2515 // NVPTX suuport vector of legal types of any length in Intrinsics because the 2516 // NVPTX specific type legalizer 2517 // will legalize them to the PTX 
// supported length.
//
// In other words: a vector type is acceptable in an intrinsic as long as its
// element type is legal, even if the vector itself is longer than any legal
// PTX vector type; the NVPTX type legalizer splits it up later.
bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
  if (isTypeLegal(VT))
    return true;
  if (VT.isVector()) {
    // Over-long vectors are fine as long as the element type is legal.
    MVT eVT = VT.getVectorElementType();
    if (isTypeLegal(eVT))
      return true;
  }
  return false;
}

// Map an NVVM texture/tld4 intrinsic ID to the matching NVPTX ISD texture
// opcode. Returns 0 when \p Intrinsic is not a texture access intrinsic.
// The mapping is purely mechanical: nvvm_tex_<geom>[_level|_grad]_<ret>_<coord>
// corresponds to Tex<Geom><Ret><Coord>[Level|Grad].
static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
  switch (Intrinsic) {
  default:
    return 0;

  // 1D texture fetches.
  case Intrinsic::nvvm_tex_1d_v4f32_s32: return NVPTXISD::Tex1DFloatS32;
  case Intrinsic::nvvm_tex_1d_v4f32_f32: return NVPTXISD::Tex1DFloatFloat;
  case Intrinsic::nvvm_tex_1d_level_v4f32_f32: return NVPTXISD::Tex1DFloatFloatLevel;
  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: return NVPTXISD::Tex1DFloatFloatGrad;
  case Intrinsic::nvvm_tex_1d_v4s32_s32: return NVPTXISD::Tex1DS32S32;
  case Intrinsic::nvvm_tex_1d_v4s32_f32: return NVPTXISD::Tex1DS32Float;
  case Intrinsic::nvvm_tex_1d_level_v4s32_f32: return NVPTXISD::Tex1DS32FloatLevel;
  case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: return NVPTXISD::Tex1DS32FloatGrad;
  case Intrinsic::nvvm_tex_1d_v4u32_s32: return NVPTXISD::Tex1DU32S32;
  case Intrinsic::nvvm_tex_1d_v4u32_f32: return NVPTXISD::Tex1DU32Float;
  case Intrinsic::nvvm_tex_1d_level_v4u32_f32: return NVPTXISD::Tex1DU32FloatLevel;
  case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: return NVPTXISD::Tex1DU32FloatGrad;

  // 1D array texture fetches.
  case Intrinsic::nvvm_tex_1d_array_v4f32_s32: return NVPTXISD::Tex1DArrayFloatS32;
  case Intrinsic::nvvm_tex_1d_array_v4f32_f32: return NVPTXISD::Tex1DArrayFloatFloat;
  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: return NVPTXISD::Tex1DArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: return NVPTXISD::Tex1DArrayFloatFloatGrad;
  case Intrinsic::nvvm_tex_1d_array_v4s32_s32: return NVPTXISD::Tex1DArrayS32S32;
  case Intrinsic::nvvm_tex_1d_array_v4s32_f32: return NVPTXISD::Tex1DArrayS32Float;
  case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: return NVPTXISD::Tex1DArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: return NVPTXISD::Tex1DArrayS32FloatGrad;
  case Intrinsic::nvvm_tex_1d_array_v4u32_s32: return NVPTXISD::Tex1DArrayU32S32;
  case Intrinsic::nvvm_tex_1d_array_v4u32_f32: return NVPTXISD::Tex1DArrayU32Float;
  case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: return NVPTXISD::Tex1DArrayU32FloatLevel;
  case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: return NVPTXISD::Tex1DArrayU32FloatGrad;

  // 2D texture fetches.
  case Intrinsic::nvvm_tex_2d_v4f32_s32: return NVPTXISD::Tex2DFloatS32;
  case Intrinsic::nvvm_tex_2d_v4f32_f32: return NVPTXISD::Tex2DFloatFloat;
  case Intrinsic::nvvm_tex_2d_level_v4f32_f32: return NVPTXISD::Tex2DFloatFloatLevel;
  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: return NVPTXISD::Tex2DFloatFloatGrad;
  case Intrinsic::nvvm_tex_2d_v4s32_s32: return NVPTXISD::Tex2DS32S32;
  case Intrinsic::nvvm_tex_2d_v4s32_f32: return NVPTXISD::Tex2DS32Float;
  case Intrinsic::nvvm_tex_2d_level_v4s32_f32: return NVPTXISD::Tex2DS32FloatLevel;
  case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: return NVPTXISD::Tex2DS32FloatGrad;
  case Intrinsic::nvvm_tex_2d_v4u32_s32: return NVPTXISD::Tex2DU32S32;
  case Intrinsic::nvvm_tex_2d_v4u32_f32: return NVPTXISD::Tex2DU32Float;
  case Intrinsic::nvvm_tex_2d_level_v4u32_f32: return NVPTXISD::Tex2DU32FloatLevel;
  case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: return NVPTXISD::Tex2DU32FloatGrad;

  // 2D array texture fetches.
  case Intrinsic::nvvm_tex_2d_array_v4f32_s32: return NVPTXISD::Tex2DArrayFloatS32;
  case Intrinsic::nvvm_tex_2d_array_v4f32_f32: return NVPTXISD::Tex2DArrayFloatFloat;
  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: return NVPTXISD::Tex2DArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: return NVPTXISD::Tex2DArrayFloatFloatGrad;
  case Intrinsic::nvvm_tex_2d_array_v4s32_s32: return NVPTXISD::Tex2DArrayS32S32;
  case Intrinsic::nvvm_tex_2d_array_v4s32_f32: return NVPTXISD::Tex2DArrayS32Float;
  case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: return NVPTXISD::Tex2DArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: return NVPTXISD::Tex2DArrayS32FloatGrad;
  case Intrinsic::nvvm_tex_2d_array_v4u32_s32: return NVPTXISD::Tex2DArrayU32S32;
  case Intrinsic::nvvm_tex_2d_array_v4u32_f32: return NVPTXISD::Tex2DArrayU32Float;
  case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: return NVPTXISD::Tex2DArrayU32FloatLevel;
  case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: return NVPTXISD::Tex2DArrayU32FloatGrad;

  // 3D texture fetches.
  case Intrinsic::nvvm_tex_3d_v4f32_s32: return NVPTXISD::Tex3DFloatS32;
  case Intrinsic::nvvm_tex_3d_v4f32_f32: return NVPTXISD::Tex3DFloatFloat;
  case Intrinsic::nvvm_tex_3d_level_v4f32_f32: return NVPTXISD::Tex3DFloatFloatLevel;
  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: return NVPTXISD::Tex3DFloatFloatGrad;
  case Intrinsic::nvvm_tex_3d_v4s32_s32: return NVPTXISD::Tex3DS32S32;
  case Intrinsic::nvvm_tex_3d_v4s32_f32: return NVPTXISD::Tex3DS32Float;
  case Intrinsic::nvvm_tex_3d_level_v4s32_f32: return NVPTXISD::Tex3DS32FloatLevel;
  case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: return NVPTXISD::Tex3DS32FloatGrad;
  case Intrinsic::nvvm_tex_3d_v4u32_s32: return NVPTXISD::Tex3DU32S32;
  case Intrinsic::nvvm_tex_3d_v4u32_f32: return NVPTXISD::Tex3DU32Float;
  case Intrinsic::nvvm_tex_3d_level_v4u32_f32: return NVPTXISD::Tex3DU32FloatLevel;
  case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: return NVPTXISD::Tex3DU32FloatGrad;

  // Cube texture fetches (float coordinates only; no grad variants).
  case Intrinsic::nvvm_tex_cube_v4f32_f32: return NVPTXISD::TexCubeFloatFloat;
  case Intrinsic::nvvm_tex_cube_level_v4f32_f32: return NVPTXISD::TexCubeFloatFloatLevel;
  case Intrinsic::nvvm_tex_cube_v4s32_f32: return NVPTXISD::TexCubeS32Float;
  case Intrinsic::nvvm_tex_cube_level_v4s32_f32: return NVPTXISD::TexCubeS32FloatLevel;
  case Intrinsic::nvvm_tex_cube_v4u32_f32: return NVPTXISD::TexCubeU32Float;
  case Intrinsic::nvvm_tex_cube_level_v4u32_f32: return NVPTXISD::TexCubeU32FloatLevel;

  // Cube array texture fetches.
  case Intrinsic::nvvm_tex_cube_array_v4f32_f32: return NVPTXISD::TexCubeArrayFloatFloat;
  case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: return NVPTXISD::TexCubeArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_cube_array_v4s32_f32: return NVPTXISD::TexCubeArrayS32Float;
  case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: return NVPTXISD::TexCubeArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_cube_array_v4u32_f32: return NVPTXISD::TexCubeArrayU32Float;
  case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: return NVPTXISD::TexCubeArrayU32FloatLevel;

  // tld4 (fetch the four texels used for bilinear filtering of one
  // component: r/g/b/a).
  case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: return NVPTXISD::Tld4R2DFloatFloat;
  case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: return NVPTXISD::Tld4G2DFloatFloat;
  case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: return NVPTXISD::Tld4B2DFloatFloat;
  case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: return NVPTXISD::Tld4A2DFloatFloat;
  case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: return NVPTXISD::Tld4R2DS64Float;
  case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: return NVPTXISD::Tld4G2DS64Float;
  case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: return NVPTXISD::Tld4B2DS64Float;
  case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: return NVPTXISD::Tld4A2DS64Float;
  case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: return NVPTXISD::Tld4R2DU64Float;
  case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: return NVPTXISD::Tld4G2DU64Float;
  case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: return NVPTXISD::Tld4B2DU64Float;
  case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: return NVPTXISD::Tld4A2DU64Float;

  // Unified-mode variants: same shapes as above, but for the unified
  // texturing mode (no separate sampler operand).
  case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: return NVPTXISD::TexUnified1DFloatS32;
  case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: return NVPTXISD::TexUnified1DFloatFloat;
  case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: return NVPTXISD::TexUnified1DFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: return NVPTXISD::TexUnified1DFloatFloatGrad;
  case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: return NVPTXISD::TexUnified1DS32S32;
  case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: return NVPTXISD::TexUnified1DS32Float;
  case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: return NVPTXISD::TexUnified1DS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: return NVPTXISD::TexUnified1DS32FloatGrad;
  case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: return NVPTXISD::TexUnified1DU32S32;
  case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: return NVPTXISD::TexUnified1DU32Float;
  case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: return NVPTXISD::TexUnified1DU32FloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: return NVPTXISD::TexUnified1DU32FloatGrad;

  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: return NVPTXISD::TexUnified1DArrayFloatS32;
  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: return NVPTXISD::TexUnified1DArrayFloatFloat;
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: return NVPTXISD::TexUnified1DArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: return NVPTXISD::TexUnified1DArrayFloatFloatGrad;
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: return NVPTXISD::TexUnified1DArrayS32S32;
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: return NVPTXISD::TexUnified1DArrayS32Float;
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: return NVPTXISD::TexUnified1DArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: return NVPTXISD::TexUnified1DArrayS32FloatGrad;
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: return NVPTXISD::TexUnified1DArrayU32S32;
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: return NVPTXISD::TexUnified1DArrayU32Float;
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: return NVPTXISD::TexUnified1DArrayU32FloatLevel;
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: return NVPTXISD::TexUnified1DArrayU32FloatGrad;

  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: return NVPTXISD::TexUnified2DFloatS32;
  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: return NVPTXISD::TexUnified2DFloatFloat;
  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: return NVPTXISD::TexUnified2DFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: return NVPTXISD::TexUnified2DFloatFloatGrad;
  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: return NVPTXISD::TexUnified2DS32S32;
  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: return NVPTXISD::TexUnified2DS32Float;
  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: return NVPTXISD::TexUnified2DS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: return NVPTXISD::TexUnified2DS32FloatGrad;
  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: return NVPTXISD::TexUnified2DU32S32;
  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: return NVPTXISD::TexUnified2DU32Float;
  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: return NVPTXISD::TexUnified2DU32FloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: return NVPTXISD::TexUnified2DU32FloatGrad;

  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: return NVPTXISD::TexUnified2DArrayFloatS32;
  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: return NVPTXISD::TexUnified2DArrayFloatFloat;
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: return NVPTXISD::TexUnified2DArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: return NVPTXISD::TexUnified2DArrayFloatFloatGrad;
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: return NVPTXISD::TexUnified2DArrayS32S32;
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: return NVPTXISD::TexUnified2DArrayS32Float;
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: return NVPTXISD::TexUnified2DArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: return NVPTXISD::TexUnified2DArrayS32FloatGrad;
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: return NVPTXISD::TexUnified2DArrayU32S32;
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: return NVPTXISD::TexUnified2DArrayU32Float;
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: return NVPTXISD::TexUnified2DArrayU32FloatLevel;
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: return NVPTXISD::TexUnified2DArrayU32FloatGrad;

  case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: return NVPTXISD::TexUnified3DFloatS32;
  case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: return NVPTXISD::TexUnified3DFloatFloat;
  case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: return NVPTXISD::TexUnified3DFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: return NVPTXISD::TexUnified3DFloatFloatGrad;
  case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: return NVPTXISD::TexUnified3DS32S32;
  case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: return NVPTXISD::TexUnified3DS32Float;
  case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: return NVPTXISD::TexUnified3DS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: return NVPTXISD::TexUnified3DS32FloatGrad;
  case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: return NVPTXISD::TexUnified3DU32S32;
  case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: return NVPTXISD::TexUnified3DU32Float;
  case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: return NVPTXISD::TexUnified3DU32FloatLevel;
  case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: return NVPTXISD::TexUnified3DU32FloatGrad;

  case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: return NVPTXISD::TexUnifiedCubeFloatFloat;
  case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: return NVPTXISD::TexUnifiedCubeFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: return NVPTXISD::TexUnifiedCubeS32Float;
  case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: return NVPTXISD::TexUnifiedCubeS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: return NVPTXISD::TexUnifiedCubeU32Float;
  case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: return NVPTXISD::TexUnifiedCubeU32FloatLevel;

  case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: return NVPTXISD::TexUnifiedCubeArrayFloatFloat;
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel;
  case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: return NVPTXISD::TexUnifiedCubeArrayS32Float;
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel;
  case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: return NVPTXISD::TexUnifiedCubeArrayU32Float;
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;

  case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: return NVPTXISD::Tld4UnifiedR2DFloatFloat;
  case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: return NVPTXISD::Tld4UnifiedG2DFloatFloat;
  case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: return NVPTXISD::Tld4UnifiedB2DFloatFloat;
  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: return NVPTXISD::Tld4UnifiedA2DFloatFloat;
  case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: return NVPTXISD::Tld4UnifiedR2DS64Float;
  case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: return NVPTXISD::Tld4UnifiedG2DS64Float;
  case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: return NVPTXISD::Tld4UnifiedB2DS64Float;
  case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: return NVPTXISD::Tld4UnifiedA2DS64Float;
  case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: return NVPTXISD::Tld4UnifiedR2DU64Float;
  case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: return NVPTXISD::Tld4UnifiedG2DU64Float;
  case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: return NVPTXISD::Tld4UnifiedB2DU64Float;
  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: return NVPTXISD::Tld4UnifiedA2DU64Float;
  }
}

// Map an NVVM surface-load (suld) intrinsic ID to the matching NVPTX ISD
// opcode. Returns 0 when \p Intrinsic is not a surface-load intrinsic.
// The intrinsics are grouped by out-of-bounds handling mode (clamp, trap,
// zero) and, within a mode, by geometry (1d, 1d_array, 2d, 2d_array, 3d)
// and element type (i8..i64 scalars, v2/v4 vectors).
static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
  switch (Intrinsic) {
  default:
    return 0;

  // .clamp mode: out-of-bounds coordinates are clamped into range.
  case Intrinsic::nvvm_suld_1d_i8_clamp: return NVPTXISD::Suld1DI8Clamp;
  case Intrinsic::nvvm_suld_1d_i16_clamp: return NVPTXISD::Suld1DI16Clamp;
  case Intrinsic::nvvm_suld_1d_i32_clamp: return NVPTXISD::Suld1DI32Clamp;
  case Intrinsic::nvvm_suld_1d_i64_clamp: return NVPTXISD::Suld1DI64Clamp;
  case Intrinsic::nvvm_suld_1d_v2i8_clamp: return NVPTXISD::Suld1DV2I8Clamp;
  case Intrinsic::nvvm_suld_1d_v2i16_clamp: return NVPTXISD::Suld1DV2I16Clamp;
  case Intrinsic::nvvm_suld_1d_v2i32_clamp: return NVPTXISD::Suld1DV2I32Clamp;
  case Intrinsic::nvvm_suld_1d_v2i64_clamp: return NVPTXISD::Suld1DV2I64Clamp;
  case Intrinsic::nvvm_suld_1d_v4i8_clamp: return NVPTXISD::Suld1DV4I8Clamp;
  case Intrinsic::nvvm_suld_1d_v4i16_clamp: return NVPTXISD::Suld1DV4I16Clamp;
  case Intrinsic::nvvm_suld_1d_v4i32_clamp: return NVPTXISD::Suld1DV4I32Clamp;
  case Intrinsic::nvvm_suld_1d_array_i8_clamp: return NVPTXISD::Suld1DArrayI8Clamp;
  case Intrinsic::nvvm_suld_1d_array_i16_clamp: return NVPTXISD::Suld1DArrayI16Clamp;
  case Intrinsic::nvvm_suld_1d_array_i32_clamp: return NVPTXISD::Suld1DArrayI32Clamp;
  case Intrinsic::nvvm_suld_1d_array_i64_clamp: return NVPTXISD::Suld1DArrayI64Clamp;
  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: return NVPTXISD::Suld1DArrayV2I8Clamp;
  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: return NVPTXISD::Suld1DArrayV2I16Clamp;
  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: return NVPTXISD::Suld1DArrayV2I32Clamp;
  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: return NVPTXISD::Suld1DArrayV2I64Clamp;
  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: return NVPTXISD::Suld1DArrayV4I8Clamp;
  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: return NVPTXISD::Suld1DArrayV4I16Clamp;
  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: return NVPTXISD::Suld1DArrayV4I32Clamp;
  case Intrinsic::nvvm_suld_2d_i8_clamp: return NVPTXISD::Suld2DI8Clamp;
  case Intrinsic::nvvm_suld_2d_i16_clamp: return NVPTXISD::Suld2DI16Clamp;
  case Intrinsic::nvvm_suld_2d_i32_clamp: return NVPTXISD::Suld2DI32Clamp;
  case Intrinsic::nvvm_suld_2d_i64_clamp: return NVPTXISD::Suld2DI64Clamp;
  case Intrinsic::nvvm_suld_2d_v2i8_clamp: return NVPTXISD::Suld2DV2I8Clamp;
  case Intrinsic::nvvm_suld_2d_v2i16_clamp: return NVPTXISD::Suld2DV2I16Clamp;
  case Intrinsic::nvvm_suld_2d_v2i32_clamp: return NVPTXISD::Suld2DV2I32Clamp;
  case Intrinsic::nvvm_suld_2d_v2i64_clamp: return NVPTXISD::Suld2DV2I64Clamp;
  case Intrinsic::nvvm_suld_2d_v4i8_clamp: return NVPTXISD::Suld2DV4I8Clamp;
  case Intrinsic::nvvm_suld_2d_v4i16_clamp: return NVPTXISD::Suld2DV4I16Clamp;
  case Intrinsic::nvvm_suld_2d_v4i32_clamp: return NVPTXISD::Suld2DV4I32Clamp;
  case Intrinsic::nvvm_suld_2d_array_i8_clamp: return NVPTXISD::Suld2DArrayI8Clamp;
  case Intrinsic::nvvm_suld_2d_array_i16_clamp: return NVPTXISD::Suld2DArrayI16Clamp;
  case Intrinsic::nvvm_suld_2d_array_i32_clamp: return NVPTXISD::Suld2DArrayI32Clamp;
  case Intrinsic::nvvm_suld_2d_array_i64_clamp: return NVPTXISD::Suld2DArrayI64Clamp;
  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: return NVPTXISD::Suld2DArrayV2I8Clamp;
  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: return NVPTXISD::Suld2DArrayV2I16Clamp;
  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: return NVPTXISD::Suld2DArrayV2I32Clamp;
  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: return NVPTXISD::Suld2DArrayV2I64Clamp;
  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: return NVPTXISD::Suld2DArrayV4I8Clamp;
  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: return NVPTXISD::Suld2DArrayV4I16Clamp;
  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: return NVPTXISD::Suld2DArrayV4I32Clamp;
  case Intrinsic::nvvm_suld_3d_i8_clamp: return NVPTXISD::Suld3DI8Clamp;
  case Intrinsic::nvvm_suld_3d_i16_clamp: return NVPTXISD::Suld3DI16Clamp;
  case Intrinsic::nvvm_suld_3d_i32_clamp: return NVPTXISD::Suld3DI32Clamp;
  case Intrinsic::nvvm_suld_3d_i64_clamp: return NVPTXISD::Suld3DI64Clamp;
  case Intrinsic::nvvm_suld_3d_v2i8_clamp: return NVPTXISD::Suld3DV2I8Clamp;
  case Intrinsic::nvvm_suld_3d_v2i16_clamp: return NVPTXISD::Suld3DV2I16Clamp;
  case Intrinsic::nvvm_suld_3d_v2i32_clamp: return NVPTXISD::Suld3DV2I32Clamp;
  case Intrinsic::nvvm_suld_3d_v2i64_clamp: return NVPTXISD::Suld3DV2I64Clamp;
  case Intrinsic::nvvm_suld_3d_v4i8_clamp: return NVPTXISD::Suld3DV4I8Clamp;
  case Intrinsic::nvvm_suld_3d_v4i16_clamp: return NVPTXISD::Suld3DV4I16Clamp;
  case Intrinsic::nvvm_suld_3d_v4i32_clamp: return NVPTXISD::Suld3DV4I32Clamp;

  // .trap mode: out-of-bounds accesses fault.
  case Intrinsic::nvvm_suld_1d_i8_trap: return NVPTXISD::Suld1DI8Trap;
  case Intrinsic::nvvm_suld_1d_i16_trap: return NVPTXISD::Suld1DI16Trap;
  case Intrinsic::nvvm_suld_1d_i32_trap: return NVPTXISD::Suld1DI32Trap;
  case Intrinsic::nvvm_suld_1d_i64_trap: return NVPTXISD::Suld1DI64Trap;
  case Intrinsic::nvvm_suld_1d_v2i8_trap: return NVPTXISD::Suld1DV2I8Trap;
  case Intrinsic::nvvm_suld_1d_v2i16_trap: return NVPTXISD::Suld1DV2I16Trap;
  case Intrinsic::nvvm_suld_1d_v2i32_trap: return NVPTXISD::Suld1DV2I32Trap;
  case Intrinsic::nvvm_suld_1d_v2i64_trap: return NVPTXISD::Suld1DV2I64Trap;
  case Intrinsic::nvvm_suld_1d_v4i8_trap: return NVPTXISD::Suld1DV4I8Trap;
  case Intrinsic::nvvm_suld_1d_v4i16_trap: return NVPTXISD::Suld1DV4I16Trap;
  case Intrinsic::nvvm_suld_1d_v4i32_trap: return NVPTXISD::Suld1DV4I32Trap;
  case Intrinsic::nvvm_suld_1d_array_i8_trap: return NVPTXISD::Suld1DArrayI8Trap;
  case Intrinsic::nvvm_suld_1d_array_i16_trap: return NVPTXISD::Suld1DArrayI16Trap;
  case Intrinsic::nvvm_suld_1d_array_i32_trap: return NVPTXISD::Suld1DArrayI32Trap;
  case Intrinsic::nvvm_suld_1d_array_i64_trap: return NVPTXISD::Suld1DArrayI64Trap;
  case Intrinsic::nvvm_suld_1d_array_v2i8_trap: return NVPTXISD::Suld1DArrayV2I8Trap;
  case Intrinsic::nvvm_suld_1d_array_v2i16_trap: return NVPTXISD::Suld1DArrayV2I16Trap;
  case Intrinsic::nvvm_suld_1d_array_v2i32_trap: return NVPTXISD::Suld1DArrayV2I32Trap;
  case Intrinsic::nvvm_suld_1d_array_v2i64_trap: return NVPTXISD::Suld1DArrayV2I64Trap;
  case Intrinsic::nvvm_suld_1d_array_v4i8_trap: return NVPTXISD::Suld1DArrayV4I8Trap;
  case Intrinsic::nvvm_suld_1d_array_v4i16_trap: return NVPTXISD::Suld1DArrayV4I16Trap;
  case Intrinsic::nvvm_suld_1d_array_v4i32_trap: return NVPTXISD::Suld1DArrayV4I32Trap;
  case Intrinsic::nvvm_suld_2d_i8_trap: return NVPTXISD::Suld2DI8Trap;
  case Intrinsic::nvvm_suld_2d_i16_trap: return NVPTXISD::Suld2DI16Trap;
  case Intrinsic::nvvm_suld_2d_i32_trap: return NVPTXISD::Suld2DI32Trap;
  case Intrinsic::nvvm_suld_2d_i64_trap: return NVPTXISD::Suld2DI64Trap;
  case Intrinsic::nvvm_suld_2d_v2i8_trap: return NVPTXISD::Suld2DV2I8Trap;
  case Intrinsic::nvvm_suld_2d_v2i16_trap: return NVPTXISD::Suld2DV2I16Trap;
  case Intrinsic::nvvm_suld_2d_v2i32_trap: return NVPTXISD::Suld2DV2I32Trap;
  case Intrinsic::nvvm_suld_2d_v2i64_trap: return NVPTXISD::Suld2DV2I64Trap;
  case Intrinsic::nvvm_suld_2d_v4i8_trap: return NVPTXISD::Suld2DV4I8Trap;
  case Intrinsic::nvvm_suld_2d_v4i16_trap: return NVPTXISD::Suld2DV4I16Trap;
  case Intrinsic::nvvm_suld_2d_v4i32_trap: return NVPTXISD::Suld2DV4I32Trap;
  case Intrinsic::nvvm_suld_2d_array_i8_trap: return NVPTXISD::Suld2DArrayI8Trap;
  case Intrinsic::nvvm_suld_2d_array_i16_trap: return NVPTXISD::Suld2DArrayI16Trap;
  case Intrinsic::nvvm_suld_2d_array_i32_trap: return NVPTXISD::Suld2DArrayI32Trap;
  case Intrinsic::nvvm_suld_2d_array_i64_trap: return NVPTXISD::Suld2DArrayI64Trap;
  case Intrinsic::nvvm_suld_2d_array_v2i8_trap: return NVPTXISD::Suld2DArrayV2I8Trap;
  case Intrinsic::nvvm_suld_2d_array_v2i16_trap: return NVPTXISD::Suld2DArrayV2I16Trap;
  case Intrinsic::nvvm_suld_2d_array_v2i32_trap: return NVPTXISD::Suld2DArrayV2I32Trap;
  case Intrinsic::nvvm_suld_2d_array_v2i64_trap: return NVPTXISD::Suld2DArrayV2I64Trap;
  case Intrinsic::nvvm_suld_2d_array_v4i8_trap: return NVPTXISD::Suld2DArrayV4I8Trap;
  case Intrinsic::nvvm_suld_2d_array_v4i16_trap: return NVPTXISD::Suld2DArrayV4I16Trap;
  case Intrinsic::nvvm_suld_2d_array_v4i32_trap: return NVPTXISD::Suld2DArrayV4I32Trap;
  case Intrinsic::nvvm_suld_3d_i8_trap: return NVPTXISD::Suld3DI8Trap;
  case Intrinsic::nvvm_suld_3d_i16_trap: return NVPTXISD::Suld3DI16Trap;
  case Intrinsic::nvvm_suld_3d_i32_trap: return NVPTXISD::Suld3DI32Trap;
  case Intrinsic::nvvm_suld_3d_i64_trap: return NVPTXISD::Suld3DI64Trap;
  case Intrinsic::nvvm_suld_3d_v2i8_trap: return NVPTXISD::Suld3DV2I8Trap;
  case Intrinsic::nvvm_suld_3d_v2i16_trap: return NVPTXISD::Suld3DV2I16Trap;
  case Intrinsic::nvvm_suld_3d_v2i32_trap: return NVPTXISD::Suld3DV2I32Trap;
  case Intrinsic::nvvm_suld_3d_v2i64_trap: return NVPTXISD::Suld3DV2I64Trap;
  case Intrinsic::nvvm_suld_3d_v4i8_trap: return NVPTXISD::Suld3DV4I8Trap;
  case Intrinsic::nvvm_suld_3d_v4i16_trap: return NVPTXISD::Suld3DV4I16Trap;
  case Intrinsic::nvvm_suld_3d_v4i32_trap: return NVPTXISD::Suld3DV4I32Trap;

  // .zero mode: out-of-bounds reads return zero.
  case Intrinsic::nvvm_suld_1d_i8_zero: return NVPTXISD::Suld1DI8Zero;
  case Intrinsic::nvvm_suld_1d_i16_zero: return NVPTXISD::Suld1DI16Zero;
  case Intrinsic::nvvm_suld_1d_i32_zero: return NVPTXISD::Suld1DI32Zero;
  case Intrinsic::nvvm_suld_1d_i64_zero: return NVPTXISD::Suld1DI64Zero;
  case Intrinsic::nvvm_suld_1d_v2i8_zero: return NVPTXISD::Suld1DV2I8Zero;
  case Intrinsic::nvvm_suld_1d_v2i16_zero: return NVPTXISD::Suld1DV2I16Zero;
  case Intrinsic::nvvm_suld_1d_v2i32_zero: return NVPTXISD::Suld1DV2I32Zero;
  case Intrinsic::nvvm_suld_1d_v2i64_zero: return NVPTXISD::Suld1DV2I64Zero;
  case Intrinsic::nvvm_suld_1d_v4i8_zero: return NVPTXISD::Suld1DV4I8Zero;
  case Intrinsic::nvvm_suld_1d_v4i16_zero: return NVPTXISD::Suld1DV4I16Zero;
  case Intrinsic::nvvm_suld_1d_v4i32_zero: return NVPTXISD::Suld1DV4I32Zero;
  case Intrinsic::nvvm_suld_1d_array_i8_zero: return NVPTXISD::Suld1DArrayI8Zero;
  case Intrinsic::nvvm_suld_1d_array_i16_zero: return NVPTXISD::Suld1DArrayI16Zero;
  case Intrinsic::nvvm_suld_1d_array_i32_zero: return NVPTXISD::Suld1DArrayI32Zero;
  case Intrinsic::nvvm_suld_1d_array_i64_zero: return NVPTXISD::Suld1DArrayI64Zero;
  case Intrinsic::nvvm_suld_1d_array_v2i8_zero: return NVPTXISD::Suld1DArrayV2I8Zero;
  case Intrinsic::nvvm_suld_1d_array_v2i16_zero: return NVPTXISD::Suld1DArrayV2I16Zero;
  case Intrinsic::nvvm_suld_1d_array_v2i32_zero: return NVPTXISD::Suld1DArrayV2I32Zero;
  case Intrinsic::nvvm_suld_1d_array_v2i64_zero: return NVPTXISD::Suld1DArrayV2I64Zero;
  case Intrinsic::nvvm_suld_1d_array_v4i8_zero: return NVPTXISD::Suld1DArrayV4I8Zero;
  case Intrinsic::nvvm_suld_1d_array_v4i16_zero: return NVPTXISD::Suld1DArrayV4I16Zero;
  case Intrinsic::nvvm_suld_1d_array_v4i32_zero: return NVPTXISD::Suld1DArrayV4I32Zero;
  case Intrinsic::nvvm_suld_2d_i8_zero: return NVPTXISD::Suld2DI8Zero;
  case Intrinsic::nvvm_suld_2d_i16_zero: return NVPTXISD::Suld2DI16Zero;
  case Intrinsic::nvvm_suld_2d_i32_zero: return NVPTXISD::Suld2DI32Zero;
  case Intrinsic::nvvm_suld_2d_i64_zero: return NVPTXISD::Suld2DI64Zero;
  case Intrinsic::nvvm_suld_2d_v2i8_zero: return NVPTXISD::Suld2DV2I8Zero;
  case Intrinsic::nvvm_suld_2d_v2i16_zero: return NVPTXISD::Suld2DV2I16Zero;
  case Intrinsic::nvvm_suld_2d_v2i32_zero: return NVPTXISD::Suld2DV2I32Zero;
  case Intrinsic::nvvm_suld_2d_v2i64_zero: return NVPTXISD::Suld2DV2I64Zero;
  case Intrinsic::nvvm_suld_2d_v4i8_zero: return NVPTXISD::Suld2DV4I8Zero;
  case Intrinsic::nvvm_suld_2d_v4i16_zero: return NVPTXISD::Suld2DV4I16Zero;
  case Intrinsic::nvvm_suld_2d_v4i32_zero: return NVPTXISD::Suld2DV4I32Zero;
  case Intrinsic::nvvm_suld_2d_array_i8_zero: return NVPTXISD::Suld2DArrayI8Zero;
  case Intrinsic::nvvm_suld_2d_array_i16_zero: return NVPTXISD::Suld2DArrayI16Zero;
  case Intrinsic::nvvm_suld_2d_array_i32_zero: return NVPTXISD::Suld2DArrayI32Zero;
  case Intrinsic::nvvm_suld_2d_array_i64_zero: return NVPTXISD::Suld2DArrayI64Zero;
  case Intrinsic::nvvm_suld_2d_array_v2i8_zero: return NVPTXISD::Suld2DArrayV2I8Zero;
  case Intrinsic::nvvm_suld_2d_array_v2i16_zero: return NVPTXISD::Suld2DArrayV2I16Zero;
  case Intrinsic::nvvm_suld_2d_array_v2i32_zero: return NVPTXISD::Suld2DArrayV2I32Zero;
  case Intrinsic::nvvm_suld_2d_array_v2i64_zero: return NVPTXISD::Suld2DArrayV2I64Zero;
  case Intrinsic::nvvm_suld_2d_array_v4i8_zero: return NVPTXISD::Suld2DArrayV4I8Zero;
  case Intrinsic::nvvm_suld_2d_array_v4i16_zero: return NVPTXISD::Suld2DArrayV4I16Zero;
  case Intrinsic::nvvm_suld_2d_array_v4i32_zero: return NVPTXISD::Suld2DArrayV4I32Zero;
  case Intrinsic::nvvm_suld_3d_i8_zero: return NVPTXISD::Suld3DI8Zero;
  case Intrinsic::nvvm_suld_3d_i16_zero: return NVPTXISD::Suld3DI16Zero;
  case Intrinsic::nvvm_suld_3d_i32_zero: return NVPTXISD::Suld3DI32Zero;
  case Intrinsic::nvvm_suld_3d_i64_zero: return NVPTXISD::Suld3DI64Zero;
  case Intrinsic::nvvm_suld_3d_v2i8_zero: return NVPTXISD::Suld3DV2I8Zero;
  case Intrinsic::nvvm_suld_3d_v2i16_zero: return NVPTXISD::Suld3DV2I16Zero;
  case Intrinsic::nvvm_suld_3d_v2i32_zero: return NVPTXISD::Suld3DV2I32Zero;
  case Intrinsic::nvvm_suld_3d_v2i64_zero: return NVPTXISD::Suld3DV2I64Zero;
  case Intrinsic::nvvm_suld_3d_v4i8_zero: return NVPTXISD::Suld3DV4I8Zero;
  case Intrinsic::nvvm_suld_3d_v4i16_zero: return NVPTXISD::Suld3DV4I16Zero;
  case Intrinsic::nvvm_suld_3d_v4i32_zero: return NVPTXISD::Suld3DV4I32Zero;
  }
}

// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
//
// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
// TgtMemIntrinsic because we need the information that is only available in
// the "Value" type of the destination pointer. In particular, the address
// space information.
bool NVPTXTargetLowering::getTgtMemIntrinsic(
    IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;

  // Atomic f32 add: reads and writes the f32 pointed to by operand 0.
  case Intrinsic::nvvm_atomic_load_add_f32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::f32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = true;
    Info.align = 0; // no specific alignment requirement recorded
    return true;

  // Atomic i32 increment/decrement: read-modify-write through operand 0.
  case Intrinsic::nvvm_atomic_load_inc_32:
  case Intrinsic::nvvm_atomic_load_dec_32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = true;
    Info.align = 0;
    return true;

  // ldu (load via unified/uniform cache): read-only load through operand 0.
  // The alignment is carried explicitly as the (constant) second argument.
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p: {

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    // Only the pointer variant needs the target pointer type; the integer and
    // float variants both take the memory type from the call's result type.
    if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
      Info.memVT = getValueType(I.getType());
    else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
      Info.memVT = getPointerTy();
    else
      Info.memVT = getValueType(I.getType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();

    return true;
  }
  // ldg (load via non-coherent texture cache): same shape as ldu above.
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_p: {

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
      Info.memVT = getValueType(I.getType());
    else if (Intrinsic == Intrinsic::nvvm_ldg_global_p)
      Info.memVT = getPointerTy();
    else
      Info.memVT = getValueType(I.getType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();

    return true;
  }

  // Texture reads returning v4f32: modeled as 16-byte-aligned reads with no
  // analyzable base pointer (the texture handle is opaque to the optimizer).
  case Intrinsic::nvvm_tex_1d_v4f32_s32:
  case Intrinsic::nvvm_tex_1d_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_v4f32_s32:
  case Intrinsic::nvvm_tex_2d_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_v4f32_s32:
  case Intrinsic::nvvm_tex_3d_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4f32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4f32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: {
    Info.opc = getOpcForTextureInstr(Intrinsic);
    Info.memVT = MVT::v4f32;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  // Texture reads returning v4s32/v4u32: both map to memVT v4i32 (signedness
  // is encoded in the chosen texture opcode, not the memory type).
  case Intrinsic::nvvm_tex_1d_v4s32_s32:
  case Intrinsic::nvvm_tex_1d_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_v4s32_s32:
  case Intrinsic::nvvm_tex_2d_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_v4s32_s32:
  case Intrinsic::nvvm_tex_3d_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_cube_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_level_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_array_v4u32_f32:
  case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_v4u32_s32:
  case Intrinsic::nvvm_tex_1d_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_1d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_v4u32_s32:
  case Intrinsic::nvvm_tex_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_2d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_v4u32_s32:
  case Intrinsic::nvvm_tex_3d_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_3d_grad_v4u32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_r_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_g_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_b_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_a_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4s32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_1d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_1d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_2d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_v4u32_s32:
  case Intrinsic::nvvm_tex_unified_3d_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32:
  case Intrinsic::nvvm_tex_unified_cube_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
  case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32:
  case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: {
    Info.opc = getOpcForTextureInstr(Intrinsic);
    Info.memVT = MVT::v4i32;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  // Surface loads of i8 elements (clamp/trap/zero boundary modes): memVT i8.
  case Intrinsic::nvvm_suld_1d_i8_clamp:
  case Intrinsic::nvvm_suld_1d_v2i8_clamp:
  case Intrinsic::nvvm_suld_1d_v4i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i8_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i8_clamp:
  case Intrinsic::nvvm_suld_2d_i8_clamp:
  case Intrinsic::nvvm_suld_2d_v2i8_clamp:
  case Intrinsic::nvvm_suld_2d_v4i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i8_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i8_clamp:
  case Intrinsic::nvvm_suld_3d_i8_clamp:
  case Intrinsic::nvvm_suld_3d_v2i8_clamp:
  case Intrinsic::nvvm_suld_3d_v4i8_clamp:
  case Intrinsic::nvvm_suld_1d_i8_trap:
  case Intrinsic::nvvm_suld_1d_v2i8_trap:
  case Intrinsic::nvvm_suld_1d_v4i8_trap:
  case Intrinsic::nvvm_suld_1d_array_i8_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
  case Intrinsic::nvvm_suld_2d_i8_trap:
  case Intrinsic::nvvm_suld_2d_v2i8_trap:
  case Intrinsic::nvvm_suld_2d_v4i8_trap:
  case Intrinsic::nvvm_suld_2d_array_i8_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
  case Intrinsic::nvvm_suld_3d_i8_trap:
  case Intrinsic::nvvm_suld_3d_v2i8_trap:
  case Intrinsic::nvvm_suld_3d_v4i8_trap:
  case Intrinsic::nvvm_suld_1d_i8_zero:
  case Intrinsic::nvvm_suld_1d_v2i8_zero:
  case Intrinsic::nvvm_suld_1d_v4i8_zero:
  case Intrinsic::nvvm_suld_1d_array_i8_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i8_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i8_zero:
  case Intrinsic::nvvm_suld_2d_i8_zero:
  case Intrinsic::nvvm_suld_2d_v2i8_zero:
  case Intrinsic::nvvm_suld_2d_v4i8_zero:
  case Intrinsic::nvvm_suld_2d_array_i8_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i8_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
  case Intrinsic::nvvm_suld_3d_i8_zero:
  case Intrinsic::nvvm_suld_3d_v2i8_zero:
  case Intrinsic::nvvm_suld_3d_v4i8_zero: {
    Info.opc = getOpcForSurfaceInstr(Intrinsic);
    Info.memVT = MVT::i8;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  // Surface loads of i16 elements: same shape as the i8 group above.
  case Intrinsic::nvvm_suld_1d_i16_clamp:
  case Intrinsic::nvvm_suld_1d_v2i16_clamp:
  case Intrinsic::nvvm_suld_1d_v4i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i16_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i16_clamp:
  case Intrinsic::nvvm_suld_2d_i16_clamp:
  case Intrinsic::nvvm_suld_2d_v2i16_clamp:
  case Intrinsic::nvvm_suld_2d_v4i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i16_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i16_clamp:
  case Intrinsic::nvvm_suld_3d_i16_clamp:
  case Intrinsic::nvvm_suld_3d_v2i16_clamp:
  case Intrinsic::nvvm_suld_3d_v4i16_clamp:
  case Intrinsic::nvvm_suld_1d_i16_trap:
  case Intrinsic::nvvm_suld_1d_v2i16_trap:
  case Intrinsic::nvvm_suld_1d_v4i16_trap:
  case Intrinsic::nvvm_suld_1d_array_i16_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
  case Intrinsic::nvvm_suld_2d_i16_trap:
  case Intrinsic::nvvm_suld_2d_v2i16_trap:
  case Intrinsic::nvvm_suld_2d_v4i16_trap:
  case Intrinsic::nvvm_suld_2d_array_i16_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
  case Intrinsic::nvvm_suld_3d_i16_trap:
  case Intrinsic::nvvm_suld_3d_v2i16_trap:
  case Intrinsic::nvvm_suld_3d_v4i16_trap:
  case Intrinsic::nvvm_suld_1d_i16_zero:
  case Intrinsic::nvvm_suld_1d_v2i16_zero:
  case Intrinsic::nvvm_suld_1d_v4i16_zero:
  case Intrinsic::nvvm_suld_1d_array_i16_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i16_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i16_zero:
  case Intrinsic::nvvm_suld_2d_i16_zero:
  case Intrinsic::nvvm_suld_2d_v2i16_zero:
  case Intrinsic::nvvm_suld_2d_v4i16_zero:
  case Intrinsic::nvvm_suld_2d_array_i16_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i16_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
  case Intrinsic::nvvm_suld_3d_i16_zero:
  case Intrinsic::nvvm_suld_3d_v2i16_zero:
  case Intrinsic::nvvm_suld_3d_v4i16_zero: {
    Info.opc = getOpcForSurfaceInstr(Intrinsic);
    Info.memVT = MVT::i16;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  // Surface loads of i32 elements.
  case Intrinsic::nvvm_suld_1d_i32_clamp:
  case Intrinsic::nvvm_suld_1d_v2i32_clamp:
  case Intrinsic::nvvm_suld_1d_v4i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i32_clamp:
  case Intrinsic::nvvm_suld_1d_array_v4i32_clamp:
  case Intrinsic::nvvm_suld_2d_i32_clamp:
  case Intrinsic::nvvm_suld_2d_v2i32_clamp:
  case Intrinsic::nvvm_suld_2d_v4i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i32_clamp:
  case Intrinsic::nvvm_suld_2d_array_v4i32_clamp:
  case Intrinsic::nvvm_suld_3d_i32_clamp:
  case Intrinsic::nvvm_suld_3d_v2i32_clamp:
  case Intrinsic::nvvm_suld_3d_v4i32_clamp:
  case Intrinsic::nvvm_suld_1d_i32_trap:
  case Intrinsic::nvvm_suld_1d_v2i32_trap:
  case Intrinsic::nvvm_suld_1d_v4i32_trap:
  case Intrinsic::nvvm_suld_1d_array_i32_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
  case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
  case Intrinsic::nvvm_suld_2d_i32_trap:
  case Intrinsic::nvvm_suld_2d_v2i32_trap:
  case Intrinsic::nvvm_suld_2d_v4i32_trap:
  case Intrinsic::nvvm_suld_2d_array_i32_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
  case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
  case Intrinsic::nvvm_suld_3d_i32_trap:
  case Intrinsic::nvvm_suld_3d_v2i32_trap:
  case Intrinsic::nvvm_suld_3d_v4i32_trap:
  case Intrinsic::nvvm_suld_1d_i32_zero:
  case Intrinsic::nvvm_suld_1d_v2i32_zero:
  case Intrinsic::nvvm_suld_1d_v4i32_zero:
  case Intrinsic::nvvm_suld_1d_array_i32_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i32_zero:
  case Intrinsic::nvvm_suld_1d_array_v4i32_zero:
  case Intrinsic::nvvm_suld_2d_i32_zero:
  case Intrinsic::nvvm_suld_2d_v2i32_zero:
  case Intrinsic::nvvm_suld_2d_v4i32_zero:
  case Intrinsic::nvvm_suld_2d_array_i32_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i32_zero:
  case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
  case Intrinsic::nvvm_suld_3d_i32_zero:
  case Intrinsic::nvvm_suld_3d_v2i32_zero:
  case Intrinsic::nvvm_suld_3d_v4i32_zero: {
    Info.opc = getOpcForSurfaceInstr(Intrinsic);
    Info.memVT = MVT::i32;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  // Surface loads of i64 elements (no v4i64 variants exist).
  case Intrinsic::nvvm_suld_1d_i64_clamp:
  case Intrinsic::nvvm_suld_1d_v2i64_clamp:
  case Intrinsic::nvvm_suld_1d_array_i64_clamp:
  case Intrinsic::nvvm_suld_1d_array_v2i64_clamp:
  case Intrinsic::nvvm_suld_2d_i64_clamp:
  case Intrinsic::nvvm_suld_2d_v2i64_clamp:
  case Intrinsic::nvvm_suld_2d_array_i64_clamp:
  case Intrinsic::nvvm_suld_2d_array_v2i64_clamp:
  case Intrinsic::nvvm_suld_3d_i64_clamp:
  case Intrinsic::nvvm_suld_3d_v2i64_clamp:
  case Intrinsic::nvvm_suld_1d_i64_trap:
  case Intrinsic::nvvm_suld_1d_v2i64_trap:
  case Intrinsic::nvvm_suld_1d_array_i64_trap:
  case Intrinsic::nvvm_suld_1d_array_v2i64_trap:
  case Intrinsic::nvvm_suld_2d_i64_trap:
  case Intrinsic::nvvm_suld_2d_v2i64_trap:
  case Intrinsic::nvvm_suld_2d_array_i64_trap:
  case Intrinsic::nvvm_suld_2d_array_v2i64_trap:
  case Intrinsic::nvvm_suld_3d_i64_trap:
  case Intrinsic::nvvm_suld_3d_v2i64_trap:
  case Intrinsic::nvvm_suld_1d_i64_zero:
  case Intrinsic::nvvm_suld_1d_v2i64_zero:
  case Intrinsic::nvvm_suld_1d_array_i64_zero:
  case Intrinsic::nvvm_suld_1d_array_v2i64_zero:
  case Intrinsic::nvvm_suld_2d_i64_zero:
  case Intrinsic::nvvm_suld_2d_v2i64_zero:
  case Intrinsic::nvvm_suld_2d_array_i64_zero:
  case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
  case Intrinsic::nvvm_suld_3d_i64_zero:
  case Intrinsic::nvvm_suld_3d_v2i64_zero: {
    Info.opc = getOpcForSurfaceInstr(Intrinsic);
    Info.memVT = MVT::i64;
    Info.ptrVal = nullptr;
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 16;
    return true;
  }
  }
  // Not reached: every case above returns, and the default returns false.
  return false;
}

/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
/// Used to guide target specific optimizations, like loop strength reduction
/// (LoopStrengthReduce.cpp) and memory optimization for address mode
/// (CodeGenPrepare.cpp)
bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                                Type *Ty) const {

  // AddrMode - This represents an addressing mode of:
  //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
  //
  // The legal address modes are
  // - [avar]        global symbol, nothing else
  // - [areg]        register base only
  // - [areg+immoff] register base plus immediate offset
  // - [immAddr]     immediate address

  if (AM.BaseGV) {
    // A global symbol base cannot be combined with any other component.
    if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
      return false;
    return true;
  }

  switch (AM.Scale) {
  case 0: // "r", "r+i" or "i" is allowed
    break;
  case 1:
    if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
      return false;
    // Otherwise we have r+i (the scaled register acts as the base).
    break;
  default:
    // No scale > 1 is allowed
    return false;
  }
  return true;
}

//===----------------------------------------------------------------------===//
//                         NVPTX Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target. All the listed single letters map to
/// NVPTX register classes (see getRegForInlineAsmConstraint below).
NVPTXTargetLowering::ConstraintType
NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'b':
    case 'r':
    case 'h':
    case 'c':
    case 'l':
    case 'f':
    case 'd':
    case '0': // NOTE(review): '0' has no entry in getRegForInlineAsmConstraint
              // below, so it falls back to the generic handling — confirm this
              // is intentional.
    case 'N':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// Map a single-letter inline-asm constraint to the corresponding NVPTX
/// virtual register class; everything else is delegated to the generic
/// TargetLowering implementation.
std::pair<unsigned, const TargetRegisterClass *>
NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                  const std::string &Constraint,
                                                  MVT VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'b': // predicate (1-bit)
      return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
    case 'c': // 'c' and 'h' both map to the 16-bit integer class
      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
    case 'h':
      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
    case 'r': // 32-bit integer
      return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
    case 'l': // 64-bit integer
    case 'N':
      return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
    case 'f': // 32-bit float
      return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
    case 'd': // 64-bit float
      return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
3789 unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { 3790 return 4; 3791 } 3792 3793 //===----------------------------------------------------------------------===// 3794 // NVPTX DAG Combining 3795 //===----------------------------------------------------------------------===// 3796 3797 bool NVPTXTargetLowering::allowFMA(MachineFunction &MF, 3798 CodeGenOpt::Level OptLevel) const { 3799 const Function *F = MF.getFunction(); 3800 const TargetOptions &TO = MF.getTarget().Options; 3801 3802 // Always honor command-line argument 3803 if (FMAContractLevelOpt.getNumOccurrences() > 0) { 3804 return FMAContractLevelOpt > 0; 3805 } else if (OptLevel == 0) { 3806 // Do not contract if we're not optimizing the code 3807 return false; 3808 } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) { 3809 // Honor TargetOptions flags that explicitly say fusion is okay 3810 return true; 3811 } else if (F->hasFnAttribute("unsafe-fp-math")) { 3812 // Check for unsafe-fp-math=true coming from Clang 3813 Attribute Attr = F->getFnAttribute("unsafe-fp-math"); 3814 StringRef Val = Attr.getValueAsString(); 3815 if (Val == "true") 3816 return true; 3817 } 3818 3819 // We did not have a clear indication that fusion is allowed, so assume not 3820 return false; 3821 } 3822 3823 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with 3824 /// operands N0 and N1. This is a helper for PerformADDCombine that is 3825 /// called with the default operands, and if that fails, with commuted 3826 /// operands. 
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
                                             TargetLowering::DAGCombinerInfo &DCI,
                                             const NVPTXSubtarget &Subtarget,
                                             CodeGenOpt::Level OptLevel) {
  SelectionDAG &DAG = DCI.DAG;
  // Skip the non-scalar (vector) case; only scalar adds are combined here.
  EVT VT=N0.getValueType();
  if (VT.isVector())
    return SDValue();

  // fold (add (mul a, b), c) -> (mad a, b, c)
  //
  if (N0.getOpcode() == ISD::MUL) {
    assert (VT.isInteger());
    // For integer:
    // Since integer multiply-add costs the same as integer multiply
    // but is more costly than integer add, do the fusion only when
    // the mul is only used in the add.
    if (OptLevel==CodeGenOpt::None || VT != MVT::i32 ||
        !N0.getNode()->hasOneUse())
      return SDValue();

    // Do the folding: emit the target-specific integer multiply-add node.
    return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1), N1);
  }
  else if (N0.getOpcode() == ISD::FMUL) {
    if (VT == MVT::f32 || VT == MVT::f64) {
      const auto *TLI = static_cast<const NVPTXTargetLowering *>(
          &DAG.getTargetLoweringInfo());
      // Honor the target's FMA-contraction policy (command line / flags /
      // function attributes) before fusing anything.
      if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
        return SDValue();

      // For floating point:
      // Do the fusion only when the mul has less than 5 uses and all
      // are add.
      // The heuristic is that if a use is not an add, then that use
      // cannot be fused into fma, therefore mul is still needed anyway.
      // If there are more than 4 uses, even if they are all add, fusing
      // them will increase register pressure.
      //
      int numUses = 0;
      int nonAddCount = 0;
      for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
                                UE = N0.getNode()->use_end();
           UI != UE; ++UI) {
        numUses++;
        SDNode *User = *UI;
        if (User->getOpcode() != ISD::FADD)
          ++nonAddCount;
      }
      if (numUses >= 5)
        return SDValue();
      if (nonAddCount) {
        int orderNo = N->getIROrder();
        int orderNo2 = N0.getNode()->getIROrder();
        // Simple heuristic for estimating potential register pressure:
        // the IR-order difference approximates the distance between def
        // and use, and a longer distance is more likely to cause register
        // pressure. Only fuse when the FMUL is at least 500 IR positions
        // before the ADD.
        if (orderNo - orderNo2 < 500)
          return SDValue();

        // Now, check if at least one of the FMUL's operands is live beyond
        // the node N, which guarantees that the FMA will not increase
        // register pressure at node N.
        bool opIsLive = false;
        const SDNode *left = N0.getOperand(0).getNode();
        const SDNode *right = N0.getOperand(1).getNode();

        // Constants are materialized on demand, so they count as "live".
        if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
          opIsLive = true;

        // Otherwise an operand is live if some user of it comes after N
        // in IR order.
        if (!opIsLive)
          for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) {
            SDNode *User = *UI;
            int orderNo3 = User->getIROrder();
            if (orderNo3 > orderNo) {
              opIsLive = true;
              break;
            }
          }

        if (!opIsLive)
          for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) {
            SDNode *User = *UI;
            int orderNo3 = User->getIROrder();
            if (orderNo3 > orderNo) {
              opIsLive = true;
              break;
            }
          }

        if (!opIsLive)
          return SDValue();
      }

      // fold (fadd (fmul a, b), c) -> (fma a, b, c)
      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1), N1);
    }
  }

  return SDValue();
}

/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///
static SDValue PerformADDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const NVPTXSubtarget &Subtarget,
                                 CodeGenOpt::Level OptLevel) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // First try with the default operand order.
  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget,
                                                 OptLevel);
  if (Result.getNode())
    return Result;

  // If that didn't work, try again with the operands commuted.
  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel);
}

/// Target-specific combine for ISD::AND: strip a redundant AND that only
/// re-clears the already-zero high bits of a zero-extending v2i8/v4i8 load.
static SDValue PerformANDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  // The type legalizer turns a vector load of i8 values into a zextload to i16
  // registers, optionally ANY_EXTENDs it (if target type is integer),
  // and ANDs off the high 8 bits. Since we turn this load into a
  // target-specific DAG node, the DAG combiner fails to eliminate these AND
  // nodes. Do that here.
  SDValue Val = N->getOperand(0);
  SDValue Mask = N->getOperand(1);

  // Canonicalize so the (potential) load is in Val and the mask in Mask.
  if (isa<ConstantSDNode>(Val)) {
    std::swap(Val, Mask);
  }

  SDValue AExt;
  // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
  if (Val.getOpcode() == ISD::ANY_EXTEND) {
    AExt = Val; // remember the extend so it can be re-inserted as a zext
    Val = Val->getOperand(0);
  }

  // Look through a 16-bit register copy.
  if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) {
    Val = Val->getOperand(0);
  }

  if (Val->getOpcode() == NVPTXISD::LoadV2 ||
      Val->getOpcode() == NVPTXISD::LoadV4) {
    ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
    if (!MaskCnst) {
      // Not an AND with a constant
      return SDValue();
    }

    uint64_t MaskVal = MaskCnst->getZExtValue();
    if (MaskVal != 0xff) {
      // Not an AND that chops off top 8 bits
      return SDValue();
    }

    MemSDNode *Mem = dyn_cast<MemSDNode>(Val);
    if (!Mem) {
      // Not a MemSDNode?!?
      return SDValue();
    }

    EVT MemVT = Mem->getMemoryVT();
    if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) {
      // We only handle the i8 case
      return SDValue();
    }

    // The load's extension kind is encoded as its last (constant) operand.
    unsigned ExtType =
      cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))->
        getZExtValue();
    if (ExtType == ISD::SEXTLOAD) {
      // If for some reason the load is a sextload, the and is needed to zero
      // out the high 8 bits
      return SDValue();
    }

    bool AddTo = false;
    if (AExt.getNode() != 0) {
      // Re-insert the ext as a zext.
      Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                            AExt.getValueType(), Val);
      AddTo = true;
    }

    // If we get here, the AND is unnecessary.  Just replace it with the load
    DCI.CombineTo(N, Val, AddTo);
  }

  return SDValue();
}

// Signedness classification for mul.wide operand demotion.
enum OperandSignedness {
  Signed = 0,
  Unsigned,
  Unknown
};

/// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
/// that can be demoted to \p OptSize bits without loss of information. The
/// signedness of the operand, if determinable, is placed in \p S.
static bool IsMulWideOperandDemotable(SDValue Op,
                                      unsigned OptSize,
                                      OperandSignedness &S) {
  S = Unknown;

  // A sign-extend from <= OptSize bits is a signed demotable operand.
  if (Op.getOpcode() == ISD::SIGN_EXTEND ||
      Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT OrigVT = Op.getOperand(0).getValueType();
    if (OrigVT.getSizeInBits() <= OptSize) {
      S = Signed;
      return true;
    }
  } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
    // A zero-extend from <= OptSize bits is an unsigned demotable operand.
    EVT OrigVT = Op.getOperand(0).getValueType();
    if (OrigVT.getSizeInBits() <= OptSize) {
      S = Unsigned;
      return true;
    }
  }

  return false;
}

/// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
/// be demoted to \p OptSize bits without loss of information. If the operands
/// contain a constant, it should appear as the RHS operand. The signedness of
/// the operands is placed in \p IsSigned.
4061 static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, 4062 unsigned OptSize, 4063 bool &IsSigned) { 4064 4065 OperandSignedness LHSSign; 4066 4067 // The LHS operand must be a demotable op 4068 if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign)) 4069 return false; 4070 4071 // We should have been able to determine the signedness from the LHS 4072 if (LHSSign == Unknown) 4073 return false; 4074 4075 IsSigned = (LHSSign == Signed); 4076 4077 // The RHS can be a demotable op or a constant 4078 if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) { 4079 APInt Val = CI->getAPIntValue(); 4080 if (LHSSign == Unsigned) { 4081 if (Val.isIntN(OptSize)) { 4082 return true; 4083 } 4084 return false; 4085 } else { 4086 if (Val.isSignedIntN(OptSize)) { 4087 return true; 4088 } 4089 return false; 4090 } 4091 } else { 4092 OperandSignedness RHSSign; 4093 if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign)) 4094 return false; 4095 4096 if (LHSSign != RHSSign) 4097 return false; 4098 4099 return true; 4100 } 4101 } 4102 4103 /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply 4104 /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform 4105 /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift 4106 /// amount. 
4107 static SDValue TryMULWIDECombine(SDNode *N, 4108 TargetLowering::DAGCombinerInfo &DCI) { 4109 EVT MulType = N->getValueType(0); 4110 if (MulType != MVT::i32 && MulType != MVT::i64) { 4111 return SDValue(); 4112 } 4113 4114 unsigned OptSize = MulType.getSizeInBits() >> 1; 4115 SDValue LHS = N->getOperand(0); 4116 SDValue RHS = N->getOperand(1); 4117 4118 // Canonicalize the multiply so the constant (if any) is on the right 4119 if (N->getOpcode() == ISD::MUL) { 4120 if (isa<ConstantSDNode>(LHS)) { 4121 std::swap(LHS, RHS); 4122 } 4123 } 4124 4125 // If we have a SHL, determine the actual multiply amount 4126 if (N->getOpcode() == ISD::SHL) { 4127 ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS); 4128 if (!ShlRHS) { 4129 return SDValue(); 4130 } 4131 4132 APInt ShiftAmt = ShlRHS->getAPIntValue(); 4133 unsigned BitWidth = MulType.getSizeInBits(); 4134 if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) { 4135 APInt MulVal = APInt(BitWidth, 1) << ShiftAmt; 4136 RHS = DCI.DAG.getConstant(MulVal, MulType); 4137 } else { 4138 return SDValue(); 4139 } 4140 } 4141 4142 bool Signed; 4143 // Verify that our operands are demotable 4144 if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) { 4145 return SDValue(); 4146 } 4147 4148 EVT DemotedVT; 4149 if (MulType == MVT::i32) { 4150 DemotedVT = MVT::i16; 4151 } else { 4152 DemotedVT = MVT::i32; 4153 } 4154 4155 // Truncate the operands to the correct size. Note that these are just for 4156 // type consistency and will (likely) be eliminated in later phases. 
4157 SDValue TruncLHS = 4158 DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, LHS); 4159 SDValue TruncRHS = 4160 DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, RHS); 4161 4162 unsigned Opc; 4163 if (Signed) { 4164 Opc = NVPTXISD::MUL_WIDE_SIGNED; 4165 } else { 4166 Opc = NVPTXISD::MUL_WIDE_UNSIGNED; 4167 } 4168 4169 return DCI.DAG.getNode(Opc, SDLoc(N), MulType, TruncLHS, TruncRHS); 4170 } 4171 4172 /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes. 4173 static SDValue PerformMULCombine(SDNode *N, 4174 TargetLowering::DAGCombinerInfo &DCI, 4175 CodeGenOpt::Level OptLevel) { 4176 if (OptLevel > 0) { 4177 // Try mul.wide combining at OptLevel > 0 4178 SDValue Ret = TryMULWIDECombine(N, DCI); 4179 if (Ret.getNode()) 4180 return Ret; 4181 } 4182 4183 return SDValue(); 4184 } 4185 4186 /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes. 4187 static SDValue PerformSHLCombine(SDNode *N, 4188 TargetLowering::DAGCombinerInfo &DCI, 4189 CodeGenOpt::Level OptLevel) { 4190 if (OptLevel > 0) { 4191 // Try mul.wide combining at OptLevel > 0 4192 SDValue Ret = TryMULWIDECombine(N, DCI); 4193 if (Ret.getNode()) 4194 return Ret; 4195 } 4196 4197 return SDValue(); 4198 } 4199 4200 SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, 4201 DAGCombinerInfo &DCI) const { 4202 CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel(); 4203 switch (N->getOpcode()) { 4204 default: break; 4205 case ISD::ADD: 4206 case ISD::FADD: 4207 return PerformADDCombine(N, DCI, STI, OptLevel); 4208 case ISD::MUL: 4209 return PerformMULCombine(N, DCI, OptLevel); 4210 case ISD::SHL: 4211 return PerformSHLCombine(N, DCI, OptLevel); 4212 case ISD::AND: 4213 return PerformANDCombine(N, DCI); 4214 } 4215 return SDValue(); 4216 } 4217 4218 /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. 
/// ReplaceLoadVector - Convert a vector LOAD of a "native" vector type into a
/// single NVPTXISD::LoadV2/LoadV4 node producing one scalar result per
/// element plus a chain. On success, pushes the rebuilt vector and the load
/// chain onto \p Results; otherwise pushes nothing (letting the legalizer
/// scalarize or re-split the load).
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
                              const DataLayout *TD,
                              SmallVectorImpl<SDValue> &Results) {
  EVT ResVT = N->getValueType(0);
  SDLoc DL(N);

  assert(ResVT.isVector() && "Vector load must have vector type");

  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
  // legal. We can (and should) split that into 2 loads of <2 x double> here
  // but I'm leaving that as a TODO for now.
  assert(ResVT.isSimple() && "Can only handle simple types");
  switch (ResVT.getSimpleVT().SimpleTy) {
  default:
    return;
  case MVT::v2i8:
  case MVT::v2i16:
  case MVT::v2i32:
  case MVT::v2i64:
  case MVT::v2f32:
  case MVT::v2f64:
  case MVT::v4i8:
  case MVT::v4i16:
  case MVT::v4i32:
  case MVT::v4f32:
    // This is a "native" vector type
    break;
  }

  LoadSDNode *LD = cast<LoadSDNode>(N);

  // The vectorized load is only emitted when the load meets the preferred
  // alignment of the vector type.
  unsigned Align = LD->getAlignment();
  unsigned PrefAlign =
    TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
  if (Align < PrefAlign) {
    // This load is not sufficiently aligned, so bail out and let this vector
    // load be scalarized. Note that we may still be able to emit smaller
    // vector loads. For example, if we are loading a <4 x float> with an
    // alignment of 8, this check will fail but the legalizer will try again
    // with 2 x <2 x float>, which will succeed with an alignment of 8.
    return;
  }

  EVT EltVT = ResVT.getVectorElementType();
  unsigned NumElts = ResVT.getVectorNumElements();

  // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
  // Therefore, we must ensure the type is legal. For i1 and i8, we set the
  // loaded type to i16 and propagate the "real" type as the memory type.
  bool NeedTrunc = false;
  if (EltVT.getSizeInBits() < 16) {
    EltVT = MVT::i16;
    NeedTrunc = true;
  }

  unsigned Opcode = 0;
  SDVTList LdResVTs;

  // One scalar result per element, plus the output chain.
  switch (NumElts) {
  default:
    return;
  case 2:
    Opcode = NVPTXISD::LoadV2;
    LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
    break;
  case 4: {
    Opcode = NVPTXISD::LoadV4;
    EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
    LdResVTs = DAG.getVTList(ListVTs);
    break;
  }
  }

  // Copy regular operands
  SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end());

  // The select routine does not have access to the LoadSDNode instance, so
  // pass along the extension information
  OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));

  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
                                          LD->getMemoryVT(),
                                          LD->getMemOperand());

  SmallVector<SDValue, 4> ScalarRes;

  // Collect the per-element results, truncating the widened i16 results back
  // to the original element type when necessary.
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Res = NewLD.getValue(i);
    if (NeedTrunc)
      Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
    ScalarRes.push_back(Res);
  }

  SDValue LoadChain = NewLD.getValue(NumElts);

  SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);

  Results.push_back(BuildVec);
  Results.push_back(LoadChain);
}

/// ReplaceINTRINSIC_W_CHAIN - Custom legalization for the ldg/ldu NVVM
/// intrinsics: vector forms become NVPTXISD::LDGV2/LDGV4/LDUV2/LDUV4 nodes,
/// and the scalar i8 form is widened to an i16 result (with the memory type
/// kept at i8 so isel picks the right instruction). Pushes the replacement
/// value and chain onto \p Results; pushes nothing for unhandled cases.
static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &Results) {
  SDValue Chain = N->getOperand(0);
  SDValue Intrin = N->getOperand(1);
  SDLoc DL(N);

  // Get the intrinsic ID
  unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
  switch (IntrinNo) {
  default:
    return;
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_p:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p: {
    EVT ResVT = N->getValueType(0);

    if (ResVT.isVector()) {
      // Vector LDG/LDU

      unsigned NumElts = ResVT.getVectorNumElements();
      EVT EltVT = ResVT.getVectorElementType();

      // Since LDU/LDG are target nodes, we cannot rely on DAG type
      // legalization.
      // Therefore, we must ensure the type is legal. For i1 and i8, we set the
      // loaded type to i16 and propagate the "real" type as the memory type.
      bool NeedTrunc = false;
      if (EltVT.getSizeInBits() < 16) {
        EltVT = MVT::i16;
        NeedTrunc = true;
      }

      unsigned Opcode = 0;
      SDVTList LdResVTs;

      // Pick the opcode from (element count, ldg-vs-ldu); the inner switches
      // fold the _i/_f/_p intrinsic variants together.
      switch (NumElts) {
      default:
        return;
      case 2:
        switch (IntrinNo) {
        default:
          return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV2;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV2;
          break;
        }
        LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
        break;
      case 4: {
        switch (IntrinNo) {
        default:
          return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV4;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV4;
          break;
        }
        EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
        LdResVTs = DAG.getVTList(ListVTs);
        break;
      }
      }

      SmallVector<SDValue, 8> OtherOps;

      // Copy regular operands

      OtherOps.push_back(Chain); // Chain
      // Skip operand 1 (intrinsic ID)
      // Others
      OtherOps.append(N->op_begin() + 2, N->op_end());

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
                                              MemSD->getMemoryVT(),
                                              MemSD->getMemOperand());

      SmallVector<SDValue, 4> ScalarRes;

      // Truncate widened results back to the real element type if needed.
      for (unsigned i = 0; i < NumElts; ++i) {
        SDValue Res = NewLD.getValue(i);
        if (NeedTrunc)
          Res =
              DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
        ScalarRes.push_back(Res);
      }

      SDValue LoadChain = NewLD.getValue(NumElts);

      SDValue BuildVec =
          DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);

      Results.push_back(BuildVec);
      Results.push_back(LoadChain);
    } else {
      // i8 LDG/LDU
      assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
             "Custom handling of non-i8 ldu/ldg?");

      // Just copy all operands as-is
      SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());

      // Force output to i16
      SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      // We make sure the memory type is i8, which will be used during isel
      // to select the proper instruction.
      SDValue NewLD =
          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
                                  MVT::i8, MemSD->getMemOperand());

      // Narrow the widened i16 result back to the i8 callers expect.
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
                                    NewLD.getValue(0)));
      Results.push_back(NewLD.getValue(1));
    }
  }
  }
}

/// ReplaceNodeResults - Entry point for custom result-type legalization;
/// dispatches to the LOAD and INTRINSIC_W_CHAIN handlers above.
void NVPTXTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    report_fatal_error("Unhandled custom legalization");
  case ISD::LOAD:
    ReplaceLoadVector(N, DAG, getDataLayout(), Results);
    return;
  case ISD::INTRINSIC_W_CHAIN:
    ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
    return;
  }
}

// Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file.
void NVPTXSection::anchor() {}

// The object file owns all of its section objects and must release them.
NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
  delete TextSection;
  delete DataSection;
  delete BSSSection;
  delete ReadOnlySection;

  delete StaticCtorSection;
  delete StaticDtorSection;
  delete LSDASection;
  delete EHFrameSection;
  delete DwarfAbbrevSection;
  delete DwarfInfoSection;
  delete DwarfLineSection;
  delete DwarfFrameSection;
  delete DwarfPubTypesSection;
  delete DwarfDebugInlineSection;
  delete DwarfStrSection;
  delete DwarfLocSection;
  delete DwarfARangesSection;
  delete DwarfRangesSection;
}

// All globals are placed in the single NVPTX data section regardless of kind.
const MCSection *
NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
                                              SectionKind Kind, Mangler &Mang,
                                              const TargetMachine &TM) const {
  return getDataSection();
}