//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "NVPTXISelLowering.h"
#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>

#undef DEBUG_TYPE
#define DEBUG_TYPE "nvptx-lower"

using namespace llvm;

static unsigned int uniqueCallSite = 0;

static cl::opt<bool> sched4reg(
    "nvptx-sched4reg",
    cl::desc("NVPTX Specific: schedule for register pressure"),
    cl::init(false));

static cl::opt<unsigned>
FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
                    cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
                             " 1: do it  2: do it aggressively)"),
                    cl::init(2));

static bool IsPTXVectorType(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::v2i1:
  case MVT::v4i1:
  case MVT::v2i8:
  case MVT::v4i8:
  case MVT::v2i16:
  case MVT::v4i16:
  case MVT::v2i32:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v2f32:
  case MVT::v4f32:
  case MVT::v2f64:
    return true;
  }
}

/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
/// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors
/// into their primitive components.
/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return
/// the same number of types as the Ins/Outs arrays in LowerFormalArguments,
/// LowerCall, and LowerReturn.
static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                               Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                               SmallVectorImpl<uint64_t> *Offsets = nullptr,
                               uint64_t StartingOffset = 0) {
  SmallVector<EVT, 16> TempVTs;
  SmallVector<uint64_t, 16> TempOffsets;

  ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
  for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
    EVT VT = TempVTs[i];
    uint64_t Off = TempOffsets[i];
    if (VT.isVector())
      for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) {
        ValueVTs.push_back(VT.getVectorElementType());
        if (Offsets)
          Offsets->push_back(Off + j * VT.getVectorElementType().getStoreSize());
      }
    else {
      ValueVTs.push_back(VT);
      if (Offsets)
        Offsets->push_back(Off);
    }
  }
}
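
// For example (illustrative): given Ty = { <2 x float>, i32 }, ComputeValueVTs
// would report { v2f32, i32 }, whereas ComputePTXValueVTs flattens the vector
// and reports { f32, f32, i32 } with byte offsets { 0, 4, 8 }, matching the
// one-entry-per-scalar layout of the Ins/Outs arrays used below.
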
// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
                                         const NVPTXSubtarget &STI)
    : TargetLowering(TM), nvTM(&TM), STI(STI) {

  // Always lower memset, memcpy, and memmove intrinsics to load/store
  // instructions, rather than generating calls to memset, memcpy, or memmove.
  MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Jump is expensive. Don't create extra control flow for 'and'/'or'
  // conditional branches.
  setJumpIsExpensive(true);

  // Wide divides are _very_ slow. Try to reduce the width of the divide if
  // possible.
  addBypassSlowDiv(64, 32);

  // By default, use the Source scheduling.
  if (sched4reg)
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Source);

  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);

  // Operations not directly supported by NVPTX.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f64, Expand);
  setOperationAction(ISD::BR_CC, MVT::i1, Expand);
  setOperationAction(ISD::BR_CC, MVT::i8, Expand);
  setOperationAction(ISD::BR_CC, MVT::i16, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::i64, Expand);
  // Some SIGN_EXTEND_INREG can be done using cvt instruction.
  // For others we will expand to a SHL/SRA pair.
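  // For example, sign-extending the low 8 bits held in a 32-bit register can
  // be emitted as a single "cvt.s32.s8" (illustrative), whereas the i1 case
  // below has no cvt form and is expanded to a shl/sra pair.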
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);

  if (STI.hasROT64()) {
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i64, Expand);
    setOperationAction(ISD::ROTR, MVT::i64, Expand);
  }
  if (STI.hasROT32()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i32, Expand);
    setOperationAction(ISD::ROTR, MVT::i32, Expand);
  }

  setOperationAction(ISD::ROTL, MVT::i16, Expand);
  setOperationAction(ISD::ROTR, MVT::i16, Expand);
  setOperationAction(ISD::ROTL, MVT::i8, Expand);
  setOperationAction(ISD::ROTR, MVT::i8, Expand);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  // Indirect branch is not supported.
  // This also disables Jump Table creation.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

  // We want to legalize constant-related memmove and memcpy intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

  // Turn FP extload into load/fpextend.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
  // Turn FP truncstore into trunc + store.
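  // For example, storing an f64 value into an f32 memory location becomes an
  // FP_ROUND to f32 followed by a plain f32 store (illustrative).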
  // FIXME: vector types should also be expanded.
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PTX does not support load / store predicate registers.
  setOperationAction(ISD::LOAD, MVT::i1, Custom);
  setOperationAction(ISD::STORE, MVT::i1, Custom);

  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }

  // This is legal in NVPTX.
  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);

  // TRAP can be lowered to PTX trap.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::ADDC, MVT::i64, Expand);
  setOperationAction(ISD::ADDE, MVT::i64, Expand);

  // Register custom handling for vector loads/stores.
  for (MVT VT : MVT::vector_valuetypes()) {
    if (IsPTXVectorType(VT)) {
      setOperationAction(ISD::LOAD, VT, Custom);
      setOperationAction(ISD::STORE, VT, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
    }
  }

  // Custom handling for i8 intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);

  setOperationAction(ISD::CTLZ, MVT::i16, Legal);
  setOperationAction(ISD::CTLZ, MVT::i32, Legal);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);
  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  setOperationAction(ISD::CTPOP, MVT::i16, Legal);
  setOperationAction(ISD::CTPOP, MVT::i32, Legal);
  setOperationAction(ISD::CTPOP, MVT::i64, Legal);

  // PTX does not directly support SELP of i1, so promote to i32 first.
  setOperationAction(ISD::SELECT, MVT::i1, Custom);

  // PTX cannot multiply two i64s in a single instruction.
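  // (More precisely: no single PTX instruction yields both the high and low
  // halves of a 64x64-bit product; mul.lo and mul.hi are separate, so
  // SMUL_LOHI/UMUL_LOHI are expanded by the legalizer. Illustrative note.)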
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);

  // We have some custom DAG combine patterns for these nodes.
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::FADD);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::SHL);
  setTargetDAGCombine(ISD::SELECT);

  // Now deduce the information based on the above-mentioned actions.
  computeRegisterProperties(STI.getRegisterInfo());
}

const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((NVPTXISD::NodeType)Opcode) {
  case NVPTXISD::FIRST_NUMBER:
    break;
  case NVPTXISD::CALL:
    return "NVPTXISD::CALL";
  case NVPTXISD::RET_FLAG:
    return "NVPTXISD::RET_FLAG";
  case NVPTXISD::LOAD_PARAM:
    return "NVPTXISD::LOAD_PARAM";
  case NVPTXISD::Wrapper:
    return "NVPTXISD::Wrapper";
  case NVPTXISD::DeclareParam:
    return "NVPTXISD::DeclareParam";
  case NVPTXISD::DeclareScalarParam:
    return "NVPTXISD::DeclareScalarParam";
  case NVPTXISD::DeclareRet:
    return "NVPTXISD::DeclareRet";
  case NVPTXISD::DeclareScalarRet:
    return "NVPTXISD::DeclareScalarRet";
  case NVPTXISD::DeclareRetParam:
    return "NVPTXISD::DeclareRetParam";
  case NVPTXISD::PrintCall:
    return "NVPTXISD::PrintCall";
  case NVPTXISD::PrintConvergentCall:
    return "NVPTXISD::PrintConvergentCall";
  case NVPTXISD::PrintCallUni:
    return "NVPTXISD::PrintCallUni";
  case NVPTXISD::PrintConvergentCallUni:
    return "NVPTXISD::PrintConvergentCallUni";
  case NVPTXISD::LoadParam:
    return "NVPTXISD::LoadParam";
  case NVPTXISD::LoadParamV2:
    return "NVPTXISD::LoadParamV2";
  case NVPTXISD::LoadParamV4:
    return "NVPTXISD::LoadParamV4";
  case NVPTXISD::StoreParam:
    return "NVPTXISD::StoreParam";
  case NVPTXISD::StoreParamV2:
    return "NVPTXISD::StoreParamV2";
  case NVPTXISD::StoreParamV4:
    return "NVPTXISD::StoreParamV4";
  case NVPTXISD::StoreParamS32:
    return "NVPTXISD::StoreParamS32";
  case NVPTXISD::StoreParamU32:
    return "NVPTXISD::StoreParamU32";
  case NVPTXISD::CallArgBegin:
    return "NVPTXISD::CallArgBegin";
  case NVPTXISD::CallArg:
    return "NVPTXISD::CallArg";
  case NVPTXISD::LastCallArg:
    return "NVPTXISD::LastCallArg";
  case NVPTXISD::CallArgEnd:
    return "NVPTXISD::CallArgEnd";
  case NVPTXISD::CallVoid:
    return "NVPTXISD::CallVoid";
  case NVPTXISD::CallVal:
    return "NVPTXISD::CallVal";
  case NVPTXISD::CallSymbol:
    return "NVPTXISD::CallSymbol";
  case NVPTXISD::Prototype:
    return "NVPTXISD::Prototype";
  case NVPTXISD::MoveParam:
    return "NVPTXISD::MoveParam";
  case NVPTXISD::StoreRetval:
    return "NVPTXISD::StoreRetval";
  case NVPTXISD::StoreRetvalV2:
    return "NVPTXISD::StoreRetvalV2";
  case NVPTXISD::StoreRetvalV4:
    return "NVPTXISD::StoreRetvalV4";
  case NVPTXISD::PseudoUseParam:
    return "NVPTXISD::PseudoUseParam";
  case NVPTXISD::RETURN:
    return "NVPTXISD::RETURN";
  case NVPTXISD::CallSeqBegin:
    return "NVPTXISD::CallSeqBegin";
  case NVPTXISD::CallSeqEnd:
    return "NVPTXISD::CallSeqEnd";
  case NVPTXISD::CallPrototype:
    return "NVPTXISD::CallPrototype";
  case NVPTXISD::LoadV2:
    return "NVPTXISD::LoadV2";
  case NVPTXISD::LoadV4:
    return "NVPTXISD::LoadV4";
  case NVPTXISD::LDGV2:
    return "NVPTXISD::LDGV2";
  case NVPTXISD::LDGV4:
    return "NVPTXISD::LDGV4";
  case NVPTXISD::LDUV2:
    return "NVPTXISD::LDUV2";
  case NVPTXISD::LDUV4:
    return "NVPTXISD::LDUV4";
  case NVPTXISD::StoreV2:
    return "NVPTXISD::StoreV2";
  case NVPTXISD::StoreV4:
    return "NVPTXISD::StoreV4";
  case NVPTXISD::FUN_SHFL_CLAMP:
    return "NVPTXISD::FUN_SHFL_CLAMP";
  case NVPTXISD::FUN_SHFR_CLAMP:
    return "NVPTXISD::FUN_SHFR_CLAMP";
  case NVPTXISD::IMAD:
    return "NVPTXISD::IMAD";
  case NVPTXISD::Dummy:
    return "NVPTXISD::Dummy";
  case NVPTXISD::MUL_WIDE_SIGNED:
    return "NVPTXISD::MUL_WIDE_SIGNED";
  case NVPTXISD::MUL_WIDE_UNSIGNED:
    return "NVPTXISD::MUL_WIDE_UNSIGNED";
  case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32";
  case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
  case NVPTXISD::Tex1DFloatFloatLevel:
    return "NVPTXISD::Tex1DFloatFloatLevel";
  case NVPTXISD::Tex1DFloatFloatGrad:
    return "NVPTXISD::Tex1DFloatFloatGrad";
  case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32";
  case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float";
  case NVPTXISD::Tex1DS32FloatLevel:
    return "NVPTXISD::Tex1DS32FloatLevel";
  case NVPTXISD::Tex1DS32FloatGrad:
    return "NVPTXISD::Tex1DS32FloatGrad";
  case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32";
  case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float";
  case NVPTXISD::Tex1DU32FloatLevel:
    return "NVPTXISD::Tex1DU32FloatLevel";
  case NVPTXISD::Tex1DU32FloatGrad:
    return "NVPTXISD::Tex1DU32FloatGrad";
  case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32";
  case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
    return "NVPTXISD::Tex1DArrayFloatFloatLevel";
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
    return "NVPTXISD::Tex1DArrayFloatFloatGrad";
  case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32";
  case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float";
  case NVPTXISD::Tex1DArrayS32FloatLevel:
    return "NVPTXISD::Tex1DArrayS32FloatLevel";
  case NVPTXISD::Tex1DArrayS32FloatGrad:
    return "NVPTXISD::Tex1DArrayS32FloatGrad";
  case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32";
  case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float";
  case NVPTXISD::Tex1DArrayU32FloatLevel:
    return "NVPTXISD::Tex1DArrayU32FloatLevel";
  case NVPTXISD::Tex1DArrayU32FloatGrad:
    return "NVPTXISD::Tex1DArrayU32FloatGrad";
  case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32";
  case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat";
  case NVPTXISD::Tex2DFloatFloatLevel:
    return "NVPTXISD::Tex2DFloatFloatLevel";
  case NVPTXISD::Tex2DFloatFloatGrad:
    return "NVPTXISD::Tex2DFloatFloatGrad";
  case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32";
  case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float";
  case NVPTXISD::Tex2DS32FloatLevel:
    return "NVPTXISD::Tex2DS32FloatLevel";
  case NVPTXISD::Tex2DS32FloatGrad:
    return "NVPTXISD::Tex2DS32FloatGrad";
  case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32";
  case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float";
  case NVPTXISD::Tex2DU32FloatLevel:
    return "NVPTXISD::Tex2DU32FloatLevel";
  case NVPTXISD::Tex2DU32FloatGrad:
    return "NVPTXISD::Tex2DU32FloatGrad";
  case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32";
  case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
    return "NVPTXISD::Tex2DArrayFloatFloatLevel";
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
    return "NVPTXISD::Tex2DArrayFloatFloatGrad";
  case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32";
  case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float";
  case NVPTXISD::Tex2DArrayS32FloatLevel:
    return "NVPTXISD::Tex2DArrayS32FloatLevel";
  case NVPTXISD::Tex2DArrayS32FloatGrad:
    return "NVPTXISD::Tex2DArrayS32FloatGrad";
  case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32";
  case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float";
  case NVPTXISD::Tex2DArrayU32FloatLevel:
    return "NVPTXISD::Tex2DArrayU32FloatLevel";
  case NVPTXISD::Tex2DArrayU32FloatGrad:
    return "NVPTXISD::Tex2DArrayU32FloatGrad";
  case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32";
  case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat";
  case NVPTXISD::Tex3DFloatFloatLevel:
    return "NVPTXISD::Tex3DFloatFloatLevel";
  case NVPTXISD::Tex3DFloatFloatGrad:
    return "NVPTXISD::Tex3DFloatFloatGrad";
  case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32";
  case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float";
  case NVPTXISD::Tex3DS32FloatLevel:
    return "NVPTXISD::Tex3DS32FloatLevel";
  case NVPTXISD::Tex3DS32FloatGrad:
    return "NVPTXISD::Tex3DS32FloatGrad";
  case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32";
  case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float";
  case NVPTXISD::Tex3DU32FloatLevel:
    return "NVPTXISD::Tex3DU32FloatLevel";
  case NVPTXISD::Tex3DU32FloatGrad:
    return "NVPTXISD::Tex3DU32FloatGrad";
  case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat";
  case NVPTXISD::TexCubeFloatFloatLevel:
    return "NVPTXISD::TexCubeFloatFloatLevel";
  case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float";
  case NVPTXISD::TexCubeS32FloatLevel:
    return "NVPTXISD::TexCubeS32FloatLevel";
  case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float";
  case NVPTXISD::TexCubeU32FloatLevel:
    return "NVPTXISD::TexCubeU32FloatLevel";
  case NVPTXISD::TexCubeArrayFloatFloat:
    return "NVPTXISD::TexCubeArrayFloatFloat";
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
    return "NVPTXISD::TexCubeArrayFloatFloatLevel";
  case NVPTXISD::TexCubeArrayS32Float:
    return "NVPTXISD::TexCubeArrayS32Float";
  case NVPTXISD::TexCubeArrayS32FloatLevel:
    return "NVPTXISD::TexCubeArrayS32FloatLevel";
  case NVPTXISD::TexCubeArrayU32Float:
    return "NVPTXISD::TexCubeArrayU32Float";
  case NVPTXISD::TexCubeArrayU32FloatLevel:
    return "NVPTXISD::TexCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4R2DFloatFloat:
    return "NVPTXISD::Tld4R2DFloatFloat";
  case NVPTXISD::Tld4G2DFloatFloat:
    return "NVPTXISD::Tld4G2DFloatFloat";
  case NVPTXISD::Tld4B2DFloatFloat:
    return "NVPTXISD::Tld4B2DFloatFloat";
  case NVPTXISD::Tld4A2DFloatFloat:
    return "NVPTXISD::Tld4A2DFloatFloat";
  case NVPTXISD::Tld4R2DS64Float:
    return "NVPTXISD::Tld4R2DS64Float";
  case NVPTXISD::Tld4G2DS64Float:
    return "NVPTXISD::Tld4G2DS64Float";
  case NVPTXISD::Tld4B2DS64Float:
    return "NVPTXISD::Tld4B2DS64Float";
  case NVPTXISD::Tld4A2DS64Float:
    return "NVPTXISD::Tld4A2DS64Float";
  case NVPTXISD::Tld4R2DU64Float:
"NVPTXISD::Tld4R2DU64Float"; 524 case NVPTXISD::Tld4G2DU64Float: 525 return "NVPTXISD::Tld4G2DU64Float"; 526 case NVPTXISD::Tld4B2DU64Float: 527 return "NVPTXISD::Tld4B2DU64Float"; 528 case NVPTXISD::Tld4A2DU64Float: 529 return "NVPTXISD::Tld4A2DU64Float"; 530 531 case NVPTXISD::TexUnified1DFloatS32: 532 return "NVPTXISD::TexUnified1DFloatS32"; 533 case NVPTXISD::TexUnified1DFloatFloat: 534 return "NVPTXISD::TexUnified1DFloatFloat"; 535 case NVPTXISD::TexUnified1DFloatFloatLevel: 536 return "NVPTXISD::TexUnified1DFloatFloatLevel"; 537 case NVPTXISD::TexUnified1DFloatFloatGrad: 538 return "NVPTXISD::TexUnified1DFloatFloatGrad"; 539 case NVPTXISD::TexUnified1DS32S32: 540 return "NVPTXISD::TexUnified1DS32S32"; 541 case NVPTXISD::TexUnified1DS32Float: 542 return "NVPTXISD::TexUnified1DS32Float"; 543 case NVPTXISD::TexUnified1DS32FloatLevel: 544 return "NVPTXISD::TexUnified1DS32FloatLevel"; 545 case NVPTXISD::TexUnified1DS32FloatGrad: 546 return "NVPTXISD::TexUnified1DS32FloatGrad"; 547 case NVPTXISD::TexUnified1DU32S32: 548 return "NVPTXISD::TexUnified1DU32S32"; 549 case NVPTXISD::TexUnified1DU32Float: 550 return "NVPTXISD::TexUnified1DU32Float"; 551 case NVPTXISD::TexUnified1DU32FloatLevel: 552 return "NVPTXISD::TexUnified1DU32FloatLevel"; 553 case NVPTXISD::TexUnified1DU32FloatGrad: 554 return "NVPTXISD::TexUnified1DU32FloatGrad"; 555 case NVPTXISD::TexUnified1DArrayFloatS32: 556 return "NVPTXISD::TexUnified1DArrayFloatS32"; 557 case NVPTXISD::TexUnified1DArrayFloatFloat: 558 return "NVPTXISD::TexUnified1DArrayFloatFloat"; 559 case NVPTXISD::TexUnified1DArrayFloatFloatLevel: 560 return "NVPTXISD::TexUnified1DArrayFloatFloatLevel"; 561 case NVPTXISD::TexUnified1DArrayFloatFloatGrad: 562 return "NVPTXISD::TexUnified1DArrayFloatFloatGrad"; 563 case NVPTXISD::TexUnified1DArrayS32S32: 564 return "NVPTXISD::TexUnified1DArrayS32S32"; 565 case NVPTXISD::TexUnified1DArrayS32Float: 566 return "NVPTXISD::TexUnified1DArrayS32Float"; 567 case NVPTXISD::TexUnified1DArrayS32FloatLevel: 568 return "NVPTXISD::TexUnified1DArrayS32FloatLevel"; 569 case NVPTXISD::TexUnified1DArrayS32FloatGrad: 570 return "NVPTXISD::TexUnified1DArrayS32FloatGrad"; 571 case NVPTXISD::TexUnified1DArrayU32S32: 572 return "NVPTXISD::TexUnified1DArrayU32S32"; 573 case NVPTXISD::TexUnified1DArrayU32Float: 574 return "NVPTXISD::TexUnified1DArrayU32Float"; 575 case NVPTXISD::TexUnified1DArrayU32FloatLevel: 576 return "NVPTXISD::TexUnified1DArrayU32FloatLevel"; 577 case NVPTXISD::TexUnified1DArrayU32FloatGrad: 578 return "NVPTXISD::TexUnified1DArrayU32FloatGrad"; 579 case NVPTXISD::TexUnified2DFloatS32: 580 return "NVPTXISD::TexUnified2DFloatS32"; 581 case NVPTXISD::TexUnified2DFloatFloat: 582 return "NVPTXISD::TexUnified2DFloatFloat"; 583 case NVPTXISD::TexUnified2DFloatFloatLevel: 584 return "NVPTXISD::TexUnified2DFloatFloatLevel"; 585 case NVPTXISD::TexUnified2DFloatFloatGrad: 586 return "NVPTXISD::TexUnified2DFloatFloatGrad"; 587 case NVPTXISD::TexUnified2DS32S32: 588 return "NVPTXISD::TexUnified2DS32S32"; 589 case NVPTXISD::TexUnified2DS32Float: 590 return "NVPTXISD::TexUnified2DS32Float"; 591 case NVPTXISD::TexUnified2DS32FloatLevel: 592 return "NVPTXISD::TexUnified2DS32FloatLevel"; 593 case NVPTXISD::TexUnified2DS32FloatGrad: 594 return "NVPTXISD::TexUnified2DS32FloatGrad"; 595 case NVPTXISD::TexUnified2DU32S32: 596 return "NVPTXISD::TexUnified2DU32S32"; 597 case NVPTXISD::TexUnified2DU32Float: 598 return "NVPTXISD::TexUnified2DU32Float"; 599 case NVPTXISD::TexUnified2DU32FloatLevel: 600 return 
"NVPTXISD::TexUnified2DU32FloatLevel"; 601 case NVPTXISD::TexUnified2DU32FloatGrad: 602 return "NVPTXISD::TexUnified2DU32FloatGrad"; 603 case NVPTXISD::TexUnified2DArrayFloatS32: 604 return "NVPTXISD::TexUnified2DArrayFloatS32"; 605 case NVPTXISD::TexUnified2DArrayFloatFloat: 606 return "NVPTXISD::TexUnified2DArrayFloatFloat"; 607 case NVPTXISD::TexUnified2DArrayFloatFloatLevel: 608 return "NVPTXISD::TexUnified2DArrayFloatFloatLevel"; 609 case NVPTXISD::TexUnified2DArrayFloatFloatGrad: 610 return "NVPTXISD::TexUnified2DArrayFloatFloatGrad"; 611 case NVPTXISD::TexUnified2DArrayS32S32: 612 return "NVPTXISD::TexUnified2DArrayS32S32"; 613 case NVPTXISD::TexUnified2DArrayS32Float: 614 return "NVPTXISD::TexUnified2DArrayS32Float"; 615 case NVPTXISD::TexUnified2DArrayS32FloatLevel: 616 return "NVPTXISD::TexUnified2DArrayS32FloatLevel"; 617 case NVPTXISD::TexUnified2DArrayS32FloatGrad: 618 return "NVPTXISD::TexUnified2DArrayS32FloatGrad"; 619 case NVPTXISD::TexUnified2DArrayU32S32: 620 return "NVPTXISD::TexUnified2DArrayU32S32"; 621 case NVPTXISD::TexUnified2DArrayU32Float: 622 return "NVPTXISD::TexUnified2DArrayU32Float"; 623 case NVPTXISD::TexUnified2DArrayU32FloatLevel: 624 return "NVPTXISD::TexUnified2DArrayU32FloatLevel"; 625 case NVPTXISD::TexUnified2DArrayU32FloatGrad: 626 return "NVPTXISD::TexUnified2DArrayU32FloatGrad"; 627 case NVPTXISD::TexUnified3DFloatS32: 628 return "NVPTXISD::TexUnified3DFloatS32"; 629 case NVPTXISD::TexUnified3DFloatFloat: 630 return "NVPTXISD::TexUnified3DFloatFloat"; 631 case NVPTXISD::TexUnified3DFloatFloatLevel: 632 return "NVPTXISD::TexUnified3DFloatFloatLevel"; 633 case NVPTXISD::TexUnified3DFloatFloatGrad: 634 return "NVPTXISD::TexUnified3DFloatFloatGrad"; 635 case NVPTXISD::TexUnified3DS32S32: 636 return "NVPTXISD::TexUnified3DS32S32"; 637 case NVPTXISD::TexUnified3DS32Float: 638 return "NVPTXISD::TexUnified3DS32Float"; 639 case NVPTXISD::TexUnified3DS32FloatLevel: 640 return "NVPTXISD::TexUnified3DS32FloatLevel"; 641 case NVPTXISD::TexUnified3DS32FloatGrad: 642 return "NVPTXISD::TexUnified3DS32FloatGrad"; 643 case NVPTXISD::TexUnified3DU32S32: 644 return "NVPTXISD::TexUnified3DU32S32"; 645 case NVPTXISD::TexUnified3DU32Float: 646 return "NVPTXISD::TexUnified3DU32Float"; 647 case NVPTXISD::TexUnified3DU32FloatLevel: 648 return "NVPTXISD::TexUnified3DU32FloatLevel"; 649 case NVPTXISD::TexUnified3DU32FloatGrad: 650 return "NVPTXISD::TexUnified3DU32FloatGrad"; 651 case NVPTXISD::TexUnifiedCubeFloatFloat: 652 return "NVPTXISD::TexUnifiedCubeFloatFloat"; 653 case NVPTXISD::TexUnifiedCubeFloatFloatLevel: 654 return "NVPTXISD::TexUnifiedCubeFloatFloatLevel"; 655 case NVPTXISD::TexUnifiedCubeS32Float: 656 return "NVPTXISD::TexUnifiedCubeS32Float"; 657 case NVPTXISD::TexUnifiedCubeS32FloatLevel: 658 return "NVPTXISD::TexUnifiedCubeS32FloatLevel"; 659 case NVPTXISD::TexUnifiedCubeU32Float: 660 return "NVPTXISD::TexUnifiedCubeU32Float"; 661 case NVPTXISD::TexUnifiedCubeU32FloatLevel: 662 return "NVPTXISD::TexUnifiedCubeU32FloatLevel"; 663 case NVPTXISD::TexUnifiedCubeArrayFloatFloat: 664 return "NVPTXISD::TexUnifiedCubeArrayFloatFloat"; 665 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: 666 return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel"; 667 case NVPTXISD::TexUnifiedCubeArrayS32Float: 668 return "NVPTXISD::TexUnifiedCubeArrayS32Float"; 669 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: 670 return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel"; 671 case NVPTXISD::TexUnifiedCubeArrayU32Float: 672 return "NVPTXISD::TexUnifiedCubeArrayU32Float"; 673 case 
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
  case NVPTXISD::Tld4UnifiedR2DS64Float:
    return "NVPTXISD::Tld4UnifiedR2DS64Float";
  case NVPTXISD::Tld4UnifiedG2DS64Float:
    return "NVPTXISD::Tld4UnifiedG2DS64Float";
  case NVPTXISD::Tld4UnifiedB2DS64Float:
    return "NVPTXISD::Tld4UnifiedB2DS64Float";
  case NVPTXISD::Tld4UnifiedA2DS64Float:
    return "NVPTXISD::Tld4UnifiedA2DS64Float";
  case NVPTXISD::Tld4UnifiedR2DU64Float:
    return "NVPTXISD::Tld4UnifiedR2DU64Float";
  case NVPTXISD::Tld4UnifiedG2DU64Float:
    return "NVPTXISD::Tld4UnifiedG2DU64Float";
  case NVPTXISD::Tld4UnifiedB2DU64Float:
    return "NVPTXISD::Tld4UnifiedB2DU64Float";
  case NVPTXISD::Tld4UnifiedA2DU64Float:
    return "NVPTXISD::Tld4UnifiedA2DU64Float";

  case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp";
  case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp";
  case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp";
  case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp";
  case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp";
  case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp";
  case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp";
  case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp";
  case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp";
  case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp";
  case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp";

  case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp";
  case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp";
  case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp";
  case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp";
  case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp";
  case NVPTXISD::Suld1DArrayV2I16Clamp: return "NVPTXISD::Suld1DArrayV2I16Clamp";
  case NVPTXISD::Suld1DArrayV2I32Clamp: return "NVPTXISD::Suld1DArrayV2I32Clamp";
  case NVPTXISD::Suld1DArrayV2I64Clamp: return "NVPTXISD::Suld1DArrayV2I64Clamp";
  case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp";
  case NVPTXISD::Suld1DArrayV4I16Clamp: return "NVPTXISD::Suld1DArrayV4I16Clamp";
  case NVPTXISD::Suld1DArrayV4I32Clamp: return "NVPTXISD::Suld1DArrayV4I32Clamp";

  case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp";
  case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp";
  case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp";
  case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp";
  case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp";
  case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp";
  case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp";
  case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp";
  case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp";
  case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp";
  case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp";

  case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp";
  case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp";
  case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp";
  case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp";
  case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp";
  case NVPTXISD::Suld2DArrayV2I16Clamp: return "NVPTXISD::Suld2DArrayV2I16Clamp";
  case NVPTXISD::Suld2DArrayV2I32Clamp: return "NVPTXISD::Suld2DArrayV2I32Clamp";
  case NVPTXISD::Suld2DArrayV2I64Clamp: return "NVPTXISD::Suld2DArrayV2I64Clamp";
  case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp";
  case NVPTXISD::Suld2DArrayV4I16Clamp: return "NVPTXISD::Suld2DArrayV4I16Clamp";
  case NVPTXISD::Suld2DArrayV4I32Clamp: return "NVPTXISD::Suld2DArrayV4I32Clamp";

  case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp";
  case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp";
  case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp";
  case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp";
  case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp";
  case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp";
  case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp";
  case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp";
  case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp";
  case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp";
  case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp";

  case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap";
  case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap";
  case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap";
  case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap";
  case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap";
  case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap";
  case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap";
  case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap";
  case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap";
  case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap";
  case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap";

  case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap";
  case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap";
  case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap";
  case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap";
  case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap";
  case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap";
  case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap";
  case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap";
  case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap";
  case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap";
  case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap";

  case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap";
"NVPTXISD::Suld2DI8Trap"; 785 case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap"; 786 case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap"; 787 case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap"; 788 case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap"; 789 case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap"; 790 case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap"; 791 case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap"; 792 case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap"; 793 case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap"; 794 case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap"; 795 796 case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap"; 797 case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap"; 798 case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap"; 799 case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap"; 800 case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap"; 801 case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap"; 802 case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap"; 803 case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap"; 804 case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap"; 805 case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap"; 806 case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap"; 807 808 case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap"; 809 case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap"; 810 case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap"; 811 case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap"; 812 case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap"; 813 case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap"; 814 case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap"; 815 case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap"; 816 case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap"; 817 case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap"; 818 case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap"; 819 820 case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero"; 821 case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero"; 822 case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero"; 823 case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero"; 824 case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero"; 825 case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero"; 826 case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero"; 827 case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero"; 828 case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero"; 829 case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero"; 830 case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero"; 831 832 case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero"; 833 case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero"; 834 case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero"; 835 case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero"; 836 case NVPTXISD::Suld1DArrayV2I8Zero: return 
"NVPTXISD::Suld1DArrayV2I8Zero"; 837 case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero"; 838 case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero"; 839 case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero"; 840 case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero"; 841 case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero"; 842 case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero"; 843 844 case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero"; 845 case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero"; 846 case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero"; 847 case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero"; 848 case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero"; 849 case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero"; 850 case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero"; 851 case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero"; 852 case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero"; 853 case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero"; 854 case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero"; 855 856 case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero"; 857 case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero"; 858 case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero"; 859 case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero"; 860 case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero"; 861 case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero"; 862 case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero"; 863 case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero"; 864 case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero"; 865 case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero"; 866 case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero"; 867 868 case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero"; 869 case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero"; 870 case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero"; 871 case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero"; 872 case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero"; 873 case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero"; 874 case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero"; 875 case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero"; 876 case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero"; 877 case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero"; 878 case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero"; 879 } 880 return nullptr; 881 } 882 883 TargetLoweringBase::LegalizeTypeAction 884 NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const { 885 if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1) 886 return TypeSplitVector; 887 888 return TargetLoweringBase::getPreferredVectorAction(VT); 889 } 890 891 SDValue 892 NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { 893 SDLoc dl(Op); 894 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 895 auto PtrVT = getPointerTy(DAG.getDataLayout()); 896 Op = DAG.getTargetGlobalAddress(GV, 

SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  Op = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
  return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
}

std::string NVPTXTargetLowering::getPrototype(
    const DataLayout &DL, Type *retTy, const ArgListTy &Args,
    const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment,
    const ImmutableCallSite *CS) const {
  auto PtrVT = getPointerTy(DL);

  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return "";

  std::stringstream O;
  O << "prototype_" << uniqueCallSite << " : .callprototype ";

  if (retTy->getTypeID() == Type::VoidTyID) {
    O << "()";
  } else {
    O << "(";
    if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
      unsigned size = 0;
      if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
        size = ITy->getBitWidth();
        if (size < 32)
          size = 32;
      } else {
        assert(retTy->isFloatingPointTy() &&
               "Floating point type expected here");
        size = retTy->getPrimitiveSizeInBits();
      }

      O << ".param .b" << size << " _";
    } else if (isa<PointerType>(retTy)) {
      O << ".param .b" << PtrVT.getSizeInBits() << " _";
    } else if ((retTy->getTypeID() == Type::StructTyID) ||
               isa<VectorType>(retTy)) {
      auto &DL = CS->getCalledFunction()->getParent()->getDataLayout();
      O << ".param .align " << retAlignment << " .b8 _["
        << DL.getTypeAllocSize(retTy) << "]";
    } else {
      llvm_unreachable("Unknown return type");
    }
    O << ") ";
  }
  O << "_ (";

  bool first = true;

  unsigned OIdx = 0;
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    Type *Ty = Args[i].Ty;
    if (!first) {
      O << ", ";
    }
    first = false;

    if (!Outs[OIdx].Flags.isByVal()) {
      if (Ty->isAggregateType() || Ty->isVectorTy()) {
        unsigned align = 0;
        const CallInst *CallI = cast<CallInst>(CS->getInstruction());
        // +1 because index 0 is reserved for return type alignment.
        if (!llvm::getAlign(*CallI, i + 1, align))
          align = DL.getABITypeAlignment(Ty);
        unsigned sz = DL.getTypeAllocSize(Ty);
        O << ".param .align " << align << " .b8 ";
        O << "_";
        O << "[" << sz << "]";
        // Update the index for Outs.
        SmallVector<EVT, 16> vtparts;
        ComputeValueVTs(*this, DL, Ty, vtparts);
        if (unsigned len = vtparts.size())
          OIdx += len - 1;
        continue;
      }
      // i8 types in IR will be i16 types in SDAG.
      assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
              (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
             "type mismatch between callee prototype and arguments");
      // Scalar type.
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32)
          sz = 32;
      } else if (isa<PointerType>(Ty))
        sz = PtrVT.getSizeInBits();
      else
        sz = Ty->getPrimitiveSizeInBits();
      O << ".param .b" << sz << " ";
      O << "_";
      continue;
    }
    auto *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy && "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    unsigned align = Outs[OIdx].Flags.getByValAlign();
    unsigned sz = DL.getTypeAllocSize(ETy);
    O << ".param .align " << align << " .b8 ";
    O << "_";
    O << "[" << sz << "]";
  }
  O << ");";
  return O.str();
}
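
// Example of the string produced by getPrototype above (illustrative, assuming
// 64-bit pointers): for a callee of type 'float (i32, float*)' the result is
// roughly
//   prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .b64 _);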

unsigned
NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
                                          const ImmutableCallSite *CS,
                                          Type *Ty,
                                          unsigned Idx) const {
  unsigned Align = 0;
  const Value *DirectCallee = CS->getCalledFunction();

  if (!DirectCallee) {
    // We don't have a direct function symbol, but that may be because of
    // constant cast instructions in the call.
    const Instruction *CalleeI = CS->getInstruction();
    assert(CalleeI && "Call target is not a function or derived value?");

    // With bitcast'd call targets, the instruction will be the call.
    if (isa<CallInst>(CalleeI)) {
      // Check if we have call alignment metadata.
      if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align))
        return Align;

      const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
      // Ignore any bitcast instructions.
      while (isa<ConstantExpr>(CalleeV)) {
        const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
        if (!CE->isCast())
          break;
        // Look through the bitcast.
        CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0);
      }

      // We have now looked past all of the bitcasts. Do we finally have a
      // Function?
      if (isa<Function>(CalleeV))
        DirectCallee = CalleeV;
    }
  }

  // Check for function alignment information if we found that the
  // ultimate target is a Function.
  if (DirectCallee)
    if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align))
      return Align;

  // Call is indirect or alignment information is not available; fall back to
  // the ABI type alignment.
  auto &DL = CS->getCaller()->getParent()->getDataLayout();
  return DL.getABITypeAlignment(Ty);
}

SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  ArgListTy &Args = CLI.getArgs();
  Type *retTy = CLI.RetTy;
  ImmutableCallSite *CS = CLI.CS;

  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;
  MachineFunction &MF = DAG.getMachineFunction();
  const Function *F = MF.getFunction();
  auto &DL = MF.getDataLayout();

  SDValue tempChain = Chain;
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getIntPtrConstant(uniqueCallSite, dl, true),
                               dl);
  SDValue InFlag = Chain.getValue(1);

  unsigned paramCount = 0;
  // Args.size() and Outs.size() need not match.
  // Outs.size() will be larger
  //   * if there is an aggregate argument with multiple fields (each field
  //     showing up separately in Outs)
  //   * if there is a vector argument with more than typical vector-length
  //     elements (generally if more than 4) where each vector element is
  //     individually present in Outs.
  // So a different index should be used for indexing into Outs/OutVals.
  // See similar issue in LowerFormalArguments.
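  // For example (illustrative): an argument of IR type {i32, float} is a
  // single entry in Args but contributes two entries (i32 and f32) to
  // Outs/OutVals, which is why OIdx below advances independently of i.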
  unsigned OIdx = 0;
  // Declare the .params or .reg needed to pass values to the function.
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    EVT VT = Outs[OIdx].VT;
    Type *Ty = Args[i].Ty;

    if (!Outs[OIdx].Flags.isByVal()) {
      if (Ty->isAggregateType()) {
        // aggregate
        SmallVector<EVT, 16> vtparts;
        SmallVector<uint64_t, 16> Offsets;
        ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &Offsets,
                           0);

        unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
        // declare .param .align <align> .b8 .param<n>[<size>];
        unsigned sz = DL.getTypeAllocSize(Ty);
        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareParamOps[] = { Chain,
                                      DAG.getConstant(align, dl, MVT::i32),
                                      DAG.getConstant(paramCount, dl, MVT::i32),
                                      DAG.getConstant(sz, dl, MVT::i32),
                                      InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                            DeclareParamOps);
        InFlag = Chain.getValue(1);
        for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
          EVT elemtype = vtparts[j];
          unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]);
          if (elemtype.isInteger() && (sz < 8))
            sz = 8;
          SDValue StVal = OutVals[OIdx];
          if (elemtype.getSizeInBits() < 16) {
            StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
          }
          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, dl, MVT::i32),
                                     DAG.getConstant(Offsets[j], dl, MVT::i32),
                                     StVal, InFlag };
          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
                                          CopyParamVTs, CopyParamOps,
                                          elemtype, MachinePointerInfo(),
                                          ArgAlign);
          InFlag = Chain.getValue(1);
          ++OIdx;
        }
        if (vtparts.size() > 0)
          --OIdx;
        ++paramCount;
        continue;
      }
      if (Ty->isVectorTy()) {
        EVT ObjectVT = getValueType(DL, Ty);
        unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
        // declare .param .align <align> .b8 .param<n>[<size>];
        unsigned sz = DL.getTypeAllocSize(Ty);
        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareParamOps[] = { Chain,
                                      DAG.getConstant(align, dl, MVT::i32),
                                      DAG.getConstant(paramCount, dl, MVT::i32),
                                      DAG.getConstant(sz, dl, MVT::i32),
                                      InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                            DeclareParamOps);
        InFlag = Chain.getValue(1);
        unsigned NumElts = ObjectVT.getVectorNumElements();
        EVT EltVT = ObjectVT.getVectorElementType();
        EVT MemVT = EltVT;
        bool NeedExtend = false;
        if (EltVT.getSizeInBits() < 16) {
          NeedExtend = true;
          EltVT = MVT::i16;
        }

        // V1 store
        if (NumElts == 1) {
          SDValue Elt = OutVals[OIdx++];
          if (NeedExtend)
            Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt);

          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, dl, MVT::i32),
                                     DAG.getConstant(0, dl, MVT::i32), Elt,
                                     InFlag };
          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
                                          CopyParamVTs, CopyParamOps,
                                          MemVT, MachinePointerInfo());
          InFlag = Chain.getValue(1);
        } else if (NumElts == 2) {
          SDValue Elt0 = OutVals[OIdx++];
          SDValue Elt1 = OutVals[OIdx++];
          if (NeedExtend) {
            Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0);
            Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1);
          }

          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, dl, MVT::i32),
                                     DAG.getConstant(0, dl, MVT::i32), Elt0,
                                     Elt1, InFlag };
          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl,
                                          CopyParamVTs, CopyParamOps,
                                          MemVT, MachinePointerInfo());
          InFlag = Chain.getValue(1);
        } else {
          unsigned curOffset = 0;
          // V4 stores
          // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
          // the vector will be expanded to a power of 2 elements, so we know
          // we can always round up to the next multiple of 4 when creating
          // the vector stores.
          // e.g.  4 elem => 1 st.v4
          //       6 elem => 2 st.v4
          //       8 elem => 2 st.v4
          //      11 elem => 3 st.v4
          unsigned VecSize = 4;
          if (EltVT.getSizeInBits() == 64)
            VecSize = 2;

          // This is potentially only part of a vector, so assume all elements
          // are packed together.
          unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize;

          for (unsigned i = 0; i < NumElts; i += VecSize) {
            // Get values
            SDValue StoreVal;
            SmallVector<SDValue, 8> Ops;
            Ops.push_back(Chain);
            Ops.push_back(DAG.getConstant(paramCount, dl, MVT::i32));
            Ops.push_back(DAG.getConstant(curOffset, dl, MVT::i32));

            unsigned Opc = NVPTXISD::StoreParamV2;

            StoreVal = OutVals[OIdx++];
            if (NeedExtend)
              StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
            Ops.push_back(StoreVal);

            if (i + 1 < NumElts) {
              StoreVal = OutVals[OIdx++];
              if (NeedExtend)
                StoreVal =
                    DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
            } else {
              StoreVal = DAG.getUNDEF(EltVT);
            }
            Ops.push_back(StoreVal);

            if (VecSize == 4) {
              Opc = NVPTXISD::StoreParamV4;
              if (i + 2 < NumElts) {
                StoreVal = OutVals[OIdx++];
                if (NeedExtend)
                  StoreVal =
                      DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
              } else {
                StoreVal = DAG.getUNDEF(EltVT);
              }
              Ops.push_back(StoreVal);

              if (i + 3 < NumElts) {
                StoreVal = OutVals[OIdx++];
                if (NeedExtend)
                  StoreVal =
                      DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
              } else {
                StoreVal = DAG.getUNDEF(EltVT);
              }
              Ops.push_back(StoreVal);
            }

            Ops.push_back(InFlag);

            SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
            Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops,
                                            MemVT, MachinePointerInfo());
            InFlag = Chain.getValue(1);
            curOffset += PerStoreOffset;
          }
        }
        ++paramCount;
        --OIdx;
        continue;
      }
      // Plain scalar
      // for ABI, declare .param .b<size> .param<n>;
      unsigned sz = VT.getSizeInBits();
      bool needExtend = false;
      if (VT.isInteger()) {
        if (sz < 16)
          needExtend = true;
        if (sz < 32)
          sz = 32;
      }
      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareParamOps[] = { Chain,
                                    DAG.getConstant(paramCount, dl, MVT::i32),
                                    DAG.getConstant(sz, dl, MVT::i32),
                                    DAG.getConstant(0, dl, MVT::i32), InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                          DeclareParamOps);
      InFlag = Chain.getValue(1);
      SDValue OutV = OutVals[OIdx];
      if (needExtend) {
        // zext/sext i1 to i16
        unsigned opc = ISD::ZERO_EXTEND;
        if (Outs[OIdx].Flags.isSExt())
          opc = ISD::SIGN_EXTEND;
        OutV = DAG.getNode(opc, dl, MVT::i16, OutV);
      }
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain,
                                 DAG.getConstant(paramCount, dl, MVT::i32),
                                 DAG.getConstant(0, dl, MVT::i32), OutV,
                                 InFlag };

      unsigned opcode = NVPTXISD::StoreParam;
      if (Outs[OIdx].Flags.isZExt() && VT.getSizeInBits() < 32)
        opcode = NVPTXISD::StoreParamU32;
      else if (Outs[OIdx].Flags.isSExt() && VT.getSizeInBits() < 32)
        opcode = NVPTXISD::StoreParamS32;
      Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps,
                                      VT, MachinePointerInfo());

      InFlag = Chain.getValue(1);
      ++paramCount;
      continue;
    }
    // struct or vector
    SmallVector<EVT, 16> vtparts;
    SmallVector<uint64_t, 16> Offsets;
    auto *PTy = dyn_cast<PointerType>(Args[i].Ty);
    assert(PTy && "Type of a byval parameter should be pointer");
    ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(),
                       vtparts, &Offsets, 0);

    // declare .param .align <align> .b8 .param<n>[<size>];
    unsigned sz = Outs[OIdx].Flags.getByValSize();
    SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
    // The ByValAlign in the Outs[OIdx].Flags is always set at this point,
    // so we don't need to worry about natural alignment or not.
    // See TargetLowering::LowerCallTo().
    SDValue DeclareParamOps[] = {
      Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), dl, MVT::i32),
      DAG.getConstant(paramCount, dl, MVT::i32),
      DAG.getConstant(sz, dl, MVT::i32), InFlag
    };
    Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                        DeclareParamOps);
    InFlag = Chain.getValue(1);
    for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
      EVT elemtype = vtparts[j];
      int curOffset = Offsets[j];
      unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
      auto PtrVT = getPointerTy(DAG.getDataLayout());
      SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx],
                                    DAG.getConstant(curOffset, dl, PtrVT));
      SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
                                   MachinePointerInfo(), false, false, false,
                                   PartAlign);
      if (elemtype.getSizeInBits() < 16) {
        theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
      }
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain,
                                 DAG.getConstant(paramCount, dl, MVT::i32),
                                 DAG.getConstant(curOffset, dl, MVT::i32),
                                 theVal, InFlag };
      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
                                      CopyParamOps, elemtype,
                                      MachinePointerInfo());

      InFlag = Chain.getValue(1);
    }
    ++paramCount;
  }

  GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
  unsigned retAlignment = 0;

  // Handle Result
  if (Ins.size() > 0) {
    SmallVector<EVT, 16> resvtparts;
    ComputeValueVTs(*this, DL, retTy, resvtparts);

    // Declare
    //   .param .align 16 .b8 retval0[<size-in-bytes>], or
    //   .param .b<size-in-bits> retval0
    unsigned resultsz = DL.getTypeAllocSizeInBits(retTy);
    // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
    // these three types to match the logic in
and NVPTXTargetLowering::getPrototype. 1390 // Plus, this behavior is consistent with nvcc's. 1391 if (retTy->isFloatingPointTy() || retTy->isIntegerTy() || 1392 retTy->isPointerTy()) { 1393 // Scalar needs to be at least 32-bit wide 1394 if (resultsz < 32) 1395 resultsz = 32; 1396 SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1397 SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), 1398 DAG.getConstant(resultsz, dl, MVT::i32), 1399 DAG.getConstant(0, dl, MVT::i32), InFlag }; 1400 Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, 1401 DeclareRetOps); 1402 InFlag = Chain.getValue(1); 1403 } else { 1404 retAlignment = getArgumentAlignment(Callee, CS, retTy, 0); 1405 SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1406 SDValue DeclareRetOps[] = { Chain, 1407 DAG.getConstant(retAlignment, dl, MVT::i32), 1408 DAG.getConstant(resultsz / 8, dl, MVT::i32), 1409 DAG.getConstant(0, dl, MVT::i32), InFlag }; 1410 Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, 1411 DeclareRetOps); 1412 InFlag = Chain.getValue(1); 1413 } 1414 } 1415 1416 if (!Func) { 1417 // This is the indirect function call case: PTX requires a prototype of the 1418 // form 1419 // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _); 1420 // to be emitted, and the label has to be used as the last arg of the call 1421 // instruction. 1422 // The prototype is embedded in a string and passed as the operand of a 1423 // CallPrototype SDNode, which prints out as the value of the string. 1424 SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1425 std::string Proto = 1426 getPrototype(DAG.getDataLayout(), retTy, Args, Outs, retAlignment, CS); 1427 const char *ProtoStr = 1428 nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); 1429 SDValue ProtoOps[] = { 1430 Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag, 1431 }; 1432 Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps); 1433 InFlag = Chain.getValue(1); 1434 } 1435 // Op to just print "call" 1436 SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1437 SDValue PrintCallOps[] = { 1438 Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag 1439 }; 1440 // We model convergent calls as separate opcodes. 1441 unsigned Opcode = Func ? NVPTXISD::PrintCallUni : NVPTXISD::PrintCall; 1442 if (CLI.IsConvergent) 1443 Opcode = Opcode == NVPTXISD::PrintCallUni ?
NVPTXISD::PrintConvergentCallUni 1444 : NVPTXISD::PrintConvergentCall; 1445 Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps); 1446 InFlag = Chain.getValue(1); 1447 1448 // Ops to print out the function name 1449 SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1450 SDValue CallVoidOps[] = { Chain, Callee, InFlag }; 1451 Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps); 1452 InFlag = Chain.getValue(1); 1453 1454 // Ops to print out the param list 1455 SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1456 SDValue CallArgBeginOps[] = { Chain, InFlag }; 1457 Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, 1458 CallArgBeginOps); 1459 InFlag = Chain.getValue(1); 1460 1461 for (unsigned i = 0, e = paramCount; i != e; ++i) { 1462 unsigned opcode; 1463 if (i == (e - 1)) 1464 opcode = NVPTXISD::LastCallArg; 1465 else 1466 opcode = NVPTXISD::CallArg; 1467 SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1468 SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), 1469 DAG.getConstant(i, dl, MVT::i32), InFlag }; 1470 Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps); 1471 InFlag = Chain.getValue(1); 1472 } 1473 SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1474 SDValue CallArgEndOps[] = { Chain, 1475 DAG.getConstant(Func ? 1 : 0, dl, MVT::i32), 1476 InFlag }; 1477 Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps); 1478 InFlag = Chain.getValue(1); 1479 1480 if (!Func) { 1481 SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1482 SDValue PrototypeOps[] = { Chain, 1483 DAG.getConstant(uniqueCallSite, dl, MVT::i32), 1484 InFlag }; 1485 Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps); 1486 InFlag = Chain.getValue(1); 1487 } 1488 1489 // Generate loads from param memory/moves from registers for result 1490 if (Ins.size() > 0) { 1491 if (retTy && retTy->isVectorTy()) { 1492 EVT ObjectVT = getValueType(DL, retTy); 1493 unsigned NumElts = ObjectVT.getVectorNumElements(); 1494 EVT EltVT = ObjectVT.getVectorElementType(); 1495 assert(STI.getTargetLowering()->getNumRegisters(F->getContext(), 1496 ObjectVT) == NumElts && 1497 "Vector was not scalarized"); 1498 unsigned sz = EltVT.getSizeInBits(); 1499 bool needTruncate = sz < 8; 1500 1501 if (NumElts == 1) { 1502 // Just a simple load 1503 SmallVector<EVT, 4> LoadRetVTs; 1504 if (EltVT == MVT::i1 || EltVT == MVT::i8) { 1505 // If loading i1/i8 result, generate 1506 // load.b8 i16 1507 // if i1 1508 // trunc i16 to i1 1509 LoadRetVTs.push_back(MVT::i16); 1510 } else 1511 LoadRetVTs.push_back(EltVT); 1512 LoadRetVTs.push_back(MVT::Other); 1513 LoadRetVTs.push_back(MVT::Glue); 1514 SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), 1515 DAG.getConstant(0, dl, MVT::i32), InFlag}; 1516 SDValue retval = DAG.getMemIntrinsicNode( 1517 NVPTXISD::LoadParam, dl, 1518 DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); 1519 Chain = retval.getValue(1); 1520 InFlag = retval.getValue(2); 1521 SDValue Ret0 = retval; 1522 if (needTruncate) 1523 Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0); 1524 InVals.push_back(Ret0); 1525 } else if (NumElts == 2) { 1526 // LoadV2 1527 SmallVector<EVT, 4> LoadRetVTs; 1528 if (EltVT == MVT::i1 || EltVT == MVT::i8) { 1529 // If loading i1/i8 result, generate 1530 // load.b8 i16 1531 // if i1 1532 // trunc i16 to i1 1533 LoadRetVTs.push_back(MVT::i16); 1534 LoadRetVTs.push_back(MVT::i16); 1535 } else { 1536 
LoadRetVTs.push_back(EltVT); 1537 LoadRetVTs.push_back(EltVT); 1538 } 1539 LoadRetVTs.push_back(MVT::Other); 1540 LoadRetVTs.push_back(MVT::Glue); 1541 SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), 1542 DAG.getConstant(0, dl, MVT::i32), InFlag}; 1543 SDValue retval = DAG.getMemIntrinsicNode( 1544 NVPTXISD::LoadParamV2, dl, 1545 DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); 1546 Chain = retval.getValue(2); 1547 InFlag = retval.getValue(3); 1548 SDValue Ret0 = retval.getValue(0); 1549 SDValue Ret1 = retval.getValue(1); 1550 if (needTruncate) { 1551 Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0); 1552 InVals.push_back(Ret0); 1553 Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1); 1554 InVals.push_back(Ret1); 1555 } else { 1556 InVals.push_back(Ret0); 1557 InVals.push_back(Ret1); 1558 } 1559 } else { 1560 // Split into N LoadV4 1561 unsigned Ofst = 0; 1562 unsigned VecSize = 4; 1563 unsigned Opc = NVPTXISD::LoadParamV4; 1564 if (EltVT.getSizeInBits() == 64) { 1565 VecSize = 2; 1566 Opc = NVPTXISD::LoadParamV2; 1567 } 1568 EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); 1569 for (unsigned i = 0; i < NumElts; i += VecSize) { 1570 SmallVector<EVT, 8> LoadRetVTs; 1571 if (EltVT == MVT::i1 || EltVT == MVT::i8) { 1572 // If loading i1/i8 result, generate 1573 // load.b8 i16 1574 // if i1 1575 // trunc i16 to i1 1576 for (unsigned j = 0; j < VecSize; ++j) 1577 LoadRetVTs.push_back(MVT::i16); 1578 } else { 1579 for (unsigned j = 0; j < VecSize; ++j) 1580 LoadRetVTs.push_back(EltVT); 1581 } 1582 LoadRetVTs.push_back(MVT::Other); 1583 LoadRetVTs.push_back(MVT::Glue); 1584 SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), 1585 DAG.getConstant(Ofst, dl, MVT::i32), InFlag}; 1586 SDValue retval = DAG.getMemIntrinsicNode( 1587 Opc, dl, DAG.getVTList(LoadRetVTs), 1588 LoadRetOps, EltVT, MachinePointerInfo()); 1589 if (VecSize == 2) { 1590 Chain = retval.getValue(2); 1591 InFlag = retval.getValue(3); 1592 } else { 1593 Chain = retval.getValue(4); 1594 InFlag = retval.getValue(5); 1595 } 1596 1597 for (unsigned j = 0; j < VecSize; ++j) { 1598 if (i + j >= NumElts) 1599 break; 1600 SDValue Elt = retval.getValue(j); 1601 if (needTruncate) 1602 Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); 1603 InVals.push_back(Elt); 1604 } 1605 Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); 1606 } 1607 } 1608 } else { 1609 SmallVector<EVT, 16> VTs; 1610 SmallVector<uint64_t, 16> Offsets; 1611 ComputePTXValueVTs(*this, DAG.getDataLayout(), retTy, VTs, &Offsets, 0); 1612 assert(VTs.size() == Ins.size() && "Bad value decomposition"); 1613 unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0); 1614 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 1615 unsigned sz = VTs[i].getSizeInBits(); 1616 unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]); 1617 bool needTruncate = false; 1618 if (VTs[i].isInteger() && sz < 8) { 1619 sz = 8; 1620 needTruncate = true; 1621 } 1622 1623 SmallVector<EVT, 4> LoadRetVTs; 1624 EVT TheLoadType = VTs[i]; 1625 if (retTy->isIntegerTy() && DL.getTypeAllocSizeInBits(retTy) < 32) { 1626 // This is for integer types only, and specifically not for 1627 // aggregates. 1628 LoadRetVTs.push_back(MVT::i32); 1629 TheLoadType = MVT::i32; 1630 needTruncate = true; 1631 } else if (sz < 16) { 1632 // If loading i1/i8 result, generate 1633 // load i8 (-> i16) 1634 // trunc i16 to i1/i8 1635 1636 // FIXME: Do we need to set needTruncate to true here, too? 
We could 1637 // not figure out what this branch is for in D17872, so we left it 1638 // alone. The comment above about loading i1/i8 may be wrong, as the 1639 // branch above seems to cover integers of size < 32. 1640 LoadRetVTs.push_back(MVT::i16); 1641 } else 1642 LoadRetVTs.push_back(Ins[i].VT); 1643 LoadRetVTs.push_back(MVT::Other); 1644 LoadRetVTs.push_back(MVT::Glue); 1645 1646 SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), 1647 DAG.getConstant(Offsets[i], dl, MVT::i32), 1648 InFlag}; 1649 SDValue retval = DAG.getMemIntrinsicNode( 1650 NVPTXISD::LoadParam, dl, 1651 DAG.getVTList(LoadRetVTs), LoadRetOps, 1652 TheLoadType, MachinePointerInfo(), AlignI); 1653 Chain = retval.getValue(1); 1654 InFlag = retval.getValue(2); 1655 SDValue Ret0 = retval.getValue(0); 1656 if (needTruncate) 1657 Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0); 1658 InVals.push_back(Ret0); 1659 } 1660 } 1661 } 1662 1663 Chain = DAG.getCALLSEQ_END(Chain, 1664 DAG.getIntPtrConstant(uniqueCallSite, dl, true), 1665 DAG.getIntPtrConstant(uniqueCallSite + 1, dl, 1666 true), 1667 InFlag, dl); 1668 uniqueCallSite++; 1669 1670 // set isTailCall to false for now, until we figure out how to express 1671 // tail call optimization in PTX 1672 isTailCall = false; 1673 return Chain; 1674 } 1675 1676 // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() 1677 // (see LegalizeDAG.cpp). This is slow and uses local memory. 1678 // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 1679 SDValue 1680 NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { 1681 SDNode *Node = Op.getNode(); 1682 SDLoc dl(Node); 1683 SmallVector<SDValue, 8> Ops; 1684 unsigned NumOperands = Node->getNumOperands(); 1685 for (unsigned i = 0; i < NumOperands; ++i) { 1686 SDValue SubOp = Node->getOperand(i); 1687 EVT VVT = SubOp.getNode()->getValueType(0); 1688 EVT EltVT = VVT.getVectorElementType(); 1689 unsigned NumSubElem = VVT.getVectorNumElements(); 1690 for (unsigned j = 0; j < NumSubElem; ++j) { 1691 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, 1692 DAG.getIntPtrConstant(j, dl))); 1693 } 1694 } 1695 return DAG.getBuildVector(Node->getValueType(0), dl, Ops); 1696 } 1697 1698 /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which 1699 /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift 1700 /// amount, or 1701 /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift 1702 /// amount. 1703 SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op, 1704 SelectionDAG &DAG) const { 1705 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 1706 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 1707 1708 EVT VT = Op.getValueType(); 1709 unsigned VTBits = VT.getSizeInBits(); 1710 SDLoc dl(Op); 1711 SDValue ShOpLo = Op.getOperand(0); 1712 SDValue ShOpHi = Op.getOperand(1); 1713 SDValue ShAmt = Op.getOperand(2); 1714 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 1715 1716 if (VTBits == 32 && STI.getSmVersion() >= 35) { 1717 1718 // For 32bit and sm35, we can use the funnel shift 'shf' instruction. 
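// shf.r.clamp treats {aHi, aLo} as a single 64-bit value, shifts it right by
// Amt (clamping the shift amount at 32) and returns the low 32 bits, which is
// exactly the dLo we need; the high word is just an ordinary shift of aHi.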
1719 // {dHi, dLo} = {aHi, aLo} >> Amt 1720 // dHi = aHi >> Amt 1721 // dLo = shf.r.clamp aLo, aHi, Amt 1722 1723 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 1724 SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi, 1725 ShAmt); 1726 1727 SDValue Ops[2] = { Lo, Hi }; 1728 return DAG.getMergeValues(Ops, dl); 1729 } 1730 else { 1731 1732 // {dHi, dLo} = {aHi, aLo} >> Amt 1733 // - if (Amt>=size) then 1734 // dLo = aHi >> (Amt-size) 1735 // dHi = aHi >> Amt (this is either all 0 or all 1) 1736 // else 1737 // dLo = (aLo >>logic Amt) | (aHi << (size-Amt)) 1738 // dHi = aHi >> Amt 1739 1740 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 1741 DAG.getConstant(VTBits, dl, MVT::i32), 1742 ShAmt); 1743 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 1744 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 1745 DAG.getConstant(VTBits, dl, MVT::i32)); 1746 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 1747 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 1748 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 1749 1750 SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, 1751 DAG.getConstant(VTBits, dl, MVT::i32), 1752 ISD::SETGE); 1753 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 1754 SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); 1755 1756 SDValue Ops[2] = { Lo, Hi }; 1757 return DAG.getMergeValues(Ops, dl); 1758 } 1759 } 1760 1761 /// LowerShiftLeftParts - Lower SHL_PARTS, which 1762 /// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift 1763 /// amount, or 1764 /// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift 1765 /// amount. 1766 SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op, 1767 SelectionDAG &DAG) const { 1768 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 1769 assert(Op.getOpcode() == ISD::SHL_PARTS); 1770 1771 EVT VT = Op.getValueType(); 1772 unsigned VTBits = VT.getSizeInBits(); 1773 SDLoc dl(Op); 1774 SDValue ShOpLo = Op.getOperand(0); 1775 SDValue ShOpHi = Op.getOperand(1); 1776 SDValue ShAmt = Op.getOperand(2); 1777 1778 if (VTBits == 32 && STI.getSmVersion() >= 35) { 1779 1780 // For 32bit and sm35, we can use the funnel shift 'shf' instruction. 
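// shf.l.clamp likewise treats {aHi, aLo} as one 64-bit value, shifts it left
// by Amt (clamped at 32) and returns the high 32 bits, which is dHi; the low
// word is an ordinary left shift of aLo.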
1781 // {dHi, dLo} = {aHi, aLo} << Amt 1782 // dHi = shf.l.clamp aLo, aHi, Amt 1783 // dLo = aLo << Amt 1784 1785 SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi, 1786 ShAmt); 1787 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 1788 1789 SDValue Ops[2] = { Lo, Hi }; 1790 return DAG.getMergeValues(Ops, dl); 1791 } 1792 else { 1793 1794 // {dHi, dLo} = {aHi, aLo} << Amt 1795 // - if (Amt>=size) then 1796 // dLo = aLo << Amt (all 0) 1797 // dLo = aLo << (Amt-size) 1798 // else 1799 // dLo = aLo << Amt 1800 // dHi = (aHi << Amt) | (aLo >> (size-Amt)) 1801 1802 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 1803 DAG.getConstant(VTBits, dl, MVT::i32), 1804 ShAmt); 1805 SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 1806 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 1807 DAG.getConstant(VTBits, dl, MVT::i32)); 1808 SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 1809 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 1810 SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 1811 1812 SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, 1813 DAG.getConstant(VTBits, dl, MVT::i32), 1814 ISD::SETGE); 1815 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 1816 SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); 1817 1818 SDValue Ops[2] = { Lo, Hi }; 1819 return DAG.getMergeValues(Ops, dl); 1820 } 1821 } 1822 1823 SDValue 1824 NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 1825 switch (Op.getOpcode()) { 1826 case ISD::RETURNADDR: 1827 return SDValue(); 1828 case ISD::FRAMEADDR: 1829 return SDValue(); 1830 case ISD::GlobalAddress: 1831 return LowerGlobalAddress(Op, DAG); 1832 case ISD::INTRINSIC_W_CHAIN: 1833 return Op; 1834 case ISD::BUILD_VECTOR: 1835 case ISD::EXTRACT_SUBVECTOR: 1836 return Op; 1837 case ISD::CONCAT_VECTORS: 1838 return LowerCONCAT_VECTORS(Op, DAG); 1839 case ISD::STORE: 1840 return LowerSTORE(Op, DAG); 1841 case ISD::LOAD: 1842 return LowerLOAD(Op, DAG); 1843 case ISD::SHL_PARTS: 1844 return LowerShiftLeftParts(Op, DAG); 1845 case ISD::SRA_PARTS: 1846 case ISD::SRL_PARTS: 1847 return LowerShiftRightParts(Op, DAG); 1848 case ISD::SELECT: 1849 return LowerSelect(Op, DAG); 1850 default: 1851 llvm_unreachable("Custom lowering not defined for operation"); 1852 } 1853 } 1854 1855 SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const { 1856 SDValue Op0 = Op->getOperand(0); 1857 SDValue Op1 = Op->getOperand(1); 1858 SDValue Op2 = Op->getOperand(2); 1859 SDLoc DL(Op.getNode()); 1860 1861 assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1"); 1862 1863 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); 1864 Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); 1865 SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2); 1866 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select); 1867 1868 return Trunc; 1869 } 1870 1871 SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 1872 if (Op.getValueType() == MVT::i1) 1873 return LowerLOADi1(Op, DAG); 1874 else 1875 return SDValue(); 1876 } 1877 1878 // v = ld i1* addr 1879 // => 1880 // v1 = ld i8* addr (-> i16) 1881 // v = trunc i16 to i1 1882 SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { 1883 SDNode *Node = Op.getNode(); 1884 LoadSDNode *LD = cast<LoadSDNode>(Node); 1885 SDLoc dl(Node); 1886 assert(LD->getExtensionType() == ISD::NON_EXTLOAD); 
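// PTX has no single-bit loads, so the i1 value is read back through an i16
// and then truncated to i1 below.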
1887 assert(Node->getValueType(0) == MVT::i1 && 1888 "Custom lowering for i1 load only"); 1889 SDValue newLD = 1890 DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(), 1891 LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), 1892 LD->isInvariant(), LD->getAlignment()); 1893 SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); 1894 // The legalizer (the caller) is expecting two values from the legalized 1895 // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() 1896 // in LegalizeDAG.cpp which also uses MergeValues. 1897 SDValue Ops[] = { result, LD->getChain() }; 1898 return DAG.getMergeValues(Ops, dl); 1899 } 1900 1901 SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 1902 EVT ValVT = Op.getOperand(1).getValueType(); 1903 if (ValVT == MVT::i1) 1904 return LowerSTOREi1(Op, DAG); 1905 else if (ValVT.isVector()) 1906 return LowerSTOREVector(Op, DAG); 1907 else 1908 return SDValue(); 1909 } 1910 1911 SDValue 1912 NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { 1913 SDNode *N = Op.getNode(); 1914 SDValue Val = N->getOperand(1); 1915 SDLoc DL(N); 1916 EVT ValVT = Val.getValueType(); 1917 1918 if (ValVT.isVector()) { 1919 // We only handle "native" vector sizes for now, e.g. <4 x double> is not 1920 // legal. We can (and should) split that into 2 stores of <2 x double> here 1921 // but I'm leaving that as a TODO for now. 1922 if (!ValVT.isSimple()) 1923 return SDValue(); 1924 switch (ValVT.getSimpleVT().SimpleTy) { 1925 default: 1926 return SDValue(); 1927 case MVT::v2i8: 1928 case MVT::v2i16: 1929 case MVT::v2i32: 1930 case MVT::v2i64: 1931 case MVT::v2f32: 1932 case MVT::v2f64: 1933 case MVT::v4i8: 1934 case MVT::v4i16: 1935 case MVT::v4i32: 1936 case MVT::v4f32: 1937 // This is a "native" vector type 1938 break; 1939 } 1940 1941 MemSDNode *MemSD = cast<MemSDNode>(N); 1942 const DataLayout &TD = DAG.getDataLayout(); 1943 1944 unsigned Align = MemSD->getAlignment(); 1945 unsigned PrefAlign = 1946 TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); 1947 if (Align < PrefAlign) { 1948 // This store is not sufficiently aligned, so bail out and let this vector 1949 // store be scalarized. Note that we may still be able to emit smaller 1950 // vector stores. For example, if we are storing a <4 x float> with an 1951 // alignment of 8, this check will fail but the legalizer will try again 1952 // with 2 x <2 x float>, which will succeed with an alignment of 8. 1953 return SDValue(); 1954 } 1955 1956 unsigned Opcode = 0; 1957 EVT EltVT = ValVT.getVectorElementType(); 1958 unsigned NumElts = ValVT.getVectorNumElements(); 1959 1960 // Since StoreV2 is a target node, we cannot rely on DAG type legalization. 1961 // Therefore, we must ensure the type is legal. For i1 and i8, we set the 1962 // stored type to i16 and propagate the "real" type as the memory type. 
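// For example, a store of <4 x i8> becomes a single StoreV4 node whose value
// operands are the four elements widened to i16, while the memory VT remains
// v4i8, so the emitted store still writes only four bytes.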
1963 bool NeedExt = false; 1964 if (EltVT.getSizeInBits() < 16) 1965 NeedExt = true; 1966 1967 switch (NumElts) { 1968 default: 1969 return SDValue(); 1970 case 2: 1971 Opcode = NVPTXISD::StoreV2; 1972 break; 1973 case 4: { 1974 Opcode = NVPTXISD::StoreV4; 1975 break; 1976 } 1977 } 1978 1979 SmallVector<SDValue, 8> Ops; 1980 1981 // First is the chain 1982 Ops.push_back(N->getOperand(0)); 1983 1984 // Then the split values 1985 for (unsigned i = 0; i < NumElts; ++i) { 1986 SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, 1987 DAG.getIntPtrConstant(i, DL)); 1988 if (NeedExt) 1989 ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); 1990 Ops.push_back(ExtVal); 1991 } 1992 1993 // Then any remaining arguments 1994 Ops.append(N->op_begin() + 2, N->op_end()); 1995 1996 SDValue NewSt = DAG.getMemIntrinsicNode( 1997 Opcode, DL, DAG.getVTList(MVT::Other), Ops, 1998 MemSD->getMemoryVT(), MemSD->getMemOperand()); 1999 2000 //return DCI.CombineTo(N, NewSt, true); 2001 return NewSt; 2002 } 2003 2004 return SDValue(); 2005 } 2006 2007 // st i1 v, addr 2008 // => 2009 // v1 = zxt v to i16 2010 // st.u8 i16, addr 2011 SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { 2012 SDNode *Node = Op.getNode(); 2013 SDLoc dl(Node); 2014 StoreSDNode *ST = cast<StoreSDNode>(Node); 2015 SDValue Tmp1 = ST->getChain(); 2016 SDValue Tmp2 = ST->getBasePtr(); 2017 SDValue Tmp3 = ST->getValue(); 2018 assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only"); 2019 unsigned Alignment = ST->getAlignment(); 2020 bool isVolatile = ST->isVolatile(); 2021 bool isNonTemporal = ST->isNonTemporal(); 2022 Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3); 2023 SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, 2024 ST->getPointerInfo(), MVT::i8, isNonTemporal, 2025 isVolatile, Alignment); 2026 return Result; 2027 } 2028 2029 SDValue 2030 NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { 2031 std::string ParamSym; 2032 raw_string_ostream ParamStr(ParamSym); 2033 2034 ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx; 2035 ParamStr.flush(); 2036 2037 std::string *SavedStr = 2038 nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str()); 2039 return DAG.getTargetExternalSymbol(SavedStr->c_str(), v); 2040 } 2041 2042 // Check to see if the kernel argument is image*_t or sampler_t 2043 2044 static bool isImageOrSamplerVal(const Value *arg, const Module *context) { 2045 static const char *const specialTypes[] = { "struct._image2d_t", 2046 "struct._image3d_t", 2047 "struct._sampler_t" }; 2048 2049 Type *Ty = arg->getType(); 2050 auto *PTy = dyn_cast<PointerType>(Ty); 2051 2052 if (!PTy) 2053 return false; 2054 2055 if (!context) 2056 return false; 2057 2058 auto *STy = dyn_cast<StructType>(PTy->getElementType()); 2059 if (!STy || STy->isLiteral()) 2060 return false; 2061 2062 return std::find(std::begin(specialTypes), std::end(specialTypes), 2063 STy->getName()) != std::end(specialTypes); 2064 } 2065 2066 SDValue NVPTXTargetLowering::LowerFormalArguments( 2067 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 2068 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, 2069 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 2070 MachineFunction &MF = DAG.getMachineFunction(); 2071 const DataLayout &DL = DAG.getDataLayout(); 2072 auto PtrVT = getPointerTy(DAG.getDataLayout()); 2073 2074 const Function *F = MF.getFunction(); 2075 const AttributeSet &PAL = F->getAttributes(); 2076 
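// Formal arguments are read from their .param symbols; getParamSymbol()
// builds names of the form <function>_param_<n>, and the loads below address
// the parameter space relative to those symbols.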
const TargetLowering *TLI = STI.getTargetLowering(); 2077 2078 SDValue Root = DAG.getRoot(); 2079 std::vector<SDValue> OutChains; 2080 2081 bool isKernel = llvm::isKernelFunction(*F); 2082 bool isABI = (STI.getSmVersion() >= 20); 2083 assert(isABI && "Non-ABI compilation is not supported"); 2084 if (!isABI) 2085 return Chain; 2086 2087 std::vector<Type *> argTypes; 2088 std::vector<const Argument *> theArgs; 2089 for (const Argument &I : F->args()) { 2090 theArgs.push_back(&I); 2091 argTypes.push_back(I.getType()); 2092 } 2093 // argTypes.size() (or theArgs.size()) and Ins.size() need not match. 2094 // Ins.size() will be larger 2095 // * if there is an aggregate argument with multiple fields (each field 2096 // showing up separately in Ins) 2097 // * if there is a vector argument with more than typical vector-length 2098 // elements (generally if more than 4) where each vector element is 2099 // individually present in Ins. 2100 // So a different index should be used for indexing into Ins. 2101 // See similar issue in LowerCall. 2102 unsigned InsIdx = 0; 2103 2104 int idx = 0; 2105 for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) { 2106 Type *Ty = argTypes[i]; 2107 2108 // If the kernel argument is image*_t or sampler_t, convert it to 2109 // a i32 constant holding the parameter position. This can later 2110 // matched in the AsmPrinter to output the correct mangled name. 2111 if (isImageOrSamplerVal( 2112 theArgs[i], 2113 (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() 2114 : nullptr))) { 2115 assert(isKernel && "Only kernels can have image/sampler params"); 2116 InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32)); 2117 continue; 2118 } 2119 2120 if (theArgs[i]->use_empty()) { 2121 // argument is dead 2122 if (Ty->isAggregateType()) { 2123 SmallVector<EVT, 16> vtparts; 2124 2125 ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts); 2126 assert(vtparts.size() > 0 && "empty aggregate type not expected"); 2127 for (unsigned parti = 0, parte = vtparts.size(); parti != parte; 2128 ++parti) { 2129 InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); 2130 ++InsIdx; 2131 } 2132 if (vtparts.size() > 0) 2133 --InsIdx; 2134 continue; 2135 } 2136 if (Ty->isVectorTy()) { 2137 EVT ObjectVT = getValueType(DL, Ty); 2138 unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); 2139 for (unsigned parti = 0; parti < NumRegs; ++parti) { 2140 InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); 2141 ++InsIdx; 2142 } 2143 if (NumRegs > 0) 2144 --InsIdx; 2145 continue; 2146 } 2147 InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); 2148 continue; 2149 } 2150 2151 // In the following cases, assign a node order of "idx+1" 2152 // to newly created nodes. The SDNodes for params have to 2153 // appear in the same order as their order of appearance 2154 // in the original function. "idx+1" holds that order. 2155 if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) { 2156 if (Ty->isAggregateType()) { 2157 SmallVector<EVT, 16> vtparts; 2158 SmallVector<uint64_t, 16> offsets; 2159 2160 // NOTE: Here, we lose the ability to issue vector loads for vectors 2161 // that are a part of a struct. This should be investigated in the 2162 // future. 
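// For example, an argument of type { float, i32 } is decomposed into the
// parts { f32, i32 } at byte offsets { 0, 4 }, and each part is loaded
// individually from the parameter symbol plus its offset.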
2163 ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &offsets, 2164 0); 2165 assert(vtparts.size() > 0 && "empty aggregate type not expected"); 2166 bool aggregateIsPacked = false; 2167 if (StructType *STy = llvm::dyn_cast<StructType>(Ty)) 2168 aggregateIsPacked = STy->isPacked(); 2169 2170 SDValue Arg = getParamSymbol(DAG, idx, PtrVT); 2171 for (unsigned parti = 0, parte = vtparts.size(); parti != parte; 2172 ++parti) { 2173 EVT partVT = vtparts[parti]; 2174 Value *srcValue = Constant::getNullValue( 2175 PointerType::get(partVT.getTypeForEVT(F->getContext()), 2176 llvm::ADDRESS_SPACE_PARAM)); 2177 SDValue srcAddr = 2178 DAG.getNode(ISD::ADD, dl, PtrVT, Arg, 2179 DAG.getConstant(offsets[parti], dl, PtrVT)); 2180 unsigned partAlign = aggregateIsPacked 2181 ? 1 2182 : DL.getABITypeAlignment( 2183 partVT.getTypeForEVT(F->getContext())); 2184 SDValue p; 2185 if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) { 2186 ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? 2187 ISD::SEXTLOAD : ISD::ZEXTLOAD; 2188 p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr, 2189 MachinePointerInfo(srcValue), partVT, false, 2190 false, false, partAlign); 2191 } else { 2192 p = DAG.getLoad(partVT, dl, Root, srcAddr, 2193 MachinePointerInfo(srcValue), false, false, false, 2194 partAlign); 2195 } 2196 if (p.getNode()) 2197 p.getNode()->setIROrder(idx + 1); 2198 InVals.push_back(p); 2199 ++InsIdx; 2200 } 2201 if (vtparts.size() > 0) 2202 --InsIdx; 2203 continue; 2204 } 2205 if (Ty->isVectorTy()) { 2206 EVT ObjectVT = getValueType(DL, Ty); 2207 SDValue Arg = getParamSymbol(DAG, idx, PtrVT); 2208 unsigned NumElts = ObjectVT.getVectorNumElements(); 2209 assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && 2210 "Vector was not scalarized"); 2211 EVT EltVT = ObjectVT.getVectorElementType(); 2212 2213 // V1 load 2214 // f32 = load ... 2215 if (NumElts == 1) { 2216 // We only have one element, so just directly load it 2217 Value *SrcValue = Constant::getNullValue(PointerType::get( 2218 EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); 2219 SDValue P = DAG.getLoad( 2220 EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false, 2221 true, 2222 DL.getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); 2223 if (P.getNode()) 2224 P.getNode()->setIROrder(idx + 1); 2225 2226 if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) 2227 P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P); 2228 InVals.push_back(P); 2229 ++InsIdx; 2230 } else if (NumElts == 2) { 2231 // V2 load 2232 // f32,f32 = load ... 
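// The two elements are fetched with one vector load from the parameter
// symbol and then split apart with EXTRACT_VECTOR_ELT below.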
2233 EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2); 2234 Value *SrcValue = Constant::getNullValue(PointerType::get( 2235 VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); 2236 SDValue P = DAG.getLoad( 2237 VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false, 2238 true, 2239 DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); 2240 if (P.getNode()) 2241 P.getNode()->setIROrder(idx + 1); 2242 2243 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, 2244 DAG.getIntPtrConstant(0, dl)); 2245 SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, 2246 DAG.getIntPtrConstant(1, dl)); 2247 2248 if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) { 2249 Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0); 2250 Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1); 2251 } 2252 2253 InVals.push_back(Elt0); 2254 InVals.push_back(Elt1); 2255 InsIdx += 2; 2256 } else { 2257 // V4 loads 2258 // We have at least 4 elements (<3 x Ty> expands to 4 elements) and 2259 // the 2260 // vector will be expanded to a power of 2 elements, so we know we can 2261 // always round up to the next multiple of 4 when creating the vector 2262 // loads. 2263 // e.g. 4 elem => 1 ld.v4 2264 // 6 elem => 2 ld.v4 2265 // 8 elem => 2 ld.v4 2266 // 11 elem => 3 ld.v4 2267 unsigned VecSize = 4; 2268 if (EltVT.getSizeInBits() == 64) { 2269 VecSize = 2; 2270 } 2271 EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); 2272 unsigned Ofst = 0; 2273 for (unsigned i = 0; i < NumElts; i += VecSize) { 2274 Value *SrcValue = Constant::getNullValue( 2275 PointerType::get(VecVT.getTypeForEVT(F->getContext()), 2276 llvm::ADDRESS_SPACE_PARAM)); 2277 SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, 2278 DAG.getConstant(Ofst, dl, PtrVT)); 2279 SDValue P = DAG.getLoad( 2280 VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, 2281 false, true, 2282 DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); 2283 if (P.getNode()) 2284 P.getNode()->setIROrder(idx + 1); 2285 2286 for (unsigned j = 0; j < VecSize; ++j) { 2287 if (i + j >= NumElts) 2288 break; 2289 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, 2290 DAG.getIntPtrConstant(j, dl)); 2291 if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) 2292 Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt); 2293 InVals.push_back(Elt); 2294 } 2295 Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); 2296 } 2297 InsIdx += NumElts; 2298 } 2299 2300 if (NumElts > 0) 2301 --InsIdx; 2302 continue; 2303 } 2304 // A plain scalar. 2305 EVT ObjectVT = getValueType(DL, Ty); 2306 // If ABI, load from the param symbol 2307 SDValue Arg = getParamSymbol(DAG, idx, PtrVT); 2308 Value *srcValue = Constant::getNullValue(PointerType::get( 2309 ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); 2310 SDValue p; 2311 if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) { 2312 ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? 
2313 ISD::SEXTLOAD : ISD::ZEXTLOAD; 2314 p = DAG.getExtLoad( 2315 ExtOp, dl, Ins[InsIdx].VT, Root, Arg, MachinePointerInfo(srcValue), 2316 ObjectVT, false, false, false, 2317 DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); 2318 } else { 2319 p = DAG.getLoad( 2320 Ins[InsIdx].VT, dl, Root, Arg, MachinePointerInfo(srcValue), false, 2321 false, false, 2322 DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); 2323 } 2324 if (p.getNode()) 2325 p.getNode()->setIROrder(idx + 1); 2326 InVals.push_back(p); 2327 continue; 2328 } 2329 2330 // Param has ByVal attribute 2331 // Return MoveParam(param symbol). 2332 // Ideally, the param symbol can be returned directly, 2333 // but when SDNode builder decides to use it in a CopyToReg(), 2334 // machine instruction fails because TargetExternalSymbol 2335 // (not lowered) is target dependent, and CopyToReg assumes 2336 // the source is lowered. 2337 EVT ObjectVT = getValueType(DL, Ty); 2338 assert(ObjectVT == Ins[InsIdx].VT && 2339 "Ins type did not match function type"); 2340 SDValue Arg = getParamSymbol(DAG, idx, PtrVT); 2341 SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); 2342 if (p.getNode()) 2343 p.getNode()->setIROrder(idx + 1); 2344 if (isKernel) 2345 InVals.push_back(p); 2346 else { 2347 SDValue p2 = DAG.getNode( 2348 ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, 2349 DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, dl, MVT::i32), p); 2350 InVals.push_back(p2); 2351 } 2352 } 2353 2354 // Clang will check explicit VarArg and issue error if any. However, Clang 2355 // will let code with 2356 // implicit var arg like f() pass. See bug 617733. 2357 // We treat this case as if the arg list is empty. 2358 // if (F.isVarArg()) { 2359 // assert(0 && "VarArg not supported yet!"); 2360 //} 2361 2362 if (!OutChains.empty()) 2363 DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains)); 2364 2365 return Chain; 2366 } 2367 2368 SDValue 2369 NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 2370 bool isVarArg, 2371 const SmallVectorImpl<ISD::OutputArg> &Outs, 2372 const SmallVectorImpl<SDValue> &OutVals, 2373 const SDLoc &dl, SelectionDAG &DAG) const { 2374 MachineFunction &MF = DAG.getMachineFunction(); 2375 const Function *F = MF.getFunction(); 2376 Type *RetTy = F->getReturnType(); 2377 const DataLayout &TD = DAG.getDataLayout(); 2378 2379 bool isABI = (STI.getSmVersion() >= 20); 2380 assert(isABI && "Non-ABI compilation is not supported"); 2381 if (!isABI) 2382 return Chain; 2383 2384 if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) { 2385 // If we have a vector type, the OutVals array will be the scalarized 2386 // components and we have combine them into 1 or more vector stores. 
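// For example, returning <2 x float> emits a single StoreRetvalV2 node, while
// <4 x float> emits a single StoreRetvalV4; larger element counts are broken
// into several v4 (or v2 for 64-bit elements) stores below.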
2387 unsigned NumElts = VTy->getNumElements(); 2388 assert(NumElts == Outs.size() && "Bad scalarization of return value"); 2389 2390 // const_cast can be removed in later LLVM versions 2391 EVT EltVT = getValueType(TD, RetTy).getVectorElementType(); 2392 bool NeedExtend = false; 2393 if (EltVT.getSizeInBits() < 16) 2394 NeedExtend = true; 2395 2396 // V1 store 2397 if (NumElts == 1) { 2398 SDValue StoreVal = OutVals[0]; 2399 // We only have one element, so just directly store it 2400 if (NeedExtend) 2401 StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); 2402 SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal }; 2403 Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, 2404 DAG.getVTList(MVT::Other), Ops, 2405 EltVT, MachinePointerInfo()); 2406 2407 } else if (NumElts == 2) { 2408 // V2 store 2409 SDValue StoreVal0 = OutVals[0]; 2410 SDValue StoreVal1 = OutVals[1]; 2411 2412 if (NeedExtend) { 2413 StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0); 2414 StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1); 2415 } 2416 2417 SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal0, 2418 StoreVal1 }; 2419 Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl, 2420 DAG.getVTList(MVT::Other), Ops, 2421 EltVT, MachinePointerInfo()); 2422 } else { 2423 // V4 stores 2424 // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the 2425 // vector will be expanded to a power of 2 elements, so we know we can 2426 // always round up to the next multiple of 4 when creating the vector 2427 // stores. 2428 // e.g. 4 elem => 1 st.v4 2429 // 6 elem => 2 st.v4 2430 // 8 elem => 2 st.v4 2431 // 11 elem => 3 st.v4 2432 2433 unsigned VecSize = 4; 2434 if (OutVals[0].getValueType().getSizeInBits() == 64) 2435 VecSize = 2; 2436 2437 unsigned Offset = 0; 2438 2439 EVT VecVT = 2440 EVT::getVectorVT(F->getContext(), EltVT, VecSize); 2441 unsigned PerStoreOffset = 2442 TD.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); 2443 2444 for (unsigned i = 0; i < NumElts; i += VecSize) { 2445 // Get values 2446 SDValue StoreVal; 2447 SmallVector<SDValue, 8> Ops; 2448 Ops.push_back(Chain); 2449 Ops.push_back(DAG.getConstant(Offset, dl, MVT::i32)); 2450 unsigned Opc = NVPTXISD::StoreRetvalV2; 2451 EVT ExtendedVT = (NeedExtend) ? 
MVT::i16 : OutVals[0].getValueType(); 2452 2453 StoreVal = OutVals[i]; 2454 if (NeedExtend) 2455 StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); 2456 Ops.push_back(StoreVal); 2457 2458 if (i + 1 < NumElts) { 2459 StoreVal = OutVals[i + 1]; 2460 if (NeedExtend) 2461 StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); 2462 } else { 2463 StoreVal = DAG.getUNDEF(ExtendedVT); 2464 } 2465 Ops.push_back(StoreVal); 2466 2467 if (VecSize == 4) { 2468 Opc = NVPTXISD::StoreRetvalV4; 2469 if (i + 2 < NumElts) { 2470 StoreVal = OutVals[i + 2]; 2471 if (NeedExtend) 2472 StoreVal = 2473 DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); 2474 } else { 2475 StoreVal = DAG.getUNDEF(ExtendedVT); 2476 } 2477 Ops.push_back(StoreVal); 2478 2479 if (i + 3 < NumElts) { 2480 StoreVal = OutVals[i + 3]; 2481 if (NeedExtend) 2482 StoreVal = 2483 DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); 2484 } else { 2485 StoreVal = DAG.getUNDEF(ExtendedVT); 2486 } 2487 Ops.push_back(StoreVal); 2488 } 2489 2490 // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size()); 2491 Chain = 2492 DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops, 2493 EltVT, MachinePointerInfo()); 2494 Offset += PerStoreOffset; 2495 } 2496 } 2497 } else { 2498 SmallVector<EVT, 16> ValVTs; 2499 SmallVector<uint64_t, 16> Offsets; 2500 ComputePTXValueVTs(*this, DAG.getDataLayout(), RetTy, ValVTs, &Offsets, 0); 2501 assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition"); 2502 2503 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 2504 SDValue theVal = OutVals[i]; 2505 EVT TheValType = theVal.getValueType(); 2506 unsigned numElems = 1; 2507 if (TheValType.isVector()) 2508 numElems = TheValType.getVectorNumElements(); 2509 for (unsigned j = 0, je = numElems; j != je; ++j) { 2510 SDValue TmpVal = theVal; 2511 if (TheValType.isVector()) 2512 TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, 2513 TheValType.getVectorElementType(), TmpVal, 2514 DAG.getIntPtrConstant(j, dl)); 2515 EVT TheStoreType = ValVTs[i]; 2516 if (RetTy->isIntegerTy() && TD.getTypeAllocSizeInBits(RetTy) < 32) { 2517 // The following zero-extension is for integer types only, and 2518 // specifically not for aggregates. 
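// For example, an i8 return value is zero-extended here and stored as a
// 32-bit retval, which matches the .param .b32 retval0 declaration the
// caller emits for sub-32-bit scalar return types.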
2519 TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal); 2520 TheStoreType = MVT::i32; 2521 } 2522 else if (TmpVal.getValueType().getSizeInBits() < 16) 2523 TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal); 2524 2525 SDValue Ops[] = { 2526 Chain, 2527 DAG.getConstant(Offsets[i], dl, MVT::i32), 2528 TmpVal }; 2529 Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, 2530 DAG.getVTList(MVT::Other), Ops, 2531 TheStoreType, 2532 MachinePointerInfo()); 2533 } 2534 } 2535 } 2536 2537 return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); 2538 } 2539 2540 2541 void NVPTXTargetLowering::LowerAsmOperandForConstraint( 2542 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, 2543 SelectionDAG &DAG) const { 2544 if (Constraint.length() > 1) 2545 return; 2546 else 2547 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 2548 } 2549 2550 static unsigned getOpcForTextureInstr(unsigned Intrinsic) { 2551 switch (Intrinsic) { 2552 default: 2553 return 0; 2554 2555 case Intrinsic::nvvm_tex_1d_v4f32_s32: 2556 return NVPTXISD::Tex1DFloatS32; 2557 case Intrinsic::nvvm_tex_1d_v4f32_f32: 2558 return NVPTXISD::Tex1DFloatFloat; 2559 case Intrinsic::nvvm_tex_1d_level_v4f32_f32: 2560 return NVPTXISD::Tex1DFloatFloatLevel; 2561 case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: 2562 return NVPTXISD::Tex1DFloatFloatGrad; 2563 case Intrinsic::nvvm_tex_1d_v4s32_s32: 2564 return NVPTXISD::Tex1DS32S32; 2565 case Intrinsic::nvvm_tex_1d_v4s32_f32: 2566 return NVPTXISD::Tex1DS32Float; 2567 case Intrinsic::nvvm_tex_1d_level_v4s32_f32: 2568 return NVPTXISD::Tex1DS32FloatLevel; 2569 case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: 2570 return NVPTXISD::Tex1DS32FloatGrad; 2571 case Intrinsic::nvvm_tex_1d_v4u32_s32: 2572 return NVPTXISD::Tex1DU32S32; 2573 case Intrinsic::nvvm_tex_1d_v4u32_f32: 2574 return NVPTXISD::Tex1DU32Float; 2575 case Intrinsic::nvvm_tex_1d_level_v4u32_f32: 2576 return NVPTXISD::Tex1DU32FloatLevel; 2577 case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: 2578 return NVPTXISD::Tex1DU32FloatGrad; 2579 2580 case Intrinsic::nvvm_tex_1d_array_v4f32_s32: 2581 return NVPTXISD::Tex1DArrayFloatS32; 2582 case Intrinsic::nvvm_tex_1d_array_v4f32_f32: 2583 return NVPTXISD::Tex1DArrayFloatFloat; 2584 case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: 2585 return NVPTXISD::Tex1DArrayFloatFloatLevel; 2586 case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: 2587 return NVPTXISD::Tex1DArrayFloatFloatGrad; 2588 case Intrinsic::nvvm_tex_1d_array_v4s32_s32: 2589 return NVPTXISD::Tex1DArrayS32S32; 2590 case Intrinsic::nvvm_tex_1d_array_v4s32_f32: 2591 return NVPTXISD::Tex1DArrayS32Float; 2592 case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: 2593 return NVPTXISD::Tex1DArrayS32FloatLevel; 2594 case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: 2595 return NVPTXISD::Tex1DArrayS32FloatGrad; 2596 case Intrinsic::nvvm_tex_1d_array_v4u32_s32: 2597 return NVPTXISD::Tex1DArrayU32S32; 2598 case Intrinsic::nvvm_tex_1d_array_v4u32_f32: 2599 return NVPTXISD::Tex1DArrayU32Float; 2600 case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: 2601 return NVPTXISD::Tex1DArrayU32FloatLevel; 2602 case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: 2603 return NVPTXISD::Tex1DArrayU32FloatGrad; 2604 2605 case Intrinsic::nvvm_tex_2d_v4f32_s32: 2606 return NVPTXISD::Tex2DFloatS32; 2607 case Intrinsic::nvvm_tex_2d_v4f32_f32: 2608 return NVPTXISD::Tex2DFloatFloat; 2609 case Intrinsic::nvvm_tex_2d_level_v4f32_f32: 2610 return NVPTXISD::Tex2DFloatFloatLevel; 2611 case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: 2612 return 
NVPTXISD::Tex2DFloatFloatGrad; 2613 case Intrinsic::nvvm_tex_2d_v4s32_s32: 2614 return NVPTXISD::Tex2DS32S32; 2615 case Intrinsic::nvvm_tex_2d_v4s32_f32: 2616 return NVPTXISD::Tex2DS32Float; 2617 case Intrinsic::nvvm_tex_2d_level_v4s32_f32: 2618 return NVPTXISD::Tex2DS32FloatLevel; 2619 case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: 2620 return NVPTXISD::Tex2DS32FloatGrad; 2621 case Intrinsic::nvvm_tex_2d_v4u32_s32: 2622 return NVPTXISD::Tex2DU32S32; 2623 case Intrinsic::nvvm_tex_2d_v4u32_f32: 2624 return NVPTXISD::Tex2DU32Float; 2625 case Intrinsic::nvvm_tex_2d_level_v4u32_f32: 2626 return NVPTXISD::Tex2DU32FloatLevel; 2627 case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: 2628 return NVPTXISD::Tex2DU32FloatGrad; 2629 2630 case Intrinsic::nvvm_tex_2d_array_v4f32_s32: 2631 return NVPTXISD::Tex2DArrayFloatS32; 2632 case Intrinsic::nvvm_tex_2d_array_v4f32_f32: 2633 return NVPTXISD::Tex2DArrayFloatFloat; 2634 case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: 2635 return NVPTXISD::Tex2DArrayFloatFloatLevel; 2636 case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: 2637 return NVPTXISD::Tex2DArrayFloatFloatGrad; 2638 case Intrinsic::nvvm_tex_2d_array_v4s32_s32: 2639 return NVPTXISD::Tex2DArrayS32S32; 2640 case Intrinsic::nvvm_tex_2d_array_v4s32_f32: 2641 return NVPTXISD::Tex2DArrayS32Float; 2642 case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: 2643 return NVPTXISD::Tex2DArrayS32FloatLevel; 2644 case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: 2645 return NVPTXISD::Tex2DArrayS32FloatGrad; 2646 case Intrinsic::nvvm_tex_2d_array_v4u32_s32: 2647 return NVPTXISD::Tex2DArrayU32S32; 2648 case Intrinsic::nvvm_tex_2d_array_v4u32_f32: 2649 return NVPTXISD::Tex2DArrayU32Float; 2650 case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: 2651 return NVPTXISD::Tex2DArrayU32FloatLevel; 2652 case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: 2653 return NVPTXISD::Tex2DArrayU32FloatGrad; 2654 2655 case Intrinsic::nvvm_tex_3d_v4f32_s32: 2656 return NVPTXISD::Tex3DFloatS32; 2657 case Intrinsic::nvvm_tex_3d_v4f32_f32: 2658 return NVPTXISD::Tex3DFloatFloat; 2659 case Intrinsic::nvvm_tex_3d_level_v4f32_f32: 2660 return NVPTXISD::Tex3DFloatFloatLevel; 2661 case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: 2662 return NVPTXISD::Tex3DFloatFloatGrad; 2663 case Intrinsic::nvvm_tex_3d_v4s32_s32: 2664 return NVPTXISD::Tex3DS32S32; 2665 case Intrinsic::nvvm_tex_3d_v4s32_f32: 2666 return NVPTXISD::Tex3DS32Float; 2667 case Intrinsic::nvvm_tex_3d_level_v4s32_f32: 2668 return NVPTXISD::Tex3DS32FloatLevel; 2669 case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: 2670 return NVPTXISD::Tex3DS32FloatGrad; 2671 case Intrinsic::nvvm_tex_3d_v4u32_s32: 2672 return NVPTXISD::Tex3DU32S32; 2673 case Intrinsic::nvvm_tex_3d_v4u32_f32: 2674 return NVPTXISD::Tex3DU32Float; 2675 case Intrinsic::nvvm_tex_3d_level_v4u32_f32: 2676 return NVPTXISD::Tex3DU32FloatLevel; 2677 case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: 2678 return NVPTXISD::Tex3DU32FloatGrad; 2679 2680 case Intrinsic::nvvm_tex_cube_v4f32_f32: 2681 return NVPTXISD::TexCubeFloatFloat; 2682 case Intrinsic::nvvm_tex_cube_level_v4f32_f32: 2683 return NVPTXISD::TexCubeFloatFloatLevel; 2684 case Intrinsic::nvvm_tex_cube_v4s32_f32: 2685 return NVPTXISD::TexCubeS32Float; 2686 case Intrinsic::nvvm_tex_cube_level_v4s32_f32: 2687 return NVPTXISD::TexCubeS32FloatLevel; 2688 case Intrinsic::nvvm_tex_cube_v4u32_f32: 2689 return NVPTXISD::TexCubeU32Float; 2690 case Intrinsic::nvvm_tex_cube_level_v4u32_f32: 2691 return NVPTXISD::TexCubeU32FloatLevel; 2692 2693 case Intrinsic::nvvm_tex_cube_array_v4f32_f32: 2694 return 
NVPTXISD::TexCubeArrayFloatFloat; 2695 case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: 2696 return NVPTXISD::TexCubeArrayFloatFloatLevel; 2697 case Intrinsic::nvvm_tex_cube_array_v4s32_f32: 2698 return NVPTXISD::TexCubeArrayS32Float; 2699 case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: 2700 return NVPTXISD::TexCubeArrayS32FloatLevel; 2701 case Intrinsic::nvvm_tex_cube_array_v4u32_f32: 2702 return NVPTXISD::TexCubeArrayU32Float; 2703 case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: 2704 return NVPTXISD::TexCubeArrayU32FloatLevel; 2705 2706 case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: 2707 return NVPTXISD::Tld4R2DFloatFloat; 2708 case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: 2709 return NVPTXISD::Tld4G2DFloatFloat; 2710 case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: 2711 return NVPTXISD::Tld4B2DFloatFloat; 2712 case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: 2713 return NVPTXISD::Tld4A2DFloatFloat; 2714 case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: 2715 return NVPTXISD::Tld4R2DS64Float; 2716 case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: 2717 return NVPTXISD::Tld4G2DS64Float; 2718 case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: 2719 return NVPTXISD::Tld4B2DS64Float; 2720 case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: 2721 return NVPTXISD::Tld4A2DS64Float; 2722 case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: 2723 return NVPTXISD::Tld4R2DU64Float; 2724 case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: 2725 return NVPTXISD::Tld4G2DU64Float; 2726 case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: 2727 return NVPTXISD::Tld4B2DU64Float; 2728 case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: 2729 return NVPTXISD::Tld4A2DU64Float; 2730 2731 case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: 2732 return NVPTXISD::TexUnified1DFloatS32; 2733 case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: 2734 return NVPTXISD::TexUnified1DFloatFloat; 2735 case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: 2736 return NVPTXISD::TexUnified1DFloatFloatLevel; 2737 case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: 2738 return NVPTXISD::TexUnified1DFloatFloatGrad; 2739 case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: 2740 return NVPTXISD::TexUnified1DS32S32; 2741 case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: 2742 return NVPTXISD::TexUnified1DS32Float; 2743 case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: 2744 return NVPTXISD::TexUnified1DS32FloatLevel; 2745 case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: 2746 return NVPTXISD::TexUnified1DS32FloatGrad; 2747 case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: 2748 return NVPTXISD::TexUnified1DU32S32; 2749 case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: 2750 return NVPTXISD::TexUnified1DU32Float; 2751 case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: 2752 return NVPTXISD::TexUnified1DU32FloatLevel; 2753 case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: 2754 return NVPTXISD::TexUnified1DU32FloatGrad; 2755 2756 case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: 2757 return NVPTXISD::TexUnified1DArrayFloatS32; 2758 case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: 2759 return NVPTXISD::TexUnified1DArrayFloatFloat; 2760 case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: 2761 return NVPTXISD::TexUnified1DArrayFloatFloatLevel; 2762 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: 2763 return NVPTXISD::TexUnified1DArrayFloatFloatGrad; 2764 case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: 2765 return NVPTXISD::TexUnified1DArrayS32S32; 2766 case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: 2767 return NVPTXISD::TexUnified1DArrayS32Float; 2768 case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: 2769 
return NVPTXISD::TexUnified1DArrayS32FloatLevel; 2770 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: 2771 return NVPTXISD::TexUnified1DArrayS32FloatGrad; 2772 case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: 2773 return NVPTXISD::TexUnified1DArrayU32S32; 2774 case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: 2775 return NVPTXISD::TexUnified1DArrayU32Float; 2776 case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: 2777 return NVPTXISD::TexUnified1DArrayU32FloatLevel; 2778 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: 2779 return NVPTXISD::TexUnified1DArrayU32FloatGrad; 2780 2781 case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: 2782 return NVPTXISD::TexUnified2DFloatS32; 2783 case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: 2784 return NVPTXISD::TexUnified2DFloatFloat; 2785 case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: 2786 return NVPTXISD::TexUnified2DFloatFloatLevel; 2787 case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: 2788 return NVPTXISD::TexUnified2DFloatFloatGrad; 2789 case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: 2790 return NVPTXISD::TexUnified2DS32S32; 2791 case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: 2792 return NVPTXISD::TexUnified2DS32Float; 2793 case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: 2794 return NVPTXISD::TexUnified2DS32FloatLevel; 2795 case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: 2796 return NVPTXISD::TexUnified2DS32FloatGrad; 2797 case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: 2798 return NVPTXISD::TexUnified2DU32S32; 2799 case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: 2800 return NVPTXISD::TexUnified2DU32Float; 2801 case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: 2802 return NVPTXISD::TexUnified2DU32FloatLevel; 2803 case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: 2804 return NVPTXISD::TexUnified2DU32FloatGrad; 2805 2806 case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: 2807 return NVPTXISD::TexUnified2DArrayFloatS32; 2808 case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: 2809 return NVPTXISD::TexUnified2DArrayFloatFloat; 2810 case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: 2811 return NVPTXISD::TexUnified2DArrayFloatFloatLevel; 2812 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: 2813 return NVPTXISD::TexUnified2DArrayFloatFloatGrad; 2814 case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: 2815 return NVPTXISD::TexUnified2DArrayS32S32; 2816 case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: 2817 return NVPTXISD::TexUnified2DArrayS32Float; 2818 case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: 2819 return NVPTXISD::TexUnified2DArrayS32FloatLevel; 2820 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: 2821 return NVPTXISD::TexUnified2DArrayS32FloatGrad; 2822 case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: 2823 return NVPTXISD::TexUnified2DArrayU32S32; 2824 case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: 2825 return NVPTXISD::TexUnified2DArrayU32Float; 2826 case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: 2827 return NVPTXISD::TexUnified2DArrayU32FloatLevel; 2828 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: 2829 return NVPTXISD::TexUnified2DArrayU32FloatGrad; 2830 2831 case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: 2832 return NVPTXISD::TexUnified3DFloatS32; 2833 case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: 2834 return NVPTXISD::TexUnified3DFloatFloat; 2835 case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: 2836 return NVPTXISD::TexUnified3DFloatFloatLevel; 2837 case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: 2838 
return NVPTXISD::TexUnified3DFloatFloatGrad; 2839 case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: 2840 return NVPTXISD::TexUnified3DS32S32; 2841 case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: 2842 return NVPTXISD::TexUnified3DS32Float; 2843 case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: 2844 return NVPTXISD::TexUnified3DS32FloatLevel; 2845 case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: 2846 return NVPTXISD::TexUnified3DS32FloatGrad; 2847 case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: 2848 return NVPTXISD::TexUnified3DU32S32; 2849 case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: 2850 return NVPTXISD::TexUnified3DU32Float; 2851 case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: 2852 return NVPTXISD::TexUnified3DU32FloatLevel; 2853 case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: 2854 return NVPTXISD::TexUnified3DU32FloatGrad; 2855 2856 case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: 2857 return NVPTXISD::TexUnifiedCubeFloatFloat; 2858 case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: 2859 return NVPTXISD::TexUnifiedCubeFloatFloatLevel; 2860 case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: 2861 return NVPTXISD::TexUnifiedCubeS32Float; 2862 case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: 2863 return NVPTXISD::TexUnifiedCubeS32FloatLevel; 2864 case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: 2865 return NVPTXISD::TexUnifiedCubeU32Float; 2866 case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: 2867 return NVPTXISD::TexUnifiedCubeU32FloatLevel; 2868 2869 case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: 2870 return NVPTXISD::TexUnifiedCubeArrayFloatFloat; 2871 case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: 2872 return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel; 2873 case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: 2874 return NVPTXISD::TexUnifiedCubeArrayS32Float; 2875 case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: 2876 return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel; 2877 case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: 2878 return NVPTXISD::TexUnifiedCubeArrayU32Float; 2879 case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: 2880 return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel; 2881 2882 case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: 2883 return NVPTXISD::Tld4UnifiedR2DFloatFloat; 2884 case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: 2885 return NVPTXISD::Tld4UnifiedG2DFloatFloat; 2886 case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: 2887 return NVPTXISD::Tld4UnifiedB2DFloatFloat; 2888 case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: 2889 return NVPTXISD::Tld4UnifiedA2DFloatFloat; 2890 case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: 2891 return NVPTXISD::Tld4UnifiedR2DS64Float; 2892 case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: 2893 return NVPTXISD::Tld4UnifiedG2DS64Float; 2894 case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: 2895 return NVPTXISD::Tld4UnifiedB2DS64Float; 2896 case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: 2897 return NVPTXISD::Tld4UnifiedA2DS64Float; 2898 case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: 2899 return NVPTXISD::Tld4UnifiedR2DU64Float; 2900 case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: 2901 return NVPTXISD::Tld4UnifiedG2DU64Float; 2902 case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: 2903 return NVPTXISD::Tld4UnifiedB2DU64Float; 2904 case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: 2905 return NVPTXISD::Tld4UnifiedA2DU64Float; 2906 } 2907 } 2908 2909 static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { 2910 switch (Intrinsic) { 2911 default: 2912 
return 0; 2913 case Intrinsic::nvvm_suld_1d_i8_clamp: 2914 return NVPTXISD::Suld1DI8Clamp; 2915 case Intrinsic::nvvm_suld_1d_i16_clamp: 2916 return NVPTXISD::Suld1DI16Clamp; 2917 case Intrinsic::nvvm_suld_1d_i32_clamp: 2918 return NVPTXISD::Suld1DI32Clamp; 2919 case Intrinsic::nvvm_suld_1d_i64_clamp: 2920 return NVPTXISD::Suld1DI64Clamp; 2921 case Intrinsic::nvvm_suld_1d_v2i8_clamp: 2922 return NVPTXISD::Suld1DV2I8Clamp; 2923 case Intrinsic::nvvm_suld_1d_v2i16_clamp: 2924 return NVPTXISD::Suld1DV2I16Clamp; 2925 case Intrinsic::nvvm_suld_1d_v2i32_clamp: 2926 return NVPTXISD::Suld1DV2I32Clamp; 2927 case Intrinsic::nvvm_suld_1d_v2i64_clamp: 2928 return NVPTXISD::Suld1DV2I64Clamp; 2929 case Intrinsic::nvvm_suld_1d_v4i8_clamp: 2930 return NVPTXISD::Suld1DV4I8Clamp; 2931 case Intrinsic::nvvm_suld_1d_v4i16_clamp: 2932 return NVPTXISD::Suld1DV4I16Clamp; 2933 case Intrinsic::nvvm_suld_1d_v4i32_clamp: 2934 return NVPTXISD::Suld1DV4I32Clamp; 2935 case Intrinsic::nvvm_suld_1d_array_i8_clamp: 2936 return NVPTXISD::Suld1DArrayI8Clamp; 2937 case Intrinsic::nvvm_suld_1d_array_i16_clamp: 2938 return NVPTXISD::Suld1DArrayI16Clamp; 2939 case Intrinsic::nvvm_suld_1d_array_i32_clamp: 2940 return NVPTXISD::Suld1DArrayI32Clamp; 2941 case Intrinsic::nvvm_suld_1d_array_i64_clamp: 2942 return NVPTXISD::Suld1DArrayI64Clamp; 2943 case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: 2944 return NVPTXISD::Suld1DArrayV2I8Clamp; 2945 case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: 2946 return NVPTXISD::Suld1DArrayV2I16Clamp; 2947 case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: 2948 return NVPTXISD::Suld1DArrayV2I32Clamp; 2949 case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: 2950 return NVPTXISD::Suld1DArrayV2I64Clamp; 2951 case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: 2952 return NVPTXISD::Suld1DArrayV4I8Clamp; 2953 case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: 2954 return NVPTXISD::Suld1DArrayV4I16Clamp; 2955 case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: 2956 return NVPTXISD::Suld1DArrayV4I32Clamp; 2957 case Intrinsic::nvvm_suld_2d_i8_clamp: 2958 return NVPTXISD::Suld2DI8Clamp; 2959 case Intrinsic::nvvm_suld_2d_i16_clamp: 2960 return NVPTXISD::Suld2DI16Clamp; 2961 case Intrinsic::nvvm_suld_2d_i32_clamp: 2962 return NVPTXISD::Suld2DI32Clamp; 2963 case Intrinsic::nvvm_suld_2d_i64_clamp: 2964 return NVPTXISD::Suld2DI64Clamp; 2965 case Intrinsic::nvvm_suld_2d_v2i8_clamp: 2966 return NVPTXISD::Suld2DV2I8Clamp; 2967 case Intrinsic::nvvm_suld_2d_v2i16_clamp: 2968 return NVPTXISD::Suld2DV2I16Clamp; 2969 case Intrinsic::nvvm_suld_2d_v2i32_clamp: 2970 return NVPTXISD::Suld2DV2I32Clamp; 2971 case Intrinsic::nvvm_suld_2d_v2i64_clamp: 2972 return NVPTXISD::Suld2DV2I64Clamp; 2973 case Intrinsic::nvvm_suld_2d_v4i8_clamp: 2974 return NVPTXISD::Suld2DV4I8Clamp; 2975 case Intrinsic::nvvm_suld_2d_v4i16_clamp: 2976 return NVPTXISD::Suld2DV4I16Clamp; 2977 case Intrinsic::nvvm_suld_2d_v4i32_clamp: 2978 return NVPTXISD::Suld2DV4I32Clamp; 2979 case Intrinsic::nvvm_suld_2d_array_i8_clamp: 2980 return NVPTXISD::Suld2DArrayI8Clamp; 2981 case Intrinsic::nvvm_suld_2d_array_i16_clamp: 2982 return NVPTXISD::Suld2DArrayI16Clamp; 2983 case Intrinsic::nvvm_suld_2d_array_i32_clamp: 2984 return NVPTXISD::Suld2DArrayI32Clamp; 2985 case Intrinsic::nvvm_suld_2d_array_i64_clamp: 2986 return NVPTXISD::Suld2DArrayI64Clamp; 2987 case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: 2988 return NVPTXISD::Suld2DArrayV2I8Clamp; 2989 case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: 2990 return NVPTXISD::Suld2DArrayV2I16Clamp; 2991 case 
Intrinsic::nvvm_suld_2d_array_v2i32_clamp: 2992 return NVPTXISD::Suld2DArrayV2I32Clamp; 2993 case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: 2994 return NVPTXISD::Suld2DArrayV2I64Clamp; 2995 case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: 2996 return NVPTXISD::Suld2DArrayV4I8Clamp; 2997 case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: 2998 return NVPTXISD::Suld2DArrayV4I16Clamp; 2999 case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: 3000 return NVPTXISD::Suld2DArrayV4I32Clamp; 3001 case Intrinsic::nvvm_suld_3d_i8_clamp: 3002 return NVPTXISD::Suld3DI8Clamp; 3003 case Intrinsic::nvvm_suld_3d_i16_clamp: 3004 return NVPTXISD::Suld3DI16Clamp; 3005 case Intrinsic::nvvm_suld_3d_i32_clamp: 3006 return NVPTXISD::Suld3DI32Clamp; 3007 case Intrinsic::nvvm_suld_3d_i64_clamp: 3008 return NVPTXISD::Suld3DI64Clamp; 3009 case Intrinsic::nvvm_suld_3d_v2i8_clamp: 3010 return NVPTXISD::Suld3DV2I8Clamp; 3011 case Intrinsic::nvvm_suld_3d_v2i16_clamp: 3012 return NVPTXISD::Suld3DV2I16Clamp; 3013 case Intrinsic::nvvm_suld_3d_v2i32_clamp: 3014 return NVPTXISD::Suld3DV2I32Clamp; 3015 case Intrinsic::nvvm_suld_3d_v2i64_clamp: 3016 return NVPTXISD::Suld3DV2I64Clamp; 3017 case Intrinsic::nvvm_suld_3d_v4i8_clamp: 3018 return NVPTXISD::Suld3DV4I8Clamp; 3019 case Intrinsic::nvvm_suld_3d_v4i16_clamp: 3020 return NVPTXISD::Suld3DV4I16Clamp; 3021 case Intrinsic::nvvm_suld_3d_v4i32_clamp: 3022 return NVPTXISD::Suld3DV4I32Clamp; 3023 case Intrinsic::nvvm_suld_1d_i8_trap: 3024 return NVPTXISD::Suld1DI8Trap; 3025 case Intrinsic::nvvm_suld_1d_i16_trap: 3026 return NVPTXISD::Suld1DI16Trap; 3027 case Intrinsic::nvvm_suld_1d_i32_trap: 3028 return NVPTXISD::Suld1DI32Trap; 3029 case Intrinsic::nvvm_suld_1d_i64_trap: 3030 return NVPTXISD::Suld1DI64Trap; 3031 case Intrinsic::nvvm_suld_1d_v2i8_trap: 3032 return NVPTXISD::Suld1DV2I8Trap; 3033 case Intrinsic::nvvm_suld_1d_v2i16_trap: 3034 return NVPTXISD::Suld1DV2I16Trap; 3035 case Intrinsic::nvvm_suld_1d_v2i32_trap: 3036 return NVPTXISD::Suld1DV2I32Trap; 3037 case Intrinsic::nvvm_suld_1d_v2i64_trap: 3038 return NVPTXISD::Suld1DV2I64Trap; 3039 case Intrinsic::nvvm_suld_1d_v4i8_trap: 3040 return NVPTXISD::Suld1DV4I8Trap; 3041 case Intrinsic::nvvm_suld_1d_v4i16_trap: 3042 return NVPTXISD::Suld1DV4I16Trap; 3043 case Intrinsic::nvvm_suld_1d_v4i32_trap: 3044 return NVPTXISD::Suld1DV4I32Trap; 3045 case Intrinsic::nvvm_suld_1d_array_i8_trap: 3046 return NVPTXISD::Suld1DArrayI8Trap; 3047 case Intrinsic::nvvm_suld_1d_array_i16_trap: 3048 return NVPTXISD::Suld1DArrayI16Trap; 3049 case Intrinsic::nvvm_suld_1d_array_i32_trap: 3050 return NVPTXISD::Suld1DArrayI32Trap; 3051 case Intrinsic::nvvm_suld_1d_array_i64_trap: 3052 return NVPTXISD::Suld1DArrayI64Trap; 3053 case Intrinsic::nvvm_suld_1d_array_v2i8_trap: 3054 return NVPTXISD::Suld1DArrayV2I8Trap; 3055 case Intrinsic::nvvm_suld_1d_array_v2i16_trap: 3056 return NVPTXISD::Suld1DArrayV2I16Trap; 3057 case Intrinsic::nvvm_suld_1d_array_v2i32_trap: 3058 return NVPTXISD::Suld1DArrayV2I32Trap; 3059 case Intrinsic::nvvm_suld_1d_array_v2i64_trap: 3060 return NVPTXISD::Suld1DArrayV2I64Trap; 3061 case Intrinsic::nvvm_suld_1d_array_v4i8_trap: 3062 return NVPTXISD::Suld1DArrayV4I8Trap; 3063 case Intrinsic::nvvm_suld_1d_array_v4i16_trap: 3064 return NVPTXISD::Suld1DArrayV4I16Trap; 3065 case Intrinsic::nvvm_suld_1d_array_v4i32_trap: 3066 return NVPTXISD::Suld1DArrayV4I32Trap; 3067 case Intrinsic::nvvm_suld_2d_i8_trap: 3068 return NVPTXISD::Suld2DI8Trap; 3069 case Intrinsic::nvvm_suld_2d_i16_trap: 3070 return NVPTXISD::Suld2DI16Trap; 3071 case 
Intrinsic::nvvm_suld_2d_i32_trap: 3072 return NVPTXISD::Suld2DI32Trap; 3073 case Intrinsic::nvvm_suld_2d_i64_trap: 3074 return NVPTXISD::Suld2DI64Trap; 3075 case Intrinsic::nvvm_suld_2d_v2i8_trap: 3076 return NVPTXISD::Suld2DV2I8Trap; 3077 case Intrinsic::nvvm_suld_2d_v2i16_trap: 3078 return NVPTXISD::Suld2DV2I16Trap; 3079 case Intrinsic::nvvm_suld_2d_v2i32_trap: 3080 return NVPTXISD::Suld2DV2I32Trap; 3081 case Intrinsic::nvvm_suld_2d_v2i64_trap: 3082 return NVPTXISD::Suld2DV2I64Trap; 3083 case Intrinsic::nvvm_suld_2d_v4i8_trap: 3084 return NVPTXISD::Suld2DV4I8Trap; 3085 case Intrinsic::nvvm_suld_2d_v4i16_trap: 3086 return NVPTXISD::Suld2DV4I16Trap; 3087 case Intrinsic::nvvm_suld_2d_v4i32_trap: 3088 return NVPTXISD::Suld2DV4I32Trap; 3089 case Intrinsic::nvvm_suld_2d_array_i8_trap: 3090 return NVPTXISD::Suld2DArrayI8Trap; 3091 case Intrinsic::nvvm_suld_2d_array_i16_trap: 3092 return NVPTXISD::Suld2DArrayI16Trap; 3093 case Intrinsic::nvvm_suld_2d_array_i32_trap: 3094 return NVPTXISD::Suld2DArrayI32Trap; 3095 case Intrinsic::nvvm_suld_2d_array_i64_trap: 3096 return NVPTXISD::Suld2DArrayI64Trap; 3097 case Intrinsic::nvvm_suld_2d_array_v2i8_trap: 3098 return NVPTXISD::Suld2DArrayV2I8Trap; 3099 case Intrinsic::nvvm_suld_2d_array_v2i16_trap: 3100 return NVPTXISD::Suld2DArrayV2I16Trap; 3101 case Intrinsic::nvvm_suld_2d_array_v2i32_trap: 3102 return NVPTXISD::Suld2DArrayV2I32Trap; 3103 case Intrinsic::nvvm_suld_2d_array_v2i64_trap: 3104 return NVPTXISD::Suld2DArrayV2I64Trap; 3105 case Intrinsic::nvvm_suld_2d_array_v4i8_trap: 3106 return NVPTXISD::Suld2DArrayV4I8Trap; 3107 case Intrinsic::nvvm_suld_2d_array_v4i16_trap: 3108 return NVPTXISD::Suld2DArrayV4I16Trap; 3109 case Intrinsic::nvvm_suld_2d_array_v4i32_trap: 3110 return NVPTXISD::Suld2DArrayV4I32Trap; 3111 case Intrinsic::nvvm_suld_3d_i8_trap: 3112 return NVPTXISD::Suld3DI8Trap; 3113 case Intrinsic::nvvm_suld_3d_i16_trap: 3114 return NVPTXISD::Suld3DI16Trap; 3115 case Intrinsic::nvvm_suld_3d_i32_trap: 3116 return NVPTXISD::Suld3DI32Trap; 3117 case Intrinsic::nvvm_suld_3d_i64_trap: 3118 return NVPTXISD::Suld3DI64Trap; 3119 case Intrinsic::nvvm_suld_3d_v2i8_trap: 3120 return NVPTXISD::Suld3DV2I8Trap; 3121 case Intrinsic::nvvm_suld_3d_v2i16_trap: 3122 return NVPTXISD::Suld3DV2I16Trap; 3123 case Intrinsic::nvvm_suld_3d_v2i32_trap: 3124 return NVPTXISD::Suld3DV2I32Trap; 3125 case Intrinsic::nvvm_suld_3d_v2i64_trap: 3126 return NVPTXISD::Suld3DV2I64Trap; 3127 case Intrinsic::nvvm_suld_3d_v4i8_trap: 3128 return NVPTXISD::Suld3DV4I8Trap; 3129 case Intrinsic::nvvm_suld_3d_v4i16_trap: 3130 return NVPTXISD::Suld3DV4I16Trap; 3131 case Intrinsic::nvvm_suld_3d_v4i32_trap: 3132 return NVPTXISD::Suld3DV4I32Trap; 3133 case Intrinsic::nvvm_suld_1d_i8_zero: 3134 return NVPTXISD::Suld1DI8Zero; 3135 case Intrinsic::nvvm_suld_1d_i16_zero: 3136 return NVPTXISD::Suld1DI16Zero; 3137 case Intrinsic::nvvm_suld_1d_i32_zero: 3138 return NVPTXISD::Suld1DI32Zero; 3139 case Intrinsic::nvvm_suld_1d_i64_zero: 3140 return NVPTXISD::Suld1DI64Zero; 3141 case Intrinsic::nvvm_suld_1d_v2i8_zero: 3142 return NVPTXISD::Suld1DV2I8Zero; 3143 case Intrinsic::nvvm_suld_1d_v2i16_zero: 3144 return NVPTXISD::Suld1DV2I16Zero; 3145 case Intrinsic::nvvm_suld_1d_v2i32_zero: 3146 return NVPTXISD::Suld1DV2I32Zero; 3147 case Intrinsic::nvvm_suld_1d_v2i64_zero: 3148 return NVPTXISD::Suld1DV2I64Zero; 3149 case Intrinsic::nvvm_suld_1d_v4i8_zero: 3150 return NVPTXISD::Suld1DV4I8Zero; 3151 case Intrinsic::nvvm_suld_1d_v4i16_zero: 3152 return NVPTXISD::Suld1DV4I16Zero; 3153 case 
Intrinsic::nvvm_suld_1d_v4i32_zero: 3154 return NVPTXISD::Suld1DV4I32Zero; 3155 case Intrinsic::nvvm_suld_1d_array_i8_zero: 3156 return NVPTXISD::Suld1DArrayI8Zero; 3157 case Intrinsic::nvvm_suld_1d_array_i16_zero: 3158 return NVPTXISD::Suld1DArrayI16Zero; 3159 case Intrinsic::nvvm_suld_1d_array_i32_zero: 3160 return NVPTXISD::Suld1DArrayI32Zero; 3161 case Intrinsic::nvvm_suld_1d_array_i64_zero: 3162 return NVPTXISD::Suld1DArrayI64Zero; 3163 case Intrinsic::nvvm_suld_1d_array_v2i8_zero: 3164 return NVPTXISD::Suld1DArrayV2I8Zero; 3165 case Intrinsic::nvvm_suld_1d_array_v2i16_zero: 3166 return NVPTXISD::Suld1DArrayV2I16Zero; 3167 case Intrinsic::nvvm_suld_1d_array_v2i32_zero: 3168 return NVPTXISD::Suld1DArrayV2I32Zero; 3169 case Intrinsic::nvvm_suld_1d_array_v2i64_zero: 3170 return NVPTXISD::Suld1DArrayV2I64Zero; 3171 case Intrinsic::nvvm_suld_1d_array_v4i8_zero: 3172 return NVPTXISD::Suld1DArrayV4I8Zero; 3173 case Intrinsic::nvvm_suld_1d_array_v4i16_zero: 3174 return NVPTXISD::Suld1DArrayV4I16Zero; 3175 case Intrinsic::nvvm_suld_1d_array_v4i32_zero: 3176 return NVPTXISD::Suld1DArrayV4I32Zero; 3177 case Intrinsic::nvvm_suld_2d_i8_zero: 3178 return NVPTXISD::Suld2DI8Zero; 3179 case Intrinsic::nvvm_suld_2d_i16_zero: 3180 return NVPTXISD::Suld2DI16Zero; 3181 case Intrinsic::nvvm_suld_2d_i32_zero: 3182 return NVPTXISD::Suld2DI32Zero; 3183 case Intrinsic::nvvm_suld_2d_i64_zero: 3184 return NVPTXISD::Suld2DI64Zero; 3185 case Intrinsic::nvvm_suld_2d_v2i8_zero: 3186 return NVPTXISD::Suld2DV2I8Zero; 3187 case Intrinsic::nvvm_suld_2d_v2i16_zero: 3188 return NVPTXISD::Suld2DV2I16Zero; 3189 case Intrinsic::nvvm_suld_2d_v2i32_zero: 3190 return NVPTXISD::Suld2DV2I32Zero; 3191 case Intrinsic::nvvm_suld_2d_v2i64_zero: 3192 return NVPTXISD::Suld2DV2I64Zero; 3193 case Intrinsic::nvvm_suld_2d_v4i8_zero: 3194 return NVPTXISD::Suld2DV4I8Zero; 3195 case Intrinsic::nvvm_suld_2d_v4i16_zero: 3196 return NVPTXISD::Suld2DV4I16Zero; 3197 case Intrinsic::nvvm_suld_2d_v4i32_zero: 3198 return NVPTXISD::Suld2DV4I32Zero; 3199 case Intrinsic::nvvm_suld_2d_array_i8_zero: 3200 return NVPTXISD::Suld2DArrayI8Zero; 3201 case Intrinsic::nvvm_suld_2d_array_i16_zero: 3202 return NVPTXISD::Suld2DArrayI16Zero; 3203 case Intrinsic::nvvm_suld_2d_array_i32_zero: 3204 return NVPTXISD::Suld2DArrayI32Zero; 3205 case Intrinsic::nvvm_suld_2d_array_i64_zero: 3206 return NVPTXISD::Suld2DArrayI64Zero; 3207 case Intrinsic::nvvm_suld_2d_array_v2i8_zero: 3208 return NVPTXISD::Suld2DArrayV2I8Zero; 3209 case Intrinsic::nvvm_suld_2d_array_v2i16_zero: 3210 return NVPTXISD::Suld2DArrayV2I16Zero; 3211 case Intrinsic::nvvm_suld_2d_array_v2i32_zero: 3212 return NVPTXISD::Suld2DArrayV2I32Zero; 3213 case Intrinsic::nvvm_suld_2d_array_v2i64_zero: 3214 return NVPTXISD::Suld2DArrayV2I64Zero; 3215 case Intrinsic::nvvm_suld_2d_array_v4i8_zero: 3216 return NVPTXISD::Suld2DArrayV4I8Zero; 3217 case Intrinsic::nvvm_suld_2d_array_v4i16_zero: 3218 return NVPTXISD::Suld2DArrayV4I16Zero; 3219 case Intrinsic::nvvm_suld_2d_array_v4i32_zero: 3220 return NVPTXISD::Suld2DArrayV4I32Zero; 3221 case Intrinsic::nvvm_suld_3d_i8_zero: 3222 return NVPTXISD::Suld3DI8Zero; 3223 case Intrinsic::nvvm_suld_3d_i16_zero: 3224 return NVPTXISD::Suld3DI16Zero; 3225 case Intrinsic::nvvm_suld_3d_i32_zero: 3226 return NVPTXISD::Suld3DI32Zero; 3227 case Intrinsic::nvvm_suld_3d_i64_zero: 3228 return NVPTXISD::Suld3DI64Zero; 3229 case Intrinsic::nvvm_suld_3d_v2i8_zero: 3230 return NVPTXISD::Suld3DV2I8Zero; 3231 case Intrinsic::nvvm_suld_3d_v2i16_zero: 3232 return NVPTXISD::Suld3DV2I16Zero; 3233 
case Intrinsic::nvvm_suld_3d_v2i32_zero: 3234 return NVPTXISD::Suld3DV2I32Zero; 3235 case Intrinsic::nvvm_suld_3d_v2i64_zero: 3236 return NVPTXISD::Suld3DV2I64Zero; 3237 case Intrinsic::nvvm_suld_3d_v4i8_zero: 3238 return NVPTXISD::Suld3DV4I8Zero; 3239 case Intrinsic::nvvm_suld_3d_v4i16_zero: 3240 return NVPTXISD::Suld3DV4I16Zero; 3241 case Intrinsic::nvvm_suld_3d_v4i32_zero: 3242 return NVPTXISD::Suld3DV4I32Zero; 3243 } 3244 } 3245 3246 // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as 3247 // TgtMemIntrinsic 3248 // because we need the information that is only available in the "Value" type 3249 // of destination 3250 // pointer. In particular, the address space information. 3251 bool NVPTXTargetLowering::getTgtMemIntrinsic( 3252 IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { 3253 switch (Intrinsic) { 3254 default: 3255 return false; 3256 3257 case Intrinsic::nvvm_atomic_load_add_f32: 3258 Info.opc = ISD::INTRINSIC_W_CHAIN; 3259 Info.memVT = MVT::f32; 3260 Info.ptrVal = I.getArgOperand(0); 3261 Info.offset = 0; 3262 Info.vol = 0; 3263 Info.readMem = true; 3264 Info.writeMem = true; 3265 Info.align = 0; 3266 return true; 3267 3268 case Intrinsic::nvvm_atomic_load_inc_32: 3269 case Intrinsic::nvvm_atomic_load_dec_32: 3270 Info.opc = ISD::INTRINSIC_W_CHAIN; 3271 Info.memVT = MVT::i32; 3272 Info.ptrVal = I.getArgOperand(0); 3273 Info.offset = 0; 3274 Info.vol = 0; 3275 Info.readMem = true; 3276 Info.writeMem = true; 3277 Info.align = 0; 3278 return true; 3279 3280 case Intrinsic::nvvm_ldu_global_i: 3281 case Intrinsic::nvvm_ldu_global_f: 3282 case Intrinsic::nvvm_ldu_global_p: { 3283 auto &DL = I.getModule()->getDataLayout(); 3284 Info.opc = ISD::INTRINSIC_W_CHAIN; 3285 if (Intrinsic == Intrinsic::nvvm_ldu_global_i) 3286 Info.memVT = getValueType(DL, I.getType()); 3287 else if(Intrinsic == Intrinsic::nvvm_ldu_global_p) 3288 Info.memVT = getPointerTy(DL); 3289 else 3290 Info.memVT = getValueType(DL, I.getType()); 3291 Info.ptrVal = I.getArgOperand(0); 3292 Info.offset = 0; 3293 Info.vol = 0; 3294 Info.readMem = true; 3295 Info.writeMem = false; 3296 Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); 3297 3298 return true; 3299 } 3300 case Intrinsic::nvvm_ldg_global_i: 3301 case Intrinsic::nvvm_ldg_global_f: 3302 case Intrinsic::nvvm_ldg_global_p: { 3303 auto &DL = I.getModule()->getDataLayout(); 3304 3305 Info.opc = ISD::INTRINSIC_W_CHAIN; 3306 if (Intrinsic == Intrinsic::nvvm_ldg_global_i) 3307 Info.memVT = getValueType(DL, I.getType()); 3308 else if(Intrinsic == Intrinsic::nvvm_ldg_global_p) 3309 Info.memVT = getPointerTy(DL); 3310 else 3311 Info.memVT = getValueType(DL, I.getType()); 3312 Info.ptrVal = I.getArgOperand(0); 3313 Info.offset = 0; 3314 Info.vol = 0; 3315 Info.readMem = true; 3316 Info.writeMem = false; 3317 Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); 3318 3319 return true; 3320 } 3321 3322 case Intrinsic::nvvm_tex_1d_v4f32_s32: 3323 case Intrinsic::nvvm_tex_1d_v4f32_f32: 3324 case Intrinsic::nvvm_tex_1d_level_v4f32_f32: 3325 case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: 3326 case Intrinsic::nvvm_tex_1d_array_v4f32_s32: 3327 case Intrinsic::nvvm_tex_1d_array_v4f32_f32: 3328 case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: 3329 case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: 3330 case Intrinsic::nvvm_tex_2d_v4f32_s32: 3331 case Intrinsic::nvvm_tex_2d_v4f32_f32: 3332 case Intrinsic::nvvm_tex_2d_level_v4f32_f32: 3333 case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: 3334 case 
Intrinsic::nvvm_tex_2d_array_v4f32_s32: 3335 case Intrinsic::nvvm_tex_2d_array_v4f32_f32: 3336 case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: 3337 case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: 3338 case Intrinsic::nvvm_tex_3d_v4f32_s32: 3339 case Intrinsic::nvvm_tex_3d_v4f32_f32: 3340 case Intrinsic::nvvm_tex_3d_level_v4f32_f32: 3341 case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: 3342 case Intrinsic::nvvm_tex_cube_v4f32_f32: 3343 case Intrinsic::nvvm_tex_cube_level_v4f32_f32: 3344 case Intrinsic::nvvm_tex_cube_array_v4f32_f32: 3345 case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: 3346 case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: 3347 case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: 3348 case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: 3349 case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: 3350 case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: 3351 case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: 3352 case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: 3353 case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: 3354 case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: 3355 case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: 3356 case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: 3357 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: 3358 case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: 3359 case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: 3360 case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: 3361 case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: 3362 case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: 3363 case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: 3364 case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: 3365 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: 3366 case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: 3367 case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: 3368 case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: 3369 case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: 3370 case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: 3371 case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: 3372 case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: 3373 case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: 3374 case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: 3375 case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: 3376 case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: 3377 case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: { 3378 Info.opc = getOpcForTextureInstr(Intrinsic); 3379 Info.memVT = MVT::v4f32; 3380 Info.ptrVal = nullptr; 3381 Info.offset = 0; 3382 Info.vol = 0; 3383 Info.readMem = true; 3384 Info.writeMem = false; 3385 Info.align = 16; 3386 return true; 3387 } 3388 case Intrinsic::nvvm_tex_1d_v4s32_s32: 3389 case Intrinsic::nvvm_tex_1d_v4s32_f32: 3390 case Intrinsic::nvvm_tex_1d_level_v4s32_f32: 3391 case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: 3392 case Intrinsic::nvvm_tex_1d_array_v4s32_s32: 3393 case Intrinsic::nvvm_tex_1d_array_v4s32_f32: 3394 case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: 3395 case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: 3396 case Intrinsic::nvvm_tex_2d_v4s32_s32: 3397 case Intrinsic::nvvm_tex_2d_v4s32_f32: 3398 case Intrinsic::nvvm_tex_2d_level_v4s32_f32: 3399 case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: 3400 case Intrinsic::nvvm_tex_2d_array_v4s32_s32: 3401 case Intrinsic::nvvm_tex_2d_array_v4s32_f32: 3402 case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: 3403 case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: 3404 case Intrinsic::nvvm_tex_3d_v4s32_s32: 3405 case Intrinsic::nvvm_tex_3d_v4s32_f32: 3406 case 
Intrinsic::nvvm_tex_3d_level_v4s32_f32: 3407 case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: 3408 case Intrinsic::nvvm_tex_cube_v4s32_f32: 3409 case Intrinsic::nvvm_tex_cube_level_v4s32_f32: 3410 case Intrinsic::nvvm_tex_cube_array_v4s32_f32: 3411 case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: 3412 case Intrinsic::nvvm_tex_cube_v4u32_f32: 3413 case Intrinsic::nvvm_tex_cube_level_v4u32_f32: 3414 case Intrinsic::nvvm_tex_cube_array_v4u32_f32: 3415 case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: 3416 case Intrinsic::nvvm_tex_1d_v4u32_s32: 3417 case Intrinsic::nvvm_tex_1d_v4u32_f32: 3418 case Intrinsic::nvvm_tex_1d_level_v4u32_f32: 3419 case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: 3420 case Intrinsic::nvvm_tex_1d_array_v4u32_s32: 3421 case Intrinsic::nvvm_tex_1d_array_v4u32_f32: 3422 case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: 3423 case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: 3424 case Intrinsic::nvvm_tex_2d_v4u32_s32: 3425 case Intrinsic::nvvm_tex_2d_v4u32_f32: 3426 case Intrinsic::nvvm_tex_2d_level_v4u32_f32: 3427 case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: 3428 case Intrinsic::nvvm_tex_2d_array_v4u32_s32: 3429 case Intrinsic::nvvm_tex_2d_array_v4u32_f32: 3430 case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: 3431 case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: 3432 case Intrinsic::nvvm_tex_3d_v4u32_s32: 3433 case Intrinsic::nvvm_tex_3d_v4u32_f32: 3434 case Intrinsic::nvvm_tex_3d_level_v4u32_f32: 3435 case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: 3436 case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: 3437 case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: 3438 case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: 3439 case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: 3440 case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: 3441 case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: 3442 case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: 3443 case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: 3444 case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: 3445 case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: 3446 case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: 3447 case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: 3448 case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: 3449 case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: 3450 case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: 3451 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: 3452 case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: 3453 case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: 3454 case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: 3455 case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: 3456 case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: 3457 case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: 3458 case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: 3459 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: 3460 case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: 3461 case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: 3462 case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: 3463 case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: 3464 case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: 3465 case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: 3466 case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: 3467 case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: 3468 case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: 3469 case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: 3470 case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: 3471 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: 3472 case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: 3473 case 
Intrinsic::nvvm_tex_unified_2d_v4u32_f32: 3474 case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: 3475 case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: 3476 case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: 3477 case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: 3478 case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: 3479 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: 3480 case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: 3481 case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: 3482 case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: 3483 case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: 3484 case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: 3485 case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: 3486 case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: 3487 case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: 3488 case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: 3489 case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: 3490 case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: 3491 case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: 3492 case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: 3493 case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: 3494 case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: 3495 case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: 3496 case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: 3497 case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: 3498 case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: 3499 case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: { 3500 Info.opc = getOpcForTextureInstr(Intrinsic); 3501 Info.memVT = MVT::v4i32; 3502 Info.ptrVal = nullptr; 3503 Info.offset = 0; 3504 Info.vol = 0; 3505 Info.readMem = true; 3506 Info.writeMem = false; 3507 Info.align = 16; 3508 return true; 3509 } 3510 case Intrinsic::nvvm_suld_1d_i8_clamp: 3511 case Intrinsic::nvvm_suld_1d_v2i8_clamp: 3512 case Intrinsic::nvvm_suld_1d_v4i8_clamp: 3513 case Intrinsic::nvvm_suld_1d_array_i8_clamp: 3514 case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: 3515 case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: 3516 case Intrinsic::nvvm_suld_2d_i8_clamp: 3517 case Intrinsic::nvvm_suld_2d_v2i8_clamp: 3518 case Intrinsic::nvvm_suld_2d_v4i8_clamp: 3519 case Intrinsic::nvvm_suld_2d_array_i8_clamp: 3520 case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: 3521 case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: 3522 case Intrinsic::nvvm_suld_3d_i8_clamp: 3523 case Intrinsic::nvvm_suld_3d_v2i8_clamp: 3524 case Intrinsic::nvvm_suld_3d_v4i8_clamp: 3525 case Intrinsic::nvvm_suld_1d_i8_trap: 3526 case Intrinsic::nvvm_suld_1d_v2i8_trap: 3527 case Intrinsic::nvvm_suld_1d_v4i8_trap: 3528 case Intrinsic::nvvm_suld_1d_array_i8_trap: 3529 case Intrinsic::nvvm_suld_1d_array_v2i8_trap: 3530 case Intrinsic::nvvm_suld_1d_array_v4i8_trap: 3531 case Intrinsic::nvvm_suld_2d_i8_trap: 3532 case Intrinsic::nvvm_suld_2d_v2i8_trap: 3533 case Intrinsic::nvvm_suld_2d_v4i8_trap: 3534 case Intrinsic::nvvm_suld_2d_array_i8_trap: 3535 case Intrinsic::nvvm_suld_2d_array_v2i8_trap: 3536 case Intrinsic::nvvm_suld_2d_array_v4i8_trap: 3537 case Intrinsic::nvvm_suld_3d_i8_trap: 3538 case Intrinsic::nvvm_suld_3d_v2i8_trap: 3539 case Intrinsic::nvvm_suld_3d_v4i8_trap: 3540 case Intrinsic::nvvm_suld_1d_i8_zero: 3541 case Intrinsic::nvvm_suld_1d_v2i8_zero: 3542 case Intrinsic::nvvm_suld_1d_v4i8_zero: 3543 case Intrinsic::nvvm_suld_1d_array_i8_zero: 3544 case Intrinsic::nvvm_suld_1d_array_v2i8_zero: 3545 case Intrinsic::nvvm_suld_1d_array_v4i8_zero: 3546 case Intrinsic::nvvm_suld_2d_i8_zero: 3547 case 
Intrinsic::nvvm_suld_2d_v2i8_zero: 3548 case Intrinsic::nvvm_suld_2d_v4i8_zero: 3549 case Intrinsic::nvvm_suld_2d_array_i8_zero: 3550 case Intrinsic::nvvm_suld_2d_array_v2i8_zero: 3551 case Intrinsic::nvvm_suld_2d_array_v4i8_zero: 3552 case Intrinsic::nvvm_suld_3d_i8_zero: 3553 case Intrinsic::nvvm_suld_3d_v2i8_zero: 3554 case Intrinsic::nvvm_suld_3d_v4i8_zero: { 3555 Info.opc = getOpcForSurfaceInstr(Intrinsic); 3556 Info.memVT = MVT::i8; 3557 Info.ptrVal = nullptr; 3558 Info.offset = 0; 3559 Info.vol = 0; 3560 Info.readMem = true; 3561 Info.writeMem = false; 3562 Info.align = 16; 3563 return true; 3564 } 3565 case Intrinsic::nvvm_suld_1d_i16_clamp: 3566 case Intrinsic::nvvm_suld_1d_v2i16_clamp: 3567 case Intrinsic::nvvm_suld_1d_v4i16_clamp: 3568 case Intrinsic::nvvm_suld_1d_array_i16_clamp: 3569 case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: 3570 case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: 3571 case Intrinsic::nvvm_suld_2d_i16_clamp: 3572 case Intrinsic::nvvm_suld_2d_v2i16_clamp: 3573 case Intrinsic::nvvm_suld_2d_v4i16_clamp: 3574 case Intrinsic::nvvm_suld_2d_array_i16_clamp: 3575 case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: 3576 case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: 3577 case Intrinsic::nvvm_suld_3d_i16_clamp: 3578 case Intrinsic::nvvm_suld_3d_v2i16_clamp: 3579 case Intrinsic::nvvm_suld_3d_v4i16_clamp: 3580 case Intrinsic::nvvm_suld_1d_i16_trap: 3581 case Intrinsic::nvvm_suld_1d_v2i16_trap: 3582 case Intrinsic::nvvm_suld_1d_v4i16_trap: 3583 case Intrinsic::nvvm_suld_1d_array_i16_trap: 3584 case Intrinsic::nvvm_suld_1d_array_v2i16_trap: 3585 case Intrinsic::nvvm_suld_1d_array_v4i16_trap: 3586 case Intrinsic::nvvm_suld_2d_i16_trap: 3587 case Intrinsic::nvvm_suld_2d_v2i16_trap: 3588 case Intrinsic::nvvm_suld_2d_v4i16_trap: 3589 case Intrinsic::nvvm_suld_2d_array_i16_trap: 3590 case Intrinsic::nvvm_suld_2d_array_v2i16_trap: 3591 case Intrinsic::nvvm_suld_2d_array_v4i16_trap: 3592 case Intrinsic::nvvm_suld_3d_i16_trap: 3593 case Intrinsic::nvvm_suld_3d_v2i16_trap: 3594 case Intrinsic::nvvm_suld_3d_v4i16_trap: 3595 case Intrinsic::nvvm_suld_1d_i16_zero: 3596 case Intrinsic::nvvm_suld_1d_v2i16_zero: 3597 case Intrinsic::nvvm_suld_1d_v4i16_zero: 3598 case Intrinsic::nvvm_suld_1d_array_i16_zero: 3599 case Intrinsic::nvvm_suld_1d_array_v2i16_zero: 3600 case Intrinsic::nvvm_suld_1d_array_v4i16_zero: 3601 case Intrinsic::nvvm_suld_2d_i16_zero: 3602 case Intrinsic::nvvm_suld_2d_v2i16_zero: 3603 case Intrinsic::nvvm_suld_2d_v4i16_zero: 3604 case Intrinsic::nvvm_suld_2d_array_i16_zero: 3605 case Intrinsic::nvvm_suld_2d_array_v2i16_zero: 3606 case Intrinsic::nvvm_suld_2d_array_v4i16_zero: 3607 case Intrinsic::nvvm_suld_3d_i16_zero: 3608 case Intrinsic::nvvm_suld_3d_v2i16_zero: 3609 case Intrinsic::nvvm_suld_3d_v4i16_zero: { 3610 Info.opc = getOpcForSurfaceInstr(Intrinsic); 3611 Info.memVT = MVT::i16; 3612 Info.ptrVal = nullptr; 3613 Info.offset = 0; 3614 Info.vol = 0; 3615 Info.readMem = true; 3616 Info.writeMem = false; 3617 Info.align = 16; 3618 return true; 3619 } 3620 case Intrinsic::nvvm_suld_1d_i32_clamp: 3621 case Intrinsic::nvvm_suld_1d_v2i32_clamp: 3622 case Intrinsic::nvvm_suld_1d_v4i32_clamp: 3623 case Intrinsic::nvvm_suld_1d_array_i32_clamp: 3624 case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: 3625 case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: 3626 case Intrinsic::nvvm_suld_2d_i32_clamp: 3627 case Intrinsic::nvvm_suld_2d_v2i32_clamp: 3628 case Intrinsic::nvvm_suld_2d_v4i32_clamp: 3629 case Intrinsic::nvvm_suld_2d_array_i32_clamp: 3630 case 
Intrinsic::nvvm_suld_2d_array_v2i32_clamp: 3631 case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: 3632 case Intrinsic::nvvm_suld_3d_i32_clamp: 3633 case Intrinsic::nvvm_suld_3d_v2i32_clamp: 3634 case Intrinsic::nvvm_suld_3d_v4i32_clamp: 3635 case Intrinsic::nvvm_suld_1d_i32_trap: 3636 case Intrinsic::nvvm_suld_1d_v2i32_trap: 3637 case Intrinsic::nvvm_suld_1d_v4i32_trap: 3638 case Intrinsic::nvvm_suld_1d_array_i32_trap: 3639 case Intrinsic::nvvm_suld_1d_array_v2i32_trap: 3640 case Intrinsic::nvvm_suld_1d_array_v4i32_trap: 3641 case Intrinsic::nvvm_suld_2d_i32_trap: 3642 case Intrinsic::nvvm_suld_2d_v2i32_trap: 3643 case Intrinsic::nvvm_suld_2d_v4i32_trap: 3644 case Intrinsic::nvvm_suld_2d_array_i32_trap: 3645 case Intrinsic::nvvm_suld_2d_array_v2i32_trap: 3646 case Intrinsic::nvvm_suld_2d_array_v4i32_trap: 3647 case Intrinsic::nvvm_suld_3d_i32_trap: 3648 case Intrinsic::nvvm_suld_3d_v2i32_trap: 3649 case Intrinsic::nvvm_suld_3d_v4i32_trap: 3650 case Intrinsic::nvvm_suld_1d_i32_zero: 3651 case Intrinsic::nvvm_suld_1d_v2i32_zero: 3652 case Intrinsic::nvvm_suld_1d_v4i32_zero: 3653 case Intrinsic::nvvm_suld_1d_array_i32_zero: 3654 case Intrinsic::nvvm_suld_1d_array_v2i32_zero: 3655 case Intrinsic::nvvm_suld_1d_array_v4i32_zero: 3656 case Intrinsic::nvvm_suld_2d_i32_zero: 3657 case Intrinsic::nvvm_suld_2d_v2i32_zero: 3658 case Intrinsic::nvvm_suld_2d_v4i32_zero: 3659 case Intrinsic::nvvm_suld_2d_array_i32_zero: 3660 case Intrinsic::nvvm_suld_2d_array_v2i32_zero: 3661 case Intrinsic::nvvm_suld_2d_array_v4i32_zero: 3662 case Intrinsic::nvvm_suld_3d_i32_zero: 3663 case Intrinsic::nvvm_suld_3d_v2i32_zero: 3664 case Intrinsic::nvvm_suld_3d_v4i32_zero: { 3665 Info.opc = getOpcForSurfaceInstr(Intrinsic); 3666 Info.memVT = MVT::i32; 3667 Info.ptrVal = nullptr; 3668 Info.offset = 0; 3669 Info.vol = 0; 3670 Info.readMem = true; 3671 Info.writeMem = false; 3672 Info.align = 16; 3673 return true; 3674 } 3675 case Intrinsic::nvvm_suld_1d_i64_clamp: 3676 case Intrinsic::nvvm_suld_1d_v2i64_clamp: 3677 case Intrinsic::nvvm_suld_1d_array_i64_clamp: 3678 case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: 3679 case Intrinsic::nvvm_suld_2d_i64_clamp: 3680 case Intrinsic::nvvm_suld_2d_v2i64_clamp: 3681 case Intrinsic::nvvm_suld_2d_array_i64_clamp: 3682 case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: 3683 case Intrinsic::nvvm_suld_3d_i64_clamp: 3684 case Intrinsic::nvvm_suld_3d_v2i64_clamp: 3685 case Intrinsic::nvvm_suld_1d_i64_trap: 3686 case Intrinsic::nvvm_suld_1d_v2i64_trap: 3687 case Intrinsic::nvvm_suld_1d_array_i64_trap: 3688 case Intrinsic::nvvm_suld_1d_array_v2i64_trap: 3689 case Intrinsic::nvvm_suld_2d_i64_trap: 3690 case Intrinsic::nvvm_suld_2d_v2i64_trap: 3691 case Intrinsic::nvvm_suld_2d_array_i64_trap: 3692 case Intrinsic::nvvm_suld_2d_array_v2i64_trap: 3693 case Intrinsic::nvvm_suld_3d_i64_trap: 3694 case Intrinsic::nvvm_suld_3d_v2i64_trap: 3695 case Intrinsic::nvvm_suld_1d_i64_zero: 3696 case Intrinsic::nvvm_suld_1d_v2i64_zero: 3697 case Intrinsic::nvvm_suld_1d_array_i64_zero: 3698 case Intrinsic::nvvm_suld_1d_array_v2i64_zero: 3699 case Intrinsic::nvvm_suld_2d_i64_zero: 3700 case Intrinsic::nvvm_suld_2d_v2i64_zero: 3701 case Intrinsic::nvvm_suld_2d_array_i64_zero: 3702 case Intrinsic::nvvm_suld_2d_array_v2i64_zero: 3703 case Intrinsic::nvvm_suld_3d_i64_zero: 3704 case Intrinsic::nvvm_suld_3d_v2i64_zero: { 3705 Info.opc = getOpcForSurfaceInstr(Intrinsic); 3706 Info.memVT = MVT::i64; 3707 Info.ptrVal = nullptr; 3708 Info.offset = 0; 3709 Info.vol = 0; 3710 Info.readMem = true; 3711 Info.writeMem = false; 
3712 Info.align = 16; 3713 return true; 3714 } 3715 } 3716 return false; 3717 } 3718 3719 /// isLegalAddressingMode - Return true if the addressing mode represented 3720 /// by AM is legal for this target, for a load/store of the specified type. 3721 /// Used to guide target specific optimizations, like loop strength reduction 3722 /// (LoopStrengthReduce.cpp) and memory optimization for address mode 3723 /// (CodeGenPrepare.cpp) 3724 bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL, 3725 const AddrMode &AM, Type *Ty, 3726 unsigned AS) const { 3727 3728 // AddrMode - This represents an addressing mode of: 3729 // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg 3730 // 3731 // The legal address modes are 3732 // - [avar] 3733 // - [areg] 3734 // - [areg+immoff] 3735 // - [immAddr] 3736 3737 if (AM.BaseGV) { 3738 return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale; 3739 } 3740 3741 switch (AM.Scale) { 3742 case 0: // "r", "r+i" or "i" is allowed 3743 break; 3744 case 1: 3745 if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. 3746 return false; 3747 // Otherwise we have r+i. 3748 break; 3749 default: 3750 // No scale > 1 is allowed 3751 return false; 3752 } 3753 return true; 3754 } 3755 3756 //===----------------------------------------------------------------------===// 3757 // NVPTX Inline Assembly Support 3758 //===----------------------------------------------------------------------===// 3759 3760 /// getConstraintType - Given a constraint letter, return the type of 3761 /// constraint it is for this target. 3762 NVPTXTargetLowering::ConstraintType 3763 NVPTXTargetLowering::getConstraintType(StringRef Constraint) const { 3764 if (Constraint.size() == 1) { 3765 switch (Constraint[0]) { 3766 default: 3767 break; 3768 case 'b': 3769 case 'r': 3770 case 'h': 3771 case 'c': 3772 case 'l': 3773 case 'f': 3774 case 'd': 3775 case '0': 3776 case 'N': 3777 return C_RegisterClass; 3778 } 3779 } 3780 return TargetLowering::getConstraintType(Constraint); 3781 } 3782 3783 std::pair<unsigned, const TargetRegisterClass *> 3784 NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 3785 StringRef Constraint, 3786 MVT VT) const { 3787 if (Constraint.size() == 1) { 3788 switch (Constraint[0]) { 3789 case 'b': 3790 return std::make_pair(0U, &NVPTX::Int1RegsRegClass); 3791 case 'c': 3792 return std::make_pair(0U, &NVPTX::Int16RegsRegClass); 3793 case 'h': 3794 return std::make_pair(0U, &NVPTX::Int16RegsRegClass); 3795 case 'r': 3796 return std::make_pair(0U, &NVPTX::Int32RegsRegClass); 3797 case 'l': 3798 case 'N': 3799 return std::make_pair(0U, &NVPTX::Int64RegsRegClass); 3800 case 'f': 3801 return std::make_pair(0U, &NVPTX::Float32RegsRegClass); 3802 case 'd': 3803 return std::make_pair(0U, &NVPTX::Float64RegsRegClass); 3804 } 3805 } 3806 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 3807 } 3808 3809 //===----------------------------------------------------------------------===// 3810 // NVPTX DAG Combining 3811 //===----------------------------------------------------------------------===// 3812 3813 bool NVPTXTargetLowering::allowFMA(MachineFunction &MF, 3814 CodeGenOpt::Level OptLevel) const { 3815 const Function *F = MF.getFunction(); 3816 const TargetOptions &TO = MF.getTarget().Options; 3817 3818 // Always honor command-line argument 3819 if (FMAContractLevelOpt.getNumOccurrences() > 0) { 3820 return FMAContractLevelOpt > 0; 3821 } else if (OptLevel == 0) { 3822 // Do not contract if we're not optimizing the code 3823 return 
    false;
  } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) {
    // Honor TargetOptions flags that explicitly say fusion is okay.
    return true;
  } else if (F->hasFnAttribute("unsafe-fp-math")) {
    // Check for unsafe-fp-math=true coming from Clang.
    Attribute Attr = F->getFnAttribute("unsafe-fp-math");
    StringRef Val = Attr.getValueAsString();
    if (Val == "true")
      return true;
  }

  // We did not have a clear indication that fusion is allowed, so assume not.
  return false;
}

/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
/// operands.
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
                                             TargetLowering::DAGCombinerInfo &DCI,
                                             const NVPTXSubtarget &Subtarget,
                                             CodeGenOpt::Level OptLevel) {
  SelectionDAG &DAG = DCI.DAG;
  // Skip the vector case; only scalar integer and floating-point adds are
  // handled here.
  EVT VT = N0.getValueType();
  if (VT.isVector())
    return SDValue();

  // fold (add (mul a, b), c) -> (mad a, b, c)
  //
  if (N0.getOpcode() == ISD::MUL) {
    assert(VT.isInteger());
    // For integer:
    // Since integer multiply-add costs the same as integer multiply
    // but is more costly than integer add, do the fusion only when
    // the mul is used solely by the add.
    if (OptLevel == CodeGenOpt::None || VT != MVT::i32 ||
        !N0.getNode()->hasOneUse())
      return SDValue();

    // Do the folding.
    return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1), N1);
  } else if (N0.getOpcode() == ISD::FMUL) {
    if (VT == MVT::f32 || VT == MVT::f64) {
      const auto *TLI = static_cast<const NVPTXTargetLowering *>(
          &DAG.getTargetLoweringInfo());
      if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
        return SDValue();

      // For floating point:
      // Do the fusion only when the mul has fewer than 5 uses and all of
      // them are adds.
      // The heuristic is that if a use is not an add, then that use
      // cannot be fused into an fma, so the mul is still needed anyway.
      // If there are more than 4 uses, even if they are all adds, fusing
      // them will increase register pressure.
      //
      int numUses = 0;
      int nonAddCount = 0;
      for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
                                UE = N0.getNode()->use_end();
           UI != UE; ++UI) {
        numUses++;
        SDNode *User = *UI;
        if (User->getOpcode() != ISD::FADD)
          ++nonAddCount;
      }
      if (numUses >= 5)
        return SDValue();
      if (nonAddCount) {
        int orderNo = N->getIROrder();
        int orderNo2 = N0.getNode()->getIROrder();
        // Simple heuristic for potential register pressure: the difference
        // in IR order is used to measure the distance between the def and
        // the use, and the longer the distance, the more likely it is to
        // cause register pressure.
        if (orderNo - orderNo2 < 500)
          return SDValue();

        // Now, check if at least one of the FMUL's operands is live beyond
        // node N, which guarantees that the FMA will not increase register
        // pressure at node N.
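        // Illustrative sketch (not in the original comments):
        //   t = fmul a, b
        //   n = fadd t, c     ; this is node N
        //   ...
        //   m = fadd a, d     ; 'a' has a use ordered after N
        // Here operand 'a' stays live past N whether or not we fuse, so
        // forming fma(a, b, c) does not increase register pressure at N.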
3908 bool opIsLive = false; 3909 const SDNode *left = N0.getOperand(0).getNode(); 3910 const SDNode *right = N0.getOperand(1).getNode(); 3911 3912 if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right)) 3913 opIsLive = true; 3914 3915 if (!opIsLive) 3916 for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) { 3917 SDNode *User = *UI; 3918 int orderNo3 = User->getIROrder(); 3919 if (orderNo3 > orderNo) { 3920 opIsLive = true; 3921 break; 3922 } 3923 } 3924 3925 if (!opIsLive) 3926 for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) { 3927 SDNode *User = *UI; 3928 int orderNo3 = User->getIROrder(); 3929 if (orderNo3 > orderNo) { 3930 opIsLive = true; 3931 break; 3932 } 3933 } 3934 3935 if (!opIsLive) 3936 return SDValue(); 3937 } 3938 3939 return DAG.getNode(ISD::FMA, SDLoc(N), VT, 3940 N0.getOperand(0), N0.getOperand(1), N1); 3941 } 3942 } 3943 3944 return SDValue(); 3945 } 3946 3947 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 3948 /// 3949 static SDValue PerformADDCombine(SDNode *N, 3950 TargetLowering::DAGCombinerInfo &DCI, 3951 const NVPTXSubtarget &Subtarget, 3952 CodeGenOpt::Level OptLevel) { 3953 SDValue N0 = N->getOperand(0); 3954 SDValue N1 = N->getOperand(1); 3955 3956 // First try with the default operand order. 3957 if (SDValue Result = 3958 PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, OptLevel)) 3959 return Result; 3960 3961 // If that didn't work, try again with the operands commuted. 3962 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel); 3963 } 3964 3965 static SDValue PerformANDCombine(SDNode *N, 3966 TargetLowering::DAGCombinerInfo &DCI) { 3967 // The type legalizer turns a vector load of i8 values into a zextload to i16 3968 // registers, optionally ANY_EXTENDs it (if target type is integer), 3969 // and ANDs off the high 8 bits. Since we turn this load into a 3970 // target-specific DAG node, the DAG combiner fails to eliminate these AND 3971 // nodes. Do that here. 3972 SDValue Val = N->getOperand(0); 3973 SDValue Mask = N->getOperand(1); 3974 3975 if (isa<ConstantSDNode>(Val)) { 3976 std::swap(Val, Mask); 3977 } 3978 3979 SDValue AExt; 3980 // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and 3981 if (Val.getOpcode() == ISD::ANY_EXTEND) { 3982 AExt = Val; 3983 Val = Val->getOperand(0); 3984 } 3985 3986 if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) { 3987 Val = Val->getOperand(0); 3988 } 3989 3990 if (Val->getOpcode() == NVPTXISD::LoadV2 || 3991 Val->getOpcode() == NVPTXISD::LoadV4) { 3992 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask); 3993 if (!MaskCnst) { 3994 // Not an AND with a constant 3995 return SDValue(); 3996 } 3997 3998 uint64_t MaskVal = MaskCnst->getZExtValue(); 3999 if (MaskVal != 0xff) { 4000 // Not an AND that chops off top 8 bits 4001 return SDValue(); 4002 } 4003 4004 MemSDNode *Mem = dyn_cast<MemSDNode>(Val); 4005 if (!Mem) { 4006 // Not a MemSDNode?!? 
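      // LoadV2/LoadV4 nodes are created as memory intrinsic nodes, so this
      // cast is expected to succeed; if it somehow does not, conservatively
      // keep the AND.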
4007 return SDValue(); 4008 } 4009 4010 EVT MemVT = Mem->getMemoryVT(); 4011 if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) { 4012 // We only handle the i8 case 4013 return SDValue(); 4014 } 4015 4016 unsigned ExtType = 4017 cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))-> 4018 getZExtValue(); 4019 if (ExtType == ISD::SEXTLOAD) { 4020 // If for some reason the load is a sextload, the and is needed to zero 4021 // out the high 8 bits 4022 return SDValue(); 4023 } 4024 4025 bool AddTo = false; 4026 if (AExt.getNode() != 0) { 4027 // Re-insert the ext as a zext. 4028 Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), 4029 AExt.getValueType(), Val); 4030 AddTo = true; 4031 } 4032 4033 // If we get here, the AND is unnecessary. Just replace it with the load 4034 DCI.CombineTo(N, Val, AddTo); 4035 } 4036 4037 return SDValue(); 4038 } 4039 4040 static SDValue PerformSELECTCombine(SDNode *N, 4041 TargetLowering::DAGCombinerInfo &DCI) { 4042 // Currently this detects patterns for integer min and max and 4043 // lowers them to PTX-specific intrinsics that enable hardware 4044 // support. 4045 4046 const SDValue Cond = N->getOperand(0); 4047 if (Cond.getOpcode() != ISD::SETCC) return SDValue(); 4048 4049 const SDValue LHS = Cond.getOperand(0); 4050 const SDValue RHS = Cond.getOperand(1); 4051 const SDValue True = N->getOperand(1); 4052 const SDValue False = N->getOperand(2); 4053 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) 4054 return SDValue(); 4055 4056 const EVT VT = N->getValueType(0); 4057 if (VT != MVT::i32 && VT != MVT::i64) return SDValue(); 4058 4059 const ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 4060 SDValue Larger; // The larger of LHS and RHS when condition is true. 4061 switch (CC) { 4062 case ISD::SETULT: 4063 case ISD::SETULE: 4064 case ISD::SETLT: 4065 case ISD::SETLE: 4066 Larger = RHS; 4067 break; 4068 4069 case ISD::SETGT: 4070 case ISD::SETGE: 4071 case ISD::SETUGT: 4072 case ISD::SETUGE: 4073 Larger = LHS; 4074 break; 4075 4076 default: 4077 return SDValue(); 4078 } 4079 const bool IsMax = (Larger == True); 4080 const bool IsSigned = ISD::isSignedIntSetCC(CC); 4081 4082 unsigned IntrinsicId; 4083 if (VT == MVT::i32) { 4084 if (IsSigned) 4085 IntrinsicId = IsMax ? Intrinsic::nvvm_max_i : Intrinsic::nvvm_min_i; 4086 else 4087 IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui : Intrinsic::nvvm_min_ui; 4088 } else { 4089 assert(VT == MVT::i64); 4090 if (IsSigned) 4091 IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll : Intrinsic::nvvm_min_ll; 4092 else 4093 IntrinsicId = IsMax ? Intrinsic::nvvm_max_ull : Intrinsic::nvvm_min_ull; 4094 } 4095 4096 SDLoc DL(N); 4097 return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, 4098 DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS); 4099 } 4100 4101 enum OperandSignedness { 4102 Signed = 0, 4103 Unsigned, 4104 Unknown 4105 }; 4106 4107 /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand 4108 /// that can be demoted to \p OptSize bits without loss of information. The 4109 /// signedness of the operand, if determinable, is placed in \p S. 
4110 static bool IsMulWideOperandDemotable(SDValue Op, 4111 unsigned OptSize, 4112 OperandSignedness &S) { 4113 S = Unknown; 4114 4115 if (Op.getOpcode() == ISD::SIGN_EXTEND || 4116 Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { 4117 EVT OrigVT = Op.getOperand(0).getValueType(); 4118 if (OrigVT.getSizeInBits() <= OptSize) { 4119 S = Signed; 4120 return true; 4121 } 4122 } else if (Op.getOpcode() == ISD::ZERO_EXTEND) { 4123 EVT OrigVT = Op.getOperand(0).getValueType(); 4124 if (OrigVT.getSizeInBits() <= OptSize) { 4125 S = Unsigned; 4126 return true; 4127 } 4128 } 4129 4130 return false; 4131 } 4132 4133 /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can 4134 /// be demoted to \p OptSize bits without loss of information. If the operands 4135 /// contain a constant, it should appear as the RHS operand. The signedness of 4136 /// the operands is placed in \p IsSigned. 4137 static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, 4138 unsigned OptSize, 4139 bool &IsSigned) { 4140 4141 OperandSignedness LHSSign; 4142 4143 // The LHS operand must be a demotable op 4144 if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign)) 4145 return false; 4146 4147 // We should have been able to determine the signedness from the LHS 4148 if (LHSSign == Unknown) 4149 return false; 4150 4151 IsSigned = (LHSSign == Signed); 4152 4153 // The RHS can be a demotable op or a constant 4154 if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) { 4155 const APInt &Val = CI->getAPIntValue(); 4156 if (LHSSign == Unsigned) { 4157 return Val.isIntN(OptSize); 4158 } else { 4159 return Val.isSignedIntN(OptSize); 4160 } 4161 } else { 4162 OperandSignedness RHSSign; 4163 if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign)) 4164 return false; 4165 4166 return LHSSign == RHSSign; 4167 } 4168 } 4169 4170 /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply 4171 /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform 4172 /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift 4173 /// amount. 4174 static SDValue TryMULWIDECombine(SDNode *N, 4175 TargetLowering::DAGCombinerInfo &DCI) { 4176 EVT MulType = N->getValueType(0); 4177 if (MulType != MVT::i32 && MulType != MVT::i64) { 4178 return SDValue(); 4179 } 4180 4181 SDLoc DL(N); 4182 unsigned OptSize = MulType.getSizeInBits() >> 1; 4183 SDValue LHS = N->getOperand(0); 4184 SDValue RHS = N->getOperand(1); 4185 4186 // Canonicalize the multiply so the constant (if any) is on the right 4187 if (N->getOpcode() == ISD::MUL) { 4188 if (isa<ConstantSDNode>(LHS)) { 4189 std::swap(LHS, RHS); 4190 } 4191 } 4192 4193 // If we have a SHL, determine the actual multiply amount 4194 if (N->getOpcode() == ISD::SHL) { 4195 ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS); 4196 if (!ShlRHS) { 4197 return SDValue(); 4198 } 4199 4200 APInt ShiftAmt = ShlRHS->getAPIntValue(); 4201 unsigned BitWidth = MulType.getSizeInBits(); 4202 if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) { 4203 APInt MulVal = APInt(BitWidth, 1) << ShiftAmt; 4204 RHS = DCI.DAG.getConstant(MulVal, DL, MulType); 4205 } else { 4206 return SDValue(); 4207 } 4208 } 4209 4210 bool Signed; 4211 // Verify that our operands are demotable 4212 if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) { 4213 return SDValue(); 4214 } 4215 4216 EVT DemotedVT; 4217 if (MulType == MVT::i32) { 4218 DemotedVT = MVT::i16; 4219 } else { 4220 DemotedVT = MVT::i32; 4221 } 4222 4223 // Truncate the operands to the correct size. 
  // Note that these are just for
  // type consistency and will (likely) be eliminated in later phases.
  SDValue TruncLHS =
      DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS);
  SDValue TruncRHS =
      DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS);

  unsigned Opc;
  if (Signed) {
    Opc = NVPTXISD::MUL_WIDE_SIGNED;
  } else {
    Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
  }

  return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS);
}

/// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
static SDValue PerformMULCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 CodeGenOpt::Level OptLevel) {
  if (OptLevel > 0) {
    // Try mul.wide combining at OptLevel > 0
    if (SDValue Ret = TryMULWIDECombine(N, DCI))
      return Ret;
  }

  return SDValue();
}

/// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
static SDValue PerformSHLCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 CodeGenOpt::Level OptLevel) {
  if (OptLevel > 0) {
    // Try mul.wide combining at OptLevel > 0
    if (SDValue Ret = TryMULWIDECombine(N, DCI))
      return Ret;
  }

  return SDValue();
}

SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
  switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::FADD:
      return PerformADDCombine(N, DCI, STI, OptLevel);
    case ISD::MUL:
      return PerformMULCombine(N, DCI, OptLevel);
    case ISD::SHL:
      return PerformSHLCombine(N, DCI, OptLevel);
    case ISD::AND:
      return PerformANDCombine(N, DCI);
    case ISD::SELECT:
      return PerformSELECTCombine(N, DCI);
  }
  return SDValue();
}

/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
                              SmallVectorImpl<SDValue> &Results) {
  EVT ResVT = N->getValueType(0);
  SDLoc DL(N);

  assert(ResVT.isVector() && "Vector load must have vector type");

  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
  // legal. We can (and should) split that into 2 loads of <2 x double> here
  // but I'm leaving that as a TODO for now.
  assert(ResVT.isSimple() && "Can only handle simple types");
  switch (ResVT.getSimpleVT().SimpleTy) {
  default:
    return;
  case MVT::v2i8:
  case MVT::v2i16:
  case MVT::v2i32:
  case MVT::v2i64:
  case MVT::v2f32:
  case MVT::v2f64:
  case MVT::v4i8:
  case MVT::v4i16:
  case MVT::v4i32:
  case MVT::v4f32:
    // This is a "native" vector type
    break;
  }

  LoadSDNode *LD = cast<LoadSDNode>(N);

  unsigned Align = LD->getAlignment();
  auto &TD = DAG.getDataLayout();
  unsigned PrefAlign =
      TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
  if (Align < PrefAlign) {
    // This load is not sufficiently aligned, so bail out and let this vector
    // load be scalarized. Note that we may still be able to emit smaller
    // vector loads. For example, if we are loading a <4 x float> with an
    // alignment of 8, this check will fail but the legalizer will try again
    // with 2 x <2 x float>, which will succeed with an alignment of 8.
4327 return; 4328 } 4329 4330 EVT EltVT = ResVT.getVectorElementType(); 4331 unsigned NumElts = ResVT.getVectorNumElements(); 4332 4333 // Since LoadV2 is a target node, we cannot rely on DAG type legalization. 4334 // Therefore, we must ensure the type is legal. For i1 and i8, we set the 4335 // loaded type to i16 and propagate the "real" type as the memory type. 4336 bool NeedTrunc = false; 4337 if (EltVT.getSizeInBits() < 16) { 4338 EltVT = MVT::i16; 4339 NeedTrunc = true; 4340 } 4341 4342 unsigned Opcode = 0; 4343 SDVTList LdResVTs; 4344 4345 switch (NumElts) { 4346 default: 4347 return; 4348 case 2: 4349 Opcode = NVPTXISD::LoadV2; 4350 LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); 4351 break; 4352 case 4: { 4353 Opcode = NVPTXISD::LoadV4; 4354 EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; 4355 LdResVTs = DAG.getVTList(ListVTs); 4356 break; 4357 } 4358 } 4359 4360 // Copy regular operands 4361 SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end()); 4362 4363 // The select routine does not have access to the LoadSDNode instance, so 4364 // pass along the extension information 4365 OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL)); 4366 4367 SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, 4368 LD->getMemoryVT(), 4369 LD->getMemOperand()); 4370 4371 SmallVector<SDValue, 4> ScalarRes; 4372 4373 for (unsigned i = 0; i < NumElts; ++i) { 4374 SDValue Res = NewLD.getValue(i); 4375 if (NeedTrunc) 4376 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); 4377 ScalarRes.push_back(Res); 4378 } 4379 4380 SDValue LoadChain = NewLD.getValue(NumElts); 4381 4382 SDValue BuildVec = DAG.getBuildVector(ResVT, DL, ScalarRes); 4383 4384 Results.push_back(BuildVec); 4385 Results.push_back(LoadChain); 4386 } 4387 4388 static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, 4389 SmallVectorImpl<SDValue> &Results) { 4390 SDValue Chain = N->getOperand(0); 4391 SDValue Intrin = N->getOperand(1); 4392 SDLoc DL(N); 4393 4394 // Get the intrinsic ID 4395 unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue(); 4396 switch (IntrinNo) { 4397 default: 4398 return; 4399 case Intrinsic::nvvm_ldg_global_i: 4400 case Intrinsic::nvvm_ldg_global_f: 4401 case Intrinsic::nvvm_ldg_global_p: 4402 case Intrinsic::nvvm_ldu_global_i: 4403 case Intrinsic::nvvm_ldu_global_f: 4404 case Intrinsic::nvvm_ldu_global_p: { 4405 EVT ResVT = N->getValueType(0); 4406 4407 if (ResVT.isVector()) { 4408 // Vector LDG/LDU 4409 4410 unsigned NumElts = ResVT.getVectorNumElements(); 4411 EVT EltVT = ResVT.getVectorElementType(); 4412 4413 // Since LDU/LDG are target nodes, we cannot rely on DAG type 4414 // legalization. 4415 // Therefore, we must ensure the type is legal. For i1 and i8, we set the 4416 // loaded type to i16 and propagate the "real" type as the memory type. 
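      // Illustrative example (not part of the original comments): an
      // @llvm.nvvm.ldg.global.i load of <4 x i8> becomes an LDGV4 node that
      // produces four i16 values with v4i8 as the memory VT; the results are
      // truncated back to i8 and rebuilt into a vector below.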
      bool NeedTrunc = false;
      if (EltVT.getSizeInBits() < 16) {
        EltVT = MVT::i16;
        NeedTrunc = true;
      }

      unsigned Opcode = 0;
      SDVTList LdResVTs;

      switch (NumElts) {
      default:
        return;
      case 2:
        switch (IntrinNo) {
        default:
          return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV2;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV2;
          break;
        }
        LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
        break;
      case 4: {
        switch (IntrinNo) {
        default:
          return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV4;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV4;
          break;
        }
        EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
        LdResVTs = DAG.getVTList(ListVTs);
        break;
      }
      }

      // Copy the regular operands: the chain first, then (skipping operand 1,
      // the intrinsic ID) everything else.
      SmallVector<SDValue, 8> OtherOps;
      OtherOps.push_back(Chain);
      OtherOps.append(N->op_begin() + 2, N->op_end());

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
                                              MemSD->getMemoryVT(),
                                              MemSD->getMemOperand());

      SmallVector<SDValue, 4> ScalarRes;

      for (unsigned i = 0; i < NumElts; ++i) {
        SDValue Res = NewLD.getValue(i);
        if (NeedTrunc)
          Res =
              DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
        ScalarRes.push_back(Res);
      }

      SDValue LoadChain = NewLD.getValue(NumElts);

      SDValue BuildVec = DAG.getBuildVector(ResVT, DL, ScalarRes);

      Results.push_back(BuildVec);
      Results.push_back(LoadChain);
    } else {
      // i8 LDG/LDU
      assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
             "Custom handling of non-i8 ldu/ldg?");

      // Just copy all operands as-is
      SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());

      // Force output to i16
      SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      // We make sure the memory type is i8, which will be used during isel
      // to select the proper instruction.
      SDValue NewLD =
          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
                                  MVT::i8, MemSD->getMemOperand());

      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
                                    NewLD.getValue(0)));
      Results.push_back(NewLD.getValue(1));
    }
  }
  }
}

void NVPTXTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    report_fatal_error("Unhandled custom legalization");
  case ISD::LOAD:
    ReplaceLoadVector(N, DAG, Results);
    return;
  case ISD::INTRINSIC_W_CHAIN:
    ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
    return;
  }
}
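// Illustrative sketch of the overall effect (assumed, not spelled out in this
// file): a vector @llvm.nvvm.ldg.global.f load of <4 x float> is rebuilt above
// as an NVPTXISD::LDGV4 node, which is expected to be selected as a single
// non-coherent vector load (e.g. "ld.global.nc.v4.f32") instead of four
// scalar loads.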
// Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file.
void NVPTXSection::anchor() {}

NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
  delete static_cast<NVPTXSection *>(TextSection);
  delete static_cast<NVPTXSection *>(DataSection);
  delete static_cast<NVPTXSection *>(BSSSection);
  delete static_cast<NVPTXSection *>(ReadOnlySection);

  delete static_cast<NVPTXSection *>(StaticCtorSection);
  delete static_cast<NVPTXSection *>(StaticDtorSection);
  delete static_cast<NVPTXSection *>(LSDASection);
  delete static_cast<NVPTXSection *>(EHFrameSection);
  delete static_cast<NVPTXSection *>(DwarfAbbrevSection);
  delete static_cast<NVPTXSection *>(DwarfInfoSection);
  delete static_cast<NVPTXSection *>(DwarfLineSection);
  delete static_cast<NVPTXSection *>(DwarfFrameSection);
  delete static_cast<NVPTXSection *>(DwarfPubTypesSection);
  delete static_cast<const NVPTXSection *>(DwarfDebugInlineSection);
  delete static_cast<NVPTXSection *>(DwarfStrSection);
  delete static_cast<NVPTXSection *>(DwarfLocSection);
  delete static_cast<NVPTXSection *>(DwarfARangesSection);
  delete static_cast<NVPTXSection *>(DwarfRangesSection);
  delete static_cast<NVPTXSection *>(DwarfMacinfoSection);
}

MCSection *
NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
                                              SectionKind Kind, Mangler &Mang,
                                              const TargetMachine &TM) const {
  return getDataSection();
}