//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "NVPTXISelLowering.h"
#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>

#undef DEBUG_TYPE
#define DEBUG_TYPE "nvptx-lower"

using namespace llvm;

static unsigned int uniqueCallSite = 0;

static cl::opt<bool> sched4reg(
    "nvptx-sched4reg",
    cl::desc("NVPTX Specific: schedule for register pressure"),
    cl::init(false));

static cl::opt<unsigned>
FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
                    cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
                             " 1: do it  2: do it aggressively)"),
                    cl::init(2));

static bool IsPTXVectorType(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::v2i1:
  case MVT::v4i1:
  case MVT::v2i8:
  case MVT::v4i8:
  case MVT::v2i16:
  case MVT::v4i16:
  case MVT::v2i32:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v2f32:
  case MVT::v4f32:
  case MVT::v2f64:
    return true;
  }
}

/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
/// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors
/// into their primitive components.
/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return
/// the same number of types as the Ins/Outs arrays in LowerFormalArguments,
/// LowerCall, and LowerReturn.
static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
                               Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
                               SmallVectorImpl<uint64_t> *Offsets = nullptr,
                               uint64_t StartingOffset = 0) {
  SmallVector<EVT, 16> TempVTs;
  SmallVector<uint64_t, 16> TempOffsets;

  ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
  for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
    EVT VT = TempVTs[i];
    uint64_t Off = TempOffsets[i];
    if (VT.isVector())
      for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) {
        ValueVTs.push_back(VT.getVectorElementType());
        if (Offsets)
          Offsets->push_back(Off + j * VT.getVectorElementType().getStoreSize());
      }
    else {
      ValueVTs.push_back(VT);
      if (Offsets)
        Offsets->push_back(Off);
    }
  }
}

// NVPTXTargetLowering Constructor.
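// The constructor below configures the target lowering: it registers the PTX
// register classes, marks which generic ISD operations are Legal, Expand, or
// Custom for NVPTX, and requests the custom DAG combines implemented later in
// this file.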
NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
                                         const NVPTXSubtarget &STI)
    : TargetLowering(TM), nvTM(&TM), STI(STI) {

  // Always lower memset, memcpy, and memmove intrinsics to load/store
  // instructions, rather than generating calls to memset, memcpy, or memmove.
  MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
  MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Jump is Expensive. Don't create extra control flow for 'and', 'or'
  // condition branches.
  setJumpIsExpensive(true);

  // Wide divides are _very_ slow. Try to reduce the width of the divide if
  // possible.
  addBypassSlowDiv(64, 32);

  // By default, use the Source scheduling.
  if (sched4reg)
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Source);

  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);

  // Operations not directly supported by NVPTX.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f64, Expand);
  setOperationAction(ISD::BR_CC, MVT::i1, Expand);
  setOperationAction(ISD::BR_CC, MVT::i8, Expand);
  setOperationAction(ISD::BR_CC, MVT::i16, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::i64, Expand);
  // Some SIGN_EXTEND_INREG can be done using cvt instruction.
  // For others we will expand to a SHL/SRA pair.
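  // (Illustrative note, not part of the original comment:) expanding a
  // sign_extend_inreg from i8 held in an i32 register amounts to a shift left
  // by 24 followed by an arithmetic shift right by 24.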
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);

  if (STI.hasROT64()) {
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i64, Expand);
    setOperationAction(ISD::ROTR, MVT::i64, Expand);
  }
  if (STI.hasROT32()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i32, Expand);
    setOperationAction(ISD::ROTR, MVT::i32, Expand);
  }

  setOperationAction(ISD::ROTL, MVT::i16, Expand);
  setOperationAction(ISD::ROTR, MVT::i16, Expand);
  setOperationAction(ISD::ROTL, MVT::i8, Expand);
  setOperationAction(ISD::ROTR, MVT::i8, Expand);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  // Indirect branch is not supported.
  // This also disables Jump Table creation.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

  // We want to legalize constant-related memmove and memcpy intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

  // Turn FP extload into load/fextend.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
  // Turn FP truncstore into trunc + store.
  // FIXME: vector types should also be expanded
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PTX does not support load / store predicate registers
  setOperationAction(ISD::LOAD, MVT::i1, Custom);
  setOperationAction(ISD::STORE, MVT::i1, Custom);

  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }

  // This is legal in NVPTX
  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);

  // TRAP can be lowered to PTX trap
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::ADDC, MVT::i64, Expand);
  setOperationAction(ISD::ADDE, MVT::i64, Expand);

  // Register custom handling for vector loads/stores
  for (MVT VT : MVT::vector_valuetypes()) {
    if (IsPTXVectorType(VT)) {
      setOperationAction(ISD::LOAD, VT, Custom);
      setOperationAction(ISD::STORE, VT, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
    }
  }

  // Custom handling for i8 intrinsics
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);

  setOperationAction(ISD::CTLZ, MVT::i16, Legal);
  setOperationAction(ISD::CTLZ, MVT::i32, Legal);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);
  setOperationAction(ISD::CTTZ, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTPOP, MVT::i16, Legal);
  setOperationAction(ISD::CTPOP, MVT::i32, Legal);
  setOperationAction(ISD::CTPOP, MVT::i64, Legal);

  // PTX does not directly support SELP of i1, so promote to i32 first
  setOperationAction(ISD::SELECT, MVT::i1, Custom);

  // We have some custom DAG combine patterns for these nodes
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::FADD);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::SHL);
  setTargetDAGCombine(ISD::SELECT);

  // Now deduce the information based on the above mentioned actions.
  computeRegisterProperties(STI.getRegisterInfo());
}

const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((NVPTXISD::NodeType)Opcode) {
  case NVPTXISD::FIRST_NUMBER:
    break;
  case NVPTXISD::CALL:
    return "NVPTXISD::CALL";
  case NVPTXISD::RET_FLAG:
    return "NVPTXISD::RET_FLAG";
  case NVPTXISD::LOAD_PARAM:
    return "NVPTXISD::LOAD_PARAM";
  case NVPTXISD::Wrapper:
    return "NVPTXISD::Wrapper";
  case NVPTXISD::DeclareParam:
    return "NVPTXISD::DeclareParam";
  case NVPTXISD::DeclareScalarParam:
    return "NVPTXISD::DeclareScalarParam";
  case NVPTXISD::DeclareRet:
    return "NVPTXISD::DeclareRet";
  case NVPTXISD::DeclareScalarRet:
    return "NVPTXISD::DeclareScalarRet";
  case NVPTXISD::DeclareRetParam:
    return "NVPTXISD::DeclareRetParam";
  case NVPTXISD::PrintCall:
    return "NVPTXISD::PrintCall";
  case NVPTXISD::PrintCallUni:
    return "NVPTXISD::PrintCallUni";
  case NVPTXISD::LoadParam:
    return "NVPTXISD::LoadParam";
  case NVPTXISD::LoadParamV2:
    return "NVPTXISD::LoadParamV2";
  case NVPTXISD::LoadParamV4:
    return "NVPTXISD::LoadParamV4";
  case NVPTXISD::StoreParam:
    return "NVPTXISD::StoreParam";
  case NVPTXISD::StoreParamV2:
    return "NVPTXISD::StoreParamV2";
  case NVPTXISD::StoreParamV4:
    return "NVPTXISD::StoreParamV4";
  case NVPTXISD::StoreParamS32:
    return "NVPTXISD::StoreParamS32";
  case NVPTXISD::StoreParamU32:
    return "NVPTXISD::StoreParamU32";
  case NVPTXISD::CallArgBegin:
    return "NVPTXISD::CallArgBegin";
  case NVPTXISD::CallArg:
    return "NVPTXISD::CallArg";
  case NVPTXISD::LastCallArg:
    return "NVPTXISD::LastCallArg";
  case NVPTXISD::CallArgEnd:
    return "NVPTXISD::CallArgEnd";
  case NVPTXISD::CallVoid:
    return "NVPTXISD::CallVoid";
  case NVPTXISD::CallVal:
    return "NVPTXISD::CallVal";
  case NVPTXISD::CallSymbol:
    return "NVPTXISD::CallSymbol";
  case NVPTXISD::Prototype:
    return "NVPTXISD::Prototype";
  case NVPTXISD::MoveParam:
    return "NVPTXISD::MoveParam";
  case NVPTXISD::StoreRetval:
    return "NVPTXISD::StoreRetval";
  case NVPTXISD::StoreRetvalV2:
    return "NVPTXISD::StoreRetvalV2";
  case NVPTXISD::StoreRetvalV4:
    return "NVPTXISD::StoreRetvalV4";
  case NVPTXISD::PseudoUseParam:
    return "NVPTXISD::PseudoUseParam";
  case NVPTXISD::RETURN:
    return "NVPTXISD::RETURN";
  case NVPTXISD::CallSeqBegin:
    return "NVPTXISD::CallSeqBegin";
  case NVPTXISD::CallSeqEnd:
    return "NVPTXISD::CallSeqEnd";
  case NVPTXISD::CallPrototype:
    return "NVPTXISD::CallPrototype";
  case NVPTXISD::LoadV2:
    return "NVPTXISD::LoadV2";
  case NVPTXISD::LoadV4:
    return "NVPTXISD::LoadV4";
  case NVPTXISD::LDGV2:
    return "NVPTXISD::LDGV2";
  case NVPTXISD::LDGV4:
    return "NVPTXISD::LDGV4";
  case NVPTXISD::LDUV2:
    return "NVPTXISD::LDUV2";
  case NVPTXISD::LDUV4:
    return "NVPTXISD::LDUV4";
  case NVPTXISD::StoreV2:
    return "NVPTXISD::StoreV2";
  case NVPTXISD::StoreV4:
    return "NVPTXISD::StoreV4";
  case NVPTXISD::FUN_SHFL_CLAMP:
    return "NVPTXISD::FUN_SHFL_CLAMP";
  case NVPTXISD::FUN_SHFR_CLAMP:
    return "NVPTXISD::FUN_SHFR_CLAMP";
  case NVPTXISD::IMAD:
    return "NVPTXISD::IMAD";
  case NVPTXISD::Dummy:
    return "NVPTXISD::Dummy";
  case NVPTXISD::MUL_WIDE_SIGNED:
    return "NVPTXISD::MUL_WIDE_SIGNED";
  case NVPTXISD::MUL_WIDE_UNSIGNED:
    return "NVPTXISD::MUL_WIDE_UNSIGNED";
  case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32";
  case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
  case NVPTXISD::Tex1DFloatFloatLevel:
    return "NVPTXISD::Tex1DFloatFloatLevel";
  case NVPTXISD::Tex1DFloatFloatGrad:
    return "NVPTXISD::Tex1DFloatFloatGrad";
  case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32";
  case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float";
  case NVPTXISD::Tex1DS32FloatLevel:
    return "NVPTXISD::Tex1DS32FloatLevel";
  case NVPTXISD::Tex1DS32FloatGrad:
    return "NVPTXISD::Tex1DS32FloatGrad";
  case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32";
  case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float";
  case NVPTXISD::Tex1DU32FloatLevel:
    return "NVPTXISD::Tex1DU32FloatLevel";
  case NVPTXISD::Tex1DU32FloatGrad:
    return "NVPTXISD::Tex1DU32FloatGrad";
  case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32";
  case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
    return "NVPTXISD::Tex1DArrayFloatFloatLevel";
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
    return "NVPTXISD::Tex1DArrayFloatFloatGrad";
  case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32";
  case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float";
  case NVPTXISD::Tex1DArrayS32FloatLevel:
    return "NVPTXISD::Tex1DArrayS32FloatLevel";
  case NVPTXISD::Tex1DArrayS32FloatGrad:
    return "NVPTXISD::Tex1DArrayS32FloatGrad";
  case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32";
  case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float";
  case NVPTXISD::Tex1DArrayU32FloatLevel:
    return "NVPTXISD::Tex1DArrayU32FloatLevel";
  case NVPTXISD::Tex1DArrayU32FloatGrad:
    return "NVPTXISD::Tex1DArrayU32FloatGrad";
  case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32";
  case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat";
  case NVPTXISD::Tex2DFloatFloatLevel:
    return "NVPTXISD::Tex2DFloatFloatLevel";
  case NVPTXISD::Tex2DFloatFloatGrad:
    return "NVPTXISD::Tex2DFloatFloatGrad";
  case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32";
  case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float";
  case NVPTXISD::Tex2DS32FloatLevel:
    return "NVPTXISD::Tex2DS32FloatLevel";
  case NVPTXISD::Tex2DS32FloatGrad:
    return "NVPTXISD::Tex2DS32FloatGrad";
  case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32";
  case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float";
  case NVPTXISD::Tex2DU32FloatLevel:
    return "NVPTXISD::Tex2DU32FloatLevel";
  case NVPTXISD::Tex2DU32FloatGrad:
    return "NVPTXISD::Tex2DU32FloatGrad";
  case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32";
  case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
    return "NVPTXISD::Tex2DArrayFloatFloatLevel";
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
    return "NVPTXISD::Tex2DArrayFloatFloatGrad";
  case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32";
  case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float";
  case NVPTXISD::Tex2DArrayS32FloatLevel:
    return "NVPTXISD::Tex2DArrayS32FloatLevel";
  case NVPTXISD::Tex2DArrayS32FloatGrad:
    return "NVPTXISD::Tex2DArrayS32FloatGrad";
  case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32";
  case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float";
  case NVPTXISD::Tex2DArrayU32FloatLevel:
    return "NVPTXISD::Tex2DArrayU32FloatLevel";
  case NVPTXISD::Tex2DArrayU32FloatGrad:
    return "NVPTXISD::Tex2DArrayU32FloatGrad";
  case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32";
  case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat";
  case NVPTXISD::Tex3DFloatFloatLevel:
    return "NVPTXISD::Tex3DFloatFloatLevel";
  case NVPTXISD::Tex3DFloatFloatGrad:
    return "NVPTXISD::Tex3DFloatFloatGrad";
  case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32";
  case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float";
  case NVPTXISD::Tex3DS32FloatLevel:
    return "NVPTXISD::Tex3DS32FloatLevel";
  case NVPTXISD::Tex3DS32FloatGrad:
    return "NVPTXISD::Tex3DS32FloatGrad";
  case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32";
  case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float";
  case NVPTXISD::Tex3DU32FloatLevel:
    return "NVPTXISD::Tex3DU32FloatLevel";
  case NVPTXISD::Tex3DU32FloatGrad:
    return "NVPTXISD::Tex3DU32FloatGrad";
  case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat";
  case NVPTXISD::TexCubeFloatFloatLevel:
    return "NVPTXISD::TexCubeFloatFloatLevel";
  case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float";
  case NVPTXISD::TexCubeS32FloatLevel:
    return "NVPTXISD::TexCubeS32FloatLevel";
  case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float";
  case NVPTXISD::TexCubeU32FloatLevel:
    return "NVPTXISD::TexCubeU32FloatLevel";
  case NVPTXISD::TexCubeArrayFloatFloat:
    return "NVPTXISD::TexCubeArrayFloatFloat";
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
    return "NVPTXISD::TexCubeArrayFloatFloatLevel";
  case NVPTXISD::TexCubeArrayS32Float:
    return "NVPTXISD::TexCubeArrayS32Float";
  case NVPTXISD::TexCubeArrayS32FloatLevel:
    return "NVPTXISD::TexCubeArrayS32FloatLevel";
  case NVPTXISD::TexCubeArrayU32Float:
    return "NVPTXISD::TexCubeArrayU32Float";
  case NVPTXISD::TexCubeArrayU32FloatLevel:
    return "NVPTXISD::TexCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4R2DFloatFloat:
    return "NVPTXISD::Tld4R2DFloatFloat";
  case NVPTXISD::Tld4G2DFloatFloat:
    return "NVPTXISD::Tld4G2DFloatFloat";
  case NVPTXISD::Tld4B2DFloatFloat:
    return "NVPTXISD::Tld4B2DFloatFloat";
  case NVPTXISD::Tld4A2DFloatFloat:
    return "NVPTXISD::Tld4A2DFloatFloat";
  case NVPTXISD::Tld4R2DS64Float:
    return "NVPTXISD::Tld4R2DS64Float";
  case NVPTXISD::Tld4G2DS64Float:
    return "NVPTXISD::Tld4G2DS64Float";
  case NVPTXISD::Tld4B2DS64Float:
    return "NVPTXISD::Tld4B2DS64Float";
  case NVPTXISD::Tld4A2DS64Float:
    return "NVPTXISD::Tld4A2DS64Float";
  case NVPTXISD::Tld4R2DU64Float:
    return "NVPTXISD::Tld4R2DU64Float";
  case NVPTXISD::Tld4G2DU64Float:
    return "NVPTXISD::Tld4G2DU64Float";
  case NVPTXISD::Tld4B2DU64Float:
    return "NVPTXISD::Tld4B2DU64Float";
  case NVPTXISD::Tld4A2DU64Float:
    return "NVPTXISD::Tld4A2DU64Float";

  case NVPTXISD::TexUnified1DFloatS32:
    return "NVPTXISD::TexUnified1DFloatS32";
  case NVPTXISD::TexUnified1DFloatFloat:
    return "NVPTXISD::TexUnified1DFloatFloat";
  case NVPTXISD::TexUnified1DFloatFloatLevel:
    return "NVPTXISD::TexUnified1DFloatFloatLevel";
  case NVPTXISD::TexUnified1DFloatFloatGrad:
    return "NVPTXISD::TexUnified1DFloatFloatGrad";
  case NVPTXISD::TexUnified1DS32S32:
    return "NVPTXISD::TexUnified1DS32S32";
  case NVPTXISD::TexUnified1DS32Float:
    return "NVPTXISD::TexUnified1DS32Float";
  case NVPTXISD::TexUnified1DS32FloatLevel:
    return "NVPTXISD::TexUnified1DS32FloatLevel";
  case NVPTXISD::TexUnified1DS32FloatGrad:
    return "NVPTXISD::TexUnified1DS32FloatGrad";
  case NVPTXISD::TexUnified1DU32S32:
    return "NVPTXISD::TexUnified1DU32S32";
  case NVPTXISD::TexUnified1DU32Float:
    return "NVPTXISD::TexUnified1DU32Float";
  case NVPTXISD::TexUnified1DU32FloatLevel:
    return "NVPTXISD::TexUnified1DU32FloatLevel";
  case NVPTXISD::TexUnified1DU32FloatGrad:
    return "NVPTXISD::TexUnified1DU32FloatGrad";
  case NVPTXISD::TexUnified1DArrayFloatS32:
    return "NVPTXISD::TexUnified1DArrayFloatS32";
  case NVPTXISD::TexUnified1DArrayFloatFloat:
    return "NVPTXISD::TexUnified1DArrayFloatFloat";
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
    return "NVPTXISD::TexUnified1DArrayFloatFloatLevel";
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
    return "NVPTXISD::TexUnified1DArrayFloatFloatGrad";
  case NVPTXISD::TexUnified1DArrayS32S32:
    return "NVPTXISD::TexUnified1DArrayS32S32";
  case NVPTXISD::TexUnified1DArrayS32Float:
    return "NVPTXISD::TexUnified1DArrayS32Float";
  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
    return "NVPTXISD::TexUnified1DArrayS32FloatLevel";
  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
    return "NVPTXISD::TexUnified1DArrayS32FloatGrad";
  case NVPTXISD::TexUnified1DArrayU32S32:
    return "NVPTXISD::TexUnified1DArrayU32S32";
  case NVPTXISD::TexUnified1DArrayU32Float:
    return "NVPTXISD::TexUnified1DArrayU32Float";
  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
    return "NVPTXISD::TexUnified1DArrayU32FloatLevel";
  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
    return "NVPTXISD::TexUnified1DArrayU32FloatGrad";
  case NVPTXISD::TexUnified2DFloatS32:
    return "NVPTXISD::TexUnified2DFloatS32";
  case NVPTXISD::TexUnified2DFloatFloat:
    return "NVPTXISD::TexUnified2DFloatFloat";
  case NVPTXISD::TexUnified2DFloatFloatLevel:
    return "NVPTXISD::TexUnified2DFloatFloatLevel";
  case NVPTXISD::TexUnified2DFloatFloatGrad:
    return "NVPTXISD::TexUnified2DFloatFloatGrad";
  case NVPTXISD::TexUnified2DS32S32:
    return "NVPTXISD::TexUnified2DS32S32";
  case NVPTXISD::TexUnified2DS32Float:
    return "NVPTXISD::TexUnified2DS32Float";
  case NVPTXISD::TexUnified2DS32FloatLevel:
    return "NVPTXISD::TexUnified2DS32FloatLevel";
  case NVPTXISD::TexUnified2DS32FloatGrad:
    return "NVPTXISD::TexUnified2DS32FloatGrad";
  case NVPTXISD::TexUnified2DU32S32:
    return "NVPTXISD::TexUnified2DU32S32";
  case NVPTXISD::TexUnified2DU32Float:
    return "NVPTXISD::TexUnified2DU32Float";
  case NVPTXISD::TexUnified2DU32FloatLevel:
    return "NVPTXISD::TexUnified2DU32FloatLevel";
  case NVPTXISD::TexUnified2DU32FloatGrad:
    return "NVPTXISD::TexUnified2DU32FloatGrad";
  case NVPTXISD::TexUnified2DArrayFloatS32:
    return "NVPTXISD::TexUnified2DArrayFloatS32";
  case NVPTXISD::TexUnified2DArrayFloatFloat:
    return "NVPTXISD::TexUnified2DArrayFloatFloat";
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
    return "NVPTXISD::TexUnified2DArrayFloatFloatLevel";
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
    return "NVPTXISD::TexUnified2DArrayFloatFloatGrad";
  case NVPTXISD::TexUnified2DArrayS32S32:
    return "NVPTXISD::TexUnified2DArrayS32S32";
  case NVPTXISD::TexUnified2DArrayS32Float:
    return "NVPTXISD::TexUnified2DArrayS32Float";
  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
    return "NVPTXISD::TexUnified2DArrayS32FloatLevel";
  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
    return "NVPTXISD::TexUnified2DArrayS32FloatGrad";
  case NVPTXISD::TexUnified2DArrayU32S32:
    return "NVPTXISD::TexUnified2DArrayU32S32";
  case NVPTXISD::TexUnified2DArrayU32Float:
    return "NVPTXISD::TexUnified2DArrayU32Float";
  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
    return "NVPTXISD::TexUnified2DArrayU32FloatLevel";
  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
    return "NVPTXISD::TexUnified2DArrayU32FloatGrad";
  case NVPTXISD::TexUnified3DFloatS32:
    return "NVPTXISD::TexUnified3DFloatS32";
  case NVPTXISD::TexUnified3DFloatFloat:
    return "NVPTXISD::TexUnified3DFloatFloat";
  case NVPTXISD::TexUnified3DFloatFloatLevel:
    return "NVPTXISD::TexUnified3DFloatFloatLevel";
  case NVPTXISD::TexUnified3DFloatFloatGrad:
    return "NVPTXISD::TexUnified3DFloatFloatGrad";
  case NVPTXISD::TexUnified3DS32S32:
    return "NVPTXISD::TexUnified3DS32S32";
  case NVPTXISD::TexUnified3DS32Float:
    return "NVPTXISD::TexUnified3DS32Float";
  case NVPTXISD::TexUnified3DS32FloatLevel:
    return "NVPTXISD::TexUnified3DS32FloatLevel";
  case NVPTXISD::TexUnified3DS32FloatGrad:
    return "NVPTXISD::TexUnified3DS32FloatGrad";
  case NVPTXISD::TexUnified3DU32S32:
    return "NVPTXISD::TexUnified3DU32S32";
  case NVPTXISD::TexUnified3DU32Float:
    return "NVPTXISD::TexUnified3DU32Float";
  case NVPTXISD::TexUnified3DU32FloatLevel:
    return "NVPTXISD::TexUnified3DU32FloatLevel";
  case NVPTXISD::TexUnified3DU32FloatGrad:
    return "NVPTXISD::TexUnified3DU32FloatGrad";
  case NVPTXISD::TexUnifiedCubeFloatFloat:
    return "NVPTXISD::TexUnifiedCubeFloatFloat";
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
    return "NVPTXISD::TexUnifiedCubeFloatFloatLevel";
  case NVPTXISD::TexUnifiedCubeS32Float:
    return "NVPTXISD::TexUnifiedCubeS32Float";
  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeS32FloatLevel";
  case NVPTXISD::TexUnifiedCubeU32Float:
    return "NVPTXISD::TexUnifiedCubeU32Float";
  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeU32FloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
    return "NVPTXISD::TexUnifiedCubeArrayFloatFloat";
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayS32Float:
    return "NVPTXISD::TexUnifiedCubeArrayS32Float";
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel";
  case NVPTXISD::TexUnifiedCubeArrayU32Float:
    return "NVPTXISD::TexUnifiedCubeArrayU32Float";
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
    return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedG2DFloatFloat";
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedB2DFloatFloat";
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
    return "NVPTXISD::Tld4UnifiedA2DFloatFloat";
  case NVPTXISD::Tld4UnifiedR2DS64Float:
    return "NVPTXISD::Tld4UnifiedR2DS64Float";
  case NVPTXISD::Tld4UnifiedG2DS64Float:
    return "NVPTXISD::Tld4UnifiedG2DS64Float";
  case NVPTXISD::Tld4UnifiedB2DS64Float:
    return "NVPTXISD::Tld4UnifiedB2DS64Float";
  case NVPTXISD::Tld4UnifiedA2DS64Float:
    return "NVPTXISD::Tld4UnifiedA2DS64Float";
  case NVPTXISD::Tld4UnifiedR2DU64Float:
    return "NVPTXISD::Tld4UnifiedR2DU64Float";
  case NVPTXISD::Tld4UnifiedG2DU64Float:
    return "NVPTXISD::Tld4UnifiedG2DU64Float";
  case NVPTXISD::Tld4UnifiedB2DU64Float:
    return "NVPTXISD::Tld4UnifiedB2DU64Float";
  case NVPTXISD::Tld4UnifiedA2DU64Float:
    return "NVPTXISD::Tld4UnifiedA2DU64Float";

  case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp";
  case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp";
  case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp";
  case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp";
  case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp";
  case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp";
  case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp";
  case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp";
  case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp";
  case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp";
  case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp";

  case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp";
  case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp";
  case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp";
  case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp";
  case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp";
  case NVPTXISD::Suld1DArrayV2I16Clamp: return "NVPTXISD::Suld1DArrayV2I16Clamp";
  case NVPTXISD::Suld1DArrayV2I32Clamp: return "NVPTXISD::Suld1DArrayV2I32Clamp";
  case NVPTXISD::Suld1DArrayV2I64Clamp: return "NVPTXISD::Suld1DArrayV2I64Clamp";
  case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp";
  case NVPTXISD::Suld1DArrayV4I16Clamp: return "NVPTXISD::Suld1DArrayV4I16Clamp";
  case NVPTXISD::Suld1DArrayV4I32Clamp: return "NVPTXISD::Suld1DArrayV4I32Clamp";

  case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp";
  case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp";
  case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp";
  case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp";
  case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp";
  case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp";
  case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp";
  case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp";
  case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp";
  case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp";
  case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp";

  case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp";
  case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp";
  case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp";
  case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp";
  case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp";
  case NVPTXISD::Suld2DArrayV2I16Clamp: return "NVPTXISD::Suld2DArrayV2I16Clamp";
  case NVPTXISD::Suld2DArrayV2I32Clamp: return "NVPTXISD::Suld2DArrayV2I32Clamp";
  case NVPTXISD::Suld2DArrayV2I64Clamp: return "NVPTXISD::Suld2DArrayV2I64Clamp";
  case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp";
  case NVPTXISD::Suld2DArrayV4I16Clamp: return "NVPTXISD::Suld2DArrayV4I16Clamp";
  case NVPTXISD::Suld2DArrayV4I32Clamp: return "NVPTXISD::Suld2DArrayV4I32Clamp";

  case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp";
  case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp";
  case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp";
  case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp";
  case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp";
  case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp";
  case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp";
  case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp";
  case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp";
  case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp";
  case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp";

  case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap";
  case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap";
  case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap";
  case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap";
  case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap";
  case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap";
  case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap";
  case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap";
  case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap";
  case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap";
  case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap";

  case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap";
  case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap";
  case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap";
  case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap";
  case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap";
  case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap";
  case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap";
  case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap";
  case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap";
  case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap";
  case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap";

  case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap";
  case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap";
  case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap";
  case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap";
  case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap";
  case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap";
  case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap";
  case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap";
  case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap";
  case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap";
  case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap";

  case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap";
  case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap";
  case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap";
  case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap";
  case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap";
  case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap";
  case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap";
  case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap";
  case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap";
  case NVPTXISD::Suld2DArrayV4I16Trap:
    return "NVPTXISD::Suld2DArrayV4I16Trap";
  case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap";

  case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap";
  case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap";
  case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap";
  case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap";
  case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap";
  case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap";
  case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap";
  case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap";
  case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap";
  case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap";
  case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap";

  case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero";
  case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero";
  case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero";
  case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero";
  case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero";
  case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero";
  case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero";
  case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero";
  case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero";
  case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero";
  case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero";

  case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero";
  case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero";
  case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero";
  case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero";
  case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero";
  case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero";
  case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero";
  case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero";
  case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero";
  case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero";
  case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero";

  case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero";
  case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero";
  case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero";
  case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero";
  case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero";
  case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero";
  case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero";
  case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero";
  case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero";
  case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero";
  case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero";

  case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero";
  case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero";
  case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero";
  case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero";
  case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero";
  case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero";
  case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero";
  case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero";
  case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero";
  case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero";
  case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero";

  case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero";
  case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero";
  case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero";
  case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero";
  case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero";
  case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero";
  case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero";
  case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero";
  case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero";
  case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero";
  case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero";
  }
  return nullptr;
}

TargetLoweringBase::LegalizeTypeAction
NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
  if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
    return TypeSplitVector;

  return TargetLoweringBase::getPreferredVectorAction(VT);
}

SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  Op = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
  return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
}

std::string NVPTXTargetLowering::getPrototype(
    const DataLayout &DL, Type *retTy, const ArgListTy &Args,
    const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment,
    const ImmutableCallSite *CS) const {
  auto PtrVT = getPointerTy(DL);

  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return "";

  std::stringstream O;
  O << "prototype_" << uniqueCallSite << " : .callprototype ";

  if (retTy->getTypeID() == Type::VoidTyID) {
    O << "()";
  } else {
    O << "(";
    if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
      unsigned size = 0;
      if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
        size = ITy->getBitWidth();
        if (size < 32)
          size = 32;
      } else {
        assert(retTy->isFloatingPointTy() &&
               "Floating point type expected here");
        size = retTy->getPrimitiveSizeInBits();
      }

      O << ".param .b" << size << " _";
    } else if (isa<PointerType>(retTy)) {
      O << ".param .b" << PtrVT.getSizeInBits() << " _";
    } else if ((retTy->getTypeID() == Type::StructTyID) ||
               isa<VectorType>(retTy)) {
      auto &DL = CS->getCalledFunction()->getParent()->getDataLayout();
      O << ".param .align " << retAlignment << " .b8 _["
        << DL.getTypeAllocSize(retTy) << "]";
    } else {
      llvm_unreachable("Unknown return type");
    }
    O << ") ";
  }
  O << "_ (";

  bool first = true;

  unsigned OIdx = 0;
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    Type *Ty = Args[i].Ty;
    if (!first) {
      O << ", ";
    }
    first = false;

    if (!Outs[OIdx].Flags.isByVal()) {
      if (Ty->isAggregateType() || Ty->isVectorTy()) {
        unsigned align = 0;
        const CallInst *CallI = cast<CallInst>(CS->getInstruction());
        // +1 because index 0 is reserved for return type alignment
        if (!llvm::getAlign(*CallI, i + 1, align))
          align = DL.getABITypeAlignment(Ty);
        unsigned sz = DL.getTypeAllocSize(Ty);
        O << ".param .align " << align << " .b8 ";
        O << "_";
        O << "[" << sz << "]";
        // update the index for Outs
        SmallVector<EVT, 16> vtparts;
        ComputeValueVTs(*this, DL, Ty, vtparts);
        if (unsigned len = vtparts.size())
          OIdx += len - 1;
        continue;
      }
      // i8 types in IR will be i16 types in SDAG
      assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
              (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
             "type mismatch between callee prototype and arguments");
      // scalar type
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32)
          sz = 32;
      } else if (isa<PointerType>(Ty))
        sz = PtrVT.getSizeInBits();
      else
        sz = Ty->getPrimitiveSizeInBits();
      O << ".param .b" << sz << " ";
      O << "_";
      continue;
    }
    auto *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy && "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    unsigned align = Outs[OIdx].Flags.getByValAlign();
    unsigned sz = DL.getTypeAllocSize(ETy);
    O << ".param .align " << align << " .b8 ";
    O << "_";
    O << "[" << sz << "]";
  }
  O << ");";
  return O.str();
}

unsigned
NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
                                          const ImmutableCallSite *CS,
                                          Type *Ty,
                                          unsigned Idx) const {
  unsigned Align = 0;
  const Value *DirectCallee = CS->getCalledFunction();

  if (!DirectCallee) {
    // We don't have a direct function symbol, but that may be because of
    // constant cast instructions in the call.
    const Instruction *CalleeI = CS->getInstruction();
    assert(CalleeI && "Call target is not a function or derived value?");

    // With bitcast'd call targets, the instruction will be the call
    if (isa<CallInst>(CalleeI)) {
      // Check if we have call alignment metadata
      if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align))
        return Align;

      const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
      // Ignore any bitcast instructions
      while (isa<ConstantExpr>(CalleeV)) {
        const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
        if (!CE->isCast())
          break;
        // Look through the bitcast
        CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0);
      }

      // We have now looked past all of the bitcasts. Do we finally have a
      // Function?
      if (isa<Function>(CalleeV))
        DirectCallee = CalleeV;
    }
  }

  // Check for function alignment information if we found that the
  // ultimate target is a Function
  if (DirectCallee)
    if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align))
      return Align;

  // Call is indirect or alignment information is not available, fall back to
  // the ABI type alignment
  auto &DL = CS->getCaller()->getParent()->getDataLayout();
  return DL.getABITypeAlignment(Ty);
}

SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  ArgListTy &Args = CLI.getArgs();
  Type *retTy = CLI.RetTy;
  ImmutableCallSite *CS = CLI.CS;

  bool isABI = (STI.getSmVersion() >= 20);
  assert(isABI && "Non-ABI compilation is not supported");
  if (!isABI)
    return Chain;
  MachineFunction &MF = DAG.getMachineFunction();
  const Function *F = MF.getFunction();
  auto &DL = MF.getDataLayout();

  SDValue tempChain = Chain;
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getIntPtrConstant(uniqueCallSite, dl, true),
                               dl);
  SDValue InFlag = Chain.getValue(1);

  unsigned paramCount = 0;
  // Args.size() and Outs.size() need not match.
  // Outs.size() will be larger
  //   * if there is an aggregate argument with multiple fields (each field
  //     showing up separately in Outs)
  //   * if there is a vector argument with more than typical vector-length
  //     elements (generally if more than 4) where each vector element is
  //     individually present in Outs.
  // So a different index should be used for indexing into Outs/OutVals.
  // See similar issue in LowerFormalArguments.
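  // (Illustrative:) an aggregate argument such as {i32, float} is one entry in
  // Args but contributes two entries (i32 and f32) to Outs/OutVals, which is
  // why OIdx below is advanced independently of i.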
  unsigned OIdx = 0;
  // Declare the .params or .reg needed to pass values
  // to the function
  for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
    EVT VT = Outs[OIdx].VT;
    Type *Ty = Args[i].Ty;

    if (!Outs[OIdx].Flags.isByVal()) {
      if (Ty->isAggregateType()) {
        // aggregate
        SmallVector<EVT, 16> vtparts;
        SmallVector<uint64_t, 16> Offsets;
        ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &Offsets,
                           0);

        unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
        // declare .param .align <align> .b8 .param<n>[<size>];
        unsigned sz = DL.getTypeAllocSize(Ty);
        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl,
                                                             MVT::i32),
                                      DAG.getConstant(paramCount, dl, MVT::i32),
                                      DAG.getConstant(sz, dl, MVT::i32),
                                      InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                            DeclareParamOps);
        InFlag = Chain.getValue(1);
        for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
          EVT elemtype = vtparts[j];
          unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]);
          if (elemtype.isInteger() && (sz < 8))
            sz = 8;
          SDValue StVal = OutVals[OIdx];
          if (elemtype.getSizeInBits() < 16) {
            StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
          }
          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, dl, MVT::i32),
                                     DAG.getConstant(Offsets[j], dl, MVT::i32),
                                     StVal, InFlag };
          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
                                          CopyParamVTs, CopyParamOps,
                                          elemtype, MachinePointerInfo(),
                                          ArgAlign);
          InFlag = Chain.getValue(1);
          ++OIdx;
        }
        if (vtparts.size() > 0)
          --OIdx;
        ++paramCount;
        continue;
      }
      if (Ty->isVectorTy()) {
        EVT ObjectVT = getValueType(DL, Ty);
        unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
        // declare .param .align <align> .b8 .param<n>[<size>];
        unsigned sz = DL.getTypeAllocSize(Ty);
        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareParamOps[] = { Chain,
                                      DAG.getConstant(align, dl, MVT::i32),
                                      DAG.getConstant(paramCount, dl, MVT::i32),
                                      DAG.getConstant(sz, dl, MVT::i32),
                                      InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                            DeclareParamOps);
        InFlag = Chain.getValue(1);
        unsigned NumElts = ObjectVT.getVectorNumElements();
        EVT EltVT = ObjectVT.getVectorElementType();
        EVT MemVT = EltVT;
        bool NeedExtend = false;
        if (EltVT.getSizeInBits() < 16) {
          NeedExtend = true;
          EltVT = MVT::i16;
        }

        // V1 store
        if (NumElts == 1) {
          SDValue Elt = OutVals[OIdx++];
          if (NeedExtend)
            Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt);

          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, dl, MVT::i32),
                                     DAG.getConstant(0, dl, MVT::i32), Elt,
                                     InFlag };
          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
                                          CopyParamVTs, CopyParamOps,
                                          MemVT, MachinePointerInfo());
          InFlag = Chain.getValue(1);
        } else if (NumElts == 2) {
          SDValue Elt0 = OutVals[OIdx++];
          SDValue Elt1 = OutVals[OIdx++];
          if (NeedExtend) {
            Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0);
            Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1);
          }

          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, dl, MVT::i32),
                                     DAG.getConstant(0, dl, MVT::i32), Elt0,
                                     Elt1, InFlag };
          Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl,
                                          CopyParamVTs, CopyParamOps,
                                          MemVT, MachinePointerInfo());
          InFlag = Chain.getValue(1);
        } else {
          unsigned curOffset = 0;
          // V4 stores
          // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
          // the vector will be expanded to a power of 2 elements, so we know
          // we can always round up to the next multiple of 4 when creating the
          // vector stores.
          // e.g.  4 elem => 1 st.v4
          //       6 elem => 2 st.v4
          //       8 elem => 2 st.v4
          //      11 elem => 3 st.v4
          unsigned VecSize = 4;
          if (EltVT.getSizeInBits() == 64)
            VecSize = 2;

          // This is potentially only part of a vector, so assume all elements
          // are packed together.
          unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize;

          for (unsigned i = 0; i < NumElts; i += VecSize) {
            // Get values
            SDValue StoreVal;
            SmallVector<SDValue, 8> Ops;
            Ops.push_back(Chain);
            Ops.push_back(DAG.getConstant(paramCount, dl, MVT::i32));
            Ops.push_back(DAG.getConstant(curOffset, dl, MVT::i32));

            unsigned Opc = NVPTXISD::StoreParamV2;

            StoreVal = OutVals[OIdx++];
            if (NeedExtend)
              StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
            Ops.push_back(StoreVal);

            if (i + 1 < NumElts) {
              StoreVal = OutVals[OIdx++];
              if (NeedExtend)
                StoreVal =
                    DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
            } else {
              StoreVal = DAG.getUNDEF(EltVT);
            }
            Ops.push_back(StoreVal);

            if (VecSize == 4) {
              Opc = NVPTXISD::StoreParamV4;
              if (i + 2 < NumElts) {
                StoreVal = OutVals[OIdx++];
                if (NeedExtend)
                  StoreVal =
                      DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
              } else {
                StoreVal = DAG.getUNDEF(EltVT);
              }
              Ops.push_back(StoreVal);

              if (i + 3 < NumElts) {
                StoreVal = OutVals[OIdx++];
                if (NeedExtend)
                  StoreVal =
                      DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
              } else {
                StoreVal = DAG.getUNDEF(EltVT);
              }
              Ops.push_back(StoreVal);
            }

            Ops.push_back(InFlag);

            SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
            Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops,
                                            MemVT, MachinePointerInfo());
            InFlag = Chain.getValue(1);
            curOffset += PerStoreOffset;
          }
        }
        ++paramCount;
        --OIdx;
        continue;
      }
      // Plain scalar
      // for ABI, declare .param .b<size> .param<n>;
      unsigned sz = VT.getSizeInBits();
      bool needExtend = false;
      if (VT.isInteger()) {
        if (sz < 16)
          needExtend = true;
        if (sz < 32)
          sz = 32;
      }
      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareParamOps[] = { Chain,
                                    DAG.getConstant(paramCount, dl, MVT::i32),
                                    DAG.getConstant(sz, dl, MVT::i32),
                                    DAG.getConstant(0, dl, MVT::i32), InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                          DeclareParamOps);
      InFlag = Chain.getValue(1);
      SDValue OutV = OutVals[OIdx];
      if (needExtend) {
        // zext/sext i1 to i16
        unsigned opc = ISD::ZERO_EXTEND;
        if (Outs[OIdx].Flags.isSExt())
          opc = ISD::SIGN_EXTEND;
        OutV = DAG.getNode(opc, dl, MVT::i16, OutV);
      }
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain,
                                 DAG.getConstant(paramCount, dl, MVT::i32),
                                 DAG.getConstant(0, dl, MVT::i32), OutV,
                                 InFlag };

      unsigned opcode = NVPTXISD::StoreParam;
      if (Outs[OIdx].Flags.isZExt())
        opcode = NVPTXISD::StoreParamU32;
      else if (Outs[OIdx].Flags.isSExt())
        opcode = NVPTXISD::StoreParamS32;
      Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps,
                                      VT, MachinePointerInfo());

      InFlag = Chain.getValue(1);
      ++paramCount;
      continue;
    }
    // struct or vector
    SmallVector<EVT, 16> vtparts;
    SmallVector<uint64_t, 16> Offsets;
    auto *PTy = dyn_cast<PointerType>(Args[i].Ty);
    assert(PTy && "Type of a byval parameter should be pointer");
    ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(),
                       vtparts, &Offsets, 0);

    // declare .param .align <align> .b8 .param<n>[<size>];
    unsigned sz = Outs[OIdx].Flags.getByValSize();
    SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
    // The ByValAlign in the Outs[OIdx].Flags is always set at this point,
    // so we don't need to worry about natural alignment or not.
    // See TargetLowering::LowerCallTo().
    SDValue DeclareParamOps[] = {
      Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), dl, MVT::i32),
      DAG.getConstant(paramCount, dl, MVT::i32),
      DAG.getConstant(sz, dl, MVT::i32), InFlag
    };
    Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                        DeclareParamOps);
    InFlag = Chain.getValue(1);
    for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
      EVT elemtype = vtparts[j];
      int curOffset = Offsets[j];
      unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
      auto PtrVT = getPointerTy(DAG.getDataLayout());
      SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx],
                                    DAG.getConstant(curOffset, dl, PtrVT));
      SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
                                   MachinePointerInfo(), false, false, false,
                                   PartAlign);
      if (elemtype.getSizeInBits() < 16) {
        theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
      }
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain,
                                 DAG.getConstant(paramCount, dl, MVT::i32),
                                 DAG.getConstant(curOffset, dl, MVT::i32),
                                 theVal, InFlag };
      Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
                                      CopyParamOps, elemtype,
                                      MachinePointerInfo());

      InFlag = Chain.getValue(1);
    }
    ++paramCount;
  }

  GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
  unsigned retAlignment = 0;

  // Handle Result
  if (Ins.size() > 0) {
    SmallVector<EVT, 16> resvtparts;
    ComputeValueVTs(*this, DL, retTy, resvtparts);

    // Declare
    //   .param .align 16 .b8 retval0[<size-in-bytes>], or
    //   .param .b<size-in-bits> retval0
    unsigned resultsz = DL.getTypeAllocSizeInBits(retTy);
    // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
    // these three types to match the logic in
    // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
1388 // Plus, this behavior is consistent with nvcc's. 1389 if (retTy->isFloatingPointTy() || retTy->isIntegerTy() || 1390 retTy->isPointerTy()) { 1391 // Scalar needs to be at least 32bit wide 1392 if (resultsz < 32) 1393 resultsz = 32; 1394 SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1395 SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), 1396 DAG.getConstant(resultsz, dl, MVT::i32), 1397 DAG.getConstant(0, dl, MVT::i32), InFlag }; 1398 Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, 1399 DeclareRetOps); 1400 InFlag = Chain.getValue(1); 1401 } else { 1402 retAlignment = getArgumentAlignment(Callee, CS, retTy, 0); 1403 SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1404 SDValue DeclareRetOps[] = { Chain, 1405 DAG.getConstant(retAlignment, dl, MVT::i32), 1406 DAG.getConstant(resultsz / 8, dl, MVT::i32), 1407 DAG.getConstant(0, dl, MVT::i32), InFlag }; 1408 Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, 1409 DeclareRetOps); 1410 InFlag = Chain.getValue(1); 1411 } 1412 } 1413 1414 if (!Func) { 1415 // This is the indirect function call case: PTX requires a prototype of the 1416 // form 1417 // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _); 1418 // to be emitted, and the label has to be used as the last arg of the call 1419 // instruction. 1420 // The prototype is embedded in a string and put as the operand for a 1421 // CallPrototype SDNode which will print out to the value of the string. 1422 SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1423 std::string Proto = 1424 getPrototype(DAG.getDataLayout(), retTy, Args, Outs, retAlignment, CS); 1425 const char *ProtoStr = 1426 nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); 1427 SDValue ProtoOps[] = { 1428 Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag, 1429 }; 1430 Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps); 1431 InFlag = Chain.getValue(1); 1432 } 1433 // Op to just print "call" 1434 SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1435 SDValue PrintCallOps[] = { 1436 Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag 1437 }; 1438 Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall), 1439 dl, PrintCallVTs, PrintCallOps); 1440 InFlag = Chain.getValue(1); 1441 1442 // Ops to print out the function name 1443 SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1444 SDValue CallVoidOps[] = { Chain, Callee, InFlag }; 1445 Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps); 1446 InFlag = Chain.getValue(1); 1447 1448 // Ops to print out the param list 1449 SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1450 SDValue CallArgBeginOps[] = { Chain, InFlag }; 1451 Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, 1452 CallArgBeginOps); 1453 InFlag = Chain.getValue(1); 1454 1455 for (unsigned i = 0, e = paramCount; i != e; ++i) { 1456 unsigned opcode; 1457 if (i == (e - 1)) 1458 opcode = NVPTXISD::LastCallArg; 1459 else 1460 opcode = NVPTXISD::CallArg; 1461 SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1462 SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), 1463 DAG.getConstant(i, dl, MVT::i32), InFlag }; 1464 Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps); 1465 InFlag = Chain.getValue(1); 1466 } 1467 SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1468 SDValue CallArgEndOps[] = { Chain, 1469 DAG.getConstant(Func ?
1 : 0, dl, MVT::i32), 1470 InFlag }; 1471 Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps); 1472 InFlag = Chain.getValue(1); 1473 1474 if (!Func) { 1475 SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1476 SDValue PrototypeOps[] = { Chain, 1477 DAG.getConstant(uniqueCallSite, dl, MVT::i32), 1478 InFlag }; 1479 Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps); 1480 InFlag = Chain.getValue(1); 1481 } 1482 1483 // Generate loads from param memory/moves from registers for result 1484 if (Ins.size() > 0) { 1485 if (retTy && retTy->isVectorTy()) { 1486 EVT ObjectVT = getValueType(DL, retTy); 1487 unsigned NumElts = ObjectVT.getVectorNumElements(); 1488 EVT EltVT = ObjectVT.getVectorElementType(); 1489 assert(STI.getTargetLowering()->getNumRegisters(F->getContext(), 1490 ObjectVT) == NumElts && 1491 "Vector was not scalarized"); 1492 unsigned sz = EltVT.getSizeInBits(); 1493 bool needTruncate = sz < 8; 1494 1495 if (NumElts == 1) { 1496 // Just a simple load 1497 SmallVector<EVT, 4> LoadRetVTs; 1498 if (EltVT == MVT::i1 || EltVT == MVT::i8) { 1499 // If loading i1/i8 result, generate 1500 // load.b8 i16 1501 // if i1 1502 // trunc i16 to i1 1503 LoadRetVTs.push_back(MVT::i16); 1504 } else 1505 LoadRetVTs.push_back(EltVT); 1506 LoadRetVTs.push_back(MVT::Other); 1507 LoadRetVTs.push_back(MVT::Glue); 1508 SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), 1509 DAG.getConstant(0, dl, MVT::i32), InFlag}; 1510 SDValue retval = DAG.getMemIntrinsicNode( 1511 NVPTXISD::LoadParam, dl, 1512 DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); 1513 Chain = retval.getValue(1); 1514 InFlag = retval.getValue(2); 1515 SDValue Ret0 = retval; 1516 if (needTruncate) 1517 Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0); 1518 InVals.push_back(Ret0); 1519 } else if (NumElts == 2) { 1520 // LoadV2 1521 SmallVector<EVT, 4> LoadRetVTs; 1522 if (EltVT == MVT::i1 || EltVT == MVT::i8) { 1523 // If loading i1/i8 result, generate 1524 // load.b8 i16 1525 // if i1 1526 // trunc i16 to i1 1527 LoadRetVTs.push_back(MVT::i16); 1528 LoadRetVTs.push_back(MVT::i16); 1529 } else { 1530 LoadRetVTs.push_back(EltVT); 1531 LoadRetVTs.push_back(EltVT); 1532 } 1533 LoadRetVTs.push_back(MVT::Other); 1534 LoadRetVTs.push_back(MVT::Glue); 1535 SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), 1536 DAG.getConstant(0, dl, MVT::i32), InFlag}; 1537 SDValue retval = DAG.getMemIntrinsicNode( 1538 NVPTXISD::LoadParamV2, dl, 1539 DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); 1540 Chain = retval.getValue(2); 1541 InFlag = retval.getValue(3); 1542 SDValue Ret0 = retval.getValue(0); 1543 SDValue Ret1 = retval.getValue(1); 1544 if (needTruncate) { 1545 Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0); 1546 InVals.push_back(Ret0); 1547 Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1); 1548 InVals.push_back(Ret1); 1549 } else { 1550 InVals.push_back(Ret0); 1551 InVals.push_back(Ret1); 1552 } 1553 } else { 1554 // Split into N LoadV4 1555 unsigned Ofst = 0; 1556 unsigned VecSize = 4; 1557 unsigned Opc = NVPTXISD::LoadParamV4; 1558 if (EltVT.getSizeInBits() == 64) { 1559 VecSize = 2; 1560 Opc = NVPTXISD::LoadParamV2; 1561 } 1562 EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); 1563 for (unsigned i = 0; i < NumElts; i += VecSize) { 1564 SmallVector<EVT, 8> LoadRetVTs; 1565 if (EltVT == MVT::i1 || EltVT == MVT::i8) { 1566 // If loading i1/i8 result, generate 1567 // load.b8 i16 1568 // if i1 1569 // trunc 
i16 to i1 1570 for (unsigned j = 0; j < VecSize; ++j) 1571 LoadRetVTs.push_back(MVT::i16); 1572 } else { 1573 for (unsigned j = 0; j < VecSize; ++j) 1574 LoadRetVTs.push_back(EltVT); 1575 } 1576 LoadRetVTs.push_back(MVT::Other); 1577 LoadRetVTs.push_back(MVT::Glue); 1578 SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), 1579 DAG.getConstant(Ofst, dl, MVT::i32), InFlag}; 1580 SDValue retval = DAG.getMemIntrinsicNode( 1581 Opc, dl, DAG.getVTList(LoadRetVTs), 1582 LoadRetOps, EltVT, MachinePointerInfo()); 1583 if (VecSize == 2) { 1584 Chain = retval.getValue(2); 1585 InFlag = retval.getValue(3); 1586 } else { 1587 Chain = retval.getValue(4); 1588 InFlag = retval.getValue(5); 1589 } 1590 1591 for (unsigned j = 0; j < VecSize; ++j) { 1592 if (i + j >= NumElts) 1593 break; 1594 SDValue Elt = retval.getValue(j); 1595 if (needTruncate) 1596 Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); 1597 InVals.push_back(Elt); 1598 } 1599 Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); 1600 } 1601 } 1602 } else { 1603 SmallVector<EVT, 16> VTs; 1604 SmallVector<uint64_t, 16> Offsets; 1605 ComputePTXValueVTs(*this, DAG.getDataLayout(), retTy, VTs, &Offsets, 0); 1606 assert(VTs.size() == Ins.size() && "Bad value decomposition"); 1607 unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0); 1608 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 1609 unsigned sz = VTs[i].getSizeInBits(); 1610 unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]); 1611 bool needTruncate = sz < 8; 1612 if (VTs[i].isInteger() && (sz < 8)) 1613 sz = 8; 1614 1615 SmallVector<EVT, 4> LoadRetVTs; 1616 EVT TheLoadType = VTs[i]; 1617 if (retTy->isIntegerTy() && DL.getTypeAllocSizeInBits(retTy) < 32) { 1618 // This is for integer types only, and specifically not for 1619 // aggregates. 1620 LoadRetVTs.push_back(MVT::i32); 1621 TheLoadType = MVT::i32; 1622 } else if (sz < 16) { 1623 // If loading i1/i8 result, generate 1624 // load i8 (-> i16) 1625 // trunc i16 to i1/i8 1626 LoadRetVTs.push_back(MVT::i16); 1627 } else 1628 LoadRetVTs.push_back(Ins[i].VT); 1629 LoadRetVTs.push_back(MVT::Other); 1630 LoadRetVTs.push_back(MVT::Glue); 1631 1632 SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), 1633 DAG.getConstant(Offsets[i], dl, MVT::i32), 1634 InFlag}; 1635 SDValue retval = DAG.getMemIntrinsicNode( 1636 NVPTXISD::LoadParam, dl, 1637 DAG.getVTList(LoadRetVTs), LoadRetOps, 1638 TheLoadType, MachinePointerInfo(), AlignI); 1639 Chain = retval.getValue(1); 1640 InFlag = retval.getValue(2); 1641 SDValue Ret0 = retval.getValue(0); 1642 if (needTruncate) 1643 Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0); 1644 InVals.push_back(Ret0); 1645 } 1646 } 1647 } 1648 1649 Chain = DAG.getCALLSEQ_END(Chain, 1650 DAG.getIntPtrConstant(uniqueCallSite, dl, true), 1651 DAG.getIntPtrConstant(uniqueCallSite + 1, dl, 1652 true), 1653 InFlag, dl); 1654 uniqueCallSite++; 1655 1656 // set isTailCall to false for now, until we figure out how to express 1657 // tail call optimization in PTX 1658 isTailCall = false; 1659 return Chain; 1660 } 1661 1662 // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() 1663 // (see LegalizeDAG.cpp). This is slow and uses local memory. 
1664 // We use extract/insert/build vector just as LegalizeOp() does in llvm 2.5 1665 SDValue 1666 NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { 1667 SDNode *Node = Op.getNode(); 1668 SDLoc dl(Node); 1669 SmallVector<SDValue, 8> Ops; 1670 unsigned NumOperands = Node->getNumOperands(); 1671 for (unsigned i = 0; i < NumOperands; ++i) { 1672 SDValue SubOp = Node->getOperand(i); 1673 EVT VVT = SubOp.getNode()->getValueType(0); 1674 EVT EltVT = VVT.getVectorElementType(); 1675 unsigned NumSubElem = VVT.getVectorNumElements(); 1676 for (unsigned j = 0; j < NumSubElem; ++j) { 1677 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, 1678 DAG.getIntPtrConstant(j, dl))); 1679 } 1680 } 1681 return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops); 1682 } 1683 1684 /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which 1685 /// 1) returns two i32 values and takes a 2 x i32 value to shift plus a shift 1686 /// amount, or 1687 /// 2) returns two i64 values and takes a 2 x i64 value to shift plus a shift 1688 /// amount. 1689 SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op, 1690 SelectionDAG &DAG) const { 1691 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 1692 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 1693 1694 EVT VT = Op.getValueType(); 1695 unsigned VTBits = VT.getSizeInBits(); 1696 SDLoc dl(Op); 1697 SDValue ShOpLo = Op.getOperand(0); 1698 SDValue ShOpHi = Op.getOperand(1); 1699 SDValue ShAmt = Op.getOperand(2); 1700 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 1701 1702 if (VTBits == 32 && STI.getSmVersion() >= 35) { 1703 1704 // For 32bit and sm35, we can use the funnel shift 'shf' instruction. 1705 // {dHi, dLo} = {aHi, aLo} >> Amt 1706 // dHi = aHi >> Amt 1707 // dLo = shf.r.clamp aLo, aHi, Amt 1708 1709 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 1710 SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi, 1711 ShAmt); 1712 1713 SDValue Ops[2] = { Lo, Hi }; 1714 return DAG.getMergeValues(Ops, dl); 1715 } 1716 else { 1717 1718 // {dHi, dLo} = {aHi, aLo} >> Amt 1719 // - if (Amt>=size) then 1720 // dLo = aHi >> (Amt-size) 1721 // dHi = aHi >> Amt (this is either all 0 or all 1) 1722 // else 1723 // dLo = (aLo >>logic Amt) | (aHi << (size-Amt)) 1724 // dHi = aHi >> Amt 1725 1726 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 1727 DAG.getConstant(VTBits, dl, MVT::i32), 1728 ShAmt); 1729 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 1730 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 1731 DAG.getConstant(VTBits, dl, MVT::i32)); 1732 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 1733 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 1734 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 1735 1736 SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, 1737 DAG.getConstant(VTBits, dl, MVT::i32), 1738 ISD::SETGE); 1739 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 1740 SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); 1741 1742 SDValue Ops[2] = { Lo, Hi }; 1743 return DAG.getMergeValues(Ops, dl); 1744 } 1745 } 1746 1747 /// LowerShiftLeftParts - Lower SHL_PARTS, which 1748 /// 1) returns two i32 values and takes a 2 x i32 value to shift plus a shift 1749 /// amount, or 1750 /// 2) returns two i64 values and takes a 2 x i64 value to shift plus a shift 1751 /// amount.
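/// On sm_35 and later the 32-bit case is lowered to the PTX funnel-shift
/// (shf.l.clamp) instruction; older targets fall back to a shift/or/select
/// sequence.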
1752 SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op, 1753 SelectionDAG &DAG) const { 1754 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 1755 assert(Op.getOpcode() == ISD::SHL_PARTS); 1756 1757 EVT VT = Op.getValueType(); 1758 unsigned VTBits = VT.getSizeInBits(); 1759 SDLoc dl(Op); 1760 SDValue ShOpLo = Op.getOperand(0); 1761 SDValue ShOpHi = Op.getOperand(1); 1762 SDValue ShAmt = Op.getOperand(2); 1763 1764 if (VTBits == 32 && STI.getSmVersion() >= 35) { 1765 1766 // For 32bit and sm35, we can use the funnel shift 'shf' instruction. 1767 // {dHi, dLo} = {aHi, aLo} << Amt 1768 // dHi = shf.l.clamp aLo, aHi, Amt 1769 // dLo = aLo << Amt 1770 1771 SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi, 1772 ShAmt); 1773 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 1774 1775 SDValue Ops[2] = { Lo, Hi }; 1776 return DAG.getMergeValues(Ops, dl); 1777 } 1778 else { 1779 1780 // {dHi, dLo} = {aHi, aLo} << Amt 1781 // - if (Amt>=size) then 1782 // dLo = aLo << Amt (all 0) 1783 // dHi = aLo << (Amt-size) 1784 // else 1785 // dLo = aLo << Amt 1786 // dHi = (aHi << Amt) | (aLo >> (size-Amt)) 1787 1788 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 1789 DAG.getConstant(VTBits, dl, MVT::i32), 1790 ShAmt); 1791 SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 1792 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 1793 DAG.getConstant(VTBits, dl, MVT::i32)); 1794 SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 1795 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 1796 SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 1797 1798 SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, 1799 DAG.getConstant(VTBits, dl, MVT::i32), 1800 ISD::SETGE); 1801 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 1802 SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); 1803 1804 SDValue Ops[2] = { Lo, Hi }; 1805 return DAG.getMergeValues(Ops, dl); 1806 } 1807 } 1808 1809 SDValue 1810 NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 1811 switch (Op.getOpcode()) { 1812 case ISD::RETURNADDR: 1813 return SDValue(); 1814 case ISD::FRAMEADDR: 1815 return SDValue(); 1816 case ISD::GlobalAddress: 1817 return LowerGlobalAddress(Op, DAG); 1818 case ISD::INTRINSIC_W_CHAIN: 1819 return Op; 1820 case ISD::BUILD_VECTOR: 1821 case ISD::EXTRACT_SUBVECTOR: 1822 return Op; 1823 case ISD::CONCAT_VECTORS: 1824 return LowerCONCAT_VECTORS(Op, DAG); 1825 case ISD::STORE: 1826 return LowerSTORE(Op, DAG); 1827 case ISD::LOAD: 1828 return LowerLOAD(Op, DAG); 1829 case ISD::SHL_PARTS: 1830 return LowerShiftLeftParts(Op, DAG); 1831 case ISD::SRA_PARTS: 1832 case ISD::SRL_PARTS: 1833 return LowerShiftRightParts(Op, DAG); 1834 case ISD::SELECT: 1835 return LowerSelect(Op, DAG); 1836 default: 1837 llvm_unreachable("Custom lowering not defined for operation"); 1838 } 1839 } 1840 1841 SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const { 1842 SDValue Op0 = Op->getOperand(0); 1843 SDValue Op1 = Op->getOperand(1); 1844 SDValue Op2 = Op->getOperand(2); 1845 SDLoc DL(Op.getNode()); 1846 1847 assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1"); 1848 1849 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); 1850 Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); 1851 SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2); 1852 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select); 1853 1854
return Trunc; 1855 } 1856 1857 SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 1858 if (Op.getValueType() == MVT::i1) 1859 return LowerLOADi1(Op, DAG); 1860 else 1861 return SDValue(); 1862 } 1863 1864 // v = ld i1* addr 1865 // => 1866 // v1 = ld i8* addr (-> i16) 1867 // v = trunc i16 to i1 1868 SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { 1869 SDNode *Node = Op.getNode(); 1870 LoadSDNode *LD = cast<LoadSDNode>(Node); 1871 SDLoc dl(Node); 1872 assert(LD->getExtensionType() == ISD::NON_EXTLOAD); 1873 assert(Node->getValueType(0) == MVT::i1 && 1874 "Custom lowering for i1 load only"); 1875 SDValue newLD = 1876 DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(), 1877 LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), 1878 LD->isInvariant(), LD->getAlignment()); 1879 SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); 1880 // The legalizer (the caller) is expecting two values from the legalized 1881 // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() 1882 // in LegalizeDAG.cpp which also uses MergeValues. 1883 SDValue Ops[] = { result, LD->getChain() }; 1884 return DAG.getMergeValues(Ops, dl); 1885 } 1886 1887 SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 1888 EVT ValVT = Op.getOperand(1).getValueType(); 1889 if (ValVT == MVT::i1) 1890 return LowerSTOREi1(Op, DAG); 1891 else if (ValVT.isVector()) 1892 return LowerSTOREVector(Op, DAG); 1893 else 1894 return SDValue(); 1895 } 1896 1897 SDValue 1898 NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { 1899 SDNode *N = Op.getNode(); 1900 SDValue Val = N->getOperand(1); 1901 SDLoc DL(N); 1902 EVT ValVT = Val.getValueType(); 1903 1904 if (ValVT.isVector()) { 1905 // We only handle "native" vector sizes for now, e.g. <4 x double> is not 1906 // legal. We can (and should) split that into 2 stores of <2 x double> here 1907 // but I'm leaving that as a TODO for now. 1908 if (!ValVT.isSimple()) 1909 return SDValue(); 1910 switch (ValVT.getSimpleVT().SimpleTy) { 1911 default: 1912 return SDValue(); 1913 case MVT::v2i8: 1914 case MVT::v2i16: 1915 case MVT::v2i32: 1916 case MVT::v2i64: 1917 case MVT::v2f32: 1918 case MVT::v2f64: 1919 case MVT::v4i8: 1920 case MVT::v4i16: 1921 case MVT::v4i32: 1922 case MVT::v4f32: 1923 // This is a "native" vector type 1924 break; 1925 } 1926 1927 MemSDNode *MemSD = cast<MemSDNode>(N); 1928 const DataLayout &TD = DAG.getDataLayout(); 1929 1930 unsigned Align = MemSD->getAlignment(); 1931 unsigned PrefAlign = 1932 TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); 1933 if (Align < PrefAlign) { 1934 // This store is not sufficiently aligned, so bail out and let this vector 1935 // store be scalarized. Note that we may still be able to emit smaller 1936 // vector stores. For example, if we are storing a <4 x float> with an 1937 // alignment of 8, this check will fail but the legalizer will try again 1938 // with 2 x <2 x float>, which will succeed with an alignment of 8. 1939 return SDValue(); 1940 } 1941 1942 unsigned Opcode = 0; 1943 EVT EltVT = ValVT.getVectorElementType(); 1944 unsigned NumElts = ValVT.getVectorNumElements(); 1945 1946 // Since StoreV2 is a target node, we cannot rely on DAG type legalization. 1947 // Therefore, we must ensure the type is legal. For i1 and i8, we set the 1948 // stored type to i16 and propagate the "real" type as the memory type. 
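// The widened i16 value only gives the operand a legal register type; the
// MemSD->getMemoryVT() passed to getMemIntrinsicNode below still records the
// original element width, so the store is emitted with the right memory size.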
1949 bool NeedExt = false; 1950 if (EltVT.getSizeInBits() < 16) 1951 NeedExt = true; 1952 1953 switch (NumElts) { 1954 default: 1955 return SDValue(); 1956 case 2: 1957 Opcode = NVPTXISD::StoreV2; 1958 break; 1959 case 4: { 1960 Opcode = NVPTXISD::StoreV4; 1961 break; 1962 } 1963 } 1964 1965 SmallVector<SDValue, 8> Ops; 1966 1967 // First is the chain 1968 Ops.push_back(N->getOperand(0)); 1969 1970 // Then the split values 1971 for (unsigned i = 0; i < NumElts; ++i) { 1972 SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, 1973 DAG.getIntPtrConstant(i, DL)); 1974 if (NeedExt) 1975 ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); 1976 Ops.push_back(ExtVal); 1977 } 1978 1979 // Then any remaining arguments 1980 Ops.append(N->op_begin() + 2, N->op_end()); 1981 1982 SDValue NewSt = DAG.getMemIntrinsicNode( 1983 Opcode, DL, DAG.getVTList(MVT::Other), Ops, 1984 MemSD->getMemoryVT(), MemSD->getMemOperand()); 1985 1986 //return DCI.CombineTo(N, NewSt, true); 1987 return NewSt; 1988 } 1989 1990 return SDValue(); 1991 } 1992 1993 // st i1 v, addr 1994 // => 1995 // v1 = zxt v to i16 1996 // st.u8 i16, addr 1997 SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { 1998 SDNode *Node = Op.getNode(); 1999 SDLoc dl(Node); 2000 StoreSDNode *ST = cast<StoreSDNode>(Node); 2001 SDValue Tmp1 = ST->getChain(); 2002 SDValue Tmp2 = ST->getBasePtr(); 2003 SDValue Tmp3 = ST->getValue(); 2004 assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only"); 2005 unsigned Alignment = ST->getAlignment(); 2006 bool isVolatile = ST->isVolatile(); 2007 bool isNonTemporal = ST->isNonTemporal(); 2008 Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3); 2009 SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, 2010 ST->getPointerInfo(), MVT::i8, isNonTemporal, 2011 isVolatile, Alignment); 2012 return Result; 2013 } 2014 2015 SDValue 2016 NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { 2017 std::string ParamSym; 2018 raw_string_ostream ParamStr(ParamSym); 2019 2020 ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx; 2021 ParamStr.flush(); 2022 2023 std::string *SavedStr = 2024 nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str()); 2025 return DAG.getTargetExternalSymbol(SavedStr->c_str(), v); 2026 } 2027 2028 // Check to see if the kernel argument is image*_t or sampler_t 2029 2030 bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { 2031 static const char *const specialTypes[] = { "struct._image2d_t", 2032 "struct._image3d_t", 2033 "struct._sampler_t" }; 2034 2035 Type *Ty = arg->getType(); 2036 auto *PTy = dyn_cast<PointerType>(Ty); 2037 2038 if (!PTy) 2039 return false; 2040 2041 if (!context) 2042 return false; 2043 2044 auto *STy = dyn_cast<StructType>(PTy->getElementType()); 2045 const std::string TypeName = STy && !STy->isLiteral() ? 
STy->getName() : ""; 2046 2047 return std::find(std::begin(specialTypes), std::end(specialTypes), 2048 TypeName) != std::end(specialTypes); 2049 } 2050 2051 SDValue NVPTXTargetLowering::LowerFormalArguments( 2052 SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 2053 const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG, 2054 SmallVectorImpl<SDValue> &InVals) const { 2055 MachineFunction &MF = DAG.getMachineFunction(); 2056 const DataLayout &DL = DAG.getDataLayout(); 2057 auto PtrVT = getPointerTy(DAG.getDataLayout()); 2058 2059 const Function *F = MF.getFunction(); 2060 const AttributeSet &PAL = F->getAttributes(); 2061 const TargetLowering *TLI = STI.getTargetLowering(); 2062 2063 SDValue Root = DAG.getRoot(); 2064 std::vector<SDValue> OutChains; 2065 2066 bool isKernel = llvm::isKernelFunction(*F); 2067 bool isABI = (STI.getSmVersion() >= 20); 2068 assert(isABI && "Non-ABI compilation is not supported"); 2069 if (!isABI) 2070 return Chain; 2071 2072 std::vector<Type *> argTypes; 2073 std::vector<const Argument *> theArgs; 2074 for (const Argument &I : F->args()) { 2075 theArgs.push_back(&I); 2076 argTypes.push_back(I.getType()); 2077 } 2078 // argTypes.size() (or theArgs.size()) and Ins.size() need not match. 2079 // Ins.size() will be larger 2080 // * if there is an aggregate argument with multiple fields (each field 2081 // showing up separately in Ins) 2082 // * if there is a vector argument with more than typical vector-length 2083 // elements (generally if more than 4) where each vector element is 2084 // individually present in Ins. 2085 // So a different index should be used for indexing into Ins. 2086 // See similar issue in LowerCall. 2087 unsigned InsIdx = 0; 2088 2089 int idx = 0; 2090 for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) { 2091 Type *Ty = argTypes[i]; 2092 2093 // If the kernel argument is image*_t or sampler_t, convert it to 2094 // an i32 constant holding the parameter position. This can later be 2095 // matched in the AsmPrinter to output the correct mangled name. 2096 if (isImageOrSamplerVal( 2097 theArgs[i], 2098 (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() 2099 : nullptr))) { 2100 assert(isKernel && "Only kernels can have image/sampler params"); 2101 InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32)); 2102 continue; 2103 } 2104 2105 if (theArgs[i]->use_empty()) { 2106 // argument is dead 2107 if (Ty->isAggregateType()) { 2108 SmallVector<EVT, 16> vtparts; 2109 2110 ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts); 2111 assert(vtparts.size() > 0 && "empty aggregate type not expected"); 2112 for (unsigned parti = 0, parte = vtparts.size(); parti != parte; 2113 ++parti) { 2114 InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); 2115 ++InsIdx; 2116 } 2117 if (vtparts.size() > 0) 2118 --InsIdx; 2119 continue; 2120 } 2121 if (Ty->isVectorTy()) { 2122 EVT ObjectVT = getValueType(DL, Ty); 2123 unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); 2124 for (unsigned parti = 0; parti < NumRegs; ++parti) { 2125 InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); 2126 ++InsIdx; 2127 } 2128 if (NumRegs > 0) 2129 --InsIdx; 2130 continue; 2131 } 2132 InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); 2133 continue; 2134 } 2135 2136 // In the following cases, assign a node order of "idx+1" 2137 // to newly created nodes. The SDNodes for params have to 2138 // appear in the same order as their order of appearance 2139 // in the original function.
"idx+1" holds that order. 2140 if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) { 2141 if (Ty->isAggregateType()) { 2142 SmallVector<EVT, 16> vtparts; 2143 SmallVector<uint64_t, 16> offsets; 2144 2145 // NOTE: Here, we lose the ability to issue vector loads for vectors 2146 // that are a part of a struct. This should be investigated in the 2147 // future. 2148 ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &offsets, 2149 0); 2150 assert(vtparts.size() > 0 && "empty aggregate type not expected"); 2151 bool aggregateIsPacked = false; 2152 if (StructType *STy = llvm::dyn_cast<StructType>(Ty)) 2153 aggregateIsPacked = STy->isPacked(); 2154 2155 SDValue Arg = getParamSymbol(DAG, idx, PtrVT); 2156 for (unsigned parti = 0, parte = vtparts.size(); parti != parte; 2157 ++parti) { 2158 EVT partVT = vtparts[parti]; 2159 Value *srcValue = Constant::getNullValue( 2160 PointerType::get(partVT.getTypeForEVT(F->getContext()), 2161 llvm::ADDRESS_SPACE_PARAM)); 2162 SDValue srcAddr = 2163 DAG.getNode(ISD::ADD, dl, PtrVT, Arg, 2164 DAG.getConstant(offsets[parti], dl, PtrVT)); 2165 unsigned partAlign = aggregateIsPacked 2166 ? 1 2167 : DL.getABITypeAlignment( 2168 partVT.getTypeForEVT(F->getContext())); 2169 SDValue p; 2170 if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) { 2171 ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? 2172 ISD::SEXTLOAD : ISD::ZEXTLOAD; 2173 p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr, 2174 MachinePointerInfo(srcValue), partVT, false, 2175 false, false, partAlign); 2176 } else { 2177 p = DAG.getLoad(partVT, dl, Root, srcAddr, 2178 MachinePointerInfo(srcValue), false, false, false, 2179 partAlign); 2180 } 2181 if (p.getNode()) 2182 p.getNode()->setIROrder(idx + 1); 2183 InVals.push_back(p); 2184 ++InsIdx; 2185 } 2186 if (vtparts.size() > 0) 2187 --InsIdx; 2188 continue; 2189 } 2190 if (Ty->isVectorTy()) { 2191 EVT ObjectVT = getValueType(DL, Ty); 2192 SDValue Arg = getParamSymbol(DAG, idx, PtrVT); 2193 unsigned NumElts = ObjectVT.getVectorNumElements(); 2194 assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && 2195 "Vector was not scalarized"); 2196 EVT EltVT = ObjectVT.getVectorElementType(); 2197 2198 // V1 load 2199 // f32 = load ... 2200 if (NumElts == 1) { 2201 // We only have one element, so just directly load it 2202 Value *SrcValue = Constant::getNullValue(PointerType::get( 2203 EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); 2204 SDValue P = DAG.getLoad( 2205 EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false, 2206 true, 2207 DL.getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); 2208 if (P.getNode()) 2209 P.getNode()->setIROrder(idx + 1); 2210 2211 if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) 2212 P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P); 2213 InVals.push_back(P); 2214 ++InsIdx; 2215 } else if (NumElts == 2) { 2216 // V2 load 2217 // f32,f32 = load ... 
2218 EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2); 2219 Value *SrcValue = Constant::getNullValue(PointerType::get( 2220 VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); 2221 SDValue P = DAG.getLoad( 2222 VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false, 2223 true, 2224 DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); 2225 if (P.getNode()) 2226 P.getNode()->setIROrder(idx + 1); 2227 2228 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, 2229 DAG.getIntPtrConstant(0, dl)); 2230 SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, 2231 DAG.getIntPtrConstant(1, dl)); 2232 2233 if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) { 2234 Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0); 2235 Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1); 2236 } 2237 2238 InVals.push_back(Elt0); 2239 InVals.push_back(Elt1); 2240 InsIdx += 2; 2241 } else { 2242 // V4 loads 2243 // We have at least 4 elements (<3 x Ty> expands to 4 elements) and 2244 // the 2245 // vector will be expanded to a power of 2 elements, so we know we can 2246 // always round up to the next multiple of 4 when creating the vector 2247 // loads. 2248 // e.g. 4 elem => 1 ld.v4 2249 // 6 elem => 2 ld.v4 2250 // 8 elem => 2 ld.v4 2251 // 11 elem => 3 ld.v4 2252 unsigned VecSize = 4; 2253 if (EltVT.getSizeInBits() == 64) { 2254 VecSize = 2; 2255 } 2256 EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); 2257 unsigned Ofst = 0; 2258 for (unsigned i = 0; i < NumElts; i += VecSize) { 2259 Value *SrcValue = Constant::getNullValue( 2260 PointerType::get(VecVT.getTypeForEVT(F->getContext()), 2261 llvm::ADDRESS_SPACE_PARAM)); 2262 SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, 2263 DAG.getConstant(Ofst, dl, PtrVT)); 2264 SDValue P = DAG.getLoad( 2265 VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, 2266 false, true, 2267 DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); 2268 if (P.getNode()) 2269 P.getNode()->setIROrder(idx + 1); 2270 2271 for (unsigned j = 0; j < VecSize; ++j) { 2272 if (i + j >= NumElts) 2273 break; 2274 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, 2275 DAG.getIntPtrConstant(j, dl)); 2276 if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) 2277 Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt); 2278 InVals.push_back(Elt); 2279 } 2280 Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); 2281 } 2282 InsIdx += NumElts; 2283 } 2284 2285 if (NumElts > 0) 2286 --InsIdx; 2287 continue; 2288 } 2289 // A plain scalar. 2290 EVT ObjectVT = getValueType(DL, Ty); 2291 // If ABI, load from the param symbol 2292 SDValue Arg = getParamSymbol(DAG, idx, PtrVT); 2293 Value *srcValue = Constant::getNullValue(PointerType::get( 2294 ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); 2295 SDValue p; 2296 if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) { 2297 ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? 
2298 ISD::SEXTLOAD : ISD::ZEXTLOAD; 2299 p = DAG.getExtLoad( 2300 ExtOp, dl, Ins[InsIdx].VT, Root, Arg, MachinePointerInfo(srcValue), 2301 ObjectVT, false, false, false, 2302 DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); 2303 } else { 2304 p = DAG.getLoad( 2305 Ins[InsIdx].VT, dl, Root, Arg, MachinePointerInfo(srcValue), false, 2306 false, false, 2307 DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); 2308 } 2309 if (p.getNode()) 2310 p.getNode()->setIROrder(idx + 1); 2311 InVals.push_back(p); 2312 continue; 2313 } 2314 2315 // Param has ByVal attribute 2316 // Return MoveParam(param symbol). 2317 // Ideally, the param symbol can be returned directly, 2318 // but when the SDNode builder decides to use it in a CopyToReg(), 2319 // the machine instruction fails because TargetExternalSymbol 2320 // (not lowered) is target dependent, and CopyToReg assumes 2321 // the source is lowered. 2322 EVT ObjectVT = getValueType(DL, Ty); 2323 assert(ObjectVT == Ins[InsIdx].VT && 2324 "Ins type did not match function type"); 2325 SDValue Arg = getParamSymbol(DAG, idx, PtrVT); 2326 SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); 2327 if (p.getNode()) 2328 p.getNode()->setIROrder(idx + 1); 2329 if (isKernel) 2330 InVals.push_back(p); 2331 else { 2332 SDValue p2 = DAG.getNode( 2333 ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, 2334 DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, dl, MVT::i32), p); 2335 InVals.push_back(p2); 2336 } 2337 } 2338 2339 // Clang will check explicit VarArg and issue an error if any. However, Clang 2340 // will let code with 2341 // implicit var arg like f() pass. See bug 617733. 2342 // We treat this case as if the arg list is empty. 2343 // if (F.isVarArg()) { 2344 // assert(0 && "VarArg not supported yet!"); 2345 //} 2346 2347 if (!OutChains.empty()) 2348 DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains)); 2349 2350 return Chain; 2351 } 2352 2353 2354 SDValue 2355 NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 2356 bool isVarArg, 2357 const SmallVectorImpl<ISD::OutputArg> &Outs, 2358 const SmallVectorImpl<SDValue> &OutVals, 2359 SDLoc dl, SelectionDAG &DAG) const { 2360 MachineFunction &MF = DAG.getMachineFunction(); 2361 const Function *F = MF.getFunction(); 2362 Type *RetTy = F->getReturnType(); 2363 const DataLayout &TD = DAG.getDataLayout(); 2364 2365 bool isABI = (STI.getSmVersion() >= 20); 2366 assert(isABI && "Non-ABI compilation is not supported"); 2367 if (!isABI) 2368 return Chain; 2369 2370 if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) { 2371 // If we have a vector type, the OutVals array will be the scalarized 2372 // components and we have to combine them into 1 or more vector stores.
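// Depending on the element count this becomes a single StoreRetval, a
// StoreRetvalV2, or a series of StoreRetvalV2/V4 nodes.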
2373 unsigned NumElts = VTy->getNumElements(); 2374 assert(NumElts == Outs.size() && "Bad scalarization of return value"); 2375 2376 // const_cast can be removed in later LLVM versions 2377 EVT EltVT = getValueType(TD, RetTy).getVectorElementType(); 2378 bool NeedExtend = false; 2379 if (EltVT.getSizeInBits() < 16) 2380 NeedExtend = true; 2381 2382 // V1 store 2383 if (NumElts == 1) { 2384 SDValue StoreVal = OutVals[0]; 2385 // We only have one element, so just directly store it 2386 if (NeedExtend) 2387 StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); 2388 SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal }; 2389 Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, 2390 DAG.getVTList(MVT::Other), Ops, 2391 EltVT, MachinePointerInfo()); 2392 2393 } else if (NumElts == 2) { 2394 // V2 store 2395 SDValue StoreVal0 = OutVals[0]; 2396 SDValue StoreVal1 = OutVals[1]; 2397 2398 if (NeedExtend) { 2399 StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0); 2400 StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1); 2401 } 2402 2403 SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal0, 2404 StoreVal1 }; 2405 Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl, 2406 DAG.getVTList(MVT::Other), Ops, 2407 EltVT, MachinePointerInfo()); 2408 } else { 2409 // V4 stores 2410 // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the 2411 // vector will be expanded to a power of 2 elements, so we know we can 2412 // always round up to the next multiple of 4 when creating the vector 2413 // stores. 2414 // e.g. 4 elem => 1 st.v4 2415 // 6 elem => 2 st.v4 2416 // 8 elem => 2 st.v4 2417 // 11 elem => 3 st.v4 2418 2419 unsigned VecSize = 4; 2420 if (OutVals[0].getValueType().getSizeInBits() == 64) 2421 VecSize = 2; 2422 2423 unsigned Offset = 0; 2424 2425 EVT VecVT = 2426 EVT::getVectorVT(F->getContext(), EltVT, VecSize); 2427 unsigned PerStoreOffset = 2428 TD.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); 2429 2430 for (unsigned i = 0; i < NumElts; i += VecSize) { 2431 // Get values 2432 SDValue StoreVal; 2433 SmallVector<SDValue, 8> Ops; 2434 Ops.push_back(Chain); 2435 Ops.push_back(DAG.getConstant(Offset, dl, MVT::i32)); 2436 unsigned Opc = NVPTXISD::StoreRetvalV2; 2437 EVT ExtendedVT = (NeedExtend) ? 
MVT::i16 : OutVals[0].getValueType(); 2438 2439 StoreVal = OutVals[i]; 2440 if (NeedExtend) 2441 StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); 2442 Ops.push_back(StoreVal); 2443 2444 if (i + 1 < NumElts) { 2445 StoreVal = OutVals[i + 1]; 2446 if (NeedExtend) 2447 StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); 2448 } else { 2449 StoreVal = DAG.getUNDEF(ExtendedVT); 2450 } 2451 Ops.push_back(StoreVal); 2452 2453 if (VecSize == 4) { 2454 Opc = NVPTXISD::StoreRetvalV4; 2455 if (i + 2 < NumElts) { 2456 StoreVal = OutVals[i + 2]; 2457 if (NeedExtend) 2458 StoreVal = 2459 DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); 2460 } else { 2461 StoreVal = DAG.getUNDEF(ExtendedVT); 2462 } 2463 Ops.push_back(StoreVal); 2464 2465 if (i + 3 < NumElts) { 2466 StoreVal = OutVals[i + 3]; 2467 if (NeedExtend) 2468 StoreVal = 2469 DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal); 2470 } else { 2471 StoreVal = DAG.getUNDEF(ExtendedVT); 2472 } 2473 Ops.push_back(StoreVal); 2474 } 2475 2476 // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size()); 2477 Chain = 2478 DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops, 2479 EltVT, MachinePointerInfo()); 2480 Offset += PerStoreOffset; 2481 } 2482 } 2483 } else { 2484 SmallVector<EVT, 16> ValVTs; 2485 SmallVector<uint64_t, 16> Offsets; 2486 ComputePTXValueVTs(*this, DAG.getDataLayout(), RetTy, ValVTs, &Offsets, 0); 2487 assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition"); 2488 2489 for (unsigned i = 0, e = Outs.size(); i != e; ++i) { 2490 SDValue theVal = OutVals[i]; 2491 EVT TheValType = theVal.getValueType(); 2492 unsigned numElems = 1; 2493 if (TheValType.isVector()) 2494 numElems = TheValType.getVectorNumElements(); 2495 for (unsigned j = 0, je = numElems; j != je; ++j) { 2496 SDValue TmpVal = theVal; 2497 if (TheValType.isVector()) 2498 TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, 2499 TheValType.getVectorElementType(), TmpVal, 2500 DAG.getIntPtrConstant(j, dl)); 2501 EVT TheStoreType = ValVTs[i]; 2502 if (RetTy->isIntegerTy() && TD.getTypeAllocSizeInBits(RetTy) < 32) { 2503 // The following zero-extension is for integer types only, and 2504 // specifically not for aggregates. 
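// Small integer return values are widened to 32 bits so that the store width
// matches the at-least-.b32 return parameter declaration used for scalars.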
2505 TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal); 2506 TheStoreType = MVT::i32; 2507 } 2508 else if (TmpVal.getValueType().getSizeInBits() < 16) 2509 TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal); 2510 2511 SDValue Ops[] = { 2512 Chain, 2513 DAG.getConstant(Offsets[i], dl, MVT::i32), 2514 TmpVal }; 2515 Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, 2516 DAG.getVTList(MVT::Other), Ops, 2517 TheStoreType, 2518 MachinePointerInfo()); 2519 } 2520 } 2521 } 2522 2523 return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); 2524 } 2525 2526 2527 void NVPTXTargetLowering::LowerAsmOperandForConstraint( 2528 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, 2529 SelectionDAG &DAG) const { 2530 if (Constraint.length() > 1) 2531 return; 2532 else 2533 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 2534 } 2535 2536 static unsigned getOpcForTextureInstr(unsigned Intrinsic) { 2537 switch (Intrinsic) { 2538 default: 2539 return 0; 2540 2541 case Intrinsic::nvvm_tex_1d_v4f32_s32: 2542 return NVPTXISD::Tex1DFloatS32; 2543 case Intrinsic::nvvm_tex_1d_v4f32_f32: 2544 return NVPTXISD::Tex1DFloatFloat; 2545 case Intrinsic::nvvm_tex_1d_level_v4f32_f32: 2546 return NVPTXISD::Tex1DFloatFloatLevel; 2547 case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: 2548 return NVPTXISD::Tex1DFloatFloatGrad; 2549 case Intrinsic::nvvm_tex_1d_v4s32_s32: 2550 return NVPTXISD::Tex1DS32S32; 2551 case Intrinsic::nvvm_tex_1d_v4s32_f32: 2552 return NVPTXISD::Tex1DS32Float; 2553 case Intrinsic::nvvm_tex_1d_level_v4s32_f32: 2554 return NVPTXISD::Tex1DS32FloatLevel; 2555 case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: 2556 return NVPTXISD::Tex1DS32FloatGrad; 2557 case Intrinsic::nvvm_tex_1d_v4u32_s32: 2558 return NVPTXISD::Tex1DU32S32; 2559 case Intrinsic::nvvm_tex_1d_v4u32_f32: 2560 return NVPTXISD::Tex1DU32Float; 2561 case Intrinsic::nvvm_tex_1d_level_v4u32_f32: 2562 return NVPTXISD::Tex1DU32FloatLevel; 2563 case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: 2564 return NVPTXISD::Tex1DU32FloatGrad; 2565 2566 case Intrinsic::nvvm_tex_1d_array_v4f32_s32: 2567 return NVPTXISD::Tex1DArrayFloatS32; 2568 case Intrinsic::nvvm_tex_1d_array_v4f32_f32: 2569 return NVPTXISD::Tex1DArrayFloatFloat; 2570 case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: 2571 return NVPTXISD::Tex1DArrayFloatFloatLevel; 2572 case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: 2573 return NVPTXISD::Tex1DArrayFloatFloatGrad; 2574 case Intrinsic::nvvm_tex_1d_array_v4s32_s32: 2575 return NVPTXISD::Tex1DArrayS32S32; 2576 case Intrinsic::nvvm_tex_1d_array_v4s32_f32: 2577 return NVPTXISD::Tex1DArrayS32Float; 2578 case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: 2579 return NVPTXISD::Tex1DArrayS32FloatLevel; 2580 case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: 2581 return NVPTXISD::Tex1DArrayS32FloatGrad; 2582 case Intrinsic::nvvm_tex_1d_array_v4u32_s32: 2583 return NVPTXISD::Tex1DArrayU32S32; 2584 case Intrinsic::nvvm_tex_1d_array_v4u32_f32: 2585 return NVPTXISD::Tex1DArrayU32Float; 2586 case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: 2587 return NVPTXISD::Tex1DArrayU32FloatLevel; 2588 case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: 2589 return NVPTXISD::Tex1DArrayU32FloatGrad; 2590 2591 case Intrinsic::nvvm_tex_2d_v4f32_s32: 2592 return NVPTXISD::Tex2DFloatS32; 2593 case Intrinsic::nvvm_tex_2d_v4f32_f32: 2594 return NVPTXISD::Tex2DFloatFloat; 2595 case Intrinsic::nvvm_tex_2d_level_v4f32_f32: 2596 return NVPTXISD::Tex2DFloatFloatLevel; 2597 case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: 2598 return 
NVPTXISD::Tex2DFloatFloatGrad; 2599 case Intrinsic::nvvm_tex_2d_v4s32_s32: 2600 return NVPTXISD::Tex2DS32S32; 2601 case Intrinsic::nvvm_tex_2d_v4s32_f32: 2602 return NVPTXISD::Tex2DS32Float; 2603 case Intrinsic::nvvm_tex_2d_level_v4s32_f32: 2604 return NVPTXISD::Tex2DS32FloatLevel; 2605 case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: 2606 return NVPTXISD::Tex2DS32FloatGrad; 2607 case Intrinsic::nvvm_tex_2d_v4u32_s32: 2608 return NVPTXISD::Tex2DU32S32; 2609 case Intrinsic::nvvm_tex_2d_v4u32_f32: 2610 return NVPTXISD::Tex2DU32Float; 2611 case Intrinsic::nvvm_tex_2d_level_v4u32_f32: 2612 return NVPTXISD::Tex2DU32FloatLevel; 2613 case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: 2614 return NVPTXISD::Tex2DU32FloatGrad; 2615 2616 case Intrinsic::nvvm_tex_2d_array_v4f32_s32: 2617 return NVPTXISD::Tex2DArrayFloatS32; 2618 case Intrinsic::nvvm_tex_2d_array_v4f32_f32: 2619 return NVPTXISD::Tex2DArrayFloatFloat; 2620 case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: 2621 return NVPTXISD::Tex2DArrayFloatFloatLevel; 2622 case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: 2623 return NVPTXISD::Tex2DArrayFloatFloatGrad; 2624 case Intrinsic::nvvm_tex_2d_array_v4s32_s32: 2625 return NVPTXISD::Tex2DArrayS32S32; 2626 case Intrinsic::nvvm_tex_2d_array_v4s32_f32: 2627 return NVPTXISD::Tex2DArrayS32Float; 2628 case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: 2629 return NVPTXISD::Tex2DArrayS32FloatLevel; 2630 case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: 2631 return NVPTXISD::Tex2DArrayS32FloatGrad; 2632 case Intrinsic::nvvm_tex_2d_array_v4u32_s32: 2633 return NVPTXISD::Tex2DArrayU32S32; 2634 case Intrinsic::nvvm_tex_2d_array_v4u32_f32: 2635 return NVPTXISD::Tex2DArrayU32Float; 2636 case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: 2637 return NVPTXISD::Tex2DArrayU32FloatLevel; 2638 case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: 2639 return NVPTXISD::Tex2DArrayU32FloatGrad; 2640 2641 case Intrinsic::nvvm_tex_3d_v4f32_s32: 2642 return NVPTXISD::Tex3DFloatS32; 2643 case Intrinsic::nvvm_tex_3d_v4f32_f32: 2644 return NVPTXISD::Tex3DFloatFloat; 2645 case Intrinsic::nvvm_tex_3d_level_v4f32_f32: 2646 return NVPTXISD::Tex3DFloatFloatLevel; 2647 case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: 2648 return NVPTXISD::Tex3DFloatFloatGrad; 2649 case Intrinsic::nvvm_tex_3d_v4s32_s32: 2650 return NVPTXISD::Tex3DS32S32; 2651 case Intrinsic::nvvm_tex_3d_v4s32_f32: 2652 return NVPTXISD::Tex3DS32Float; 2653 case Intrinsic::nvvm_tex_3d_level_v4s32_f32: 2654 return NVPTXISD::Tex3DS32FloatLevel; 2655 case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: 2656 return NVPTXISD::Tex3DS32FloatGrad; 2657 case Intrinsic::nvvm_tex_3d_v4u32_s32: 2658 return NVPTXISD::Tex3DU32S32; 2659 case Intrinsic::nvvm_tex_3d_v4u32_f32: 2660 return NVPTXISD::Tex3DU32Float; 2661 case Intrinsic::nvvm_tex_3d_level_v4u32_f32: 2662 return NVPTXISD::Tex3DU32FloatLevel; 2663 case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: 2664 return NVPTXISD::Tex3DU32FloatGrad; 2665 2666 case Intrinsic::nvvm_tex_cube_v4f32_f32: 2667 return NVPTXISD::TexCubeFloatFloat; 2668 case Intrinsic::nvvm_tex_cube_level_v4f32_f32: 2669 return NVPTXISD::TexCubeFloatFloatLevel; 2670 case Intrinsic::nvvm_tex_cube_v4s32_f32: 2671 return NVPTXISD::TexCubeS32Float; 2672 case Intrinsic::nvvm_tex_cube_level_v4s32_f32: 2673 return NVPTXISD::TexCubeS32FloatLevel; 2674 case Intrinsic::nvvm_tex_cube_v4u32_f32: 2675 return NVPTXISD::TexCubeU32Float; 2676 case Intrinsic::nvvm_tex_cube_level_v4u32_f32: 2677 return NVPTXISD::TexCubeU32FloatLevel; 2678 2679 case Intrinsic::nvvm_tex_cube_array_v4f32_f32: 2680 return 
NVPTXISD::TexCubeArrayFloatFloat; 2681 case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: 2682 return NVPTXISD::TexCubeArrayFloatFloatLevel; 2683 case Intrinsic::nvvm_tex_cube_array_v4s32_f32: 2684 return NVPTXISD::TexCubeArrayS32Float; 2685 case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: 2686 return NVPTXISD::TexCubeArrayS32FloatLevel; 2687 case Intrinsic::nvvm_tex_cube_array_v4u32_f32: 2688 return NVPTXISD::TexCubeArrayU32Float; 2689 case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: 2690 return NVPTXISD::TexCubeArrayU32FloatLevel; 2691 2692 case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: 2693 return NVPTXISD::Tld4R2DFloatFloat; 2694 case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: 2695 return NVPTXISD::Tld4G2DFloatFloat; 2696 case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: 2697 return NVPTXISD::Tld4B2DFloatFloat; 2698 case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: 2699 return NVPTXISD::Tld4A2DFloatFloat; 2700 case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: 2701 return NVPTXISD::Tld4R2DS64Float; 2702 case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: 2703 return NVPTXISD::Tld4G2DS64Float; 2704 case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: 2705 return NVPTXISD::Tld4B2DS64Float; 2706 case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: 2707 return NVPTXISD::Tld4A2DS64Float; 2708 case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: 2709 return NVPTXISD::Tld4R2DU64Float; 2710 case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: 2711 return NVPTXISD::Tld4G2DU64Float; 2712 case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: 2713 return NVPTXISD::Tld4B2DU64Float; 2714 case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: 2715 return NVPTXISD::Tld4A2DU64Float; 2716 2717 case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: 2718 return NVPTXISD::TexUnified1DFloatS32; 2719 case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: 2720 return NVPTXISD::TexUnified1DFloatFloat; 2721 case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: 2722 return NVPTXISD::TexUnified1DFloatFloatLevel; 2723 case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: 2724 return NVPTXISD::TexUnified1DFloatFloatGrad; 2725 case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: 2726 return NVPTXISD::TexUnified1DS32S32; 2727 case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: 2728 return NVPTXISD::TexUnified1DS32Float; 2729 case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: 2730 return NVPTXISD::TexUnified1DS32FloatLevel; 2731 case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: 2732 return NVPTXISD::TexUnified1DS32FloatGrad; 2733 case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: 2734 return NVPTXISD::TexUnified1DU32S32; 2735 case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: 2736 return NVPTXISD::TexUnified1DU32Float; 2737 case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: 2738 return NVPTXISD::TexUnified1DU32FloatLevel; 2739 case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: 2740 return NVPTXISD::TexUnified1DU32FloatGrad; 2741 2742 case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: 2743 return NVPTXISD::TexUnified1DArrayFloatS32; 2744 case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: 2745 return NVPTXISD::TexUnified1DArrayFloatFloat; 2746 case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: 2747 return NVPTXISD::TexUnified1DArrayFloatFloatLevel; 2748 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: 2749 return NVPTXISD::TexUnified1DArrayFloatFloatGrad; 2750 case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: 2751 return NVPTXISD::TexUnified1DArrayS32S32; 2752 case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: 2753 return NVPTXISD::TexUnified1DArrayS32Float; 2754 case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: 2755 
return NVPTXISD::TexUnified1DArrayS32FloatLevel; 2756 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: 2757 return NVPTXISD::TexUnified1DArrayS32FloatGrad; 2758 case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: 2759 return NVPTXISD::TexUnified1DArrayU32S32; 2760 case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: 2761 return NVPTXISD::TexUnified1DArrayU32Float; 2762 case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: 2763 return NVPTXISD::TexUnified1DArrayU32FloatLevel; 2764 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: 2765 return NVPTXISD::TexUnified1DArrayU32FloatGrad; 2766 2767 case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: 2768 return NVPTXISD::TexUnified2DFloatS32; 2769 case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: 2770 return NVPTXISD::TexUnified2DFloatFloat; 2771 case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: 2772 return NVPTXISD::TexUnified2DFloatFloatLevel; 2773 case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: 2774 return NVPTXISD::TexUnified2DFloatFloatGrad; 2775 case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: 2776 return NVPTXISD::TexUnified2DS32S32; 2777 case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: 2778 return NVPTXISD::TexUnified2DS32Float; 2779 case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: 2780 return NVPTXISD::TexUnified2DS32FloatLevel; 2781 case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: 2782 return NVPTXISD::TexUnified2DS32FloatGrad; 2783 case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: 2784 return NVPTXISD::TexUnified2DU32S32; 2785 case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: 2786 return NVPTXISD::TexUnified2DU32Float; 2787 case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: 2788 return NVPTXISD::TexUnified2DU32FloatLevel; 2789 case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: 2790 return NVPTXISD::TexUnified2DU32FloatGrad; 2791 2792 case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: 2793 return NVPTXISD::TexUnified2DArrayFloatS32; 2794 case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: 2795 return NVPTXISD::TexUnified2DArrayFloatFloat; 2796 case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: 2797 return NVPTXISD::TexUnified2DArrayFloatFloatLevel; 2798 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: 2799 return NVPTXISD::TexUnified2DArrayFloatFloatGrad; 2800 case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: 2801 return NVPTXISD::TexUnified2DArrayS32S32; 2802 case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: 2803 return NVPTXISD::TexUnified2DArrayS32Float; 2804 case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: 2805 return NVPTXISD::TexUnified2DArrayS32FloatLevel; 2806 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: 2807 return NVPTXISD::TexUnified2DArrayS32FloatGrad; 2808 case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: 2809 return NVPTXISD::TexUnified2DArrayU32S32; 2810 case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: 2811 return NVPTXISD::TexUnified2DArrayU32Float; 2812 case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: 2813 return NVPTXISD::TexUnified2DArrayU32FloatLevel; 2814 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: 2815 return NVPTXISD::TexUnified2DArrayU32FloatGrad; 2816 2817 case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: 2818 return NVPTXISD::TexUnified3DFloatS32; 2819 case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: 2820 return NVPTXISD::TexUnified3DFloatFloat; 2821 case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: 2822 return NVPTXISD::TexUnified3DFloatFloatLevel; 2823 case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: 2824 
return NVPTXISD::TexUnified3DFloatFloatGrad; 2825 case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: 2826 return NVPTXISD::TexUnified3DS32S32; 2827 case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: 2828 return NVPTXISD::TexUnified3DS32Float; 2829 case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: 2830 return NVPTXISD::TexUnified3DS32FloatLevel; 2831 case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: 2832 return NVPTXISD::TexUnified3DS32FloatGrad; 2833 case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: 2834 return NVPTXISD::TexUnified3DU32S32; 2835 case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: 2836 return NVPTXISD::TexUnified3DU32Float; 2837 case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: 2838 return NVPTXISD::TexUnified3DU32FloatLevel; 2839 case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: 2840 return NVPTXISD::TexUnified3DU32FloatGrad; 2841 2842 case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: 2843 return NVPTXISD::TexUnifiedCubeFloatFloat; 2844 case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: 2845 return NVPTXISD::TexUnifiedCubeFloatFloatLevel; 2846 case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: 2847 return NVPTXISD::TexUnifiedCubeS32Float; 2848 case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: 2849 return NVPTXISD::TexUnifiedCubeS32FloatLevel; 2850 case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: 2851 return NVPTXISD::TexUnifiedCubeU32Float; 2852 case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: 2853 return NVPTXISD::TexUnifiedCubeU32FloatLevel; 2854 2855 case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: 2856 return NVPTXISD::TexUnifiedCubeArrayFloatFloat; 2857 case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: 2858 return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel; 2859 case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: 2860 return NVPTXISD::TexUnifiedCubeArrayS32Float; 2861 case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: 2862 return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel; 2863 case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: 2864 return NVPTXISD::TexUnifiedCubeArrayU32Float; 2865 case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: 2866 return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel; 2867 2868 case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: 2869 return NVPTXISD::Tld4UnifiedR2DFloatFloat; 2870 case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: 2871 return NVPTXISD::Tld4UnifiedG2DFloatFloat; 2872 case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: 2873 return NVPTXISD::Tld4UnifiedB2DFloatFloat; 2874 case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: 2875 return NVPTXISD::Tld4UnifiedA2DFloatFloat; 2876 case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: 2877 return NVPTXISD::Tld4UnifiedR2DS64Float; 2878 case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: 2879 return NVPTXISD::Tld4UnifiedG2DS64Float; 2880 case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: 2881 return NVPTXISD::Tld4UnifiedB2DS64Float; 2882 case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: 2883 return NVPTXISD::Tld4UnifiedA2DS64Float; 2884 case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: 2885 return NVPTXISD::Tld4UnifiedR2DU64Float; 2886 case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: 2887 return NVPTXISD::Tld4UnifiedG2DU64Float; 2888 case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: 2889 return NVPTXISD::Tld4UnifiedB2DU64Float; 2890 case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: 2891 return NVPTXISD::Tld4UnifiedA2DU64Float; 2892 } 2893 } 2894 2895 static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { 2896 switch (Intrinsic) { 2897 default: 2898 
return 0; 2899 case Intrinsic::nvvm_suld_1d_i8_clamp: 2900 return NVPTXISD::Suld1DI8Clamp; 2901 case Intrinsic::nvvm_suld_1d_i16_clamp: 2902 return NVPTXISD::Suld1DI16Clamp; 2903 case Intrinsic::nvvm_suld_1d_i32_clamp: 2904 return NVPTXISD::Suld1DI32Clamp; 2905 case Intrinsic::nvvm_suld_1d_i64_clamp: 2906 return NVPTXISD::Suld1DI64Clamp; 2907 case Intrinsic::nvvm_suld_1d_v2i8_clamp: 2908 return NVPTXISD::Suld1DV2I8Clamp; 2909 case Intrinsic::nvvm_suld_1d_v2i16_clamp: 2910 return NVPTXISD::Suld1DV2I16Clamp; 2911 case Intrinsic::nvvm_suld_1d_v2i32_clamp: 2912 return NVPTXISD::Suld1DV2I32Clamp; 2913 case Intrinsic::nvvm_suld_1d_v2i64_clamp: 2914 return NVPTXISD::Suld1DV2I64Clamp; 2915 case Intrinsic::nvvm_suld_1d_v4i8_clamp: 2916 return NVPTXISD::Suld1DV4I8Clamp; 2917 case Intrinsic::nvvm_suld_1d_v4i16_clamp: 2918 return NVPTXISD::Suld1DV4I16Clamp; 2919 case Intrinsic::nvvm_suld_1d_v4i32_clamp: 2920 return NVPTXISD::Suld1DV4I32Clamp; 2921 case Intrinsic::nvvm_suld_1d_array_i8_clamp: 2922 return NVPTXISD::Suld1DArrayI8Clamp; 2923 case Intrinsic::nvvm_suld_1d_array_i16_clamp: 2924 return NVPTXISD::Suld1DArrayI16Clamp; 2925 case Intrinsic::nvvm_suld_1d_array_i32_clamp: 2926 return NVPTXISD::Suld1DArrayI32Clamp; 2927 case Intrinsic::nvvm_suld_1d_array_i64_clamp: 2928 return NVPTXISD::Suld1DArrayI64Clamp; 2929 case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: 2930 return NVPTXISD::Suld1DArrayV2I8Clamp; 2931 case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: 2932 return NVPTXISD::Suld1DArrayV2I16Clamp; 2933 case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: 2934 return NVPTXISD::Suld1DArrayV2I32Clamp; 2935 case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: 2936 return NVPTXISD::Suld1DArrayV2I64Clamp; 2937 case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: 2938 return NVPTXISD::Suld1DArrayV4I8Clamp; 2939 case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: 2940 return NVPTXISD::Suld1DArrayV4I16Clamp; 2941 case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: 2942 return NVPTXISD::Suld1DArrayV4I32Clamp; 2943 case Intrinsic::nvvm_suld_2d_i8_clamp: 2944 return NVPTXISD::Suld2DI8Clamp; 2945 case Intrinsic::nvvm_suld_2d_i16_clamp: 2946 return NVPTXISD::Suld2DI16Clamp; 2947 case Intrinsic::nvvm_suld_2d_i32_clamp: 2948 return NVPTXISD::Suld2DI32Clamp; 2949 case Intrinsic::nvvm_suld_2d_i64_clamp: 2950 return NVPTXISD::Suld2DI64Clamp; 2951 case Intrinsic::nvvm_suld_2d_v2i8_clamp: 2952 return NVPTXISD::Suld2DV2I8Clamp; 2953 case Intrinsic::nvvm_suld_2d_v2i16_clamp: 2954 return NVPTXISD::Suld2DV2I16Clamp; 2955 case Intrinsic::nvvm_suld_2d_v2i32_clamp: 2956 return NVPTXISD::Suld2DV2I32Clamp; 2957 case Intrinsic::nvvm_suld_2d_v2i64_clamp: 2958 return NVPTXISD::Suld2DV2I64Clamp; 2959 case Intrinsic::nvvm_suld_2d_v4i8_clamp: 2960 return NVPTXISD::Suld2DV4I8Clamp; 2961 case Intrinsic::nvvm_suld_2d_v4i16_clamp: 2962 return NVPTXISD::Suld2DV4I16Clamp; 2963 case Intrinsic::nvvm_suld_2d_v4i32_clamp: 2964 return NVPTXISD::Suld2DV4I32Clamp; 2965 case Intrinsic::nvvm_suld_2d_array_i8_clamp: 2966 return NVPTXISD::Suld2DArrayI8Clamp; 2967 case Intrinsic::nvvm_suld_2d_array_i16_clamp: 2968 return NVPTXISD::Suld2DArrayI16Clamp; 2969 case Intrinsic::nvvm_suld_2d_array_i32_clamp: 2970 return NVPTXISD::Suld2DArrayI32Clamp; 2971 case Intrinsic::nvvm_suld_2d_array_i64_clamp: 2972 return NVPTXISD::Suld2DArrayI64Clamp; 2973 case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: 2974 return NVPTXISD::Suld2DArrayV2I8Clamp; 2975 case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: 2976 return NVPTXISD::Suld2DArrayV2I16Clamp; 2977 case 
Intrinsic::nvvm_suld_2d_array_v2i32_clamp: 2978 return NVPTXISD::Suld2DArrayV2I32Clamp; 2979 case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: 2980 return NVPTXISD::Suld2DArrayV2I64Clamp; 2981 case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: 2982 return NVPTXISD::Suld2DArrayV4I8Clamp; 2983 case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: 2984 return NVPTXISD::Suld2DArrayV4I16Clamp; 2985 case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: 2986 return NVPTXISD::Suld2DArrayV4I32Clamp; 2987 case Intrinsic::nvvm_suld_3d_i8_clamp: 2988 return NVPTXISD::Suld3DI8Clamp; 2989 case Intrinsic::nvvm_suld_3d_i16_clamp: 2990 return NVPTXISD::Suld3DI16Clamp; 2991 case Intrinsic::nvvm_suld_3d_i32_clamp: 2992 return NVPTXISD::Suld3DI32Clamp; 2993 case Intrinsic::nvvm_suld_3d_i64_clamp: 2994 return NVPTXISD::Suld3DI64Clamp; 2995 case Intrinsic::nvvm_suld_3d_v2i8_clamp: 2996 return NVPTXISD::Suld3DV2I8Clamp; 2997 case Intrinsic::nvvm_suld_3d_v2i16_clamp: 2998 return NVPTXISD::Suld3DV2I16Clamp; 2999 case Intrinsic::nvvm_suld_3d_v2i32_clamp: 3000 return NVPTXISD::Suld3DV2I32Clamp; 3001 case Intrinsic::nvvm_suld_3d_v2i64_clamp: 3002 return NVPTXISD::Suld3DV2I64Clamp; 3003 case Intrinsic::nvvm_suld_3d_v4i8_clamp: 3004 return NVPTXISD::Suld3DV4I8Clamp; 3005 case Intrinsic::nvvm_suld_3d_v4i16_clamp: 3006 return NVPTXISD::Suld3DV4I16Clamp; 3007 case Intrinsic::nvvm_suld_3d_v4i32_clamp: 3008 return NVPTXISD::Suld3DV4I32Clamp; 3009 case Intrinsic::nvvm_suld_1d_i8_trap: 3010 return NVPTXISD::Suld1DI8Trap; 3011 case Intrinsic::nvvm_suld_1d_i16_trap: 3012 return NVPTXISD::Suld1DI16Trap; 3013 case Intrinsic::nvvm_suld_1d_i32_trap: 3014 return NVPTXISD::Suld1DI32Trap; 3015 case Intrinsic::nvvm_suld_1d_i64_trap: 3016 return NVPTXISD::Suld1DI64Trap; 3017 case Intrinsic::nvvm_suld_1d_v2i8_trap: 3018 return NVPTXISD::Suld1DV2I8Trap; 3019 case Intrinsic::nvvm_suld_1d_v2i16_trap: 3020 return NVPTXISD::Suld1DV2I16Trap; 3021 case Intrinsic::nvvm_suld_1d_v2i32_trap: 3022 return NVPTXISD::Suld1DV2I32Trap; 3023 case Intrinsic::nvvm_suld_1d_v2i64_trap: 3024 return NVPTXISD::Suld1DV2I64Trap; 3025 case Intrinsic::nvvm_suld_1d_v4i8_trap: 3026 return NVPTXISD::Suld1DV4I8Trap; 3027 case Intrinsic::nvvm_suld_1d_v4i16_trap: 3028 return NVPTXISD::Suld1DV4I16Trap; 3029 case Intrinsic::nvvm_suld_1d_v4i32_trap: 3030 return NVPTXISD::Suld1DV4I32Trap; 3031 case Intrinsic::nvvm_suld_1d_array_i8_trap: 3032 return NVPTXISD::Suld1DArrayI8Trap; 3033 case Intrinsic::nvvm_suld_1d_array_i16_trap: 3034 return NVPTXISD::Suld1DArrayI16Trap; 3035 case Intrinsic::nvvm_suld_1d_array_i32_trap: 3036 return NVPTXISD::Suld1DArrayI32Trap; 3037 case Intrinsic::nvvm_suld_1d_array_i64_trap: 3038 return NVPTXISD::Suld1DArrayI64Trap; 3039 case Intrinsic::nvvm_suld_1d_array_v2i8_trap: 3040 return NVPTXISD::Suld1DArrayV2I8Trap; 3041 case Intrinsic::nvvm_suld_1d_array_v2i16_trap: 3042 return NVPTXISD::Suld1DArrayV2I16Trap; 3043 case Intrinsic::nvvm_suld_1d_array_v2i32_trap: 3044 return NVPTXISD::Suld1DArrayV2I32Trap; 3045 case Intrinsic::nvvm_suld_1d_array_v2i64_trap: 3046 return NVPTXISD::Suld1DArrayV2I64Trap; 3047 case Intrinsic::nvvm_suld_1d_array_v4i8_trap: 3048 return NVPTXISD::Suld1DArrayV4I8Trap; 3049 case Intrinsic::nvvm_suld_1d_array_v4i16_trap: 3050 return NVPTXISD::Suld1DArrayV4I16Trap; 3051 case Intrinsic::nvvm_suld_1d_array_v4i32_trap: 3052 return NVPTXISD::Suld1DArrayV4I32Trap; 3053 case Intrinsic::nvvm_suld_2d_i8_trap: 3054 return NVPTXISD::Suld2DI8Trap; 3055 case Intrinsic::nvvm_suld_2d_i16_trap: 3056 return NVPTXISD::Suld2DI16Trap; 3057 case 
Intrinsic::nvvm_suld_2d_i32_trap: 3058 return NVPTXISD::Suld2DI32Trap; 3059 case Intrinsic::nvvm_suld_2d_i64_trap: 3060 return NVPTXISD::Suld2DI64Trap; 3061 case Intrinsic::nvvm_suld_2d_v2i8_trap: 3062 return NVPTXISD::Suld2DV2I8Trap; 3063 case Intrinsic::nvvm_suld_2d_v2i16_trap: 3064 return NVPTXISD::Suld2DV2I16Trap; 3065 case Intrinsic::nvvm_suld_2d_v2i32_trap: 3066 return NVPTXISD::Suld2DV2I32Trap; 3067 case Intrinsic::nvvm_suld_2d_v2i64_trap: 3068 return NVPTXISD::Suld2DV2I64Trap; 3069 case Intrinsic::nvvm_suld_2d_v4i8_trap: 3070 return NVPTXISD::Suld2DV4I8Trap; 3071 case Intrinsic::nvvm_suld_2d_v4i16_trap: 3072 return NVPTXISD::Suld2DV4I16Trap; 3073 case Intrinsic::nvvm_suld_2d_v4i32_trap: 3074 return NVPTXISD::Suld2DV4I32Trap; 3075 case Intrinsic::nvvm_suld_2d_array_i8_trap: 3076 return NVPTXISD::Suld2DArrayI8Trap; 3077 case Intrinsic::nvvm_suld_2d_array_i16_trap: 3078 return NVPTXISD::Suld2DArrayI16Trap; 3079 case Intrinsic::nvvm_suld_2d_array_i32_trap: 3080 return NVPTXISD::Suld2DArrayI32Trap; 3081 case Intrinsic::nvvm_suld_2d_array_i64_trap: 3082 return NVPTXISD::Suld2DArrayI64Trap; 3083 case Intrinsic::nvvm_suld_2d_array_v2i8_trap: 3084 return NVPTXISD::Suld2DArrayV2I8Trap; 3085 case Intrinsic::nvvm_suld_2d_array_v2i16_trap: 3086 return NVPTXISD::Suld2DArrayV2I16Trap; 3087 case Intrinsic::nvvm_suld_2d_array_v2i32_trap: 3088 return NVPTXISD::Suld2DArrayV2I32Trap; 3089 case Intrinsic::nvvm_suld_2d_array_v2i64_trap: 3090 return NVPTXISD::Suld2DArrayV2I64Trap; 3091 case Intrinsic::nvvm_suld_2d_array_v4i8_trap: 3092 return NVPTXISD::Suld2DArrayV4I8Trap; 3093 case Intrinsic::nvvm_suld_2d_array_v4i16_trap: 3094 return NVPTXISD::Suld2DArrayV4I16Trap; 3095 case Intrinsic::nvvm_suld_2d_array_v4i32_trap: 3096 return NVPTXISD::Suld2DArrayV4I32Trap; 3097 case Intrinsic::nvvm_suld_3d_i8_trap: 3098 return NVPTXISD::Suld3DI8Trap; 3099 case Intrinsic::nvvm_suld_3d_i16_trap: 3100 return NVPTXISD::Suld3DI16Trap; 3101 case Intrinsic::nvvm_suld_3d_i32_trap: 3102 return NVPTXISD::Suld3DI32Trap; 3103 case Intrinsic::nvvm_suld_3d_i64_trap: 3104 return NVPTXISD::Suld3DI64Trap; 3105 case Intrinsic::nvvm_suld_3d_v2i8_trap: 3106 return NVPTXISD::Suld3DV2I8Trap; 3107 case Intrinsic::nvvm_suld_3d_v2i16_trap: 3108 return NVPTXISD::Suld3DV2I16Trap; 3109 case Intrinsic::nvvm_suld_3d_v2i32_trap: 3110 return NVPTXISD::Suld3DV2I32Trap; 3111 case Intrinsic::nvvm_suld_3d_v2i64_trap: 3112 return NVPTXISD::Suld3DV2I64Trap; 3113 case Intrinsic::nvvm_suld_3d_v4i8_trap: 3114 return NVPTXISD::Suld3DV4I8Trap; 3115 case Intrinsic::nvvm_suld_3d_v4i16_trap: 3116 return NVPTXISD::Suld3DV4I16Trap; 3117 case Intrinsic::nvvm_suld_3d_v4i32_trap: 3118 return NVPTXISD::Suld3DV4I32Trap; 3119 case Intrinsic::nvvm_suld_1d_i8_zero: 3120 return NVPTXISD::Suld1DI8Zero; 3121 case Intrinsic::nvvm_suld_1d_i16_zero: 3122 return NVPTXISD::Suld1DI16Zero; 3123 case Intrinsic::nvvm_suld_1d_i32_zero: 3124 return NVPTXISD::Suld1DI32Zero; 3125 case Intrinsic::nvvm_suld_1d_i64_zero: 3126 return NVPTXISD::Suld1DI64Zero; 3127 case Intrinsic::nvvm_suld_1d_v2i8_zero: 3128 return NVPTXISD::Suld1DV2I8Zero; 3129 case Intrinsic::nvvm_suld_1d_v2i16_zero: 3130 return NVPTXISD::Suld1DV2I16Zero; 3131 case Intrinsic::nvvm_suld_1d_v2i32_zero: 3132 return NVPTXISD::Suld1DV2I32Zero; 3133 case Intrinsic::nvvm_suld_1d_v2i64_zero: 3134 return NVPTXISD::Suld1DV2I64Zero; 3135 case Intrinsic::nvvm_suld_1d_v4i8_zero: 3136 return NVPTXISD::Suld1DV4I8Zero; 3137 case Intrinsic::nvvm_suld_1d_v4i16_zero: 3138 return NVPTXISD::Suld1DV4I16Zero; 3139 case 
Intrinsic::nvvm_suld_1d_v4i32_zero: 3140 return NVPTXISD::Suld1DV4I32Zero; 3141 case Intrinsic::nvvm_suld_1d_array_i8_zero: 3142 return NVPTXISD::Suld1DArrayI8Zero; 3143 case Intrinsic::nvvm_suld_1d_array_i16_zero: 3144 return NVPTXISD::Suld1DArrayI16Zero; 3145 case Intrinsic::nvvm_suld_1d_array_i32_zero: 3146 return NVPTXISD::Suld1DArrayI32Zero; 3147 case Intrinsic::nvvm_suld_1d_array_i64_zero: 3148 return NVPTXISD::Suld1DArrayI64Zero; 3149 case Intrinsic::nvvm_suld_1d_array_v2i8_zero: 3150 return NVPTXISD::Suld1DArrayV2I8Zero; 3151 case Intrinsic::nvvm_suld_1d_array_v2i16_zero: 3152 return NVPTXISD::Suld1DArrayV2I16Zero; 3153 case Intrinsic::nvvm_suld_1d_array_v2i32_zero: 3154 return NVPTXISD::Suld1DArrayV2I32Zero; 3155 case Intrinsic::nvvm_suld_1d_array_v2i64_zero: 3156 return NVPTXISD::Suld1DArrayV2I64Zero; 3157 case Intrinsic::nvvm_suld_1d_array_v4i8_zero: 3158 return NVPTXISD::Suld1DArrayV4I8Zero; 3159 case Intrinsic::nvvm_suld_1d_array_v4i16_zero: 3160 return NVPTXISD::Suld1DArrayV4I16Zero; 3161 case Intrinsic::nvvm_suld_1d_array_v4i32_zero: 3162 return NVPTXISD::Suld1DArrayV4I32Zero; 3163 case Intrinsic::nvvm_suld_2d_i8_zero: 3164 return NVPTXISD::Suld2DI8Zero; 3165 case Intrinsic::nvvm_suld_2d_i16_zero: 3166 return NVPTXISD::Suld2DI16Zero; 3167 case Intrinsic::nvvm_suld_2d_i32_zero: 3168 return NVPTXISD::Suld2DI32Zero; 3169 case Intrinsic::nvvm_suld_2d_i64_zero: 3170 return NVPTXISD::Suld2DI64Zero; 3171 case Intrinsic::nvvm_suld_2d_v2i8_zero: 3172 return NVPTXISD::Suld2DV2I8Zero; 3173 case Intrinsic::nvvm_suld_2d_v2i16_zero: 3174 return NVPTXISD::Suld2DV2I16Zero; 3175 case Intrinsic::nvvm_suld_2d_v2i32_zero: 3176 return NVPTXISD::Suld2DV2I32Zero; 3177 case Intrinsic::nvvm_suld_2d_v2i64_zero: 3178 return NVPTXISD::Suld2DV2I64Zero; 3179 case Intrinsic::nvvm_suld_2d_v4i8_zero: 3180 return NVPTXISD::Suld2DV4I8Zero; 3181 case Intrinsic::nvvm_suld_2d_v4i16_zero: 3182 return NVPTXISD::Suld2DV4I16Zero; 3183 case Intrinsic::nvvm_suld_2d_v4i32_zero: 3184 return NVPTXISD::Suld2DV4I32Zero; 3185 case Intrinsic::nvvm_suld_2d_array_i8_zero: 3186 return NVPTXISD::Suld2DArrayI8Zero; 3187 case Intrinsic::nvvm_suld_2d_array_i16_zero: 3188 return NVPTXISD::Suld2DArrayI16Zero; 3189 case Intrinsic::nvvm_suld_2d_array_i32_zero: 3190 return NVPTXISD::Suld2DArrayI32Zero; 3191 case Intrinsic::nvvm_suld_2d_array_i64_zero: 3192 return NVPTXISD::Suld2DArrayI64Zero; 3193 case Intrinsic::nvvm_suld_2d_array_v2i8_zero: 3194 return NVPTXISD::Suld2DArrayV2I8Zero; 3195 case Intrinsic::nvvm_suld_2d_array_v2i16_zero: 3196 return NVPTXISD::Suld2DArrayV2I16Zero; 3197 case Intrinsic::nvvm_suld_2d_array_v2i32_zero: 3198 return NVPTXISD::Suld2DArrayV2I32Zero; 3199 case Intrinsic::nvvm_suld_2d_array_v2i64_zero: 3200 return NVPTXISD::Suld2DArrayV2I64Zero; 3201 case Intrinsic::nvvm_suld_2d_array_v4i8_zero: 3202 return NVPTXISD::Suld2DArrayV4I8Zero; 3203 case Intrinsic::nvvm_suld_2d_array_v4i16_zero: 3204 return NVPTXISD::Suld2DArrayV4I16Zero; 3205 case Intrinsic::nvvm_suld_2d_array_v4i32_zero: 3206 return NVPTXISD::Suld2DArrayV4I32Zero; 3207 case Intrinsic::nvvm_suld_3d_i8_zero: 3208 return NVPTXISD::Suld3DI8Zero; 3209 case Intrinsic::nvvm_suld_3d_i16_zero: 3210 return NVPTXISD::Suld3DI16Zero; 3211 case Intrinsic::nvvm_suld_3d_i32_zero: 3212 return NVPTXISD::Suld3DI32Zero; 3213 case Intrinsic::nvvm_suld_3d_i64_zero: 3214 return NVPTXISD::Suld3DI64Zero; 3215 case Intrinsic::nvvm_suld_3d_v2i8_zero: 3216 return NVPTXISD::Suld3DV2I8Zero; 3217 case Intrinsic::nvvm_suld_3d_v2i16_zero: 3218 return NVPTXISD::Suld3DV2I16Zero; 3219 
case Intrinsic::nvvm_suld_3d_v2i32_zero: 3220 return NVPTXISD::Suld3DV2I32Zero; 3221 case Intrinsic::nvvm_suld_3d_v2i64_zero: 3222 return NVPTXISD::Suld3DV2I64Zero; 3223 case Intrinsic::nvvm_suld_3d_v4i8_zero: 3224 return NVPTXISD::Suld3DV4I8Zero; 3225 case Intrinsic::nvvm_suld_3d_v4i16_zero: 3226 return NVPTXISD::Suld3DV4I16Zero; 3227 case Intrinsic::nvvm_suld_3d_v4i32_zero: 3228 return NVPTXISD::Suld3DV4I32Zero; 3229 } 3230 } 3231 3232 // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as 3233 // TgtMemIntrinsic 3234 // because we need the information that is only available in the "Value" type 3235 // of destination 3236 // pointer. In particular, the address space information. 3237 bool NVPTXTargetLowering::getTgtMemIntrinsic( 3238 IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { 3239 switch (Intrinsic) { 3240 default: 3241 return false; 3242 3243 case Intrinsic::nvvm_atomic_load_add_f32: 3244 Info.opc = ISD::INTRINSIC_W_CHAIN; 3245 Info.memVT = MVT::f32; 3246 Info.ptrVal = I.getArgOperand(0); 3247 Info.offset = 0; 3248 Info.vol = 0; 3249 Info.readMem = true; 3250 Info.writeMem = true; 3251 Info.align = 0; 3252 return true; 3253 3254 case Intrinsic::nvvm_atomic_load_inc_32: 3255 case Intrinsic::nvvm_atomic_load_dec_32: 3256 Info.opc = ISD::INTRINSIC_W_CHAIN; 3257 Info.memVT = MVT::i32; 3258 Info.ptrVal = I.getArgOperand(0); 3259 Info.offset = 0; 3260 Info.vol = 0; 3261 Info.readMem = true; 3262 Info.writeMem = true; 3263 Info.align = 0; 3264 return true; 3265 3266 case Intrinsic::nvvm_ldu_global_i: 3267 case Intrinsic::nvvm_ldu_global_f: 3268 case Intrinsic::nvvm_ldu_global_p: { 3269 auto &DL = I.getModule()->getDataLayout(); 3270 Info.opc = ISD::INTRINSIC_W_CHAIN; 3271 if (Intrinsic == Intrinsic::nvvm_ldu_global_i) 3272 Info.memVT = getValueType(DL, I.getType()); 3273 else if(Intrinsic == Intrinsic::nvvm_ldu_global_p) 3274 Info.memVT = getPointerTy(DL); 3275 else 3276 Info.memVT = getValueType(DL, I.getType()); 3277 Info.ptrVal = I.getArgOperand(0); 3278 Info.offset = 0; 3279 Info.vol = 0; 3280 Info.readMem = true; 3281 Info.writeMem = false; 3282 Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); 3283 3284 return true; 3285 } 3286 case Intrinsic::nvvm_ldg_global_i: 3287 case Intrinsic::nvvm_ldg_global_f: 3288 case Intrinsic::nvvm_ldg_global_p: { 3289 auto &DL = I.getModule()->getDataLayout(); 3290 3291 Info.opc = ISD::INTRINSIC_W_CHAIN; 3292 if (Intrinsic == Intrinsic::nvvm_ldg_global_i) 3293 Info.memVT = getValueType(DL, I.getType()); 3294 else if(Intrinsic == Intrinsic::nvvm_ldg_global_p) 3295 Info.memVT = getPointerTy(DL); 3296 else 3297 Info.memVT = getValueType(DL, I.getType()); 3298 Info.ptrVal = I.getArgOperand(0); 3299 Info.offset = 0; 3300 Info.vol = 0; 3301 Info.readMem = true; 3302 Info.writeMem = false; 3303 Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); 3304 3305 return true; 3306 } 3307 3308 case Intrinsic::nvvm_tex_1d_v4f32_s32: 3309 case Intrinsic::nvvm_tex_1d_v4f32_f32: 3310 case Intrinsic::nvvm_tex_1d_level_v4f32_f32: 3311 case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: 3312 case Intrinsic::nvvm_tex_1d_array_v4f32_s32: 3313 case Intrinsic::nvvm_tex_1d_array_v4f32_f32: 3314 case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: 3315 case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: 3316 case Intrinsic::nvvm_tex_2d_v4f32_s32: 3317 case Intrinsic::nvvm_tex_2d_v4f32_f32: 3318 case Intrinsic::nvvm_tex_2d_level_v4f32_f32: 3319 case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: 3320 case 
Intrinsic::nvvm_tex_2d_array_v4f32_s32: 3321 case Intrinsic::nvvm_tex_2d_array_v4f32_f32: 3322 case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: 3323 case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: 3324 case Intrinsic::nvvm_tex_3d_v4f32_s32: 3325 case Intrinsic::nvvm_tex_3d_v4f32_f32: 3326 case Intrinsic::nvvm_tex_3d_level_v4f32_f32: 3327 case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: 3328 case Intrinsic::nvvm_tex_cube_v4f32_f32: 3329 case Intrinsic::nvvm_tex_cube_level_v4f32_f32: 3330 case Intrinsic::nvvm_tex_cube_array_v4f32_f32: 3331 case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: 3332 case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: 3333 case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: 3334 case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: 3335 case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: 3336 case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: 3337 case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: 3338 case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: 3339 case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: 3340 case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: 3341 case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: 3342 case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: 3343 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: 3344 case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: 3345 case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: 3346 case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: 3347 case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: 3348 case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: 3349 case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: 3350 case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: 3351 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: 3352 case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: 3353 case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: 3354 case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: 3355 case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: 3356 case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: 3357 case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: 3358 case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: 3359 case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: 3360 case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: 3361 case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: 3362 case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: 3363 case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: { 3364 Info.opc = getOpcForTextureInstr(Intrinsic); 3365 Info.memVT = MVT::v4f32; 3366 Info.ptrVal = nullptr; 3367 Info.offset = 0; 3368 Info.vol = 0; 3369 Info.readMem = true; 3370 Info.writeMem = false; 3371 Info.align = 16; 3372 return true; 3373 } 3374 case Intrinsic::nvvm_tex_1d_v4s32_s32: 3375 case Intrinsic::nvvm_tex_1d_v4s32_f32: 3376 case Intrinsic::nvvm_tex_1d_level_v4s32_f32: 3377 case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: 3378 case Intrinsic::nvvm_tex_1d_array_v4s32_s32: 3379 case Intrinsic::nvvm_tex_1d_array_v4s32_f32: 3380 case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: 3381 case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: 3382 case Intrinsic::nvvm_tex_2d_v4s32_s32: 3383 case Intrinsic::nvvm_tex_2d_v4s32_f32: 3384 case Intrinsic::nvvm_tex_2d_level_v4s32_f32: 3385 case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: 3386 case Intrinsic::nvvm_tex_2d_array_v4s32_s32: 3387 case Intrinsic::nvvm_tex_2d_array_v4s32_f32: 3388 case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: 3389 case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: 3390 case Intrinsic::nvvm_tex_3d_v4s32_s32: 3391 case Intrinsic::nvvm_tex_3d_v4s32_f32: 3392 case 
Intrinsic::nvvm_tex_3d_level_v4s32_f32: 3393 case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: 3394 case Intrinsic::nvvm_tex_cube_v4s32_f32: 3395 case Intrinsic::nvvm_tex_cube_level_v4s32_f32: 3396 case Intrinsic::nvvm_tex_cube_array_v4s32_f32: 3397 case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: 3398 case Intrinsic::nvvm_tex_cube_v4u32_f32: 3399 case Intrinsic::nvvm_tex_cube_level_v4u32_f32: 3400 case Intrinsic::nvvm_tex_cube_array_v4u32_f32: 3401 case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: 3402 case Intrinsic::nvvm_tex_1d_v4u32_s32: 3403 case Intrinsic::nvvm_tex_1d_v4u32_f32: 3404 case Intrinsic::nvvm_tex_1d_level_v4u32_f32: 3405 case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: 3406 case Intrinsic::nvvm_tex_1d_array_v4u32_s32: 3407 case Intrinsic::nvvm_tex_1d_array_v4u32_f32: 3408 case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: 3409 case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: 3410 case Intrinsic::nvvm_tex_2d_v4u32_s32: 3411 case Intrinsic::nvvm_tex_2d_v4u32_f32: 3412 case Intrinsic::nvvm_tex_2d_level_v4u32_f32: 3413 case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: 3414 case Intrinsic::nvvm_tex_2d_array_v4u32_s32: 3415 case Intrinsic::nvvm_tex_2d_array_v4u32_f32: 3416 case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: 3417 case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: 3418 case Intrinsic::nvvm_tex_3d_v4u32_s32: 3419 case Intrinsic::nvvm_tex_3d_v4u32_f32: 3420 case Intrinsic::nvvm_tex_3d_level_v4u32_f32: 3421 case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: 3422 case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: 3423 case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: 3424 case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: 3425 case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: 3426 case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: 3427 case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: 3428 case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: 3429 case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: 3430 case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: 3431 case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: 3432 case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: 3433 case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: 3434 case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: 3435 case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: 3436 case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: 3437 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: 3438 case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: 3439 case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: 3440 case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: 3441 case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: 3442 case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: 3443 case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: 3444 case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: 3445 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: 3446 case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: 3447 case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: 3448 case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: 3449 case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: 3450 case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: 3451 case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: 3452 case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: 3453 case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: 3454 case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: 3455 case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: 3456 case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: 3457 case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: 3458 case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: 3459 case 
Intrinsic::nvvm_tex_unified_2d_v4u32_f32: 3460 case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: 3461 case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: 3462 case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: 3463 case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: 3464 case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: 3465 case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: 3466 case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: 3467 case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: 3468 case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: 3469 case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: 3470 case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: 3471 case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: 3472 case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: 3473 case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: 3474 case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: 3475 case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: 3476 case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: 3477 case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: 3478 case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: 3479 case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: 3480 case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: 3481 case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: 3482 case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: 3483 case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: 3484 case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: 3485 case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: { 3486 Info.opc = getOpcForTextureInstr(Intrinsic); 3487 Info.memVT = MVT::v4i32; 3488 Info.ptrVal = nullptr; 3489 Info.offset = 0; 3490 Info.vol = 0; 3491 Info.readMem = true; 3492 Info.writeMem = false; 3493 Info.align = 16; 3494 return true; 3495 } 3496 case Intrinsic::nvvm_suld_1d_i8_clamp: 3497 case Intrinsic::nvvm_suld_1d_v2i8_clamp: 3498 case Intrinsic::nvvm_suld_1d_v4i8_clamp: 3499 case Intrinsic::nvvm_suld_1d_array_i8_clamp: 3500 case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: 3501 case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: 3502 case Intrinsic::nvvm_suld_2d_i8_clamp: 3503 case Intrinsic::nvvm_suld_2d_v2i8_clamp: 3504 case Intrinsic::nvvm_suld_2d_v4i8_clamp: 3505 case Intrinsic::nvvm_suld_2d_array_i8_clamp: 3506 case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: 3507 case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: 3508 case Intrinsic::nvvm_suld_3d_i8_clamp: 3509 case Intrinsic::nvvm_suld_3d_v2i8_clamp: 3510 case Intrinsic::nvvm_suld_3d_v4i8_clamp: 3511 case Intrinsic::nvvm_suld_1d_i8_trap: 3512 case Intrinsic::nvvm_suld_1d_v2i8_trap: 3513 case Intrinsic::nvvm_suld_1d_v4i8_trap: 3514 case Intrinsic::nvvm_suld_1d_array_i8_trap: 3515 case Intrinsic::nvvm_suld_1d_array_v2i8_trap: 3516 case Intrinsic::nvvm_suld_1d_array_v4i8_trap: 3517 case Intrinsic::nvvm_suld_2d_i8_trap: 3518 case Intrinsic::nvvm_suld_2d_v2i8_trap: 3519 case Intrinsic::nvvm_suld_2d_v4i8_trap: 3520 case Intrinsic::nvvm_suld_2d_array_i8_trap: 3521 case Intrinsic::nvvm_suld_2d_array_v2i8_trap: 3522 case Intrinsic::nvvm_suld_2d_array_v4i8_trap: 3523 case Intrinsic::nvvm_suld_3d_i8_trap: 3524 case Intrinsic::nvvm_suld_3d_v2i8_trap: 3525 case Intrinsic::nvvm_suld_3d_v4i8_trap: 3526 case Intrinsic::nvvm_suld_1d_i8_zero: 3527 case Intrinsic::nvvm_suld_1d_v2i8_zero: 3528 case Intrinsic::nvvm_suld_1d_v4i8_zero: 3529 case Intrinsic::nvvm_suld_1d_array_i8_zero: 3530 case Intrinsic::nvvm_suld_1d_array_v2i8_zero: 3531 case Intrinsic::nvvm_suld_1d_array_v4i8_zero: 3532 case Intrinsic::nvvm_suld_2d_i8_zero: 3533 case 
Intrinsic::nvvm_suld_2d_v2i8_zero: 3534 case Intrinsic::nvvm_suld_2d_v4i8_zero: 3535 case Intrinsic::nvvm_suld_2d_array_i8_zero: 3536 case Intrinsic::nvvm_suld_2d_array_v2i8_zero: 3537 case Intrinsic::nvvm_suld_2d_array_v4i8_zero: 3538 case Intrinsic::nvvm_suld_3d_i8_zero: 3539 case Intrinsic::nvvm_suld_3d_v2i8_zero: 3540 case Intrinsic::nvvm_suld_3d_v4i8_zero: { 3541 Info.opc = getOpcForSurfaceInstr(Intrinsic); 3542 Info.memVT = MVT::i8; 3543 Info.ptrVal = nullptr; 3544 Info.offset = 0; 3545 Info.vol = 0; 3546 Info.readMem = true; 3547 Info.writeMem = false; 3548 Info.align = 16; 3549 return true; 3550 } 3551 case Intrinsic::nvvm_suld_1d_i16_clamp: 3552 case Intrinsic::nvvm_suld_1d_v2i16_clamp: 3553 case Intrinsic::nvvm_suld_1d_v4i16_clamp: 3554 case Intrinsic::nvvm_suld_1d_array_i16_clamp: 3555 case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: 3556 case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: 3557 case Intrinsic::nvvm_suld_2d_i16_clamp: 3558 case Intrinsic::nvvm_suld_2d_v2i16_clamp: 3559 case Intrinsic::nvvm_suld_2d_v4i16_clamp: 3560 case Intrinsic::nvvm_suld_2d_array_i16_clamp: 3561 case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: 3562 case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: 3563 case Intrinsic::nvvm_suld_3d_i16_clamp: 3564 case Intrinsic::nvvm_suld_3d_v2i16_clamp: 3565 case Intrinsic::nvvm_suld_3d_v4i16_clamp: 3566 case Intrinsic::nvvm_suld_1d_i16_trap: 3567 case Intrinsic::nvvm_suld_1d_v2i16_trap: 3568 case Intrinsic::nvvm_suld_1d_v4i16_trap: 3569 case Intrinsic::nvvm_suld_1d_array_i16_trap: 3570 case Intrinsic::nvvm_suld_1d_array_v2i16_trap: 3571 case Intrinsic::nvvm_suld_1d_array_v4i16_trap: 3572 case Intrinsic::nvvm_suld_2d_i16_trap: 3573 case Intrinsic::nvvm_suld_2d_v2i16_trap: 3574 case Intrinsic::nvvm_suld_2d_v4i16_trap: 3575 case Intrinsic::nvvm_suld_2d_array_i16_trap: 3576 case Intrinsic::nvvm_suld_2d_array_v2i16_trap: 3577 case Intrinsic::nvvm_suld_2d_array_v4i16_trap: 3578 case Intrinsic::nvvm_suld_3d_i16_trap: 3579 case Intrinsic::nvvm_suld_3d_v2i16_trap: 3580 case Intrinsic::nvvm_suld_3d_v4i16_trap: 3581 case Intrinsic::nvvm_suld_1d_i16_zero: 3582 case Intrinsic::nvvm_suld_1d_v2i16_zero: 3583 case Intrinsic::nvvm_suld_1d_v4i16_zero: 3584 case Intrinsic::nvvm_suld_1d_array_i16_zero: 3585 case Intrinsic::nvvm_suld_1d_array_v2i16_zero: 3586 case Intrinsic::nvvm_suld_1d_array_v4i16_zero: 3587 case Intrinsic::nvvm_suld_2d_i16_zero: 3588 case Intrinsic::nvvm_suld_2d_v2i16_zero: 3589 case Intrinsic::nvvm_suld_2d_v4i16_zero: 3590 case Intrinsic::nvvm_suld_2d_array_i16_zero: 3591 case Intrinsic::nvvm_suld_2d_array_v2i16_zero: 3592 case Intrinsic::nvvm_suld_2d_array_v4i16_zero: 3593 case Intrinsic::nvvm_suld_3d_i16_zero: 3594 case Intrinsic::nvvm_suld_3d_v2i16_zero: 3595 case Intrinsic::nvvm_suld_3d_v4i16_zero: { 3596 Info.opc = getOpcForSurfaceInstr(Intrinsic); 3597 Info.memVT = MVT::i16; 3598 Info.ptrVal = nullptr; 3599 Info.offset = 0; 3600 Info.vol = 0; 3601 Info.readMem = true; 3602 Info.writeMem = false; 3603 Info.align = 16; 3604 return true; 3605 } 3606 case Intrinsic::nvvm_suld_1d_i32_clamp: 3607 case Intrinsic::nvvm_suld_1d_v2i32_clamp: 3608 case Intrinsic::nvvm_suld_1d_v4i32_clamp: 3609 case Intrinsic::nvvm_suld_1d_array_i32_clamp: 3610 case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: 3611 case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: 3612 case Intrinsic::nvvm_suld_2d_i32_clamp: 3613 case Intrinsic::nvvm_suld_2d_v2i32_clamp: 3614 case Intrinsic::nvvm_suld_2d_v4i32_clamp: 3615 case Intrinsic::nvvm_suld_2d_array_i32_clamp: 3616 case 
Intrinsic::nvvm_suld_2d_array_v2i32_clamp: 3617 case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: 3618 case Intrinsic::nvvm_suld_3d_i32_clamp: 3619 case Intrinsic::nvvm_suld_3d_v2i32_clamp: 3620 case Intrinsic::nvvm_suld_3d_v4i32_clamp: 3621 case Intrinsic::nvvm_suld_1d_i32_trap: 3622 case Intrinsic::nvvm_suld_1d_v2i32_trap: 3623 case Intrinsic::nvvm_suld_1d_v4i32_trap: 3624 case Intrinsic::nvvm_suld_1d_array_i32_trap: 3625 case Intrinsic::nvvm_suld_1d_array_v2i32_trap: 3626 case Intrinsic::nvvm_suld_1d_array_v4i32_trap: 3627 case Intrinsic::nvvm_suld_2d_i32_trap: 3628 case Intrinsic::nvvm_suld_2d_v2i32_trap: 3629 case Intrinsic::nvvm_suld_2d_v4i32_trap: 3630 case Intrinsic::nvvm_suld_2d_array_i32_trap: 3631 case Intrinsic::nvvm_suld_2d_array_v2i32_trap: 3632 case Intrinsic::nvvm_suld_2d_array_v4i32_trap: 3633 case Intrinsic::nvvm_suld_3d_i32_trap: 3634 case Intrinsic::nvvm_suld_3d_v2i32_trap: 3635 case Intrinsic::nvvm_suld_3d_v4i32_trap: 3636 case Intrinsic::nvvm_suld_1d_i32_zero: 3637 case Intrinsic::nvvm_suld_1d_v2i32_zero: 3638 case Intrinsic::nvvm_suld_1d_v4i32_zero: 3639 case Intrinsic::nvvm_suld_1d_array_i32_zero: 3640 case Intrinsic::nvvm_suld_1d_array_v2i32_zero: 3641 case Intrinsic::nvvm_suld_1d_array_v4i32_zero: 3642 case Intrinsic::nvvm_suld_2d_i32_zero: 3643 case Intrinsic::nvvm_suld_2d_v2i32_zero: 3644 case Intrinsic::nvvm_suld_2d_v4i32_zero: 3645 case Intrinsic::nvvm_suld_2d_array_i32_zero: 3646 case Intrinsic::nvvm_suld_2d_array_v2i32_zero: 3647 case Intrinsic::nvvm_suld_2d_array_v4i32_zero: 3648 case Intrinsic::nvvm_suld_3d_i32_zero: 3649 case Intrinsic::nvvm_suld_3d_v2i32_zero: 3650 case Intrinsic::nvvm_suld_3d_v4i32_zero: { 3651 Info.opc = getOpcForSurfaceInstr(Intrinsic); 3652 Info.memVT = MVT::i32; 3653 Info.ptrVal = nullptr; 3654 Info.offset = 0; 3655 Info.vol = 0; 3656 Info.readMem = true; 3657 Info.writeMem = false; 3658 Info.align = 16; 3659 return true; 3660 } 3661 case Intrinsic::nvvm_suld_1d_i64_clamp: 3662 case Intrinsic::nvvm_suld_1d_v2i64_clamp: 3663 case Intrinsic::nvvm_suld_1d_array_i64_clamp: 3664 case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: 3665 case Intrinsic::nvvm_suld_2d_i64_clamp: 3666 case Intrinsic::nvvm_suld_2d_v2i64_clamp: 3667 case Intrinsic::nvvm_suld_2d_array_i64_clamp: 3668 case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: 3669 case Intrinsic::nvvm_suld_3d_i64_clamp: 3670 case Intrinsic::nvvm_suld_3d_v2i64_clamp: 3671 case Intrinsic::nvvm_suld_1d_i64_trap: 3672 case Intrinsic::nvvm_suld_1d_v2i64_trap: 3673 case Intrinsic::nvvm_suld_1d_array_i64_trap: 3674 case Intrinsic::nvvm_suld_1d_array_v2i64_trap: 3675 case Intrinsic::nvvm_suld_2d_i64_trap: 3676 case Intrinsic::nvvm_suld_2d_v2i64_trap: 3677 case Intrinsic::nvvm_suld_2d_array_i64_trap: 3678 case Intrinsic::nvvm_suld_2d_array_v2i64_trap: 3679 case Intrinsic::nvvm_suld_3d_i64_trap: 3680 case Intrinsic::nvvm_suld_3d_v2i64_trap: 3681 case Intrinsic::nvvm_suld_1d_i64_zero: 3682 case Intrinsic::nvvm_suld_1d_v2i64_zero: 3683 case Intrinsic::nvvm_suld_1d_array_i64_zero: 3684 case Intrinsic::nvvm_suld_1d_array_v2i64_zero: 3685 case Intrinsic::nvvm_suld_2d_i64_zero: 3686 case Intrinsic::nvvm_suld_2d_v2i64_zero: 3687 case Intrinsic::nvvm_suld_2d_array_i64_zero: 3688 case Intrinsic::nvvm_suld_2d_array_v2i64_zero: 3689 case Intrinsic::nvvm_suld_3d_i64_zero: 3690 case Intrinsic::nvvm_suld_3d_v2i64_zero: { 3691 Info.opc = getOpcForSurfaceInstr(Intrinsic); 3692 Info.memVT = MVT::i64; 3693 Info.ptrVal = nullptr; 3694 Info.offset = 0; 3695 Info.vol = 0; 3696 Info.readMem = true; 3697 Info.writeMem = false; 
3698 Info.align = 16; 3699 return true; 3700 } 3701 } 3702 return false; 3703 } 3704 3705 /// isLegalAddressingMode - Return true if the addressing mode represented 3706 /// by AM is legal for this target, for a load/store of the specified type. 3707 /// Used to guide target specific optimizations, like loop strength reduction 3708 /// (LoopStrengthReduce.cpp) and memory optimization for address mode 3709 /// (CodeGenPrepare.cpp) 3710 bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL, 3711 const AddrMode &AM, Type *Ty, 3712 unsigned AS) const { 3713 3714 // AddrMode - This represents an addressing mode of: 3715 // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg 3716 // 3717 // The legal address modes are 3718 // - [avar] 3719 // - [areg] 3720 // - [areg+immoff] 3721 // - [immAddr] 3722 3723 if (AM.BaseGV) { 3724 return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale; 3725 } 3726 3727 switch (AM.Scale) { 3728 case 0: // "r", "r+i" or "i" is allowed 3729 break; 3730 case 1: 3731 if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. 3732 return false; 3733 // Otherwise we have r+i. 3734 break; 3735 default: 3736 // No scale > 1 is allowed 3737 return false; 3738 } 3739 return true; 3740 } 3741 3742 //===----------------------------------------------------------------------===// 3743 // NVPTX Inline Assembly Support 3744 //===----------------------------------------------------------------------===// 3745 3746 /// getConstraintType - Given a constraint letter, return the type of 3747 /// constraint it is for this target. 3748 NVPTXTargetLowering::ConstraintType 3749 NVPTXTargetLowering::getConstraintType(StringRef Constraint) const { 3750 if (Constraint.size() == 1) { 3751 switch (Constraint[0]) { 3752 default: 3753 break; 3754 case 'b': 3755 case 'r': 3756 case 'h': 3757 case 'c': 3758 case 'l': 3759 case 'f': 3760 case 'd': 3761 case '0': 3762 case 'N': 3763 return C_RegisterClass; 3764 } 3765 } 3766 return TargetLowering::getConstraintType(Constraint); 3767 } 3768 3769 std::pair<unsigned, const TargetRegisterClass *> 3770 NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 3771 StringRef Constraint, 3772 MVT VT) const { 3773 if (Constraint.size() == 1) { 3774 switch (Constraint[0]) { 3775 case 'b': 3776 return std::make_pair(0U, &NVPTX::Int1RegsRegClass); 3777 case 'c': 3778 return std::make_pair(0U, &NVPTX::Int16RegsRegClass); 3779 case 'h': 3780 return std::make_pair(0U, &NVPTX::Int16RegsRegClass); 3781 case 'r': 3782 return std::make_pair(0U, &NVPTX::Int32RegsRegClass); 3783 case 'l': 3784 case 'N': 3785 return std::make_pair(0U, &NVPTX::Int64RegsRegClass); 3786 case 'f': 3787 return std::make_pair(0U, &NVPTX::Float32RegsRegClass); 3788 case 'd': 3789 return std::make_pair(0U, &NVPTX::Float64RegsRegClass); 3790 } 3791 } 3792 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 3793 } 3794 3795 //===----------------------------------------------------------------------===// 3796 // NVPTX DAG Combining 3797 //===----------------------------------------------------------------------===// 3798 3799 bool NVPTXTargetLowering::allowFMA(MachineFunction &MF, 3800 CodeGenOpt::Level OptLevel) const { 3801 const Function *F = MF.getFunction(); 3802 const TargetOptions &TO = MF.getTarget().Options; 3803 3804 // Always honor command-line argument 3805 if (FMAContractLevelOpt.getNumOccurrences() > 0) { 3806 return FMAContractLevelOpt > 0; 3807 } else if (OptLevel == 0) { 3808 // Do not contract if we're not optimizing the code 3809 return 
false;
  } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) {
    // Honor TargetOptions flags that explicitly say fusion is okay
    return true;
  } else if (F->hasFnAttribute("unsafe-fp-math")) {
    // Check for unsafe-fp-math=true coming from Clang
    Attribute Attr = F->getFnAttribute("unsafe-fp-math");
    StringRef Val = Attr.getValueAsString();
    if (Val == "true")
      return true;
  }

  // We did not have a clear indication that fusion is allowed, so assume not
  return false;
}

/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
/// operands.
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
                                             TargetLowering::DAGCombinerInfo &DCI,
                                             const NVPTXSubtarget &Subtarget,
                                             CodeGenOpt::Level OptLevel) {
  SelectionDAG &DAG = DCI.DAG;
  // Skip the non-integer, non-scalar case
  EVT VT = N0.getValueType();
  if (VT.isVector())
    return SDValue();

  // fold (add (mul a, b), c) -> (mad a, b, c)
  //
  if (N0.getOpcode() == ISD::MUL) {
    assert(VT.isInteger());
    // For integer:
    // Since integer multiply-add costs the same as integer multiply
    // but is more costly than integer add, do the fusion only when
    // the mul is only used in the add.
    if (OptLevel == CodeGenOpt::None || VT != MVT::i32 ||
        !N0.getNode()->hasOneUse())
      return SDValue();

    // Do the folding
    return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1), N1);
  } else if (N0.getOpcode() == ISD::FMUL) {
    if (VT == MVT::f32 || VT == MVT::f64) {
      const auto *TLI = static_cast<const NVPTXTargetLowering *>(
          &DAG.getTargetLoweringInfo());
      if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
        return SDValue();

      // For floating point:
      // Do the fusion only when the mul has fewer than 5 uses and all of
      // them are adds.
      // The heuristic is that if a use is not an add, then that use cannot
      // be fused into an fma, so the mul is still needed anyway.
      // If there are more than 4 uses, even if they are all adds, fusing
      // them will increase register pressure.
      //
      int numUses = 0;
      int nonAddCount = 0;
      for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
                                UE = N0.getNode()->use_end();
           UI != UE; ++UI) {
        numUses++;
        SDNode *User = *UI;
        if (User->getOpcode() != ISD::FADD)
          ++nonAddCount;
      }
      if (numUses >= 5)
        return SDValue();
      if (nonAddCount) {
        int orderNo = N->getIROrder();
        int orderNo2 = N0.getNode()->getIROrder();
        // Simple heuristic for estimating potential register pressure: the
        // difference in IR order measures the distance between the def and
        // this use, and the longer that distance, the more likely the fusion
        // is to increase register pressure.
        if (orderNo - orderNo2 < 500)
          return SDValue();

        // Now, check if at least one of the FMUL's operands is live beyond
        // node N, which guarantees that the FMA will not increase register
        // pressure at node N.
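        // For example, if one of the FMUL's multiplicands is also read by a
        // node ordered after N, that operand is already live past N, so
        // forming the FMA cannot lengthen its live range.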
3894 bool opIsLive = false; 3895 const SDNode *left = N0.getOperand(0).getNode(); 3896 const SDNode *right = N0.getOperand(1).getNode(); 3897 3898 if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right)) 3899 opIsLive = true; 3900 3901 if (!opIsLive) 3902 for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) { 3903 SDNode *User = *UI; 3904 int orderNo3 = User->getIROrder(); 3905 if (orderNo3 > orderNo) { 3906 opIsLive = true; 3907 break; 3908 } 3909 } 3910 3911 if (!opIsLive) 3912 for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) { 3913 SDNode *User = *UI; 3914 int orderNo3 = User->getIROrder(); 3915 if (orderNo3 > orderNo) { 3916 opIsLive = true; 3917 break; 3918 } 3919 } 3920 3921 if (!opIsLive) 3922 return SDValue(); 3923 } 3924 3925 return DAG.getNode(ISD::FMA, SDLoc(N), VT, 3926 N0.getOperand(0), N0.getOperand(1), N1); 3927 } 3928 } 3929 3930 return SDValue(); 3931 } 3932 3933 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 3934 /// 3935 static SDValue PerformADDCombine(SDNode *N, 3936 TargetLowering::DAGCombinerInfo &DCI, 3937 const NVPTXSubtarget &Subtarget, 3938 CodeGenOpt::Level OptLevel) { 3939 SDValue N0 = N->getOperand(0); 3940 SDValue N1 = N->getOperand(1); 3941 3942 // First try with the default operand order. 3943 SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, 3944 OptLevel); 3945 if (Result.getNode()) 3946 return Result; 3947 3948 // If that didn't work, try again with the operands commuted. 3949 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel); 3950 } 3951 3952 static SDValue PerformANDCombine(SDNode *N, 3953 TargetLowering::DAGCombinerInfo &DCI) { 3954 // The type legalizer turns a vector load of i8 values into a zextload to i16 3955 // registers, optionally ANY_EXTENDs it (if target type is integer), 3956 // and ANDs off the high 8 bits. Since we turn this load into a 3957 // target-specific DAG node, the DAG combiner fails to eliminate these AND 3958 // nodes. Do that here. 3959 SDValue Val = N->getOperand(0); 3960 SDValue Mask = N->getOperand(1); 3961 3962 if (isa<ConstantSDNode>(Val)) { 3963 std::swap(Val, Mask); 3964 } 3965 3966 SDValue AExt; 3967 // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and 3968 if (Val.getOpcode() == ISD::ANY_EXTEND) { 3969 AExt = Val; 3970 Val = Val->getOperand(0); 3971 } 3972 3973 if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) { 3974 Val = Val->getOperand(0); 3975 } 3976 3977 if (Val->getOpcode() == NVPTXISD::LoadV2 || 3978 Val->getOpcode() == NVPTXISD::LoadV4) { 3979 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask); 3980 if (!MaskCnst) { 3981 // Not an AND with a constant 3982 return SDValue(); 3983 } 3984 3985 uint64_t MaskVal = MaskCnst->getZExtValue(); 3986 if (MaskVal != 0xff) { 3987 // Not an AND that chops off top 8 bits 3988 return SDValue(); 3989 } 3990 3991 MemSDNode *Mem = dyn_cast<MemSDNode>(Val); 3992 if (!Mem) { 3993 // Not a MemSDNode?!? 
3994 return SDValue(); 3995 } 3996 3997 EVT MemVT = Mem->getMemoryVT(); 3998 if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) { 3999 // We only handle the i8 case 4000 return SDValue(); 4001 } 4002 4003 unsigned ExtType = 4004 cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))-> 4005 getZExtValue(); 4006 if (ExtType == ISD::SEXTLOAD) { 4007 // If for some reason the load is a sextload, the and is needed to zero 4008 // out the high 8 bits 4009 return SDValue(); 4010 } 4011 4012 bool AddTo = false; 4013 if (AExt.getNode() != 0) { 4014 // Re-insert the ext as a zext. 4015 Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), 4016 AExt.getValueType(), Val); 4017 AddTo = true; 4018 } 4019 4020 // If we get here, the AND is unnecessary. Just replace it with the load 4021 DCI.CombineTo(N, Val, AddTo); 4022 } 4023 4024 return SDValue(); 4025 } 4026 4027 static SDValue PerformSELECTCombine(SDNode *N, 4028 TargetLowering::DAGCombinerInfo &DCI) { 4029 // Currently this detects patterns for integer min and max and 4030 // lowers them to PTX-specific intrinsics that enable hardware 4031 // support. 4032 4033 const SDValue Cond = N->getOperand(0); 4034 if (Cond.getOpcode() != ISD::SETCC) return SDValue(); 4035 4036 const SDValue LHS = Cond.getOperand(0); 4037 const SDValue RHS = Cond.getOperand(1); 4038 const SDValue True = N->getOperand(1); 4039 const SDValue False = N->getOperand(2); 4040 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) 4041 return SDValue(); 4042 4043 const EVT VT = N->getValueType(0); 4044 if (VT != MVT::i32 && VT != MVT::i64) return SDValue(); 4045 4046 const ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 4047 SDValue Larger; // The larger of LHS and RHS when condition is true. 4048 switch (CC) { 4049 case ISD::SETULT: 4050 case ISD::SETULE: 4051 case ISD::SETLT: 4052 case ISD::SETLE: 4053 Larger = RHS; 4054 break; 4055 4056 case ISD::SETGT: 4057 case ISD::SETGE: 4058 case ISD::SETUGT: 4059 case ISD::SETUGE: 4060 Larger = LHS; 4061 break; 4062 4063 default: 4064 return SDValue(); 4065 } 4066 const bool IsMax = (Larger == True); 4067 const bool IsSigned = ISD::isSignedIntSetCC(CC); 4068 4069 unsigned IntrinsicId; 4070 if (VT == MVT::i32) { 4071 if (IsSigned) 4072 IntrinsicId = IsMax ? Intrinsic::nvvm_max_i : Intrinsic::nvvm_min_i; 4073 else 4074 IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui : Intrinsic::nvvm_min_ui; 4075 } else { 4076 assert(VT == MVT::i64); 4077 if (IsSigned) 4078 IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll : Intrinsic::nvvm_min_ll; 4079 else 4080 IntrinsicId = IsMax ? Intrinsic::nvvm_max_ull : Intrinsic::nvvm_min_ull; 4081 } 4082 4083 SDLoc DL(N); 4084 return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, 4085 DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS); 4086 } 4087 4088 enum OperandSignedness { 4089 Signed = 0, 4090 Unsigned, 4091 Unknown 4092 }; 4093 4094 /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand 4095 /// that can be demoted to \p OptSize bits without loss of information. The 4096 /// signedness of the operand, if determinable, is placed in \p S. 
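/// For example, an i32 operand produced by (sext i16 x) can be demoted to
/// 16 bits and is reported as Signed, while (zext i16 x) is reported as
/// Unsigned.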
4097 static bool IsMulWideOperandDemotable(SDValue Op, 4098 unsigned OptSize, 4099 OperandSignedness &S) { 4100 S = Unknown; 4101 4102 if (Op.getOpcode() == ISD::SIGN_EXTEND || 4103 Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { 4104 EVT OrigVT = Op.getOperand(0).getValueType(); 4105 if (OrigVT.getSizeInBits() <= OptSize) { 4106 S = Signed; 4107 return true; 4108 } 4109 } else if (Op.getOpcode() == ISD::ZERO_EXTEND) { 4110 EVT OrigVT = Op.getOperand(0).getValueType(); 4111 if (OrigVT.getSizeInBits() <= OptSize) { 4112 S = Unsigned; 4113 return true; 4114 } 4115 } 4116 4117 return false; 4118 } 4119 4120 /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can 4121 /// be demoted to \p OptSize bits without loss of information. If the operands 4122 /// contain a constant, it should appear as the RHS operand. The signedness of 4123 /// the operands is placed in \p IsSigned. 4124 static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, 4125 unsigned OptSize, 4126 bool &IsSigned) { 4127 4128 OperandSignedness LHSSign; 4129 4130 // The LHS operand must be a demotable op 4131 if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign)) 4132 return false; 4133 4134 // We should have been able to determine the signedness from the LHS 4135 if (LHSSign == Unknown) 4136 return false; 4137 4138 IsSigned = (LHSSign == Signed); 4139 4140 // The RHS can be a demotable op or a constant 4141 if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) { 4142 APInt Val = CI->getAPIntValue(); 4143 if (LHSSign == Unsigned) { 4144 return Val.isIntN(OptSize); 4145 } else { 4146 return Val.isSignedIntN(OptSize); 4147 } 4148 } else { 4149 OperandSignedness RHSSign; 4150 if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign)) 4151 return false; 4152 4153 return LHSSign == RHSSign; 4154 } 4155 } 4156 4157 /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply 4158 /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform 4159 /// works on both multiply DAG nodes and SHL DAG nodes with a constant shift 4160 /// amount. 4161 static SDValue TryMULWIDECombine(SDNode *N, 4162 TargetLowering::DAGCombinerInfo &DCI) { 4163 EVT MulType = N->getValueType(0); 4164 if (MulType != MVT::i32 && MulType != MVT::i64) { 4165 return SDValue(); 4166 } 4167 4168 SDLoc DL(N); 4169 unsigned OptSize = MulType.getSizeInBits() >> 1; 4170 SDValue LHS = N->getOperand(0); 4171 SDValue RHS = N->getOperand(1); 4172 4173 // Canonicalize the multiply so the constant (if any) is on the right 4174 if (N->getOpcode() == ISD::MUL) { 4175 if (isa<ConstantSDNode>(LHS)) { 4176 std::swap(LHS, RHS); 4177 } 4178 } 4179 4180 // If we have a SHL, determine the actual multiply amount 4181 if (N->getOpcode() == ISD::SHL) { 4182 ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS); 4183 if (!ShlRHS) { 4184 return SDValue(); 4185 } 4186 4187 APInt ShiftAmt = ShlRHS->getAPIntValue(); 4188 unsigned BitWidth = MulType.getSizeInBits(); 4189 if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) { 4190 APInt MulVal = APInt(BitWidth, 1) << ShiftAmt; 4191 RHS = DCI.DAG.getConstant(MulVal, DL, MulType); 4192 } else { 4193 return SDValue(); 4194 } 4195 } 4196 4197 bool Signed; 4198 // Verify that our operands are demotable 4199 if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) { 4200 return SDValue(); 4201 } 4202 4203 EVT DemotedVT; 4204 if (MulType == MVT::i32) { 4205 DemotedVT = MVT::i16; 4206 } else { 4207 DemotedVT = MVT::i32; 4208 } 4209 4210 // Truncate the operands to the correct size. 
// Note that these are just for type consistency and will (likely) be
// eliminated in later phases.
  SDValue TruncLHS =
      DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS);
  SDValue TruncRHS =
      DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS);

  unsigned Opc;
  if (Signed) {
    Opc = NVPTXISD::MUL_WIDE_SIGNED;
  } else {
    Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
  }

  return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS);
}

/// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
static SDValue PerformMULCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 CodeGenOpt::Level OptLevel) {
  if (OptLevel > 0) {
    // Try mul.wide combining at OptLevel > 0
    SDValue Ret = TryMULWIDECombine(N, DCI);
    if (Ret.getNode())
      return Ret;
  }

  return SDValue();
}

/// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
static SDValue PerformSHLCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 CodeGenOpt::Level OptLevel) {
  if (OptLevel > 0) {
    // Try mul.wide combining at OptLevel > 0
    SDValue Ret = TryMULWIDECombine(N, DCI);
    if (Ret.getNode())
      return Ret;
  }

  return SDValue();
}

SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD:
  case ISD::FADD:
    return PerformADDCombine(N, DCI, STI, OptLevel);
  case ISD::MUL:
    return PerformMULCombine(N, DCI, OptLevel);
  case ISD::SHL:
    return PerformSHLCombine(N, DCI, OptLevel);
  case ISD::AND:
    return PerformANDCombine(N, DCI);
  case ISD::SELECT:
    return PerformSELECTCombine(N, DCI);
  }
  return SDValue();
}

/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
                              SmallVectorImpl<SDValue> &Results) {
  EVT ResVT = N->getValueType(0);
  SDLoc DL(N);

  assert(ResVT.isVector() && "Vector load must have vector type");

  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
  // legal. We can (and should) split that into 2 loads of <2 x double> here
  // but I'm leaving that as a TODO for now.
  assert(ResVT.isSimple() && "Can only handle simple types");
  switch (ResVT.getSimpleVT().SimpleTy) {
  default:
    return;
  case MVT::v2i8:
  case MVT::v2i16:
  case MVT::v2i32:
  case MVT::v2i64:
  case MVT::v2f32:
  case MVT::v2f64:
  case MVT::v4i8:
  case MVT::v4i16:
  case MVT::v4i32:
  case MVT::v4f32:
    // This is a "native" vector type
    break;
  }

  LoadSDNode *LD = cast<LoadSDNode>(N);

  unsigned Align = LD->getAlignment();
  auto &TD = DAG.getDataLayout();
  unsigned PrefAlign =
      TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
  if (Align < PrefAlign) {
    // This load is not sufficiently aligned, so bail out and let this vector
    // load be scalarized. Note that we may still be able to emit smaller
    // vector loads. For example, if we are loading a <4 x float> with an
    // alignment of 8, this check will fail but the legalizer will try again
    // with 2 x <2 x float>, which will succeed with an alignment of 8.
4316 return; 4317 } 4318 4319 EVT EltVT = ResVT.getVectorElementType(); 4320 unsigned NumElts = ResVT.getVectorNumElements(); 4321 4322 // Since LoadV2 is a target node, we cannot rely on DAG type legalization. 4323 // Therefore, we must ensure the type is legal. For i1 and i8, we set the 4324 // loaded type to i16 and propagate the "real" type as the memory type. 4325 bool NeedTrunc = false; 4326 if (EltVT.getSizeInBits() < 16) { 4327 EltVT = MVT::i16; 4328 NeedTrunc = true; 4329 } 4330 4331 unsigned Opcode = 0; 4332 SDVTList LdResVTs; 4333 4334 switch (NumElts) { 4335 default: 4336 return; 4337 case 2: 4338 Opcode = NVPTXISD::LoadV2; 4339 LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); 4340 break; 4341 case 4: { 4342 Opcode = NVPTXISD::LoadV4; 4343 EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; 4344 LdResVTs = DAG.getVTList(ListVTs); 4345 break; 4346 } 4347 } 4348 4349 // Copy regular operands 4350 SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end()); 4351 4352 // The select routine does not have access to the LoadSDNode instance, so 4353 // pass along the extension information 4354 OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL)); 4355 4356 SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, 4357 LD->getMemoryVT(), 4358 LD->getMemOperand()); 4359 4360 SmallVector<SDValue, 4> ScalarRes; 4361 4362 for (unsigned i = 0; i < NumElts; ++i) { 4363 SDValue Res = NewLD.getValue(i); 4364 if (NeedTrunc) 4365 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); 4366 ScalarRes.push_back(Res); 4367 } 4368 4369 SDValue LoadChain = NewLD.getValue(NumElts); 4370 4371 SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes); 4372 4373 Results.push_back(BuildVec); 4374 Results.push_back(LoadChain); 4375 } 4376 4377 static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, 4378 SmallVectorImpl<SDValue> &Results) { 4379 SDValue Chain = N->getOperand(0); 4380 SDValue Intrin = N->getOperand(1); 4381 SDLoc DL(N); 4382 4383 // Get the intrinsic ID 4384 unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue(); 4385 switch (IntrinNo) { 4386 default: 4387 return; 4388 case Intrinsic::nvvm_ldg_global_i: 4389 case Intrinsic::nvvm_ldg_global_f: 4390 case Intrinsic::nvvm_ldg_global_p: 4391 case Intrinsic::nvvm_ldu_global_i: 4392 case Intrinsic::nvvm_ldu_global_f: 4393 case Intrinsic::nvvm_ldu_global_p: { 4394 EVT ResVT = N->getValueType(0); 4395 4396 if (ResVT.isVector()) { 4397 // Vector LDG/LDU 4398 4399 unsigned NumElts = ResVT.getVectorNumElements(); 4400 EVT EltVT = ResVT.getVectorElementType(); 4401 4402 // Since LDU/LDG are target nodes, we cannot rely on DAG type 4403 // legalization. 4404 // Therefore, we must ensure the type is legal. For i1 and i8, we set the 4405 // loaded type to i16 and propagate the "real" type as the memory type. 
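      // For example, a v2i8 ldg is emitted as an LDGV2 node producing two i16
      // values with an i8 memory VT; the results are truncated back to i8
      // below when the vector is rebuilt.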
      bool NeedTrunc = false;
      if (EltVT.getSizeInBits() < 16) {
        EltVT = MVT::i16;
        NeedTrunc = true;
      }

      unsigned Opcode = 0;
      SDVTList LdResVTs;

      switch (NumElts) {
      default:
        return;
      case 2:
        switch (IntrinNo) {
        default:
          return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV2;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV2;
          break;
        }
        LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
        break;
      case 4: {
        switch (IntrinNo) {
        default:
          return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV4;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV4;
          break;
        }
        EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
        LdResVTs = DAG.getVTList(ListVTs);
        break;
      }
      }

      // Copy regular operands
      SmallVector<SDValue, 8> OtherOps;
      OtherOps.push_back(Chain); // Chain
      // Skip operand 1 (intrinsic ID), copy the others
      OtherOps.append(N->op_begin() + 2, N->op_end());

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
                                              MemSD->getMemoryVT(),
                                              MemSD->getMemOperand());

      SmallVector<SDValue, 4> ScalarRes;

      for (unsigned i = 0; i < NumElts; ++i) {
        SDValue Res = NewLD.getValue(i);
        if (NeedTrunc)
          Res =
              DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
        ScalarRes.push_back(Res);
      }

      SDValue LoadChain = NewLD.getValue(NumElts);

      SDValue BuildVec =
          DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);

      Results.push_back(BuildVec);
      Results.push_back(LoadChain);
    } else {
      // i8 LDG/LDU
      assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
             "Custom handling of non-i8 ldu/ldg?");

      // Just copy all operands as-is
      SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());

      // Force output to i16
      SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      // We make sure the memory type is i8, which will be used during isel
      // to select the proper instruction.
      SDValue NewLD =
          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
                                  MVT::i8, MemSD->getMemOperand());

      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
                                    NewLD.getValue(0)));
      Results.push_back(NewLD.getValue(1));
    }
  }
  }
}

void NVPTXTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    report_fatal_error("Unhandled custom legalization");
  case ISD::LOAD:
    ReplaceLoadVector(N, DAG, Results);
    return;
  case ISD::INTRINSIC_W_CHAIN:
    ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
    return;
  }
}

// Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file.
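// Defining one out-of-line virtual method per class here (the anchor and the
// destructor below) gives each vtable a single home translation unit, so the
// compiler does not have to emit weak copies of the vtable and RTTI in every
// file that includes the corresponding header.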
void NVPTXSection::anchor() {}

NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
  delete static_cast<NVPTXSection *>(TextSection);
  delete static_cast<NVPTXSection *>(DataSection);
  delete static_cast<NVPTXSection *>(BSSSection);
  delete static_cast<NVPTXSection *>(ReadOnlySection);

  delete static_cast<NVPTXSection *>(StaticCtorSection);
  delete static_cast<NVPTXSection *>(StaticDtorSection);
  delete static_cast<NVPTXSection *>(LSDASection);
  delete static_cast<NVPTXSection *>(EHFrameSection);
  delete static_cast<NVPTXSection *>(DwarfAbbrevSection);
  delete static_cast<NVPTXSection *>(DwarfInfoSection);
  delete static_cast<NVPTXSection *>(DwarfLineSection);
  delete static_cast<NVPTXSection *>(DwarfFrameSection);
  delete static_cast<NVPTXSection *>(DwarfPubTypesSection);
  delete static_cast<const NVPTXSection *>(DwarfDebugInlineSection);
  delete static_cast<NVPTXSection *>(DwarfStrSection);
  delete static_cast<NVPTXSection *>(DwarfLocSection);
  delete static_cast<NVPTXSection *>(DwarfARangesSection);
  delete static_cast<NVPTXSection *>(DwarfRangesSection);
}

MCSection *
NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
                                              SectionKind Kind, Mangler &Mang,
                                              const TargetMachine &TM) const {
  // PTX output does not use object-file sections, so every global is assigned
  // to the single placeholder data section.
  return getDataSection();
}